当前位置: 首页 > 工具软件 > SiLK > 使用案例 >

Silk和PCM数据之间的转换

甄德寿
2023-12-01

目录

一、使用场景

二、术语介绍

三、如何使用

四、参考资料


 

一、使用场景

        产品需求是开发一款名叫微信助手的即时通讯APP,安装在汽车中控台上,在行车过程中使用,使用者包括驾驶员与车内其它成员,主要针对驾驶者。此应用接收、发送消息的接口基于第三方微控提供的API,我们根据交互文档在API基础上封装业务逻辑,开发一款定制UI的类似微信的APP,驾驶者在行车过程中可以使用语音进行交互,减少了驾驶的危险性。

       微信的语音消息默认格式为silk,所以车机端接收到其它终端发送过来的语音消息是没有办法直接播放的,必须要转成pcm格式才能使用Android的控件进行播放,并且Android录制的pcm数据也需要转成silk发送出去,微信终端才能正常播放,所以这里就涉及到silk和pcm格式的互转,其它有用到silk语音格式的同学也可以参考。

 

二、术语介绍

1.silk语音格式

    silk是一种轻量级、体积小、音质高的音频文件格式。

2.pcm语音格式

    pcm是语音文件的原始数据,这种数据声卡可以直接播放。

3.SILKCodec库 

    它是由Skype向第三方开发人员和硬件制造商提供免版税认证(RF)的Silk宽带音频编码器。

 

三、如何使用

1.silk转pcm

#include <jni.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "time.h"

/* Define codec specific settings */
/* Buffer-size limits. In the code visible here the *_DEC_*, MAX_INPUT_FRAMES,
 * MAX_LBRR_DELAY and MAX_FRAME_LENGTH values are only used to dimension the
 * file-scope payloadToDec[] / out[] arrays and the local codec buffers. */
#define MAX_BYTES_ENC_PER_FRAME     250 // Equals peak bitrate of 100 kbps
#define MAX_BYTES_DEC_PER_FRAME     1024

#define MAX_INPUT_FRAMES        5       // also the frame-count reset threshold in the decode loop
#define MAX_LBRR_DELAY          2
#define MAX_FRAME_LENGTH        480

// NOTE(review): MAX_FRAME is not referenced anywhere in the visible code.
#define	MAX_FRAME			160

#include <android/log.h>
#include <SKP_Silk_control.h>

// NOTE(review): the visible code logs with TAG ("SILK"), not LOG_TAG.
#define LOG_TAG "silk" // text for log tag

#include "SKP_Silk_SDK_API.h"
#include "SKP_Silk_SigProc_FIX.h"

#undef DEBUG_SILK8

// the header length of the RTP frame (must skip when en/decoding)
#define	RTP_HDR_SIZE	12

////////////////////////////////////////////////////////////////////////////////
/* Thin wrappers around __android_log_print for info / error severity.
 * The first argument is the log tag, the rest is a printf-style format
 * string followed by its arguments. */
#define LOG_I(tag, ...)    __android_log_print(ANDROID_LOG_INFO, tag, __VA_ARGS__)
#define LOG_E(tag, ...)    __android_log_print(ANDROID_LOG_ERROR, tag, __VA_ARGS__)

/* Log tag passed to LOG_I / LOG_E by the conversion functions. */
#define TAG "SILK"
#define ERROR_BAD_VALUE -2

/* Limits used to size the local payload / sample buffers of the converters. */
#define MAX_BYTES_PER_FRAME     1024
/* MAX_INPUT_FRAMES (5) is already defined with the codec settings above. */
#define FRAME_LENGTH_MS         20
#define MAX_API_FS_KHZ          48
////////////////////////////////////////////////////////////////////////////////

/* File-scope codec state.
 * NOTE(review): the two JNI functions shown in this article do not appear to
 * read any of these variables (they work on their own local state); confirm
 * against the rest of the file before relying on or removing them. */
static int codec_open = 0;

static JavaVM *gJavaVM;
/* NOTE(review): this class path differs from the package encoded in the JNI
 * function names in this file (com.cneeds.silkcodec.lib.SILK8) - confirm it is
 * still needed. */
const char *kInterfacePath = "org/sipdroid/pjlib/silk8";

/* encoder parameters */

SKP_int32 encSizeBytes;
void      *psEnc;

/* default settings */
SKP_int   fs_kHz = 8;
SKP_int   targetRate_bps = 20000;
SKP_int   packetSize_ms = 20;
SKP_int   frameSizeReadFromFile_ms = 20;
SKP_int   packetLoss_perc = 0, smplsSinceLastPacket;
SKP_int   INBandFec_enabled = 0, DTX_enabled = 0, quiet = 0;
SKP_SILK_SDK_EncControlStruct encControl; // Struct for input to encoder


/* decoder parameters */

/* Sized as MAX_BYTES_DEC_PER_FRAME * MAX_INPUT_FRAMES * (MAX_LBRR_DELAY + 1) bytes. */
jbyte payloadToDec[    MAX_BYTES_DEC_PER_FRAME * MAX_INPUT_FRAMES * ( MAX_LBRR_DELAY + 1 ) ];
/* Sized as 2 * MAX_FRAME_LENGTH * MAX_INPUT_FRAMES samples. */
jshort out[ ( MAX_FRAME_LENGTH << 1 ) * MAX_INPUT_FRAMES ], *outPtr;
SKP_int32 decSizeBytes;
void      *psDec;
SKP_SILK_SDK_DecControlStruct DecControl;
//silk->pcm
//inputPath_    //silk文件输入路径
//sampleRate    //微信采用8000
//outputPath_   //pcm文件输出路径
extern "C"
JNIEXPORT jboolean JNICALL Java_com_cneeds_silkcodec_lib_SILK8_silk2PCM
        (JNIEnv *env, jobject obj,jstring inputPath_,jint sampleRate,jstring outputPath_){
    const char *inputPath = env->GetStringUTFChars(inputPath_, 0);
    const char *outputPath = env->GetStringUTFChars(outputPath_, 0);

    unsigned long totTime, startTime;
    double fileLength;
    size_t counter;
    SKP_int32 ret, tot_len, totPackets;
    SKP_int32 decSizeBytes, frames, packetSize_ms = 0;
    SKP_int16 nBytes, len;
    SKP_uint8 payload[MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES], *payloadToDec = NULL;
    SKP_int16 out[((FRAME_LENGTH_MS * MAX_API_FS_KHZ) << 1) * MAX_INPUT_FRAMES], *outPtr;
    void *psDec;
    FILE *inFile, *outFile;
    SKP_SILK_SDK_DecControlStruct DecControl;

    LOG_I(TAG, "********** Silk Decoder (Fixed Point) v %s ********************",
          SKP_Silk_SDK_get_version());
    LOG_I(TAG, "********** Compiled for %d bit cpu *******************************",
          (int) sizeof(void *) * 8);
    LOG_I(TAG, "Input:                       %s", inputPath);
    LOG_I(TAG, "Output:                      %s", outputPath);

    // 打开输入文件
    inFile = fopen(inputPath, "rb");
    if (inFile == NULL) {
        LOG_E(TAG, "Error: could not open input file %s", inputPath);
        return false;
    }

    // 验证文件头(微信silk头文件)
    LOG_I(TAG, "验证微信silk头文件[.#!SILK_V3]");
    LOG_I(TAG, "验证微信silk头文件需要跳过第一个字节 begin");
    fseek(inFile,1,0);
    LOG_I(TAG, "验证微信silk头文件需要跳过第一个字节 end");

    //验证文件头
    {
        char header_buf[50];
        fread(header_buf, sizeof(char), strlen("#!SILK_V3"), inFile);
        header_buf[strlen("#!SILK_V3")] = '\0';
        if (strcmp(header_buf, "#!SILK_V3") != 0) {
            LOG_E(TAG, "Error: Wrong Header %s", header_buf);
            return false;
        }
        LOG_I(TAG, "Header is \"%s\"", header_buf);
    }

    // 打开输出文件
    outFile = fopen(outputPath, "wb");
    if (outFile == NULL) {
        LOG_E(TAG, "Error: could not open output file %s", outputPath);
        return false;
    }

    // 设置采样率
    if (sampleRate == 0) {
        DecControl.API_sampleRate = 8000;
    } else {
        DecControl.API_sampleRate = sampleRate;
    }

    // 获取 Silk 解码器状态的字节大小
    ret = SKP_Silk_SDK_Get_Decoder_Size(&decSizeBytes);
    if (ret) {
        LOG_E(TAG, "SKP_Silk_SDK_Get_Decoder_Size returned %d", ret);
    }

    psDec = malloc((size_t) decSizeBytes);

    // 初始化或充值解码器
    ret = SKP_Silk_SDK_InitDecoder(psDec);
    if (ret) {
        LOG_E(TAG, "SKP_Silk_SDK_InitDecoder returned %d", ret);
    }

    totPackets = 0;
    totTime = 0;

    while (1) {
        // 读取有效数据大小
        counter = fread(&nBytes, sizeof(SKP_int16), 1, inFile);
        if (nBytes < 0 || counter < 1) {
            break;
        }
        // 读取有效数据
        counter = fread(payload, sizeof(SKP_uint8), (size_t) nBytes, inFile);
        if ((SKP_int16) counter < nBytes) {
            break;
        }

        payloadToDec = payload;

        outPtr = out;
        tot_len = 0;
        startTime = GetHighResolutionTime();

        frames = 0;
        do {
            // 解码
            ret = SKP_Silk_SDK_Decode(psDec, &DecControl, 0, payloadToDec, nBytes, outPtr, &len);
            if (ret) {
                LOG_E(TAG, "SKP_Silk_SDK_Decode returned %d", ret);
            }

            frames++;
            outPtr += len;
            tot_len += len;
            if (frames > MAX_INPUT_FRAMES) {
                outPtr = out;
                tot_len = 0;
                frames = 0;
            }
        } while (DecControl.moreInternalDecoderFrames);

        packetSize_ms = tot_len / (DecControl.API_sampleRate / 1000);
        totTime += GetHighResolutionTime() - startTime;
        totPackets++;
        // 将解码后的数据保存到文件
        fwrite(out, sizeof(SKP_int16), (size_t) tot_len, outFile);
    }

    LOG_I(TAG, "Packets decoded:             %d", totPackets);
    LOG_I(TAG, "Decoding Finished");

    free(psDec);

    fclose(outFile);
    fclose(inFile);

    fileLength = totPackets * 1e-3 * packetSize_ms;

    LOG_I(TAG, "File length:                 %.3f s", fileLength);
    LOG_I(TAG, "Time for decoding:           %.3f s (%.3f%% of realTime)", 1e-6 * totTime,
          1e-4 * totTime / fileLength);

    env->ReleaseStringUTFChars(inputPath_, inputPath);
    env->ReleaseStringUTFChars(outputPath_, outputPath);
    return true;
}

2.pcm转silk


//pcm->silk
//inputPath_    //pcm文件输入路径
//outputPath_   //sil文件输出路径
extern "C"
JNIEXPORT jboolean JNICALL Java_com_cneeds_silkcodec_lib_SILK8_pcm2SILK
        (JNIEnv *env, jobject obj,jstring inputPath_,jstring outputPath_){
    const char *inputPath = env->GetStringUTFChars(inputPath_, 0);
    const char *outputPath = env->GetStringUTFChars(outputPath_, 0);

    LOG_I(TAG, "********** Silk Decoder (Fixed Point) v %s ********************",
          SKP_Silk_SDK_get_version());
    LOG_I(TAG, "********** Compiled for %d bit cpu *******************************",
          (int) sizeof(void *) * 8);
    LOG_I(TAG, "Input:                       %s", inputPath);
    LOG_I(TAG, "Output:                      %s", outputPath);

    unsigned long tottime, starttime;
    double    filetime;
    size_t    counter;
    SKP_int32 k, args, totPackets, totActPackets, ret;
    SKP_int16 nBytes;
    double    sumBytes, sumActBytes, avg_rate, act_rate, nrg;
    SKP_uint8 payload[ MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES ];
    SKP_int16 in[ FRAME_LENGTH_MS * MAX_API_FS_KHZ * MAX_INPUT_FRAMES ];
//    char      speechInFileName[ 150 ], bitOutFileName[ 150 ];
    FILE      *bitOutFile, *speechInFile;
    SKP_int32 encSizeBytes;
    void      *psEnc;
#ifdef _SYSTEM_IS_BIG_ENDIAN
    SKP_int16 nBytes_LE;
#endif

    /* default settings */
    SKP_int32 API_fs_Hz = 8000;
    SKP_int32 max_internal_fs_Hz = 0;
    SKP_int32 targetRate_bps = 25000; //?
    SKP_int32 smplsSinceLastPacket, packetSize_ms = 20;
    SKP_int32 frameSizeReadFromFile_ms = 20;
    SKP_int32 packetLoss_perc = 0;
    SKP_int32 complexity_mode = 0;
    SKP_int32 DTX_enabled = 0, INBandFEC_enabled = 0, quiet = 0;
    SKP_SILK_SDK_EncControlStruct encControl; // Struct for input to encoder
    SKP_SILK_SDK_EncControlStruct encStatus;  // Struct for status of encoder

    /* If no max internal is specified, set to minimum of API fs and 24 kHz */
    if( max_internal_fs_Hz == 0 ) {
        max_internal_fs_Hz = 24000;
        if( API_fs_Hz < max_internal_fs_Hz ) {
            max_internal_fs_Hz = API_fs_Hz;
        }
    }

    /* Open files */
    speechInFile = fopen( inputPath, "rb" );
    if( speechInFile == NULL ) {
         LOG_E(TAG, "Error: could not open input file %s", inputPath);
         return false;
    }
    bitOutFile = fopen( outputPath, "wb" );
    if( bitOutFile == NULL ) {
        LOG_E(TAG, "Error: could not open output file %s", outputPath);
        return false;
    }

    /* Add Silk header to stream */
    {
        //微信语音第一个字节是2
        jbyte j = 2;
        fwrite(&j, sizeof(jbyte), 1, bitOutFile );

        static const char Silk_header[] = "#!SILK_V3";
        fwrite( Silk_header, sizeof( char ), strlen( Silk_header ), bitOutFile );
    }

    /* Create Encoder */
    ret = SKP_Silk_SDK_Get_Encoder_Size( &encSizeBytes );
    if( ret ) {
        LOG_E(TAG, "SKP_Silk_SDK_Get_Encoder_Size returned %d", ret);
    }

    psEnc = malloc( encSizeBytes );

    /* Reset Encoder */
    ret = SKP_Silk_SDK_InitEncoder( psEnc, &encStatus );
    if( ret ) {
        LOG_E(TAG, "SKP_Silk_SDK_InitEncoder returned %d", ret);
    }

    /* Set Encoder parameters */
    encControl.API_sampleRate        = API_fs_Hz;
    encControl.maxInternalSampleRate = max_internal_fs_Hz;
    encControl.packetSize            = ( packetSize_ms * API_fs_Hz ) / 1000;
    encControl.packetLossPercentage  = packetLoss_perc;
    encControl.useInBandFEC          = INBandFEC_enabled;
    encControl.useDTX                = DTX_enabled;
    encControl.complexity            = complexity_mode;
    encControl.bitRate               = ( targetRate_bps > 0 ? targetRate_bps : 0 );

    if( API_fs_Hz > MAX_API_FS_KHZ * 1000 || API_fs_Hz < 0 ) {
        LOG_E(TAG, "Error: API sampling rate = %d out of range, valid range 8000 - 48000", API_fs_Hz);
    }

    tottime              = 0;
    totPackets           = 0;
    totActPackets        = 0;
    smplsSinceLastPacket = 0;
    sumBytes             = 0.0;
    sumActBytes          = 0.0;
    smplsSinceLastPacket = 0;

    while( 1 ) {
        /* Read input from file */
        counter = fread( in, sizeof( SKP_int16 ), ( frameSizeReadFromFile_ms * API_fs_Hz ) / 1000, speechInFile );
#ifdef _SYSTEM_IS_BIG_ENDIAN
        swap_endian( in, counter );
#endif
        if( ( SKP_int )counter < ( ( frameSizeReadFromFile_ms * API_fs_Hz ) / 1000 ) ) {
            break;
        }

        /* max payload size */
        nBytes = MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES;

        starttime = GetHighResolutionTime();

        /* Silk Encoder */
        ret = SKP_Silk_SDK_Encode( psEnc, &encControl, in, (SKP_int16)counter, payload, &nBytes );
        if( ret ) {
            printf( "\nSKP_Silk_Encode returned %d", ret );
        }

        tottime += GetHighResolutionTime() - starttime;

        /* Get packet size */
        packetSize_ms = ( SKP_int )( ( 1000 * ( SKP_int32 )encControl.packetSize ) / encControl.API_sampleRate );

        smplsSinceLastPacket += ( SKP_int )counter;

        if( ( ( 1000 * smplsSinceLastPacket ) / API_fs_Hz ) == packetSize_ms ) {
            /* Sends a dummy zero size packet in case of DTX period  */
            /* to make it work with the decoder test program.        */
            /* In practice should be handled by RTP sequence numbers */
            totPackets++;
            sumBytes  += nBytes;
            nrg = 0.0;
            for( k = 0; k < ( SKP_int )counter; k++ ) {
                nrg += in[ k ] * (double)in[ k ];
            }
            if( ( nrg / ( SKP_int )counter ) > 1e3 ) {
                sumActBytes += nBytes;
                totActPackets++;
            }

            /* Write payload size */
#ifdef _SYSTEM_IS_BIG_ENDIAN
            nBytes_LE = nBytes;
            swap_endian( &nBytes_LE, 1 );
            fwrite( &nBytes_LE, sizeof( SKP_int16 ), 1, bitOutFile );
#else
            fwrite( &nBytes, sizeof( SKP_int16 ), 1, bitOutFile );
#endif

            /* Write payload */
            fwrite( payload, sizeof( SKP_uint8 ), nBytes, bitOutFile );

            smplsSinceLastPacket = 0;

            if( !quiet ) {
                fprintf( stderr, "\rPackets encoded:                %d", totPackets );
            }
        }
    }

    /* Write dummy because it can not end with 0 bytes */
    nBytes = -1;

    /* Write payload size */
    fwrite( &nBytes, sizeof( SKP_int16 ), 1, bitOutFile );

    /* Free Encoder */
    free( psEnc );

    fclose( speechInFile );
    fclose( bitOutFile   );

    filetime  = totPackets * 1e-3 * packetSize_ms;
    avg_rate  = 8.0 / packetSize_ms * sumBytes       / totPackets;
    act_rate  = 8.0 / packetSize_ms * sumActBytes    / totActPackets;

    env->ReleaseStringUTFChars(inputPath_, inputPath);
    env->ReleaseStringUTFChars(outputPath_, outputPath);
    return true;
}

四、参考资料

1.SILKCodec源码:

https://github.com/hncsJackchen/SILKCodec

2.SilkSDK 源码

 

 类似资料: