前段时间,因为公司业务要求,需要实现音视频的倍速功能,其实现的原理是将音频解码完后的PCM数据进行倍速处理,然后送给同步模块,以音频为准的同步方式实现音视频的倍速。
环境背景:机顶盒Android系统;
一、sonic源码的demo演示:
GitHub源码:
https://github.com/waywardgeek/sonic.git
代码下载到本地后,可以看到作者提供了java和c的演示demo,其核心代码就是sonic.h和sonic.c。
使用gcc编译出可执行文件:
gcc -I ./ main.c wave.c sonic.c -o test
根据代码注释,带入参数如下:
./test -c -n -q -s 2.0 ./samples/talking.wav ./testfiles/1.wav
这里的talking.wav是作者给的测试WAV文件,大家也可以用自己的测试文件,只是需要确保格式为WAV的,这里有一个小插曲,就是如果大家是使用FFmpeg来将PCM数据转成的WAV格式文件测试的话,会报不支持的格式,原因是文件wav头解析的时候,最后几个字节跟作者代码中不匹配导致的,大家修改一下作者wave.c的文件即可。
二、sonic测试心得:
1.处理后的文件大小跟倍速值成反比(例如 2.0 倍速时,输出数据量约为输入的一半);
2.sonic的处理单位是帧(frame),而不是按采样点数来的,在这里每一帧的概念如下,位宽固定为16bit,从处理API的入口参数类型为short可以看出来,如果是8bit的数据,需要先转成16bit,1 frame = 声道数 * sizeof(short);
3.sonic处理的时候,我们看到作者是写了一个do…while循环:
do {
samplesWritten = sonicReadShortFromStream(stream, outBuffer,
BUFFER_SIZE / numChannels);
if (samplesWritten > 0 && !computeSpectrogram) {
writeToWaveFile(outFile, outBuffer, samplesWritten);
}
} while (samplesWritten > 0);
samplesWritten即sonic处理出来的数据,单位为帧,多次打印这个数值可以发现,并不是每一次送进去的数据都会立刻产出结果,有时候数据会原封不动地出来,这是内部变速算法需要累积足够样本所致;
三、代码封装:
使用sonic代码需要进行封装,头文件:
/* Public wrapper interface around the sonic time-stretch library.
 * NOTE(review): the guard name __JZ_AUDIO_SONIC_H__ uses a reserved
 * identifier (leading double underscore) — consider JZ_AUDIO_SONIC_H. */
#ifndef __JZ_AUDIO_SONIC_H__
#define __JZ_AUDIO_SONIC_H__
#include <stdio.h>
#include "sonic.h"
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif
/* Fixed-width convenience typedefs used throughout the wrapper. */
typedef unsigned int JZ_U32;
typedef signed int JZ_S32;
typedef float JZ_FLOAT;
typedef unsigned short JZ_U16;
typedef signed short JZ_S16;
typedef unsigned char JZ_U8;
typedef signed char JZ_S8;
/* Error codes returned by the JZ_Sonic* API (0 on success,
 * negative-looking 0x8000xxxx values on failure). */
typedef enum JZ_ERRORTYPE_E
{
JZ_ErrorNone = 0,
JZ_ErrorNullPointer = (JZ_S32) 0X80001000,
JZ_ErrorInvalidParameter = (JZ_S32) 0x80001001,
JZ_ErrorEmptyBuffer = (JZ_S32) 0x80001002,
} JZ_ERRORTYPE_E;
/* Singleton configuration/state for one sonic processing instance. */
typedef struct
{
JZ_U32 u32SampleRate;    /* input sample rate in Hz */
JZ_U32 u32Ch;            /* channel count (1 or 2) */
JZ_U32 u32BitPerSample;  /* fixed at 16 by the implementation */
JZ_FLOAT fSpeed;         /* playback speed factor */
sonicStream pstream;     /* underlying sonic stream handle */
JZ_U32 u32OutBufSize;    /* base size used to dimension s16OutBuffer */
JZ_S16* s16OutBuffer;    /* output PCM scratch buffer owned by the wrapper */
FILE* pInFile;           /* optional debug dump of incoming PCM */
FILE* pOutFile;          /* optional debug dump of processed PCM */
} stSonicInstance;
/* Create the sonic stream and allocate the output buffer. */
JZ_S32 JZ_SonicInit();
/* Apply sample rate / channels / speed from pSonicParam after validation. */
JZ_S32 JZ_SonicSetConfig(stSonicInstance* pSonicParam);
/* Process one buffer of 16-bit PCM. u32samples is in/out: byte count in,
 * processed byte count out. Returns a pointer into the internal output
 * buffer, valid until the next call.
 * NOTE(review): return type is JZ_S8* while the data is 16-bit PCM held
 * in a JZ_S16* buffer — callers must treat it as raw bytes; confirm. */
JZ_S8* JZ_SonicProcess(void* pInBuffer, JZ_U32* u32samples);
/* Destroy the stream, free the buffer, close debug files. */
JZ_S32 JZ_SonicDeinit();
#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif
#endif
源文件:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "JZ.AUDIO.SONIC.h"
#include "sonic.h"
#include <utils/Log.h>
//TODO:buffer 大一统
#define LOG_TAG "JZsonic"
#define SONIC_IN_PCM_FILE "/mnt/sonic_in.pcm"
#define SONIC_OUT_PCM_FILE "/mnt/sonic_out.pcm"
#define SONIC_DUMP_DEGUG
static stSonicInstance g_stSonicConfig;
JZ_S32 JZ_SonicInit()
{
g_stSonicConfig.u32BitPerSample = 16;
g_stSonicConfig.u32Ch = 2;
g_stSonicConfig.u32SampleRate = 44100;
g_stSonicConfig.fSpeed = 1.0;
g_stSonicConfig.u32OutBufSize = 15052;
g_stSonicConfig.pstream = sonicCreateStream(g_stSonicConfig.u32SampleRate, g_stSonicConfig.u32Ch);
g_stSonicConfig.s16OutBuffer = (JZ_S8* )malloc(sizeof(JZ_S8) * g_stSonicConfig.u32OutBufSize * 4);
if (!g_stSonicConfig.s16OutBuffer)
{
ALOGE("malloc outbuffer failed!");
return JZ_ErrorNullPointer;
}
sonicSetSpeed(g_stSonicConfig.pstream, 1.0);
sonicSetPitch(g_stSonicConfig.pstream, 1.0);
sonicSetRate(g_stSonicConfig.pstream, 1.0);
sonicSetVolume(g_stSonicConfig.pstream, 1.0);
sonicSetChordPitch(g_stSonicConfig.pstream, 0);
sonicSetQuality(g_stSonicConfig.pstream, 0);
#ifdef SONIC_DUMP_DEGUG
g_stSonicConfig.pInFile = fopen(SONIC_IN_PCM_FILE, "wb");
if (!g_stSonicConfig.pInFile)
{
ALOGE("jztech:failed to open sonic pcm_in file!");
}
g_stSonicConfig.pOutFile = fopen(SONIC_OUT_PCM_FILE, "wb");
if (!g_stSonicConfig.pInFile)
{
ALOGE("jztech:failed to open sonic pcm_out file!");
}
#endif
return JZ_ErrorNone;
}
/* Validate and apply user-supplied audio parameters to the sonic stream.
 * Accepted ranges: 1-2 channels, 8000-96000 Hz, speed in (0, 2].
 * Bit depth is forced to 16 — sonic's short API only handles 16-bit PCM.
 * Returns JZ_ErrorNone on success, JZ_ErrorNullPointer if the wrapper is
 * uninitialized, or JZ_ErrorInvalidParameter on an out-of-range value. */
JZ_S32 JZ_SonicSetConfig(stSonicInstance* pSonicParam)
{
    if ((!pSonicParam) || (!g_stSonicConfig.pstream))
    {
        ALOGE("pSonicParam is a NULL point or sonic don't init!");
        return JZ_ErrorNullPointer;
    }
    /* Bug fix: the old check (u32Ch > 2) let 0 channels through, which
     * would later cause a divide-by-zero when computing frames. */
    if ((pSonicParam->u32Ch == 0) || (pSonicParam->u32Ch > 2))
    {
        /* Format fix: JZ_U32 must be printed with %u, not %d. */
        ALOGE("unsupported channels:%u", pSonicParam->u32Ch);
        return JZ_ErrorInvalidParameter;
    }
    if ((pSonicParam->u32SampleRate < 8000) || (pSonicParam->u32SampleRate > 96000))
    {
        ALOGE("unsupported samplerate:%u", pSonicParam->u32SampleRate);
        return JZ_ErrorInvalidParameter;
    }
    /* Speed 0 is meaningless (infinite stretch), so reject <= 0. */
    if ((pSonicParam->fSpeed <= 0) || (pSonicParam->fSpeed > 2))
    {
        ALOGE("unsupported speed:%f", pSonicParam->fSpeed);
        return JZ_ErrorInvalidParameter;
    }
    g_stSonicConfig.u32BitPerSample = 16;
    g_stSonicConfig.u32Ch = pSonicParam->u32Ch;
    g_stSonicConfig.u32SampleRate = pSonicParam->u32SampleRate;
    g_stSonicConfig.fSpeed = pSonicParam->fSpeed;
    sonicSetNumChannels(g_stSonicConfig.pstream, g_stSonicConfig.u32Ch);
    sonicSetSampleRate(g_stSonicConfig.pstream, g_stSonicConfig.u32SampleRate);
    sonicSetSpeed(g_stSonicConfig.pstream, g_stSonicConfig.fSpeed);
    return JZ_ErrorNone;
}
/* Push one buffer of 16-bit PCM through sonic and drain the result into
 * the internal output buffer.
 * pInBuffer  - raw interleaved 16-bit PCM input.
 * u32samples - in/out: on entry the input size in BYTES (despite the
 *              name); on return the number of processed bytes available.
 * Returns a pointer to the internal output buffer (valid until the next
 * call), or NULL on error. */
JZ_S8* JZ_SonicProcess(void* pInBuffer, JZ_U32* u32samples)
{
    JZ_U32 u32FramesWritten = 0, u32InBufferFrames, u32NewBytes = 0;
    /* One frame = all channels of one sample instant, in bytes. */
    JZ_U32 u32BytesPerFrame = g_stSonicConfig.u32Ch * (g_stSonicConfig.u32BitPerSample / 8);
    JZ_U32 u32OutBufBytes = g_stSonicConfig.u32OutBufSize * 4;
    /* Bug fix: u32samples was dereferenced without a NULL check, and on
     * error the function returned the enum JZ_ErrorEmptyBuffer from a
     * pointer-returning function (a bogus non-null pointer). */
    if ((!pInBuffer) || (!u32samples) || (!g_stSonicConfig.s16OutBuffer) || (!g_stSonicConfig.pstream))
    {
        ALOGE("pInBuffer is the empty buffer or outbuffer don't malloc!");
        return NULL;
    }
#ifdef SONIC_DUMP_DEGUG
    if (g_stSonicConfig.pInFile)
    {
        fwrite((JZ_S8* )pInBuffer, 1, *u32samples, g_stSonicConfig.pInFile);
    }
#endif
    /* Incoming size is in bytes; sonic works in frames:
     * frames = bytes / (channels * bytes-per-sample). */
    u32InBufferFrames = *u32samples / u32BytesPerFrame;
    /* Bug fix: previously only u32OutBufSize bytes of the
     * u32OutBufSize * 4 byte buffer were cleared. */
    memset(g_stSonicConfig.s16OutBuffer, 0, u32OutBufBytes);
    sonicWriteShortToStream(g_stSonicConfig.pstream, (JZ_S16* )pInBuffer, u32InBufferFrames);
    do {
        /* Bug fix: bound each read by the remaining buffer space; the
         * original loop could overrun the output buffer. */
        JZ_U32 u32MaxFrames = (u32OutBufBytes - u32NewBytes) / u32BytesPerFrame;
        if (u32MaxFrames == 0)
        {
            ALOGE("jztech:sonic output buffer full, truncating");
            break;
        }
        /* Bug fix: the write offset was s16OutBuffer + u32NewSamples with
         * u32NewSamples counting BYTES — pointer arithmetic on JZ_S16*
         * doubled the offset. Convert bytes to shorts for the offset. */
        u32FramesWritten = sonicReadShortFromStream(g_stSonicConfig.pstream,
                g_stSonicConfig.s16OutBuffer + (u32NewBytes / sizeof(JZ_S16)),
                u32MaxFrames);
        u32NewBytes += u32FramesWritten * u32BytesPerFrame;
        ALOGE("jztech:transformision befor:samples = %u, frames = %u \
after: samples = %u, frames = %u", *u32samples, u32InBufferFrames, u32NewBytes, u32FramesWritten);
    }
    while (u32FramesWritten > 0);
#ifdef SONIC_DUMP_DEGUG
    if (g_stSonicConfig.pOutFile)
    {
        fwrite(((JZ_S8* )g_stSonicConfig.s16OutBuffer), 1, u32NewBytes, g_stSonicConfig.pOutFile);
    }
#endif
    *u32samples = u32NewBytes;
    return (JZ_S8*)g_stSonicConfig.s16OutBuffer;
}
/* Tear down the wrapper: close debug dump files, free the output buffer,
 * and destroy the sonic stream. Safe to call after a failed or repeated
 * init. Always returns JZ_ErrorNone. */
JZ_S32 JZ_SonicDeinit()
{
#ifdef SONIC_DUMP_DEGUG
    if (g_stSonicConfig.pInFile)
    {
        fclose(g_stSonicConfig.pInFile);
        g_stSonicConfig.pInFile = NULL;
    }
    if (g_stSonicConfig.pOutFile)
    {
        fclose(g_stSonicConfig.pOutFile);
        g_stSonicConfig.pOutFile = NULL;
    }
#endif
    /* Bug fix: the original condition was inverted —
     * "if (!s16OutBuffer) free(s16OutBuffer);" freed only when the
     * pointer was NULL, so the buffer was never released. free(NULL) is
     * a no-op, so no guard is needed at all. */
    free(g_stSonicConfig.s16OutBuffer);
    g_stSonicConfig.s16OutBuffer = NULL;
    /* Guard against double-deinit / failed init before destroying. */
    if (g_stSonicConfig.pstream)
    {
        sonicDestroyStream(g_stSonicConfig.pstream);
        g_stSonicConfig.pstream = NULL;
    }
    return JZ_ErrorNone;
}
四、后记:
到底在什么时候去处理sonic数据呢?最好的地方是在解码完成之后,即从解码器中获取了PCM数据然后送去sonic处理,之后再返给播放器,因为在实际适配过程中,有公司想在Android原生播放通路audiotrack中再去处理,具体位置是在write函数中,实际上,这种方式是不可行的,因为audiotrack的buffer大小是在创建audiotrack的时候,根据采样率,位宽及声道数,还有实际的硬件支持算出来的最小buffer,在buffer确定的情况下,在write中去进行sonic处理,实际上改变的是本次write数据量的大小,大于一倍时,数据量小了,可能会underrun,小于一倍时,数据量大了,会撑爆下面申请的buffer,apk闪退抛异常,导致这些的根因都是audioflinger里面的buffer反压机制,由于目前工作已经不侧重音频了,所以,具体的影响效果分析了个大致,没有去看代码,但是,如果实测,肯定会出现各种闪退或者音频断续等问题的。
相关源码路径:
https://github.com/Balotelli-jy/sonic-package