当前位置: 首页 > 工具软件 > PortAudio > 使用案例 >

java webrtc process ns降噪,PortAudio+webrtc+lame实现采集降噪增益mp3

郗奇玮
2023-12-01

一、使用PortAudio采集

首先需要下载 PortAudio 源码并编译出动态库,然后把头文件和链接库(lib)放到工程能找到的位置。这些不是本文重点,不再赘述。

// Use int16 as the PortAudio sample type; this matches what the WebRTC module consumes.

#define PA_SAMPLE_TYPE paInt16 // 16-bit samples: one sample = sizeof(int16_t) = 2 bytes

#define SAMPLE_RATE (32000) // 32000 samples per second

#define FRAMES_PER_BUFFER (6400) // each callback invocation delivers 6400 samples

//说明:input是从设备读到的数据,字节长度是frameCount*sizeof(SAMPLE)

//如果要echo,需要在初始化的时候指定好设备,把input拷到output即可.

static int PACallback(

const void *input, void *output,

unsigned long frameCount,

const PaStreamCallbackTimeInfo* timeInfo,

PaStreamCallbackFlags statusFlags,

void *userData )

{

Q_UNUSED(output);

Q_UNUSED(timeInfo);

Q_UNUSED(statusFlags);

//进一步处理数据

WcRecordMedia2 *media=(WcRecordMedia2*)userData;

bool rt = media->tHandle(input,frameCount);

return rt?paContinue:paComplete;

}

// Initialise PortAudio and open a capture-only stream on the default input
// device. This is a fragment of a member function: the `error` label and the
// enclosing function are not part of this snippet.
PaError err;

err = Pa_Initialize();
if( err != paNoError ) goto error;

PaDeviceIndex didx = Pa_GetDefaultInputDevice();
if(didx == paNoDevice){
    // No recording device available.
    return false;
}

PaStreamParameters inputDev;
inputDev.device = didx; // reuse the index that was just validated
inputDev.channelCount = 1;
inputDev.sampleFormat = PA_SAMPLE_TYPE;
inputDev.suggestedLatency = 1;
inputDev.hostApiSpecificStreamInfo = NULL;

// PACallback ignores its `output` buffer, so no output stream is needed.
// Pa_OpenStream accepts NULL output parameters for an input-only stream,
// which also lets recording work on machines without a playback device
// (filling in paNoDevice as the output device makes Pa_OpenStream fail).
PaStream* paStream = NULL;
err = Pa_OpenStream(
    &paStream,
    &inputDev,
    NULL,              /* no output: capture only */
    SAMPLE_RATE,
    FRAMES_PER_BUFFER, /* frames per buffer */
    paDitherOff,       /* stream flags */
    PACallback,
    this);             // user data: pointer to the owning object
if( err != paNoError ) goto error;

err = Pa_StartStream( paStream );
if( err != paNoError ) goto error;

// For a demo, simply sleep for one minute (6000 * 10 ms) while recording.
for(int i=0;i<6000;i++){
    Pa_Sleep(10);
}

// ... at shutdown: close the stream (if one was opened) and release PortAudio.
PaError err;

if(paStream){
    err = Pa_CloseStream( paStream );
    // Log the outcome of closing the stream.
    qDebug() << "Pa_CloseStream:" << Pa_GetErrorText(err);
    paStream = NULL;
}

Pa_Terminate();

有用经验:①webrtc只支持SAMPLE_RATE 为8000,16000,32000,常见的44100罢工!

②FRAMES_PER_BUFFER 可以比较自由,但是给webrtc处理的frameCount有限制..咳咳..请坐稳你的小板凳: webrtc提供的Ns(降噪),每次给的frameCount最多是480,因为拆分高低频后,count减半不能超过240; AGC(增益)只支持160或者320; 所以我选择了320的20倍,分20次把6400个sample给进去.

二、webrtc处理

这部分代码是从另一个帖子下载抄回来的,作者从webrtc代码里抠出来需要的部分,神!原贴在这里,遥谢一下原始作者!

http://www.jianshu.com/p/77a363960711

以下代码是我根据原贴封装的C++类

// Audio processing wrapper around WebRTC's noise suppression (NSx) and
// automatic gain control (AGC).
// Only int16 samples are accepted; other sample formats are untested.
class WebRtcAudioModule
{
public:
    WebRtcAudioModule()
        : nsInst_(NULL)
        , agcHandle_(NULL)
        , micLevel_(0)
    {
        // Four 6-int QMF filter states carved out of one zeroed buffer.
        memset(internalBuf, 0, sizeof(internalBuf));
        filter_state1    = internalBuf + 0;
        filter_state2    = internalBuf + 6;
        synthesis_state1 = internalBuf + 12;
        synthesis_state2 = internalBuf + 18;

        // Four band buffers carved out of one zeroed buffer. Each only needs
        // half of the frames handled per step (unit: 2-byte short).
        memset(handlerBuf, 0, sizeof(handlerBuf));
        shInL  = handlerBuf + 0;
        shInH  = handlerBuf + 240;
        shOutL = handlerBuf + 240*2;
        shOutH = handlerBuf + 240*3;
    }

    ~WebRtcAudioModule(){
        if(nsInst_) WebRtcNsx_Free(nsInst_);
        if(agcHandle_) WebRtcAgc_Free(agcHandle_);
    }

    // Creates and configures the noise suppressor (policy 1).
    // Returns false on any failure; in that case no NS instance is kept, so
    // handle() will not run a half-initialised suppressor.
    bool initNs(uint32_t sampleRate)
    {
        // Drop an instance left over from an earlier call so it is not leaked.
        if(nsInst_){
            WebRtcNsx_Free(nsInst_);
            nsInst_ = NULL;
        }

        if(WebRtcNsx_Create(&nsInst_) != 0){
            nsInst_ = NULL;
            return false;
        }

        if(WebRtcNsx_Init(nsInst_, sampleRate) != 0
                || WebRtcNsx_set_policy(nsInst_, 1) != 0){
            WebRtcNsx_Free(nsInst_);
            nsInst_ = NULL;
            return false;
        }
        return true;
    }

    // Creates and configures the AGC in fixed-digital mode
    // (compression gain 20 dB, limiter enabled, target level 3 dBFS).
    // Returns false on any failure; in that case no AGC instance is kept.
    bool initAgc(uint32_t sampleRate)
    {
        // Drop an instance left over from an earlier call so it is not leaked.
        if(agcHandle_){
            WebRtcAgc_Free(agcHandle_);
            agcHandle_ = NULL;
        }

        if(WebRtcAgc_Create(&agcHandle_) != 0){
            agcHandle_ = NULL;
            return false;
        }

        int minLevel = 0;
        int maxLevel = 255;
        int agcMode = kAgcModeFixedDigital;

        WebRtcAgc_config_t agcConfig;
        agcConfig.compressionGaindB = 20;
        agcConfig.limiterEnable = 1;
        agcConfig.targetLevelDbfs = 3;

        if(WebRtcAgc_Init(agcHandle_, minLevel, maxLevel, agcMode, sampleRate) != 0
                || WebRtcAgc_set_config(agcHandle_, agcConfig) != 0){
            WebRtcAgc_Free(agcHandle_);
            agcHandle_ = NULL;
            return false;
        }
        return true;
    }

    // Processes `data` in place. `data` holds frameCount samples, i.e.
    // frameCount*sizeof(short) bytes, which is why only int16 is supported.
    // frameCount must be a multiple of 320 and at least one of initNs() /
    // initAgc() must have succeeded; otherwise false is returned and `data`
    // is left untouched.
    // Returns true when every 320-sample step was processed successfully.
    bool handle(short* data, unsigned long frameCount)
    {   // Remember: one frame is currently 2 bytes.
        int ret = 0;

        Q_ASSERT(frameCount%320==0); // AGC requires 160 or 320 frames per call.
        Q_ASSERT(nsInst_ || agcHandle_);

        if(frameCount%320!=0) return false;
        // Without either stage the synthesis step below would overwrite
        // `data` with whatever the output band buffers happen to contain.
        if(!nsInst_ && !agcHandle_) return false;

        // Walk the input in steps of 320 samples.
        for(unsigned long i=0;i<frameCount;i+=320)
        {
            // Split into low and high band (160 samples each).
            WebRtcSpl_AnalysisQMF(data+i,320,shInL,shInH,
                                  filter_state1,filter_state2);

            if(nsInst_){
                ret = WebRtcNsx_Process(nsInst_, shInL, shInH, shOutL, shOutH);
                if(ret != 0) break;
            }

            if(agcHandle_){
                if(nsInst_){
                    // Feed the NS output into the AGC: swap the in/out roles.
                    qSwap(shInL,shOutL);
                    qSwap(shInH,shOutH);
                }

                uint8_t saturationWarning;
                int inMicLevel = micLevel_;
                ret = WebRtcAgc_Process(agcHandle_, shInL, shInH, 160,
                                        shOutL ,shOutH, inMicLevel,
                                        &micLevel_, 0, &saturationWarning);
                if(ret != 0) break;
            }

            // Merge both bands back into the caller's buffer.
            WebRtcSpl_SynthesisQMF(shOutL,shOutH, 160, data+i, synthesis_state1,synthesis_state2);
        }

        return (ret==0);
    }

private:
    NsxHandle *nsInst_;

    int internalBuf[24];
    int *filter_state1;    // [6]
    int *filter_state2;    // [6]
    int *synthesis_state1; // [6]
    int *synthesis_state2; // [6]

    short handlerBuf[240*4]; // the WebRTC NS function is limited to at most 240 shorts per band
    short *shInL,*shInH,*shOutL,*shOutH;

    void *agcHandle_;
    int micLevel_; // AGC mic level: the output of one call is the input of the next.
};

有用经验:①我这里是先降噪后增益,噪音降低后增益把原声和降低的噪音又都放大了,效果是降噪不太明显,主声增大比较显著.

②如果想要降噪–增益–再降噪, 你需要多初始化一套NsxHandle *nsInst2_, 然后照着样子调用,否则不行.

③WebRtcNsx_Process和WebRtcNs_Process听不出差别,但是有x是不是要更牛一点呢.

④webrtc这部分代码免费取用http://download.csdn.net/detail/jinzeyu_cn/9894495

三、lame编码mp3

// The LAME SDK requires a buffer of at least 7200 for lame_encode_flush.

#define MIN_LAME_BUFFERSIZE (7200)

// Encodes mono int16 PCM into an MP3 file with LAME and writes the
// ID3 / LAME tags by hand instead of relying on lame_mp3_tags_fid().
class LameMp3Encoder
{
public:
    LameMp3Encoder()
        : flags_(NULL)
        , bufferSize_(0)
        , mp3buf(NULL)
        , fp_(NULL)
        , tagPos_(0)
    {}

    ~LameMp3Encoder()
    {
        this->finish();
        if(flags_) lame_close(flags_);
        if(mp3buf) JFree(mp3buf);
        if(fp_) fclose(fp_);
    }

    // Opens `filePath` for writing and configures the encoder.
    // `bufferSize` is presumably the PCM byte count passed per handle() call
    // (the default and the assert in handle() suggest so).
    // Returns false if the file, the buffer or the encoder cannot be set up.
    bool init(const char* filePath, uint32_t sampleRate, int bufferSize=FRAMES_PER_BUFFER*2)
    {
        fp_ = fopen(filePath,"wb+");
        if(fp_ == NULL) return false;

        // lame.h gives the worst-case output of one encode call as
        // 1.25*num_samples + 7200 bytes. With 2 PCM bytes per sample,
        // bufferSize + 7200 always covers that, and also satisfies the
        // 7200-byte minimum needed by lame_encode_flush.
        bufferSize_ = qMax(bufferSize,0) + MIN_LAME_BUFFERSIZE;
        mp3buf = (unsigned char*)JMalloc(bufferSize_);
        if(mp3buf == NULL) return false;

        flags_ = lame_init();
        if(flags_==NULL) return false;

        int ret = 0;
        lame_set_in_samplerate(flags_,sampleRate);
        lame_set_num_channels(flags_,1);
        lame_set_VBR(flags_,vbr_default); //vbr_abr);

        // Disable automatic tag writing because lame_mp3_tags_fid crashes.
        // See http://mp3-encoding.31853.n2.nabble.com/Re-lame-mp3-tags-fid-and-file-access-callbacks-td34000.html
        lame_set_write_id3tag_automatic(flags_,0);

        lame_set_brate(flags_,32);  // seems to affect the minimum bitrate
        lame_set_mode(flags_,MONO); // mono is enough, the source is mono
        lame_set_quality(flags_,2); // high quality; WebRTC processing dominates the audible result

        ret = lame_init_params(flags_);
        if(ret < 0) return false;

        ret = lame_get_id3v2_tag(flags_,mp3buf,bufferSize_);
        if(ret > 0){
            fwrite(mp3buf,1,ret,fp_);
        }

        tagPos_ = ftell(fp_); // remember this position: finish() seeks back here to write the LAME tag frame
        return true;
    }

    // Encodes `frameCount` mono samples from `data` and appends the MP3
    // bytes to the file. Returns false if the encoder is not ready, encoding
    // fails, or the bytes cannot be written completely.
    bool handle(short* data, unsigned long frameCount)
    {
        if(fp_ == NULL || flags_ == NULL || mp3buf == NULL) return false;

        Q_ASSERT(frameCount*2 <= bufferSize_);

        //int mp3bytes = lame_encode_buffer_interleaved(flags_,data,frameCount,mp3buf,bufferSize_);
        int mp3bytes = lame_encode_buffer(flags_,data,data,frameCount,mp3buf,bufferSize_);
        if(mp3bytes < 0) return false;

        if(mp3bytes > 0 && fwrite(mp3buf,1,mp3bytes,fp_) != (size_t)mp3bytes){
            return false; // short write, e.g. disk full
        }
        return true;
    }

    // Flushes the encoder, appends the ID3v1 tag, writes the LAME tag frame
    // at the position recorded by init(), and closes the file.
    // Does nothing when the encoder was never fully initialised or finish()
    // has already run.
    void finish()
    {
        if(flags_==NULL || mp3buf==NULL || bufferSize_<MIN_LAME_BUFFERSIZE || fp_==NULL) return;

        int mp3bytes = lame_encode_flush(flags_,mp3buf,bufferSize_);
        if(mp3bytes>0){
            fwrite(mp3buf,1,mp3bytes,fp_);
        }

        // lame_mp3_tags_fid(flags_,fp_) crashes (the stack shows fseek), so the tags are written manually.
        // Both tag getters return the REQUIRED size when the buffer is too
        // small, so only write when the tag actually fit into mp3buf.
        size_t tagBytes = lame_get_id3v1_tag(flags_,mp3buf,bufferSize_);
        if(tagBytes>0 && tagBytes<=bufferSize_){
            fwrite(mp3buf,1,tagBytes,fp_);
        }

        tagBytes = lame_get_lametag_frame(flags_,mp3buf,bufferSize_);
        if(tagBytes>0 && tagBytes<=bufferSize_){
            // Only overwrite the header area if the seek really succeeded.
            if(fseek(fp_,tagPos_,SEEK_SET) == 0){
                fwrite(mp3buf,1,tagBytes,fp_);
            }
        }

        fclose(fp_);
        fp_ = NULL;
    }

private:
    lame_global_flags* flags_;
    unsigned bufferSize_;   // capacity of mp3buf in bytes
    unsigned char* mp3buf;  // scratch buffer for encoded output and tags
    FILE* fp_;
    int tagPos_;            // file offset just after the ID3v2 tag
};

有用经验:如果不用我的代码,mp3文件编完后很容易出现时长偏差,播放器和文件夹里看的时间不一致,那是因为没给mp3写好tag文件头。简单的方法是结束的时候调用lame_mp3_tags_fid,但是我一次也没成功,后找到老外一个帖子,手工添加tag完美解决,就是略麻烦一点,参见以上代码和url

最后

如果您和我有一样的需求,以上的经验足以让您节省至少一天的工时.算一下您月薪假如是12000,工作日22天,为您节省500+的人民币,真实在!!

当然为了我的工作,我将PortAudio的调用也C++类化,并且支持录音设备的热插拔,用空白语音填充无设备的情况.这一部分的有用经验:一旦Pa_Initialize();成功后Pa_GetDefaultInputDevice()返回值不会变,无法发现设备插入、移除或变更哦。手头没代码,先卖个关子吧!讨论加群20487942

 类似资料: