1. Capturing with PortAudio
First you need to download the library, build it into a dynamic library, and put the headers and the import library (lib) in place. None of that is the point here, so I won't go into it.
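For reference, a minimal sketch of the linking step, assuming a qmake/Qt project (the Q_ macros and qDebug below suggest one); the paths are placeholders, not from the original post:
# .pro sketch, adjust the paths to wherever you unpacked/built PortAudio
INCLUDEPATH += $$PWD/3rdparty/portaudio/include
LIBS += -L$$PWD/3rdparty/portaudio/lib -lportaudio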
//Define the PortAudio sample type as int16, which pairs nicely with the webrtc module
#define PA_SAMPLE_TYPE paInt16 //two bytes per sample: sizeof(short) = 2 bytes
#define SAMPLE_RATE (32000) //32000 samples per second
#define FRAMES_PER_BUFFER (6400) //each callback delivers 6400 samples (6400/32000 = 0.2 s of audio)
//Note: input is the data read from the device; its length in bytes is frameCount*sizeof(SAMPLE)
//For echo (loopback) you must specify both devices at init time and simply copy input to output.
static int PACallback(
const void *input, void *output,
unsigned long frameCount,
const PaStreamCallbackTimeInfo* timeInfo,
PaStreamCallbackFlags statusFlags,
void *userData )
{
Q_UNUSED(output);
Q_UNUSED(timeInfo);
Q_UNUSED(statusFlags);
//hand the data off for further processing
WcRecordMedia2 *media=(WcRecordMedia2*)userData;
bool rt = media->tHandle(input,frameCount);
return rt?paContinue:paComplete;
}
PaError err;
err = Pa_Initialize();
if( err != paNoError ) goto error;
PaDeviceIndex didx = Pa_GetDefaultInputDevice();
if(didx == paNoDevice){
//no recording device available
return false;
}
PaStreamParameters inputDev;
inputDev.device = didx;
inputDev.channelCount = 1;
inputDev.sampleFormat = PA_SAMPLE_TYPE;
inputDev.suggestedLatency = 1;
inputDev.hostApiSpecificStreamInfo = NULL;
PaStreamParameters outputDev;
//Writing paNoDevice here makes Pa_OpenStream fail, so without a speaker there is no recording either?! (For a capture-only stream PortAudio does accept NULL as the output parameters; the output is kept here for the echo path.)
outputDev.device = Pa_GetDefaultOutputDevice();
outputDev.channelCount = 1;
outputDev.sampleFormat = PA_SAMPLE_TYPE;
outputDev.suggestedLatency = 1;
outputDev.hostApiSpecificStreamInfo = NULL;
PaStream* paStream = NULL;
err = Pa_OpenStream(
&paStream,
&inputDev,
&outputDev,
SAMPLE_RATE,
FRAMES_PER_BUFFER, /* frames per buffer */
paDitherOff, /* stream flags */
PACallback,
this); //userData, here a pointer to my class instance.
if( err != paNoError ) goto error;
err = Pa_StartStream( paStream );
if( err != paNoError ) goto error;
//For a demo you can simply sleep for a minute right after this.
for(int i=0;i<6000;i++){
Pa_Sleep(10);
}
//...when finishing up
PaError err;
if(paStream){
err = Pa_CloseStream( paStream );
qDebug()<<"Pa_CloseStream:"<<Pa_GetErrorText(err);
paStream = NULL;
}
Pa_Terminate();
Useful experience: ① webrtc only supports a SAMPLE_RATE of 8000, 16000 or 32000; the ever-popular 44100 goes on strike!
② FRAMES_PER_BUFFER is fairly free, but the frameCount you hand to webrtc per call is restricted.. ahem.. hold on to your little stool: the Ns (noise suppression) in webrtc takes at most 480 samples per call, because after splitting into low/high bands the per-band count is halved and must not exceed 240; AGC (gain) only accepts 160 or 320; so I picked 20 × 320 and feed the 6400 samples in 20 passes (see the sketch below).
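To make the hand-off concrete, here is a sketch of what the tHandle called from PACallback might look like. The member name webrtcModule_ is purely illustrative (it is not in the original post); the point is that PortAudio hands over a read-only buffer, so it is copied into writable int16 storage before the in-place webrtc processing of section 2:
//illustrative sketch only; webrtcModule_ is a hypothetical WebRtcAudioModule member (see section 2)
bool WcRecordMedia2::tHandle(const void *input, unsigned long frameCount)
{
    const short *src = (const short*)input;        //PA_SAMPLE_TYPE is paInt16
    QVector<short> buf(frameCount);                //frameCount == FRAMES_PER_BUFFER == 6400
    memcpy(buf.data(), src, frameCount*sizeof(short));
    //handle() below chops the 6400 samples into 20 chunks of 320 internally
    bool ok = webrtcModule_->handle(buf.data(), frameCount);
    //the processed samples in buf would then go on to the mp3 encoder of section 3
    return ok;
}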
2. Processing with webrtc
This part of the code was copied from another post, whose author extracted the needed pieces out of the webrtc source. Brilliant! The original post is below; my thanks from afar to the original author!
http://www.jianshu.com/p/77a363960711
The following is the C++ class I wrapped around the code from that post.
//webrtc audio processing module, supporting noise suppression and gain (AGC).
//Only int16 samples are accepted; anything else.. who knows whether it would work!
class WebRtcAudioModule
{
public:
WebRtcAudioModule()
: nsInst_(NULL)
, agcHandle_(NULL)
, micLevel_(0)
{
memset(internalBuf,0,sizeof(int)*24);
filter_state1 = internalBuf + 0;
filter_state2 = internalBuf + 6;
synthesis_state1 = internalBuf + 12;
synthesis_state2 = internalBuf + 18;
memset(handlerBuf,0,sizeof(short)*240*4);
//each of the following 4 buffers only needs half the length of one processing frame (in 2-byte shorts)
shInL = handlerBuf + 0;
shInH = handlerBuf + 240;
shOutL = handlerBuf + 240*2;
shOutH = handlerBuf + 240*3;
}
~WebRtcAudioModule(){
if(nsInst_) WebRtcNsx_Free(nsInst_);
if(agcHandle_) WebRtcAgc_Free(agcHandle_);
}
bool initNs(uint32_t sampleRate)
{
int ret = WebRtcNsx_Create(&nsInst_);
if(ret != 0) return false;
ret = WebRtcNsx_Init(nsInst_,sampleRate);
if(ret != 0) return false;
ret = WebRtcNsx_set_policy(nsInst_,1);
if(ret != 0) return false;
return true;
}
bool initAgc(uint32_t sampleRate)
{
int ret = WebRtcAgc_Create(&agcHandle_);
if(ret != 0) return false;
int minLevel = 0;
int maxLevel = 255;
int agcMode = kAgcModeFixedDigital;
ret = WebRtcAgc_Init(agcHandle_, minLevel, maxLevel, agcMode, sampleRate);
if(ret != 0) return false;
WebRtcAgc_config_t agcConfig;
agcConfig.compressionGaindB = 20;
agcConfig.limiterEnable = 1;
agcConfig.targetLevelDbfs = 3;
ret = WebRtcAgc_set_config(agcHandle_, agcConfig);
if(ret != 0) return false;
return true;
}
//Process the data. The valid length of data is frameCount*sizeof(short) bytes, which is also why only int16 samples are supported.
bool handle(short* data, unsigned long frameCount)
{ //remember, one frame is 2 bytes here!
int ret = 0;
Q_ASSERT(frameCount%320==0); //the AGC call requires frames of 160 or 320 samples.
Q_ASSERT(nsInst_ || agcHandle_);
if(frameCount%320!=0) return false;
for(unsigned i=0;i<frameCount;i+=320)
{
//split into low and high bands; this call has no error return, it either works or crashes.
WebRtcSpl_AnalysisQMF(data+i,320,shInL,shInH,
filter_state1,filter_state2);
if(nsInst_){
ret = WebRtcNsx_Process(nsInst_, shInL, shInH, shOutL, shOutH);
if(ret != 0) break;
}
if(agcHandle_){
if(nsInst_){
qSwap(shInL,shOutL);
qSwap(shInH,shOutH);
}
uint8_t saturationWarning;
int inMicLevel = micLevel_;
ret = WebRtcAgc_Process(agcHandle_, shInL, shInH, 160,
shOutL ,shOutH, inMicLevel,
&micLevel_, 0, &saturationWarning);
if(ret != 0) break;
}
WebRtcSpl_SynthesisQMF(shOutL,shOutH, 160, data+i, synthesis_state1,synthesis_state2);
}
return (ret==0);
}
private:
NsxHandle *nsInst_;
int internalBuf[24];
int *filter_state1;//[6];
int *filter_state2;//[6];
int *synthesis_state1;//[6];
int *synthesis_state2;//[6];
short handlerBuf[240*4]; //the webrtc NS function caps each band at 240 shorts
short *shInL,*shInH,*shOutL,*shOutH;
void *agcHandle_;
int micLevel_; //micLevel: the output of one call is fed back as the input of the next.
};
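A minimal usage sketch, assuming the constants from section 1 (32000 Hz, 6400 samples per callback); data stands for the int16 buffer handed over by the PortAudio callback, and error handling is trimmed:
WebRtcAudioModule module;
module.initNs(SAMPLE_RATE);                  //noise suppression at 32000 Hz
module.initAgc(SAMPLE_RATE);                 //fixed digital gain
//handle() processes the samples in place, 320 at a time
bool ok = module.handle(data, FRAMES_PER_BUFFER);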
Useful experience: ① I run noise suppression first and gain second; once the noise has been reduced, the gain amplifies both the voice and the residual noise, so the noise reduction is not very audible while the main voice gets noticeably louder.
② If you want NS – AGC – NS again, you must initialize a second NsxHandle *nsInst2_ and call it the same way; anything else does not work.
③ I cannot hear any difference between WebRtcNsx_Process and WebRtcNs_Process, but surely the one with the x must be a bit more impressive.
④ This part of the webrtc code is free to grab at http://download.csdn.net/detail/jinzeyu_cn/9894495
3. Encoding MP3 with lame
//the lame API requires the mp3 buffer to be at least 7200 bytes for lame_encode_flush
#define MIN_LAME_BUFFERSIZE (7200)
class LameMp3Encoder
{
public:
LameMp3Encoder()
: flags_(NULL)
, mp3buf(NULL)
, fp_(NULL)
{}
~LameMp3Encoder()
{
this->finish();
if(flags_) lame_close(flags_);
if(mp3buf) JFree(mp3buf);
if(fp_) fclose(fp_);
}
bool init(const char* filePath, uint32_t sampleRate, int bufferSize=FRAMES_PER_BUFFER*2)
{
fp_ = fopen(filePath,"wb+");
if(fp_ == NULL) return false;
bufferSize_ = qMax(bufferSize,MIN_LAME_BUFFERSIZE);
mp3buf = (unsigned char*)JMalloc(bufferSize_);
flags_ = lame_init();
if(flags_==NULL) return false;
int ret = 0;
lame_set_in_samplerate(flags_,sampleRate);
lame_set_num_channels(flags_,1);
lame_set_VBR(flags_,vbr_default); //vbr_abr is an alternative
//Disable automatic tag writing, because lame_mp3_tags_fid crashes.
//See http://mp3-encoding.31853.n2.nabble.com/Re-lame-mp3-tags-fid-and-file-access-callbacks-td34000.html
lame_set_write_id3tag_automatic(flags_,0);
lame_set_brate(flags_,32); //this seems to affect the minimum bitrate.
lame_set_mode(flags_,MONO); //mono is enough; the source is mono anyway.
lame_set_quality(flags_,2); //best quality; the webrtc processing dominates the result, you can't really hear a difference.
ret = lame_init_params(flags_);
if(ret < 0) return false;
ret = lame_get_id3v2_tag(flags_,mp3buf,bufferSize_);
if(ret > 0){
fwrite(mp3buf,1,ret,fp_);
}
tagPos_ = ftell(fp_); //remember this position; we seek back here before finishing.
return true;
}
bool handle(short* data, unsigned long frameCount)
{
if(fp_ == NULL) return false;
Q_ASSERT(frameCount*2 <= bufferSize_);
//int mp3bytes = lame_encode_buffer_interleaved(flags_,data,frameCount,mp3buf,bufferSize_);
int mp3bytes = lame_encode_buffer(flags_,data,data,frameCount,mp3buf,bufferSize_);
if(mp3bytes < 0) return false;
fwrite(mp3buf,1,mp3bytes,fp_);
return true;
}
void finish()
{
if(flags_==NULL || bufferSize_<MIN_LAME_BUFFERSIZE || fp_==NULL) return;
int mp3bytes = lame_encode_flush(flags_,mp3buf,bufferSize_);
if(mp3bytes>0){
fwrite(mp3buf,1,mp3bytes,fp_);
}
//lame_mp3_tags_fid(flags_,fp_); crashes here (the stack trace points at fseek), so the tags are written by hand instead.
mp3bytes = lame_get_id3v1_tag(flags_,mp3buf,bufferSize_);
if(mp3bytes>0){
fwrite(mp3buf,1,mp3bytes,fp_);
}
mp3bytes = lame_get_lametag_frame(flags_,mp3buf,bufferSize_);
if(mp3bytes>0){
fseek(fp_,tagPos_,SEEK_SET);
fwrite(mp3buf,1,mp3bytes,fp_);
}
fclose(fp_);
fp_ = NULL;
}
private:
lame_global_flags* flags_;
unsigned bufferSize_;
unsigned char* mp3buf;
FILE* fp_;
long tagPos_;
};
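And a minimal usage sketch for the encoder; the file name is only an example, and data is again the processed 6400-sample buffer:
LameMp3Encoder encoder;
if(!encoder.init("record.mp3", SAMPLE_RATE)) //writes the id3v2 tag and remembers tagPos_
    return false;
encoder.handle(data, FRAMES_PER_BUFFER);     //call this for every callback buffer
//...when recording stops:
encoder.finish();                            //flush, id3v1 tag, lametag frame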
Useful experience: if you don't use this code, the finished mp3 very easily ends up with a wrong duration; the time shown in the player and in the file explorer disagree, because the mp3 tag header was never written properly. The simple fix is to call lame_mp3_tags_fid at the end, but that never once worked for me; I later found a post by a foreigner, and writing the tags manually solved it perfectly, just slightly more work. See the code above and the URL.
If you have the same requirements as I did, the experience above will save you at least one working day. Do the math: if your monthly salary is 12000 over 22 working days, that's 500+ RMB saved. A real bargain!!
Of course, for my own project I also wrapped the PortAudio calls into a C++ class, with support for hot-plugging of recording devices and silence filling when no device is present. The useful lesson from that part: once Pa_Initialize() has succeeded, the value returned by Pa_GetDefaultInputDevice() never changes, so you cannot detect devices being plugged in, removed, or switched. I don't have that code at hand, so I'll leave it as a teaser!
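Not the author's code, but a common workaround sketch: since PortAudio freezes its device list between Pa_Initialize and Pa_Terminate, you can cycle the library to rescan it (close any open stream first, because Pa_Terminate invalidates streams):
//sketch: rebuild PortAudio's device list to notice plugged/unplugged devices
PaDeviceIndex rescanDefaultInput()
{
    Pa_Terminate();                        //drops the cached device list
    if(Pa_Initialize() != paNoError)
        return paNoDevice;
    return Pa_GetDefaultInputDevice();     //reflects the current hardware again
}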