音频-基于Core Audio技术采集音频(版本1)

公西鸿博
2023-12-01

(这是第一版的优化,优化目标是让代码简单易懂)

**

代码

// WindowsAudioSession.cpp
// 基本的利用WAS采集音频的demo
#include <MMDeviceAPI.h>
#include <AudioClient.h>
#include <iostream>
using namespace std;

// ns (nanosecond): a unit of time, one billionth of a second.
// 1 s = 1000 ms; 1 ms = 1000 us; 1 us = 1000 ns

// The REFERENCE_TIME data type defines the units for reference times in DirectShow. 
// Each unit of reference time is 100 nanoseconds (one REFERENCE_TIME unit = 100 ns).

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC       (10000000)
#define REFTIMES_PER_MILLISEC  (10000)
 

// Releases a COM-style interface pointer (anything with a Release() method) and
// resets it to NULL; does nothing when the pointer is already NULL.
// Written as a complete if/else so that an `else` following the macro can never
// bind to the macro's own hidden `if` (dangling-else hazard). The expansion is
// valid both with and without a trailing semicolon at the call site.
#define SAFE_RELEASE(punk)  \
	if ((punk) == NULL) { }  \
	else { (punk)->Release(); (punk) = NULL; }

// GUID constants for the COM class and interfaces used by this demo. main() passes
// them to CoCreateInstance, IMMDevice::Activate and IAudioClient::GetService.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID   IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID   IID_IAudioClient = __uuidof(IAudioClient);
const IID   IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);

// Map the Win32 memory helper names onto their Rtl* counterparts. Only CopyMemory
// and ZeroMemory are used in this file.
// NOTE(review): the Windows SDK headers normally provide these same definitions
// already - confirm whether the aliases are still needed.
#define MoveMemory RtlMoveMemory
#define CopyMemory RtlCopyMemory
#define FillMemory RtlFillMemory
#define ZeroMemory RtlZeroMemory

 


// Typed view of the fixed leading part of the WAV file that WriteWaveFile builds:
// the RIFF chunk header followed by the header of the "fmt " chunk. WriteWaveFile
// overlays this struct on the start of its output buffer to patch the two size
// fields after copying in the static WaveHeader template.
struct WAVEHEADER
{
	DWORD   dwRiff;                     // "RIFF"
	DWORD   dwSize;                     // Size: total file size minus the first two DWORDs
	DWORD   dwWave;                     // "WAVE"
	DWORD   dwFmt;                      // "fmt "
	DWORD   dwFmtSize;                  // Wave Format Size: sizeof(WAVEFORMATEX) + cbSize
};

// Template bytes for the fixed start of the WAV file; the wave format block is
// appended right after it. Each row matches one DWORD field of WAVEHEADER, and the
// two zeroed size fields are filled in by WriteWaveFile.
const BYTE WaveHeader[] =
{
	'R', 'I', 'F', 'F',          // dwRiff
	0x00, 0x00, 0x00, 0x00,      // dwSize    (patched later)
	'W', 'A', 'V', 'E',          // dwWave
	'f', 'm', 't', ' ',          // dwFmt
	0x00, 0x00, 0x00, 0x00       // dwFmtSize (patched later)
};

//  Static wave DATA tag: the four-character id that WriteWaveFile emits in front of
//  the 32-bit audio data length.
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

//  Write the contents of a WAV file.  We take as input the data to write and the format of that data.
//
//  The whole file image (RIFF header, format block, "data" tag, data length, audio
//  bytes) is assembled in one temporary heap buffer and written with a single
//  WriteFile call.
//
//  FileHandle : handle that WriteFile can write to.
//  Buffer     : audio bytes to store; may be NULL only when BufferSize is 0.
//  BufferSize : number of bytes in Buffer.
//  WaveFormat : format description; its cbSize trailing bytes are written as well.
//
//  Returns true when the complete image was written; false on invalid arguments,
//  when the data does not fit in the 32-bit RIFF size fields, on allocation failure,
//  or when the write fails or is short. Diagnostics go to stdout.
bool WriteWaveFile(HANDLE FileHandle, const BYTE* Buffer, const size_t BufferSize, const WAVEFORMATEX* WaveFormat)
{
	if (WaveFormat == NULL || (Buffer == NULL && BufferSize != 0))
	{
		printf("Invalid arguments: wave format and audio buffer must not be NULL\n");
		return false;
	}

	const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
	const size_t overheadSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

	//
	//  The RIFF size fields are 32 bits wide. Reject anything larger instead of
	//  truncating: a truncated allocation size combined with a full-length copy
	//  would overrun the heap buffer.
	//
	const size_t maxRiffSize = 0xFFFFFFFFu;
	if (BufferSize > maxRiffSize - overheadSize)
	{
		printf("Audio data too large for a WAV file: %llu bytes\n", static_cast<unsigned long long>(BufferSize));
		return false;
	}

	const DWORD waveFileSize = static_cast<DWORD>(overheadSize + BufferSize);
	const DWORD dataSize = static_cast<DWORD>(BufferSize);

	BYTE* waveFileData = new (std::nothrow) BYTE[waveFileSize];
	if (waveFileData == NULL)
	{
		printf("Unable to allocate %lu bytes to hold output wave data\n", static_cast<unsigned long>(waveFileSize));
		return false;
	}

	//
	//  Copy in the wave header template, then fix up its two length fields.
	//
	BYTE* waveFilePointer = waveFileData;
	CopyMemory(waveFilePointer, WaveHeader, sizeof(WaveHeader));
	waveFilePointer += sizeof(WaveHeader);

	WAVEHEADER* waveHeader = reinterpret_cast<WAVEHEADER*>(waveFileData);
	waveHeader->dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));
	waveHeader->dwFmtSize = static_cast<DWORD>(formatSize);

	//
	//  Next copy in the WaveFormatex structure (including its cbSize extra bytes).
	//
	CopyMemory(waveFilePointer, WaveFormat, formatSize);
	waveFilePointer += formatSize;

	//
	//  Then the data header. The length is copied byte-wise because the write
	//  position is not guaranteed to be DWORD-aligned after the format block.
	//
	CopyMemory(waveFilePointer, WaveData, sizeof(WaveData));
	waveFilePointer += sizeof(WaveData);
	CopyMemory(waveFilePointer, &dataSize, sizeof(dataSize));
	waveFilePointer += sizeof(dataSize);

	//
	//  And finally copy in the audio data.
	//
	if (BufferSize != 0)
	{
		CopyMemory(waveFilePointer, Buffer, BufferSize);
	}

	//
	//  Last but not least, write the data to the file. There is a single exit
	//  below so the temporary buffer is released on every path.
	//
	bool succeeded = false;
	DWORD bytesWritten = 0;
	if (!WriteFile(FileHandle, waveFileData, waveFileSize, &bytesWritten, NULL))
	{
		printf("Unable to write wave file: %lu\n", static_cast<unsigned long>(GetLastError()));
	}
	else if (bytesWritten != waveFileSize)
	{
		printf("Failed to write entire wave file\n");
	}
	else
	{
		succeeded = true;
	}

	delete[] waveFileData;
	return succeeded;
}

//  Write the captured wave data to an output file so that it can be examined later.
//
//  The file is created in the current directory and named after the local time:
//  WAS_YYYY-MM-DD_hh_mm_ss_mmm.wav. The outcome is reported on stdout; nothing is
//  returned to the caller.
//
//  CaptureBuffer : captured audio bytes.
//  BufferSize    : number of valid bytes in CaptureBuffer.
//  WaveFormat    : format of the captured data, stored in the file header.
void SaveWaveData(BYTE* CaptureBuffer, size_t BufferSize, const WAVEFORMATEX* WaveFormat)
{
	if (WaveFormat == NULL)
	{
		printf("Unable to write wave file\n");
		return;
	}

	SYSTEMTIME st;
	GetLocalTime(&st);
	char waveFileName[_MAX_PATH] = { 0 };
	sprintf_s(waveFileName, ".\\WAS_%04d-%02d-%02d_%02d_%02d_%02d_%02d.wav",st.wYear, st.wMonth, st.wDay,st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);

	// The name is a narrow (char) string, so call the ANSI entry point explicitly;
	// the TCHAR-based CreateFile macro would not compile in a UNICODE build.
	HANDLE waveHandle = CreateFileA(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
		FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
		NULL);
	if (waveHandle == INVALID_HANDLE_VALUE)
	{
		printf("Unable to open output WAV file %s: %lu\n", waveFileName, static_cast<unsigned long>(GetLastError()));
		return;
	}

	if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
	{
		printf("Successfully wrote WAVE data to %s\n", waveFileName);
	}
	else
	{
		printf("Unable to write wave file\n");
	}
	CloseHandle(waveHandle);
}

#define DEF_CAPTURE_MIC
/*
Note 1: silent packets are stored as zeros.
Note 2: for testing, the microphone should be set as the default recording device.
Note 3: when DEF_CAPTURE_MIC is defined only microphone capture is tested;
	 otherwise sound-card (loopback) capture is tested.
Note 4:
	 Sound-card capture:
	 Initialize must be given AUDCLNT_STREAMFLAGS_LOOPBACK.
	 In this mode the audio engine copies the stream that the rendering device is
	 playing into the audio endpoint buffer, so a WASAPI client can capture that stream.
	 Only the speaker output is captured in this case.
*/

int main(int argc, char* argv[])
{
	HRESULT hr_retrun;//用于接收函数返回值
	IAudioCaptureClient* pCaptureClient = NULL;
	REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
	 
	
	
	 
	//第一步:在应用程序中使用COM库,至少要调用一次CoInitializeEx函数
	hr_retrun = CoInitializeEx(NULL, COINIT_MULTITHREADED);//为当前线程初始化COM库并设置并发模式 
	if (FAILED(hr_retrun))
	{
		printf("无法在线程中初始化 COM:: %x\n", hr_retrun);
		return hr_retrun;
	}

	//第二步: 
	// 枚举你的音频设备,你可以在这个时候获取到你机器上所有可用的设备,并指定你需要用到的那个设置
	//用指定的类标识符创建一个Com对象,用指定的类标识符创建一个未初始化的对象
	//参数1:创建的Com对象的类标识符(CLSID)
	//参数2:指向接口IUnknown的指针
	//参数3:运行可执行代码的上下文
	//参数4:创建的Com对象的接口标识符
	//参数5:用来接收指向Com对象接口地址的指针变量
	IMMDeviceEnumerator* pEnumerator = NULL;
	hr_retrun = CoCreateInstance(CLSID_MMDeviceEnumerator,NULL,CLSCTX_ALL,IID_IMMDeviceEnumerator,(void**)&pEnumerator);
	if (FAILED(hr_retrun))
	{
		printf("无法枚举音频设备:: %x\n", hr_retrun);
		return hr_retrun;
	}

	//第三步:获取默认音频采集设备
	IMMDevice* pDevice = NULL;
#ifdef DEF_CAPTURE_MIC
		hr_retrun = pEnumerator->GetDefaultAudioEndpoint(eCapture, eConsole, &pDevice);// 采集麦克风
#else 
	    hr_retrun = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);   // 采集声卡
#endif	
		//参数1:渲染设备的数据流方向是 eRender,捕获设备的数据流方向是 eCapture
		//参数2:端点设备的角色 eConsole 控制台  eMultimedia多媒体  eCommunications通讯
		//参数3:
		if (FAILED(hr_retrun))
		{
			printf("获取默认音频采集设备: %x\n", hr_retrun);
			return hr_retrun;
		}

		// 第四步:创建一个管理对象,通过它可以获取到你需要的一切数据
		IAudioClient* pAudioClient = NULL;
		hr_retrun = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&pAudioClient);
		if (FAILED(hr_retrun))
		{
			printf("创建一个管理对象: %x\n", hr_retrun);
			return hr_retrun;
		}
		//第五步:GetMixFormat 方法检索音频引擎用于其内部处理共享模式流的流格式。
		WAVEFORMATEX* pwfx = NULL;
		hr_retrun = pAudioClient->GetMixFormat(&pwfx);
		if (FAILED(hr_retrun))
		{
			printf("XXX: %x\n", hr_retrun);
			return hr_retrun;
		}


	printf("\nGetMixFormat..。。。。。。。。。。.获取混合格式\n");
	cout << "wFormatTag      : " << pwfx->wFormatTag << endl;//音频数据的编码方式。1 表示是 PCM 编码
	cout << "nChannels       : " << pwfx->nChannels << endl;//2个通道
	cout << "nSamplesPerSec  : " << pwfx->nSamplesPerSec << endl;//采样率44100
	cout << "nAvgBytesPerSec : " << pwfx->nAvgBytesPerSec << endl;//SampleRate * NumChannels * BitsPerSample/8
	cout << "nBlockAlign     : " << pwfx->nBlockAlign << endl;//NumChannels * BitsPerSample/8=2*32/8=8
	cout << "wBitsPerSample  : " << pwfx->wBitsPerSample << endl;//32
	cout << "cbSize          : " << pwfx->cbSize << endl << endl;//[可选]附加数据的大小。 PCM中忽略此值
	int nFrameSize = (pwfx->wBitsPerSample / 8) * pwfx->nChannels;
	cout << "nFrameSize           : " << nFrameSize << " Bytes" << endl;
	cout << "hnsRequestedDuration : " << hnsRequestedDuration << endl;
	cout << " REFERENCE_TIME time units. 即(" << hnsRequestedDuration / 10000 << "ms)" << endl;

	 
	//初始化管理对象,在这里,你可以指定它的最大缓冲区长度,这个很重要,
	//应用程序控制数据块的大小以及延时长短都靠这里的初始化,具体参数大家看看文档解释
	//金:客户端调用IAudioClient :: Initialize方法来初始化终端设备上的流
	//参数1: 共享模式 AUDCLNT_STREAMFLAGS_EVENTCALLBACK 表示当audio buffer数据就绪时,会给系统发个信号,也就是事件触发。
	//参数2:StreamFlags,  控制流创建的标志。客户端应该将这个参数设置为0或者设置为一个或多个 AUDCLNT 
	//参数3:REFERENCE_TIME    参考时间,此处设定为=  10000000
	//参数4:REFERENCE_TIME     此参数只能在独占模式下为非零。在共享模式下,始终将此参数设置为0。
	//参数5:pFormat 格式
	//参数6:指向会话 GUID 的指针
#ifdef DEF_CAPTURE_MIC
	hr_retrun = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
		AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST,
		hnsRequestedDuration,0,pwfx,NULL);
#else
	hr_retrun = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,AUDCLNT_STREAMFLAGS_LOOPBACK,
		hnsRequestedDuration,0,pwfx,NULL);
#endif
	if (FAILED(hr_retrun))
	{
		printf("初始化终端设备上的流: %x\n", hr_retrun);
		return hr_retrun;
	}


	//木:获取流延迟时间,似乎没啥用
	REFERENCE_TIME hnsStreamLatency;//REFERENCE_TIME  =参考时间,一个longlong
	hr_retrun = pAudioClient->GetStreamLatency(&hnsStreamLatency);
	if (FAILED(hr_retrun))
	{
		printf("初始化终端设备上的流: %x\n", hr_retrun);
		return hr_retrun;
	}
	cout << "GetStreamLatency 流延迟    : " << hnsStreamLatency << endl;
	cout << " REFERENCE_TIME time units. 即(" << hnsStreamLatency / 10000 << "ms)" << endl;


	//水:
	REFERENCE_TIME hnsDefaultDevicePeriod;
	REFERENCE_TIME hnsMinimumDevicePeriod;
	hr_retrun = pAudioClient->GetDevicePeriod(&hnsDefaultDevicePeriod, &hnsMinimumDevicePeriod);
	if (FAILED(hr_retrun))
	{
		printf("GetDevicePeriod设备周期: %x\n", hr_retrun);
		return hr_retrun;
	}
	cout << "GetDevicePeriod设备周期  ...\n" << endl;
	cout << "hnsDefaultDevicePeriod : " << hnsDefaultDevicePeriod << endl;
	cout << " REFERENCE_TIME time units. 即(" << hnsDefaultDevicePeriod / 10000 << "ms)" << endl;
	cout << "hnsMinimumDevicePeriod : " << hnsMinimumDevicePeriod << endl;
	cout << " REFERENCE_TIME time units. 即(" << hnsMinimumDevicePeriod / 10000 << "ms)" << endl;



	//火:指的是缓冲区最多可以存放多少帧的数据量。指的就是采样率,比如44100
	UINT32         bufferFrameCount;
	hr_retrun = pAudioClient->GetBufferSize(&bufferFrameCount);
	if (FAILED(hr_retrun))
	{
		printf("GetBufferSize: %x\n", hr_retrun);
		return hr_retrun;
	}
	cout << "GetBufferSize        : " << bufferFrameCount << endl;



	// SetEventHandle ,创建或打开一个事件内核对象,并返回该内核对象的句柄.
	// 麦克风采集及扬声器播放时,都是通过设备事件驱动,可以在设备初始化完成后设置响应的事件句柄
	// SetEventHandle 方法设置当音频缓冲区准备好由客户端处理时系统发出信号的事件句柄。
	// 当音频的buffer就绪 可被client处理时, 会发出系统信号
	//SetEventHandle用于设置处理该信号的event的handle
	// 
	//
	HANDLE hAudioSamplesReadyEvent = CreateEventEx(NULL, NULL, 0, EVENT_MODIFY_STATE | SYNCHRONIZE);
	if (hAudioSamplesReadyEvent == NULL)
	{
		printf("Unable to create samples ready event: %d.\n", GetLastError());
	}
	// 
	hr_retrun = pAudioClient->SetEventHandle(hAudioSamplesReadyEvent);
	if (FAILED(hr_retrun))
	{
		printf("Unable to set ready event: %x.\n", hr_retrun);
		return false;
	}
	// GetService 方法从音频客户端对象访问其他服务
	hr_retrun = pAudioClient->GetService(IID_IAudioCaptureClient, (void**)&pCaptureClient);
	if (FAILED(hr_retrun))
	{
		printf("GetService 方法从音频客户端对象访问其他服务: %x.\n", hr_retrun);
		return false;
	}
	// Start recording.
	hr_retrun = pAudioClient->Start();  // Start recording.
	if (FAILED(hr_retrun))
	{
		printf("Start recording: %x.\n", hr_retrun);
		return false;
	}
	printf("\nAudio Capture begin...\n\n");
	int  n_lao_cishu_Cnt = 0;//记录采集循环次数
	size_t nCaptureBufferSize = 8 * 1024 * 1024;//单独就是一个大数据,用于开辟一个大内存空间,保存数据
	size_t nCurrentCaptureIndex = 0;//这个是数组元素的下标,从0开始,然后不断往上。主要用于采集的数据不断累计到数组里面
	 //new(std::nothrow)"在分配内存失败时会返回一个空指针。
	BYTE* pbyCaptureBuffer = new (std::nothrow) BYTE[nCaptureBufferSize];
	//pbyCaptureBuffer=1048576个字节

	HANDLE waitArray[3];
	waitArray[0] = hAudioSamplesReadyEvent;
	bool still_Recording = true;

	// Each loop fills about half of the shared buffer.
	//每个循环填充大约一半的共享缓冲区。
	while (still_Recording)
	{
		UINT32         packetLength = 0;
		//等待,直到一个或所有指定的对象处于信号状态或超时间隔过去。
		DWORD waitResult = WaitForMultipleObjects(1, waitArray, FALSE, INFINITE);
		switch (waitResult)
		{
		case WAIT_OBJECT_0 + 0:     // _AudioSamplesReadyEvent
			//方法检索捕获终结点缓冲区中下一个数据包中的帧数。
			hr_retrun = pCaptureClient->GetNextPacketSize(&packetLength);
			if (FAILED(hr_retrun))
			{
				printf("GetNextPacketSize: %x.\n", hr_retrun);
				return false;
			}
			else
			{
				printf("%06d # _AudioSamplesReadyEvent下一个数据包中的帧数 packetLength:%06u \n", n_lao_cishu_Cnt, packetLength);
			}
			while (packetLength != 0)
			{
				DWORD flags;
				BYTE* pData;
				UINT32 numFramesAvailable;//每次从缓存区域里捞出来的 数据帧个数
				//检索指向捕获终结点缓冲区中下一个可用数据包的指针
				hr_retrun = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
				//参数1:指向一个指针变量的指针,该方法将下一个可供客户端读取的数据包的起始地址写入该变量
				//参数2:指向UINT32变量的指针,该方法将帧计数(数据包中可用的音频帧数)写入该变量。客户端要么读取整个数据包,要么不读取。
				//参数3:指向方法写入缓冲区状态标志的DWORD变量的指针。
				//参数4:指向UINT64变量的指针,该方法将数据包中第一个音频帧的设备位置写入该变量。设备位置表示为从流开始的音频帧数。如果客户端不需要设备位置,此参数可以为NULL 
				if (FAILED(hr_retrun))
				{
					printf("pCaptureClient->GetBuffer: %x.\n", numFramesAvailable);
					return false;
				}
				else
				{
					printf("pCaptureClient->GetBuffe采集的帧个数: %d.\n", numFramesAvailable);
					//实测下来,每次都是提取到441帧,说明每秒提取100次,似乎很稳定,很奇怪。
				}
				n_lao_cishu_Cnt++;
				// test flags
				//
				//将数据包中的所有数据视为静音并忽略实际数据值
				if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
				{
					printf("AUDCLNT_BUFFERFLAGS_SILENT \n");
				}
				//包中的数据与前一个包的设备位置不相关;这可能是由于流状态转换或时间故障。
				if (flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY)//
				{
					printf("%06d # AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY \n", n_lao_cishu_Cnt);
				}
				//
				UINT32 gsz_zhen = static_cast<UINT32>((nCaptureBufferSize - nCurrentCaptureIndex) / nFrameSize);
				//printf(" 2个32位通道每次采样的数据的字节数目: %d.\n", nFrameSize);
				printf(" buffer空间里面 剩余采样gsz_zhen: %d.\n", gsz_zhen);
				UINT32 framesToCopy;//从音频硬件缓存里面 捞取 声音帧的个数(如果是2通道,每个通道32位,那每个帧8个字节)
				if (numFramesAvailable< gsz_zhen)//
				{
					framesToCopy = numFramesAvailable;
				}
				else
				{
					framesToCopy = gsz_zhen;
				}
				printf(" 从音频硬件缓存里面 捞取 声音帧的个数framesToCopy: %d.\n", framesToCopy);
				if (framesToCopy > 0)//不等于0 说明开辟的buffer 空间里面还有空的地方
				{
					if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
					{
						// 用 0 来填充一块内存区域:将捕获缓冲区中的 0 填充到输出缓存区中
						ZeroMemory(&pbyCaptureBuffer[nCurrentCaptureIndex], framesToCopy * nFrameSize);
					}
					else
					{
						// 从音频复制数据到输出缓冲区
						CopyMemory(&pbyCaptureBuffer[nCurrentCaptureIndex], pData, framesToCopy * nFrameSize);
					}
					//  Bump the capture buffer pointer.碰撞捕获缓冲区指针。
					nCurrentCaptureIndex += framesToCopy * nFrameSize;
				}
				//释放缓冲区方法释放缓冲区。
				hr_retrun = pCaptureClient->ReleaseBuffer(numFramesAvailable);
				if (FAILED(hr_retrun))
				{
					printf("pCaptureClient->ReleaseBuffer: %x.\n", hr_retrun);
					return false;
				}
				//方法检索捕获终结点缓冲区中下一个数据包中的帧数。
				hr_retrun = pCaptureClient->GetNextPacketSize(&packetLength);
				if (FAILED(hr_retrun))
				{
					printf("pCaptureClient->GetNextPacketSize: %x.\n", hr_retrun);
					return false;
				}
				//GetCurrentPadding方法检索终结点缓冲区中填充的帧数。
				UINT32 ui32NumPaddingFrames;
				hr_retrun = pAudioClient->GetCurrentPadding(&ui32NumPaddingFrames);
				if (FAILED(hr_retrun))
				{
					printf("pAudioClient->GetCurrentPadding: %x.\n", hr_retrun);
					return false;
				}
				if (0 != ui32NumPaddingFrames)
				{
					printf("GetCurrentPadding : %6u\n", ui32NumPaddingFrames);
				}
				//
				if (n_lao_cishu_Cnt == 500)
				{
					still_Recording = false;
					break;
				}
			} // end of 'while (packetLength != 0)'
			break;
		} // end of 'switch (waitResult)'
	} // end of 'while (stillPlaying)'


	SaveWaveData(pbyCaptureBuffer, nCurrentCaptureIndex, pwfx);
	//参数1:原始震动数据的指针
	//参数2:长度
	//参数3:声音的格式,就是通道数目,采样率之类的玩意
	printf("\n音频捕获完成.\n");

	hr_retrun = pAudioClient->Stop();  // Stop recording.
	if (FAILED(hr_retrun))
	{
		printf("pAudioClient->Stop: %x.\n", hr_retrun);
		return false;
	}

Exit:
	CoTaskMemFree(pwfx);//释放以前通过调用 CoTaskMemAlloc 或 CoTaskMemRealloc 函数分配的任务内存块。
	SAFE_RELEASE(pEnumerator)
		SAFE_RELEASE(pDevice)
		SAFE_RELEASE(pAudioClient)
		SAFE_RELEASE(pCaptureClient)

		CoUninitialize();//CoUninitialize关闭当前线程的COM库,卸载线程加载的所有dll,释放任何其他的资源,关闭在线程上维护所有的RPC连接。

	if (pbyCaptureBuffer)
	{
		delete[] pbyCaptureBuffer;
		pbyCaptureBuffer = NULL;
	}

	if (hAudioSamplesReadyEvent)
	{
		CloseHandle(hAudioSamplesReadyEvent);
		hAudioSamplesReadyEvent = NULL;
	}

	return 0;
}

 类似资料: