1.下载源码:点击下载 rnnoise 代码,或者去 GitHub 下载
2.编译源码
3.训练
pip install numpy h5py
pip install grpcio==1.36.1
pip install keras==2.2.4 tensorflow==1.12.0 # keras 与 tensorflow 的版本必须对应;GPU 版为 tensorflow-gpu==1.12.0
pip install protobuf==3.8.0
4.降噪
附:
tensorflow: Your CPU supports instructions that this TensorFlow binary was not compiled to use: FMA
遇到这个提示,意思是你的 CPU 支持 AVX2、FMA 等指令集(可加速 CPU 计算),但当前安装的 TensorFlow 二进制包在编译时没有启用它们。
如果是初学者,或者对计算速度没有太高要求,在脚本开头加上下面两行即可屏蔽这个提示:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
PS:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '0' # 默认,显示所有信息
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '1' # 屏蔽 INFO,显示 Warning 和 Error
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' # 屏蔽 INFO 和 Warning,只显示 Error
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' # 屏蔽 INFO、Warning 和 Error
#coding: utf-8
import os
import os.path
import binascii
# File that read_pcm_file() opens for writing and appends every payload to.
out_file_name = '1.raw'

# Name fragments checked by ignore_check_file(); a file whose name contains
# one of them is skipped.
ignore_filename = [
    ".raw",
    ".py",
]
def ignore_check_file(file):
    """Return True when ``file`` should be skipped.

    A file is skipped when its name contains any fragment listed in the
    module-level ``ignore_filename``.  The match is a plain substring test,
    so with the fragment ".py" a name such as "x.pyc" is skipped as well.

    The previous ``find(...) > 0`` test treated a match at index 0 as "not
    found", so a file named exactly ".raw" or ".py" slipped through; a
    membership test covers that position too.

    :param file: bare file name (no directory part).
    :return: True if the file must be ignored, False otherwise.
    """
    return any(fragment in file for fragment in ignore_filename)
def str_to_hexStr(string):
    """Convert a byte string into its hexadecimal text form.

    Pipeline: bytes -> hex-encoded bytes -> text.  ``string`` is passed to
    ``binascii.hexlify`` unchanged; no text encoding step is applied here.

    :param string: the bytes to render.
    :return: two lowercase hex digits per input byte.
    """
    hex_bytes = binascii.hexlify(string)
    return hex_bytes.decode('utf-8')
def hexStr_to_str(hex_str):
    """Convert hexadecimal text back into the text it encodes.

    Pipeline: hex text -> hex bytes -> raw bytes -> UTF-8 decoded text.

    :param hex_str: text made of hex digit pairs.
    :return: the decoded text.
    """
    return binascii.unhexlify(hex_str.encode('utf-8')).decode('utf-8')
def _ascii_tag(raw):
    """Render a chunk tag as text, showing non-printable bytes as '?'."""
    return "".join(chr(b) if 32 <= b < 127 else "?" for b in bytearray(raw))


def print_wav_head(head_msg):
    """Print the fields of a canonical 44-byte RIFF/WAVE header.

    All multi-byte numbers in the header are little-endian.  Every numeric
    field is decoded to an integer before printing; earlier the 16-bit
    fields were shown as byte-swapped hex text (e.g. "0100" for one channel)
    and only the first byte of the 4-byte fmt chunk size was read.

    Layout (offset: field):
      0: "RIFF" chunk id, upper case
      4: chunk size = total file length - 8 (excludes the id and this field)
      8: "WAVE" form type, upper case
     12: "fmt " chunk id, lower case, last character is a space
     16: fmt chunk size, usually 0x00000010
     20: format tag, 1 means PCM
     22: channel count, 1 = mono, 2 = stereo
     24: sample rate, e.g. 44100
     28: bytes per second = sample rate * (bits per sample / 8) * channels
     32: block align = bits per sample * channels / 8 (4 for 16-bit stereo)
     34: bits per sample of each channel, e.g. 16
     36: "data" chunk id, lower case
     40: audio data length = file length including header - 44

    :param head_msg: the first bytes of the file; at least 44 are needed.
                     Extra bytes after the first 44 are ignored, except
                     that the HEAD line dumps everything that was passed.
    :return: None.  A header shorter than 44 bytes is reported with a single
             message instead of raising ValueError.
    """
    # Kept local so this block does not rely on a module-level import.
    import struct

    layout = struct.Struct('<4sI4s4sIHHIIHH4sI')
    if len(head_msg) < layout.size:
        print("WAV header too short: %d bytes (need %d)"
              % (len(head_msg), layout.size))
        return

    (riff, file_len, wave, fmt, fmt_size, format_tag, channels,
     samples_per_sec, bytes_per_sec, block_align, bits_per_sample,
     data, audio_data_len) = layout.unpack_from(head_msg)

    print("-----WAV Head info-----")
    print("HEAD: %s" % binascii.hexlify(head_msg).decode('utf-8'))
    print("RIFF(4): %s" % _ascii_tag(riff))
    print("Len(4): %d" % file_len)
    print("WAV(4): %s" % _ascii_tag(wave))
    print("FMT(4): %s" % _ascii_tag(fmt))
    print("FILT(4): 0x%08x" % fmt_size)
    print("FTAG(2): %d" % format_tag)
    print("CHAN(2): %d" % channels)
    print("SAMP(4): %d" % samples_per_sec)
    print("PERSEC(4): %d" % bytes_per_sec)
    print("ALIGN(2): %d" % block_align)
    print("PERS(2): %d" % bits_per_sample)
    print("DATA(4): %s" % _ascii_tag(data))
    print("AUDIOLEN(4): %d" % audio_data_len)
    print("-----------------------")
def read_pcm_file():
    """Collect audio payloads from the current directory tree into one file.

    Walks "." recursively.  Every file that ``ignore_check_file`` does not
    reject is opened in binary mode; when its name contains ".wav" (not at
    index 0) the first 44 bytes are consumed and dumped via
    ``print_wav_head``.  In the remaining bytes the last occurrence of
    b"data" is located, and if it is found past index 0 everything from 6
    bytes after that position is appended to ``out_file_name``.

    Changes from the earlier version: paths are built with ``os.path.join``
    instead of a hard-coded backslash, both files are closed even when an
    error occurs, and the search needle is a bytes literal so it matches the
    binary data on Python 3 as well.

    :return: None.  Progress is reported on stdout.
    """
    file_count = 0
    with open(out_file_name, 'wb') as wf:
        # root: directory being visited; files: its non-directory entries.
        for root, _dirs, files in os.walk("."):
            for name in files:
                if ignore_check_file(name):
                    continue
                src_file = os.path.join(root, name)
                file_count += 1
                print("Process File Name: %s" % src_file)
                with open(src_file, 'rb') as rf:
                    if name.find('.wav') > 0:
                        print_wav_head(rf.read(44))
                    raw_data = rf.read()
                pcm_idx = raw_data.rfind(b'data')
                # NOTE(review): when the "data" tag sits inside the 44 bytes
                # already consumed above, nothing is found here and the file
                # contributes no output - confirm this is intended.
                if pcm_idx > 0:
                    print("Read File Len: %d PCM idx: %d"
                          % (len(raw_data), pcm_idx))
                    # NOTE(review): skips the 4-byte tag plus 2 more bytes;
                    # a RIFF chunk size field is 4 bytes, so this offset
                    # looks 2 bytes short - verify before changing.
                    wf.write(raw_data[pcm_idx + 4 + 2:])
    print("Process File Count: %d" % file_count)
# Script entry point: run the conversion only when executed directly.
if __name__ == '__main__':
    read_pcm_file()