from pypinyin import lazy_pinyin,Style
style = Style.TONE3
import re
# Convert every other line of a transcript file to pinyin (in the module-level
# `style`) and append the result to an output file, one line per input line.
#
# NOTE(review): neither open() call passes an encoding, so both files use the
# locale default -- confirm this matches the transcript's actual encoding.
with open("/dnn4_added/fuyongze/tr/vt/base/King-ASR-256/DATA/CHANNEL1/SCRIPT/100100.TXT", "r") as f:
    # Only lines 0, 2, 4, ... are converted; the odd-indexed lines in between
    # are skipped (presumably a different annotation tier -- verify).
    transcript_lines = f.readlines()[::2]

# Runs of punctuation and newlines are deleted before conversion. Tabs and
# spaces are not in the character class, so they are left in place.
# Compiled once here instead of being looked up again for every line.
punctuation = re.compile('[’!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n。!,]+')

output_lines = []
for line in transcript_lines:
    tokens = lazy_pinyin(punctuation.sub("", line), style=style)
    # Every token is followed by a tab, including the last one, so the
    # on-disk format stays the same as before.
    output_lines.append("".join(token + "\t" for token in tokens) + "\n")

# Open the output once rather than once per line. Append mode is kept, so
# re-running the script adds to the existing file. The guard avoids creating
# an empty output file when there was nothing to convert.
if output_lines:
    with open("/dnn4_added/fuyongze/tr/vt/base/scripts/py/100100.txt", "a") as f1:
        f1.writelines(output_lines)
转换前的结果:
转换后的结果:
000100001 nai3 nai3 yao4 shui4 wu3 jiao4
000100002 yi1 bian1 bang1 zhu4 ba1 lu4 jun1 fang4 shao4
000100003 zai4 bo2 wu4 guan3 li3
000100004 zai4 cao3 di4 shang4 you2 xi4
000100005 xiao3 ji1 sheng1 bing4 le
000100006 you3 de xia4 ku1 le
000100007 ta1 shi4 zai4 zhao3 cai2 you2 piao4 de xiao3 dao1
000100008 zheng1 kai1 yan3 yi1 kan4
000100009 ren2 men zai4 shu4 yin1 xia4 cheng2 liang2
000100010 wo3 yi3 jing1 hui4 zuo4 yi3 zi le