python中文语音识别求教
我的程序是:
import torch
import numpy as np
import os
from collections import Counter
import librosa
import time
#import torch.nn
# Training sample locations.
# NOTE: both are relative paths, so they are resolved against the current
# working directory of the process, not against the directory of this script.
wav_path = 'data/wav/train'
label_file = 'data/doc/trans/train.word.txt'
def get_wave_files(wav_path=wav_path, min_size=240000):
    """Return the paths of all sufficiently large wav files under *wav_path*.

    The directory tree rooted at *wav_path* is walked recursively. A file is
    kept when its name ends with ``.wav`` or ``.WAV`` and its size is at
    least *min_size* bytes.

    Args:
        wav_path: Root directory to search. Defaults to the module-level
            ``wav_path``.
        min_size: Minimum file size in bytes; smaller files are skipped.
            Note that ``st_size`` is measured in bytes, not bits.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.

    Note:
        ``os.walk`` ignores directory-listing errors by default, so a
        missing or unreadable *wav_path* produces an empty list rather than
        an exception.
    """
    wav_files = []
    for dirpath, _dirnames, filenames in os.walk(wav_path):
        for filename in filenames:
            # str.endswith accepts a tuple of suffixes: one call, two checks.
            if not filename.endswith(('.wav', '.WAV')):
                continue
            filename_path = os.path.join(dirpath, filename)
            # Skip recordings that are too short to be useful for training.
            if os.stat(filename_path).st_size < min_size:
                continue
            wav_files.append(filename_path)
    return wav_files
# Collect the wav files to use; this runs as soon as the script is executed.
wav_files = get_wave_files()  # list of wav file paths
def get_wav_label(wav_files=wav_files, label_file=label_file):
    """Pair each wav file with its transcript read from *label_file*.

    Every line of *label_file* is split at the first space: the part before
    it is the utterance id and the remainder is the transcript. A wav file
    matches an id when its base name, cut at the first ``.``, equals that id.
    Wav files without a matching id are dropped.

    Args:
        wav_files: Iterable of wav file paths. Defaults to the module-level
            ``wav_files`` list.
        label_file: Path to the UTF-8 transcript file. Defaults to the
            module-level ``label_file``.

    Returns:
        A tuple ``(new_wav_files, labels)`` of two parallel lists: the wav
        paths that have a transcript, and the transcripts themselves.

    Raises:
        FileNotFoundError: If *label_file* does not exist. A relative path
            is resolved against the current working directory.
    """
    labels_dict = {}
    with open(label_file, encoding='utf-8') as f:
        for line in f:
            label_id, sep, label_text = line.strip('\n').partition(' ')
            if not sep:
                # Blank or malformed line (no space, hence no transcript):
                # skip it instead of failing with IndexError.
                continue
            labels_dict[label_id] = label_text

    labels = []
    new_wav_files = []
    for wav_file in wav_files:
        # The id is the file name up to its first dot.
        wav_id = os.path.basename(wav_file).split('.')[0]
        if wav_id in labels_dict:
            labels.append(labels_dict[wav_id])
            new_wav_files.append(wav_file)
    return new_wav_files, labels
但是运行的时候出现错误:
runfile('/home/liug/data/mfcccnn.py', wdir='/home/liug/data')
Traceback (most recent call last):
File "<ipython-input-1-aee84d4c319a>", line 1, in <module>
runfile('/home/liug/data/mfcccnn.py', wdir='/home/liug/data')
File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "/usr/lib/python3/dist-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 88, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "/home/liug/data/mfcccnn.py", line 59, in <module>
wav_files,labels = get_wav_label()#得到标签和对应的语音文件
File "/home/liug/data/mfcccnn.py", line 42, in get_wav_label
with open(label_file,encoding='utf-8') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'data/doc/trans/train.word.txt'
我把 train.word.txt 文件和程序放在同一个文件夹内,但是还是有问题。请大神们帮忙看看是为什么?