5,915
社区成员
发帖
与我相关
我的任务
分享
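# Hobby project: real-time speech-to-text (Vosk) whose transcripts are sent to an LLM.
# Only Chinese is supported for now; English recognition is poor - improvements are welcome.
# Requires the vosk-model-cn-0.22 model, available from https://alphacephei.com/vosk/models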
import queue
import threading
import json
import os
import requests
from openai import OpenAI
import time
from datetime import datetime
# DashScope API key, read from the environment (None when the variable is unset).
DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY')

# vosk and pyaudio are imported defensively so that the module can still be
# loaded without them; RealTimeASR checks VOSK_AVAILABLE before using either.
try:
    from vosk import Model, KaldiRecognizer
    import pyaudio
    VOSK_AVAILABLE = True
except ImportError:
    # The previous message was garbled; tell the user what is actually missing.
    print("缺少依赖库: 请先安装 vosk 和 pyaudio (pip install vosk pyaudio)")
    VOSK_AVAILABLE = False
class RealTimeASR:
    """Real-time speech recognition that forwards each utterance to an LLM.

    Audio is captured through a PyAudio stream callback, decoded by a Vosk
    recognizer on a background thread, and every final transcript is written
    to a session log, sent to the chat-completions endpoint, and the reply is
    printed and logged as well.
    """

    # Directory (relative to the working directory) holding the session logs.
    LOG_DIR = "语音识别日志"
    # Sample rate shared by the recognizer and the audio stream.
    SAMPLE_RATE = 16000
    # Number of frames delivered to the audio callback per chunk.
    FRAMES_PER_BUFFER = 4096

    def __init__(self, model_path="vosk-model-cn-0.22/vosk-model-cn-0.22"):
        """Load the Vosk model and create the session log file.

        Args:
            model_path: Path of the unpacked Vosk model directory.

        Raises:
            ImportError: If vosk / pyaudio could not be imported.
        """
        if not VOSK_AVAILABLE:
            raise ImportError("Vosk库未正确安装")
        self.model = Model(model_path)
        self.recognizer = KaldiRecognizer(self.model, self.SAMPLE_RATE)
        self.recognizer.SetWords(True)
        self.audio_queue = queue.Queue()    # raw audio chunks from the callback
        self.result_queue = queue.Queue()   # final transcripts for the main loop
        self.running = False
        self.last_partial = ""
        self._llm_client = None             # created lazily on the first request

        # One log file per session, named after the start time.
        current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.log_file = os.path.join(self.LOG_DIR, f"{current_time}.txt")
        print(f"识别结果将保存到文件: {self.log_file}")
        try:
            # The log directory does not exist on a fresh checkout; without
            # this call the open() below (and every later append) fails.
            os.makedirs(self.LOG_DIR, exist_ok=True)
            with open(self.log_file, 'w', encoding='utf-8') as f:
                f.write(f"语音识别日志 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        except (OSError, UnicodeError) as e:
            # Logging is best-effort: recognition still works without a log.
            print(f"创建日志文件失败: {e}")
        print("准备大语言模型接口...")

    def _append_log(self, entry, failure_label):
        """Append *entry* to the session log (best-effort).

        Args:
            entry: Text to append verbatim.
            failure_label: Message prefix printed if the write fails.

        Returns:
            True if the entry was written, False otherwise.
        """
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(entry)
        except (OSError, UnicodeError) as e:
            print(f"{failure_label}: {e}")
            return False
        return True

    @staticmethod
    def _is_exit_command(text):
        """Return True if *text* is the spoken stop command.

        The stop command is the word '退出' on its own, or any utterance
        containing 'exit' (case-insensitive).
        """
        # Whitespace is stripped before comparing because the recognizer may
        # return the word as separate tokens ("退 出").
        compact = "".join(text.split())
        return compact == '退出' or 'exit' in text.lower()

    def audio_callback(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: queue the captured chunk for decoding.

        Chunks arriving while the recognizer is not running are dropped.
        Returns the (data, flag) tuple PyAudio expects; no output data is
        produced and the stream is asked to continue.
        """
        if self.running:
            self.audio_queue.put(in_data)
        return (None, pyaudio.paContinue)

    def processing_thread(self):
        """Worker loop: decode queued audio and publish final transcripts.

        Runs until ``self.running`` becomes false. Non-empty final results are
        put on ``result_queue``; the latest partial hypothesis longer than
        three characters is kept in ``last_partial``.
        """
        while self.running:
            try:
                data = self.audio_queue.get(timeout=1.0)
                if self.recognizer.AcceptWaveform(data):
                    # End of utterance: consume the final result.
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text", "")
                    if text.strip():
                        self.last_partial = ""
                        self.result_queue.put(text)
                else:
                    # Utterance still in progress: track the partial hypothesis.
                    partial = json.loads(self.recognizer.PartialResult())
                    partial_text = partial.get("partial", "")
                    if (partial_text.strip()
                            and partial_text != self.last_partial
                            and len(partial_text) > 3):
                        self.last_partial = partial_text
            except queue.Empty:
                continue
            except Exception as e:
                # Keep the worker alive: one bad chunk must not end recognition.
                print(f"错误: {e}")

    def call_language_model(self, text):
        """Send *text* to the language model and return its reply.

        Args:
            text: The user utterance to send as a single user message.

        Returns:
            The reply text, or None if the request failed (the error is
            printed, not raised).
        """
        try:
            print(f"正在向大语言模型发送请求: {text}")
            print("正在连接百炼API...")
            # Reuse one client across requests instead of rebuilding it
            # for every utterance.
            client = getattr(self, "_llm_client", None)
            if client is None:
                client = OpenAI(
                    api_key=DASHSCOPE_API_KEY,
                    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
                )
                self._llm_client = client
            response = client.chat.completions.create(
                model="qwen-turbo-latest",
                messages=[{"role": "user", "content": text}],
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"调用大语言模型失败: {e}")
            return None

    def start(self):
        """Run the recognizer until the stop command or Ctrl+C.

        Opens the default audio input, starts the decoding thread, then for
        every final transcript: logs it, checks for the stop command, asks the
        language model and logs the reply. Audio resources are released on
        exit regardless of how the loop ended.
        """
        print("启动实时语音识别(说'退出'或'exit'可停止)...")
        print("识别到的内容将自动发送给大语言模型,并显示回复")
        self.running = True
        p = None
        stream = None
        processing = None
        try:
            p = pyaudio.PyAudio()
            print("可用音频输入设备:")
            for i in range(p.get_device_count()):
                dev = p.get_device_info_by_index(i)
                if dev['maxInputChannels'] > 0:
                    print(f" {i}: {dev['name']}")

            # input_device_index=None selects the default input device.
            print("尝试打开音频流...")
            stream = p.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=self.SAMPLE_RATE,
                            input=True,
                            input_device_index=None,
                            stream_callback=self.audio_callback,
                            frames_per_buffer=self.FRAMES_PER_BUFFER)

            print("启动语音处理线程...")
            processing = threading.Thread(target=self.processing_thread, daemon=True)
            processing.start()
            print("处理线程已启动!")

            while self.running:
                try:
                    text = self.result_queue.get(timeout=1.0)
                except queue.Empty:
                    continue
                print(f"识别结果: {text}")
                if self._append_log(f"[用户]: {text}\n", "保存到文件失败"):
                    print(f"已保存到文件: {os.path.basename(self.log_file)}")

                if self._is_exit_command(text):
                    self.running = False
                    print("正在停止语音识别...")
                    break

                llm_response = self.call_language_model(text)
                if llm_response is None:
                    # The failure was already reported; do not print or log
                    # a literal "None" as if it were the model's answer.
                    continue
                print(f"大语言模型回复: {llm_response}")
                self._append_log(f"[AI]: {llm_response}\n\n", "保存AI回复失败")
        except KeyboardInterrupt:
            print("\n正在停止语音识别...")
        except Exception as e:
            print(f"启动错误: {e}")
        finally:
            self.running = False
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
            finally:
                # Terminate PyAudio even if closing the stream raised.
                if p is not None:
                    p.terminate()
            if processing is not None and processing.is_alive():
                # The worker polls with a 1 s timeout, so it notices
                # running == False within that window.
                processing.join(timeout=2.0)
            print("语音识别已停止")
if __name__ == "__main__":
    # `start` is an instance method of RealTimeASR; calling a bare `start()`
    # raised NameError, so build the recognizer first and run it.
    RealTimeASR().start()