ZizaM's picture
Update app.py
20f31d6 verified
import gradio as gr
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
import logging
# Model identifier handed to the `from_pretrained` loaders further down.
model_id = "ZizaM/whisperfinetune_shanxi"

# Module-level logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Start-up sequence: load the model, build the ASR pipeline, define the
# transcription callback and serve the Gradio UI. Any failure along the way is
# logged with its traceback and then re-raised so the process exits non-zero.
try:
    logger.info("开始加载模型: %s", model_id)
    # The processor supplies both the tokenizer and the feature extractor
    # that the pipeline below needs.
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    logger.info("模型加载成功")

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
    )
    logger.info("Pipeline 创建成功")

    def transcribe(audio):
        """Transcribe one recording and return the text shown in the UI.

        Args:
            audio: Value delivered by the Gradio audio component. With
                ``type="filepath"`` (as configured below) this is a path
                string. A ``(sample_rate, samples)`` tuple, which is what
                Gradio's numpy mode produces, is also accepted. ``None``
                means nothing was recorded.

        Returns:
            The stripped transcription, or a user-facing message describing
            why no transcription is available. This function never raises;
            errors are logged and reported as text.
        """
        if audio is None:
            logger.warning("没有接收到音频输入")
            return "请先录制音频"

        try:
            logger.info("开始处理音频...")
            if isinstance(audio, tuple):
                # A tuple is (sample_rate, samples), not a file path. Keep the
                # sample rate so the pipeline can resample, and hand over
                # mono float32 samples instead of raw integer PCM.
                sample_rate, samples = audio
                samples = samples.astype("float32")
                if samples.ndim > 1:
                    # Assumes (num_samples, num_channels) layout, per the
                    # Gradio Audio docs; average the channels down to mono.
                    samples = samples.mean(axis=1)
                peak = abs(samples).max() if samples.size else 0.0
                if peak > 0:
                    samples = samples / peak
                pipe_input = {"sampling_rate": sample_rate, "raw": samples}
                logger.info("处理音频数组: 采样率 %s Hz", sample_rate)
            else:
                pipe_input = audio
                logger.info("处理音频文件: %s", pipe_input)

            # Force Chinese decoding rather than relying on language detection.
            result = pipe(pipe_input, generate_kwargs={"language": "zh"})

            if not result or "text" not in result:
                logger.error("模型返回结果格式错误")
                return "识别失败:模型返回结果格式错误"

            transcribed_text = result["text"].strip()
            if not transcribed_text:
                logger.warning("识别结果为空")
                return "未能识别出有效内容,请重试"

            logger.info("识别成功: %s", transcribed_text)
            return transcribed_text
        except Exception as e:
            # UI boundary: report the failure as text instead of crashing
            # the request; the traceback still goes to the log.
            logger.exception("识别过程中出错: %s", e)
            return f"识别失败: {e}"

    # Build the Gradio interface: microphone in, text box out.
    iface = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="点击麦克风录音",
        ),
        outputs=gr.Textbox(
            label="识别结果",
            placeholder="这里将显示识别结果...",
        ),
        title="语音识别系统",
        description="点击麦克风图标开始录音,再次点击停止录音。等待几秒钟后会显示识别结果。",
        examples=None,
        cache_examples=False,
    )
    iface.launch()
except Exception as e:
    logger.exception("应用启动失败: %s", e)
    # Bare raise re-raises the active exception with its original traceback.
    raise