ZizaM committed on
Commit
20f31d6
·
verified ·
1 Parent(s): ae62510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -23
app.py CHANGED
@@ -1,31 +1,82 @@
1
  import gradio as gr
2
  from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 
3
 
4
- # Load the model and its processor from the checkpoint named by model_id
 
 
 
 
5
  model_id = "ZizaM/whisperfinetune_shanxi"
6
- processor = WhisperProcessor.from_pretrained(model_id)
7
- model = WhisperForConditionalGeneration.from_pretrained(model_id)
8
 
9
- # Create the speech-recognition pipeline from the model plus the processor's tokenizer and feature extractor
10
- pipe = pipeline(
11
- "automatic-speech-recognition",
12
- model=model,
13
- tokenizer=processor.tokenizer,
14
- feature_extractor=processor.feature_extractor
15
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def transcribe(audio):
18
- # Run speech recognition with the language forced to "zh" and return the result's "text" field
19
- result = pipe(audio, generate_kwargs={"language": "zh"})
20
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Create the Gradio interface: microphone audio in, transcribed text out
23
- iface = gr.Interface(
24
- fn=transcribe,
25
- inputs=gr.Audio(sources=["microphone"]),
26
- outputs="text",
27
- title="语音识别",
28
- description="点击麦克风图标开始录音,再次点击停止录音"
29
- )
30
 
31
- iface.launch()
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
3
+ import logging
4
 
5
# Configure logging once at import time so every step below is traceable.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face Hub ID of the fine-tuned Whisper checkpoint to serve.
model_id = "ZizaM/whisperfinetune_shanxi"


def _prepare_audio(audio):
    """Convert a Gradio audio value into an input the ASR pipeline accepts.

    Args:
        audio: Either a file path string (what ``gr.Audio(type="filepath")``
            delivers) or a ``(sample_rate, samples)`` tuple (what
            ``gr.Audio(type="numpy")`` delivers).

    Returns:
        The path unchanged, or a ``{"sampling_rate": ..., "raw": ...}`` dict
        holding mono float32 samples scaled into [-1, 1].
    """
    if not isinstance(audio, tuple):
        return audio

    sampling_rate, samples = audio
    # The pipeline expects single-channel audio; average the channels if the
    # recording is (n_samples, n_channels).
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    samples = samples.astype("float32")
    # Peak-normalise, guarding against empty or all-silent recordings so we
    # never divide by zero.
    peak = abs(samples).max() if samples.size else 0.0
    if peak > 0:
        samples = samples / peak
    # Passing the sampling rate lets the pipeline resample to what the
    # feature extractor expects instead of assuming it already matches.
    return {"sampling_rate": sampling_rate, "raw": samples}


def transcribe(audio) -> str:
    """Transcribe a recording to Chinese text.

    Args:
        audio: Value delivered by the Gradio audio component, or ``None``
            when nothing was recorded.

    Returns:
        The stripped transcription, or a user-facing message when there is
        no input, the model result is malformed or empty, or recognition
        raises an exception.
    """
    if audio is None:
        logger.warning("没有接收到音频输入")
        return "请先录制音频"

    try:
        logger.info("开始处理音频...")
        if isinstance(audio, str):
            logger.info("处理音频文件: %s", audio)

        result = pipe(_prepare_audio(audio), generate_kwargs={"language": "zh"})

        if not result or "text" not in result:
            logger.error("模型返回结果格式错误")
            return "识别失败:模型返回结果格式错误"

        transcribed_text = result["text"].strip()
        if not transcribed_text:
            logger.warning("识别结果为空")
            return "未能识别出有效内容,请重试"

        logger.info("识别成功: %s", transcribed_text)
        return transcribed_text

    except Exception as e:
        # UI boundary: report the failure in the text box rather than letting
        # the exception propagate out of the handler.
        logger.exception("识别过程中出错: %s", e)
        return f"识别失败: {e}"


try:
    logger.info("开始加载模型: %s", model_id)
    # Load the fine-tuned model together with its processor.
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    logger.info("模型加载成功")

    # Build the speech-recognition pipeline from the loaded components.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
    )
    logger.info("Pipeline 创建成功")

    # Build the Gradio interface: microphone recording in, text out.
    iface = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="点击麦克风录音",
        ),
        outputs=gr.Textbox(
            label="识别结果",
            placeholder="这里将显示识别结果...",
        ),
        title="语音识别系统",
        description="点击麦克风图标开始录音,再次点击停止录音。等待几秒钟后会显示识别结果。",
        examples=None,
        cache_examples=False,
    )

    iface.launch()

except Exception as e:
    # Log with traceback, then re-raise unchanged so the process still fails
    # loudly when the model cannot load or the server cannot start.
    logger.exception("应用启动失败: %s", e)
    raise