Abigail99216 committed (verified)
Commit 61642c8 · Parent(s): 99d5161

Update app.py

Files changed (1): app.py (+20, -82)
app.py CHANGED

@@ -1,24 +1,16 @@
-from transformers import pipeline
 import gradio as gr
 import numpy as np
 import time
 import json
 import os
-from langchain_openai import ChatOpenAI
-from langchain_core.output_parsers import StrOutputParser
 from dotenv import load_dotenv
 import logging
 
 load_dotenv()
 zhipuai_api_key = os.getenv("ZHIPUAI_API_KEY")
 
-# Use a smaller Whisper model for Chinese
-try:
-    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-    logging.info("Whisper model loaded successfully")
-except Exception as e:
-    logging.error(f"Error loading Whisper model: {e}")
-    transcriber = None
+# Set up logging
+logging.basicConfig(level=logging.INFO)
 
 # Initialize the conversation log
 conversation = []
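For reference, the try/except removed above loaded whisper-tiny at import time, so a failed model download would take the whole Space down with it. If transcription comes back in a later commit, a lazy-loading variant avoids that. This is only a sketch: get_transcriber is a hypothetical helper, and it assumes transformers plus a torch backend are installed.

```python
import logging

from transformers import pipeline

logging.basicConfig(level=logging.INFO)

_transcriber = None  # loaded on first use, not at import time

def get_transcriber():
    """Load openai/whisper-tiny once, on the first call that needs it."""
    global _transcriber
    if _transcriber is None:
        try:
            _transcriber = pipeline(
                "automatic-speech-recognition", model="openai/whisper-tiny"
            )
            logging.info("Whisper model loaded successfully")
        except Exception as e:  # keep the app alive even if loading fails
            logging.error(f"Error loading Whisper model: {e}")
    return _transcriber
```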
@@ -31,40 +23,22 @@ def transcribe(audio):
         return "No audio input received"
 
     try:
-        logging.info(f"Audio input received: {type(audio)}, {len(audio)}")
-        sr, y = audio
-        logging.info(f"Sample rate: {sr}, Audio data shape: {y.shape}")
+        logging.info(f"Audio input received: {type(audio)}")
 
-        # Convert to mono
-        if y.ndim > 1:
-            y = y.mean(axis=1)
+        # Simple audio handling
+        audio_array = audio.flatten()  # flatten the audio into a 1-D array
+        audio_text = f"Received audio with length: {len(audio_array)}"
 
-        logging.info(f"Audio data shape after conversion: {y.shape}")
-
-        y = y.astype(np.float32)
-        y /= np.max(np.abs(y))
-
-        # Transcribe in Chinese
-        if transcriber is not None:
-            logging.info("Starting transcription")
-            result = transcriber({"sampling_rate": sr, "raw": y}, generate_kwargs={"language": "chinese"})
-            text = result["text"].strip()
-            logging.info(f"Transcription result: {text}")
-        else:
-            logging.error("Transcriber not initialized")
-            return "Transcriber not initialized"
-
         # Create structured data
-        if text:
-            current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-            conversation.append({
-                "时间": current_time,
-                "角色": current_speaker,
-                "内容": text
-            })
-
-            # Switch the speaker
-            current_speaker = "医生" if current_speaker == "患者" else "患者"
+        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        conversation.append({
+            "时间": current_time,
+            "角色": current_speaker,
+            "内容": audio_text
+        })
+
+        # Switch the speaker
+        current_speaker = "医生" if current_speaker == "患者" else "患者"
 
         # Convert the conversation log to a formatted string
         formatted_conversation = json.dumps(conversation, ensure_ascii=False, indent=2)
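One caveat with the simplified handler: with type="numpy", gr.Audio passes the callback a (sample_rate, data) tuple, which is why the removed code unpacked it as sr, y = audio. Calling audio.flatten() on the tuple itself will raise AttributeError. Below is a sketch of the audio step with the tuple unpacked first; handle_audio is a hypothetical stand-in for the body of transcribe.

```python
import numpy as np

def handle_audio(audio):
    # gr.Audio(type="numpy") delivers (sample_rate, data), not a bare array
    if audio is None:
        return "No audio input received"
    sr, y = audio
    audio_array = np.asarray(y).flatten()  # now safe to flatten to 1-D
    return f"Received audio with length: {len(audio_array)} at {sr} Hz"
```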
@@ -79,56 +53,20 @@ def switch_speaker():
     current_speaker = "医生" if current_speaker == "患者" else "患者"
     return f"当前说话者:{current_speaker}"
 
-def generate_memo(conversation_json):
-    llm = ChatOpenAI(
-        model="glm-3-turbo",
-        temperature=0.7,
-        openai_api_key=zhipuai_api_key,
-        openai_api_base="https://open.bigmodel.cn/api/paas/v4/"
-    )
-
-    prompt = f"""
-    请根据以下医生和患者的对话,生成一份结构化的备忘录。备忘录应包含以下字段:主诉、检查、诊断、治疗和备注。
-    如果某个字段在对话中没有明确提及,请填写"未提及"。
-
-    对话内容:
-    {conversation_json}
-
-    请以JSON格式输出备忘录,格式如下:
-    {{
-        "主诉": "患者的主要症状和不适",
-        "检查": "医生建议或已进行的检查",
-        "诊断": "医生对患者的诊断",
-        "治疗": "医生对患者的治疗建议",
-        "备注": "医生对患者的备注"
-    }}
-    """
-
-    output = llm.invoke(prompt)
-    output_parser = StrOutputParser()
-    output = output_parser.invoke(output)
-    #st.info(output)
-    return output
-
-
 # Build the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# 实时中文对话转录与备忘录生成")
-    gr.Markdown("点击麦克风图标开始录音,说话后会自动进行语音识别。支持中文识别。")
+    gr.Markdown("# 音频输入测试")
+    gr.Markdown("上传音频文件或使用麦克风录音。")
 
     with gr.Row():
-        audio_input = gr.Audio(sources=["microphone"], type="numpy", streaming=True)
+        audio_input = gr.Audio(source="microphone", type="numpy")
         speaker_button = gr.Button("切换说话者")
 
     speaker_label = gr.Label("当前说话者:患者")
     conversation_output = gr.JSON(label="对话记录")
-    memo_output = gr.JSON(label="备忘录")
-
-    generate_memo_button = gr.Button("生成备忘录")
 
-    audio_input.stream(transcribe, inputs=[audio_input], outputs=[conversation_output])
+    audio_input.change(transcribe, inputs=[audio_input], outputs=[conversation_output])
     speaker_button.click(switch_speaker, outputs=[speaker_label])
-    generate_memo_button.click(generate_memo, inputs=[conversation_output], outputs=[memo_output])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
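A version note on the component change: source="microphone" is the Gradio 3.x keyword, while the sources=["microphone"] spelling being removed is the 4.x one, so the new line fails with a TypeError on Gradio 4.x unless the Space pins gradio<4. A small version check can cover both; this is a sketch that assumes nothing beyond gr.__version__, which Gradio exposes.

```python
import gradio as gr

def make_mic_input():
    # Gradio 4.x renamed the keyword: sources=[...] replaces source="..."
    major = int(gr.__version__.split(".")[0])
    if major >= 4:
        return gr.Audio(sources=["microphone"], type="numpy")
    return gr.Audio(source="microphone", type="numpy")
```

Separately, moving from audio_input.stream(...) to audio_input.change(...) means the callback now fires once per completed recording rather than on streaming chunks.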