# Snow-White-995's picture
# Upload 4 files
# c127f2d
import json
import gradio as gr
import torch
import uvicorn
from transformers import AutoProcessor, pipeline
# Checkpoint directory and the language passed to the processor and to generation.
model_path = "models/whisper-small-shanghainese"
language = "Chinese"

# With CUDA available, run on the GPU in float16; otherwise on the CPU in float32.
device, torch_dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# The processor provides the tokenizer and feature extractor used by the pipeline.
processor = AutoProcessor.from_pretrained(model_path, language=language)

# Speech-recognition pipeline shared by every transcription request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_path,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    torch_dtype=torch_dtype,
    device=device,
)
def transcribe(audio_file):
    """Transcribe an audio file and return the pipeline result as a JSON string.

    Args:
        audio_file: Path of the audio file to transcribe. The Gradio audio
            input passes ``None`` when the user submitted without audio.

    Returns:
        The speech-recognition pipeline output (requested with timestamps)
        serialized by ``json.dumps``; non-ASCII characters are written
        verbatim instead of being ``\\u``-escaped.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    # Without this guard the pipeline rejects None with an internal error
    # that tells the user nothing; gr.Error is shown in the UI instead.
    if audio_file is None:
        raise gr.Error("Please upload or record an audio clip before submitting.")

    generate_kwargs = {"task": "transcribe", "language": language}
    result = pipe(
        audio_file,
        return_timestamps=True,
        generate_kwargs=generate_kwargs,
    )
    return json.dumps(result, ensure_ascii=False)
def main():
    """Build the Gradio demo page and launch it on 0.0.0.0:7860.

    The page has a heading, an audio input with a submit button, and a text
    area that shows the JSON string returned by ``transcribe``.
    """
    with gr.Blocks() as demo:
        # Heading.
        with gr.Row(), gr.Column():
            gr.Markdown("\n# Finetune whisper-small for Shanghainese\n")

        # Audio input (handed to the callback as a file path) and submit button.
        with gr.Row(), gr.Column():
            audio_box = gr.Audio(label="Audio", type="filepath")
            run_button = gr.Button("Submit", variant="primary")

        # Transcription result.
        with gr.Row(), gr.Column():
            json_box = gr.TextArea(label="Output (JSON)")

        run_button.click(transcribe, inputs=[audio_box], outputs=[json_box])

    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
if __name__ == "__main__":
    # main() builds the Gradio app and starts its own server via launch().
    # It is not an ASGI application, so it must be called directly rather
    # than handed to uvicorn.run(), which would invoke it per request with
    # (scope, receive, send) and never construct the UI.
    main()