Spaces:

Snow-White-995
/

whisper-small-shanghainese

Sleeping

Upload 4 files

c127f2d about 1 year ago

1.67 kB

	import json

	import gradio as gr
	import torch
	import uvicorn
	from transformers import AutoProcessor, pipeline

	# Model location and the language name shared by the processor and by
	# generation in transcribe().
	model_path = "models/whisper-small-shanghainese"
	language = "Chinese"

	# float16 on CUDA, float32 on CPU.
	device, torch_dtype = (
	    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
	)

	# The processor supplies both the tokenizer and the feature extractor
	# that are handed to the pipeline below.
	processor = AutoProcessor.from_pretrained(model_path, language=language)

	pipe = pipeline(
	    "automatic-speech-recognition",
	    model=model_path,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    max_new_tokens=128,
	    chunk_length_s=30,
	    torch_dtype=torch_dtype,
	    device=device,
	)


	def transcribe(audio_file):
	    """Transcribe an audio file and return the pipeline result as JSON text.

	    Args:
	        audio_file: Path of the audio file to transcribe, as delivered by
	            the ``gr.Audio(type="filepath")`` input. Empty/``None`` when
	            the user submitted without providing audio.

	    Returns:
	        The pipeline output (requested with ``return_timestamps=True``)
	        serialised by ``json.dumps`` with ``ensure_ascii=False`` so
	        non-ASCII characters are kept verbatim instead of being escaped.

	    Raises:
	        gr.Error: If no audio was provided.
	    """
	    if not audio_file:
	        # Surface a readable message in the UI instead of letting the
	        # pipeline fail on an empty input with an opaque internal error.
	        raise gr.Error("Please upload or record an audio clip first.")

	    result = pipe(
	        audio_file,
	        return_timestamps=True,
	        generate_kwargs={"task": "transcribe", "language": language},
	    )
	    return json.dumps(result, ensure_ascii=False)


	def main():
	    """Build the Gradio interface and serve it on 0.0.0.0:7860.

	    Layout: a title row, a row with the audio input and the submit button,
	    and a row with the text area that receives the JSON produced by
	    ``transcribe``.
	    """
	    with gr.Blocks() as demo:
	        # Title.
	        with gr.Row(), gr.Column():
	            gr.Markdown('''
	# Finetune whisper-small for Shanghainese
	''')

	        # Input side: audio is passed to the handler as a file path.
	        with gr.Row(), gr.Column():
	            audio_box = gr.Audio(label="Audio", type="filepath")
	            run_button = gr.Button("Submit", variant="primary")

	        # Output side.
	        with gr.Row(), gr.Column():
	            result_box = gr.TextArea(label="Output (JSON)")

	        run_button.click(transcribe, inputs=[audio_box], outputs=[result_box])

	    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)


	if __name__ == "__main__":
	    # main() builds the UI and starts Gradio's own server through launch().
	    # It must be called directly: uvicorn.run() expects an ASGI application,
	    # so passing the zero-argument launcher to it never builds the UI.
	    main()