# File: espnet_onnx_demo/modules/asr_frontend.py
# Repository page metadata (not part of the program): latest commit
# "Added several languages" (54a30e5) by ms180; file size 999 bytes.
import os
import aiofiles
import librosa
from espnet_onnx import Speech2Text
from fastapi import Form, UploadFile
from pydantic import BaseModel
class AudioInput(BaseModel):
    # Request schema carrying a single uploaded audio file.
    # The Ellipsis passed to Form() declares the field with no default value.
    # NOTE(review): FastAPI normally declares uploads with File(...) rather
    # than Form(...) -- confirm this model is consumed as intended.
    audio: UploadFile = Form(...)
async def audio_from_file(file):
    """Persist an uploaded audio file and decode it with librosa.

    The raw bytes are written to ``sample.mp3`` in the current working
    directory (any previous copy is removed first) and the file is then
    loaded through ``librosa.load`` with ``sr=16000``.

    Args:
        file: Object exposing ``read()`` that yields the raw audio bytes.
            ``read`` may return the bytes directly or return an awaitable
            (as FastAPI's ``UploadFile.read`` does); both are supported.

    Returns:
        The audio array produced by ``librosa.load`` (the accompanying
        sample rate is discarded).
    """
    import inspect  # local import: only needed for the awaitable check

    # EAFP: drop a stale copy from a previous call without a separate
    # exists() check that could race with another caller.
    try:
        os.remove("sample.mp3")
    except FileNotFoundError:
        pass

    # An async read() hands back a coroutine; it must be awaited to obtain
    # the bytes, otherwise the coroutine object itself would be written.
    content = file.read()
    if inspect.isawaitable(content):
        content = await content

    async with aiofiles.open("sample.mp3", "wb") as out_file:
        await out_file.write(content)  # async write

    # NOTE(review): the fixed file name is shared by every call, so
    # concurrent requests can overwrite each other's audio -- confirm whether
    # this service ever handles uploads in parallel.
    audio, _ = librosa.load("sample.mp3", sr=16000)
    return audio
class ASRModel:
    """Holder for a lazily loaded ``Speech2Text`` recognizer.

    The instance starts without a model; ``load_model`` must be called
    before ``generate`` can be used.
    """

    def __init__(self):
        # Base directory that load_model() prefixes to the requested name.
        self.model_path = "models/asr"
        # Stays None until load_model() installs a recognizer.
        self.model = None

    def load_model(self, model_path):
        """Create the recognizer stored under ``model_path`` and keep it.

        Args:
            model_path: Path fragment appended to ``self.model_path`` (joined
                with ``/``) to form the model directory.
        """
        model_dir = f"{self.model_path}/{model_path}"
        self.model = Speech2Text(model_dir=model_dir, use_quantized=True)

    def generate(self, audio):
        """Run the loaded model on ``audio`` and return its top result.

        Args:
            audio: Input handed unchanged to the model callable.

        Returns:
            Element ``[0][0]`` of the model output -- presumably the text of
            the best hypothesis; confirm against the espnet_onnx result
            layout.

        Raises:
            RuntimeError: If ``load_model`` has not been called yet.
        """
        recognizer = self.model
        if recognizer is None:
            raise RuntimeError("Model is not loaded.")

        results = recognizer(audio)
        first_result = results[0]
        return first_result[0]