JPLTedCas committed
Commit 2a0c00f · 1 Parent(s): 510bcda

Update app.py

Files changed (1):
  1. app.py +39 -20
app.py CHANGED
@@ -2,32 +2,49 @@ from transformers import pipeline
 import gradio as gr
 import time
 
-p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
-pc = pipeline("automatic-speech-recognition",model="softcatala/wav2vec2-large-xlsr-catala")
-pe = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
-pj = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-japanese")
-pf = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-french")
+#p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
 
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from datasets import load_dataset
 
+# load model and processor
+processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
+model.config.forced_decoder_ids = None
+
+# load dummy dataset and read audio files
+ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+sample = ds[0]["audio"]
+input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
+
+# generate token ids
+predicted_ids = model.generate(input_features)
+# decode token ids to text
+#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
+#['<|startoftranscript|><|en|><|transcribe|><|notimestamps|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|endoftext|>']
+
+#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+#[' Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.']
 
 
 def transcribe(language,audio, state=""):#language="Spanish",
     time.sleep(1)
-    if language=="Spanish":
-        state=""
-        text = p(audio)["text"]
-    if language=="Catalan":
+    if language=="Multi":
         state=""
-        text = pc(audio)["text"]
-    if language=="English":
-        state=""
-        text = pe(audio)["text"]
-    if language=="French":
-        state=""
-        text = pf(audio)["text"]
-    if language=="Japanese":
-        state=""
-        text = pj(audio)["text"]
+        text = processor.batch_decode(predicted_ids, skip_special_tokens=False)
+
+#    if language=="Catalan":
+#        state=""
+#        text = pc(audio)["text"]
+#    if language=="English":
+#        state=""
+#        text = pe(audio)["text"]
+#    if language=="French":
+#        state=""
+#        text = pf(audio)["text"]
+#    if language=="Japanese":
+#        state=""
+#        text = pj(audio)["text"]
     state += text + " "
     #text2="Esto es loq ue te he entendido"
     return state, state
@@ -39,7 +56,9 @@ demo=gr.Interface(
     description="1)Select language 2)Click on 'record from microphone' and talk 3)Click on 'stop recording' 4)Click on submit 5)Before starting again, click on 'clear'",
 
     inputs=[
-        gr.Dropdown(["Spanish","Catalan","English", "French", "Japanese"],value="Spanish"),
+        #gr.Dropdown(["Spanish","Catalan","English", "French", "Japanese"],value="Spanish"),
+        gr.Dropdown(["Multi"],value="Multi"),
+
        #gr.Audio(source="microphone", type="filepath", streaming=True),
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state"#,"language"