Spaces:

Sunbird
/

sb-mms-inference

Sleeping

App Files Community

akera committed on Feb 20, 2024

Commit

59bf002

verified ·

1 Parent(s): d4afb45

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -37

app.py CHANGED Viewed

@@ -15,22 +15,24 @@ auth_token = os.environ.get("HF_TOKEN")
 target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
-target_lang_code = target_lang_options[target_lang]
 languages = list(target_lang_options.keys())
-if target_lang_code=="eng":
-    model_id = "facebook/mms-1b-all"
-else:
-    model_id = "Sunbird/sunbird-mms"
 # Transcribe audio using custom model
-def transcribe_audio(input_file, target_lang_code,
                       device, model_id=model_id,
                       chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
     pipe = pipeline(model=model_id, device=device, token=hf_auth_token)
     pipe.tokenizer.set_target_lang(target_lang_code)
     pipe.model.load_adapter(target_lang_code)
@@ -41,41 +43,13 @@ def transcribe_audio(input_file, target_lang_code,
     return output
-# def transcribe(audio_file_mic=None, audio_file_upload=None, language="Luganda (lug)"):
-#     if audio_file_mic:
-#         audio_file = audio_file_mic
-#     elif audio_file_upload:
-#         audio_file = audio_file_upload
-#     else:
-#         return "Please upload an audio file or record one"
-#     # Make sure audio is 16kHz
-#     speech, sample_rate = librosa.load(audio_file)
-#     if sample_rate != 16000:
-#         speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
-#     # Keep the same model in memory and simply switch out the language adapters by calling load_adapter() for the model and set_target_lang() for the tokenizer
-#     language_code = language
-#     processor.tokenizer.set_target_lang(language_code)
-#     model.load_adapter(language_code)
-#     inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
-#     with torch.no_grad():
-#         outputs = model(**inputs).logits
-#     ids = torch.argmax(outputs, dim=-1)[0]
-#     transcription = processor.decode(ids)
-#     return transcription
 description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
                          gr.Audio(source="microphone", type="filepath", label="Record Audio"),
                          gr.Audio(source="upload", type="filepath", label="Upload Audio"),
-                         gr.Dropdown(choices=languages, label="Language", value="lug")
                          ],
                      outputs=gr.Textbox(label="Transcription"),
                      description=description

 target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 languages = list(target_lang_options.keys())
 # Transcribe audio using custom model
+def transcribe_audio(input_file, language,
                       device, model_id=model_id,
                       chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
+    target_lang_code = target_lang_options[target_lang_code]
+    # Determine the model_id based on the language
+    if target_lang_code == "eng":
+        model_id = "facebook/mms-1b-all"
+    else:
+        model_id = "Sunbird/sunbird-mms"
     pipe = pipeline(model=model_id, device=device, token=hf_auth_token)
     pipe.tokenizer.set_target_lang(target_lang_code)
     pipe.model.load_adapter(target_lang_code)
     return output
 description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
                          gr.Audio(source="microphone", type="filepath", label="Record Audio"),
                          gr.Audio(source="upload", type="filepath", label="Upload Audio"),
+                         gr.Dropdown(choices=languages, label="Language", value="English"
                          ],
                      outputs=gr.Textbox(label="Transcription"),
                      description=description