import gradio as gr
import whisper
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Translation model and tokenizer (English to other languages)
model = MBartForConditionalGeneration.from_pretrained("SnypzZz/Llama2-13b-Language-translate")
tokenizer = MBart50TokenizerFast.from_pretrained("SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX")

# Target-language selector for the translation step
dropdown = gr.Dropdown(["de_DE", "es_XX", "fr_XX", "sv_SE", "ru_RU"], label="Choose Output Language")


def execute(input, dropdown_value):
    # Translate English text into the language selected in the dropdown
    model_inputs = tokenizer(input, return_tensors="pt")
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[dropdown_value],
    )
    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    output = output.strip("[]' ")
    return output


# Function to transcribe the audio
def whisper_transcript(model_size, audio_file):
    source = audio_file
    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source, language="english")
    return transcript["text"]


# Gradio interface
gradio_ui = gr.Interface(
    fn=whisper_transcript,
    theme="Nymbo/Nymbo_Theme",
    title="Transcribe English audio to text",
    description="**How to use**: Choose one of the 4 models, upload or record an audio clip, and click the Submit button.",
    article="**Note**: For English audio only.",
    inputs=[
        gr.Dropdown(
            label="Select Model",
            choices=[
                "tiny.en",
                "base.en",
                "small.en",
                "medium.en",
            ],
            value="base.en",
        ),
        gr.Audio(label="Upload Audio File", sources=["upload", "microphone"], type="filepath"),
    ],
    outputs=gr.Textbox(label="Whisper Transcript"),
)

gradio_ui.queue().launch()
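
# The `dropdown` component and `execute` translation function above are defined but never
# wired into the Gradio interface. Below is a minimal sketch (an assumption, not part of
# the original app) of how the two steps could be chained: transcribe with Whisper, then
# translate with the MBart checkpoint. The helper name `transcribe_and_translate` is
# hypothetical; to actually expose it, it would need to be defined before the gr.Interface
# above, passed as `fn`, and `dropdown` added to `inputs` before calling launch().
def transcribe_and_translate(model_size, audio_file, target_lang):
    """Transcribe English audio with Whisper, then translate the text to `target_lang`."""
    english_text = whisper_transcript(model_size, audio_file)
    return execute(english_text, target_lang)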