import gradio as gr
import whisper

# Load the Whisper model once at startup so every request reuses the same
# weights instead of reloading them per call.
model = whisper.load_model("large")


def transcribe(audio_path):
    """Transcribe an uploaded audio file with Whisper.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path to the uploaded audio file (Gradio's
        ``type="filepath"`` passes the path directly). ``None`` when the
        user submits without uploading anything.

    Returns
    -------
    str
        The transcribed text, or an empty string if no file was provided.
    """
    if not audio_path:
        # Gradio passes None when the form is submitted with no audio.
        return ""
    # model.transcribe() splits long audio into 30-second windows itself.
    # The previous pad_or_trim/log_mel_spectrogram/decode pipeline silently
    # truncated anything longer than 30 seconds to a single window.
    result = model.transcribe(audio_path)
    return result["text"]


# Create the Gradio interface.
# type="filepath" hands the function a plain path string; the old
# type="file" value (a tempfile wrapper accessed via .name) was removed
# from Gradio, as was the `source=` keyword (now `sources=`).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload your audio file"),
    outputs="text",
    title="Whisper ASR",
    description="Upload an audio file and it will be transcribed using OpenAI's Whisper model.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()