Spaces:

Shanuka01
/

ASR-wisper-large

Running

Shanuka01 committed on Nov 4, 2023

Commit

2bd0dd9

1 Parent(s): a1f4950

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,22 +5,20 @@ import whisper
 model = whisper.load_model("large")
 def transcribe(audio_file):
-    # Load audio file
-    audio = whisper.load_audio(audio_file.name)
-    # Pad or trim audio to the expected length
     audio = whisper.pad_or_trim(audio)
-    # Generate a mel spectrogram
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    # Options for decoding the spectrogram
     options = whisper.DecodingOptions()
-    # Perform the transcription
     result = whisper.decode(model, mel, options)
     return result.text
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(label="Upload your audio file"),
     outputs="text",
     title="Whisper ASR",
     description="Upload an audio file and it will be transcribed using OpenAI's Whisper model."

 # Load the "large" Whisper checkpoint once at module level so that every
 # call to transcribe() reuses the same model object.
 model = whisper.load_model("large")
 def transcribe(audio_file):
     """Transcribe an uploaded audio clip with the module-level Whisper model.

     Args:
         audio_file: The value Gradio hands over for the audio input. It may
             be a filesystem path, an open file object that exposes its path
             as ``.name``, or a pair whose second element is one of those.

     Returns:
         The text decoded from the clip.
     """
     # Keep accepting the two-element form this function used to index into.
     if isinstance(audio_file, (tuple, list)):
         audio_file = audio_file[1]
     # whisper.load_audio is given a path, so unwrap open file objects;
     # plain path strings pass through untouched instead of being indexed.
     if hasattr(audio_file, "read"):
         audio_file = audio_file.name
     audio = whisper.load_audio(audio_file)
     # NOTE(review): pad_or_trim fits the clip to Whisper's fixed input
     # window, so longer recordings are presumably cut off -- confirm whether
     # model.transcribe() is wanted for long audio.
     audio = whisper.pad_or_trim(audio)
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
     options = whisper.DecodingOptions()
     result = whisper.decode(model, mel, options)
     return result.text
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(label="Upload your audio file", type="file"),
     outputs="text",
     title="Whisper ASR",
     description="Upload an audio file and it will be transcribed using OpenAI's Whisper model."