import gradio as gr
import whisper

# Initialize the Whisper model ("large" needs roughly 10 GB of VRAM;
# smaller checkpoints such as "base" or "small" are lighter alternatives)
model = whisper.load_model("large")

def transcribe(audio_file):
    # With type="filepath" on gr.Audio, Gradio passes the path of the
    # uploaded file straight through, which is what Whisper expects
    # Load the audio and pad/trim it to Whisper's 30-second input window
    audio = whisper.load_audio(audio_file)
    audio = whisper.pad_or_trim(audio)
    # n_mels=model.dims.n_mels keeps the spectrogram compatible with checkpoints
    # such as large-v3, which expect 128 mel bins rather than the older 80
    mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
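
# A sketch of an alternative, not wired into the Gradio interface below:
# Whisper's high-level model.transcribe() chunks long audio internally, so it
# is not limited to the single 30-second window kept by pad_or_trim above.
# The function name transcribe_full is illustrative, not part of this app.
def transcribe_full(audio_file):
    result = model.transcribe(audio_file)
    return result["text"]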

# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(label="Upload your audio file", type="filepath"),
    outputs="text",
    title="Whisper ASR",
    description="Upload an audio file and it will be transcribed using OpenAI's Whisper model."
)

# Launch the app
if __name__ == "__main__":
    iface.launch()