import os
import tempfile

import streamlit as st
import torch
import torchaudio
from transformers import pipeline
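# Requires: streamlit, transformers, torch, torchaudio. Decoding MP3 uploads
# may additionally need a torchaudio backend with MP3 support (e.g. FFmpeg).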

st.title("ASR with Hugging Face Whisper")

# Build the Whisper pipeline once and cache it across Streamlit reruns;
# reloading whisper-large-v2 on every widget interaction would be very slow.
@st.cache_resource
def load_asr_pipeline():
    return pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-large-v2",
        chunk_length_s=30,  # transcribe long recordings in 30-second chunks
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )

asr = load_asr_pipeline()

uploaded_audio = st.file_uploader("Upload an audio file (WAV/MP3)", type=["wav", "mp3"])

if uploaded_audio:
    # torchaudio.load accepts a file-like object such as Streamlit's UploadedFile
    # and returns a (channels, frames) float tensor plus its sample rate.
    audio_data, sample_rate = torchaudio.load(uploaded_audio)
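    # If your torchaudio backend cannot decode the in-memory file, one fallback
    # (a sketch, using the tempfile import above) is to persist the upload first:
    #   with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
    #       tmp.write(uploaded_audio.getvalue())
    #   audio_data, sample_rate = torchaudio.load(tmp.name)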
with st.spinner("Performing ASR..."): |
|
transcriptions = asr(audio_data.numpy(), sample_rate=sample_rate) |
|
|
|
|
|
st.subheader("Transcription:") |
|
for idx, transcription in enumerate(transcriptions): |
|
st.write(f"Segment {idx + 1}: {transcription['text']}") |
|
|
|
|
|
st.write("Instructions:") |
|
st.write("1. Upload an audio file in WAV or MP3 format.") |
|
st.write("2. Click the 'Perform ASR' button to transcribe the audio.") |
|
|
|
|
|
st.write("Sample Audio for Testing:") |
|
sample_audio = "Wave_files_demos_Welcome.wav" |
|
st.audio(sample_audio, format="audio/wav") |
|
|
|
|
|
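
# Note: the sample clip is assumed to sit in the app's working directory;
# adjust sample_audio (or the path below) if it lives elsewhere.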
sample_audio_path = os.path.join(os.getcwd(), sample_audio)
if st.button("Transcribe Sample Audio"): |
|
|
|
sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path) |
|
|
|
|
|
with st.spinner("Performing ASR..."): |
|
sample_transcriptions = asr(sample_audio_data.numpy(), sample_rate=sample_audio_rate) |
|
|
|
|
|
st.subheader("Transcription (Sample Audio):") |
|
for idx, transcription in enumerate(sample_transcriptions): |
|
st.write(f"Segment {idx + 1}: {transcription['text']}") |
|
|
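
# To try the app locally (assuming this script is saved as app.py):
#   streamlit run app.py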