import streamlit as st
from transformers import pipeline
import torchaudio
import tempfile
import os
import torch
# Create a Streamlit app title
st.title("ASR with Hugging Face Whisper")
# Load the ASR model once and cache it across Streamlit reruns
@st.cache_resource
def load_asr():
    return pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v2",
                    device=0 if torch.cuda.is_available() else -1)

asr = load_asr()
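# Optional: on a CUDA GPU, loading whisper-large-v2 in half precision roughly
# halves its memory footprint. A minimal sketch, assuming the standard
# torch_dtype pipeline kwarg (not enabled here):
#   pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v2",
#            torch_dtype=torch.float16, device=0)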
# Create a file uploader widget
uploaded_audio = st.file_uploader("Upload an audio file (wav/mp3)")
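# Optional playback of the upload; st.audio should accept Streamlit's
# UploadedFile object directly, since it takes bytes and file-like inputs
if uploaded_audio:
    st.audio(uploaded_audio)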
# Check if an audio file is uploaded
if uploaded_audio:
    # Persist the upload to a temporary file so torchaudio can read it
    suffix = os.path.splitext(uploaded_audio.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_audio.read())
        tmp_path = tmp.name

    # Read the uploaded audio and downmix to mono; Whisper expects a 1-D waveform
    audio_data, sample_rate = torchaudio.load(tmp_path)
    mono = audio_data.mean(dim=0)

    # Perform ASR on the uploaded audio; the pipeline resamples to 16 kHz if needed
    with st.spinner("Performing ASR..."):
        result = asr({"raw": mono.numpy(), "sampling_rate": sample_rate})
    os.remove(tmp_path)

    # Display the ASR result (the pipeline returns a single dict, not a list of segments)
    st.subheader("Transcription:")
    st.write(result["text"])
# Provide instructions
st.write("Instructions:")
st.write("1. Upload an audio file in WAV or MP3 format.")
st.write("2. The transcription appears automatically once the upload is processed.")
# Add a sample audio file for testing (optional)
st.write("Sample Audio for Testing:")
sample_audio = "Wave_files_demos_Welcome.wav"
sample_audio_path = os.path.join(os.getcwd(), sample_audio)
st.audio(sample_audio_path, format="audio/wav")
# Add a button to transcribe the sample audio (optional)
if st.button("Transcribe Sample Audio"):
    # Read the sample audio file and downmix to mono
    sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path)
    sample_mono = sample_audio_data.mean(dim=0)

    # Perform ASR on the sample audio
    with st.spinner("Performing ASR..."):
        sample_result = asr({"raw": sample_mono.numpy(),
                             "sampling_rate": sample_audio_rate})

    # Display the ASR result for the sample audio
    st.subheader("Transcription (Sample Audio):")
    st.write(sample_result["text"])