Spaces:
Sleeping
Sleeping
File size: 1,136 Bytes
834d8bd 040d848 9f50d19 834d8bd 040d848 834d8bd 040d848 76bd650 040d848 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import streamlit as st
from transformers import pipeline
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
import av
import numpy as np
# ASR Model
@st.cache_resource
def _load_asr_pipeline():
    """Build the speech-recognition pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every user interaction (for example
    each button click). Without caching, the model would be re-instantiated
    on every rerun; `st.cache_resource` keeps a single shared instance.

    Returns:
        The `transformers` automatic-speech-recognition pipeline.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="facebook/s2t-medium-mustc-multilingual-st",
    )


pipe = _load_asr_pipeline()
# Recording state shared between the Streamlit script and the WebRTC callback.
# streamlit-webrtc runs frame callbacks on a worker thread, where
# `st.session_state` does not refer to this session's state. The container is
# therefore created here, on the script thread, and the callback reaches it
# through a plain module-level reference. Because the dict itself is stored in
# session state, the same instance is reused across script reruns.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = {"chunks": [], "sample_rate": None}
_audio_state = st.session_state.audio_buffer


# Function to process audio frames
def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
    """Append the frame's audio, downmixed to mono float32, to the recording.

    Args:
        frame: Audio frame delivered by the WebRTC stream.

    Returns:
        The same frame, unmodified, so the stream keeps passing audio through.
    """
    samples = frame.to_ndarray()
    channel_count = len(frame.layout.channels)
    if frame.format.is_planar:
        # Planar layout: one row per channel, so average across rows.
        mono = samples.mean(axis=0)
    else:
        # Packed layout: a single row of interleaved samples; regroup them
        # per time step before averaging the channels.
        mono = samples.reshape(-1, channel_count).mean(axis=1)
    if np.issubdtype(samples.dtype, np.signedinteger):
        # Scale signed integer PCM (e.g. int16) into [-1.0, 1.0).
        mono = mono / (np.iinfo(samples.dtype).max + 1)
    _audio_state["sample_rate"] = frame.sample_rate
    # list.append is atomic in CPython, so no explicit lock is taken here;
    # appending chunks also avoids the quadratic cost of growing `bytes`.
    _audio_state["chunks"].append(mono.astype(np.float32))
    return frame


# Transcribe audio buffer
def transcribe_audio():
    """Transcribe everything recorded so far and show the text in the app.

    Shows a warning instead of calling the model when nothing was recorded.
    """
    # Snapshot the chunk list: the callback may still be appending to it.
    chunks = list(_audio_state["chunks"])
    sample_rate = _audio_state["sample_rate"]
    if not chunks or sample_rate is None:
        st.warning("No audio has been recorded yet.")
        return
    waveform = np.concatenate(chunks)
    # Pass raw samples together with their sampling rate; plain bytes would
    # be interpreted by the pipeline as the contents of an encoded audio file.
    result = pipe({"raw": waveform, "sampling_rate": sample_rate})
    st.write("Transcription:", result["text"])


# Streamlit UI
st.title("Voice Recognition App")
webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    # A plain function is registered through the callback API;
    # `audio_processor_factory` expects a factory of processor objects.
    audio_frame_callback=audio_callback,
    media_stream_constraints={"audio": True, "video": False},
)
if st.button("Transcribe Audio"):
    transcribe_audio()
|