File size: 1,136 Bytes
834d8bd
040d848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f50d19
834d8bd
040d848
834d8bd
040d848
 
 
 
 
 
76bd650
040d848
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import threading

import av
import numpy as np
import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings
from transformers import pipeline

# ASR Model
@st.cache_resource
def _load_asr_pipeline():
    """Build the speech pipeline once per server process.

    Streamlit re-executes this script from top to bottom on every user
    interaction; without caching, the checkpoint would be re-loaded each time
    a widget (e.g. the transcribe button) is touched.

    NOTE(review): judging by its name, this checkpoint is a MuST-C
    speech-*translation* ("st") model rather than a plain transcription
    model -- confirm it matches the intended "voice recognition" behaviour.
    """
    return pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")


pipe = _load_asr_pipeline()

# Audio capture
class _AudioBuffer:
    """Thread-safe store for the mono audio captured from the WebRTC stream.

    Frames are appended from the streamlit-webrtc worker thread and read from
    the Streamlit script thread, so every access goes through a lock. Chunks
    are kept in a list and concatenated only when read, which avoids the
    quadratic cost of growing one bytes object frame by frame.

    NOTE: the buffer is never trimmed, so memory grows for as long as the
    stream keeps running.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._chunks = []
        self._sample_rate = None

    def append(self, samples, sample_rate):
        """Add one chunk of mono float32 samples recorded at *sample_rate* Hz."""
        with self._lock:
            if self._sample_rate != sample_rate:
                # Audio recorded at different rates cannot be concatenated
                # into one waveform, so start over with the new rate.
                self._chunks.clear()
                self._sample_rate = sample_rate
            self._chunks.append(samples)

    def snapshot(self):
        """Return ``(waveform, sample_rate)`` for everything captured so far.

        The waveform is a 1-D float32 array; it is empty (and the sample rate
        is ``None``) when nothing has been captured yet.
        """
        with self._lock:
            chunks = list(self._chunks)
            sample_rate = self._sample_rate
        if not chunks:
            return np.zeros(0, dtype=np.float32), sample_rate
        return np.concatenate(chunks), sample_rate


def _get_audio_buffer():
    """Return this browser session's buffer, creating it on first use.

    Must run on the script thread: ``st.session_state`` is not usable from the
    WebRTC callback thread, so the callback only ever touches the plain object
    fetched here. Keeping the object in session state lets it survive script
    reruns.
    """
    if "audio_buffer" not in st.session_state:
        st.session_state.audio_buffer = _AudioBuffer()
    return st.session_state.audio_buffer


_audio_buffer = _get_audio_buffer()


def _frame_to_mono(frame: av.AudioFrame) -> np.ndarray:
    """Convert a PyAV audio frame to a 1-D mono float32 waveform.

    PyAV returns ``(channels, samples)`` for planar sample formats and
    ``(1, samples * channels)`` with interleaved channels for packed ones, so
    packed data has to be de-interleaved before the channels are averaged.
    Signed-integer PCM is scaled to [-1.0, 1.0); float formats are used as-is.
    Unsigned formats (e.g. ``u8``) are not re-centred -- not expected from a
    WebRTC source, TODO confirm.
    """
    samples = frame.to_ndarray()
    if not frame.format.is_planar:
        samples = samples.reshape(-1, len(frame.layout.channels)).T
    if np.issubdtype(samples.dtype, np.signedinteger):
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    return samples.mean(axis=0, dtype=np.float32)


# Function to process audio frames
def audio_callback(frame: av.AudioFrame) -> av.AudioFrame:
    """Record an incoming audio frame and pass it through unchanged.

    Called by streamlit-webrtc for every received audio frame, outside the
    Streamlit script thread.

    Args:
        frame: The audio frame received from the browser.

    Returns:
        The same frame, untouched, so it is sent back to the browser as-is.
    """
    _audio_buffer.append(_frame_to_mono(frame), frame.sample_rate)
    return frame


# Transcribe audio buffer
def transcribe_audio():
    """Run the ASR pipeline on all audio captured so far and show the text.

    The waveform is handed over together with its sampling rate so that the
    pipeline can resample it to whatever rate the model expects; raw PCM bytes
    would instead be treated as the contents of an encoded audio file.
    Shows a warning instead of calling the model when nothing was captured.
    """
    waveform, sample_rate = _audio_buffer.snapshot()
    if waveform.size == 0:
        st.warning("No audio captured yet. Start the stream and speak before transcribing.")
        return
    result = pipe({"raw": waveform, "sampling_rate": sample_rate})
    st.write("Transcription:", result["text"])


# Streamlit UI
st.title("Voice Recognition App")

webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    # A plain per-frame function must be registered as a frame callback; a
    # processor factory has to return an object exposing ``recv()`` instead.
    audio_frame_callback=audio_callback,
    media_stream_constraints={"audio": True, "video": False},
)

if st.button("Transcribe Audio"):
    transcribe_audio()