Spaces:
Sleeping
Sleeping
import streamlit as st | |
import speech_recognition as sr | |
from transformers import pipeline | |
import re | |
# Load NLP models.
# Streamlit re-executes this script on every widget interaction, so the
# pipelines are built through a cached factory: each (task, model) pair is
# constructed once and the same object is reused on later reruns.
@st.cache_resource
def _load_pipeline(task, model):
    """Build the transformers pipeline for *task* backed by *model*."""
    return pipeline(task, model=model)


summarizer = _load_pipeline("summarization", "facebook/bart-large-cnn")
classifier = _load_pipeline("zero-shot-classification", "facebook/bart-large-mnli")
def extract_info(text):
    """Classify *text* against the fixed set of meeting-topic labels.

    The text is handed to the module-level zero-shot ``classifier`` together
    with the four topic labels below, and the classifier's result is
    returned unchanged.
    """
    topics = ["project status", "risks", "questions", "administration"]
    return classifier(text, topics)
def normalize_text(text):
    """Return *text* lower-cased with each whitespace run collapsed to one space.

    Leading and trailing whitespace is not stripped; like any other run it
    is reduced to a single space.
    """
    lowered = text.lower()
    return re.sub(r'\s+', ' ', lowered)
# Page flow: upload a WAV file -> transcribe -> summarize -> classify the
# summary -> show a normalized string form of the classification result.
st.title("Audio to Text Processing and Categorization")

audio_file = st.file_uploader("Upload an audio file", type=["wav"])
if audio_file is not None:
    st.audio(audio_file, format='audio/wav')

    # Convert audio to text
    recognizer = sr.Recognizer()
    # Rewind defensively: the same file object was just passed to st.audio.
    audio_file.seek(0)

    text = None
    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
    except ValueError as exc:
        # Raised when the upload cannot be read as a supported audio format.
        st.error(f"Could not read the uploaded audio file: {exc}")
    except sr.UnknownValueError:
        # The recognizer could not make out any speech in the recording.
        st.error("Could not understand the audio.")
    except sr.RequestError as exc:
        # The speech recognition request itself failed.
        st.error(f"Speech recognition request failed: {exc}")

    if text is not None and not text.strip():
        st.warning("No speech was transcribed from the audio.")
    elif text is not None:
        st.write("Transcribed Text:")
        st.write(text)

        # NLP processing
        # truncation=True keeps an over-long transcript within the model's
        # input limit instead of failing on it.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        summary_text = summary[0]['summary_text']
        st.write("Summarized Text:")
        st.write(summary_text)

        # Information extraction
        extracted_info = extract_info(summary_text)
        st.write("Extracted Information:")
        st.write(extracted_info)

        # Text normalization
        # NOTE(review): this normalizes the string form of the whole
        # classifier result, not the transcript - confirm that is intended.
        normalized_text = normalize_text(str(extracted_info))
        st.write("Normalized Text:")
        st.write(normalized_text)