import gradio as gr import whisper from transformers import pipeline # Load Whisper model whisper_model = whisper.load_model("small", device="cpu") # Load the text correction model correction_pipeline = pipeline("text2text-generation", model="tiiuae/falcon3-1b-instruct", device=-1) # Function to preprocess audio and transcribe it using Whisper def transcribe_audio(audio_file): transcription = whisper_model.transcribe(audio_file) return transcription["text"] # Function to correct grammar in text def chunk_text(text, max_tokens=2000): """ Splits the text into smaller chunks to ensure it doesn't exceed the token limit. """ words = text.split() chunks = [] chunk = [] current_tokens = 0 for word in words: word_tokens = len(word.split()) # Approximate token count if current_tokens + word_tokens > max_tokens: chunks.append(" ".join(chunk)) chunk = [word] current_tokens = word_tokens else: chunk.append(word) current_tokens += word_tokens if chunk: chunks.append(" ".join(chunk)) return chunks # Function to process the pipeline def process_pipeline(audio_file): raw_transcription = transcribe_audio(audio_file) corrected_transcription = correct_text(raw_transcription) return raw_transcription, corrected_transcription # Gradio Interface interface = gr.Interface( fn=process_pipeline, inputs=gr.Audio(type="filepath", label="Upload Audio"), outputs=[ gr.Textbox(label="Raw Transcription"), gr.Textbox(label="Corrected Transcription"), ], title="Speech Correction Demo", description="Upload an audio file to see raw transcription and grammar-corrected output.", ) # Launch the app interface.launch(share=True)