"""Streamlit app that downloads a YouTube video's audio and transcribes it.

The audio is fetched with the ``yt-dlp`` command-line tool and transcribed
with a Whisper model. Results are kept in ``st.session_state`` and in files
in the current working directory.
"""

import os
import subprocess

import streamlit as st
import whisper

# Keys used in st.session_state.
URL = 'URL'
TEXT = 'TEXT'
TITLE = 'TITLE'
PROCESSING = 'PROCESSING'
AUDIO_EXISTS = "AUDIO_EXISTS"
TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"

# String-valued and boolean-valued state keys initialised by init_state().
STATES = [TEXT, TITLE]
BOOL_STATES = [AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]

# Files written to the current working directory.
AUDIO_FILE = "audio.mp3"
TRANSCRIPT = "transcript.txt"

model = ''

st.title('Youtube Assistant')


def init_state():
    """Give every known session-state key a default value if it is unset."""
    for state in STATES:
        if state not in st.session_state:
            st.session_state[state] = ''
    for state in BOOL_STATES:
        if state not in st.session_state:
            st.session_state[state] = False


def clear_old_files():
    """Delete every ``.mp3`` file and the transcript from the working directory."""
    print("Clearing old files")
    for file in os.listdir():
        if file.endswith(".mp3") or file == TRANSCRIPT:
            os.remove(file)
            print(f"Removed old files::{file}")
    # Refresh audio state
    check_audio()


# cache_resource keeps one shared, unserialised model object; cache_data
# would serialise the return value on every call, which is wrong for a model.
@st.cache_resource
def load_whisper():
    """Load the Whisper "small" model once and reuse it across reruns."""
    loaded_model = whisper.load_model("small")
    print('Loaded Whisper small model')
    return loaded_model


def transcribe():
    """Transcribe the downloaded audio file and persist the result.

    Stores the transcript text in session state, writes it to TRANSCRIPT,
    and writes the stringified segment list to ``segments.txt``.

    Returns:
        The transcript text, or None when no audio file is available.
    """
    if not st.session_state[AUDIO_EXISTS]:
        return None

    whisper_model = load_whisper()
    result = whisper_model.transcribe(AUDIO_FILE)
    text = result["text"]
    st.session_state[TEXT] = text
    print(f"Start - { text[:100]}")
    print(f"End - { text[-100:]}")
    write_file(text, TRANSCRIPT)
    # The transcript file now exists, so refresh the existence flags.
    check_audio()
    write_file(str(result["segments"]), "segments.txt")
    return text


def check_audio():
    """Record in session state whether the audio and transcript files exist."""
    st.session_state[AUDIO_EXISTS] = os.path.exists(AUDIO_FILE)
    st.session_state[TRANSCRIPT_EXISTS] = os.path.exists(TRANSCRIPT)


def load_audio():
    """Show an audio player for the downloaded file, if there is one."""
    if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
        with open(AUDIO_FILE, 'rb') as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/mp3")


def _restore_transcript():
    """Load a transcript left on disk into session state when none is loaded."""
    if st.session_state[TRANSCRIPT_EXISTS] and st.session_state[TEXT] == '':
        with open(TRANSCRIPT, "rb") as f:
            data = f.read()
        # convert bytes to utf-8 string
        st.session_state[TEXT] = data.decode("utf-8")


def display():
    """Render the page: URL form, transcript text area and download buttons."""
    check_audio()
    # Restore before rendering so the text area shows a saved transcript on
    # the very first run instead of only after the next rerun.
    _restore_transcript()

    container = st.container()
    text_container = st.container()

    with container:
        with st.form(key='input_form', clear_on_submit=False):
            user_input = st.text_input("Youtube URL:", placeholder="", key=URL)
            input_submit_button = st.form_submit_button(label='Send')

        if input_submit_button and user_input:
            st.session_state[PROCESSING] = True
            try:
                clear_old_files()
                with st.spinner('Downloading Audio...'):
                    download()
                load_audio()
                with st.spinner('Transcribing Audio...'):
                    transcribe()
            finally:
                # Never leave the flag stuck on if a step raises.
                st.session_state[PROCESSING] = False

    with text_container:
        st.text_area(
            label=f"Youtube Transcript: {st.session_state[TITLE]}",
            height=200,
            value=st.session_state[TEXT],
        )

    # Download Button section
    col1, col2 = st.columns(2)
    with col1:
        if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
            with open("audio.mp3", "rb") as f:
                data = f.read()
            st.download_button('Download MP3', data, "audio.mp3", key="mp3")
    with col2:
        if st.session_state[TRANSCRIPT_EXISTS]:
            # Covers a transcript that appeared during this run.
            _restore_transcript()
            st.download_button(
                "Download Transcript",
                st.session_state[TEXT],
                "transcript.txt",
                key="transcript",
            )


def download():
    """Fetch the video title and download its audio as AUDIO_FILE via yt-dlp.

    The URL comes from user input, so both commands are run as argument
    lists without a shell, and ``--`` stops a URL that starts with ``-``
    from being parsed as an option.
    """
    url = st.session_state[URL]

    # Get youtube title
    title_result = subprocess.run(
        ["yt-dlp", "--get-title", "--", url],
        capture_output=True,
    )
    st.session_state[TITLE] = title_result.stdout.decode("utf-8").strip()

    # Download and convert audio
    command = [
        "yt-dlp",
        "--no-config",
        "-v",
        "--extract-audio",
        "--audio-format",
        "mp3",
        "-o",
        AUDIO_FILE,
        "--",
        url,
    ]
    print(command)
    completed = subprocess.run(command)
    if completed.returncode != 0:
        st.error("Audio download failed. Check the URL and try again.")
    check_audio()


def write_file(text, filename):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing file.

    UTF-8 is explicit because the transcript is later read back as UTF-8.
    """
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)


def main():
    """Initialise session state and render the page."""
    init_state()
    display()


if __name__ == "__main__":
    main()