# Spaces:
# Sleeping
# Sleeping
import streamlit as st | |
import os | |
import subprocess | |
import whisper | |
# --- Keys used to index st.session_state -----------------------------------
# URL doubles as the key of the URL text-input widget created in display().
URL = "URL"
TEXT = "TEXT"
TITLE = "TITLE"
PROCESSING = "PROCESSING"
AUDIO_EXISTS = "AUDIO_EXISTS"
TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"

# Keys that init_state() defaults to the empty string.
STATES = [TEXT, TITLE]
# Keys that init_state() defaults to False.
BOOL_STATES = [AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]

# --- Files produced in the working directory --------------------------------
AUDIO_FILE = "audio.mp3"
TRANSCRIPT = "transcript.txt"

# Module-level placeholder; load_whisper() binds its own local `model`.
model = ""

st.title("Youtube Assistant")
def init_state():
    """Seed ``st.session_state`` with defaults for keys that are missing.

    Keys listed in ``STATES`` default to the empty string and keys listed in
    ``BOOL_STATES`` default to ``False``. Keys that already exist are left
    untouched, so values survive Streamlit reruns.
    """
    defaults = [(STATES, ''), (BOOL_STATES, False)]
    for keys, default in defaults:
        for key in keys:
            if key in st.session_state:
                continue
            st.session_state[key] = default
def clear_old_files():
    """Delete every ``.mp3`` file and the transcript from the working directory.

    Afterwards the existence flags in session state are refreshed through
    ``check_audio()``.
    """
    print("Clearing old files")
    for name in os.listdir():
        is_stale = name == TRANSCRIPT or name.endswith(".mp3")
        if not is_stale:
            continue
        os.remove(name)
        print(f"Removed old files::{name}")
    # Refresh audio state
    check_audio()
def load_whisper(model_name="small"):
    """Load and return a Whisper speech-to-text model.

    Args:
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"small"``, the model the app has always loaded.

    Returns:
        The model object returned by ``whisper.load_model``.
    """
    # Refresh the audio/transcript existence flags before the (slow) load.
    check_audio()
    model = whisper.load_model(model_name)
    # The log line is derived from the name actually loaded; it previously
    # claimed "Medium" while loading "small".
    print(f'Loaded Whisper {model_name} model')
    return model
def transcribe():
    """Transcribe the downloaded audio file and persist the result.

    Does nothing unless session state says the audio file exists. On success
    the transcript text is stored under ``TEXT`` in session state and written
    to ``TRANSCRIPT``; the stringified segment list goes to ``segments.txt``.

    Returns:
        The transcript text, or ``None`` when there is no audio to transcribe.
    """
    if not st.session_state[AUDIO_EXISTS]:
        return None

    model = load_whisper()
    # Use the shared path constants so this function always agrees with the
    # files that check_audio() tests for.
    result = model.transcribe(AUDIO_FILE)
    text = result["text"]
    st.session_state[TEXT] = text
    print(f"Start - { text[:100]}")
    print(f"End - { text[-100:]}")

    write_file(text, TRANSCRIPT)
    # The transcript now exists on disk; refresh the existence flags.
    check_audio()
    write_file(str(result["segments"]), "segments.txt")
    return text
def check_audio():
    """Record in session state whether the audio and transcript files exist."""
    tracked = {AUDIO_EXISTS: AUDIO_FILE, TRANSCRIPT_EXISTS: TRANSCRIPT}
    for flag, path in tracked.items():
        st.session_state[flag] = os.path.exists(path)
def load_audio():
    """Render an audio player for the downloaded file, if one exists.

    Does nothing when the ``AUDIO_EXISTS`` flag is missing from session state
    or is false.
    """
    if AUDIO_EXISTS not in st.session_state or not st.session_state[AUDIO_EXISTS]:
        return
    # Read through a context manager so the handle is closed right away; the
    # original left the file open.
    with open(AUDIO_FILE, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/mp3")
def display():
    """Render the page: URL form, processing pipeline, transcript, downloads.

    When the form is submitted with a non-empty URL, old files are cleared,
    the audio is downloaded and played back, and it is transcribed. The
    transcript text area and the two download buttons are rendered on every
    run from whatever is currently on disk / in session state.
    """
    check_audio()
    container = st.container()
    text_container = st.container()

    with container:
        with st.form(key='input_form', clear_on_submit=False):
            user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com", key=URL)
            input_submit_button = st.form_submit_button(label='Send')

        if input_submit_button and user_input:
            st.session_state[PROCESSING] = True
            try:
                clear_old_files()
                with st.spinner('Downloading Audio...'):
                    download()
                load_audio()
                with st.spinner('Transcribing Audio...'):
                    transcribe()
            finally:
                # Always clear the flag, even if a step above raised, so the
                # session is not left marked as processing.
                st.session_state[PROCESSING] = False

    # Restore the transcript from disk BEFORE drawing the text area; the
    # original did this afterwards, so a fresh session with an existing
    # transcript file showed an empty box.
    if st.session_state[TRANSCRIPT_EXISTS] and st.session_state[TEXT] == '':
        with open(TRANSCRIPT, "rb") as f:
            # convert bytes to utf-8 string
            st.session_state[TEXT] = f.read().decode("utf-8")

    with text_container:
        st.text_area(label=f"Youtube Transcript: {st.session_state[TITLE]}",
                     height=200,
                     value=st.session_state[TEXT],
                     )

        # Download Button section
        col1, col2 = st.columns(2)
        with col1:
            if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
                with open(AUDIO_FILE, "rb") as f:
                    data = f.read()
                st.download_button('Download MP3', data, AUDIO_FILE, key="mp3")
        with col2:
            if st.session_state[TRANSCRIPT_EXISTS]:
                st.download_button("Download Transcript", st.session_state[TEXT], TRANSCRIPT, key="transcript")
def download():
    """Fetch the video title and download its audio as MP3 using yt-dlp.

    Reads the URL from session state, stores the video title under ``TITLE``,
    runs yt-dlp to extract the audio into ``AUDIO_FILE``, and finally
    refreshes the file-existence flags via ``check_audio()``.
    """
    url = st.session_state[URL]

    # Get youtube title. "--" ends option parsing so a URL that begins with
    # "-" cannot be interpreted as a yt-dlp option.
    title_proc = subprocess.run(["yt-dlp", "--get-title", "--", url], capture_output=True)
    st.session_state[TITLE] = title_proc.stdout.decode("utf-8", errors="replace").strip()

    # Download and convert audio. The URL comes straight from user input, so
    # pass an argument list without a shell: the original interpolated it
    # into a shell string (shell=True), which allowed command injection.
    command = [
        "yt-dlp", "--no-config", "-v",
        "--extract-audio", "--audio-format", "mp3",
        "-o", AUDIO_FILE,
        "--", url,
    ]
    print(command)
    result = subprocess.run(command)
    if result.returncode != 0:
        # Best effort: report the failure; check_audio() below records
        # whether an audio file was actually produced.
        print(f"yt-dlp exited with status {result.returncode}")
    check_audio()
def write_file(text, filename):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing file.

    Args:
        text: The string to write.
        filename: Path of the file to create or overwrite.
    """
    # Pin the encoding: the transcript is read back and decoded as UTF-8, and
    # the platform default encoding may be unable to represent the text.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
def main():
    """Entry point: seed the session-state defaults, then render the page."""
    init_state()
    display()


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()