Spaces:

olanigan
/

YoutubeAssistant

Sleeping

Ibrahim Olanigan

Fixed transcript download

7dfdb5c about 1 year ago

4.18 kB

	import streamlit as st
	import os
	import subprocess
	import whisper

	# Session-state keys that hold string values.
	URL = "URL"
	TEXT = "TEXT"
	TITLE = "TITLE"

	# Session-state keys that hold boolean flags.
	PROCESSING = "PROCESSING"
	AUDIO_EXISTS = "AUDIO_EXISTS"
	TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"

	# Keys that init_state() defaults to '' and to False, respectively.
	STATES = [TEXT, TITLE]
	BOOL_STATES = [AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]

	# Working files, relative to the current working directory.
	AUDIO_FILE = "audio.mp3"
	TRANSCRIPT = "transcript.txt"

	# Module-level placeholder; the functions below bind their own local `model`.
	model = ""

	# Page heading; executed at module level, before main() is called.
	st.title('Youtube Assistant')

	def init_state():
	    """Give every tracked session-state key a default value.

	    Keys listed in STATES default to an empty string and keys listed in
	    BOOL_STATES default to False. A key that is already present in
	    st.session_state is left untouched.
	    """
	    defaults = [(key, '') for key in STATES] + [(key, False) for key in BOOL_STATES]
	    for key, value in defaults:
	        if key in st.session_state:
	            continue
	        st.session_state[key] = value


	def clear_old_files():
	    """Delete leftover MP3 files and the transcript from the working directory.

	    Every entry of the current directory whose name ends in ".mp3" or
	    equals TRANSCRIPT is removed, then the existence flags in session
	    state are refreshed through check_audio().
	    """
	    print("Clearing old files")
	    stale = [
	        name for name in os.listdir()
	        if name == TRANSCRIPT or name.endswith(".mp3")
	    ]
	    for name in stale:
	        os.remove(name)
	        print(f"Removed old files::{name}")
	    # The files are gone, so bring the session flags back in sync.
	    check_audio()

	@st.cache_resource
	def load_whisper():
	    """Load the Whisper "small" model once and reuse it across reruns.

	    The model is a large, non-data object, so it is cached with
	    st.cache_resource (one shared instance) rather than st.cache_data,
	    which would serialize the return value and hand back a copy on
	    every call.

	    Because the body only runs on a cache miss, check_audio() and the log
	    line below execute only when the model is actually loaded.

	    Returns:
	        The object returned by whisper.load_model("small").
	    """
	    check_audio()
	    model = whisper.load_model("small")
	    # Log message names the model that is really loaded.
	    print('Loaded Whisper small model')
	    return model

	def transcribe():
	    """Transcribe the downloaded audio and persist the result.

	    Does nothing unless the AUDIO_EXISTS flag is set in session state.
	    Otherwise the audio file is run through the Whisper model, the text is
	    stored under the TEXT session key and written to TRANSCRIPT, and the
	    stringified segment list is written to "segments.txt".

	    Returns:
	        The transcript text, or None when there is no audio to transcribe.
	    """
	    if not st.session_state[AUDIO_EXISTS]:
	        return None

	    model = load_whisper()
	    # Use the shared constants so this stays in step with check_audio()
	    # and clear_old_files(), which look for the same files.
	    result = model.transcribe(AUDIO_FILE)
	    text = result["text"]

	    st.session_state[TEXT] = text
	    print(f"Start - { text[:100]}")
	    print(f"End - { text[-100:]}")
	    write_file(text, TRANSCRIPT)
	    # The transcript now exists on disk; refresh the existence flags.
	    check_audio()
	    write_file(str(result["segments"]), "segments.txt")
	    return text

	def check_audio():
	    """Record in session state whether the audio and transcript files exist."""
	    tracked = (
	        (AUDIO_EXISTS, AUDIO_FILE),
	        (TRANSCRIPT_EXISTS, TRANSCRIPT),
	    )
	    for flag, path in tracked:
	        st.session_state[flag] = os.path.exists(path)

	def load_audio():
	    """Show an audio player for the downloaded MP3, if one is flagged as present.

	    Nothing is rendered when the AUDIO_EXISTS key is missing from session
	    state or is falsy.
	    """
	    if AUDIO_EXISTS not in st.session_state or not st.session_state[AUDIO_EXISTS]:
	        return
	    # Read inside a context manager so the file handle is always closed.
	    with open(AUDIO_FILE, 'rb') as audio_file:
	        audio_bytes = audio_file.read()
	    st.audio(audio_bytes, format="audio/mp3")

	def display():
	# Build the page: a URL input form, the transcript text area, and the
	# MP3 / transcript download buttons.
	# NOTE(review): indentation was lost in this copy of the file, so the
	# nesting of the statements below must be confirmed against the original.
	# Refresh the AUDIO_EXISTS / TRANSCRIPT_EXISTS flags from disk first.
	check_audio()
	# Two containers, created in this order: the input form, then the output.
	container = st.container()
	text_container = st.container()

	with container:
	with st.form(key='input_form', clear_on_submit=False):
	# The widget is keyed by URL; download() reads st.session_state[URL].
	user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com", key=URL)
	input_submit_button = st.form_submit_button(label='Send')

	# Only run the pipeline when the form was submitted with a non-empty URL.
	if input_submit_button and user_input:
	# NOTE(review): no try/finally is visible here, so PROCESSING presumably
	# stays True if download() or transcribe() raises -- confirm.
	st.session_state[PROCESSING] = True
	clear_old_files()
	with st.spinner('Downloading Audio...'):
	download()
	load_audio()
	with st.spinner('Transcribing Audio...'):
	transcribe()
	st.session_state[PROCESSING] = False

	with text_container:
	# Transcript display, labelled with the video title stored under TITLE.
	st.text_area(label=f"Youtube Transcript: {st.session_state[TITLE]}",
	height=200,
	value=st.session_state[TEXT],
	)

	#Download Button section
	col1, col2 = st.columns(2)
	with col1:
	# Offer the MP3 only when the audio flag is present and set.
	if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
	with open("audio.mp3", "rb") as f:
	data = f.read()
	st.download_button('Download MP3', data,"audio.mp3", key="mp3")
	with col2:
	if st.session_state[TRANSCRIPT_EXISTS]:
	# A transcript file exists but TEXT is empty: reload the text from disk.
	# NOTE(review): this assignment comes after the text area above was
	# created with the old TEXT value, so the reloaded text presumably shows
	# up there only on a later rerun -- confirm.
	if st.session_state[TEXT] == '':
	with open(TRANSCRIPT, "rb") as f:
	data = f.read()
	# convert bytes to utf-8 string
	data = data.decode("utf-8")
	st.session_state[TEXT] = data

	st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt", key="transcript")


	def download():
	    """Fetch the video title and download its audio track as an MP3.

	    Reads the URL from st.session_state[URL], stores the title reported by
	    yt-dlp under the TITLE session key, downloads and converts the audio
	    to AUDIO_FILE, then refreshes the existence flags via check_audio().

	    The URL comes straight from the user, so both yt-dlp invocations pass
	    it as its own argv element without a shell: characters such as '&' or
	    ';' in the URL are never interpreted as shell syntax. The '--' marker
	    stops a URL that starts with '-' from being parsed as an option.
	    """
	    url = st.session_state[URL]

	    # Get youtube title
	    title_result = subprocess.run(
	        ["yt-dlp", "--get-title", "--", url],
	        capture_output=True,
	    )
	    st.session_state[TITLE] = title_result.stdout.decode("utf-8").strip()

	    # Download and convert audio
	    command = [
	        "yt-dlp",
	        "--no-config",
	        "-v",
	        "--extract-audio",
	        "--audio-format",
	        "mp3",
	        "-o",
	        AUDIO_FILE,
	        "--",
	        url,
	    ]
	    print(command)
	    subprocess.run(command)
	    check_audio()

	def write_file(text, filename):
	    """Write *text* to *filename* as UTF-8, replacing any existing file.

	    The encoding is fixed to UTF-8 instead of the platform default because
	    the transcript is later read back and decoded as UTF-8, and transcripts
	    may contain non-ASCII characters.

	    Args:
	        text: The string to write.
	        filename: Path of the file to create or overwrite.
	    """
	    with open(filename, "w", encoding="utf-8") as f:
	        f.write(text)

	def main():
	    """Entry point: seed the session-state defaults, then draw the page."""
	    init_state()
	    display()


	# Run the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	main()