Ibrahim Olanigan committed on
Commit
c6097eb
·
1 Parent(s): 5b15a61

Resolved page state

Browse files
Files changed (1) hide show
  1. app.py +48 -35
app.py CHANGED
@@ -5,27 +5,30 @@ import whisper
5
 
6
  URL = 'URL'
7
  TEXT = 'TEXT'
8
- WHISPER = 'WHISPER'
 
9
  PROCESSING = 'PROCESSING'
10
- STATES = [ TEXT, WHISPER, PROCESSING]
11
- AUDIO_FILE = "audio.mp3"
12
- TRANSCRIPT = "transcript.txt"
13
  AUDIO_EXISTS = "AUDIO_EXISTS"
14
  TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"
 
 
 
 
 
15
  model = ''
16
 
17
  st.title('Youtube Assistant')
18
 
19
  def init_state():
 
20
  for state in STATES:
21
- st.session_state[state] = ''
 
 
 
 
 
22
 
23
- if AUDIO_EXISTS not in st.session_state:
24
- st.session_state[AUDIO_EXISTS] = False
25
- st.session_state[TRANSCRIPT_EXISTS] = False
26
-
27
- if URL not in st.session_state or not st.session_state[URL]:
28
- clear_old_files()
29
 
30
  def clear_old_files():
31
  print("Clearing old files")
@@ -36,71 +39,81 @@ def clear_old_files():
36
  #Refresh audio state
37
  check_audio()
38
 
39
-
40
  def load_whisper():
41
  check_audio()
42
  model = whisper.load_model("small")
43
  print('Loaded Whisper Medium model')
 
 
 
44
  if st.session_state[AUDIO_EXISTS]:
45
- print('Transcribing with Whisper model')
46
  result = model.transcribe("audio.mp3")
47
- st.session_state[WHISPER] = result["text"]
48
- write_file(result["text"], "transcript.txt")
 
 
 
 
49
  check_audio()
50
- print(f"Transcribe results: {result.keys()}")
51
  write_file(str(result["segments"]), "segments.txt")
52
-
53
-
54
 
55
  def check_audio():
56
  st.session_state[AUDIO_EXISTS] = os.path.exists(AUDIO_FILE)
57
  st.session_state[TRANSCRIPT_EXISTS] = os.path.exists(TRANSCRIPT)
58
-
59
  def load_audio():
60
- if st.session_state[AUDIO_EXISTS]:
61
  audio_file = open(AUDIO_FILE, 'rb')
62
  audio_bytes = audio_file.read()
63
- print(f"Audio file exists...{len(audio_bytes)}")
64
  st.audio(audio_bytes, format="audio/mp3")
65
-
66
  def display():
67
  container = st.container()
68
  text_container = st.container()
69
 
70
-
71
  with container:
72
  with st.form(key='input_form', clear_on_submit=False):
73
- user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com/watch?v=rdONCqZoUZE", key=URL)
74
  input_submit_button = st.form_submit_button(label='Send')
75
- load_audio()
76
  if input_submit_button and user_input:
 
77
  clear_old_files()
78
  with st.spinner('Downloading Audio...'):
79
  download()
80
  load_audio()
81
  with st.spinner('Transcribing Audio...'):
82
- load_whisper()
83
-
84
 
85
  with text_container:
86
- st.text_area(label="Youtube Transcript:",
87
  height=200,
88
- value=st.session_state[WHISPER])
89
 
90
  #Download Button section
91
  col1, col2 = st.columns(2)
92
  with col1:
93
- if st.session_state[AUDIO_EXISTS]:
94
- st.download_button("Download Audio","file","audio.mp3","application/octet-stream")
 
 
95
  with col2:
96
- if os.path.exists("transcript.txt"):
97
- st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt","text/plain")
 
98
 
99
-
100
  def download():
 
 
 
 
101
  command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
102
  print(command)
103
- out = subprocess.run(command, shell=True)
104
  check_audio()
105
 
106
  def write_file(text, filename):
 
5
 
6
  URL = 'URL'
7
  TEXT = 'TEXT'
8
+ TITLE = 'TITLE'
9
+
10
  PROCESSING = 'PROCESSING'
 
 
 
11
  AUDIO_EXISTS = "AUDIO_EXISTS"
12
  TRANSCRIPT_EXISTS = "TRANSCRIPT_EXISTS"
13
+ STATES = [ TEXT, TITLE]
14
+ BOOL_STATES = [ AUDIO_EXISTS, TRANSCRIPT_EXISTS, PROCESSING]
15
+ AUDIO_FILE = "audio.mp3"
16
+ TRANSCRIPT = "transcript.txt"
17
+
18
  model = ''
19
 
20
  st.title('Youtube Assistant')
21
 
22
  def init_state():
23
+ # print("Page refreshed")
24
  for state in STATES:
25
+ if state not in st.session_state:
26
+ st.session_state[state] = ''
27
+
28
+ for state in BOOL_STATES:
29
+ if state not in st.session_state:
30
+ st.session_state[state] = False
31
 
 
 
 
 
 
 
32
 
33
  def clear_old_files():
34
  print("Clearing old files")
 
39
  #Refresh audio state
40
  check_audio()
41
 
42
+ @st.cache_data
43
  def load_whisper():
44
  check_audio()
45
  model = whisper.load_model("small")
46
  print('Loaded Whisper Medium model')
47
+ return model
48
+
49
+ def transcribe():
50
  if st.session_state[AUDIO_EXISTS]:
51
+ model = load_whisper()
52
  result = model.transcribe("audio.mp3")
53
+ text = result["text"]
54
+
55
+ st.session_state[TEXT] = text
56
+ print(f"Start - { text[:100]}")
57
+ print(f"End - { text[-100:]}")
58
+ write_file(text, "transcript.txt")
59
  check_audio()
 
60
  write_file(str(result["segments"]), "segments.txt")
61
+ return text
 
62
 
63
  def check_audio():
64
  st.session_state[AUDIO_EXISTS] = os.path.exists(AUDIO_FILE)
65
  st.session_state[TRANSCRIPT_EXISTS] = os.path.exists(TRANSCRIPT)
66
+
67
  def load_audio():
68
+ if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
69
  audio_file = open(AUDIO_FILE, 'rb')
70
  audio_bytes = audio_file.read()
 
71
  st.audio(audio_bytes, format="audio/mp3")
72
+
73
  def display():
74
  container = st.container()
75
  text_container = st.container()
76
 
 
77
  with container:
78
  with st.form(key='input_form', clear_on_submit=False):
79
+ user_input = st.text_input("Youtube URL:", placeholder="https://www.youtube.com", key=URL)
80
  input_submit_button = st.form_submit_button(label='Send')
81
+
82
  if input_submit_button and user_input:
83
+ st.session_state[PROCESSING] = True
84
  clear_old_files()
85
  with st.spinner('Downloading Audio...'):
86
  download()
87
  load_audio()
88
  with st.spinner('Transcribing Audio...'):
89
+ transcribe()
90
+ st.session_state[PROCESSING] = False
91
 
92
  with text_container:
93
+ st.text_area(label=f"Youtube Transcript: {st.session_state[TITLE]}",
94
  height=200,
95
+ value=st.session_state[TEXT])
96
 
97
  #Download Button section
98
  col1, col2 = st.columns(2)
99
  with col1:
100
+ if AUDIO_EXISTS in st.session_state and st.session_state[AUDIO_EXISTS]:
101
+ with open("audio.mp3", "rb") as f:
102
+ data = f.read()
103
+ st.download_button('Download MP3', data,"audio.mp3")
104
  with col2:
105
+ if st.session_state[TRANSCRIPT_EXISTS]:
106
+ st.download_button("Download Transcript",st.session_state[TEXT],"transcript.txt")
107
+
108
 
 
109
  def download():
110
+ #Get youtube title
111
+ text = subprocess.run(["yt-dlp", "--get-title", st.session_state[URL]], capture_output=True)
112
+ st.session_state[TITLE] = text.stdout.decode("utf-8").strip()
113
+ # Download and convert audio
114
  command = [f"yt-dlp --no-config -v --extract-audio --audio-format mp3 {st.session_state[URL]} -o audio.mp3"]
115
  print(command)
116
+ subprocess.run(command, shell=True)
117
  check_audio()
118
 
119
  def write_file(text, filename):