pham thuy tien committed on
Commit
6eeb282
·
verified ·
1 Parent(s): 0e6081d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -267,7 +267,7 @@ def get_video_id(youtube_url):
267
  parsed_url = urlparse(youtube_url)
268
  video_id = parse_qs(parsed_url.query).get("v")
269
  return video_id[0] if video_id else None
270
-
271
  def get_transcript(video_id):
272
  tran = []
273
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -279,11 +279,10 @@ def get_transcript(video_id):
279
 
280
  for t in transcript_data:
281
  text = t['text'].lower().strip()
282
- if not any(word in text for word in words_to_remove):
283
- print()
284
- print(t['text'])
285
- print()
286
- tran.append(t['text'])
287
 
288
  return ' '.join(tran)
289
 
 
267
  parsed_url = urlparse(youtube_url)
268
  video_id = parse_qs(parsed_url.query).get("v")
269
  return video_id[0] if video_id else None
270
+ import re
271
  def get_transcript(video_id):
272
  tran = []
273
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
 
279
 
280
  for t in transcript_data:
281
  text = t['text'].lower().strip()
282
+ for word in words_to_remove:
283
+ t = re.sub(re.escape(word), '',text)
284
+ t = t.strip()
285
+ tran.append(t)
 
286
 
287
  return ' '.join(tran)
288