Spaces:

huudan12345
/

tl

Sleeping

pham thuy tien committed on Jun 27, 2024

Commit

6eeb282

verified ·

1 Parent(s): 0e6081d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -267,7 +267,7 @@ def get_video_id(youtube_url):
     parsed_url = urlparse(youtube_url)
     video_id = parse_qs(parsed_url.query).get("v")
     return video_id[0] if video_id else None
 def get_transcript(video_id):
     tran = []
     transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -279,11 +279,10 @@ def get_transcript(video_id):
     for t in transcript_data:
         text = t['text'].lower().strip()
-        if not any(word in text for word in words_to_remove):
-            print()
-            print(t['text'])
-            print()
-            tran.append(t['text'])
     return ' '.join(tran)

     parsed_url = urlparse(youtube_url)
     video_id = parse_qs(parsed_url.query).get("v")
     return video_id[0] if video_id else None
+import re
 def get_transcript(video_id):
     tran = []
     transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
     for t in transcript_data:
         text = t['text'].lower().strip()
+        for word in words_to_remove:
+            t = re.sub(re.escape(word), '',text)
+        t = t.strip()
+        tran.append(t)
     return ' '.join(tran)