Spaces:
Sleeping
Sleeping
pham thuy tien
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -267,7 +267,7 @@ def get_video_id(youtube_url):
|
|
267 |
parsed_url = urlparse(youtube_url)
|
268 |
video_id = parse_qs(parsed_url.query).get("v")
|
269 |
return video_id[0] if video_id else None
|
270 |
-
|
271 |
def get_transcript(video_id):
|
272 |
tran = []
|
273 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
@@ -279,11 +279,10 @@ def get_transcript(video_id):
|
|
279 |
|
280 |
for t in transcript_data:
|
281 |
text = t['text'].lower().strip()
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
tran.append(t['text'])
|
287 |
|
288 |
return ' '.join(tran)
|
289 |
|
|
|
267 |
parsed_url = urlparse(youtube_url)
|
268 |
video_id = parse_qs(parsed_url.query).get("v")
|
269 |
return video_id[0] if video_id else None
|
270 |
+
import re
|
271 |
def get_transcript(video_id):
|
272 |
tran = []
|
273 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
|
|
279 |
|
280 |
for t in transcript_data:
|
281 |
text = t['text'].lower().strip()
|
282 |
+
for word in words_to_remove:
|
283 |
+
t = re.sub(re.escape(word), '',text)
|
284 |
+
t = t.strip()
|
285 |
+
tran.append(t)
|
|
|
286 |
|
287 |
return ' '.join(tran)
|
288 |
|