BatuhanYilmaz committed on
Commit
9a2eb37
·
1 Parent(s): e4fd7ee

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ from pytube import YouTube
3
+ import requests, io
4
+ from urllib.request import urlopen
5
+ from PIL import Image
6
+ import time
7
+ import streamlit as st
8
+ from streamlit_lottie import st_lottie
9
+ import numpy as np
10
+ import os
11
+
12
+ st.set_page_config(page_title="Youtube Transcriber", page_icon="🗣", layout="wide")
13
+
14
+
15
+ # Define a function that we can use to load lottie files from a link.
16
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    """Download a Lottie animation and return its decoded JSON.

    Args:
        url: Address of the Lottie JSON file.

    Returns:
        The parsed JSON payload, or None when the server answers with a
        status code other than 200.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps a stalled host from blocking the page render forever;
    # requests waits indefinitely when none is given.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        return None
    return response.json()
22
+
23
# Page header: Lottie animation in the narrow left column, title and tagline
# in the wide right column.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets9.lottiefiles.com/private_files/lf30_bntlaz7t.json")
    # load_lottieurl returns None on a non-200 response; skip the animation
    # in that case instead of handing None to st_lottie.
    if lottie is not None:
        st_lottie(lottie, speed=1, height=200, width=200)

with col2:
    st.write("""
## Youtube Transcriber
##### This is an app that transcribes YouTube videos into text.""")
32
+
33
+
34
+ #def load_model(size):
35
+ #default_size = size
36
+ #if size == default_size:
37
+ #return None
38
+ #else:
39
+ #loaded_model = whisper.load_model(size)
40
+ #return loaded_model
41
+
42
+
43
@st.cache(allow_output_mutation=True)
def populate_metadata(link):
    """Look up the metadata of the YouTube video at ``link``.

    Returns:
        A 6-tuple ``(author, title, description, thumbnail_url, length,
        views)`` read from the pytube ``YouTube`` object, in that order.
    """
    video = YouTube(link)
    return (
        video.author,
        video.title,
        video.description,
        video.thumbnail_url,
        video.length,
        video.views,
    )
53
+
54
+ # Uncomment if you want to fetch the thumbnails as well.
55
+ #def fetch_thumbnail(thumbnail):
56
+ #tnail = urlopen(thumbnail)
57
+ #raw_data = tnail.read()
58
+ #image = Image.open(io.BytesIO(raw_data))
59
+ #st.image(image, use_column_width=True)
60
+
61
+
62
def convert(seconds):
    """Format a duration given in seconds as ``HH:MM:SS``.

    The hour field is not wrapped at 24, so durations of a day or longer
    are rendered with their full hour count (e.g. 90000 -> "25:00:00").

    Args:
        seconds: Duration in seconds; fractional parts are truncated.

    Returns:
        The zero-padded ``HH:MM:SS`` string.
    """
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
64
+
65
+
66
# Model sizes offered in the picker.
MODEL_SIZES = ["tiny", "base", "small", "medium", "large"]

# Whisper model loaded when the script runs; inference() transcribes with
# this module-level object.
loaded_model = whisper.load_model("base")
# Placeholder "current" size; the string "None" is not one of the options
# above, so it never equals the selected size.
current_size = "None"
# Size picker, pre-selecting "base" (position 1 in MODEL_SIZES).
size = st.selectbox("Model Size", MODEL_SIZES, index=1)
69
+
70
+
71
def change_model(current_size, size):
    """Load the Whisper model for ``size`` when it differs from ``current_size``.

    The freshly loaded model is stored in the module-level ``loaded_model``
    so that code transcribing with that global uses the selected size.
    Previously the model was bound to a local name only, so the selection
    had no effect unless the caller used the return value.

    Args:
        current_size: Name of the size considered already active.
        size: Name of the size requested by the user.

    Returns:
        The newly loaded model, or None when ``size`` equals
        ``current_size`` and nothing was loaded.
    """
    global loaded_model

    if current_size == size:
        return None

    loaded_model = whisper.load_model(size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    return loaded_model
79
+
80
+
81
@st.cache(allow_output_mutation=True)
def inference(link):
    """Transcribe the audio of the YouTube video at ``link``.

    Downloads the first audio-only stream to ``audio.mp4`` and runs the
    module-level ``loaded_model`` on the downloaded file.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    audio_streams = YouTube(link).streams.filter(only_audio=True)
    audio_path = audio_streams[0].download(filename="audio.mp4")
    transcription = loaded_model.transcribe(audio_path)
    return transcription["text"]
87
+
88
+
89
def main():
    """Render the transcription form and show results for the entered link.

    When the "Transcribe" button is pressed with a non-empty link, the
    video's metadata and transcript are fetched, displayed in two columns,
    written to ``transcript.txt`` and offered as a download.
    """
    change_model(current_size, size)
    link = st.text_input("YouTube Link")
    if not st.button("Transcribe"):
        return

    if not link.strip():
        # Without this guard an empty link goes straight to the pytube
        # YouTube(...) call inside populate_metadata.
        st.warning("Please enter a YouTube link.")
        return

    author, title, description, thumbnail, length, views = populate_metadata(link)
    results = inference(link)

    # NOTE(review): the original indentation is not visible in this view;
    # the nesting below (download section inside col4, after the transcript
    # expander) is the most plausible reading - confirm against the app.
    col3, col4 = st.columns(2)
    with col3:
        #fetch_thumbnail(thumbnail)
        st.video(link)
        st.markdown(f"**Channel**: {author}")
        st.markdown(f"**Title**: {title}")
        st.markdown(f"**Length**: {convert(length)}")
        st.markdown(f"**Views**: {views:,}")

    with col4:
        with st.expander("Video Description"):
            st.write(description)
            #st.markdown(f"**Video Description**: {description}")
        with st.expander("Video Transcript"):
            st.write(results)

        # Encode once with an explicit encoding so the file on disk and the
        # download payload are identical and do not depend on the locale's
        # default text encoding.
        payload = results.encode("utf-8")
        with open("transcript.txt", "wb") as f:
            f.write(payload)

        if st.download_button(label="Download Transcript",
                              data=payload,
                              file_name="transcript.txt"):
            st.success("Downloaded Successfully!")


# Run the app when the file is executed as a script.
if __name__ == "__main__":
    main()