Spaces: Build error
DiegoLigtenberg committed · Commit e711356
1 Parent(s): 2652f0e
Add requirements file

Files changed:
- models.py +140 -0
- parsarg.py +26 -0
- requirements.txt +6 -0
- settings.py +4 -0
models.py
ADDED
@@ -0,0 +1,140 @@
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
from pydub import AudioSegment
import whisper
from settings import MODEL_PARSER
from pytube import YouTube

class BagOfModels:
    '''model -> a model from Hugging Face
    model_names -> model names that can be chosen from in Streamlit
    model_settings -> settings of the model that can be customized by the user
    '''
    args = MODEL_PARSER
    barfs = 5

    def __init__(self, model, model_names, model_settings, model_tasks, **kwargs):
        self.model = model
        self.model_names = model_names
        self.model_settings = model_settings
        self.model_tasks = model_tasks
        self.kwargs = kwargs

    @classmethod
    def get_model_settings(cls):
        bag_of_models = BagOfModels(**vars(cls.args))
        return bag_of_models.model_settings

    @classmethod
    def get_model_names(cls):
        bag_of_models = BagOfModels(**vars(cls.args))
        return bag_of_models.model_names

    @classmethod
    def get_model(cls):
        bag_of_models = BagOfModels(**vars(cls.args))
        return bag_of_models.model

    @classmethod
    def get_model_tasks(cls):
        bag_of_models = BagOfModels(**vars(cls.args))
        return bag_of_models.model_tasks

    @classmethod
    def load_model(cls, model_name, **kwargs):
        bag_of_models = BagOfModels(**vars(cls.args))
        cls.model = bag_of_models.model
        assert model_name in bag_of_models.model_names, f"please pick one of the available models: {bag_of_models.model_names}"
        return Model(model_name, **cls.model[model_name])


class Model:
    def __init__(self, model_name, task, url, **kwargs):
        self.url = url
        self.model_name = model_name
        self.name = self.url.split("https://huggingface.co/")[1]
        self.task = task
        self.kwargs = kwargs
        self.init_optional_args(**self.kwargs)

    def init_optional_args(self, year=None, description=None):
        self._year = year
        self._description = description

    def predict_stt(self, source, source_type, model_task):
        model = whisper.load_model(self.model_name.split("_")[1])  # "whisper_<size>" -> tiny / base / medium
        stt = SoundToText(source, source_type, model_task, model=model, tokenizer=None)
        stt.whisper()
        return stt

    def predict_summary(self):
        # loads the wav2vec2 processor and model; not wired into a summary pipeline yet
        tokenizer = Wav2Vec2Processor.from_pretrained(self.name)
        model = Wav2Vec2ForCTC.from_pretrained(self.name)  # note: PyTorch model


class Transcription():
    def __init__(self, model, source, source_type) -> None:
        pass


class SoundToText():
    def __init__(self, source, source_type, model_task, model, tokenizer=None):
        self.source = source
        self.source_type = source_type
        self.model = model
        self.model_task = model_task
        self.tokenizer = tokenizer

    def wav2vec(self, size):
        pass

    def wav2vec2(self, size):
        pass

    def whisper(self):
        # download the audio stream of a YouTube url
        if self.source_type == "YouTube":
            self.audio_path = YouTube(self.source).streams.get_by_itag(140).download("output/", filename="audio")

        # convert an uploaded .wav/.mp3 file to output/audio.wav
        if self.source_type == "File":
            audio = None
            if self.source.name.endswith('.wav'): audio = AudioSegment.from_wav(self.source)
            elif self.source.name.endswith('.mp3'): audio = AudioSegment.from_mp3(self.source)
            audio.export('output/audio.wav', format='wav')
            self.audio_path = "output/audio.wav"

        model = whisper.load_model("base")  # note: always reloads "base" here rather than using self.model
        self.raw_output = model.transcribe(self.audio_path, verbose=True)

        self.text = self.raw_output["text"]
        self.language = self.raw_output["language"]
        self.segments = self.raw_output["segments"]

        # remove token ids from the output
        for segment in self.segments:
            del segment["tokens"]

        self.transcribed = True


class TextToSummary():
    def __init__(self, input_text, min_length, max_length):
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.summary_input = input_text
        self.summary_output = self.summarizer(self.summary_input, min_length=min_length, max_length=max_length, do_sample=False)

    def get_summary(self):
        return self.summary_output

    def wav2vec(self):
        pass


def record(model_name):
    args = MODEL_PARSER
    models = BagOfModels.get_model_names()
    tasks = BagOfModels.get_model_tasks()
    whisper_base = BagOfModels.load_model(model_name, **vars(args))
    whisper_base.predict()  # note: Model defines predict_stt(source, source_type, model_task), not predict()


if __name__ == "__main__":
    args = MODEL_PARSER
    models = BagOfModels.get_model_names()
    tasks = BagOfModels.get_model_tasks()
    whisper_base = BagOfModels.load_model("whisper_base", **vars(args))
    whisper_base.predict_stt()  # note: predict_stt() expects source, source_type and model_task arguments
parsarg.py
ADDED
@@ -0,0 +1,26 @@
import argparse
import yaml

def model_parser_args():
    with open(r'utils/models.yaml') as f:
        settings = yaml.full_load(f)
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="see model_settings.yaml", default=settings)
    parser.add_argument("--model_names", help="see model_settings.yaml", default=list(settings))
    setting_list = []
    task_list = []
    for i in range(len(settings)):
        setting_list.append(list(settings[list(settings.keys())[i]].keys()))
    for model in list(settings.keys()):
        task = settings[model]["task"]
        if task not in task_list: task_list.append(task)
    setting_list = [setting for sublist in setting_list for setting in sublist]  # flatten the per-model setting lists
    setting_list = [x for i, x in enumerate(setting_list) if x not in setting_list[:i]]  # deduplicate while keeping order
    parser.add_argument("--model_settings", help="see model_settings.yaml", default=setting_list)
    parser.add_argument("--model_tasks", help="see model_settings.yaml", default=task_list)
    parser = parser.parse_args()
    return parser

if __name__ == "__main__":
    model_parser_args()
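
utils/models.yaml itself is not included in this commit. As a reading aid, this is the shape of dictionary the loop above expects yaml.full_load() to return: one top-level key per model, each with at least a "task" and a "url", plus the optional "year"/"description" picked up by Model.init_optional_args. Names and values here are illustrative assumptions only:

# Illustrative structure; the real utils/models.yaml is not part of this commit.
settings = {
    "whisper_base": {
        "task": "stt",                                        # collected into --model_tasks
        "url": "https://huggingface.co/openai/whisper-base",  # split on the hub prefix by Model
        "year": 2022,                                         # optional, see Model.init_optional_args
        "description": "Whisper base speech-to-text model",   # optional
    },
}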
requirements.txt
ADDED
@@ -0,0 +1,6 @@
pydub==0.25.1
pytube==12.1.0
PyYAML==6.0
streamlit==1.13.0
transformers==4.23.1
git+https://github.com/openai/whisper.git
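
For a local run these can be installed with `pip install -r requirements.txt`; note that whisper and pydub additionally rely on the ffmpeg binary being available on the system, which a requirements file cannot express.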
settings.py
ADDED
@@ -0,0 +1,4 @@
from parsarg import model_parser_args

MODEL_PARSER = model_parser_args()
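
A small sketch of what settings.py exposes: model_parser_args() returns an argparse Namespace, so vars(MODEL_PARSER) yields exactly the four keyword arguments BagOfModels.__init__ expects (the values depend on the contents of utils/models.yaml):

from settings import MODEL_PARSER

config = vars(MODEL_PARSER)  # argparse.Namespace -> dict
print(sorted(config))        # ['model', 'model_names', 'model_settings', 'model_tasks']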