DiegoLigtenberg committed
Commit e711356 · 1 Parent(s): 2652f0e

Add requirements file

Files changed (4)
  1. models.py +140 -0
  2. parsarg.py +26 -0
  3. requirements.txt +6 -0
  4. settings.py +4 -0
models.py ADDED
@@ -0,0 +1,140 @@
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
+ from pydub import AudioSegment
+ import whisper
+ from settings import MODEL_PARSER
+ from pytube import YouTube
+
+ class BagOfModels:
+     '''model -> a model from Hugging Face
+     model_names -> model names that can be chosen in Streamlit
+     model_settings -> model settings that can be customized by the user
+     '''
+     args = MODEL_PARSER
+     barfs = 5
+
+     def __init__(self, model, model_names, model_settings, model_tasks, **kwargs):
+         self.model = model
+         self.model_names = model_names
+         self.model_settings = model_settings
+         self.model_tasks = model_tasks
+         self.kwargs = kwargs
+
+     @classmethod
+     def get_model_settings(cls):
+         bag_of_models = BagOfModels(**vars(cls.args))
+         return bag_of_models.model_settings
+
+     @classmethod
+     def get_model_names(cls):
+         bag_of_models = BagOfModels(**vars(cls.args))
+         return bag_of_models.model_names
+
+     @classmethod
+     def get_model(cls):
+         bag_of_models = BagOfModels(**vars(cls.args))
+         return bag_of_models.model
+
+     @classmethod
+     def get_model_tasks(cls):
+         bag_of_models = BagOfModels(**vars(cls.args))
+         return bag_of_models.model_tasks
+
+     @classmethod
+     def load_model(cls, model_name, **kwargs):
+         bag_of_models = BagOfModels(**vars(cls.args))
+         cls.model = bag_of_models.model
+         assert model_name in bag_of_models.model_names, f"please pick one of the available models: {bag_of_models.model_names}"
+         return Model(model_name, **cls.model[model_name])
+
+
+ class Model:
+     def __init__(self, model_name, task, url, **kwargs):
+         self.url = url
+         self.model_name = model_name
+         self.name = self.url.split("https://huggingface.co/")[1]
+         self.task = task
+         self.kwargs = kwargs
+         self.init_optional_args(**self.kwargs)
+
+     def init_optional_args(self, year=None, description=None):
+         self._year = year
+         self._description = description
+
+     def predict_stt(self, source, source_type, model_task):
+         model = whisper.load_model(self.model_name.split("_")[1])  # tiny - base - medium
+         stt = SoundToText(source, source_type, model_task, model=model, tokenizer=None)
+         stt.whisper()
+         return stt
+
+     def predict_summary(self):
+         tokenizer = Wav2Vec2Processor.from_pretrained(self.name)
+         model = Wav2Vec2ForCTC.from_pretrained(self.name)  # Note: PyTorch model
+
+ class Transcription():
+     def __init__(self, model, source, source_type) -> None:
+         pass
+
+ class SoundToText():
+     def __init__(self, source, source_type, model_task, model, tokenizer=None):
+         self.source = source
+         self.source_type = source_type
+         self.model = model
+         self.model_task = model_task
+         self.tokenizer = tokenizer
+
+     def wav2vec(self, size):
+         pass
+
+     def wav2vec2(self, size):
+         pass
+
+     def whisper(self):
+         # download the audio stream of a YouTube url
+         if self.source_type == "YouTube":
+             self.audio_path = YouTube(self.source).streams.get_by_itag(140).download("output/", filename="audio")
+
+         if self.source_type == "File":
+             audio = None
+             if self.source.name.endswith('.wav'): audio = AudioSegment.from_wav(self.source)
+             elif self.source.name.endswith('.mp3'): audio = AudioSegment.from_mp3(self.source)
+             audio.export('output/audio.wav', format='wav')
+             self.audio_path = "output/audio.wav"
+
+         model = whisper.load_model("base")
+         self.raw_output = model.transcribe(self.audio_path, verbose=True)
+
+         self.text = self.raw_output["text"]
+         self.language = self.raw_output["language"]
+         self.segments = self.raw_output["segments"]
+
+         # remove token ids from the output
+         for segment in self.segments:
+             del segment["tokens"]
+
+         self.transcribed = True
+
+ class TextToSummary():
+     def __init__(self, input_text, min_length, max_length):
+         self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+         self.summary_input = input_text
+         self.summary_output = self.summarizer(self.summary_input, min_length=min_length, max_length=max_length, do_sample=False)
+
+     def get_summary(self):
+         return self.summary_output
+
+     def wav2vec(self):
+         pass
+
+ def record(model_name):
+     args = MODEL_PARSER
+     models = BagOfModels.get_model_names()
+     tasks = BagOfModels.get_model_tasks()
+     whisper_base = BagOfModels.load_model(model_name, **vars(args))
+     whisper_base.predict()
+
+ if __name__ == "__main__":
+     args = MODEL_PARSER
+     models = BagOfModels.get_model_names()
+     tasks = BagOfModels.get_model_tasks()
+     whisper_base = BagOfModels.load_model("whisper_base", **vars(args))
+     whisper_base.predict_stt()
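
Usage note (editor's sketch, not part of the commit): the __main__ block above calls predict_stt() without the source arguments it requires, and record() calls a predict() method that Model does not define. Assuming utils/models.yaml defines a "whisper_base" entry with task and url keys, a working invocation would look roughly like this; the YouTube URL and the "stt" task label are placeholders, not values taken from the repository:

    from models import BagOfModels
    from settings import MODEL_PARSER

    # load_model() validates the name against models.yaml and returns a Model wrapper
    whisper_base = BagOfModels.load_model("whisper_base", **vars(MODEL_PARSER))

    # source_type can also be "File" with an uploaded .wav/.mp3 object
    stt = whisper_base.predict_stt(
        source="https://www.youtube.com/watch?v=...",  # placeholder URL
        source_type="YouTube",
        model_task="stt",                              # assumed task label
    )
    print(stt.text, stt.language)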
parsarg.py ADDED
@@ -0,0 +1,26 @@
+ import argparse
+ import yaml
+
+ def model_parser_args():
+     with open(r'utils/models.yaml') as f:
+         settings = yaml.full_load(f)
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model", help="see model_settings.yaml", default=settings)
+     parser.add_argument("--model_names", help="see model_settings.yaml", default=list(settings))
+     setting_list = []
+     task_list = []
+     for i in range(len(settings)):
+         setting_list.append(list(settings[list(settings.keys())[i]].keys()))
+     for model in list(settings.keys()):
+         task = settings[model]["task"]
+         if task not in task_list: task_list.append(task)
+     setting_list = [setting for sublist in setting_list for setting in sublist]  # flatten the per-model setting lists
+     setting_list = [x for i, x in enumerate(setting_list) if x not in setting_list[:i]]  # de-duplicate while preserving order
+     parser.add_argument("--model_settings", help="see model_settings.yaml", default=setting_list)
+     parser.add_argument("--model_tasks", help="see model_settings.yaml", default=task_list)
+     parser = parser.parse_args()
+     return parser
+
+ if __name__ == "__main__":
+     model_parser_args()
+
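
For reference, model_parser_args() builds its argument defaults from utils/models.yaml, which is not included in this commit. A minimal sketch of the mapping shape the function expects (illustrative values only; the real file may define more models and more keys):

    # what yaml.full_load(f) is expected to return: a dict keyed by model name,
    # where each entry has at least a "task" key (collected into --model_tasks)
    # plus whatever keys Model(...) consumes, e.g. "url"
    settings = {
        "whisper_base": {
            "task": "stt",
            "url": "https://huggingface.co/openai/whisper-base",
        },
        # further models follow the same pattern; their keys are flattened and
        # de-duplicated into --model_settings
    }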
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ pydub==0.25.1
+ pytube==12.1.0
+ PyYAML==6.0
+ streamlit==1.13.0
+ transformers==4.23.1
+ git+https://github.com/openai/whisper.git
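
Installation note: the pinned packages install with pip install -r requirements.txt; whisper is pulled directly from GitHub rather than PyPI. pydub and whisper also expect an ffmpeg binary to be available on the system, which pip does not provide.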
settings.py ADDED
@@ -0,0 +1,4 @@
+ from parsarg import model_parser_args
+
+ MODEL_PARSER = model_parser_args()
+
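
Note on settings.py: importing it runs model_parser_args() once, so MODEL_PARSER is a single argparse.Namespace built at import time (which also means utils/models.yaml must exist before anything imports settings). A quick sanity check, assuming that YAML file is in place:

    from settings import MODEL_PARSER

    # the namespace carries the four defaults defined in parsarg.py
    print(vars(MODEL_PARSER).keys())  # model, model_names, model_settings, model_tasks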