Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ _whitespace_re = re.compile(r"\s+")
|
|
34 |
def get_random_string():
|
35 |
return "".join(str(uuid.uuid4()).split("-"))
|
36 |
|
37 |
-
@spaces.GPU
|
38 |
def seed_everything(seed):
|
39 |
if seed != -1:
|
40 |
os.environ['PYTHONHASHSEED'] = str(seed)
|
@@ -73,7 +73,7 @@ def get_mask_interval(transcribe_state, word_span):
|
|
73 |
|
74 |
return (start, end)
|
75 |
|
76 |
-
@spaces.GPU
|
77 |
class WhisperxAlignModel:
|
78 |
def __init__(self):
|
79 |
from whisperx import load_align_model
|
@@ -84,7 +84,7 @@ class WhisperxAlignModel:
|
|
84 |
audio = load_audio(audio_path)
|
85 |
return align(segments, self.model, self.metadata, audio, device, return_char_alignments=False)["segments"]
|
86 |
|
87 |
-
@spaces.GPU
|
88 |
class WhisperModel:
|
89 |
def __init__(self, model_name):
|
90 |
from whisper import load_model
|
@@ -101,7 +101,7 @@ class WhisperModel:
|
|
101 |
def transcribe(self, audio_path):
|
102 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
103 |
|
104 |
-
@spaces.GPU
|
105 |
class WhisperxModel:
|
106 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
107 |
from whisperx import load_model
|
@@ -114,7 +114,7 @@ class WhisperxModel:
|
|
114 |
segment['text'] = replace_numbers_with_words(segment['text'])
|
115 |
return self.align_model.align(segments, audio_path)
|
116 |
|
117 |
-
@spaces.GPU
|
118 |
def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
|
119 |
global transcribe_model, align_model, ssrspeech_model
|
120 |
|
@@ -174,7 +174,7 @@ def get_transcribe_state(segments):
|
|
174 |
"word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in words_info]
|
175 |
}
|
176 |
|
177 |
-
@spaces.GPU
|
178 |
def transcribe(seed, audio_path):
|
179 |
if transcribe_model is None:
|
180 |
raise gr.Error("Transcription model not loaded")
|
@@ -189,7 +189,7 @@ def transcribe(seed, audio_path):
|
|
189 |
state, success_message
|
190 |
]
|
191 |
|
192 |
-
@spaces.GPU
|
193 |
def align_segments(transcript, audio_path):
|
194 |
from aeneas.executetask import ExecuteTask
|
195 |
from aeneas.task import Task
|
@@ -211,7 +211,7 @@ def align_segments(transcript, audio_path):
|
|
211 |
with open(tmp_sync_map_path, "r") as f:
|
212 |
return json.load(f)
|
213 |
|
214 |
-
@spaces.GPU
|
215 |
def align(seed, transcript, audio_path):
|
216 |
if align_model is None:
|
217 |
raise gr.Error("Align model not loaded")
|
@@ -250,7 +250,7 @@ def replace_numbers_with_words(sentence):
|
|
250 |
return num # In case num2words fails (unlikely with digits but just to be safe)
|
251 |
return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
|
252 |
|
253 |
-
@spaces.GPU
|
254 |
def run(seed, sub_amount, ssrspeech_model_choice, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
255 |
stop_repetition, kvcache, silence_tokens, aug_text, cfg_coef,
|
256 |
audio_path, transcribe_state, original_transcript, transcript,
|
|
|
34 |
def get_random_string():
|
35 |
return "".join(str(uuid.uuid4()).split("-"))
|
36 |
|
37 |
+
@spaces.GPU
|
38 |
def seed_everything(seed):
|
39 |
if seed != -1:
|
40 |
os.environ['PYTHONHASHSEED'] = str(seed)
|
|
|
73 |
|
74 |
return (start, end)
|
75 |
|
76 |
+
@spaces.GPU
|
77 |
class WhisperxAlignModel:
|
78 |
def __init__(self):
|
79 |
from whisperx import load_align_model
|
|
|
84 |
audio = load_audio(audio_path)
|
85 |
return align(segments, self.model, self.metadata, audio, device, return_char_alignments=False)["segments"]
|
86 |
|
87 |
+
@spaces.GPU
|
88 |
class WhisperModel:
|
89 |
def __init__(self, model_name):
|
90 |
from whisper import load_model
|
|
|
101 |
def transcribe(self, audio_path):
|
102 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
103 |
|
104 |
+
@spaces.GPU
|
105 |
class WhisperxModel:
|
106 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
107 |
from whisperx import load_model
|
|
|
114 |
segment['text'] = replace_numbers_with_words(segment['text'])
|
115 |
return self.align_model.align(segments, audio_path)
|
116 |
|
117 |
+
@spaces.GPU
|
118 |
def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
|
119 |
global transcribe_model, align_model, ssrspeech_model
|
120 |
|
|
|
174 |
"word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in words_info]
|
175 |
}
|
176 |
|
177 |
+
@spaces.GPU
|
178 |
def transcribe(seed, audio_path):
|
179 |
if transcribe_model is None:
|
180 |
raise gr.Error("Transcription model not loaded")
|
|
|
189 |
state, success_message
|
190 |
]
|
191 |
|
192 |
+
@spaces.GPU
|
193 |
def align_segments(transcript, audio_path):
|
194 |
from aeneas.executetask import ExecuteTask
|
195 |
from aeneas.task import Task
|
|
|
211 |
with open(tmp_sync_map_path, "r") as f:
|
212 |
return json.load(f)
|
213 |
|
214 |
+
@spaces.GPU
|
215 |
def align(seed, transcript, audio_path):
|
216 |
if align_model is None:
|
217 |
raise gr.Error("Align model not loaded")
|
|
|
250 |
return num # In case num2words fails (unlikely with digits but just to be safe)
|
251 |
return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
|
252 |
|
253 |
+
@spaces.GPU
|
254 |
def run(seed, sub_amount, ssrspeech_model_choice, codec_audio_sr, codec_sr, top_k, top_p, temperature,
|
255 |
stop_repetition, kvcache, silence_tokens, aug_text, cfg_coef,
|
256 |
audio_path, transcribe_state, original_transcript, transcript,
|