OpenSound committed
Commit 292c2fc · 1 parent: 69e76a4

Update app.py

Files changed (1):
  1. app.py (+9 −9)
app.py CHANGED
@@ -34,7 +34,7 @@ _whitespace_re = re.compile(r"\s+")
 def get_random_string():
     return "".join(str(uuid.uuid4()).split("-"))
 
-@spaces.GPU(duration=30)
+@spaces.GPU
 def seed_everything(seed):
     if seed != -1:
         os.environ['PYTHONHASHSEED'] = str(seed)
@@ -73,7 +73,7 @@ def get_mask_interval(transcribe_state, word_span):
 
     return (start, end)
 
-@spaces.GPU(duration=120)
+@spaces.GPU
 class WhisperxAlignModel:
     def __init__(self):
         from whisperx import load_align_model
@@ -84,7 +84,7 @@ class WhisperxAlignModel:
         audio = load_audio(audio_path)
         return align(segments, self.model, self.metadata, audio, device, return_char_alignments=False)["segments"]
 
-@spaces.GPU(duration=120)
+@spaces.GPU
 class WhisperModel:
     def __init__(self, model_name):
         from whisper import load_model
@@ -101,7 +101,7 @@ class WhisperModel:
     def transcribe(self, audio_path):
         return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
 
-@spaces.GPU(duration=120)
+@spaces.GPU
 class WhisperxModel:
     def __init__(self, model_name, align_model: WhisperxAlignModel):
         from whisperx import load_model
@@ -114,7 +114,7 @@ class WhisperxModel:
             segment['text'] = replace_numbers_with_words(segment['text'])
         return self.align_model.align(segments, audio_path)
 
-@spaces.GPU(duration=120)
+@spaces.GPU
 def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
     global transcribe_model, align_model, ssrspeech_model
 
@@ -174,7 +174,7 @@ def get_transcribe_state(segments):
         "word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in words_info]
     }
 
-@spaces.GPU(duration=60)
+@spaces.GPU
 def transcribe(seed, audio_path):
     if transcribe_model is None:
         raise gr.Error("Transcription model not loaded")
@@ -189,7 +189,7 @@ def transcribe(seed, audio_path):
         state, success_message
     ]
 
-@spaces.GPU(duration=60)
+@spaces.GPU
 def align_segments(transcript, audio_path):
     from aeneas.executetask import ExecuteTask
     from aeneas.task import Task
@@ -211,7 +211,7 @@ def align_segments(transcript, audio_path):
     with open(tmp_sync_map_path, "r") as f:
         return json.load(f)
 
-@spaces.GPU(duration=90)
+@spaces.GPU
 def align(seed, transcript, audio_path):
     if align_model is None:
         raise gr.Error("Align model not loaded")
@@ -250,7 +250,7 @@ def replace_numbers_with_words(sentence):
         return num  # In case num2words fails (unlikely with digits but just to be safe)
     return re.sub(r'\b\d+\b', replace_with_words, sentence)  # Regular expression that matches numbers
 
-@spaces.GPU(duration=90)
+@spaces.GPU
 def run(seed, sub_amount, ssrspeech_model_choice, codec_audio_sr, codec_sr, top_k, top_p, temperature,
         stop_repetition, kvcache, silence_tokens, aug_text, cfg_coef,
         audio_path, transcribe_state, original_transcript, transcript,
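
For context, every hunk in this commit swaps the parameterized ZeroGPU decorator for the bare form. Below is a minimal sketch of the two forms, assuming the Hugging Face `spaces` package that backs ZeroGPU Spaces; the decorated functions are hypothetical illustrations (they only run inside such a Space) and are not taken from app.py.

import spaces
import torch

# Bare form kept by this commit: request a GPU slice for the default time window.
@spaces.GPU
def infer_short(x: float) -> torch.Tensor:
    # Hypothetical GPU work; app.py's real functions wrap Whisper/WhisperX and SSR-Speech.
    return torch.full((3,), x, device="cuda")

# Parameterized form removed by this commit: reserve the GPU for up to the given
# number of seconds (here 120) for longer-running work.
@spaces.GPU(duration=120)
def infer_long(x: float) -> torch.Tensor:
    return torch.full((3,), x, device="cuda")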