Spaces:

OpenSound
/

SSR-Speech

Running on Zero

App Files Community

OpenSound committed on Sep 22, 2024

Commit

292c2fc

1 Parent(s): 69e76a4

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -34,7 +34,7 @@ _whitespace_re = re.compile(r"\s+")
 def get_random_string():
     return "".join(str(uuid.uuid4()).split("-"))
-@spaces.GPU(duration=30)
 def seed_everything(seed):
     if seed != -1:
         os.environ['PYTHONHASHSEED'] = str(seed)
@@ -73,7 +73,7 @@ def get_mask_interval(transcribe_state, word_span):
     return (start, end)
-@spaces.GPU(duration=120)
 class WhisperxAlignModel:
     def __init__(self):
         from whisperx import load_align_model
@@ -84,7 +84,7 @@ class WhisperxAlignModel:
         audio = load_audio(audio_path)
         return align(segments, self.model, self.metadata, audio, device, return_char_alignments=False)["segments"]
-@spaces.GPU(duration=120)
 class WhisperModel:
     def __init__(self, model_name):
         from whisper import load_model
@@ -101,7 +101,7 @@ class WhisperModel:
     def transcribe(self, audio_path):
         return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
-@spaces.GPU(duration=120)
 class WhisperxModel:
     def __init__(self, model_name, align_model: WhisperxAlignModel):
         from whisperx import load_model
@@ -114,7 +114,7 @@ class WhisperxModel:
             segment['text'] = replace_numbers_with_words(segment['text'])
         return self.align_model.align(segments, audio_path)
-@spaces.GPU(duration=120)
 def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
     global transcribe_model, align_model, ssrspeech_model
@@ -174,7 +174,7 @@ def get_transcribe_state(segments):
         "word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in words_info]
     }
-@spaces.GPU(duration=60)
 def transcribe(seed, audio_path):
     if transcribe_model is None:
         raise gr.Error("Transcription model not loaded")
@@ -189,7 +189,7 @@ def transcribe(seed, audio_path):
         state, success_message
     ]
-@spaces.GPU(duration=60)
 def align_segments(transcript, audio_path):
     from aeneas.executetask import ExecuteTask
     from aeneas.task import Task
@@ -211,7 +211,7 @@ def align_segments(transcript, audio_path):
     with open(tmp_sync_map_path, "r") as f:
         return json.load(f)
-@spaces.GPU(duration=90)
 def align(seed, transcript, audio_path):
     if align_model is None:
         raise gr.Error("Align model not loaded")
@@ -250,7 +250,7 @@ def replace_numbers_with_words(sentence):
             return num # In case num2words fails (unlikely with digits but just to be safe)
     return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
-@spaces.GPU(duration=90)
 def run(seed, sub_amount, ssrspeech_model_choice, codec_audio_sr, codec_sr, top_k, top_p, temperature,
         stop_repetition, kvcache, silence_tokens, aug_text, cfg_coef,
         audio_path, transcribe_state, original_transcript, transcript,

 def get_random_string():
     return "".join(str(uuid.uuid4()).split("-"))
+@spaces.GPU
 def seed_everything(seed):
     if seed != -1:
         os.environ['PYTHONHASHSEED'] = str(seed)
     return (start, end)
+@spaces.GPU
 class WhisperxAlignModel:
     def __init__(self):
         from whisperx import load_align_model
         audio = load_audio(audio_path)
         return align(segments, self.model, self.metadata, audio, device, return_char_alignments=False)["segments"]
+@spaces.GPU
 class WhisperModel:
     def __init__(self, model_name):
         from whisper import load_model
     def transcribe(self, audio_path):
         return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
+@spaces.GPU
 class WhisperxModel:
     def __init__(self, model_name, align_model: WhisperxAlignModel):
         from whisperx import load_model
             segment['text'] = replace_numbers_with_words(segment['text'])
         return self.align_model.align(segments, audio_path)
+@spaces.GPU
 def load_models(whisper_backend_name, whisper_model_name, alignment_model_name, ssrspeech_model_name):
     global transcribe_model, align_model, ssrspeech_model
         "word_bounds": [f"{word['start']} {word['word']} {word['end']}" for word in words_info]
     }
+@spaces.GPU
 def transcribe(seed, audio_path):
     if transcribe_model is None:
         raise gr.Error("Transcription model not loaded")
         state, success_message
     ]
+@spaces.GPU
 def align_segments(transcript, audio_path):
     from aeneas.executetask import ExecuteTask
     from aeneas.task import Task
     with open(tmp_sync_map_path, "r") as f:
         return json.load(f)
+@spaces.GPU
 def align(seed, transcript, audio_path):
     if align_model is None:
         raise gr.Error("Align model not loaded")
             return num # In case num2words fails (unlikely with digits but just to be safe)
     return re.sub(r'\b\d+\b', replace_with_words, sentence) # Regular expression that matches numbers
+@spaces.GPU
 def run(seed, sub_amount, ssrspeech_model_choice, codec_audio_sr, codec_sr, top_k, top_p, temperature,
         stop_repetition, kvcache, silence_tokens, aug_text, cfg_coef,
         audio_path, transcribe_state, original_transcript, transcript,