Spaces: Running on Zero
Update app.py
Browse files

app.py — CHANGED
@@ -113,12 +113,11 @@ def get_transcribe_state(segments):
113
114      @spaces.GPU
115      def transcribe(audio_path):
116  -       align_model, _ = load_align_model(language_code=language, device=device)
117          transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
118          segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
119          for segment in segments:
120              segment['text'] = replace_numbers_with_words(segment['text'])
121  -       segments =
122          state = get_transcribe_state(segments)
123          success_message = "<span style='color:green;'>Success: Transcribe completed successfully!</span>"
124

@@ -135,7 +134,7 @@ def align(segments, audio_path):
135          segments = align_func(segments, align_model, metadata, audio, device, return_char_alignments=False)["segments"]
136          state = get_transcribe_state(segments)
137
138  -       return state
139
140
141      def get_output_audio(audio_tensors, codec_audio_sr):

@@ -177,7 +176,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
177          [orig_transcript, segments, _] = transcribe(audio_path)
178          orig_transcript = orig_transcript.lower()
179          target_transcript = target_transcript.lower()
180  -       transcribe_state = align(segments, audio_path)
181          print(orig_transcript)
182          print(target_transcript)
183

@@ -201,7 +200,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
201
202          orig_transcript = orig_transcript.lower()
203          target_transcript = target_transcript.lower()
204  -       transcribe_state = align(segments, audio_path)
205          print(orig_transcript)
206          target_transcript_copy = target_transcript # for tts cut out
207          target_transcript_copy = target_transcript_copy.split(' ')[0]

@@ -277,7 +276,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
277          torchaudio.save(audio_path, new_audio, codec_audio_sr)
278          if tts: # remove the start parts
279              [new_transcript, new_segments, _] = transcribe(audio_path)
280  -           transcribe_state = align(new_segments, audio_path)
281              tmp1 = transcribe_state['segments'][0]['words'][0]['word'].lower()
282              tmp2 = target_transcript_copy.lower()
283              if tmp1 == tmp2:
|
|
113
114      @spaces.GPU
115      def transcribe(audio_path):
116          transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
117          segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
118          for segment in segments:
119              segment['text'] = replace_numbers_with_words(segment['text'])
120  +       _, segments = align(segments, audio_path)
121          state = get_transcribe_state(segments)
122          success_message = "<span style='color:green;'>Success: Transcribe completed successfully!</span>"
123

134          segments = align_func(segments, align_model, metadata, audio, device, return_char_alignments=False)["segments"]
135          state = get_transcribe_state(segments)
136
137  +       return state, segments
138
139
140      def get_output_audio(audio_tensors, codec_audio_sr):

176          [orig_transcript, segments, _] = transcribe(audio_path)
177          orig_transcript = orig_transcript.lower()
178          target_transcript = target_transcript.lower()
179  +       transcribe_state,_ = align(segments, audio_path)
180          print(orig_transcript)
181          print(target_transcript)
182

200
201          orig_transcript = orig_transcript.lower()
202          target_transcript = target_transcript.lower()
203  +       transcribe_state,_ = align(segments, audio_path)
204          print(orig_transcript)
205          target_transcript_copy = target_transcript # for tts cut out
206          target_transcript_copy = target_transcript_copy.split(' ')[0]

276          torchaudio.save(audio_path, new_audio, codec_audio_sr)
277          if tts: # remove the start parts
278              [new_transcript, new_segments, _] = transcribe(audio_path)
279  +           transcribe_state,_ = align(new_segments, audio_path)
280              tmp1 = transcribe_state['segments'][0]['words'][0]['word'].lower()
281              tmp2 = target_transcript_copy.lower()
282              if tmp1 == tmp2: