OpenSound committed
Commit f52a712 · 1 Parent(s): 4b8ea71

Update app.py

Files changed (1): app.py (+5 -6)
app.py CHANGED
@@ -113,12 +113,11 @@ def get_transcribe_state(segments):
 
 @spaces.GPU
 def transcribe(audio_path):
-    align_model, _ = load_align_model(language_code=language, device=device)
     transcribe_model = load_model(transcribe_model_name, device, asr_options={"suppress_numerals": True, "max_new_tokens": None, "clip_timestamps": None, "hallucination_silence_threshold": None}, language=language)
     segments = transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
     for segment in segments:
         segment['text'] = replace_numbers_with_words(segment['text'])
-    segments = align_model.align(segments, audio_path)
+    _, segments = align(segments, audio_path)
     state = get_transcribe_state(segments)
     success_message = "<span style='color:green;'>Success: Transcribe completed successfully!</span>"
 
@@ -135,7 +134,7 @@ def align(segments, audio_path):
     segments = align_func(segments, align_model, metadata, audio, device, return_char_alignments=False)["segments"]
     state = get_transcribe_state(segments)
 
-    return state
+    return state, segments
 
 
 def get_output_audio(audio_tensors, codec_audio_sr):
@@ -177,7 +176,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
     [orig_transcript, segments, _] = transcribe(audio_path)
     orig_transcript = orig_transcript.lower()
     target_transcript = target_transcript.lower()
-    transcribe_state = align(segments, audio_path)
+    transcribe_state, _ = align(segments, audio_path)
     print(orig_transcript)
     print(target_transcript)
 
@@ -201,7 +200,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
 
     orig_transcript = orig_transcript.lower()
     target_transcript = target_transcript.lower()
-    transcribe_state = align(segments, audio_path)
+    transcribe_state, _ = align(segments, audio_path)
     print(orig_transcript)
     target_transcript_copy = target_transcript # for tts cut out
     target_transcript_copy = target_transcript_copy.split(' ')[0]
@@ -277,7 +276,7 @@ def run(seed, sub_amount, codec_audio_sr, codec_sr, top_k, top_p, temperature,
     torchaudio.save(audio_path, new_audio, codec_audio_sr)
     if tts: # remove the start parts
         [new_transcript, new_segments, _] = transcribe(audio_path)
-        transcribe_state = align(new_segments, audio_path)
+        transcribe_state, _ = align(new_segments, audio_path)
         tmp1 = transcribe_state['segments'][0]['words'][0]['word'].lower()
         tmp2 = target_transcript_copy.lower()
         if tmp1 == tmp2:
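
In short, this commit changes align() to return both the UI state and the word-aligned segments, and transcribe() now reuses that shared helper instead of loading its own alignment model, so callers in run() unpack two values. A minimal runnable sketch of the new contract, with the WhisperX-style model calls stubbed out (the real app.py uses load_model, align_func and a richer get_transcribe_state):

# Sketch only: the stubs below stand in for the ASR and forced-alignment
# models that app.py actually loads; only the return shapes match the commit.
def get_transcribe_state(segments):
    return {"segments": segments}

def align(segments, audio_path):
    # real code: align_func(segments, align_model, metadata, audio, device, ...)
    aligned = [dict(seg, words=seg.get("words", [])) for seg in segments]
    state = get_transcribe_state(aligned)
    # new contract: return the display state *and* the aligned segments
    return state, aligned

def transcribe(audio_path):
    # real code: transcribe_model.transcribe(audio_path, batch_size=8)["segments"]
    segments = [{"text": "hello world", "words": []}]
    # alignment is now delegated to the shared align() helper
    _, segments = align(segments, audio_path)
    state = get_transcribe_state(segments)
    return "hello world", segments, state

# Callers that only need the state discard the segments, as run() does:
# transcribe_state, _ = align(segments, audio_path)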