Jan van Doorn committed · Commit b6e8b6c · 1 Parent(s): ae58876
reworked to bare minimum
app.py CHANGED
@@ -5,73 +5,34 @@ from datasets import load_dataset
 import gradio as gr
 import os
 
-login(token=os.environ['hf_token'])
+#login(token=os.environ['hf_token'])
 
 atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
 atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
 
 num_examples = 3
 examples_atco2 = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
-examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
-examples = examples_atco2
+#examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
+examples = examples_atco2 #+ examples_atcosim
 # examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
 # examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]
 
-
-
-whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
-whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
+whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
 #%%
 def transcribe(audio, model_version):
-    if model_version == 'large-v2':
-        whisper = whisper_v2
-        ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
-        dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
-    elif model_version == 'large-v3':
-        whisper = whisper_v3
-        ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
-        dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
     if audio is not None:
         return whisper(audio)['text']
     else:
         return 'There was no audio to transcribe...'
 
-#%%
-def extractCallSignCommand(transcription):
-    if type(transcription) is str:
-        result = bert_atco_ner(transcription)
-        callsigns = []
-        commands = []
-        values = []
-        for item in result:
-            if 'callsign' in item['entity']:
-                callsigns.append(item['word'])
-            if 'command' in item['entity']:
-                commands.append(item['word'])
-            if 'value' in item['entity']:
-                values.append(item['word'])
-
-        return 'Callsigns: ' + ', '.join(callsigns) + '\nCommands: ' + ', '.join(commands) + '\nValues: ' + ', '.join(values)
-    else:
-        return 'There was no transcription to extract a callsign or command from...'
-
-#%%
-def transcribeAndExtract(audio, transcribe_only, model_version):
-    transcription = transcribe(audio, model_version)
-    if not transcribe_only:
-        callSignCommandValues = extractCallSignCommand(transcription)
-    else:
-        callSignCommandValues = ''
-    return transcription, callSignCommandValues
-
 #%%
 file_iface = gr.Interface(
     fn = transcribeAndExtract,
-    inputs = [gr.Audio(source='upload', interactive=True),
-              gr.Checkbox(label='Transcribe only', default=False),
+    inputs = [gr.Audio(source='upload', interactive=True),
+              gr.Checkbox(label='Transcribe only', default=False),
               gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
              ],
-
+
     outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
     title = 'Whisper ATC - Large v3',
     description = 'Transcribe and extract',
@@ -80,15 +41,15 @@ file_iface = gr.Interface(
 
 mic_iface = gr.Interface(
     fn = transcribeAndExtract,
-    inputs = [gr.Audio(source='microphone', type='filepath'),
-              gr.Checkbox(label='Transcribe only', default=False),
+    inputs = [gr.Audio(source='microphone', type='filepath'),
+              gr.Checkbox(label='Transcribe only', default=False),
               gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
              ],
-
+
     outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
     title = 'Whisper ATC - Large v3',
     description = 'Transcribe and extract',
 )
 #%%
 demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
-demo.launch()
+demo.launch()
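A note on the reworked file: both gr.Interface blocks still pass fn = transcribeAndExtract, but this commit deletes that function (together with extractCallSignCommand), so constructing file_iface would raise a NameError as soon as the Space starts. A minimal shim that would keep the two-output layout working, sketched here under the assumption that the UI is meant to stay as-is; it is not part of the commit:

def transcribeAndExtract(audio, transcribe_only, model_version):
    # NER extraction was removed in this commit, so the second output
    # (the 'Callsigns, commands and values' textbox) stays empty.
    return transcribe(audio, model_version), ''

With or without such a shim, transcribe() now ignores model_version and always uses the single large-v3 pipeline, so the 'Transcribe only' checkbox and the model dropdown remain in the UI as no-ops. The example audios are built as {'sampling_rate': ..., 'raw': ...} dicts, which is an input format the transformers ASR pipeline accepts directly, e.g. whisper({'sampling_rate': sr, 'raw': waveform})['text'].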