Jan van Doorn committed on
Commit
b6e8b6c
·
1 Parent(s): ae58876

reworked to bare minimum

Browse files
Files changed (1) hide show
  1. app.py +11 -50
app.py CHANGED
@@ -5,73 +5,34 @@ from datasets import load_dataset
5
  import gradio as gr
6
  import os
7
 
8
- login(token=os.environ['hf_token'])
9
 
10
  atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
11
  atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
12
 
13
  num_examples = 3
14
  examples_atco2 = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
15
- examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
16
- examples = examples_atco2 + examples_atcosim
17
  # examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
18
  # examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]
19
 
20
- bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
21
-
22
- whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
23
- whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
24
  #%%
25
  def transcribe(audio, model_version):
26
- if model_version == 'large-v2':
27
- whisper = whisper_v2
28
- ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
29
- dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
30
- elif model_version == 'large-v3':
31
- whisper = whisper_v3
32
- ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
33
- dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
34
  if audio is not None:
35
  return whisper(audio)['text']
36
  else:
37
  return 'There was no audio to transcribe...'
38
 
39
- #%%
40
- def extractCallSignCommand(transcription):
41
- if type(transcription) is str:
42
- result = bert_atco_ner(transcription)
43
- callsigns = []
44
- commands = []
45
- values = []
46
- for item in result:
47
- if 'callsign' in item['entity']:
48
- callsigns.append(item['word'])
49
- if 'command' in item['entity']:
50
- commands.append(item['word'])
51
- if 'value' in item['entity']:
52
- values.append(item['word'])
53
-
54
- return 'Callsigns: ' + ', '.join(callsigns) + '\nCommands: ' + ', '.join(commands) + '\nValues: ' + ', '.join(values)
55
- else:
56
- return 'There was no transcription to extract a callsign or command from...'
57
-
58
- #%%
59
- def transcribeAndExtract(audio, transcribe_only, model_version):
60
- transcription = transcribe(audio, model_version)
61
- if not transcribe_only:
62
- callSignCommandValues = extractCallSignCommand(transcription)
63
- else:
64
- callSignCommandValues = ''
65
- return transcription, callSignCommandValues
66
-
67
  #%%
68
  file_iface = gr.Interface(
69
  fn = transcribeAndExtract,
70
- inputs = [gr.Audio(source='upload', interactive=True),
71
- gr.Checkbox(label='Transcribe only', default=False),
72
  gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
73
  ],
74
-
75
  outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
76
  title = 'Whisper ATC - Large v3',
77
  description = 'Transcribe and extract',
@@ -80,15 +41,15 @@ file_iface = gr.Interface(
80
 
81
  mic_iface = gr.Interface(
82
  fn = transcribeAndExtract,
83
- inputs = [gr.Audio(source='microphone', type='filepath'),
84
- gr.Checkbox(label='Transcribe only', default=False),
85
  gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
86
  ],
87
-
88
  outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
89
  title = 'Whisper ATC - Large v3',
90
  description = 'Transcribe and extract',
91
  )
92
  #%%
93
  demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
94
- demo.launch()
 
5
  import gradio as gr
6
  import os
7
 
8
+ #login(token=os.environ['hf_token'])
9
 
10
  atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
11
  atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
12
 
13
  num_examples = 3
14
  examples_atco2 = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
15
+ #examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
16
+ examples = examples_atco2 #+ examples_atcosim
17
  # examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
18
  # examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]
19
 
20
+ whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
 
 
 
21
  #%%
22
  def transcribe(audio, model_version):
 
 
 
 
 
 
 
 
23
  if audio is not None:
24
  return whisper(audio)['text']
25
  else:
26
  return 'There was no audio to transcribe...'
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  #%%
29
  file_iface = gr.Interface(
30
  fn = transcribeAndExtract,
31
+ inputs = [gr.Audio(source='upload', interactive=True),
32
+ gr.Checkbox(label='Transcribe only', default=False),
33
  gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
34
  ],
35
+
36
  outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
37
  title = 'Whisper ATC - Large v3',
38
  description = 'Transcribe and extract',
 
41
 
42
  mic_iface = gr.Interface(
43
  fn = transcribeAndExtract,
44
+ inputs = [gr.Audio(source='microphone', type='filepath'),
45
+ gr.Checkbox(label='Transcribe only', default=False),
46
  gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
47
  ],
48
+
49
  outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
50
  title = 'Whisper ATC - Large v3',
51
  description = 'Transcribe and extract',
52
  )
53
  #%%
54
  demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
55
+ demo.launch()