Jan van Doorn committed · Commit b6e8b6c · 1 Parent(s): ae58876
reworked to bare minimum
app.py CHANGED
@@ -5,73 +5,34 @@ from datasets import load_dataset
 import gradio as gr
 import os
 
-login(token=os.environ['hf_token'])
+#login(token=os.environ['hf_token'])
 
 atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
 atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
 
 num_examples = 3
 examples_atco2 = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
-examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
-examples = examples_atco2
+#examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
+examples = examples_atco2 #+ examples_atcosim
 # examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
 # examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]
 
-
-
-whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
-whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
+whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
 #%%
 def transcribe(audio, model_version):
-    if model_version == 'large-v2':
-        whisper = whisper_v2
-        ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
-        dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
-    elif model_version == 'large-v3':
-        whisper = whisper_v3
-        ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
-        dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
     if audio is not None:
         return whisper(audio)['text']
     else:
         return 'There was no audio to transcribe...'
 
-#%%
-def extractCallSignCommand(transcription):
-    if type(transcription) is str:
-        result = bert_atco_ner(transcription)
-        callsigns = []
-        commands = []
-        values = []
-        for item in result:
-            if 'callsign' in item['entity']:
-                callsigns.append(item['word'])
-            if 'command' in item['entity']:
-                commands.append(item['word'])
-            if 'value' in item['entity']:
-                values.append(item['word'])
-
-        return 'Callsigns: ' + ', '.join(callsigns) + '\nCommands: ' + ', '.join(commands) + '\nValues: ' + ', '.join(values)
-    else:
-        return 'There was no transcription to extract a callsign or command from...'
-
-#%%
-def transcribeAndExtract(audio, transcribe_only, model_version):
-    transcription = transcribe(audio, model_version)
-    if not transcribe_only:
-        callSignCommandValues = extractCallSignCommand(transcription)
-    else:
-        callSignCommandValues = ''
-    return transcription, callSignCommandValues
-
 #%%
 file_iface = gr.Interface(
     fn = transcribeAndExtract,
-    inputs = [gr.Audio(source='upload', interactive=True),
-              gr.Checkbox(label='Transcribe only', default=False),
+    inputs = [gr.Audio(source='upload', interactive=True),
+              gr.Checkbox(label='Transcribe only', default=False),
               gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
              ],
-
+
     outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
     title = 'Whisper ATC - Large v3',
     description = 'Transcribe and extract',
@@ -80,15 +41,15 @@ file_iface = gr.Interface(
 
 mic_iface = gr.Interface(
     fn = transcribeAndExtract,
-    inputs = [gr.Audio(source='microphone', type='filepath'),
-              gr.Checkbox(label='Transcribe only', default=False),
+    inputs = [gr.Audio(source='microphone', type='filepath'),
+              gr.Checkbox(label='Transcribe only', default=False),
               gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
              ],
-
+
     outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
     title = 'Whisper ATC - Large v3',
     description = 'Transcribe and extract',
 )
 #%%
 demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
-demo.launch()
+demo.launch()
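A note on the reworked file: both gr.Interface blocks still pass fn = transcribeAndExtract, but this commit deletes that function (together with extractCallSignCommand), so constructing file_iface would raise a NameError as soon as the Space starts. A minimal shim that would keep the two-output layout working, sketched here under the assumption that the UI is meant to stay as-is; it is not part of the commit:

def transcribeAndExtract(audio, transcribe_only, model_version):
    # NER extraction was removed in this commit, so the second output
    # (the 'Callsigns, commands and values' textbox) stays empty.
    return transcribe(audio, model_version), ''

With or without such a shim, transcribe() now ignores model_version and always uses the single large-v3 pipeline, so the 'Transcribe only' checkbox and the model dropdown remain in the UI as no-ops. The example audios are built as {'sampling_rate': ..., 'raw': ...} dicts, which is an input format the transformers ASR pipeline accepts directly, e.g. whisper({'sampling_rate': sr, 'raw': waveform})['text'].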