Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,32 +2,49 @@ from transformers import pipeline
|
|
2 |
import gradio as gr
|
3 |
import time
|
4 |
|
5 |
-
p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
|
6 |
-
pc = pipeline("automatic-speech-recognition",model="softcatala/wav2vec2-large-xlsr-catala")
|
7 |
-
pe = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
|
8 |
-
pj = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-japanese")
|
9 |
-
pf = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-french")
|
10 |
|
|
|
|
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
|
14 |
def transcribe(language,audio, state=""):#language="Spanish",
|
15 |
time.sleep(1)
|
16 |
-
if language=="Spanish":
|
17 |
-
state=""
|
18 |
-
text = p(audio)["text"]
|
19 |
-
if language=="Catalan":
|
20 |
state=""
|
21 |
-
text = pc(audio)["text"]
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
state += text + " "
|
32 |
#text2="Esto es loq ue te he entendido"
|
33 |
return state, state
|
@@ -39,7 +56,9 @@ demo=gr.Interface(
|
|
39 |
description="1)Select language 2)Click on 'record from microphone' and talk 3)Click on 'stop recording' 4)Click on submit 5)Before starting again, click on 'clear'",
|
40 |
|
41 |
inputs=[
|
42 |
-
gr.Dropdown(["Spanish","Catalan","English", "French", "Japanese"],value="Spanish"),
|
|
|
|
|
43 |
#gr.Audio(source="microphone", type="filepath", streaming=True),
|
44 |
gr.inputs.Audio(source="microphone", type="filepath"),
|
45 |
"state"#,"language"
|
|
|
2 |
import gradio as gr
|
3 |
import time
|
4 |
|
5 |
+
#p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
8 |
+
from datasets import load_dataset
|
9 |
|
10 |
+
# load model and processor
|
11 |
+
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
|
12 |
+
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
|
13 |
+
model.config.forced_decoder_ids = None
|
14 |
+
|
15 |
+
# load dummy dataset and read audio files
|
16 |
+
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
17 |
+
sample = ds[0]["audio"]
|
18 |
+
input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
|
19 |
+
|
20 |
+
# generate token ids
|
21 |
+
predicted_ids = model.generate(input_features)
|
22 |
+
# decode token ids to text
|
23 |
+
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
|
24 |
+
#['<|startoftranscript|><|en|><|transcribe|><|notimestamps|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|endoftext|>']
|
25 |
+
|
26 |
+
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
|
27 |
+
#[' Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.']
|
28 |
|
29 |
|
30 |
def transcribe(language,audio, state=""):#language="Spanish",
|
31 |
time.sleep(1)
|
32 |
+
if language=="Multi":
|
|
|
|
|
|
|
33 |
state=""
|
34 |
+
text = processor.batch_decode(predicted_ids, skip_special_tokens=False)
|
35 |
+
|
36 |
+
# if language=="Catalan":
|
37 |
+
# state=""
|
38 |
+
# text = pc(audio)["text"]
|
39 |
+
# if language=="English":
|
40 |
+
# state=""
|
41 |
+
# text = pe(audio)["text"]
|
42 |
+
# if language=="French":
|
43 |
+
# state=""
|
44 |
+
# text = pf(audio)["text"]
|
45 |
+
# if language=="Japanese":
|
46 |
+
# state=""
|
47 |
+
# text = pj(audio)["text"]
|
48 |
state += text + " "
|
49 |
#text2="Esto es loq ue te he entendido"
|
50 |
return state, state
|
|
|
56 |
description="1)Select language 2)Click on 'record from microphone' and talk 3)Click on 'stop recording' 4)Click on submit 5)Before starting again, click on 'clear'",
|
57 |
|
58 |
inputs=[
|
59 |
+
#gr.Dropdown(["Spanish","Catalan","English", "French", "Japanese"],value="Spanish"),
|
60 |
+
gr.Dropdown(["Multi"],value="Multi"),
|
61 |
+
|
62 |
#gr.Audio(source="microphone", type="filepath", streaming=True),
|
63 |
gr.inputs.Audio(source="microphone", type="filepath"),
|
64 |
"state"#,"language"
|