Spaces: Runtime error
ANYANTUDRE committed · commit 21f74cc · 1 parent: 78d1101
refactor code
Files changed:
- app.py (+89 -85)
- app_2.py (+0 -140, deleted)
- goai_helpers/__init__.py (+0 -0, added)
- goai_stt.py → goai_helpers/goai_stt.py (+0 -0, renamed)
- goai_helpers/goai_stt2.py (+38 -0, added)
- goai_traduction.py → goai_helpers/goai_traduction.py (+0 -0, renamed)
- goai_tts.py → goai_helpers/goai_tts.py (+3 -1, renamed)
- goai_tts2.py → goai_helpers/goai_tts2.py (+1 -1, renamed)
- utils.py → goai_helpers/utils.py (+0 -0, renamed)
- goai_stt2.py (+0 -32, deleted)
- whisper_notebook.ipynb (+0 -192, deleted)
app.py
CHANGED
@@ -1,108 +1,65 @@
 import spaces
 import torch
+import scipy
+import torchaudio
 
 import gradio as gr
-from transformers import pipeline
-from
+from transformers import pipeline, set_seed
+from huggingface_hub import login
 
-import tempfile
 import os
-import time
-import requests
-
 from languages import get_language_names
-from
+from goai_helpers import goai_traduction, goai_stt, goai_stt2, goai_tts, goai_tts2
 
-from huggingface_hub import login
 
 auth_token = os.getenv('HF_SPACE_TOKEN')
login(token=auth_token)
 
 
-[old lines 22-48: twenty-seven deleted lines that the commit viewer did not render]
+# list all files in the ./audios directory for the dropdown
+AUDIO_FILES = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
+
+DESCRIPTION = """<div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap;">
+    <div style="flex: 1; min-width: 250px;">
+      Ce modèle de traduction vers la <b>langue Mooré</b> a été développé from scratch par <b>GO AI CORP</b> et la version disponible en test est celle à 700 millions de paramètres.
+      <br><br>
+      Pour les détails techniques sur l'architecture du modèle, prendre attache avec nous via WhatsApp au <b>+226 66 62 83 03</b>.
+    </div>
+    <div style="flex-shrink: 0; min-width: 150px; text-align: center;">
+        <img src="https://github.com/ANYANTUDRE/Stage-IA-Selever-GO-AI-Corp/blob/main/img/goaicorp-logo2.jpg?raw=true" width="300px" style="max-width: 100%; height: auto;">
+    </div>
+</div>
+"""
-# Whisper's full language ID mapping
-lang_to_id = {
-    "en": 0, "zh": 1, "de": 2, "es": 3, "ru": 4, "ko": 5, "fr": 6, "ja": 7,
-    "pt": 8, "tr": 9, "pl": 10, "ca": 11, "nl": 12, "ar": 13, "sv": 14,
-    "it": 15, "id": 16, "hi": 17, "fi": 18, "vi": 19, "he": 20, "uk": 21,
-    "el": 22, "ms": 23, "cs": 24, "ro": 25, "da": 26, "hu": 27, "ta": 28,
-    "no": 29, "th": 30, "ur": 31, "hr": 32, "bg": 33, "lt": 34, "la": 35,
-    "mi": 36, "ml": 37, "cy": 38, "sk": 39, "te": 40, "fa": 41, "lv": 42,
-    "bn": 43, "sr": 44, "az": 45, "sl": 46, "kn": 47, "et": 48, "mk": 49,
-    "br": 50, "eu": 51, "is": 52, "hy": 53, "ne": 54, "mn": 55, "bs": 56,
-    "kk": 57, "sq": 58, "sw": 59, "gl": 60, "mr": 61, "pa": 62, "si": 63,
-    "km": 64, "sn": 65, "yo": 66, "so": 67, "af": 68, "oc": 69, "ka": 70,
-    "be": 71, "tg": 72, "sd": 73, "gu": 74, "am": 75, "yi": 76, "lo": 77,
-    "uz": 78, "fo": 79, "ht": 80, "ps": 81, "tk": 82, "nn": 83, "mt": 84,
-    "sa": 85, "lb": 86, "my": 87, "bo": 88, "tl": 89, "mg": 90, "as": 91,
-    "tt": 92, "haw": 93, "ln": 94, "ha": 95, "ba": 96, "jw": 97, "su": 98
-}
+# Whisper's full language ID mapping
+LANG_TO_ID = {
+    "en": 0, "zh": 1, "de": 2, "es": 3, "ru": 4, "ko": 5, "fr": 6, "ja": 7,
+    "pt": 8, "tr": 9, "pl": 10, "ca": 11, "nl": 12, "ar": 13, "sv": 14,
+    "it": 15, "id": 16, "hi": 17, "fi": 18, "vi": 19, "he": 20, "uk": 21,
+    "el": 22, "ms": 23, "cs": 24, "ro": 25, "da": 26, "hu": 27, "ta": 28,
+    "no": 29, "th": 30, "ur": 31, "hr": 32, "bg": 33, "lt": 34, "la": 35,
+    "mi": 36, "ml": 37, "cy": 38, "sk": 39, "te": 40, "fa": 41, "lv": 42,
+    "bn": 43, "sr": 44, "az": 45, "sl": 46, "kn": 47, "et": 48, "mk": 49,
+    "br": 50, "eu": 51, "is": 52, "hy": 53, "ne": 54, "mn": 55, "bs": 56,
+    "kk": 57, "sq": 58, "sw": 59, "gl": 60, "mr": 61, "pa": 62, "si": 63,
+    "km": 64, "sn": 65, "yo": 66, "so": 67, "af": 68, "oc": 69, "ka": 70,
+    "be": 71, "tg": 72, "sd": 73, "gu": 74, "am": 75, "yi": 76, "lo": 77,
+    "uz": 78, "fo": 79, "ht": 80, "ps": 81, "tk": 82, "nn": 83, "mt": 84,
+    "sa": 85, "lb": 86, "my": 87, "bo": 88, "tl": 89, "mg": 90, "as": 91,
+    "tt": 92, "haw": 93, "ln": 94, "ha": 95, "ba": 96, "jw": 97, "su": 98
+}
-
-forced_decoder_ids = None
-if model.endswith(".en") == False and language in lang_to_id:
-    forced_decoder_ids = [[2, lang_to_id[language]]]  # Setting forced decoder for language
-
-generate_kwargs = {}
-if forced_decoder_ids:
-    generate_kwargs["forced_decoder_ids"] = forced_decoder_ids
-
-#if model.endswith(".en") == False:
-    #generate_kwargs["task"] = task
-
-output = pipe(inputs, batch_size=batch_size, **generate_kwargs)
-
-transcription_text = output['text']
-
-transcription_file_path = "transcription.txt"
-with open(transcription_file_path, "w") as f:
-    f.write(transcription_text)
-
-return transcription_text, transcription_file_path
 
 
 demo = gr.Blocks(theme=gr.themes.Ocean())
 
-mf_transcribe = gr.Interface(
-    fn=
+goai_stt = gr.Interface(
+    fn=goai_stt2.goai_stt2
     inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
        gr.Dropdown(
            choices=[
                "ArissBandoss/whisper-small-mos",
-
-                #"openai/whisper-base",
-                #"openai/whisper-small",
-                #"openai/whisper-medium",
-                "openai/whisper-large",
-                #"openai/whisper-large-v1",
-                #"openai/whisper-large-v3", "openai/whisper-large-v3-turbo", "distil-whisper/distil-large-v3", "xaviviro/whisper-large-v3-catalan-finetuned-v2",
+                "openai/whisper-large-v3-turbo",
            ],
            value="ArissBandoss/whisper-small-mos",
            label="Model Name"
@@ -113,15 +70,62 @@ mf_transcribe = gr.Interface(
        gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
    ],
    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
-
-
+    examples=[["./audios/example1.mp3", "a ye ligdi"],
+              ["./audios/example2.mp3", "zoe nimbãanega"],
+              ["./audios/example3.mp3", "zãng-zãnga"],
+              ["./audios/example4.mp3", "yõk foto"]
+              ],
+    cache_examples=False,
+    title="Mooré ASR: Transcribe Audio",
+    description=DESCRIPTION,
    flagging_mode="auto",
)
 
+goai_tts = gr.Interface(
+    fn=goai_tts2.goai_ttt_tts,
+    inputs=[
+        gr.Text(label="Texte à traduire", lines=2, value="Par cette ouverture, le centre se veut contribuer à la formation professionnelle des jeunes et des femmes, renforcer les capacités des acteurs du monde agricole, et contribuer à la lutte contre le chômage au Burkina Faso."),
+        gr.Dropdown(label="Voix", choices=audio_files, value="exple_voix_masculine.wav"),
+        gr.Audio(label="Cloner votre voix (optionel)", type="numpy", format="wav"),
+    ],
+    outputs=[
+        gr.Text(label="Texte traduit"),
+        gr.Audio(label="Audio original généré", format="wav"),
+        gr.Audio(label="Denoised Audio", format='wav'),
+        gr.Audio(label="Enhanced Audio", format='wav')
+    ],
+    examples=[["Ils vont bien, merci. Mon père travaille dur dans les champs et ma mère est toujours occupée à la maison.", "exple_voix_masculine.wav", None],
+              ["La finale s’est jouée en présence du Président du Faso, Ibrahim Traoré.", "exple_voix_feminine.wav", None],
+              ["Les enfants apprennent les danses traditionnelles de leurs ancêtres, jouent à des jeux traditionnels dans les rues et aident leurs parents dans les tâches quotidiennes.", "exple_voix_masculine.wav", None],
+              ["Ils achetèrent des troupeaux, firent construire des cases, parcoururent tout le pays pour offrir à leur mère et à leurs femmes les plus beaux bijoux, les plus belles étoffes.", "exple_voix_feminine.wav", None]
+              ],
+    cache_examples=False,
+    title="Démo des Modèles pour le Mooré: Traduction (Text-to-Text) et Synthèse Vocale (Text-to-Speech)",
+    description=DESCRIPTION,
+)
+
+goai_traduction = gr.Interface(
+    fn=goai_traduction.goai_traduction,
+    inputs=[
+        gr.Textbox(label="Texte", placeholder="Yaa sõama"),
+        gr.Dropdown(label="Langue source", choices=["fra_Latn", "mos_Latn"], value='fra_Latn'),
+        gr.Dropdown(label="Langue cible", choices=["fra_Latn", "mos_Latn"], value='mos_Latn')
+    ],
+    outputs=["text"],
+    examples=[["Yʋʋm a wãn la b kẽesd biig lekolle?", "mos_Latn", "fra_Latn"],
+              ["Zak-soab la kasma.", "mos_Latn", "fra_Latn"],
+              ["Le gouvernement avait pris des mesures louables par rapport à l’augmentation des prix de certaines denrées alimentaires.", "fra_Latn", "mos_Latn"],
+              ["Comme lors du match face à la Côte d’Ivoire, c’est sur un coup de pied arrêté que les Etalons encaissent leur but.", "fra_Latn", "mos_Latn"],
+              ],
+    cache_examples=False,
+    title="Traduction du Mooré: texte vers texte",
+    description=DESCRIPTION
+)
+
 
 with demo:
    gr.TabbedInterface(
-        interface_list=[
+        interface_list=[goai_stt, goai_tts, goai_traduction],
        tab_names=["Microphone & Audio file"]
    )
app_2.py
DELETED
@@ -1,140 +0,0 @@
-import os
-import torch
-import torchaudio
-import scipy
-
-import gradio as gr
-from transformers import set_seed, pipeline
-from datasets import load_dataset, Audio
-
-import goai_traduction, goai_stt, goai_stt2, goai_tts, goai_tts2
-
-#language_list = ['mos', 'fra', 'eng']
-
-# list all files in the ./audios directory for the dropdown
-audio_files = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
-
-# device
-device = 0 if torch.cuda.is_available() else "cpu"
-
-# texte décrivant chaque tab
-description = """<div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap;">
-    <div style="flex: 1; min-width: 250px;">
-      Ce modèle de traduction vers la <b>langue Mooré</b> a été développé from scratch par <b>GO AI CORP</b> et la version disponible en test est celle à 700 millions de paramètres.
-      <br><br>
-      Pour les détails techniques sur l'architecture du modèle, prendre attache avec nous via WhatsApp au <b>+226 66 62 83 03</b>.
-    </div>
-    <div style="flex-shrink: 0; min-width: 150px; text-align: center;">
-        <img src="https://github.com/ANYANTUDRE/Stage-IA-Selever-GO-AI-Corp/blob/main/img/goaicorp-logo2.jpg?raw=true" width="300px" style="max-width: 100%; height: auto;">
-    </div>
-</div>
-"""
-
-# gradio app
-demo = gr.Blocks()
-
-goai_stt = gr.Interface(
-    fn = goai_stt.goai_stt,
-    inputs=[
-        #gr.Audio(sources=["microphone", "upload"], type="numpy")
-        gr.Audio(sources=["upload"], type="numpy")
-    ],
-    outputs="text",
-    examples=[["./audios/example1.mp3", "a ye ligdi"],
-              ["./audios/example2.mp3", "zoe nimbãanega"],
-              ["./audios/example3.mp3", "zãng-zãnga"],
-              ["./audios/example4.mp3", "yõk foto"]
-              ],
-    cache_examples=False,
-    title="Traduction du Mooré: texte vers texte",
-    description=description
-)
-
-
-goai_stt2 = gr.Interface(
-    fn = goai_stt2.goai_stt2,
-    inputs=[
-        #gr.Audio(sources=["microphone", "upload"], type="numpy")
-        gr.Audio(sources=["upload"], type="numpy")
-    ],
-    outputs="text",
-    examples=[["./audios/example1.mp3", "a ye ligdi"],
-              ["./audios/example2.mp3", "zoe nimbãanega"],
-              ["./audios/example3.mp3", "zãng-zãnga"],
-              ["./audios/example4.mp3", "yõk foto"]
-              ],
-    cache_examples=False,
-    title="Traduction du Mooré: texte vers texte",
-    description=description
-)
-
-
-goai_tts = gr.Interface(
-    fn=goai_tts.goai_tts,
-    inputs=[
-        gr.Text(label="Texte", placeholder="a ye ligdi")
-    ],
-    outputs=[
-        gr.Audio(label="Audio généré", type="numpy")
-    ],
-    examples=[["a ye ligdi"],
-              ["zoe nimbãanega "],
-              ["zãng-zãnga"],
-              ["yõk foto"]
-              ],
-    cache_examples=False,
-    title="Traduction du Mooré: texte vers texte",
-    description=description
-)
-
-goai_tts2 = gr.Interface(
-    fn=goai_tts2.goai_ttt_tts,
-    inputs=[
-        gr.Text(label="Texte à traduire", lines=2, value="Par cette ouverture, le centre se veut contribuer à la formation professionnelle des jeunes et des femmes, renforcer les capacités des acteurs du monde agricole, et contribuer à la lutte contre le chômage au Burkina Faso."),
-        gr.Dropdown(label="Voix", choices=audio_files, value="exple_voix_masculine.wav"),
-        gr.Audio(label="Cloner votre voix (optionel)", type="numpy", format="wav"),
-    ],
-    outputs=[
-        gr.Text(label="Texte traduit"),
-        gr.Audio(label="Audio original généré", format="wav"),
-        gr.Audio(label="Denoised Audio", format='wav'),
-        gr.Audio(label="Enhanced Audio", format='wav')
-    ],
-    examples=[["Ils vont bien, merci. Mon père travaille dur dans les champs et ma mère est toujours occupée à la maison.", "exple_voix_masculine.wav", None],
-              ["La finale s’est jouée en présence du Président du Faso, Ibrahim Traoré.", "exple_voix_feminine.wav", None],
-              ["Les enfants apprennent les danses traditionnelles de leurs ancêtres, jouent à des jeux traditionnels dans les rues et aident leurs parents dans les tâches quotidiennes.", "exple_voix_masculine.wav", None],
-              ["Ils achetèrent des troupeaux, firent construire des cases, parcoururent tout le pays pour offrir à leur mère et à leurs femmes les plus beaux bijoux, les plus belles étoffes.", "exple_voix_feminine.wav", None]
-              ],
-    cache_examples=False,
-    title="Démo des Modèles pour le Mooré: Traduction (Text-to-Text) et Synthèse Vocale (Text-to-Speech)",
-    description=description,
-)
-
-goai_traduction = gr.Interface(
-    fn=goai_traduction.goai_traduction,
-    inputs=[
-        gr.Textbox(label="Texte", placeholder="Yaa sõama"),
-        gr.Dropdown(label="Langue source", choices=["fra_Latn", "mos_Latn"], value='fra_Latn'),
-        gr.Dropdown(label="Langue cible", choices=["fra_Latn", "mos_Latn"], value='mos_Latn')
-    ],
-    outputs=["text"],
-    examples=[["Yʋʋm a wãn la b kẽesd biig lekolle?", "mos_Latn", "fra_Latn"],
-              ["Zak-soab la kasma.", "mos_Latn", "fra_Latn"],
-              ["Le gouvernement avait pris des mesures louables par rapport à l’augmentation des prix de certaines denrées alimentaires.", "fra_Latn", "mos_Latn"],
-              ["Comme lors du match face à la Côte d’Ivoire, c’est sur un coup de pied arrêté que les Etalons encaissent leur but.", "fra_Latn", "mos_Latn"],
-              ],
-    cache_examples=False,
-    title="Traduction du Mooré: texte vers texte",
-    description=description
-)
-
-
-with demo:
-    gr.TabbedInterface(
-        #[goai_traduction, goai_tts, goai_stt, goai_tts2],
-        #["Traduction", "Text-2-speech", "Speech-2-text", "Text-2-speech-2"],
-        [goai_tts2, goai_stt, goai_stt2],
-        ["Traduction et Synthèse vocale du Mooré", "Speech-2-text", "Speech-2-text-Whisper"],
-    )
-
-demo.launch()
goai_helpers/__init__.py
ADDED
File without changes

goai_stt.py → goai_helpers/goai_stt.py
RENAMED
File without changes
goai_helpers/goai_stt2.py
ADDED
@@ -0,0 +1,38 @@
+import torch
+import spaces
+from transformers import pipeline
+
+DEVICE = 0 if torch.cuda.is_available() else "cpu"
+
+
+@spaces.GPU
+def transcribe(inputs, model, language, batch_size, chunk_length_s, stride_length_s):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+
+    pipe = pipeline(
+        task="automatic-speech-recognition",
+        model=model,
+        chunk_length_s=chunk_length_s,
+        stride_length_s=stride_length_s,
+        device=DEVICE,
+    )
+
+
+    forced_decoder_ids = None
+    if model.endswith(".en") == False and language in LANG_TO_ID:
+        forced_decoder_ids = [[2, LANG_TO_ID[language]]]  # Setting forced decoder for language
+
+    generate_kwargs = {}
+    if forced_decoder_ids:
+        generate_kwargs["forced_decoder_ids"] = forced_decoder_ids
+
+    output = pipe(inputs, batch_size=batch_size, **generate_kwargs)
+
+    transcription_text = output['text']
+
+    transcription_file_path = "transcription.txt"
+    with open(transcription_file_path, "w") as f:
+        f.write(transcription_text)
+
+    return transcription_text, transcription_file_path
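Note: the added module references names it never imports. It raises `gr.Error` without importing gradio, and `LANG_TO_ID` is only defined in app.py, so the language branch would raise NameError; meanwhile app.py wires `fn=goai_stt2.goai_stt2` although this module defines `transcribe`. A hedged sketch of a header that would make the module self-contained; the abridged table and the trailing alias are this editor's assumptions, not the commit's:

import torch
import spaces
import gradio as gr                       # needed: transcribe() raises gr.Error
from transformers import pipeline

# In the commit this table lives in app.py; keeping a copy here (or in a
# shared constants module) avoids both the NameError and a circular import.
# Abridged stand-in for the full 99-entry table:
LANG_TO_ID = {"en": 0, "zh": 1, "de": 2, "es": 3, "ru": 4, "ko": 5, "fr": 6}

DEVICE = 0 if torch.cuda.is_available() else "cpu"


@spaces.GPU
def transcribe(inputs, model, language, batch_size, chunk_length_s, stride_length_s):
    ...  # body as committed above


# app.py calls goai_stt2.goai_stt2; an alias keeps that call resolvable:
goai_stt2 = transcribe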
goai_traduction.py → goai_helpers/goai_traduction.py
RENAMED
File without changes
goai_tts.py → goai_helpers/goai_tts.py
RENAMED
@@ -6,9 +6,11 @@ import numpy as np
 from transformers import pipeline, set_seed
 from huggingface_hub import login
 
-
+
 auth_token = os.getenv('HF_SPACE_TOKEN')
 login(token=auth_token)
+
+
 @spaces.GPU
 def goai_tts(texte):
     """
goai_tts2.py → goai_helpers/goai_tts2.py
RENAMED
@@ -18,7 +18,7 @@ from TTS.tts.models.xtts import Xtts
 
 from resemble_enhance.enhancer.inference import denoise, enhance
 from flore200_codes import flores_codes
-from utils import download_file, diviser_phrases_moore, enhance_speech
+from goai_helpers.utils import download_file, diviser_phrases_moore, enhance_speech
 import goai_traduction
 
 # authentification
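Note: this hunk repoints the `utils` import at the new package but leaves `import goai_traduction` untouched, even though that module also moves into `goai_helpers/` in this commit; once the old top-level file is gone, the import fails at load time. The matching one-line fix, assuming the package layout this commit creates:

from goai_helpers import goai_traduction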
utils.py → goai_helpers/utils.py
RENAMED
File without changes
goai_stt2.py
DELETED
@@ -1,32 +0,0 @@
-import torch
-
-import gradio as gr
-#import yt_dlp as youtube_dl
-from transformers import pipeline
-from transformers.pipelines.audio_utils import ffmpeg_read
-
-import spaces
-import tempfile
-import os
-
-
-MODEL_NAME = "ArissBandoss/whisper-small-mos"
-BATCH_SIZE = 8
-
-device = 0 if torch.cuda.is_available() else "cpu"
-
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-
-@spaces.GPU
-def goai_stt2(inputs, task):
-    print(inputs)
-    if inputs is None:
-        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-    text = pipe(inputs[1], batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return text
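Note: the deleted script indexed `inputs[1]` because `gr.Audio(type="numpy")` hands the callback a `(sample_rate, samples)` tuple, whereas the new app.py uses `type="filepath"`, which hands the transformers ASR pipeline a path string directly. A small sketch (not from the commit) contrasting the two conventions:

import numpy as np

def handle_numpy(inputs):
    # gr.Audio(type="numpy") delivers (sample_rate, samples); hence the
    # deleted goai_stt2(inputs, task) passed inputs[1] to the pipeline.
    sample_rate, samples = inputs
    return sample_rate, samples.shape

def handle_filepath(path):
    # gr.Audio(type="filepath") delivers a path string, which the
    # transformers ASR pipeline accepts as-is.
    return path

print(handle_numpy((16000, np.zeros(16000))))  # -> (16000, (16000,))
print(handle_filepath("example.wav"))          # -> example.wav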
whisper_notebook.ipynb
DELETED
@@ -1,192 +0,0 @@
-{
-    "nbformat": 4,
-    "nbformat_minor": 0,
-    "metadata": {
-        "colab": {
-            "provenance": [],
-            "gpuType": "T4"
-        },
-        "kernelspec": {
-            "name": "python3",
-            "display_name": "Python 3"
-        },
-        "language_info": {
-            "name": "python"
-        },
-        "accelerator": "GPU"
-    },
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "source": [
-                "# Whisper v3 is here!\n",
-                "\n",
-                "Whisper v3 is a new model open sourced by OpenAI. The model can do multilingual transcriptions and is quite impressive. For example, you can change from English to Spanish or Chinese in the middle of a sentence and it will work well!\n",
-                "\n",
-                "The model can be run in a free Google Colab instance and is integrated into `transformers` already, so switching can be a very smooth process if you already use the previous versions."
-            ],
-            "metadata": {
-                "id": "OXaUqiE-eyXM"
-            }
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {
-                "id": "WFQeUT9EcIcK"
-            },
-            "outputs": [],
-            "source": [
-                "%%capture\n",
-                "!pip install git+https://github.com/huggingface/transformers gradio"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "source": [
-                "Let's use the high level `pipeline` from the `transformers` library to load the model."
-            ],
-            "metadata": {
-                "id": "sZONes21fHTA"
-            }
-        },
-        {
-            "cell_type": "code",
-            "source": [
-                "import torch\n",
-                "from transformers import pipeline\n",
-                "\n",
-                "pipe = pipeline(\"automatic-speech-recognition\",\n",
-                "                \"openai/whisper-large-v3\",\n",
-                "                torch_dtype=torch.float16,\n",
-                "                device=\"cuda:0\")"
-            ],
-            "metadata": {
-                "colab": {
-                    "base_uri": "https://localhost:8080/"
-                },
-                "id": "DvBdwMdPcr-Y",
-                "outputId": "47f32218-fd85-49ea-d880-d31577bcf9b8"
-            },
-            "execution_count": null,
-            "outputs": [
-                {
-                    "output_type": "stream",
-                    "name": "stderr",
-                    "text": [
-                        "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
-                        "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
-                    ]
-                }
-            ]
-        },
-        {
-            "cell_type": "code",
-            "source": [
-                "pipe(\"https://cdn-media.huggingface.co/speech_samples/sample1.flac\")"
-            ],
-            "metadata": {
-                "colab": {
-                    "base_uri": "https://localhost:8080/"
-                },
-                "id": "GZFkIyhjc0Nc",
-                "outputId": "f1463431-3e08-4438-815f-b71e5e7a1503"
-            },
-            "execution_count": null,
-            "outputs": [
-                {
-                    "output_type": "execute_result",
-                    "data": {
-                        "text/plain": [
-                            "{'text': \" going along slushy country roads and speaking to damp audiences in draughty schoolrooms day after day for a fortnight he'll have to put in an appearance at some place of worship on sunday morning and he can come to us immediately afterwards\"}"
-                        ]
-                    },
-                    "metadata": {},
-                    "execution_count": 2
-                }
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "source": [
-                "Let's now build a quick Gradio demo where we can play with the model directly using our microphone! You can run this code in a Google Colab instance (or locally!) or just head to the <a href=\"https://huggingface.co/spaces/hf-audio/whisper-large-v3\" target=\"_blank\">Space</a> to play directly with it online."
-            ],
-            "metadata": {
-                "id": "pt3YtM_PfTQY"
-            }
-        },
-        {
-            "cell_type": "code",
-            "source": [
-                "import gradio as gr\n",
-                "\n",
-                "def transcribe(inputs):\n",
-                "    if inputs is None:\n",
-                "        raise gr.Error(\"No audio file submitted! Please record an audio before submitting your request.\")\n",
-                "\n",
-                "    text = pipe(inputs, generate_kwargs={\"task\": \"transcribe\"}, return_timestamps=True)[\"text\"]\n",
-                "    return text\n",
-                "\n",
-                "demo = gr.Interface(\n",
-                "    fn=transcribe,\n",
-                "    inputs=[\n",
-                "        gr.Audio(sources=[\"microphone\", \"upload\"], type=\"filepath\"),\n",
-                "    ],\n",
-                "    outputs=\"text\",\n",
-                "    title=\"Whisper Large V3: Transcribe Audio\",\n",
-                "    description=(\n",
-                "        \"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the\"\n",
-                "        \" checkpoint [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) and 🤗 Transformers to transcribe audio files\"\n",
-                "        \" of arbitrary length.\"\n",
-                "    ),\n",
-                "    allow_flagging=\"never\",\n",
-                ")\n",
-                "\n",
-                "demo.launch()\n"
-            ],
-            "metadata": {
-                "colab": {
-                    "base_uri": "https://localhost:8080/",
-                    "height": 648
-                },
-                "id": "K0b2UZLVdIze",
-                "outputId": "bcff00e0-4fc8-4883-9ba4-480f5a6665f0"
-            },
-            "execution_count": null,
-            "outputs": [
-                {
-                    "output_type": "stream",
-                    "name": "stdout",
-                    "text": [
-                        "Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
-                        "\n",
-                        "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
-                        "Running on public URL: https://037dbdb04542aa1a29.gradio.live\n",
-                        "\n",
-                        "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
-                    ]
-                },
-                {
-                    "output_type": "display_data",
-                    "data": {
-                        "text/plain": [
-                            "<IPython.core.display.HTML object>"
-                        ],
-                        "text/html": [
-                            "<div><iframe src=\"https://037dbdb04542aa1a29.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-                        ]
-                    },
-                    "metadata": {}
-                },
-                {
-                    "output_type": "execute_result",
-                    "data": {
-                        "text/plain": []
-                    },
-                    "metadata": {},
-                    "execution_count": 4
-                }
-            ]
-        }
-    ]
-}