Nuno-Tome committed on
Commit
9f3446c
·
1 Parent(s): 48d3fc2

no message

Browse files
Files changed (3) hide show
  1. app.py +13 -1
  2. bark_exemple copy.py +175 -0
  3. bark_exemple.py +86 -0
app.py CHANGED
@@ -1,9 +1,20 @@
1
  import gradio as gr
2
  import json
 
 
 
 
3
 
4
  DEBUG_MODE = False
5
 
6
 
 
 
 
 
 
 
 
7
  def echo(text, request: gr.Request):
8
  output_text = {"report1": "SUCCESS"} # Initialize as a dictionary
9
  output_text["report3"] = text
@@ -15,7 +26,8 @@ def echo(text, request: gr.Request):
15
 
16
 
17
 
18
-
 
19
 
20
  output_text_json = json.dumps(output_text)
21
  return output_text_json
 
1
  import gradio as gr
2
  import json
3
+ from bark import SAMPLE_RATE, generate_audio, preload_models
4
+ from bark.generation import SUPPORTED_LANGS
5
+ from share_btn import community_icon_html, loading_icon_html, share_js
6
+ from flask import Flask, jsonify
7
 
8
  DEBUG_MODE = False
9
 
10
 
11
# Flask application object that the route decorator below registers against.
# Without it, ``@app.route`` raises NameError as soon as the module is imported.
# NOTE(review): nothing visible in this diff starts or mounts this Flask app
# alongside the Gradio interface — confirm how the route is meant to be served.
app = Flask(__name__)


@app.route('/api/test1', methods=['GET'])
def endpoint1():
    """Handle ``GET /api/test1``.

    Returns:
        str: The fixed plain-text body ``"Endpoint 1"``.
    """
    # A JSON body (jsonify({"message": "Endpoint 1"})) was drafted but left
    # disabled; the endpoint currently answers with plain text.
    return "Endpoint 1"
16
+
17
+
18
  def echo(text, request: gr.Request):
19
  output_text = {"report1": "SUCCESS"} # Initialize as a dictionary
20
  output_text["report3"] = text
 
26
 
27
 
28
 
29
+ #audio_out = gr.Audio(label="Generated Audio",
30
+ # type="numpy", elem_id="audio_out")
31
 
32
  output_text_json = json.dumps(output_text)
33
  return output_text_json
bark_exemple copy.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gradio as gr
3
+ from bark import SAMPLE_RATE, generate_audio, preload_models
4
+ from bark.generation import SUPPORTED_LANGS
5
+ from share_btn import community_icon_html, loading_icon_html, share_js
6
+
7
# When True, model preloading is skipped.
DEBUG_MODE = False

if not DEBUG_MODE:
    # The return value is bound to a throwaway name; the call is made for its
    # effect only.
    _ = preload_models()

# Labels offered for selection: the two generic prompts first, followed by ten
# numbered speakers for every entry of SUPPORTED_LANGS.
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]

# Maps each label in AVAILABLE_PROMPTS to its prompt identifier
# (None for "Unconditional").
PROMPT_LOOKUP = {}
for _, lang in SUPPORTED_LANGS:
    for n in range(10):
        label = f"Speaker {n} ({lang})"
        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
        AVAILABLE_PROMPTS.append(label)
PROMPT_LOOKUP.update({"Unconditional": None, "Announcer": "announcer"})
21
+
22
# Sample text used as the initial value of the input box.
default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."

# Markdown heading for the page. It is plain Markdown and must not carry a
# dangling closing tag such as ``</div>`` (there is no opening tag to match).
title = "# 🐶 Bark"
25
+
26
# Intro block shown under the title: badge links followed by a short blurb.
# A trailing backslash inside the literal joins the next line onto the same
# paragraph. Every link target carries an explicit scheme so it is not
# resolved relative to the page URL.
description = """
<div>
<a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
<a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
<a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
</div>
Bark is a universal text-to-audio model created by [Suno](https://www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
This demo should be used for research purposes only. Commercial use is strictly prohibited. \
The model output is not censored and the authors do not endorse the opinions in the generated content. \
Use at your own risk.
"""
38
+
39
+ article = """
40
+ ## 🌎 Foreign Language
41
+ Bark supports various languages out-of-the-box and automatically determines language from input text. \
42
+ When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
43
+ Try the prompt:
44
+ ```
45
+ Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
46
+ ```
47
+ ## 🤭 Non-Speech Sounds
48
+ Below is a list of some known non-speech sounds, but we are finding more every day. \
49
+ Please let us know if you find patterns that work particularly well on Discord!
50
+ * [laughter]
51
+ * [laughs]
52
+ * [sighs]
53
+ * [music]
54
+ * [gasps]
55
+ * [clears throat]
56
+ * — or ... for hesitations
57
+ * ♪ for song lyrics
58
+ * capitalization for emphasis of a word
59
+ * MAN/WOMAN: for bias towards speaker
60
+ Try the prompt:
61
+ ```
62
+ " [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
63
+ ```
64
+ ## 🎶 Music
65
+ Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
66
+ Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
67
+ Try the prompt:
68
+ ```
69
+ ♪ In the jungle, the mighty jungle, the lion barks tonight ♪
70
+ ```
71
+ ## 🧬 Voice Cloning
72
+ Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
73
+ The model also attempts to preserve music, ambient noise, etc. from input audio. \
74
+ However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
75
+ ## 👥 Speaker Prompts
76
+ You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
77
+ Please note that these are not always respected, especially if a conflicting audio history prompt is given.
78
+ Try the prompt:
79
+ ```
80
+ WOMAN: I would like an oatmilk latte please.
81
+ MAN: Wow, that's expensive!
82
+ ```
83
+ ## Details
84
+ Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
85
+ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
86
+ """
87
+
88
# Sample rows for the demo; each row is [input text, acoustic-prompt label].
examples = [
    [
        "Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!",
        "Unconditional",
    ],
    [
        "Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
        "Speaker 1 (en)",
    ],
    [
        "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
        "Speaker 0 (es)",
    ],
]
96
+
97
+
98
def gen_tts(text, history_prompt):
    """Generate audio for *text* and return it as 16-bit integer samples.

    Args:
        text: Prompt text handed to ``generate_audio``.
        history_prompt: A key of ``PROMPT_LOOKUP``; it is translated to the
            value forwarded to ``generate_audio`` as ``history_prompt``.

    Returns:
        tuple: ``(SAMPLE_RATE, samples)`` where ``samples`` is a NumPy
        ``int16`` array.

    Raises:
        KeyError: If ``history_prompt`` is not a key of ``PROMPT_LOOKUP``.
    """
    voice = PROMPT_LOOKUP[history_prompt]
    if DEBUG_MODE:
        # Debug path: SAMPLE_RATE zero-valued samples, no model call.
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        audio_arr = generate_audio(text, history_prompt=voice)
    # Clamp before scaling: a sample outside [-1, 1] would exceed the int16
    # range after the multiply and wrap around in the cast.
    # NOTE(review): assumes generate_audio yields float samples nominally in
    # [-1, 1] — confirm against the bark documentation.
    audio_arr = (np.clip(audio_arr, -1.0, 1.0) * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_arr)
107
+
108
+
109
+ css = """
110
+ #share-btn-container {
111
+ display: flex;
112
+ padding-left: 0.5rem !important;
113
+ padding-right: 0.5rem !important;
114
+ background-color: #000000;
115
+ justify-content: center;
116
+ align-items: center;
117
+ border-radius: 9999px !important;
118
+ width: 13rem;
119
+ margin-top: 10px;
120
+ margin-left: auto;
121
+ flex: unset !important;
122
+ }
123
+ #share-btn {
124
+ all: initial;
125
+ color: #ffffff;
126
+ font-weight: 600;
127
+ cursor: pointer;
128
+ font-family: 'IBM Plex Sans', sans-serif;
129
+ margin-left: 0.5rem !important;
130
+ padding-top: 0.25rem !important;
131
+ padding-bottom: 0.25rem !important;
132
+ right:0;
133
+ }
134
+ #share-btn * {
135
+ all: unset !important;
136
+ }
137
+ #share-btn-container div:nth-child(-n+2){
138
+ width: auto !important;
139
+ min-height: 0px !important;
140
+ }
141
+ #share-btn-container .wrap {
142
+ display: none !important;
143
+ }
144
+ """
145
+ with gr.Blocks(css=css) as block:
146
+ gr.Markdown(title)
147
+ gr.Markdown(description)
148
+ with gr.Row():
149
+ with gr.Column():
150
+ input_text = gr.Textbox(
151
+ label="Input Text", lines=2, value=default_text, elem_id="input_text")
152
+ options = gr.Dropdown(
153
+ AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
154
+ run_button = gr.Button(text="Generate Audio", type="button")
155
+ with gr.Column():
156
+ audio_out = gr.Audio(label="Generated Audio",
157
+ type="numpy", elem_id="audio_out")
158
+ with gr.Row(visible=False) as share_row:
159
+ with gr.Group(elem_id="share-btn-container"):
160
+ community_icon = gr.HTML(community_icon_html)
161
+ loading_icon = gr.HTML(loading_icon_html)
162
+ share_button = gr.Button(
163
+ "Share to community", elem_id="share-btn")
164
+ share_button.click(None, [], [], _js=share_js)
165
+ inputs = [input_text, options]
166
+ outputs = [audio_out]
167
+ gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
168
+ outputs=outputs, cache_examples=True)
169
+ gr.Markdown(article)
170
+ run_button.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=share_row, queue=False).then(
171
+ fn=gen_tts, inputs=inputs, outputs=outputs, queue=True).then(
172
+ fn=lambda: gr.update(visible=True), inputs=None, outputs=share_row, queue=False)
173
+
174
+ block.queue()
175
+ block.launch()
bark_exemple.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gradio as gr
3
+ from bark import SAMPLE_RATE, generate_audio, preload_models
4
+ from bark.generation import SUPPORTED_LANGS
5
+ from share_btn import community_icon_html, loading_icon_html, share_js
6
+
7
# When True, model preloading is skipped.
DEBUG_MODE = False

if not DEBUG_MODE:
    # The return value is bound to a throwaway name; the call is made for its
    # effect only.
    _ = preload_models()

# Labels offered for selection: the two generic prompts first, followed by ten
# numbered speakers for every entry of SUPPORTED_LANGS.
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]

# Maps each label in AVAILABLE_PROMPTS to its prompt identifier
# (None for "Unconditional").
PROMPT_LOOKUP = {}
for _, lang in SUPPORTED_LANGS:
    for n in range(10):
        label = f"Speaker {n} ({lang})"
        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
        AVAILABLE_PROMPTS.append(label)
PROMPT_LOOKUP.update({"Unconditional": None, "Announcer": "announcer"})
21
+
22
# Sample text used as the initial value of the input box.
default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."

# Markdown heading for the page. It is plain Markdown and must not carry a
# dangling closing tag such as ``</div>`` (there is no opening tag to match).
title = "# 🐶 Bark"

# Intro text shown under the title; intentionally blank in this file.
description = """
"""

# Long-form notes shown below the examples; intentionally blank in this file.
article = """
"""

# Sample rows for the demo; each row is [input text, acoustic-prompt label].
examples = [
    [
        "Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!",
        "Unconditional",
    ],
    [
        "Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
        "Speaker 1 (en)",
    ],
    [
        "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
        "Speaker 0 (es)",
    ],
]
40
+
41
+
42
def gen_tts(text, history_prompt):
    """Generate audio for *text* and return it as 16-bit integer samples.

    Args:
        text: Prompt text handed to ``generate_audio``.
        history_prompt: A key of ``PROMPT_LOOKUP``; it is translated to the
            value forwarded to ``generate_audio`` as ``history_prompt``.

    Returns:
        tuple: ``(SAMPLE_RATE, samples)`` where ``samples`` is a NumPy
        ``int16`` array.

    Raises:
        KeyError: If ``history_prompt`` is not a key of ``PROMPT_LOOKUP``.
    """
    voice = PROMPT_LOOKUP[history_prompt]
    if DEBUG_MODE:
        # Debug path: SAMPLE_RATE zero-valued samples, no model call.
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        audio_arr = generate_audio(text, history_prompt=voice)
    # Clamp before scaling: a sample outside [-1, 1] would exceed the int16
    # range after the multiply and wrap around in the cast.
    # NOTE(review): assumes generate_audio yields float samples nominally in
    # [-1, 1] — confirm against the bark documentation.
    audio_arr = (np.clip(audio_arr, -1.0, 1.0) * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_arr)
51
+
52
+
53
+ css = """
54
+
55
+ """
56
+ with gr.Blocks(css=css) as block:
57
+ gr.Markdown(title)
58
+ gr.Markdown(description)
59
+ with gr.Row():
60
+ with gr.Column():
61
+ input_text = gr.Textbox(
62
+ label="Input Text", lines=2, value=default_text, elem_id="input_text")
63
+ options = gr.Dropdown(
64
+ AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
65
+ run_button = gr.Button(text="Generate Audio", type="button")
66
+ with gr.Column():
67
+ audio_out = gr.Audio(label="Generated Audio",
68
+ type="numpy", elem_id="audio_out")
69
+ with gr.Row(visible=False) as share_row:
70
+ with gr.Group(elem_id="share-btn-container"):
71
+ community_icon = gr.HTML(community_icon_html)
72
+ loading_icon = gr.HTML(loading_icon_html)
73
+ share_button = gr.Button(
74
+ "Share to community", elem_id="share-btn")
75
+ share_button.click(None, [], [], _js=share_js)
76
+ inputs = [input_text, options]
77
+ outputs = [audio_out]
78
+ gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
79
+ outputs=outputs, cache_examples=True)
80
+ gr.Markdown(article)
81
+ run_button.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=share_row, queue=False).then(
82
+ fn=gen_tts, inputs=inputs, outputs=outputs, queue=True).then(
83
+ fn=lambda: gr.update(visible=True), inputs=None, outputs=share_row, queue=False)
84
+
85
+ block.queue()
86
+ block.launch()