haoheliu radames committed on
Commit
4b70abe
·
1 Parent(s): 27e435d

add a few examples and a share-to-community button (#7)

Browse files

- add a few examples and a share-to-community button (ebf12568b1845b59385e6653033b48efe332ea62)


Co-authored-by: Radamés Ajna <[email protected]>

Files changed (2) hide show
  1. app.py +62 -26
  2. share_btn.py +34 -20
app.py CHANGED
@@ -30,7 +30,7 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
30
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
31
  if(len(waveform) == 1):
32
  waveform = waveform[0]
33
- return waveform # ,gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
34
 
35
  # iface = gr.Interface(fn=text2audio, inputs=[
36
  # gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
@@ -42,17 +42,22 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
42
  # )
43
  # iface.launch(share=True)
44
 
 
45
  css = """
 
 
 
 
46
  .gradio-container {
47
  font-family: 'IBM Plex Sans', sans-serif;
48
  }
49
  .gr-button {
50
  color: white;
51
- border-color: black;
52
- background: black;
53
  }
54
  input[type='range'] {
55
- accent-color: black;
56
  }
57
  .dark input[type='range'] {
58
  accent-color: #dfdfdf;
@@ -98,7 +103,6 @@ css = """
98
  border-radius: 14px !important;
99
  }
100
  #advanced-options {
101
- display: none;
102
  margin-bottom: 20px;
103
  }
104
  .footer {
@@ -125,6 +129,12 @@ css = """
125
  font-weight: bold;
126
  font-size: 115%;
127
  }
 
 
 
 
 
 
128
  .animate-spin {
129
  animation: spin 1s linear infinite;
130
  }
@@ -154,16 +164,20 @@ css = """
154
  #share-btn-container .wrap {
155
  display: none !important;
156
  }
157
-
158
  .gr-form{
159
  flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
160
  }
161
  #prompt-container{
162
  gap: 0;
163
  }
164
- #prompt-text-input, #negative-prompt-text-input{padding: .45rem 0.625rem}
165
- #component-16{border-top-width: 1px!important;margin-top: 1em}
166
- .image_duplication{position: absolute; width: 100px; left: 50px}
 
 
 
 
 
167
  """
168
  iface = gr.Blocks(css=css)
169
 
@@ -188,17 +202,21 @@ with iface:
188
  </p>
189
  </div>
190
  """
191
- )
192
  gr.HTML("""
193
- <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
194
- <br/>
195
- <a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation?duplicate=true">
196
- <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
197
- <p/>""")
 
 
 
 
198
  with gr.Group():
199
  with gr.Box():
200
  ############# Input
201
- textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.")
202
 
203
  with gr.Accordion("Click to modify detailed configurations", open=False):
204
  seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
@@ -207,7 +225,7 @@ with iface:
207
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
208
  ############# Output
209
  # outputs=gr.Audio(label="Output", type="numpy")
210
- outputs=gr.Video(label="Output")
211
 
212
  # with gr.Group(elem_id="container-advanced-btns"):
213
  # # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
@@ -216,10 +234,17 @@ with iface:
216
  # loading_icon = gr.HTML(loading_icon_html, visible=False)
217
  # share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
218
  # outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
219
-
220
  btn = gr.Button("Submit").style(full_width=True)
221
- btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs]) # , share_button, community_icon, loading_icon
222
- # share_button.click(None, [], [], _js=share_js)
 
 
 
 
 
 
 
 
223
  gr.HTML('''
224
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
225
  <p>Follow the latest update of AudioLDM on our<a href="https://github.com/haoheliu/AudioLDM" style="text-decoration: underline;" target="_blank"> Github repo</a>
@@ -229,17 +254,28 @@ with iface:
229
  <br>
230
  </div>
231
  ''')
232
-
 
 
 
 
 
 
 
 
 
 
 
233
  with gr.Accordion("Additional information", open=False):
234
  gr.HTML(
235
- """
236
  <div class="acknowledgments">
237
  <p> We build the model with data from <a href="http://research.google.com/audioset/">AudioSet</a>, <a href="https://freesound.org/">Freesound</a> and <a href="https://sound-effects.bbcrewind.co.uk/">BBC Sound Effect library</a>. We share this demo based on the <a href="https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf">UK copyright exception</a> of data for academic research. </p>
238
  </div>
239
  """
240
- )
241
  # <p>This demo is strictly for research demo purpose only. For commercial use please <a href="[email protected]">contact us</a>.</p>
242
-
243
- iface.queue(concurrency_count = 3)
244
  iface.launch(debug=True)
245
- # iface.launch(debug=True, share=True)
 
30
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
31
  if(len(waveform) == 1):
32
  waveform = waveform[0]
33
+ return waveform
34
 
35
  # iface = gr.Interface(fn=text2audio, inputs=[
36
  # gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
 
42
  # )
43
  # iface.launch(share=True)
44
 
45
+
46
  css = """
47
+ a {
48
+ color: inherit;
49
+ text-decoration: underline;
50
+ }
51
  .gradio-container {
52
  font-family: 'IBM Plex Sans', sans-serif;
53
  }
54
  .gr-button {
55
  color: white;
56
+ border-color: #000000;
57
+ background: #000000;
58
  }
59
  input[type='range'] {
60
+ accent-color: #000000;
61
  }
62
  .dark input[type='range'] {
63
  accent-color: #dfdfdf;
 
103
  border-radius: 14px !important;
104
  }
105
  #advanced-options {
 
106
  margin-bottom: 20px;
107
  }
108
  .footer {
 
129
  font-weight: bold;
130
  font-size: 115%;
131
  }
132
+ #container-advanced-btns{
133
+ display: flex;
134
+ flex-wrap: wrap;
135
+ justify-content: space-between;
136
+ align-items: center;
137
+ }
138
  .animate-spin {
139
  animation: spin 1s linear infinite;
140
  }
 
164
  #share-btn-container .wrap {
165
  display: none !important;
166
  }
 
167
  .gr-form{
168
  flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
169
  }
170
  #prompt-container{
171
  gap: 0;
172
  }
173
+ #generated_id{
174
+ min-height: 700px
175
+ }
176
+ #setting_id{
177
+ margin-bottom: 12px;
178
+ text-align: center;
179
+ font-weight: 900;
180
+ }
181
  """
182
  iface = gr.Blocks(css=css)
183
 
 
202
  </p>
203
  </div>
204
  """
205
+ )
206
  gr.HTML("""
207
+ <h1 style="font-weight: 900; margin-bottom: 7px;">
208
+ AudioLDM: Text-to-Audio Generation with Latent Diffusion Models
209
+ </h1>
210
+ <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
211
+ <br/>
212
+ <a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation?duplicate=true">
213
+ <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
214
+ <p/>
215
+ """)
216
  with gr.Group():
217
  with gr.Box():
218
  ############# Input
219
+ textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.", elem_id="prompt-in")
220
 
221
  with gr.Accordion("Click to modify detailed configurations", open=False):
222
  seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
 
225
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
226
  ############# Output
227
  # outputs=gr.Audio(label="Output", type="numpy")
228
+ outputs=gr.Video(label="Output", elem_id="output-video")
229
 
230
  # with gr.Group(elem_id="container-advanced-btns"):
231
  # # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
 
234
  # loading_icon = gr.HTML(loading_icon_html, visible=False)
235
  # share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
236
  # outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
 
237
  btn = gr.Button("Submit").style(full_width=True)
238
+
239
+ with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
240
+ community_icon = gr.HTML(community_icon_html)
241
+ loading_icon = gr.HTML(loading_icon_html)
242
+ share_button = gr.Button("Share to community", elem_id="share-btn")
243
+
244
+ btn.click(text2audio, inputs=[
245
+ textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs, share_group])
246
+
247
+ share_button.click(None, [], [], _js=share_js)
248
  gr.HTML('''
249
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
250
  <p>Follow the latest update of AudioLDM on our<a href="https://github.com/haoheliu/AudioLDM" style="text-decoration: underline;" target="_blank"> Github repo</a>
 
254
  <br>
255
  </div>
256
  ''')
257
+ gr.Examples([
258
+ ["A hammer is hitting a wooden surface", 5, 2.5, 45, 3],
259
+ ["Peaceful and calming ambient music with singing bowl and other instruments.", 5, 2.5, 45, 3],
260
+ ["A man is speaking in a small room.", 5, 2.5, 45, 3],
261
+ ["A female is speaking followed by footstep sound", 5, 2.5, 45, 3],
262
+ ["Wooden table tapping sound followed by water pouring sound.", 5, 2.5, 45, 3],
263
+ ],
264
+ fn=text2audio,
265
+ inputs=[textbox, duration, guidance_scale, seed, n_candidates],
266
+ outputs=[outputs],
267
+ cache_examples=True,
268
+ )
269
  with gr.Accordion("Additional information", open=False):
270
  gr.HTML(
271
+ """
272
  <div class="acknowledgments">
273
  <p> We build the model with data from <a href="http://research.google.com/audioset/">AudioSet</a>, <a href="https://freesound.org/">Freesound</a> and <a href="https://sound-effects.bbcrewind.co.uk/">BBC Sound Effect library</a>. We share this demo based on the <a href="https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/375954/Research.pdf">UK copyright exception</a> of data for academic research. </p>
274
  </div>
275
  """
276
+ )
277
  # <p>This demo is strictly for research demo purpose only. For commercial use please <a href="[email protected]">contact us</a>.</p>
278
+
279
+ iface.queue(concurrency_count=3)
280
  iface.launch(debug=True)
281
+ # iface.launch(debug=True, share=True)
share_btn.py CHANGED
@@ -22,34 +22,48 @@ share_js = """async () => {
22
  const url = await response.text();
23
  return url;
24
  }
25
- const gradioEl = document.querySelector('body > gradio-app');
26
- const imgEls = gradioEl.querySelectorAll('#gallery img');
27
- const promptTxt = gradioEl.querySelector('#prompt-text-input input').value;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  const shareBtnEl = gradioEl.querySelector('#share-btn');
29
  const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
30
  const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');
31
- if(!imgEls.length){
32
  return;
33
  };
34
  shareBtnEl.style.pointerEvents = 'none';
35
  shareIconEl.style.display = 'none';
36
  loadingIconEl.style.removeProperty('display');
37
- const files = await Promise.all(
38
- [...imgEls].map(async (imgEl) => {
39
- const res = await fetch(imgEl.src);
40
- const blob = await res.blob();
41
- const imgId = Date.now() % 200;
42
- const fileName = `diffuse-the-rest-${{imgId}}.jpg`;
43
- return new File([blob], fileName, { type: 'image/jpeg' });
44
- })
45
- );
46
- const urls = await Promise.all(files.map((f) => uploadFile(f)));
47
- const htmlImgs = urls.map(url => `<img src='${url}' width='400' height='400'>`);
48
- const descriptionMd = `<div style='display: flex; flex-wrap: wrap; column-gap: 0.75rem;'>
49
- ${htmlImgs.join(`\n`)}
50
- </div>`;
51
  const params = new URLSearchParams({
52
- title: promptTxt,
53
  description: descriptionMd,
54
  });
55
  const paramsStr = params.toString();
@@ -57,4 +71,4 @@ ${htmlImgs.join(`\n`)}
57
  shareBtnEl.style.removeProperty('pointer-events');
58
  shareIconEl.style.removeProperty('display');
59
  loadingIconEl.style.display = 'none';
60
- }"""
 
22
  const url = await response.text();
23
  return url;
24
  }
25
+ async function getInputVideoFile(videoEl){
26
+ const res = await fetch(videoEl.src);
27
+ const blob = await res.blob();
28
+ const videoId = Date.now() % 200;
29
+ const fileName = `sd-perception-${{videoId}}.mp4`;
30
+ return new File([blob], fileName, { type: 'video/mp4' });
31
+ }
32
+
33
+ async function audioToBase64(audioFile) {
34
+ return new Promise((resolve, reject) => {
35
+ let reader = new FileReader();
36
+ reader.readAsDataURL(audioFile);
37
+ reader.onload = () => resolve(reader.result);
38
+ reader.onerror = error => reject(error);
39
+
40
+ });
41
+ }
42
+ const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app');
43
+ const inputPromptEl = gradioEl.querySelector('#prompt-in input').value;
44
+ const outputVideoEl = gradioEl.querySelector('#output-video video');
45
+
46
+ let titleTxt = `Text-to-Audio: ${inputPromptEl}`;
47
+
48
  const shareBtnEl = gradioEl.querySelector('#share-btn');
49
  const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
50
  const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');
51
+ if(!outputVideoEl){
52
  return;
53
  };
54
  shareBtnEl.style.pointerEvents = 'none';
55
  shareIconEl.style.display = 'none';
56
  loadingIconEl.style.removeProperty('display');
57
+ const outputVideo = await getInputVideoFile(outputVideoEl);
58
+ const urlOutputVideo = await uploadFile(outputVideo);
59
+
60
+ const descriptionMd = `
61
+ ##### ${inputPromptEl}
62
+
63
+ ${urlOutputVideo}
64
+ `;
 
 
 
 
 
 
65
  const params = new URLSearchParams({
66
+ title: titleTxt,
67
  description: descriptionMd,
68
  });
69
  const paramsStr = params.toString();
 
71
  shareBtnEl.style.removeProperty('pointer-events');
72
  shareIconEl.style.removeProperty('display');
73
  loadingIconEl.style.display = 'none';
74
+ }"""