Surn committed on
Commit
ee1911a
·
1 Parent(s): e62d648

Add Background Image to make music easily shareable on FB as video

Browse files
app.py CHANGED
@@ -25,8 +25,9 @@ def load_model(version):
25
  return MusicGen.get_pretrained(version)
26
 
27
 
28
- def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
29
- global MODEL
 
30
  topk = int(topk)
31
  if MODEL is None or MODEL.name != model:
32
  MODEL = load_model(model)
@@ -77,7 +78,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
77
  audio_write(
78
  file.name, output, MODEL.sample_rate, strategy="loudness",
79
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
80
- waveform_video = gr.make_waveform(file.name)
81
  return waveform_video
82
 
83
 
@@ -105,6 +106,8 @@ def ui(**kwargs):
105
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
106
  with gr.Row():
107
  submit = gr.Button("Submit")
 
 
108
  with gr.Row():
109
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
110
  with gr.Row():
@@ -117,7 +120,7 @@ def ui(**kwargs):
117
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
118
  with gr.Column():
119
  output = gr.Video(label="Generated Music")
120
- submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef], outputs=[output])
121
  gr.Examples(
122
  fn=predict,
123
  examples=[
 
25
  return MusicGen.get_pretrained(version)
26
 
27
 
28
+ def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
29
+ global MODEL
30
+ output_segments = None
31
  topk = int(topk)
32
  if MODEL is None or MODEL.name != model:
33
  MODEL = load_model(model)
 
78
  audio_write(
79
  file.name, output, MODEL.sample_rate, strategy="loudness",
80
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
81
+ waveform_video = gr.make_waveform(file.name,bg_image=background, bar_count=40)
82
  return waveform_video
83
 
84
 
 
106
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
107
  with gr.Row():
108
  submit = gr.Button("Submit")
109
+ with gr.Row():
110
+ background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
111
  with gr.Row():
112
  model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
113
  with gr.Row():
 
120
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
121
  with gr.Column():
122
  output = gr.Video(label="Generated Music")
123
+ submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
124
  gr.Examples(
125
  fn=predict,
126
  examples=[
app_batched.py CHANGED
@@ -40,6 +40,8 @@ def predict(texts, melodies):
40
  processed_melodies.append(None)
41
  else:
42
  sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t()
 
 
43
  if melody.dim() == 1:
44
  melody = melody[None]
45
  melody = melody[..., :int(sr * duration)]
@@ -50,7 +52,7 @@ def predict(texts, melodies):
50
  descriptions=texts,
51
  melody_wavs=processed_melodies,
52
  melody_sample_rate=target_sr,
53
- progress=False
54
  )
55
 
56
  outputs = outputs.detach().cpu().float()
 
40
  processed_melodies.append(None)
41
  else:
42
  sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t()
43
+ duration = min(duration, melody.shape[-1] / sr)
44
+ MODEL.set_generation_params(duration=duration)
45
  if melody.dim() == 1:
46
  melody = melody[None]
47
  melody = melody[..., :int(sr * duration)]
 
52
  descriptions=texts,
53
  melody_wavs=processed_melodies,
54
  melody_sample_rate=target_sr,
55
+ progress=True
56
  )
57
 
58
  outputs = outputs.detach().cpu().float()
assets/background.png ADDED
audiocraft/utils/extend.py CHANGED
@@ -45,7 +45,7 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_durati
45
 
46
  # Iterate over the segments to create a list of Melody tensors
47
  for segment_idx in range(total_segments):
48
- print(f"segment {segment_idx} of {total_segments} \r")
49
  sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)
50
 
51
  print(f"shape:{verse.shape} dim:{verse.dim()}")
 
45
 
46
  # Iterate over the segments to create a list of Melody tensors
47
  for segment_idx in range(total_segments):
48
+ print(f"segment {segment_idx + 1} of {total_segments} \r")
49
  sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)
50
 
51
  print(f"shape:{verse.shape} dim:{verse.dim()}")