H-Liu1997 committed on
Commit
f9d911c
·
verified ·
1 Parent(s): a49013b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -31
app.py CHANGED
@@ -22,6 +22,7 @@ from decord import VideoReader
22
  from PIL import Image
23
  import copy
24
  import cv2
 
25
 
26
  import importlib
27
  import torch
@@ -349,7 +350,7 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
349
  res_motion = []
350
  counter = 0
351
  for path, is_continue in zip(path_list, is_continue_list):
352
- if create_graph:
353
  # time is limited if we create graph on hugging face, lets skip blending.
354
  res_motion_current = path_visualization(
355
  graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
@@ -481,7 +482,7 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
481
  new_width = int(original_width * (max_length / original_height))
482
 
483
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
484
- out = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))
485
 
486
  frames_to_save = fps * 20
487
  frame_count = 0
@@ -498,6 +499,14 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
498
 
499
  cap.release()
500
  out.release()
 
 
 
 
 
 
 
 
501
 
502
 
503
  character_name_to_yaml = {
@@ -510,6 +519,7 @@ character_name_to_yaml = {
510
 
511
  @spaces.GPU(duration=200)
512
  def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
 
513
  cfg = prepare_all("./configs/gradio.yaml")
514
  cfg.seed = seed
515
  seed_everything(cfg.seed)
@@ -601,8 +611,8 @@ examples_video = [
601
  ]
602
 
603
  combined_examples = [
604
- ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
605
- ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
606
  ]
607
 
608
 
@@ -641,23 +651,29 @@ def make_demo():
641
 
642
  # Create a gallery with 5 videos
643
  with gr.Row():
644
- video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
645
- video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
646
- video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
647
- video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
648
- video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
649
  with gr.Row():
650
- video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
651
- video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
652
- video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
653
- video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
654
- video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
655
 
656
  with gr.Row():
657
  gr.Markdown(
658
  """
659
  <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
660
- This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
 
 
 
 
 
 
661
  </div>
662
  """
663
  )
@@ -668,13 +684,15 @@ def make_demo():
668
  interactive=False,
669
  autoplay=False,
670
  loop=False,
671
- show_share_button=True)
 
672
  with gr.Column(scale=4):
673
  video_output_2 = gr.Video(label="Generated video - 2",
674
  interactive=False,
675
  autoplay=False,
676
  loop=False,
677
- show_share_button=True)
 
678
  with gr.Column(scale=1):
679
  file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
680
  file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
@@ -682,8 +700,6 @@ def make_demo():
682
  <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
683
  Details of the low-quality mode:
684
  <br>
685
- 0. for free users, hugging face zero-gpu has quota, if you see "over quota", please try it later, e.g., after 30 mins. for saving your quota, this project is estimated to run around 120~160s. by the following trade-off.
686
- <br>
687
  1. lower resolution, video resized as long-side 512 and keep aspect ratio.
688
  <br>
689
  2. subgraph instead of full-graph, causing noticeable "frame jumps".
@@ -733,17 +749,16 @@ def make_demo():
733
  outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
734
  )
735
 
736
- # with gr.Row():
737
- # with gr.Column(scale=4):
738
- # print(combined_examples)
739
- # gr.Examples(
740
- # examples=combined_examples,
741
- # inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
742
- # outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
743
- # fn=tango, # Function that processes both audio and video inputs
744
- # label="Select Combined Audio and Video Examples (Cached)",
745
- # cache_examples=True
746
- # )
747
 
748
  return Interface
749
 
@@ -752,4 +767,4 @@ if __name__ == "__main__":
752
  os.environ["MASTER_PORT"]='8675'
753
 
754
  demo = make_demo()
755
- demo.launch(share=True)
 
22
  from PIL import Image
23
  import copy
24
  import cv2
25
+ import subprocess
26
 
27
  import importlib
28
  import torch
 
350
  res_motion = []
351
  counter = 0
352
  for path, is_continue in zip(path_list, is_continue_list):
353
+ if False:
354
  # time is limited if we create graph on hugging face, lets skip blending.
355
  res_motion_current = path_visualization(
356
  graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
 
482
  new_width = int(original_width * (max_length / original_height))
483
 
484
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
485
+ out = cv2.VideoWriter(output_path.replace(".mp4", "_fps.mp4"), fourcc, fps, (new_width, new_height))
486
 
487
  frames_to_save = fps * 20
488
  frame_count = 0
 
499
 
500
  cap.release()
501
  out.release()
502
+ command = [
503
+ 'ffmpeg',
504
+ '-i', output_path.replace(".mp4", "_fps.mp4"),
505
+ '-vf', 'minterpolate=fps=30:mi_mode=mci:mc_mode=aobmc:vsbmc=1',
506
+ output_path
507
+ ]
508
+ subprocess.run(command)
509
+ os.remove(output_path.replace(".mp4", "_fps.mp4"))
510
 
511
 
512
  character_name_to_yaml = {
 
519
 
520
  @spaces.GPU(duration=200)
521
  def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
522
+ os.system("rm -r ./outputs/")
523
  cfg = prepare_all("./configs/gradio.yaml")
524
  cfg.seed = seed
525
  seed_everything(cfg.seed)
 
611
  ]
612
 
613
  combined_examples = [
614
+ ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/female_test_V1.mp4", 2024],
615
+ # ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
616
  ]
617
 
618
 
 
651
 
652
  # Create a gallery with 5 videos
653
  with gr.Row():
654
+ video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0", watermark="./datasets/watermark.png")
655
+ video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1", watermark="./datasets/watermark.png")
656
+ video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2", watermark="./datasets/watermark.png")
657
+ video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3", watermark="./datasets/watermark.png")
658
+ video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4", watermark="./datasets/watermark.png")
659
  with gr.Row():
660
+ video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5", watermark="./datasets/watermark.png")
661
+ video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6", watermark="./datasets/watermark.png")
662
+ video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7", watermark="./datasets/watermark.png")
663
+ video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8", watermark="./datasets/watermark.png")
664
+ video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9", watermark="./datasets/watermark.png")
665
 
666
  with gr.Row():
667
  gr.Markdown(
668
  """
669
  <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
670
+ This is an open-source project supported by Hugging Face's free L40S GPU. Runtime is limited, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
671
+ <br>
672
+ News:
673
+ <br>
674
+ [10/15]: Add watermark, fix bugs on custom character by downgrades to py3.9
675
+ <br>
676
+ [10/14]: Hugging face supports free L40S GPU for this project now!
677
  </div>
678
  """
679
  )
 
684
  interactive=False,
685
  autoplay=False,
686
  loop=False,
687
+ show_share_button=True,
688
+ watermark="./datasets/watermark.png")
689
  with gr.Column(scale=4):
690
  video_output_2 = gr.Video(label="Generated video - 2",
691
  interactive=False,
692
  autoplay=False,
693
  loop=False,
694
+ show_share_button=True,
695
+ watermark="./datasets/watermark.png")
696
  with gr.Column(scale=1):
697
  file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
698
  file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
 
700
  <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
701
  Details of the low-quality mode:
702
  <br>
 
 
703
  1. lower resolution, video resized as long-side 512 and keep aspect ratio.
704
  <br>
705
  2. subgraph instead of full-graph, causing noticeable "frame jumps".
 
749
  outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
750
  )
751
 
752
+ with gr.Row():
753
+ with gr.Column(scale=4):
754
+ gr.Examples(
755
+ examples=combined_examples,
756
+ inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
757
+ outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
758
+ fn=tango, # Function that processes both audio and video inputs
759
+ label="Select Combined Audio and Video Examples (Cached)",
760
+ cache_examples=True
761
+ )
 
762
 
763
  return Interface
764
 
 
767
  os.environ["MASTER_PORT"]='8675'
768
 
769
  demo = make_demo()
770
+ demo.launch(share=True)