mayuema committed on
Commit
3a8c535
·
1 Parent(s): 0754e7d

first release

Browse files
app_followyourpose.py → app.py RENAMED
@@ -9,15 +9,10 @@ import gradio as gr
9
  from inference_followyourpose import merge_config_then_run
10
 
11
 
12
- # TITLE = '# [FateZero](http://fate-zero-edit.github.io/)'
13
  HF_TOKEN = os.getenv('HF_TOKEN')
14
- # pipe = InferencePipeline(HF_TOKEN)
15
  pipe = merge_config_then_run()
16
- # app = InferenceUtil(HF_TOKEN)
17
 
18
  with gr.Blocks(css='style.css') as demo:
19
- # gr.Markdown(TITLE)
20
-
21
  gr.HTML(
22
  """
23
  <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
@@ -134,11 +129,6 @@ with gr.Blocks(css='style.css') as demo:
134
 
135
  with gr.Accordion('Text Prompt', open=True):
136
 
137
- # source_prompt = gr.Textbox(label='Source Prompt',
138
- # info='A good prompt describes each frame and most objects in video. Especially, it has the object or attribute that we want to edit or preserve.',
139
- # max_lines=1,
140
- # placeholder='Example: "a silver jeep driving down a curvy road in the countryside"',
141
- # value='a silver jeep driving down a curvy road in the countryside')
142
  target_prompt = gr.Textbox(label='Target Prompt',
143
  info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
144
  max_lines=1,
@@ -154,33 +144,6 @@ with gr.Blocks(css='style.css') as demo:
154
  with gr.Column():
155
  result = gr.Video(label='Result')
156
  # result.style(height=512, width=512)
157
- # with gr.Accordion('FateZero Parameters for attention fusing', open=True):
158
- # cross_replace_steps = gr.Slider(label='Cross-att replace steps',
159
- # info='More steps, replace more cross attention to preserve semantic layout.',
160
- # minimum=0.0,
161
- # maximum=1.0,
162
- # step=0.1,
163
- # value=0.7)
164
-
165
- # self_replace_steps = gr.Slider(label='Self-att replace steps',
166
- # info='More steps, replace more spatial-temporal self-attention to preserve geometry and motion.',
167
- # minimum=0.0,
168
- # maximum=1.0,
169
- # step=0.1,
170
- # value=0.7)
171
-
172
- # enhance_words = gr.Textbox(label='Enhanced words',
173
- # info='Amplify the target-words cross attention',
174
- # max_lines=1,
175
- # placeholder='Example: "watercolor "',
176
- # value='watercolor')
177
-
178
- # enhance_words_value = gr.Slider(label='Target cross-att amplification',
179
- # info='larger value, more elements of target words',
180
- # minimum=0.0,
181
- # maximum=20.0,
182
- # step=1,
183
- # value=10)
184
  with gr.Accordion('DDIM Parameters', open=True):
185
  num_steps = gr.Slider(label='Number of Steps',
186
  info='larger value has better editing capacity, but takes more time and memory.',
@@ -208,4 +171,4 @@ with gr.Blocks(css='style.css') as demo:
208
  target_prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
209
  run_button.click(fn=pipe.run, inputs=inputs, outputs=result)
210
 
211
- demo.queue().launch(share=False, server_name='0.0.0.0', server_port=80)
 
9
  from inference_followyourpose import merge_config_then_run
10
 
11
 
 
12
  HF_TOKEN = os.getenv('HF_TOKEN')
 
13
  pipe = merge_config_then_run()
 
14
 
15
  with gr.Blocks(css='style.css') as demo:
 
 
16
  gr.HTML(
17
  """
18
  <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
 
129
 
130
  with gr.Accordion('Text Prompt', open=True):
131
 
 
 
 
 
 
132
  target_prompt = gr.Textbox(label='Target Prompt',
133
  info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
134
  max_lines=1,
 
144
  with gr.Column():
145
  result = gr.Video(label='Result')
146
  # result.style(height=512, width=512)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  with gr.Accordion('DDIM Parameters', open=True):
148
  num_steps = gr.Slider(label='Number of Steps',
149
  info='larger value has better editing capacity, but takes more time and memory.',
 
171
  target_prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
172
  run_button.click(fn=pipe.run, inputs=inputs, outputs=result)
173
 
174
+ demo.queue().launch()
inference_followyourpose.py CHANGED
@@ -14,41 +14,16 @@ def get_time_string() -> str:
14
  class merge_config_then_run():
15
  def __init__(self) -> None:
16
  # Load the tokenizer
17
- # pretrained_model_path = 'FateZero/ckpt/stable-diffusion-v1-4'
18
  self.tokenizer = None
19
  self.text_encoder = None
20
  self.vae = None
21
  self.unet = None
22
 
23
- # cache_ckpt = False
24
- # if cache_ckpt:
25
- # self.tokenizer = AutoTokenizer.from_pretrained(
26
- # pretrained_model_path,
27
- # # 'FateZero/ckpt/stable-diffusion-v1-4',
28
- # subfolder="tokenizer",
29
- # use_fast=False,
30
- # )
31
-
32
- # # Load models and create wrapper for stable diffusion
33
- # self.text_encoder = CLIPTextModel.from_pretrained(
34
- # pretrained_model_path,
35
- # subfolder="text_encoder",
36
- # )
37
-
38
- # self.vae = AutoencoderKL.from_pretrained(
39
- # pretrained_model_path,
40
- # subfolder="vae",
41
- # )
42
- # model_config = {
43
- # "lora": 160,
44
- # # temporal_downsample_time: 4
45
- # "SparseCausalAttention_index": ['mid'],
46
- # "least_sc_channel": 640
47
- # }
48
- # self.unet = UNetPseudo3DConditionModel.from_2d_model(
49
- # os.path.join(pretrained_model_path, "unet"), model_config=model_config
50
- # )
51
-
52
  def run(
53
  self,
54
  data_path,
@@ -64,12 +39,12 @@ class merge_config_then_run():
64
  top_crop=0,
65
  bottom_crop=0,
66
  ):
 
67
  default_edit_config='FollowYourPose/configs/pose_sample.yaml'
68
  Omegadict_default_edit_config = OmegaConf.load(default_edit_config)
69
 
70
  dataset_time_string = get_time_string()
71
  config_now = copy.deepcopy(Omegadict_default_edit_config)
72
- # print(f"config_now['pretrained_model_path'] = model_id {model_id}")
73
 
74
  offset_dict = {
75
  "left": left_crop,
 
14
  class merge_config_then_run():
15
  def __init__(self) -> None:
16
  # Load the tokenizer
 
17
  self.tokenizer = None
18
  self.text_encoder = None
19
  self.vae = None
20
  self.unet = None
21
 
22
def download_model(self):
    """Download the FollowYourPose checkpoint repository from the Hugging Face Hub.

    All files of the ``YueMafighting/FollowYourPose_v1`` repo are placed as
    real files (no symlinks) under ``./FollowYourPose/checkpoints``.

    The previous implementation called ``hf_hub_download`` without its
    required ``filename`` argument, which raises ``TypeError`` before
    anything is downloaded. ``hf_hub_download`` fetches one named file;
    ``snapshot_download`` is the Hub API for fetching an entire repository.
    """
    # Imported locally so this method does not depend on changes to the
    # module's import block; huggingface_hub is already a dependency of
    # this file (it provides hf_hub_download).
    from huggingface_hub import snapshot_download

    repo_id = 'YueMafighting/FollowYourPose_v1'
    snapshot_download(
        repo_id=repo_id,
        local_dir='./FollowYourPose/checkpoints',
        local_dir_use_symlinks=False,
    )
25
+
26
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def run(
28
  self,
29
  data_path,
 
39
  top_crop=0,
40
  bottom_crop=0,
41
  ):
42
+ self.download_model()
43
  default_edit_config='FollowYourPose/configs/pose_sample.yaml'
44
  Omegadict_default_edit_config = OmegaConf.load(default_edit_config)
45
 
46
  dataset_time_string = get_time_string()
47
  config_now = copy.deepcopy(Omegadict_default_edit_config)
 
48
 
49
  offset_dict = {
50
  "left": left_crop,