Nupur Kumari committed on
Commit
e417d7a
·
1 Parent(s): 7db1586

custom-diffusion-space

Browse files
Files changed (2) hide show
  1. app.py +7 -3
  2. trainer.py +3 -0
app.py CHANGED
@@ -76,8 +76,8 @@ def create_training_demo(trainer: Trainer,
76
  class_prompt = gr.Textbox(label='Regularization set Prompt',
77
  max_lines=1, placeholder='Example: "cat"')
78
  gr.Markdown('''
79
- - We use "\<new1\>" appended in front of the concept. E.g. "\<new1\> cat".
80
- - For a new concept, use "photo of a \<new1\> cat" for concept_prompt and "cat" for class_prompt.
81
  - For a style concept, use "painting in the style of \<new1\> art" for concept_prompt and "art" for class_prompt.
82
  ''')
83
  with gr.Box():
@@ -95,10 +95,13 @@ def create_training_demo(trainer: Trainer,
95
  label='Number of Gradient Accumulation',
96
  value=1,
97
  precision=0)
98
- use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
 
 
99
  gr.Markdown('''
100
  - Only enable one of "Train Text Encoder" or "modifier token" or None.
101
  - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
 
102
  - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
103
  ''')
104
 
@@ -129,6 +132,7 @@ def create_training_demo(trainer: Trainer,
129
  gradient_accumulation,
130
  batch_size,
131
  use_8bit_adam,
 
132
  ],
133
  outputs=[
134
  training_status,
 
76
  class_prompt = gr.Textbox(label='Regularization set Prompt',
77
  max_lines=1, placeholder='Example: "cat"')
78
  gr.Markdown('''
79
+ - Use "\<new1\>" appended in front of the concept. E.g. "\<new1\> cat" if modifier_token is enabled.
80
+ - For a new concept e.g. concept_prompt is "photo of a \<new1\> cat" and "cat" for class_prompt.
81
  - For a style concept, use "painting in the style of \<new1\> art" for concept_prompt and "art" for class_prompt.
82
  ''')
83
  with gr.Box():
 
95
  label='Number of Gradient Accumulation',
96
  value=1,
97
  precision=0)
98
+ with gr.Row():
99
+ use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
100
+ gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
101
  gr.Markdown('''
102
  - Only enable one of "Train Text Encoder" or "modifier token" or None.
103
  - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
104
+ - Enable gradient checkpointing to save memory (~14GB) at the expense of slower backward pass.
105
  - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
106
  ''')
107
 
 
132
  gradient_accumulation,
133
  batch_size,
134
  use_8bit_adam,
135
+ gradient_checkpointing
136
  ],
137
  outputs=[
138
  training_status,
trainer.py CHANGED
@@ -69,6 +69,7 @@ class Trainer:
69
  gradient_accumulation: int,
70
  batch_size: int,
71
  use_8bit_adam: bool,
 
72
  ) -> tuple[dict, list[pathlib.Path]]:
73
  if not torch.cuda.is_available():
74
  raise gr.Error('CUDA is not available.')
@@ -111,6 +112,8 @@ class Trainer:
111
  command += ' --use_8bit_adam'
112
  if train_text_encoder:
113
  command += f' --train_text_encoder'
 
 
114
 
115
  with open(self.output_dir / 'train.sh', 'w') as f:
116
  command_s = ' '.join(command.split())
 
69
  gradient_accumulation: int,
70
  batch_size: int,
71
  use_8bit_adam: bool,
72
+ gradient_checkpointing: bool,
73
  ) -> tuple[dict, list[pathlib.Path]]:
74
  if not torch.cuda.is_available():
75
  raise gr.Error('CUDA is not available.')
 
112
  command += ' --use_8bit_adam'
113
  if train_text_encoder:
114
  command += f' --train_text_encoder'
115
+ if gradient_checkpointing:
116
+ command += f' --gradient_checkpointing'
117
 
118
  with open(self.output_dir / 'train.sh', 'w') as f:
119
  command_s = ' '.join(command.split())