seungduk committed on
Commit
b0ee9ec
·
unverified ·
1 Parent(s): 0bc114d

Set `gradient_clipping` to `auto` in DeepSpeed configs (#1382) [skip ci]

Browse files
deepspeed_configs/zero1.json CHANGED
@@ -16,6 +16,7 @@
16
  "min_loss_scale": 1
17
  },
18
  "gradient_accumulation_steps": "auto",
 
19
  "train_batch_size": "auto",
20
  "train_micro_batch_size_per_gpu": "auto",
21
  "wall_clock_breakdown": false
 
16
  "min_loss_scale": 1
17
  },
18
  "gradient_accumulation_steps": "auto",
19
+ "gradient_clipping": "auto",
20
  "train_batch_size": "auto",
21
  "train_micro_batch_size_per_gpu": "auto",
22
  "wall_clock_breakdown": false
deepspeed_configs/zero2.json CHANGED
@@ -20,6 +20,7 @@
20
  "min_loss_scale": 1
21
  },
22
  "gradient_accumulation_steps": "auto",
 
23
  "train_batch_size": "auto",
24
  "train_micro_batch_size_per_gpu": "auto",
25
  "wall_clock_breakdown": false
 
20
  "min_loss_scale": 1
21
  },
22
  "gradient_accumulation_steps": "auto",
23
+ "gradient_clipping": "auto",
24
  "train_batch_size": "auto",
25
  "train_micro_batch_size_per_gpu": "auto",
26
  "wall_clock_breakdown": false
deepspeed_configs/zero3.json CHANGED
@@ -24,6 +24,7 @@
24
  "min_loss_scale": 1
25
  },
26
  "gradient_accumulation_steps": "auto",
 
27
  "train_batch_size": "auto",
28
  "train_micro_batch_size_per_gpu": "auto",
29
  "wall_clock_breakdown": false
 
24
  "min_loss_scale": 1
25
  },
26
  "gradient_accumulation_steps": "auto",
27
+ "gradient_clipping": "auto",
28
  "train_batch_size": "auto",
29
  "train_micro_batch_size_per_gpu": "auto",
30
  "wall_clock_breakdown": false
deepspeed_configs/zero3_bf16.json CHANGED
@@ -24,6 +24,7 @@
24
  "min_loss_scale": 1
25
  },
26
  "gradient_accumulation_steps": "auto",
 
27
  "train_batch_size": "auto",
28
  "train_micro_batch_size_per_gpu": "auto",
29
  "wall_clock_breakdown": false
 
24
  "min_loss_scale": 1
25
  },
26
  "gradient_accumulation_steps": "auto",
27
+ "gradient_clipping": "auto",
28
  "train_batch_size": "auto",
29
  "train_micro_batch_size_per_gpu": "auto",
30
  "wall_clock_breakdown": false