Set `gradient_clipping` to `auto` in DeepSpeed configs (#1382) [skip ci]
Browse files
deepspeed_configs/zero1.json
CHANGED
@@ -16,6 +16,7 @@
|
|
16 |
"min_loss_scale": 1
|
17 |
},
|
18 |
"gradient_accumulation_steps": "auto",
|
|
|
19 |
"train_batch_size": "auto",
|
20 |
"train_micro_batch_size_per_gpu": "auto",
|
21 |
"wall_clock_breakdown": false
|
|
|
16 |
"min_loss_scale": 1
|
17 |
},
|
18 |
"gradient_accumulation_steps": "auto",
|
19 |
+
"gradient_clipping": "auto",
|
20 |
"train_batch_size": "auto",
|
21 |
"train_micro_batch_size_per_gpu": "auto",
|
22 |
"wall_clock_breakdown": false
|
deepspeed_configs/zero2.json
CHANGED
@@ -20,6 +20,7 @@
|
|
20 |
"min_loss_scale": 1
|
21 |
},
|
22 |
"gradient_accumulation_steps": "auto",
|
|
|
23 |
"train_batch_size": "auto",
|
24 |
"train_micro_batch_size_per_gpu": "auto",
|
25 |
"wall_clock_breakdown": false
|
|
|
20 |
"min_loss_scale": 1
|
21 |
},
|
22 |
"gradient_accumulation_steps": "auto",
|
23 |
+
"gradient_clipping": "auto",
|
24 |
"train_batch_size": "auto",
|
25 |
"train_micro_batch_size_per_gpu": "auto",
|
26 |
"wall_clock_breakdown": false
|
deepspeed_configs/zero3.json
CHANGED
@@ -24,6 +24,7 @@
|
|
24 |
"min_loss_scale": 1
|
25 |
},
|
26 |
"gradient_accumulation_steps": "auto",
|
|
|
27 |
"train_batch_size": "auto",
|
28 |
"train_micro_batch_size_per_gpu": "auto",
|
29 |
"wall_clock_breakdown": false
|
|
|
24 |
"min_loss_scale": 1
|
25 |
},
|
26 |
"gradient_accumulation_steps": "auto",
|
27 |
+
"gradient_clipping": "auto",
|
28 |
"train_batch_size": "auto",
|
29 |
"train_micro_batch_size_per_gpu": "auto",
|
30 |
"wall_clock_breakdown": false
|
deepspeed_configs/zero3_bf16.json
CHANGED
@@ -24,6 +24,7 @@
|
|
24 |
"min_loss_scale": 1
|
25 |
},
|
26 |
"gradient_accumulation_steps": "auto",
|
|
|
27 |
"train_batch_size": "auto",
|
28 |
"train_micro_batch_size_per_gpu": "auto",
|
29 |
"wall_clock_breakdown": false
|
|
|
24 |
"min_loss_scale": 1
|
25 |
},
|
26 |
"gradient_accumulation_steps": "auto",
|
27 |
+
"gradient_clipping": "auto",
|
28 |
"train_batch_size": "auto",
|
29 |
"train_micro_batch_size_per_gpu": "auto",
|
30 |
"wall_clock_breakdown": false
|