Update README.md
Browse files
README.md
CHANGED
@@ -66,7 +66,7 @@ start generating the assistant reply.
|
|
66 |
|
67 |
## Dev Details
|
68 |
|
69 |
-
- base model: [
|
70 |
- checkpoint: 1 epoch (6000 steps)
|
71 |
|
72 |
command: `deepspeed trainer_sft.py --configs defaults stablelm-7b oasst-mix --cache_dir /home/ubuntu/data_cache --output_dir .saved/stable-lm-7b-1 --num_train_epochs 4 --deepspeed`
|
@@ -111,7 +111,7 @@ bloom-zh-3b:
|
|
111 |
max_length: 5120
|
112 |
warmup_steps: 2000
|
113 |
gradient_checkpointing: true
|
114 |
-
gradient_accumulation_steps:
|
115 |
per_device_train_batch_size: 1
|
116 |
per_device_eval_batch_size: 1
|
117 |
eval_steps: 500
|
|
|
66 |
|
67 |
## Dev Details
|
68 |
|
69 |
+
- base model: [ckip-joint/bloom-3b-zh](https://huggingface.co/ckip-joint/bloom-3b-zh)
|
70 |
- checkpoint: 1 epoch (6000 steps)
|
71 |
|
72 |
command: `deepspeed trainer_sft.py --configs defaults stablelm-7b oasst-mix --cache_dir /home/ubuntu/data_cache --output_dir .saved/stable-lm-7b-1 --num_train_epochs 4 --deepspeed`
|
|
|
111 |
max_length: 5120
|
112 |
warmup_steps: 2000
|
113 |
gradient_checkpointing: true
|
114 |
+
gradient_accumulation_steps: 32
|
115 |
per_device_train_batch_size: 1
|
116 |
per_device_eval_batch_size: 1
|
117 |
eval_steps: 500
|