error577 committed on
Commit
3c31568
·
verified ·
1 Parent(s): c245c78

End of training

Browse files
README.md CHANGED
@@ -66,11 +66,11 @@ lora_model_dir: null
66
  lora_r: 32
67
  lora_target_linear: true
68
  lr_scheduler: cosine
69
- max_steps: 100
70
  micro_batch_size: 1
71
  mlflow_experiment_name: /tmp/a8d492346672a1c8_train_data.json
72
  model_type: AutoModelForCausalLM
73
- num_epochs: 4
74
  optimizer: adamw_bnb_8bit
75
  output_dir: miner_id_24
76
  pad_to_sequence_len: true
@@ -103,7 +103,7 @@ xformers_attention: null
103
 
104
  This model is a fine-tuned version of [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) on the None dataset.
105
  It achieves the following results on the evaluation set:
106
- - Loss: 1.3402
107
 
108
  ## Model description
109
 
@@ -131,17 +131,25 @@ The following hyperparameters were used during training:
131
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
132
  - lr_scheduler_type: cosine
133
  - lr_scheduler_warmup_steps: 10
134
- - training_steps: 100
135
 
136
  ### Training results
137
 
138
  | Training Loss | Epoch | Step | Validation Loss |
139
  |:-------------:|:------:|:----:|:---------------:|
140
  | 3.7335 | 0.0029 | 1 | 3.8303 |
141
- | 1.6115 | 0.0729 | 25 | 2.3264 |
142
- | 0.952 | 0.1458 | 50 | 1.4975 |
143
- | 1.9603 | 0.2187 | 75 | 1.3585 |
144
- | 1.434 | 0.2915 | 100 | 1.3402 |
 
 
 
 
 
 
 
 
145
 
146
 
147
  ### Framework versions
 
66
  lora_r: 32
67
  lora_target_linear: true
68
  lr_scheduler: cosine
69
+ max_steps: 300
70
  micro_batch_size: 1
71
  mlflow_experiment_name: /tmp/a8d492346672a1c8_train_data.json
72
  model_type: AutoModelForCausalLM
73
+ num_epochs: 12
74
  optimizer: adamw_bnb_8bit
75
  output_dir: miner_id_24
76
  pad_to_sequence_len: true
 
103
 
104
  This model is a fine-tuned version of [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) on the None dataset.
105
  It achieves the following results on the evaluation set:
106
+ - Loss: 1.0501
107
 
108
  ## Model description
109
 
 
131
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
132
  - lr_scheduler_type: cosine
133
  - lr_scheduler_warmup_steps: 10
134
+ - training_steps: 300
135
 
136
  ### Training results
137
 
138
  | Training Loss | Epoch | Step | Validation Loss |
139
  |:-------------:|:------:|:----:|:---------------:|
140
  | 3.7335 | 0.0029 | 1 | 3.8303 |
141
+ | 1.5917 | 0.0729 | 25 | 2.3094 |
142
+ | 0.9143 | 0.1458 | 50 | 1.4404 |
143
+ | 1.7505 | 0.2187 | 75 | 1.2449 |
144
+ | 1.2376 | 0.2915 | 100 | 1.1931 |
145
+ | 1.2788 | 0.3644 | 125 | 1.1278 |
146
+ | 1.4158 | 0.4373 | 150 | 1.0998 |
147
+ | 0.9088 | 0.5102 | 175 | 1.0870 |
148
+ | 1.1591 | 0.5831 | 200 | 1.0699 |
149
+ | 1.0273 | 0.6560 | 225 | 1.0602 |
150
+ | 1.9042 | 0.7289 | 250 | 1.0567 |
151
+ | 0.9941 | 0.8017 | 275 | 1.0524 |
152
+ | 1.1544 | 0.8746 | 300 | 1.0501 |
153
 
154
 
155
  ### Framework versions
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
  "gate_proj",
25
  "up_proj",
 
26
  "down_proj",
27
  "o_proj",
28
- "v_proj",
29
  "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
  "gate_proj",
25
  "up_proj",
26
+ "q_proj",
27
  "down_proj",
28
  "o_proj",
 
29
  "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:276ad78cd1e7cec305cda80c44d587d1b2f2a40c8fd77b008d3ed30b28d3cf69
3
  size 319977674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e6ee602386a820be0e08e0dc35ba90ab1e8d13d11507992be368693a2f48e46
3
  size 319977674
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7211c6973eef28e52bea4d96a74846e994ad7ebb2c1bccd845772d42aa497ee6
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e0ea1556d29c6d79e48933779339bb1a59db98e44cdb9697922abeb465b0322
3
  size 319876032
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f8188926fa47a0c6a507ee13a218f5419bcf03d5cb9408289fcb2ae6e89945
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1dafb245219169482eca522df88859feac6a8e53af11c374d72d3c6395fdc05
3
  size 6776