indiejoseph
committed on
Upload folder using huggingface_hub
Browse files
- README.md +73 -21
- all_results.json +10 -18
- config.json +2 -2
- eval_results.json +5 -13
- generation_config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Oct03_20-36-50_c0002/events.out.tfevents.1727955439.c0002 +3 -0
- runs/Oct03_20-36-50_c0002/events.out.tfevents.1727993248.c0002 +3 -0
- runs/Sep23_06-10-16_c0002/events.out.tfevents.1727039792.c0002 +3 -0
- runs/Sep23_06-10-16_c0002/events.out.tfevents.1727057464.c0002 +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +1 -1
- train_results.json +6 -6
- trainer_log.jsonl +0 -0
- trainer_state.json +0 -0
- training_args.bin +1 -1
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
README.md
CHANGED
@@ -1,32 +1,24 @@
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: other
|
4 |
-
base_model: hon9kon9ize/CantoneseLLM-
|
5 |
tags:
|
6 |
- llama-factory
|
7 |
- full
|
8 |
- generated_from_trainer
|
9 |
model-index:
|
10 |
-
- name:
|
11 |
results: []
|
12 |
---
|
13 |
|
14 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
should probably proofread and complete it, then remove this comment. -->
|
16 |
|
17 |
-
#
|
18 |
|
19 |
-
This model is a fine-tuned version of [hon9kon9ize/CantoneseLLM-
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
-
- Rewards/chosen: -0.7307
|
23 |
-
- Rewards/rejected: -3.1239
|
24 |
-
- Rewards/accuracies: 0.8464
|
25 |
-
- Rewards/margins: 2.3931
|
26 |
-
- Logps/rejected: -226.0627
|
27 |
-
- Logps/chosen: -191.7517
|
28 |
-
- Logits/rejected: -1.5777
|
29 |
-
- Logits/chosen: -1.5363
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -45,23 +37,83 @@ More information needed
|
|
45 |
### Training hyperparameters
|
46 |
|
47 |
The following hyperparameters were used during training:
|
48 |
-
- learning_rate:
|
49 |
- train_batch_size: 4
|
50 |
- eval_batch_size: 4
|
51 |
- seed: 42
|
52 |
-
- gradient_accumulation_steps:
|
53 |
-
- total_train_batch_size:
|
54 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
55 |
- lr_scheduler_type: cosine
|
56 |
-
- lr_scheduler_warmup_ratio: 0.
|
57 |
- num_epochs: 3.0
|
58 |
|
59 |
### Training results
|
60 |
|
61 |
-
| Training Loss | Epoch | Step | Validation Loss |
|
62 |
-
|
63 |
-
|
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
|
67 |
### Framework versions
|
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: other
|
4 |
+
base_model: hon9kon9ize/CantoneseLLM-v1.0
|
5 |
tags:
|
6 |
- llama-factory
|
7 |
- full
|
8 |
- generated_from_trainer
|
9 |
model-index:
|
10 |
+
- name: Qwen2.5-7B-sft
|
11 |
results: []
|
12 |
---
|
13 |
|
14 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
should probably proofread and complete it, then remove this comment. -->
|
16 |
|
17 |
+
# Qwen2.5-7B-sft
|
18 |
|
19 |
+
This model is a fine-tuned version of [hon9kon9ize/CantoneseLLM-v1.0](https://huggingface.co/hon9kon9ize/CantoneseLLM-v1.0) on the sft_v1 dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.9464
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
37 |
### Training hyperparameters
|
38 |
|
39 |
The following hyperparameters were used during training:
|
40 |
+
- learning_rate: 1e-05
|
41 |
- train_batch_size: 4
|
42 |
- eval_batch_size: 4
|
43 |
- seed: 42
|
44 |
+
- gradient_accumulation_steps: 8
|
45 |
+
- total_train_batch_size: 32
|
46 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
- lr_scheduler_type: cosine
|
48 |
+
- lr_scheduler_warmup_ratio: 0.3
|
49 |
- num_epochs: 3.0
|
50 |
|
51 |
### Training results
|
52 |
|
53 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
54 |
+
|:-------------:|:------:|:----:|:---------------:|
|
55 |
+
| 1.3332 | 0.0480 | 100 | 1.3140 |
|
56 |
+
| 1.2185 | 0.0960 | 200 | 1.2879 |
|
57 |
+
| 1.1976 | 0.1439 | 300 | 1.2533 |
|
58 |
+
| 1.1627 | 0.1919 | 400 | 1.2169 |
|
59 |
+
| 1.178 | 0.2399 | 500 | 1.1766 |
|
60 |
+
| 1.133 | 0.2879 | 600 | 1.1296 |
|
61 |
+
| 1.0466 | 0.3359 | 700 | 1.0983 |
|
62 |
+
| 1.0657 | 0.3839 | 800 | 1.0770 |
|
63 |
+
| 1.054 | 0.4318 | 900 | 1.0617 |
|
64 |
+
| 1.0744 | 0.4798 | 1000 | 1.0487 |
|
65 |
+
| 0.9977 | 0.5278 | 1100 | 1.0383 |
|
66 |
+
| 0.9778 | 0.5758 | 1200 | 1.0290 |
|
67 |
+
| 1.0187 | 0.6238 | 1300 | 1.0211 |
|
68 |
+
| 1.085 | 0.6717 | 1400 | 1.0131 |
|
69 |
+
| 0.958 | 0.7197 | 1500 | 1.0072 |
|
70 |
+
| 1.0482 | 0.7677 | 1600 | 1.0007 |
|
71 |
+
| 0.9447 | 0.8157 | 1700 | 0.9946 |
|
72 |
+
| 1.0 | 0.8637 | 1800 | 0.9894 |
|
73 |
+
| 0.9685 | 0.9117 | 1900 | 0.9849 |
|
74 |
+
| 0.8576 | 0.9596 | 2000 | 0.9807 |
|
75 |
+
| 0.8853 | 1.0076 | 2100 | 0.9775 |
|
76 |
+
| 0.947 | 1.0556 | 2200 | 0.9739 |
|
77 |
+
| 0.9207 | 1.1036 | 2300 | 0.9713 |
|
78 |
+
| 0.8596 | 1.1516 | 2400 | 0.9691 |
|
79 |
+
| 1.0277 | 1.1995 | 2500 | 0.9655 |
|
80 |
+
| 0.9646 | 1.2475 | 2600 | 0.9631 |
|
81 |
+
| 0.8583 | 1.2955 | 2700 | 0.9613 |
|
82 |
+
| 0.9367 | 1.3435 | 2800 | 0.9589 |
|
83 |
+
| 0.9146 | 1.3915 | 2900 | 0.9570 |
|
84 |
+
| 0.9697 | 1.4395 | 3000 | 0.9556 |
|
85 |
+
| 0.8713 | 1.4874 | 3100 | 0.9542 |
|
86 |
+
| 0.9855 | 1.5354 | 3200 | 0.9524 |
|
87 |
+
| 0.8651 | 1.5834 | 3300 | 0.9511 |
|
88 |
+
| 0.9448 | 1.6314 | 3400 | 0.9495 |
|
89 |
+
| 0.8997 | 1.6794 | 3500 | 0.9485 |
|
90 |
+
| 1.0446 | 1.7273 | 3600 | 0.9475 |
|
91 |
+
| 0.8862 | 1.7753 | 3700 | 0.9465 |
|
92 |
+
| 0.873 | 1.8233 | 3800 | 0.9456 |
|
93 |
+
| 0.9893 | 1.8713 | 3900 | 0.9448 |
|
94 |
+
| 0.8915 | 1.9193 | 4000 | 0.9442 |
|
95 |
+
| 0.8854 | 1.9673 | 4100 | 0.9435 |
|
96 |
+
| 0.7608 | 2.0152 | 4200 | 0.9447 |
|
97 |
+
| 0.796 | 2.0632 | 4300 | 0.9464 |
|
98 |
+
| 0.9225 | 2.1112 | 4400 | 0.9467 |
|
99 |
+
| 0.9901 | 2.1592 | 4500 | 0.9467 |
|
100 |
+
| 0.9263 | 2.2072 | 4600 | 0.9468 |
|
101 |
+
| 0.7735 | 2.2551 | 4700 | 0.9467 |
|
102 |
+
| 0.8454 | 2.3031 | 4800 | 0.9464 |
|
103 |
+
| 0.8562 | 2.3511 | 4900 | 0.9466 |
|
104 |
+
| 0.8923 | 2.3991 | 5000 | 0.9464 |
|
105 |
+
| 0.7529 | 2.4471 | 5100 | 0.9463 |
|
106 |
+
| 0.8421 | 2.4951 | 5200 | 0.9463 |
|
107 |
+
| 0.8578 | 2.5430 | 5300 | 0.9463 |
|
108 |
+
| 0.8143 | 2.5910 | 5400 | 0.9464 |
|
109 |
+
| 0.8117 | 2.6390 | 5500 | 0.9463 |
|
110 |
+
| 0.861 | 2.6870 | 5600 | 0.9464 |
|
111 |
+
| 0.8415 | 2.7350 | 5700 | 0.9463 |
|
112 |
+
| 0.7846 | 2.7829 | 5800 | 0.9463 |
|
113 |
+
| 0.7605 | 2.8309 | 5900 | 0.9464 |
|
114 |
+
| 0.8721 | 2.8789 | 6000 | 0.9464 |
|
115 |
+
| 0.8566 | 2.9269 | 6100 | 0.9464 |
|
116 |
+
| 0.7978 | 2.9749 | 6200 | 0.9464 |
|
117 |
|
118 |
|
119 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,20 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch": 2.
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"
|
9 |
-
"
|
10 |
-
"
|
11 |
-
"
|
12 |
-
"eval_runtime": 23.3432,
|
13 |
-
"eval_samples_per_second": 14.137,
|
14 |
-
"eval_steps_per_second": 3.556,
|
15 |
-
"total_flos": 4.5615607240812134e+17,
|
16 |
-
"train_loss": 0.26195224279218965,
|
17 |
-
"train_runtime": 3105.2921,
|
18 |
-
"train_samples_per_second": 2.862,
|
19 |
-
"train_steps_per_second": 0.357
|
20 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 2.9998200683740177,
|
3 |
+
"eval_loss": 0.9463909864425659,
|
4 |
+
"eval_runtime": 178.4011,
|
5 |
+
"eval_samples_per_second": 41.536,
|
6 |
+
"eval_steps_per_second": 10.387,
|
7 |
+
"total_flos": 6.498465993073754e+18,
|
8 |
+
"train_loss": 0.937529916860168,
|
9 |
+
"train_runtime": 37609.4943,
|
10 |
+
"train_samples_per_second": 5.32,
|
11 |
+
"train_steps_per_second": 0.166
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
@@ -27,4 +27,4 @@
|
|
27 |
"use_mrope": false,
|
28 |
"use_sliding_window": false,
|
29 |
"vocab_size": 152064
|
30 |
-
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "hon9kon9ize/Qwen2.5-7B-cpt",
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
|
|
27 |
"use_mrope": false,
|
28 |
"use_sliding_window": false,
|
29 |
"vocab_size": 152064
|
30 |
+
}
|
eval_results.json
CHANGED
@@ -1,15 +1,7 @@
|
|
1 |
{
|
2 |
-
"epoch": 2.
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"eval_loss": 0.31698280572891235,
|
8 |
-
"eval_rewards/accuracies": 0.8463855385780334,
|
9 |
-
"eval_rewards/chosen": -0.7307406663894653,
|
10 |
-
"eval_rewards/margins": 2.3931150436401367,
|
11 |
-
"eval_rewards/rejected": -3.1238558292388916,
|
12 |
-
"eval_runtime": 23.3432,
|
13 |
-
"eval_samples_per_second": 14.137,
|
14 |
-
"eval_steps_per_second": 3.556
|
15 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 2.9998200683740177,
|
3 |
+
"eval_loss": 0.9463909864425659,
|
4 |
+
"eval_runtime": 178.4011,
|
5 |
+
"eval_samples_per_second": 41.536,
|
6 |
+
"eval_steps_per_second": 10.387
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
}
|
generation_config.json
CHANGED
@@ -3,4 +3,4 @@
|
|
3 |
"eos_token_id": 151645,
|
4 |
"max_new_tokens": 2048,
|
5 |
"transformers_version": "4.45.0"
|
6 |
-
}
|
|
|
3 |
"eos_token_id": 151645,
|
4 |
"max_new_tokens": 2048,
|
5 |
"transformers_version": "4.45.0"
|
6 |
+
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e46f9bcd51417c7fe4f6ee7174df3350c5d054b47cd53eba8c6db80677ff144
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c22043facd2fb35cd773bf19a3b868e67047e1fe0c907f055864f243649aabf4
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f94ec0aca7510f6196dc05094e5b321d32c5f9fe210f58819a9e5983ee654b88
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23834907a8855efdec9c5ce5ffbfc1ee56e2fca5fc6a00df562ede4f70a3303a
|
3 |
size 1089994880
|
runs/Oct03_20-36-50_c0002/events.out.tfevents.1727955439.c0002
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9819779c90e165f2a79e52765391e384b230f029f412d0fe87ad341a55b88888
|
3 |
+
size 286095
|
runs/Oct03_20-36-50_c0002/events.out.tfevents.1727993248.c0002
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a44cf150d3dc29ca8ffda0c93414ff2031b34ddf883273e9bacbb9ef01042075
|
3 |
+
size 311
|
runs/Sep23_06-10-16_c0002/events.out.tfevents.1727039792.c0002
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b718e16bd6facd03ac44048c1ecc3f060195b1b9a02a21d2066b65054b24318
|
3 |
+
size 206530
|
runs/Sep23_06-10-16_c0002/events.out.tfevents.1727057464.c0002
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5472c69abe32e2f436290dad5b713cd7715d42c824d12e9ef1078698bdbdb74e
|
3 |
+
size 311
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67909bb0045622af428982dee9b3f1033cf5c4bca5c9423a028d3748364ee14f
|
3 |
+
size 1044277
|
tokenizer_config.json
CHANGED
@@ -205,4 +205,4 @@
|
|
205 |
"split_special_tokens": false,
|
206 |
"tokenizer_class": "Qwen2Tokenizer",
|
207 |
"unk_token": null
|
208 |
-
}
|
|
|
205 |
"split_special_tokens": false,
|
206 |
"tokenizer_class": "Qwen2Tokenizer",
|
207 |
"unk_token": null
|
208 |
+
}
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch": 2.
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 2.9998200683740177,
|
3 |
+
"total_flos": 6.498465993073754e+18,
|
4 |
+
"train_loss": 0.937529916860168,
|
5 |
+
"train_runtime": 37609.4943,
|
6 |
+
"train_samples_per_second": 5.32,
|
7 |
+
"train_steps_per_second": 0.166
|
8 |
}
|
trainer_log.jsonl
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe9bca8819b0827758099d317d961fbb64bf0550bb5da2ec6faf00cab3280dbc
|
3 |
size 5432
|
training_eval_loss.png
CHANGED
training_loss.png
CHANGED