Model save
Browse files- README.md +12 -8
- adapter_config.json +4 -4
- adapter_model.safetensors +3 -0
- runs/Oct27_15-30-26_graz/events.out.tfevents.1730010650.graz.1060953.0 +3 -0
- runs/Oct27_15-35-07_graz/events.out.tfevents.1730010913.graz.1061216.0 +3 -0
- runs/Oct27_15-36-40_graz/events.out.tfevents.1730011006.graz.1061496.0 +3 -0
- runs/Oct27_15-38-22_graz/events.out.tfevents.1730011108.graz.1061695.0 +3 -0
- runs/Oct27_15-46-54_graz/events.out.tfevents.1730011620.graz.1062153.0 +3 -0
- runs/Oct27_15-48-15_graz/events.out.tfevents.1730011701.graz.1062488.0 +3 -0
- runs/Oct27_15-49-17_graz/events.out.tfevents.1730011762.graz.1062687.0 +3 -0
- runs/Oct27_16-32-58_graz/events.out.tfevents.1730014383.graz.1063356.0 +3 -0
- runs/Oct27_16-34-17_graz/events.out.tfevents.1730014462.graz.1063557.0 +3 -0
- runs/Oct27_16-35-02_graz/events.out.tfevents.1730014508.graz.1063769.0 +3 -0
- runs/Oct27_16-48-37_graz/events.out.tfevents.1730015328.graz.1065240.0 +3 -0
- runs/Oct27_16-50-13_graz/events.out.tfevents.1730015424.graz.1065447.0 +3 -0
- runs/Oct27_16-51-10_graz/events.out.tfevents.1730015481.graz.1065650.0 +3 -0
- runs/Oct27_16-59-08_graz/events.out.tfevents.1730015959.graz.1066060.0 +3 -0
- runs/Oct27_17-09-13_graz/events.out.tfevents.1730016564.graz.1066690.0 +3 -0
- runs/Oct27_17-10-33_graz/events.out.tfevents.1730016644.graz.1066893.0 +3 -0
- runs/Oct27_17-11-48_graz/events.out.tfevents.1730016719.graz.1067096.0 +3 -0
- runs/Oct27_17-12-50_graz/events.out.tfevents.1730016781.graz.1067299.0 +3 -0
- runs/Oct27_18-01-15_graz/events.out.tfevents.1730019686.graz.1067916.0 +3 -0
- runs/Oct27_18-04-27_graz/events.out.tfevents.1730019879.graz.1068110.0 +3 -0
- runs/Oct27_18-08-33_graz/events.out.tfevents.1730020125.graz.1068561.0 +3 -0
- runs/Oct27_18-09-10_graz/events.out.tfevents.1730020162.graz.1068744.0 +3 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
---
|
2 |
-
|
3 |
-
|
|
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
model-index:
|
@@ -13,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
13 |
|
14 |
# lora
|
15 |
|
16 |
-
This model is a fine-tuned version of [
|
17 |
|
18 |
## Model description
|
19 |
|
@@ -33,15 +34,17 @@ More information needed
|
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
- learning_rate: 2e-05
|
36 |
-
- train_batch_size:
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
|
|
39 |
- gradient_accumulation_steps: 4
|
40 |
-
- total_train_batch_size:
|
41 |
-
- optimizer:
|
42 |
- lr_scheduler_type: linear
|
43 |
- lr_scheduler_warmup_steps: 2
|
44 |
- num_epochs: 2
|
|
|
45 |
|
46 |
### Training results
|
47 |
|
@@ -49,7 +52,8 @@ The following hyperparameters were used during training:
|
|
49 |
|
50 |
### Framework versions
|
51 |
|
52 |
-
-
|
|
|
53 |
- Pytorch 1.12.1+cu113
|
54 |
- Datasets 2.6.1
|
55 |
-
- Tokenizers 0.
|
|
|
1 |
---
|
2 |
+
base_model: rinna/japanese-gpt-neox-3.6b
|
3 |
+
library_name: peft
|
4 |
+
license: mit
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
model-index:
|
|
|
14 |
|
15 |
# lora
|
16 |
|
17 |
+
This model is a fine-tuned version of [rinna/japanese-gpt-neox-3.6b](https://huggingface.co/rinna/japanese-gpt-neox-3.6b) on an unknown dataset.
|
18 |
|
19 |
## Model description
|
20 |
|
|
|
34 |
|
35 |
The following hyperparameters were used during training:
|
36 |
- learning_rate: 2e-05
|
37 |
+
- train_batch_size: 4
|
38 |
- eval_batch_size: 8
|
39 |
- seed: 42
|
40 |
+
- distributed_type: multi-GPU
|
41 |
- gradient_accumulation_steps: 4
|
42 |
+
- total_train_batch_size: 16
|
43 |
+
- optimizer: Use adamw_hf with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
44 |
- lr_scheduler_type: linear
|
45 |
- lr_scheduler_warmup_steps: 2
|
46 |
- num_epochs: 2
|
47 |
+
- mixed_precision_training: Native AMP
|
48 |
|
49 |
### Training results
|
50 |
|
|
|
52 |
|
53 |
### Framework versions
|
54 |
|
55 |
+
- PEFT 0.13.2
|
56 |
+
- Transformers 4.46.0
|
57 |
- Pytorch 1.12.1+cu113
|
58 |
- Datasets 2.6.1
|
59 |
+
- Tokenizers 0.20.1
|
adapter_config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"base_model_class": "GPTNeoXForCausalLM",
|
5 |
"parent_library": "transformers.models.gpt_neox.modeling_gpt_neox"
|
6 |
},
|
7 |
-
"base_model_name_or_path": "
|
8 |
"bias": "none",
|
9 |
"fan_in_fan_out": false,
|
10 |
"inference_mode": true,
|
@@ -23,10 +23,10 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"attention.query_key_value",
|
27 |
-
"mlp.dense_4h_to_h",
|
28 |
"mlp.dense_h_to_4h",
|
29 |
-
"attention.dense"
|
|
|
|
|
30 |
],
|
31 |
"task_type": null,
|
32 |
"use_dora": true,
|
|
|
4 |
"base_model_class": "GPTNeoXForCausalLM",
|
5 |
"parent_library": "transformers.models.gpt_neox.modeling_gpt_neox"
|
6 |
},
|
7 |
+
"base_model_name_or_path": "rinna/japanese-gpt-neox-3.6b",
|
8 |
"bias": "none",
|
9 |
"fan_in_fan_out": false,
|
10 |
"inference_mode": true,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
|
|
|
|
26 |
"mlp.dense_h_to_4h",
|
27 |
+
"attention.dense",
|
28 |
+
"mlp.dense_4h_to_h",
|
29 |
+
"attention.query_key_value"
|
30 |
],
|
31 |
"task_type": null,
|
32 |
"use_dora": true,
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f9e660babf44b0418a0c73df182d426f49242f735d41c64ccf480f8d9c97ab1
|
3 |
+
size 27837832
|
runs/Oct27_15-30-26_graz/events.out.tfevents.1730010650.graz.1060953.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ab917db4096f468f38492de34629e9524e93dbeef526df192adaf7de7597687
|
3 |
+
size 5123
|
runs/Oct27_15-35-07_graz/events.out.tfevents.1730010913.graz.1061216.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f74c48c3d3449a8a0e9bc7aac2be37663fde182435a3436cbd8a91de407661d9
|
3 |
+
size 5123
|
runs/Oct27_15-36-40_graz/events.out.tfevents.1730011006.graz.1061496.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e21a28dda2e26c27cfeff8679d4abc6ed4a3e822b7f350b8640a2d8d01bf9781
|
3 |
+
size 5123
|
runs/Oct27_15-38-22_graz/events.out.tfevents.1730011108.graz.1061695.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7e790821063852310f4bdb886005345eb6b74ddfe2d95661182e7d5335ce1ce
|
3 |
+
size 5123
|
runs/Oct27_15-46-54_graz/events.out.tfevents.1730011620.graz.1062153.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2974f266c482e3b4c22032ba219e5e80d66eaae4203ce2ce5087e6e96148f2c5
|
3 |
+
size 5123
|
runs/Oct27_15-48-15_graz/events.out.tfevents.1730011701.graz.1062488.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bffc68e134127bc0fe415fdb951d288bd920712013f5307b652cc3afbfad2b20
|
3 |
+
size 5123
|
runs/Oct27_15-49-17_graz/events.out.tfevents.1730011762.graz.1062687.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04d5f93669cf853208cdf2fb4efe593c4ac8fb365768d1ca9507487990bc2cce
|
3 |
+
size 5123
|
runs/Oct27_16-32-58_graz/events.out.tfevents.1730014383.graz.1063356.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23bb18ba91ac24d8570498f138907d3b2f561e6ce8c7f48530e9b20b09020c10
|
3 |
+
size 5080
|
runs/Oct27_16-34-17_graz/events.out.tfevents.1730014462.graz.1063557.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22ff104c76a0726efed1b4ddee7c7bd1fb4d18bb5ac37e753f762f2da7db756d
|
3 |
+
size 5080
|
runs/Oct27_16-35-02_graz/events.out.tfevents.1730014508.graz.1063769.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ad904360d119d905a57f67ddade39290d3dcef22ccdf5423b15ed4cd6fb4985
|
3 |
+
size 5080
|
runs/Oct27_16-48-37_graz/events.out.tfevents.1730015328.graz.1065240.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:641caa0850f642adad3c23f33f4871fddb23a7c4a68a7b864789a9c1aff32cd0
|
3 |
+
size 5079
|
runs/Oct27_16-50-13_graz/events.out.tfevents.1730015424.graz.1065447.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77742a267a3812a98c6f99a4eec70c0b74824c913ce8987b8f25e93215dd22d8
|
3 |
+
size 5079
|
runs/Oct27_16-51-10_graz/events.out.tfevents.1730015481.graz.1065650.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05fcf06e37043efc30b38e00accd0a374adc4ec638797e56a8af439bb9279972
|
3 |
+
size 5079
|
runs/Oct27_16-59-08_graz/events.out.tfevents.1730015959.graz.1066060.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77301ab089dac40125ba46efb4d7e82d6df21cb8d0c7bb916d2f2f67d1fe6c5d
|
3 |
+
size 5079
|
runs/Oct27_17-09-13_graz/events.out.tfevents.1730016564.graz.1066690.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f82ea3859dc67b54b6f3794198f38244f6d31c149c959e6d966e3062ab8d4a9
|
3 |
+
size 5079
|
runs/Oct27_17-10-33_graz/events.out.tfevents.1730016644.graz.1066893.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d929530723af00d9df938801eeeb2a97725279de8fd4ef2b62bee183fa903f3
|
3 |
+
size 5079
|
runs/Oct27_17-11-48_graz/events.out.tfevents.1730016719.graz.1067096.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d15f3de998f21183368a7390b72fc13bf65b65ef2d5007a2c4a9d2b3434b70fc
|
3 |
+
size 5078
|
runs/Oct27_17-12-50_graz/events.out.tfevents.1730016781.graz.1067299.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aedc245bb3680102c92994b9e64b9d90924d42df0f9d119e8c049e369b6348c0
|
3 |
+
size 5079
|
runs/Oct27_18-01-15_graz/events.out.tfevents.1730019686.graz.1067916.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c72e676e96922a4f4bb70d333752a43ae06ad43b33344e6fee976b5c0aee174
|
3 |
+
size 5127
|
runs/Oct27_18-04-27_graz/events.out.tfevents.1730019879.graz.1068110.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45ff6ec2df57929341f68fc3e0998965fe3242f02aa47107ec5c3616173acc1d
|
3 |
+
size 5127
|
runs/Oct27_18-08-33_graz/events.out.tfevents.1730020125.graz.1068561.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:429e404c43c154aa1ed6941454dc3a92491c227aca02c31aa3e0c146fd2d5ecd
|
3 |
+
size 5127
|
runs/Oct27_18-09-10_graz/events.out.tfevents.1730020162.graz.1068744.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cb874cf82f98bb9b4fc8299abf626de4b9995da6d7b326cce850566761a0d77
|
3 |
+
size 5692
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16a1898da87ecf03bba600ea114f4642e30b06e4011d1add4b3f2d37c1e366e5
|
3 |
+
size 7151
|