sumo43 committed on
Commit
7b35d02
·
verified ·
1 Parent(s): 1ec07fe

Model save

Browse files
README.md CHANGED
@@ -1,16 +1,10 @@
1
  ---
2
- license: apache-2.0
3
- base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - sft
8
- - generated_from_trainer
9
  - trl
10
  - sft
11
  - generated_from_trainer
12
  datasets:
13
- - HuggingFaceH4/ultrachat_200k
14
  model-index:
15
  - name: zephyr-7b-sft-full
16
  results: []
@@ -21,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # zephyr-7b-sft-full
23
 
24
- This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the HuggingFaceH4/ultrachat_200k dataset.
25
  It achieves the following results on the evaluation set:
26
- - Loss: 2.1644
27
 
28
  ## Model description
29
 
@@ -46,7 +40,6 @@ The following hyperparameters were used during training:
46
  - train_batch_size: 16
47
  - eval_batch_size: 8
48
  - seed: 42
49
- - distributed_type: multi-GPU
50
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
51
  - lr_scheduler_type: constant
52
  - lr_scheduler_warmup_ratio: 0.1
@@ -56,12 +49,12 @@ The following hyperparameters were used during training:
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:-----:|:----:|:---------------:|
59
- | 2.2013 | 1.0 | 8969 | 2.1644 |
60
 
61
 
62
  ### Framework versions
63
 
64
  - Transformers 4.39.0.dev0
65
- - Pytorch 2.1.1+cu121
66
  - Datasets 2.14.6
67
  - Tokenizers 0.15.2
 
1
  ---
 
 
2
  tags:
 
 
 
 
3
  - trl
4
  - sft
5
  - generated_from_trainer
6
  datasets:
7
+ - generator
8
  model-index:
9
  - name: zephyr-7b-sft-full
10
  results: []
 
15
 
16
  # zephyr-7b-sft-full
17
 
18
+ This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the generator dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 2.2164
21
 
22
  ## Model description
23
 
 
40
  - train_batch_size: 16
41
  - eval_batch_size: 8
42
  - seed: 42
 
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: constant
45
  - lr_scheduler_warmup_ratio: 0.1
 
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 2.249 | 1.0 | 8969 | 2.2164 |
53
 
54
 
55
  ### Framework versions
56
 
57
  - Transformers 4.39.0.dev0
58
+ - Pytorch 2.1.0+cu118
59
  - Datasets 2.14.6
60
  - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 2.164351224899292,
4
- "eval_runtime": 1703.872,
5
- "eval_samples": 23109,
6
- "eval_samples_per_second": 9.321,
7
- "eval_steps_per_second": 1.166,
8
- "train_loss": 2.789915239123786,
9
- "train_runtime": 47169.5182,
10
  "train_samples": 207864,
11
- "train_samples_per_second": 3.042,
12
- "train_steps_per_second": 0.19
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 2.6744996346363004,
4
+ "train_runtime": 27779.5691,
 
 
 
 
 
5
  "train_samples": 207864,
6
+ "train_samples_per_second": 5.165,
7
+ "train_steps_per_second": 0.323
8
  }
generation_config.json CHANGED
@@ -1,8 +1,7 @@
1
  {
 
2
  "bos_token_id": 1,
3
  "eos_token_id": 2,
4
- "max_length": 2048,
5
- "pad_token_id": 0,
6
  "transformers_version": "4.39.0.dev0",
7
  "use_cache": false
8
  }
 
1
  {
2
+ "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
 
 
5
  "transformers_version": "4.39.0.dev0",
6
  "use_cache": false
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08ce96c31744cae26d21b7888fceb9a10b68782aa8737f6cf8e35993f3351983
3
  size 485201912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8a01276ab9bd1c2b28525781848bd023395bc7e7214b7c973c9caf652d8f30
3
  size 485201912
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 2.789915239123786,
4
- "train_runtime": 47169.5182,
5
  "train_samples": 207864,
6
- "train_samples_per_second": 3.042,
7
- "train_steps_per_second": 0.19
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 2.6744996346363004,
4
+ "train_runtime": 27779.5691,
5
  "train_samples": 207864,
6
+ "train_samples_per_second": 5.165,
7
+ "train_steps_per_second": 0.323
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff