alexander-hm committed on
Commit
6fcd91d
·
verified ·
1 Parent(s): 49fda9f

End of training

Browse files
Files changed (6) hide show
  1. README.md +11 -13
  2. all_results.json +10 -10
  3. eval_results.json +5 -5
  4. metrics.json +1 -1
  5. train_results.json +6 -6
  6. trainer_state.json +0 -0
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [huggyllama/llama-13b](https://huggingface.co/huggyllama/llama-13b) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.5115
20
 
21
  ## Model description
22
 
@@ -44,23 +44,21 @@ The following hyperparameters were used during training:
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: constant
46
  - lr_scheduler_warmup_ratio: 0.03
47
- - training_steps: 1875
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:------:|:----:|:---------------:|
53
- | 1.4264 | 0.0018 | 1 | 1.5678 |
54
- | 1.4263 | 0.3392 | 187 | 1.2365 |
55
- | 1.0712 | 0.6783 | 374 | 1.2313 |
56
- | 1.2232 | 1.0175 | 561 | 1.2366 |
57
- | 0.9766 | 1.3566 | 748 | 1.2977 |
58
- | 0.983 | 1.6958 | 935 | 1.2653 |
59
- | 0.5607 | 2.0349 | 1122 | 1.3461 |
60
- | 0.7534 | 2.3741 | 1309 | 1.3852 |
61
- | 0.5426 | 2.7132 | 1496 | 1.4808 |
62
- | 0.2964 | 3.0524 | 1683 | 1.5780 |
63
- | 0.3246 | 3.3915 | 1870 | 1.4927 |
64
 
65
 
66
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [huggyllama/llama-13b](https://huggingface.co/huggyllama/llama-13b) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 1.5069
20
 
21
  ## Model description
22
 
 
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: constant
46
  - lr_scheduler_warmup_ratio: 0.03
47
+ - training_steps: 0
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:------:|:----:|:---------------:|
53
+ | 1.4264 | 0.0018 | 1 | 1.5699 |
54
+ | 1.4261 | 0.3392 | 187 | 1.2365 |
55
+ | 1.0708 | 0.6783 | 374 | 1.2308 |
56
+ | 1.2217 | 1.0175 | 561 | 1.2357 |
57
+ | 0.9791 | 1.3566 | 748 | 1.2991 |
58
+ | 0.9827 | 1.6958 | 935 | 1.2639 |
59
+ | 0.5563 | 2.0349 | 1122 | 1.3503 |
60
+ | 0.7603 | 2.3741 | 1309 | 1.3852 |
61
+ | 0.5553 | 2.7132 | 1496 | 1.4816 |
 
 
62
 
63
 
64
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 3.4005894355021535,
3
- "eval_loss": 1.511498212814331,
4
- "eval_runtime": 331.6938,
5
- "eval_samples_per_second": 3.015,
6
- "eval_steps_per_second": 3.015,
7
- "total_flos": 8.246064732932506e+17,
8
- "train_loss": 0.8989817124764125,
9
- "train_runtime": 128394.3068,
10
- "train_samples_per_second": 0.234,
11
- "train_steps_per_second": 0.015
12
  }
 
1
  {
2
+ "epoch": 2.997959646338699,
3
+ "eval_loss": 1.5068761110305786,
4
+ "eval_runtime": 331.0739,
5
+ "eval_samples_per_second": 3.02,
6
+ "eval_steps_per_second": 3.02,
7
+ "total_flos": 7.23522717038592e+17,
8
+ "train_loss": 0.9697928366449048,
9
+ "train_runtime": 126183.6259,
10
+ "train_samples_per_second": 0.21,
11
+ "train_steps_per_second": 0.013
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 3.4005894355021535,
3
- "eval_loss": 1.511498212814331,
4
- "eval_runtime": 331.6938,
5
- "eval_samples_per_second": 3.015,
6
- "eval_steps_per_second": 3.015
7
  }
 
1
  {
2
+ "epoch": 2.997959646338699,
3
+ "eval_loss": 1.5068761110305786,
4
+ "eval_runtime": 331.0739,
5
+ "eval_samples_per_second": 3.02,
6
+ "eval_steps_per_second": 3.02
7
  }
metrics.json CHANGED
@@ -1 +1 @@
1
- {"run_name": "huggyllama/llama-13b_oasst1_l0.0002_32,8,8,8,8", "train_runtime": 128394.3068, "train_samples_per_second": 0.234, "train_steps_per_second": 0.015, "total_flos": 8.246064732932506e+17, "train_loss": 0.8989817124764125, "epoch": 3.4005894355021535, "eval_loss": 1.511498212814331, "eval_runtime": 331.6938, "eval_samples_per_second": 3.015, "eval_steps_per_second": 3.015}
 
1
+ {"run_name": "huggyllama/llama-13b_oasst1_l0.0002_32,8,8,8,8", "train_runtime": 126183.6259, "train_samples_per_second": 0.21, "train_steps_per_second": 0.013, "total_flos": 7.23522717038592e+17, "train_loss": 0.9697928366449048, "epoch": 2.997959646338699, "eval_loss": 1.5068761110305786, "eval_runtime": 331.0739, "eval_samples_per_second": 3.02, "eval_steps_per_second": 3.02}
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.4005894355021535,
3
- "total_flos": 8.246064732932506e+17,
4
- "train_loss": 0.8989817124764125,
5
- "train_runtime": 128394.3068,
6
- "train_samples_per_second": 0.234,
7
- "train_steps_per_second": 0.015
8
  }
 
1
  {
2
+ "epoch": 2.997959646338699,
3
+ "total_flos": 7.23522717038592e+17,
4
+ "train_loss": 0.9697928366449048,
5
+ "train_runtime": 126183.6259,
6
+ "train_samples_per_second": 0.21,
7
+ "train_steps_per_second": 0.013
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff