albertmartinez commited on
Commit
936c0f5
·
verified ·
1 Parent(s): 9ded0cd

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_f1": 0.7954325175529264,
4
- "eval_loss": 0.734076738357544,
5
- "eval_runtime": 677.1647,
6
  "eval_samples": 8605,
7
- "eval_samples_per_second": 12.707,
8
- "eval_steps_per_second": 0.397,
9
  "total_flos": 2.28033311981568e+16,
10
- "train_loss": 0.7531972722049982,
11
- "train_runtime": 49725.5381,
12
  "train_samples": 34420,
13
- "train_samples_per_second": 3.461,
14
- "train_steps_per_second": 0.108
15
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_f1": 0.7705941970545616,
4
+ "eval_loss": 0.8075858354568481,
5
+ "eval_runtime": 5.8073,
6
  "eval_samples": 8605,
7
+ "eval_samples_per_second": 1481.76,
8
+ "eval_steps_per_second": 23.247,
9
  "total_flos": 2.28033311981568e+16,
10
+ "train_loss": 1.097573880901124,
11
+ "train_runtime": 397.4859,
12
  "train_samples": 34420,
13
+ "train_samples_per_second": 432.971,
14
+ "train_steps_per_second": 6.768
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_f1": 0.7954325175529264,
4
- "eval_loss": 0.734076738357544,
5
- "eval_runtime": 677.1647,
6
  "eval_samples": 8605,
7
- "eval_samples_per_second": 12.707,
8
- "eval_steps_per_second": 0.397
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_f1": 0.7705941970545616,
4
+ "eval_loss": 0.8075858354568481,
5
+ "eval_runtime": 5.8073,
6
  "eval_samples": 8605,
7
+ "eval_samples_per_second": 1481.76,
8
+ "eval_steps_per_second": 23.247
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 2.28033311981568e+16,
4
- "train_loss": 0.7531972722049982,
5
- "train_runtime": 49725.5381,
6
  "train_samples": 34420,
7
- "train_samples_per_second": 3.461,
8
- "train_steps_per_second": 0.108
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 2.28033311981568e+16,
4
+ "train_loss": 1.097573880901124,
5
+ "train_runtime": 397.4859,
6
  "train_samples": 34420,
7
+ "train_samples_per_second": 432.971,
8
+ "train_steps_per_second": 6.768
9
  }
trainer_state.json CHANGED
@@ -1,105 +1,105 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
- "eval_steps": 500.0,
6
- "global_step": 5380,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "grad_norm": 10.252312660217285,
14
- "learning_rate": 1.8008368200836822e-05,
15
- "loss": 1.484,
16
- "step": 1076
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_f1": 0.7462822532277763,
21
- "eval_loss": 0.8799635171890259,
22
- "eval_runtime": 683.415,
23
- "eval_samples_per_second": 12.591,
24
- "eval_steps_per_second": 0.394,
25
- "step": 1076
26
  },
27
  {
28
- "epoch": 2.0,
29
- "grad_norm": 11.723691940307617,
30
- "learning_rate": 1.3506276150627616e-05,
31
- "loss": 0.7957,
32
- "step": 2152
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_f1": 0.773518075222442,
37
- "eval_loss": 0.7795748710632324,
38
- "eval_runtime": 678.1252,
39
- "eval_samples_per_second": 12.689,
40
- "eval_steps_per_second": 0.397,
41
- "step": 2152
42
  },
43
  {
44
- "epoch": 3.0,
45
- "grad_norm": 9.515564918518066,
46
- "learning_rate": 9.004184100418411e-06,
47
- "loss": 0.612,
48
- "step": 3228
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_f1": 0.790370571641359,
53
- "eval_loss": 0.7352398037910461,
54
- "eval_runtime": 672.8155,
55
- "eval_samples_per_second": 12.79,
56
- "eval_steps_per_second": 0.4,
57
- "step": 3228
58
  },
59
  {
60
- "epoch": 4.0,
61
- "grad_norm": 22.87370491027832,
62
- "learning_rate": 4.5020920502092055e-06,
63
- "loss": 0.4784,
64
- "step": 4304
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_f1": 0.7948184348707309,
69
- "eval_loss": 0.7243198156356812,
70
- "eval_runtime": 678.247,
71
- "eval_samples_per_second": 12.687,
72
- "eval_steps_per_second": 0.397,
73
- "step": 4304
74
  },
75
  {
76
- "epoch": 5.0,
77
- "grad_norm": 5.2252960205078125,
78
- "learning_rate": 0.0,
79
- "loss": 0.3959,
80
- "step": 5380
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_f1": 0.7954325175529264,
85
- "eval_loss": 0.734076738357544,
86
- "eval_runtime": 676.4727,
87
- "eval_samples_per_second": 12.72,
88
- "eval_steps_per_second": 0.398,
89
- "step": 5380
90
  },
91
  {
92
  "epoch": 5.0,
93
- "step": 5380,
94
  "total_flos": 2.28033311981568e+16,
95
- "train_loss": 0.7531972722049982,
96
- "train_runtime": 49725.5381,
97
- "train_samples_per_second": 3.461,
98
- "train_steps_per_second": 0.108
99
  }
100
  ],
101
  "logging_steps": 500,
102
- "max_steps": 5380,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
@@ -116,7 +116,7 @@
116
  }
117
  },
118
  "total_flos": 2.28033311981568e+16,
119
- "train_batch_size": 32,
120
  "trial_name": null,
121
  "trial_params": null
122
  }
 
1
  {
2
+ "best_metric": 0.8075858354568481,
3
+ "best_model_checkpoint": "./distilbert-multilingual-sdg-classification/checkpoint-2690",
4
  "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2690,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.929368029739777,
13
+ "grad_norm": 5.209447383880615,
14
+ "learning_rate": 8.333333333333334e-06,
15
+ "loss": 2.1669,
16
+ "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_f1": 0.6552228504413294,
21
+ "eval_loss": 1.2065790891647339,
22
+ "eval_runtime": 5.8951,
23
+ "eval_samples_per_second": 1459.691,
24
+ "eval_steps_per_second": 22.9,
25
+ "step": 538
26
  },
27
  {
28
+ "epoch": 1.858736059479554,
29
+ "grad_norm": 4.618020534515381,
30
+ "learning_rate": 8.090909090909092e-06,
31
+ "loss": 1.0784,
32
+ "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_f1": 0.7414291761169469,
37
+ "eval_loss": 0.9131004214286804,
38
+ "eval_runtime": 5.864,
39
+ "eval_samples_per_second": 1467.42,
40
+ "eval_steps_per_second": 23.022,
41
+ "step": 1076
42
  },
43
  {
44
+ "epoch": 2.7881040892193307,
45
+ "grad_norm": 6.927801609039307,
46
+ "learning_rate": 5.698564593301436e-06,
47
+ "loss": 0.8756,
48
+ "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_f1": 0.7613752074026013,
53
+ "eval_loss": 0.840763509273529,
54
+ "eval_runtime": 5.8902,
55
+ "eval_samples_per_second": 1460.895,
56
+ "eval_steps_per_second": 22.919,
57
+ "step": 1614
58
  },
59
  {
60
+ "epoch": 3.717472118959108,
61
+ "grad_norm": 7.057933330535889,
62
+ "learning_rate": 3.30622009569378e-06,
63
+ "loss": 0.7817,
64
+ "step": 2000
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_f1": 0.7688440327429015,
69
+ "eval_loss": 0.8136261105537415,
70
+ "eval_runtime": 5.8715,
71
+ "eval_samples_per_second": 1465.551,
72
+ "eval_steps_per_second": 22.992,
73
+ "step": 2152
74
  },
75
  {
76
+ "epoch": 4.646840148698884,
77
+ "grad_norm": 5.335556507110596,
78
+ "learning_rate": 9.138755980861245e-07,
79
+ "loss": 0.7337,
80
+ "step": 2500
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_f1": 0.7705941970545616,
85
+ "eval_loss": 0.8075858354568481,
86
+ "eval_runtime": 5.8756,
87
+ "eval_samples_per_second": 1464.538,
88
+ "eval_steps_per_second": 22.976,
89
+ "step": 2690
90
  },
91
  {
92
  "epoch": 5.0,
93
+ "step": 2690,
94
  "total_flos": 2.28033311981568e+16,
95
+ "train_loss": 1.097573880901124,
96
+ "train_runtime": 397.4859,
97
+ "train_samples_per_second": 432.971,
98
+ "train_steps_per_second": 6.768
99
  }
100
  ],
101
  "logging_steps": 500,
102
+ "max_steps": 2690,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
 
116
  }
117
  },
118
  "total_flos": 2.28033311981568e+16,
119
+ "train_batch_size": 64,
120
  "trial_name": null,
121
  "trial_params": null
122
  }