File size: 1,926 Bytes
229fdd5
 
 
ef3602d
229fdd5
ef3602d
229fdd5
 
 
 
 
ef3602d
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
ef3602d
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
ef3602d
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
 
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
ef3602d
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
ef3602d
98b40d0
229fdd5
98b40d0
ef3602d
229fdd5
 
ef3602d
 
98b40d0
 
 
 
 
229fdd5
 
 
ef3602d
229fdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98b40d0
229fdd5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 16.085969924926758,
      "learning_rate": 8.333333333333334e-05,
      "loss": 8.8634,
      "step": 2
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1762865781784058,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.6966,
      "step": 4
    },
    {
      "epoch": 1.5925925925925926,
      "grad_norm": 2.515094518661499,
      "learning_rate": 5e-05,
      "loss": 7.5589,
      "step": 6
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.3776528835296631,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.9983,
      "step": 8
    },
    {
      "epoch": 2.5925925925925926,
      "grad_norm": 0.8414855599403381,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 7.2323,
      "step": 10
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.46793699264526367,
      "learning_rate": 0.0,
      "loss": 4.9528,
      "step": 12
    },
    {
      "epoch": 3.0,
      "step": 12,
      "total_flos": 16597381341504.0,
      "train_loss": 6.5503848393758135,
      "train_runtime": 47.4701,
      "train_samples_per_second": 4.55,
      "train_steps_per_second": 0.253
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 16597381341504.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}