File size: 1,349 Bytes
75c4c46
 
 
c23fad8
75c4c46
c23fad8
75c4c46
 
 
 
 
64b8cf0
c23fad8
64b8cf0
75c4c46
 
 
 
c23fad8
 
64b8cf0
75c4c46
 
 
c23fad8
 
64b8cf0
75c4c46
 
 
c23fad8
 
64b8cf0
75c4c46
 
64b8cf0
c23fad8
 
 
 
64b8cf0
75c4c46
 
c23fad8
 
 
 
 
 
 
75c4c46
 
 
c23fad8
75c4c46
c23fad8
75c4c46
c23fad8
75c4c46
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.96,
  "eval_steps": 500,
  "global_step": 18,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 0.0001,
      "loss": 2.272,
      "step": 1
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018314696123025454,
      "loss": 1.7677,
      "step": 5
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001,
      "loss": 0.5037,
      "step": 10
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 0.2847,
      "step": 15
    },
    {
      "epoch": 0.96,
      "eval_loss": 0.26359862089157104,
      "eval_runtime": 8.8309,
      "eval_samples_per_second": 22.648,
      "eval_steps_per_second": 2.831,
      "step": 18
    },
    {
      "epoch": 0.96,
      "step": 18,
      "total_flos": 20001168556032.0,
      "train_loss": 0.7799174222681258,
      "train_runtime": 192.3889,
      "train_samples_per_second": 3.119,
      "train_steps_per_second": 0.094
    }
  ],
  "logging_steps": 5,
  "max_steps": 18,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 20001168556032.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}