File size: 1,695 Bytes
9166de3
 
 
 
 
42c83c3
9166de3
 
 
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
42c83c3
 
9166de3
42c83c3
9166de3
 
 
 
42c83c3
9166de3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42c83c3
9166de3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.8041964769363403,
      "learning_rate": 0.0002,
      "loss": 0.8636,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.6063739061355591,
      "learning_rate": 0.0002,
      "loss": 0.4697,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.9930739402770996,
      "learning_rate": 0.0002,
      "loss": 0.3111,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.8456557393074036,
      "learning_rate": 0.0002,
      "loss": 0.235,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.948847770690918,
      "learning_rate": 0.0002,
      "loss": 0.2006,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.4028228223323822,
      "learning_rate": 0.0002,
      "loss": 0.1806,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0944844579045376e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}