File size: 1,699 Bytes
31c51a8
 
 
 
 
4b32760
31c51a8
 
 
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
4b32760
 
31c51a8
4b32760
31c51a8
 
 
 
4b32760
31c51a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b32760
31c51a8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.5260074138641357,
      "learning_rate": 0.0002,
      "loss": 0.8045,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.3671823740005493,
      "learning_rate": 0.0002,
      "loss": 0.3861,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.49801939725875854,
      "learning_rate": 0.0002,
      "loss": 0.2388,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.25766927003860474,
      "learning_rate": 0.0002,
      "loss": 0.1858,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.5788165926933289,
      "learning_rate": 0.0002,
      "loss": 0.1568,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.3915470540523529,
      "learning_rate": 0.0002,
      "loss": 0.1405,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0351908666638336e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}