masatochi commited on
Commit
dcf83db
·
verified ·
1 Parent(s): 173ac79

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2693e3c807ab1261a91352b2a34141c7debf3c87e07888e0213731c63bf1238c
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11ddbf41eaaa839589f1095623d71d7b903fb219c74e298e1719200dfd47b771
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57d38d2f83edf7948b193d522722e03c4f2b8cbed5067da59897c7f7c3822919
3
  size 43122580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ce0623a30bd01e5e27495816869c88c33b4cb28c0d789da7326c5b2ec46c6d
3
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79354fda314c9103e2d55f6fd0e3e7ec5fe801812e33d2c8e4dd8c180772e09a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad528149b7ddaf36aa54c3270bc91600bfb527af6fb344cfc62207a9548bc407
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc750a6dfb3e5c9f642238b7443b0984a56e79b7c2731a6e152ecfc3e32f4e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671828e69cd8fd42106344a5797cbc78f701a434a6386d9dfacd16451ba179aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06601870529983496,
5
  "eval_steps": 34,
6
- "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -984,6 +984,49 @@
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 1.1751,
986
  "step": 135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987
  }
988
  ],
989
  "logging_steps": 1,
@@ -1003,7 +1046,7 @@
1003
  "attributes": {}
1004
  }
1005
  },
1006
- "total_flos": 5.992588163992781e+17,
1007
  "train_batch_size": 3,
1008
  "trial_name": null,
1009
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06846384253316218,
5
  "eval_steps": 34,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 1.1751,
986
  "step": 135
987
+ },
988
+ {
989
+ "epoch": 0.0665077327465004,
990
+ "grad_norm": 0.3435130715370178,
991
+ "learning_rate": 6.215889499576898e-05,
992
+ "loss": 1.0164,
993
+ "step": 136
994
+ },
995
+ {
996
+ "epoch": 0.0665077327465004,
997
+ "eval_loss": 0.9951372146606445,
998
+ "eval_runtime": 1359.5911,
999
+ "eval_samples_per_second": 1.9,
1000
+ "eval_steps_per_second": 0.633,
1001
+ "step": 136
1002
+ },
1003
+ {
1004
+ "epoch": 0.06699676019316585,
1005
+ "grad_norm": 0.2642524540424347,
1006
+ "learning_rate": 6.0454879312945754e-05,
1007
+ "loss": 0.9332,
1008
+ "step": 137
1009
+ },
1010
+ {
1011
+ "epoch": 0.06748578763983129,
1012
+ "grad_norm": 0.30863985419273376,
1013
+ "learning_rate": 5.876436825260967e-05,
1014
+ "loss": 1.0283,
1015
+ "step": 138
1016
+ },
1017
+ {
1018
+ "epoch": 0.06797481508649673,
1019
+ "grad_norm": 0.5168886780738831,
1020
+ "learning_rate": 5.708793912273911e-05,
1021
+ "loss": 0.6723,
1022
+ "step": 139
1023
+ },
1024
+ {
1025
+ "epoch": 0.06846384253316218,
1026
+ "grad_norm": 0.4818655848503113,
1027
+ "learning_rate": 5.542616442234618e-05,
1028
+ "loss": 1.1226,
1029
+ "step": 140
1030
  }
1031
  ],
1032
  "logging_steps": 1,
 
1046
  "attributes": {}
1047
  }
1048
  },
1049
+ "total_flos": 6.214535873770291e+17,
1050
  "train_batch_size": 3,
1051
  "trial_name": null,
1052
  "trial_params": null