VERSIL91 committed on
Commit
5a95bf5
·
verified ·
1 Parent(s): d8f2cac

Training in progress, step 56, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65404fcba803e23e53086ab488dde090b236bd4a10fdff1f3a362e117647386f
3
  size 14696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e019a9f57ecb8b85aec8bb6eee672a122e591e36bd2565bc6c4c332b190d0fc5
3
  size 14696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781a3ba2ec16b5917dc91ce4dbf65b6416c24180b4df590c64bdb07408768a47
3
  size 39398
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdac6c649202ef2291ed5061c16538bc0530983fc22e3831ec5e6375e3ece8d5
3
  size 39398
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec1fe9c39ff4b5f539b12a8ca1b294f1888da074b9e285a01034d20290d60d4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f3d68d08c427bfcb26826cb256501e784dd1de9309d24ccf5d718c10dd6b5c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e60975a7e32769ae56ab7bbabcff8a55576b728ed866489161a176028dde2134
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41d236d9989ea458dc3f7994dccaa194aab3668a60eebd5db6cd4583a97af79
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7508379888268156,
5
  "eval_steps": 14,
6
- "global_step": 42,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -333,6 +333,112 @@
333
  "eval_samples_per_second": 249.752,
334
  "eval_steps_per_second": 126.19,
335
  "step": 42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  }
337
  ],
338
  "logging_steps": 1,
@@ -347,12 +453,12 @@
347
  "should_evaluate": false,
348
  "should_log": false,
349
  "should_save": true,
350
- "should_training_stop": false
351
  },
352
  "attributes": {}
353
  }
354
  },
355
- "total_flos": 19851116544.0,
356
  "train_batch_size": 2,
357
  "trial_name": null,
358
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0033519553072625,
5
  "eval_steps": 14,
6
+ "global_step": 56,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
333
  "eval_samples_per_second": 249.752,
334
  "eval_steps_per_second": 126.19,
335
  "step": 42
336
+ },
337
+ {
338
+ "epoch": 0.7687150837988826,
339
+ "grad_norm": 0.010876229964196682,
340
+ "learning_rate": 1.8445602783697374e-05,
341
+ "loss": 11.9305,
342
+ "step": 43
343
+ },
344
+ {
345
+ "epoch": 0.7865921787709497,
346
+ "grad_norm": 0.008084769360721111,
347
+ "learning_rate": 1.5872342839067306e-05,
348
+ "loss": 11.9299,
349
+ "step": 44
350
+ },
351
+ {
352
+ "epoch": 0.8044692737430168,
353
+ "grad_norm": 0.009019813500344753,
354
+ "learning_rate": 1.3458201786093794e-05,
355
+ "loss": 11.9283,
356
+ "step": 45
357
+ },
358
+ {
359
+ "epoch": 0.8223463687150838,
360
+ "grad_norm": 0.008098295889794827,
361
+ "learning_rate": 1.1214435464779006e-05,
362
+ "loss": 11.9292,
363
+ "step": 46
364
+ },
365
+ {
366
+ "epoch": 0.8402234636871508,
367
+ "grad_norm": 0.008133570663630962,
368
+ "learning_rate": 9.151505349477902e-06,
369
+ "loss": 11.9289,
370
+ "step": 47
371
+ },
372
+ {
373
+ "epoch": 0.8581005586592179,
374
+ "grad_norm": 0.012650455348193645,
375
+ "learning_rate": 7.2790297726755716e-06,
376
+ "loss": 11.9302,
377
+ "step": 48
378
+ },
379
+ {
380
+ "epoch": 0.8759776536312849,
381
+ "grad_norm": 0.009691119194030762,
382
+ "learning_rate": 5.605739079881239e-06,
383
+ "loss": 11.9307,
384
+ "step": 49
385
+ },
386
+ {
387
+ "epoch": 0.8938547486033519,
388
+ "grad_norm": 0.009178046137094498,
389
+ "learning_rate": 4.139434924727359e-06,
390
+ "loss": 11.9297,
391
+ "step": 50
392
+ },
393
+ {
394
+ "epoch": 0.911731843575419,
395
+ "grad_norm": 0.008271483704447746,
396
+ "learning_rate": 2.88695389405898e-06,
397
+ "loss": 11.9291,
398
+ "step": 51
399
+ },
400
+ {
401
+ "epoch": 0.929608938547486,
402
+ "grad_norm": 0.00795311015099287,
403
+ "learning_rate": 1.8541356326100433e-06,
404
+ "loss": 11.9286,
405
+ "step": 52
406
+ },
407
+ {
408
+ "epoch": 0.9474860335195531,
409
+ "grad_norm": 0.008445663377642632,
410
+ "learning_rate": 1.0457956158838544e-06,
411
+ "loss": 11.932,
412
+ "step": 53
413
+ },
414
+ {
415
+ "epoch": 0.9653631284916201,
416
+ "grad_norm": 0.009014743380248547,
417
+ "learning_rate": 4.6570269818346224e-07,
418
+ "loss": 11.9283,
419
+ "step": 54
420
+ },
421
+ {
422
+ "epoch": 0.9832402234636871,
423
+ "grad_norm": 0.009628918021917343,
424
+ "learning_rate": 1.1656154047303691e-07,
425
+ "loss": 11.9317,
426
+ "step": 55
427
+ },
428
+ {
429
+ "epoch": 1.0033519553072625,
430
+ "grad_norm": 0.00985956471413374,
431
+ "learning_rate": 0.0,
432
+ "loss": 13.9592,
433
+ "step": 56
434
+ },
435
+ {
436
+ "epoch": 1.0033519553072625,
437
+ "eval_loss": 11.928487777709961,
438
+ "eval_runtime": 0.3738,
439
+ "eval_samples_per_second": 254.136,
440
+ "eval_steps_per_second": 128.405,
441
+ "step": 56
442
  }
443
  ],
444
  "logging_steps": 1,
 
453
  "should_evaluate": false,
454
  "should_log": false,
455
  "should_save": true,
456
+ "should_training_stop": true
457
  },
458
  "attributes": {}
459
  }
460
  },
461
+ "total_flos": 26468155392.0,
462
  "train_batch_size": 2,
463
  "trial_name": null,
464
  "trial_params": null