leixa committed on
Commit
7399839
·
verified ·
1 Parent(s): 6a4a4c7

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b51edb95931c50c7eccffe73d0c373447c32b5551b695812216e3bd9d18b2928
3
  size 36220072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e66a2dd8d3d2bbed1d876000dda0651212a54ba174dc9b3cd15e946adefaca8f
3
  size 36220072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:481a7a996369d8d39cb2ff983ba7d20de9cdb194073610ec25fb97d5046a40c3
3
- size 18763860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a9067022dd03df96d0f89703e9bc525a8489b35c83eb6ec203a92f4ee520a9
3
+ size 18764180
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba08bf9c96258aa66309cebbe094ba794b566e771cd04cb308d30904cb3fcb21
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:652624241b697f848aa0b96d2618afbd1e071c5dbd47f895da652fcde2a22b64
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ac116b8169c53ab649a7f15f2f32735f2c71ec2f803f70de8c655a513ee9cfc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.032609138711123795,
5
  "eval_steps": 100,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -311,6 +311,154 @@
311
  "eval_samples_per_second": 25.274,
312
  "eval_steps_per_second": 12.642,
313
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  }
315
  ],
316
  "logging_steps": 5,
@@ -330,7 +478,7 @@
330
  "attributes": {}
331
  }
332
  },
333
- "total_flos": 1.8669553072472064e+16,
334
  "train_batch_size": 2,
335
  "trial_name": null,
336
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04891370806668569,
5
  "eval_steps": 100,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
311
  "eval_samples_per_second": 25.274,
312
  "eval_steps_per_second": 12.642,
313
  "step": 200
314
+ },
315
+ {
316
+ "epoch": 0.03342436717890189,
317
+ "grad_norm": 0.18553270399570465,
318
+ "learning_rate": 5e-05,
319
+ "loss": 0.6936,
320
+ "step": 205
321
+ },
322
+ {
323
+ "epoch": 0.034239595646679985,
324
+ "grad_norm": 0.1617966592311859,
325
+ "learning_rate": 4.798670299452926e-05,
326
+ "loss": 0.7387,
327
+ "step": 210
328
+ },
329
+ {
330
+ "epoch": 0.03505482411445808,
331
+ "grad_norm": 0.23151330649852753,
332
+ "learning_rate": 4.597667156416371e-05,
333
+ "loss": 0.6698,
334
+ "step": 215
335
+ },
336
+ {
337
+ "epoch": 0.03587005258223617,
338
+ "grad_norm": 0.172349214553833,
339
+ "learning_rate": 4.397316598723385e-05,
340
+ "loss": 0.6974,
341
+ "step": 220
342
+ },
343
+ {
344
+ "epoch": 0.036685281050014264,
345
+ "grad_norm": 0.15773482620716095,
346
+ "learning_rate": 4.197943595711198e-05,
347
+ "loss": 0.7278,
348
+ "step": 225
349
+ },
350
+ {
351
+ "epoch": 0.03750050951779236,
352
+ "grad_norm": 0.1810629963874817,
353
+ "learning_rate": 3.9998715311197785e-05,
354
+ "loss": 0.6484,
355
+ "step": 230
356
+ },
357
+ {
358
+ "epoch": 0.038315737985570454,
359
+ "grad_norm": 0.18713156878948212,
360
+ "learning_rate": 3.803421678562213e-05,
361
+ "loss": 0.684,
362
+ "step": 235
363
+ },
364
+ {
365
+ "epoch": 0.03913096645334855,
366
+ "grad_norm": 0.25403130054473877,
367
+ "learning_rate": 3.608912680417737e-05,
368
+ "loss": 0.7086,
369
+ "step": 240
370
+ },
371
+ {
372
+ "epoch": 0.039946194921126645,
373
+ "grad_norm": 0.15474697947502136,
374
+ "learning_rate": 3.4166600309926387e-05,
375
+ "loss": 0.6171,
376
+ "step": 245
377
+ },
378
+ {
379
+ "epoch": 0.04076142338890474,
380
+ "grad_norm": 0.17823943495750427,
381
+ "learning_rate": 3.226975564787322e-05,
382
+ "loss": 0.6566,
383
+ "step": 250
384
+ },
385
+ {
386
+ "epoch": 0.041576651856682835,
387
+ "grad_norm": 0.34823134541511536,
388
+ "learning_rate": 3.0401669506996256e-05,
389
+ "loss": 0.6684,
390
+ "step": 255
391
+ },
392
+ {
393
+ "epoch": 0.04239188032446093,
394
+ "grad_norm": 0.15805622935295105,
395
+ "learning_rate": 2.8565371929847284e-05,
396
+ "loss": 0.6549,
397
+ "step": 260
398
+ },
399
+ {
400
+ "epoch": 0.043207108792239025,
401
+ "grad_norm": 0.13102880120277405,
402
+ "learning_rate": 2.6763841397811573e-05,
403
+ "loss": 0.6221,
404
+ "step": 265
405
+ },
406
+ {
407
+ "epoch": 0.04402233726001712,
408
+ "grad_norm": 0.15072394907474518,
409
+ "learning_rate": 2.500000000000001e-05,
410
+ "loss": 0.699,
411
+ "step": 270
412
+ },
413
+ {
414
+ "epoch": 0.044837565727795216,
415
+ "grad_norm": 0.18680483102798462,
416
+ "learning_rate": 2.3276708693609943e-05,
417
+ "loss": 0.7056,
418
+ "step": 275
419
+ },
420
+ {
421
+ "epoch": 0.04565279419557331,
422
+ "grad_norm": 0.19171744585037231,
423
+ "learning_rate": 2.1596762663442218e-05,
424
+ "loss": 0.6821,
425
+ "step": 280
426
+ },
427
+ {
428
+ "epoch": 0.046468022663351406,
429
+ "grad_norm": 0.2306189388036728,
430
+ "learning_rate": 1.996288678810105e-05,
431
+ "loss": 0.725,
432
+ "step": 285
433
+ },
434
+ {
435
+ "epoch": 0.0472832511311295,
436
+ "grad_norm": 0.2382626235485077,
437
+ "learning_rate": 1.837773122023114e-05,
438
+ "loss": 0.7041,
439
+ "step": 290
440
+ },
441
+ {
442
+ "epoch": 0.0480984795989076,
443
+ "grad_norm": 0.2878624498844147,
444
+ "learning_rate": 1.684386708796025e-05,
445
+ "loss": 0.7284,
446
+ "step": 295
447
+ },
448
+ {
449
+ "epoch": 0.04891370806668569,
450
+ "grad_norm": 0.2006363421678543,
451
+ "learning_rate": 1.536378232452003e-05,
452
+ "loss": 0.7574,
453
+ "step": 300
454
+ },
455
+ {
456
+ "epoch": 0.04891370806668569,
457
+ "eval_loss": 0.7295248508453369,
458
+ "eval_runtime": 102.1368,
459
+ "eval_samples_per_second": 25.29,
460
+ "eval_steps_per_second": 12.65,
461
+ "step": 300
462
  }
463
  ],
464
  "logging_steps": 5,
 
478
  "attributes": {}
479
  }
480
  },
481
+ "total_flos": 2.808394390752461e+16,
482
  "train_batch_size": 2,
483
  "trial_name": null,
484
  "trial_params": null