Training in progress, step 20000
Browse files- model.safetensors +1 -1
- trainer_log.jsonl +156 -0
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 169434248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bd0769a88f795574127e99bf663e5f9c47b9a23b4ea72d354c1beb2cfe0d328
|
3 |
size 169434248
|
trainer_log.jsonl
CHANGED
@@ -468,3 +468,159 @@
|
|
468 |
{"current_steps": 14976, "total_steps": 20000, "loss": 3.3139, "eval_loss": null, "predict_loss": null, "learning_rate": 7.066795684245788e-05, "epoch": 0.8529931081619867, "percentage": 74.88}
|
469 |
{"current_steps": 15008, "total_steps": 20000, "loss": 3.2513, "eval_loss": null, "predict_loss": null, "learning_rate": 7.017883949439288e-05, "epoch": 0.8548157430084866, "percentage": 75.04}
|
470 |
{"current_steps": 15040, "total_steps": 20000, "loss": 3.3781, "eval_loss": null, "predict_loss": null, "learning_rate": 6.969216655409388e-05, "epoch": 0.8566383778549866, "percentage": 75.2}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
{"current_steps": 14976, "total_steps": 20000, "loss": 3.3139, "eval_loss": null, "predict_loss": null, "learning_rate": 7.066795684245788e-05, "epoch": 0.8529931081619867, "percentage": 74.88}
|
469 |
{"current_steps": 15008, "total_steps": 20000, "loss": 3.2513, "eval_loss": null, "predict_loss": null, "learning_rate": 7.017883949439288e-05, "epoch": 0.8548157430084866, "percentage": 75.04}
|
470 |
{"current_steps": 15040, "total_steps": 20000, "loss": 3.3781, "eval_loss": null, "predict_loss": null, "learning_rate": 6.969216655409388e-05, "epoch": 0.8566383778549866, "percentage": 75.2}
|
471 |
+
{"current_steps": 15072, "total_steps": 20000, "loss": 3.3177, "eval_loss": null, "predict_loss": null, "learning_rate": 6.92079505675697e-05, "epoch": 0.8584610127014866, "percentage": 75.36}
|
472 |
+
{"current_steps": 15104, "total_steps": 20000, "loss": 3.3132, "eval_loss": null, "predict_loss": null, "learning_rate": 6.872620401749094e-05, "epoch": 0.8602836475479866, "percentage": 75.52}
|
473 |
+
{"current_steps": 15136, "total_steps": 20000, "loss": 3.5272, "eval_loss": null, "predict_loss": null, "learning_rate": 6.824693932286834e-05, "epoch": 0.8621062823944865, "percentage": 75.68}
|
474 |
+
{"current_steps": 15168, "total_steps": 20000, "loss": 3.3706, "eval_loss": null, "predict_loss": null, "learning_rate": 6.777016883873253e-05, "epoch": 0.8639289172409865, "percentage": 75.84}
|
475 |
+
{"current_steps": 15200, "total_steps": 20000, "loss": 3.3582, "eval_loss": null, "predict_loss": null, "learning_rate": 6.729590485581552e-05, "epoch": 0.8657515520874864, "percentage": 76.0}
|
476 |
+
{"current_steps": 15232, "total_steps": 20000, "loss": 3.3588, "eval_loss": null, "predict_loss": null, "learning_rate": 6.68241596002339e-05, "epoch": 0.8675741869339865, "percentage": 76.16}
|
477 |
+
{"current_steps": 15264, "total_steps": 20000, "loss": 3.3436, "eval_loss": null, "predict_loss": null, "learning_rate": 6.63549452331737e-05, "epoch": 0.8693968217804864, "percentage": 76.32}
|
478 |
+
{"current_steps": 15296, "total_steps": 20000, "loss": 3.4766, "eval_loss": null, "predict_loss": null, "learning_rate": 6.588827385057663e-05, "epoch": 0.8712194566269864, "percentage": 76.48}
|
479 |
+
{"current_steps": 15328, "total_steps": 20000, "loss": 3.503, "eval_loss": null, "predict_loss": null, "learning_rate": 6.542415748282867e-05, "epoch": 0.8730420914734863, "percentage": 76.64}
|
480 |
+
{"current_steps": 15360, "total_steps": 20000, "loss": 3.2933, "eval_loss": null, "predict_loss": null, "learning_rate": 6.496260809444961e-05, "epoch": 0.8748647263199864, "percentage": 76.8}
|
481 |
+
{"current_steps": 15392, "total_steps": 20000, "loss": 3.2031, "eval_loss": null, "predict_loss": null, "learning_rate": 6.45036375837848e-05, "epoch": 0.8766873611664863, "percentage": 76.96}
|
482 |
+
{"current_steps": 15424, "total_steps": 20000, "loss": 3.4419, "eval_loss": null, "predict_loss": null, "learning_rate": 6.404725778269821e-05, "epoch": 0.8785099960129863, "percentage": 77.12}
|
483 |
+
{"current_steps": 15456, "total_steps": 20000, "loss": 3.2545, "eval_loss": null, "predict_loss": null, "learning_rate": 6.359348045626768e-05, "epoch": 0.8803326308594862, "percentage": 77.28}
|
484 |
+
{"current_steps": 15488, "total_steps": 20000, "loss": 3.3168, "eval_loss": null, "predict_loss": null, "learning_rate": 6.314231730248144e-05, "epoch": 0.8821552657059862, "percentage": 77.44}
|
485 |
+
{"current_steps": 15520, "total_steps": 20000, "loss": 3.1868, "eval_loss": null, "predict_loss": null, "learning_rate": 6.269377995193662e-05, "epoch": 0.8839779005524862, "percentage": 77.6}
|
486 |
+
{"current_steps": 15552, "total_steps": 20000, "loss": 3.2727, "eval_loss": null, "predict_loss": null, "learning_rate": 6.224787996753946e-05, "epoch": 0.8858005353989862, "percentage": 77.76}
|
487 |
+
{"current_steps": 15584, "total_steps": 20000, "loss": 3.4514, "eval_loss": null, "predict_loss": null, "learning_rate": 6.180462884420702e-05, "epoch": 0.8876231702454861, "percentage": 77.92}
|
488 |
+
{"current_steps": 15616, "total_steps": 20000, "loss": 3.3448, "eval_loss": null, "predict_loss": null, "learning_rate": 6.136403800857113e-05, "epoch": 0.8894458050919861, "percentage": 78.08}
|
489 |
+
{"current_steps": 15648, "total_steps": 20000, "loss": 3.2692, "eval_loss": null, "predict_loss": null, "learning_rate": 6.092611881868369e-05, "epoch": 0.891268439938486, "percentage": 78.24}
|
490 |
+
{"current_steps": 15680, "total_steps": 20000, "loss": 3.1736, "eval_loss": null, "predict_loss": null, "learning_rate": 6.049088256372386e-05, "epoch": 0.8930910747849861, "percentage": 78.4}
|
491 |
+
{"current_steps": 15712, "total_steps": 20000, "loss": 3.0885, "eval_loss": null, "predict_loss": null, "learning_rate": 6.005834046370704e-05, "epoch": 0.894913709631486, "percentage": 78.56}
|
492 |
+
{"current_steps": 15744, "total_steps": 20000, "loss": 3.4021, "eval_loss": null, "predict_loss": null, "learning_rate": 5.962850366919566e-05, "epoch": 0.896736344477986, "percentage": 78.72}
|
493 |
+
{"current_steps": 15776, "total_steps": 20000, "loss": 3.2818, "eval_loss": null, "predict_loss": null, "learning_rate": 5.9201383261011636e-05, "epoch": 0.8985589793244859, "percentage": 78.88}
|
494 |
+
{"current_steps": 15808, "total_steps": 20000, "loss": 3.2196, "eval_loss": null, "predict_loss": null, "learning_rate": 5.877699024995088e-05, "epoch": 0.9003816141709859, "percentage": 79.04}
|
495 |
+
{"current_steps": 15840, "total_steps": 20000, "loss": 3.2524, "eval_loss": null, "predict_loss": null, "learning_rate": 5.835533557649932e-05, "epoch": 0.9022042490174859, "percentage": 79.2}
|
496 |
+
{"current_steps": 15872, "total_steps": 20000, "loss": 3.3108, "eval_loss": null, "predict_loss": null, "learning_rate": 5.793643011055091e-05, "epoch": 0.9040268838639859, "percentage": 79.36}
|
497 |
+
{"current_steps": 15904, "total_steps": 20000, "loss": 3.1776, "eval_loss": null, "predict_loss": null, "learning_rate": 5.752028465112736e-05, "epoch": 0.9058495187104858, "percentage": 79.52}
|
498 |
+
{"current_steps": 15936, "total_steps": 20000, "loss": 3.3016, "eval_loss": null, "predict_loss": null, "learning_rate": 5.710690992609983e-05, "epoch": 0.9076721535569858, "percentage": 79.68}
|
499 |
+
{"current_steps": 15968, "total_steps": 20000, "loss": 3.239, "eval_loss": null, "predict_loss": null, "learning_rate": 5.6696316591912355e-05, "epoch": 0.9094947884034857, "percentage": 79.84}
|
500 |
+
{"current_steps": 16000, "total_steps": 20000, "loss": 3.2968, "eval_loss": null, "predict_loss": null, "learning_rate": 5.628851523330708e-05, "epoch": 0.9113174232499858, "percentage": 80.0}
|
501 |
+
{"current_steps": 16032, "total_steps": 20000, "loss": 3.4124, "eval_loss": null, "predict_loss": null, "learning_rate": 5.588351636305146e-05, "epoch": 0.9131400580964857, "percentage": 80.16}
|
502 |
+
{"current_steps": 16064, "total_steps": 20000, "loss": 3.2489, "eval_loss": null, "predict_loss": null, "learning_rate": 5.548133042166714e-05, "epoch": 0.9149626929429857, "percentage": 80.32}
|
503 |
+
{"current_steps": 16096, "total_steps": 20000, "loss": 3.4498, "eval_loss": null, "predict_loss": null, "learning_rate": 5.5081967777161005e-05, "epoch": 0.9167853277894856, "percentage": 80.48}
|
504 |
+
{"current_steps": 16128, "total_steps": 20000, "loss": 3.4449, "eval_loss": null, "predict_loss": null, "learning_rate": 5.468543872475766e-05, "epoch": 0.9186079626359857, "percentage": 80.64}
|
505 |
+
{"current_steps": 16160, "total_steps": 20000, "loss": 3.2734, "eval_loss": null, "predict_loss": null, "learning_rate": 5.42917534866342e-05, "epoch": 0.9204305974824856, "percentage": 80.8}
|
506 |
+
{"current_steps": 16192, "total_steps": 20000, "loss": 3.2834, "eval_loss": null, "predict_loss": null, "learning_rate": 5.390092221165669e-05, "epoch": 0.9222532323289856, "percentage": 80.96}
|
507 |
+
{"current_steps": 16224, "total_steps": 20000, "loss": 3.1821, "eval_loss": null, "predict_loss": null, "learning_rate": 5.3512954975118384e-05, "epoch": 0.9240758671754855, "percentage": 81.12}
|
508 |
+
{"current_steps": 16256, "total_steps": 20000, "loss": 3.4049, "eval_loss": null, "predict_loss": null, "learning_rate": 5.3127861778480155e-05, "epoch": 0.9258985020219855, "percentage": 81.28}
|
509 |
+
{"current_steps": 16288, "total_steps": 20000, "loss": 3.5117, "eval_loss": null, "predict_loss": null, "learning_rate": 5.274565254911261e-05, "epoch": 0.9277211368684855, "percentage": 81.44}
|
510 |
+
{"current_steps": 16320, "total_steps": 20000, "loss": 3.4768, "eval_loss": null, "predict_loss": null, "learning_rate": 5.236633714004014e-05, "epoch": 0.9295437717149855, "percentage": 81.6}
|
511 |
+
{"current_steps": 16352, "total_steps": 20000, "loss": 3.2567, "eval_loss": null, "predict_loss": null, "learning_rate": 5.1989925329686985e-05, "epoch": 0.9313664065614854, "percentage": 81.76}
|
512 |
+
{"current_steps": 16384, "total_steps": 20000, "loss": 3.2253, "eval_loss": null, "predict_loss": null, "learning_rate": 5.161642682162506e-05, "epoch": 0.9331890414079854, "percentage": 81.92}
|
513 |
+
{"current_steps": 16416, "total_steps": 20000, "loss": 3.3911, "eval_loss": null, "predict_loss": null, "learning_rate": 5.1245851244323926e-05, "epoch": 0.9350116762544853, "percentage": 82.08}
|
514 |
+
{"current_steps": 16448, "total_steps": 20000, "loss": 3.3593, "eval_loss": null, "predict_loss": null, "learning_rate": 5.087820815090239e-05, "epoch": 0.9368343111009854, "percentage": 82.24}
|
515 |
+
{"current_steps": 16480, "total_steps": 20000, "loss": 3.2743, "eval_loss": null, "predict_loss": null, "learning_rate": 5.0513507018882495e-05, "epoch": 0.9386569459474854, "percentage": 82.4}
|
516 |
+
{"current_steps": 16512, "total_steps": 20000, "loss": 3.2987, "eval_loss": null, "predict_loss": null, "learning_rate": 5.015175724994498e-05, "epoch": 0.9404795807939853, "percentage": 82.56}
|
517 |
+
{"current_steps": 16544, "total_steps": 20000, "loss": 3.4634, "eval_loss": null, "predict_loss": null, "learning_rate": 4.979296816968697e-05, "epoch": 0.9423022156404853, "percentage": 82.72}
|
518 |
+
{"current_steps": 16576, "total_steps": 20000, "loss": 3.2298, "eval_loss": null, "predict_loss": null, "learning_rate": 4.943714902738163e-05, "epoch": 0.9441248504869852, "percentage": 82.88}
|
519 |
+
{"current_steps": 16608, "total_steps": 20000, "loss": 3.4775, "eval_loss": null, "predict_loss": null, "learning_rate": 4.908430899573967e-05, "epoch": 0.9459474853334853, "percentage": 83.04}
|
520 |
+
{"current_steps": 16640, "total_steps": 20000, "loss": 3.3456, "eval_loss": null, "predict_loss": null, "learning_rate": 4.873445717067292e-05, "epoch": 0.9477701201799852, "percentage": 83.2}
|
521 |
+
{"current_steps": 16672, "total_steps": 20000, "loss": 3.2322, "eval_loss": null, "predict_loss": null, "learning_rate": 4.838760257105979e-05, "epoch": 0.9495927550264852, "percentage": 83.36}
|
522 |
+
{"current_steps": 16704, "total_steps": 20000, "loss": 3.4117, "eval_loss": null, "predict_loss": null, "learning_rate": 4.8043754138512794e-05, "epoch": 0.9514153898729851, "percentage": 83.52}
|
523 |
+
{"current_steps": 16736, "total_steps": 20000, "loss": 3.2533, "eval_loss": null, "predict_loss": null, "learning_rate": 4.770292073714807e-05, "epoch": 0.9532380247194852, "percentage": 83.68}
|
524 |
+
{"current_steps": 16768, "total_steps": 20000, "loss": 3.076, "eval_loss": null, "predict_loss": null, "learning_rate": 4.73651111533569e-05, "epoch": 0.9550606595659851, "percentage": 83.84}
|
525 |
+
{"current_steps": 16800, "total_steps": 20000, "loss": 3.3636, "eval_loss": null, "predict_loss": null, "learning_rate": 4.703033409557903e-05, "epoch": 0.9568832944124851, "percentage": 84.0}
|
526 |
+
{"current_steps": 16832, "total_steps": 20000, "loss": 3.2092, "eval_loss": null, "predict_loss": null, "learning_rate": 4.669859819407844e-05, "epoch": 0.958705929258985, "percentage": 84.16}
|
527 |
+
{"current_steps": 16864, "total_steps": 20000, "loss": 3.2306, "eval_loss": null, "predict_loss": null, "learning_rate": 4.63699120007206e-05, "epoch": 0.960528564105485, "percentage": 84.32}
|
528 |
+
{"current_steps": 16896, "total_steps": 20000, "loss": 3.2953, "eval_loss": null, "predict_loss": null, "learning_rate": 4.6044283988752214e-05, "epoch": 0.962351198951985, "percentage": 84.48}
|
529 |
+
{"current_steps": 16928, "total_steps": 20000, "loss": 3.4327, "eval_loss": null, "predict_loss": null, "learning_rate": 4.572172255258268e-05, "epoch": 0.964173833798485, "percentage": 84.64}
|
530 |
+
{"current_steps": 16960, "total_steps": 20000, "loss": 3.3192, "eval_loss": null, "predict_loss": null, "learning_rate": 4.540223600756775e-05, "epoch": 0.9659964686449849, "percentage": 84.8}
|
531 |
+
{"current_steps": 16992, "total_steps": 20000, "loss": 3.2256, "eval_loss": null, "predict_loss": null, "learning_rate": 4.508583258979507e-05, "epoch": 0.9678191034914849, "percentage": 84.96}
|
532 |
+
{"current_steps": 17024, "total_steps": 20000, "loss": 3.3236, "eval_loss": null, "predict_loss": null, "learning_rate": 4.4772520455871974e-05, "epoch": 0.9696417383379848, "percentage": 85.12}
|
533 |
+
{"current_steps": 17056, "total_steps": 20000, "loss": 3.2133, "eval_loss": null, "predict_loss": null, "learning_rate": 4.446230768271513e-05, "epoch": 0.9714643731844849, "percentage": 85.28}
|
534 |
+
{"current_steps": 17088, "total_steps": 20000, "loss": 3.373, "eval_loss": null, "predict_loss": null, "learning_rate": 4.415520226734242e-05, "epoch": 0.9732870080309848, "percentage": 85.44}
|
535 |
+
{"current_steps": 17120, "total_steps": 20000, "loss": 3.3513, "eval_loss": null, "predict_loss": null, "learning_rate": 4.385121212666663e-05, "epoch": 0.9751096428774848, "percentage": 85.6}
|
536 |
+
{"current_steps": 17152, "total_steps": 20000, "loss": 3.2226, "eval_loss": null, "predict_loss": null, "learning_rate": 4.355034509729152e-05, "epoch": 0.9769322777239847, "percentage": 85.76}
|
537 |
+
{"current_steps": 17184, "total_steps": 20000, "loss": 3.2886, "eval_loss": null, "predict_loss": null, "learning_rate": 4.325260893530965e-05, "epoch": 0.9787549125704847, "percentage": 85.92}
|
538 |
+
{"current_steps": 17216, "total_steps": 20000, "loss": 3.3178, "eval_loss": null, "predict_loss": null, "learning_rate": 4.295801131610265e-05, "epoch": 0.9805775474169847, "percentage": 86.08}
|
539 |
+
{"current_steps": 17248, "total_steps": 20000, "loss": 3.069, "eval_loss": null, "predict_loss": null, "learning_rate": 4.266655983414312e-05, "epoch": 0.9824001822634847, "percentage": 86.24}
|
540 |
+
{"current_steps": 17280, "total_steps": 20000, "loss": 3.2765, "eval_loss": null, "predict_loss": null, "learning_rate": 4.237826200279898e-05, "epoch": 0.9842228171099846, "percentage": 86.4}
|
541 |
+
{"current_steps": 17312, "total_steps": 20000, "loss": 3.3029, "eval_loss": null, "predict_loss": null, "learning_rate": 4.209312525413978e-05, "epoch": 0.9860454519564846, "percentage": 86.56}
|
542 |
+
{"current_steps": 17344, "total_steps": 20000, "loss": 3.2338, "eval_loss": null, "predict_loss": null, "learning_rate": 4.1811156938745036e-05, "epoch": 0.9878680868029845, "percentage": 86.72}
|
543 |
+
{"current_steps": 17376, "total_steps": 20000, "loss": 3.2381, "eval_loss": null, "predict_loss": null, "learning_rate": 4.153236432551488e-05, "epoch": 0.9896907216494846, "percentage": 86.88}
|
544 |
+
{"current_steps": 17408, "total_steps": 20000, "loss": 3.4429, "eval_loss": null, "predict_loss": null, "learning_rate": 4.125675460148243e-05, "epoch": 0.9915133564959845, "percentage": 87.04}
|
545 |
+
{"current_steps": 17440, "total_steps": 20000, "loss": 3.2695, "eval_loss": null, "predict_loss": null, "learning_rate": 4.09843348716288e-05, "epoch": 0.9933359913424845, "percentage": 87.2}
|
546 |
+
{"current_steps": 17472, "total_steps": 20000, "loss": 3.1872, "eval_loss": null, "predict_loss": null, "learning_rate": 4.071511215869974e-05, "epoch": 0.9951586261889844, "percentage": 87.36}
|
547 |
+
{"current_steps": 17504, "total_steps": 20000, "loss": 3.2188, "eval_loss": null, "predict_loss": null, "learning_rate": 4.044909340302465e-05, "epoch": 0.9969812610354845, "percentage": 87.52}
|
548 |
+
{"current_steps": 17536, "total_steps": 20000, "loss": 3.1875, "eval_loss": null, "predict_loss": null, "learning_rate": 4.018628546233774e-05, "epoch": 0.9988038958819844, "percentage": 87.68}
|
549 |
+
{"current_steps": 17568, "total_steps": 20000, "loss": 3.4438, "eval_loss": null, "predict_loss": null, "learning_rate": 3.992669511160111e-05, "epoch": 1.0006265307284843, "percentage": 87.84}
|
550 |
+
{"current_steps": 17600, "total_steps": 20000, "loss": 3.1772, "eval_loss": null, "predict_loss": null, "learning_rate": 3.9670329042830205e-05, "epoch": 1.0024491655749843, "percentage": 88.0}
|
551 |
+
{"current_steps": 17632, "total_steps": 20000, "loss": 3.3985, "eval_loss": null, "predict_loss": null, "learning_rate": 3.941719386492123e-05, "epoch": 1.0042718004214843, "percentage": 88.16}
|
552 |
+
{"current_steps": 17664, "total_steps": 20000, "loss": 3.1783, "eval_loss": null, "predict_loss": null, "learning_rate": 3.916729610348085e-05, "epoch": 1.0060944352679844, "percentage": 88.32}
|
553 |
+
{"current_steps": 17696, "total_steps": 20000, "loss": 3.3251, "eval_loss": null, "predict_loss": null, "learning_rate": 3.89206422006579e-05, "epoch": 1.0079170701144842, "percentage": 88.48}
|
554 |
+
{"current_steps": 17728, "total_steps": 20000, "loss": 3.322, "eval_loss": null, "predict_loss": null, "learning_rate": 3.8677238514977316e-05, "epoch": 1.0097397049609842, "percentage": 88.64}
|
555 |
+
{"current_steps": 17760, "total_steps": 20000, "loss": 3.3623, "eval_loss": null, "predict_loss": null, "learning_rate": 3.843709132117625e-05, "epoch": 1.0115623398074842, "percentage": 88.8}
|
556 |
+
{"current_steps": 17792, "total_steps": 20000, "loss": 3.1051, "eval_loss": null, "predict_loss": null, "learning_rate": 3.8200206810042385e-05, "epoch": 1.0133849746539842, "percentage": 88.96}
|
557 |
+
{"current_steps": 17824, "total_steps": 20000, "loss": 3.4684, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7966591088254076e-05, "epoch": 1.015207609500484, "percentage": 89.12}
|
558 |
+
{"current_steps": 17856, "total_steps": 20000, "loss": 3.245, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7736250178223284e-05, "epoch": 1.017030244346984, "percentage": 89.28}
|
559 |
+
{"current_steps": 17888, "total_steps": 20000, "loss": 3.2, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7509190017940066e-05, "epoch": 1.018852879193484, "percentage": 89.44}
|
560 |
+
{"current_steps": 17920, "total_steps": 20000, "loss": 3.1397, "eval_loss": null, "predict_loss": null, "learning_rate": 3.728541646081958e-05, "epoch": 1.0206755140399841, "percentage": 89.6}
|
561 |
+
{"current_steps": 17952, "total_steps": 20000, "loss": 3.1356, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7064935275551196e-05, "epoch": 1.022498148886484, "percentage": 89.76}
|
562 |
+
{"current_steps": 17984, "total_steps": 20000, "loss": 3.2471, "eval_loss": null, "predict_loss": null, "learning_rate": 3.6847752145949786e-05, "epoch": 1.024320783732984, "percentage": 89.92}
|
563 |
+
{"current_steps": 18016, "total_steps": 20000, "loss": 3.2271, "eval_loss": null, "predict_loss": null, "learning_rate": 3.66338726708092e-05, "epoch": 1.026143418579484, "percentage": 90.08}
|
564 |
+
{"current_steps": 18048, "total_steps": 20000, "loss": 3.3322, "eval_loss": null, "predict_loss": null, "learning_rate": 3.642330236375788e-05, "epoch": 1.027966053425984, "percentage": 90.24}
|
565 |
+
{"current_steps": 18080, "total_steps": 20000, "loss": 3.2742, "eval_loss": null, "predict_loss": null, "learning_rate": 3.6216046653116795e-05, "epoch": 1.0297886882724838, "percentage": 90.4}
|
566 |
+
{"current_steps": 18112, "total_steps": 20000, "loss": 3.3772, "eval_loss": null, "predict_loss": null, "learning_rate": 3.601211088175948e-05, "epoch": 1.0316113231189838, "percentage": 90.56}
|
567 |
+
{"current_steps": 18144, "total_steps": 20000, "loss": 3.2253, "eval_loss": null, "predict_loss": null, "learning_rate": 3.581150030697432e-05, "epoch": 1.0334339579654839, "percentage": 90.72}
|
568 |
+
{"current_steps": 18176, "total_steps": 20000, "loss": 3.2542, "eval_loss": null, "predict_loss": null, "learning_rate": 3.561422010032892e-05, "epoch": 1.035256592811984, "percentage": 90.88}
|
569 |
+
{"current_steps": 18208, "total_steps": 20000, "loss": 3.3016, "eval_loss": null, "predict_loss": null, "learning_rate": 3.5420275347536976e-05, "epoch": 1.0370792276584837, "percentage": 91.04}
|
570 |
+
{"current_steps": 18240, "total_steps": 20000, "loss": 3.3292, "eval_loss": null, "predict_loss": null, "learning_rate": 3.522967104832694e-05, "epoch": 1.0389018625049837, "percentage": 91.2}
|
571 |
+
{"current_steps": 18272, "total_steps": 20000, "loss": 3.2847, "eval_loss": null, "predict_loss": null, "learning_rate": 3.504241211631338e-05, "epoch": 1.0407244973514838, "percentage": 91.36}
|
572 |
+
{"current_steps": 18304, "total_steps": 20000, "loss": 3.111, "eval_loss": null, "predict_loss": null, "learning_rate": 3.485850337887007e-05, "epoch": 1.0425471321979838, "percentage": 91.52}
|
573 |
+
{"current_steps": 18336, "total_steps": 20000, "loss": 3.2267, "eval_loss": null, "predict_loss": null, "learning_rate": 3.467794957700573e-05, "epoch": 1.0443697670444836, "percentage": 91.68}
|
574 |
+
{"current_steps": 18368, "total_steps": 20000, "loss": 3.2945, "eval_loss": null, "predict_loss": null, "learning_rate": 3.450075536524166e-05, "epoch": 1.0461924018909836, "percentage": 91.84}
|
575 |
+
{"current_steps": 18400, "total_steps": 20000, "loss": 3.3187, "eval_loss": null, "predict_loss": null, "learning_rate": 3.432692531149191e-05, "epoch": 1.0480150367374836, "percentage": 92.0}
|
576 |
+
{"current_steps": 18432, "total_steps": 20000, "loss": 3.1775, "eval_loss": null, "predict_loss": null, "learning_rate": 3.4156463896945356e-05, "epoch": 1.0498376715839837, "percentage": 92.16}
|
577 |
+
{"current_steps": 18464, "total_steps": 20000, "loss": 3.2722, "eval_loss": null, "predict_loss": null, "learning_rate": 3.398937551595037e-05, "epoch": 1.0516603064304835, "percentage": 92.32}
|
578 |
+
{"current_steps": 18496, "total_steps": 20000, "loss": 3.1885, "eval_loss": null, "predict_loss": null, "learning_rate": 3.382566447590126e-05, "epoch": 1.0534829412769835, "percentage": 92.48}
|
579 |
+
{"current_steps": 18528, "total_steps": 20000, "loss": 3.1801, "eval_loss": null, "predict_loss": null, "learning_rate": 3.366533499712757e-05, "epoch": 1.0553055761234835, "percentage": 92.64}
|
580 |
+
{"current_steps": 18560, "total_steps": 20000, "loss": 3.3831, "eval_loss": null, "predict_loss": null, "learning_rate": 3.3508391212784984e-05, "epoch": 1.0571282109699836, "percentage": 92.8}
|
581 |
+
{"current_steps": 18592, "total_steps": 20000, "loss": 3.1692, "eval_loss": null, "predict_loss": null, "learning_rate": 3.3354837168748956e-05, "epoch": 1.0589508458164834, "percentage": 92.96}
|
582 |
+
{"current_steps": 18624, "total_steps": 20000, "loss": 3.0596, "eval_loss": null, "predict_loss": null, "learning_rate": 3.32046768235104e-05, "epoch": 1.0607734806629834, "percentage": 93.12}
|
583 |
+
{"current_steps": 18656, "total_steps": 20000, "loss": 3.352, "eval_loss": null, "predict_loss": null, "learning_rate": 3.305791404807349e-05, "epoch": 1.0625961155094834, "percentage": 93.28}
|
584 |
+
{"current_steps": 18688, "total_steps": 20000, "loss": 3.324, "eval_loss": null, "predict_loss": null, "learning_rate": 3.291455262585608e-05, "epoch": 1.0644187503559834, "percentage": 93.44}
|
585 |
+
{"current_steps": 18720, "total_steps": 20000, "loss": 3.2363, "eval_loss": null, "predict_loss": null, "learning_rate": 3.277459625259199e-05, "epoch": 1.0662413852024832, "percentage": 93.6}
|
586 |
+
{"current_steps": 18752, "total_steps": 20000, "loss": 3.2239, "eval_loss": null, "predict_loss": null, "learning_rate": 3.2638048536235925e-05, "epoch": 1.0680640200489833, "percentage": 93.76}
|
587 |
+
{"current_steps": 18784, "total_steps": 20000, "loss": 3.2173, "eval_loss": null, "predict_loss": null, "learning_rate": 3.2504912996870223e-05, "epoch": 1.0698866548954833, "percentage": 93.92}
|
588 |
+
{"current_steps": 18816, "total_steps": 20000, "loss": 3.109, "eval_loss": null, "predict_loss": null, "learning_rate": 3.237519306661436e-05, "epoch": 1.0717092897419833, "percentage": 94.08}
|
589 |
+
{"current_steps": 18848, "total_steps": 20000, "loss": 3.288, "eval_loss": null, "predict_loss": null, "learning_rate": 3.224889208953625e-05, "epoch": 1.0735319245884831, "percentage": 94.24}
|
590 |
+
{"current_steps": 18880, "total_steps": 20000, "loss": 3.4045, "eval_loss": null, "predict_loss": null, "learning_rate": 3.21260133215662e-05, "epoch": 1.0753545594349831, "percentage": 94.4}
|
591 |
+
{"current_steps": 18912, "total_steps": 20000, "loss": 3.3063, "eval_loss": null, "predict_loss": null, "learning_rate": 3.200655993041291e-05, "epoch": 1.0771771942814832, "percentage": 94.56}
|
592 |
+
{"current_steps": 18944, "total_steps": 20000, "loss": 3.3941, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1890534995481836e-05, "epoch": 1.0789998291279832, "percentage": 94.72}
|
593 |
+
{"current_steps": 18976, "total_steps": 20000, "loss": 3.2295, "eval_loss": null, "predict_loss": null, "learning_rate": 3.177794150779575e-05, "epoch": 1.0808224639744832, "percentage": 94.88}
|
594 |
+
{"current_steps": 19008, "total_steps": 20000, "loss": 3.1485, "eval_loss": null, "predict_loss": null, "learning_rate": 3.166878236991767e-05, "epoch": 1.082645098820983, "percentage": 95.04}
|
595 |
+
{"current_steps": 19040, "total_steps": 20000, "loss": 3.0656, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1563060395876074e-05, "epoch": 1.084467733667483, "percentage": 95.2}
|
596 |
+
{"current_steps": 19072, "total_steps": 20000, "loss": 3.2092, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1460778311092306e-05, "epoch": 1.086290368513983, "percentage": 95.36}
|
597 |
+
{"current_steps": 19104, "total_steps": 20000, "loss": 3.2151, "eval_loss": null, "predict_loss": null, "learning_rate": 3.136193875231033e-05, "epoch": 1.0881130033604829, "percentage": 95.52}
|
598 |
+
{"current_steps": 19136, "total_steps": 20000, "loss": 3.2017, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1266544267528746e-05, "epoch": 1.089935638206983, "percentage": 95.68}
|
599 |
+
{"current_steps": 19168, "total_steps": 20000, "loss": 3.1767, "eval_loss": null, "predict_loss": null, "learning_rate": 3.117459731593514e-05, "epoch": 1.091758273053483, "percentage": 95.84}
|
600 |
+
{"current_steps": 19200, "total_steps": 20000, "loss": 3.2252, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1086100267842626e-05, "epoch": 1.093580907899983, "percentage": 96.0}
|
601 |
+
{"current_steps": 19232, "total_steps": 20000, "loss": 3.2337, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1001055404628825e-05, "epoch": 1.095403542746483, "percentage": 96.16}
|
602 |
+
{"current_steps": 19264, "total_steps": 20000, "loss": 3.2672, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0919464918676995e-05, "epoch": 1.0972261775929828, "percentage": 96.32}
|
603 |
+
{"current_steps": 19296, "total_steps": 20000, "loss": 3.2231, "eval_loss": null, "predict_loss": null, "learning_rate": 3.084133091331949e-05, "epoch": 1.0990488124394828, "percentage": 96.48}
|
604 |
+
{"current_steps": 19328, "total_steps": 20000, "loss": 3.214, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0766655402783616e-05, "epoch": 1.1008714472859829, "percentage": 96.64}
|
605 |
+
{"current_steps": 19360, "total_steps": 20000, "loss": 3.2734, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0695440312139664e-05, "epoch": 1.1026940821324827, "percentage": 96.8}
|
606 |
+
{"current_steps": 19392, "total_steps": 20000, "loss": 3.2981, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0627687477251264e-05, "epoch": 1.1045167169789827, "percentage": 96.96}
|
607 |
+
{"current_steps": 19424, "total_steps": 20000, "loss": 3.3372, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0563398644728066e-05, "epoch": 1.1063393518254827, "percentage": 97.12}
|
608 |
+
{"current_steps": 19456, "total_steps": 20000, "loss": 3.2728, "eval_loss": null, "predict_loss": null, "learning_rate": 3.050257547188077e-05, "epoch": 1.1081619866719827, "percentage": 97.28}
|
609 |
+
{"current_steps": 19488, "total_steps": 20000, "loss": 3.1758, "eval_loss": null, "predict_loss": null, "learning_rate": 3.044521952667833e-05, "epoch": 1.1099846215184828, "percentage": 97.44}
|
610 |
+
{"current_steps": 19520, "total_steps": 20000, "loss": 3.1932, "eval_loss": null, "predict_loss": null, "learning_rate": 3.039133228770754e-05, "epoch": 1.1118072563649826, "percentage": 97.6}
|
611 |
+
{"current_steps": 19552, "total_steps": 20000, "loss": 3.2914, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0340915144134993e-05, "epoch": 1.1136298912114826, "percentage": 97.76}
|
612 |
+
{"current_steps": 19584, "total_steps": 20000, "loss": 3.1345, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0293969395671157e-05, "epoch": 1.1154525260579826, "percentage": 97.92}
|
613 |
+
{"current_steps": 19616, "total_steps": 20000, "loss": 3.2481, "eval_loss": null, "predict_loss": null, "learning_rate": 3.025049625253697e-05, "epoch": 1.1172751609044826, "percentage": 98.08}
|
614 |
+
{"current_steps": 19648, "total_steps": 20000, "loss": 3.2937, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0210496835432564e-05, "epoch": 1.1190977957509824, "percentage": 98.24}
|
615 |
+
{"current_steps": 19680, "total_steps": 20000, "loss": 3.4591, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0173972175508445e-05, "epoch": 1.1209204305974825, "percentage": 98.4}
|
616 |
+
{"current_steps": 19712, "total_steps": 20000, "loss": 3.4243, "eval_loss": null, "predict_loss": null, "learning_rate": 3.014092321433883e-05, "epoch": 1.1227430654439825, "percentage": 98.56}
|
617 |
+
{"current_steps": 19744, "total_steps": 20000, "loss": 3.1639, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0111350803897467e-05, "epoch": 1.1245657002904825, "percentage": 98.72}
|
618 |
+
{"current_steps": 19776, "total_steps": 20000, "loss": 3.128, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0085255706535594e-05, "epoch": 1.1263883351369823, "percentage": 98.88}
|
619 |
+
{"current_steps": 19808, "total_steps": 20000, "loss": 3.1756, "eval_loss": null, "predict_loss": null, "learning_rate": 3.006263859496231e-05, "epoch": 1.1282109699834824, "percentage": 99.04}
|
620 |
+
{"current_steps": 19840, "total_steps": 20000, "loss": 3.1915, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0043500052227267e-05, "epoch": 1.1300336048299824, "percentage": 99.2}
|
621 |
+
{"current_steps": 19872, "total_steps": 20000, "loss": 3.2118, "eval_loss": null, "predict_loss": null, "learning_rate": 3.002784057170561e-05, "epoch": 1.1318562396764824, "percentage": 99.36}
|
622 |
+
{"current_steps": 19904, "total_steps": 20000, "loss": 3.1102, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0015660557085206e-05, "epoch": 1.1336788745229822, "percentage": 99.52}
|
623 |
+
{"current_steps": 19936, "total_steps": 20000, "loss": 3.1771, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0006960322356355e-05, "epoch": 1.1355015093694822, "percentage": 99.68}
|
624 |
+
{"current_steps": 19968, "total_steps": 20000, "loss": 3.2286, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0001740091803596e-05, "epoch": 1.1373241442159823, "percentage": 99.84}
|
625 |
+
{"current_steps": 20000, "total_steps": 20000, "loss": 3.3227, "eval_loss": null, "predict_loss": null, "learning_rate": 2.9999999999999997e-05, "epoch": 1.1391467790624823, "percentage": 100.0}
|
626 |
+
{"current_steps": 20000, "total_steps": 20000, "loss": null, "eval_loss": null, "predict_loss": null, "learning_rate": null, "epoch": 1.1391467790624823, "percentage": 100.0}
|