{ "best_metric": 0.9951154529307282, "best_model_checkpoint": "vit-hybrid-base-bit-384-finetuned-ibird/checkpoint-1131", "epoch": 2.9973474801061006, "eval_steps": 500, "global_step": 1695, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 10.60638427734375, "learning_rate": 2.9411764705882355e-06, "loss": 3.3842, "step": 10 }, { "epoch": 0.04, "grad_norm": 10.456100463867188, "learning_rate": 5.882352941176471e-06, "loss": 3.1282, "step": 20 }, { "epoch": 0.05, "grad_norm": 9.986790657043457, "learning_rate": 8.823529411764707e-06, "loss": 2.8815, "step": 30 }, { "epoch": 0.07, "grad_norm": 10.580012321472168, "learning_rate": 1.1764705882352942e-05, "loss": 2.3087, "step": 40 }, { "epoch": 0.09, "grad_norm": 7.9744133949279785, "learning_rate": 1.4705882352941177e-05, "loss": 1.5822, "step": 50 }, { "epoch": 0.11, "grad_norm": 6.688446044921875, "learning_rate": 1.7647058823529414e-05, "loss": 0.9381, "step": 60 }, { "epoch": 0.12, "grad_norm": 4.64487361907959, "learning_rate": 2.058823529411765e-05, "loss": 0.4115, "step": 70 }, { "epoch": 0.14, "grad_norm": 5.120860576629639, "learning_rate": 2.3529411764705884e-05, "loss": 0.1915, "step": 80 }, { "epoch": 0.16, "grad_norm": 2.6903035640716553, "learning_rate": 2.647058823529412e-05, "loss": 0.0935, "step": 90 }, { "epoch": 0.18, "grad_norm": 2.5217204093933105, "learning_rate": 2.9411764705882354e-05, "loss": 0.0949, "step": 100 }, { "epoch": 0.19, "grad_norm": 0.7664793133735657, "learning_rate": 3.235294117647059e-05, "loss": 0.0545, "step": 110 }, { "epoch": 0.21, "grad_norm": 4.868194580078125, "learning_rate": 3.529411764705883e-05, "loss": 0.0495, "step": 120 }, { "epoch": 0.23, "grad_norm": 3.152761459350586, "learning_rate": 3.8235294117647055e-05, "loss": 0.0566, "step": 130 }, { "epoch": 0.25, "grad_norm": 3.1319901943206787, "learning_rate": 4.11764705882353e-05, "loss": 0.0522, "step": 140 }, { "epoch": 0.27, "grad_norm": 2.599245309829712, "learning_rate": 4.411764705882353e-05, "loss": 0.0605, "step": 150 }, { "epoch": 0.28, "grad_norm": 0.845979630947113, "learning_rate": 4.705882352941177e-05, "loss": 0.0521, "step": 160 }, { "epoch": 0.3, "grad_norm": 6.3421759605407715, "learning_rate": 5e-05, "loss": 0.0849, "step": 170 }, { "epoch": 0.32, "grad_norm": 2.421968698501587, "learning_rate": 4.967213114754098e-05, "loss": 0.0502, "step": 180 }, { "epoch": 0.34, "grad_norm": 0.6349968910217285, "learning_rate": 4.934426229508197e-05, "loss": 0.0161, "step": 190 }, { "epoch": 0.35, "grad_norm": 0.3493717908859253, "learning_rate": 4.9016393442622957e-05, "loss": 0.027, "step": 200 }, { "epoch": 0.37, "grad_norm": 0.24897021055221558, "learning_rate": 4.868852459016394e-05, "loss": 0.0481, "step": 210 }, { "epoch": 0.39, "grad_norm": 0.10974353551864624, "learning_rate": 4.836065573770492e-05, "loss": 0.017, "step": 220 }, { "epoch": 0.41, "grad_norm": 2.431795358657837, "learning_rate": 4.8032786885245904e-05, "loss": 0.0494, "step": 230 }, { "epoch": 0.42, "grad_norm": 3.4805123805999756, "learning_rate": 4.770491803278689e-05, "loss": 0.0388, "step": 240 }, { "epoch": 0.44, "grad_norm": 2.0460708141326904, "learning_rate": 4.737704918032787e-05, "loss": 0.0507, "step": 250 }, { "epoch": 0.46, "grad_norm": 0.7923760414123535, "learning_rate": 4.704918032786885e-05, "loss": 0.0781, "step": 260 }, { "epoch": 0.48, "grad_norm": 2.829329252243042, "learning_rate": 4.672131147540984e-05, "loss": 0.0446, "step": 270 }, { "epoch": 0.5, "grad_norm": 0.8546658158302307, "learning_rate": 4.6393442622950825e-05, "loss": 0.0543, "step": 280 }, { "epoch": 0.51, "grad_norm": 0.2265116274356842, "learning_rate": 4.6065573770491805e-05, "loss": 0.0528, "step": 290 }, { "epoch": 0.53, "grad_norm": 2.7868409156799316, "learning_rate": 4.5737704918032786e-05, "loss": 0.0442, "step": 300 }, { "epoch": 0.55, "grad_norm": 0.05122404918074608, "learning_rate": 4.540983606557377e-05, "loss": 0.0473, "step": 310 }, { "epoch": 0.57, "grad_norm": 4.265463829040527, "learning_rate": 4.508196721311476e-05, "loss": 0.0358, "step": 320 }, { "epoch": 0.58, "grad_norm": 0.3292916417121887, "learning_rate": 4.475409836065574e-05, "loss": 0.0291, "step": 330 }, { "epoch": 0.6, "grad_norm": 1.2873409986495972, "learning_rate": 4.442622950819673e-05, "loss": 0.0681, "step": 340 }, { "epoch": 0.62, "grad_norm": 0.5806503891944885, "learning_rate": 4.409836065573771e-05, "loss": 0.0838, "step": 350 }, { "epoch": 0.64, "grad_norm": 4.996669769287109, "learning_rate": 4.377049180327869e-05, "loss": 0.0182, "step": 360 }, { "epoch": 0.65, "grad_norm": 0.058223385363817215, "learning_rate": 4.3442622950819674e-05, "loss": 0.0454, "step": 370 }, { "epoch": 0.67, "grad_norm": 0.5088571906089783, "learning_rate": 4.311475409836066e-05, "loss": 0.0444, "step": 380 }, { "epoch": 0.69, "grad_norm": 6.739996433258057, "learning_rate": 4.278688524590164e-05, "loss": 0.0445, "step": 390 }, { "epoch": 0.71, "grad_norm": 0.22746062278747559, "learning_rate": 4.245901639344262e-05, "loss": 0.0258, "step": 400 }, { "epoch": 0.73, "grad_norm": 0.04663905128836632, "learning_rate": 4.213114754098361e-05, "loss": 0.0023, "step": 410 }, { "epoch": 0.74, "grad_norm": 0.3145718574523926, "learning_rate": 4.1803278688524595e-05, "loss": 0.0205, "step": 420 }, { "epoch": 0.76, "grad_norm": 0.06759347021579742, "learning_rate": 4.1475409836065575e-05, "loss": 0.0779, "step": 430 }, { "epoch": 0.78, "grad_norm": 0.7269498705863953, "learning_rate": 4.1147540983606556e-05, "loss": 0.0194, "step": 440 }, { "epoch": 0.8, "grad_norm": 0.6663650274276733, "learning_rate": 4.081967213114754e-05, "loss": 0.0495, "step": 450 }, { "epoch": 0.81, "grad_norm": 0.015244757756590843, "learning_rate": 4.049180327868853e-05, "loss": 0.0217, "step": 460 }, { "epoch": 0.83, "grad_norm": 0.05687205493450165, "learning_rate": 4.016393442622951e-05, "loss": 0.0157, "step": 470 }, { "epoch": 0.85, "grad_norm": 0.06807345896959305, "learning_rate": 3.983606557377049e-05, "loss": 0.0021, "step": 480 }, { "epoch": 0.87, "grad_norm": 6.138463497161865, "learning_rate": 3.950819672131148e-05, "loss": 0.0473, "step": 490 }, { "epoch": 0.88, "grad_norm": 0.23685503005981445, "learning_rate": 3.9180327868852464e-05, "loss": 0.0289, "step": 500 }, { "epoch": 0.9, "grad_norm": 0.19919660687446594, "learning_rate": 3.8852459016393444e-05, "loss": 0.0659, "step": 510 }, { "epoch": 0.92, "grad_norm": 2.63227915763855, "learning_rate": 3.8524590163934424e-05, "loss": 0.0618, "step": 520 }, { "epoch": 0.94, "grad_norm": 0.18995435535907745, "learning_rate": 3.819672131147541e-05, "loss": 0.0151, "step": 530 }, { "epoch": 0.95, "grad_norm": 0.05399240553379059, "learning_rate": 3.78688524590164e-05, "loss": 0.0168, "step": 540 }, { "epoch": 0.97, "grad_norm": 0.13513301312923431, "learning_rate": 3.754098360655738e-05, "loss": 0.0492, "step": 550 }, { "epoch": 0.99, "grad_norm": 8.097681999206543, "learning_rate": 3.721311475409836e-05, "loss": 0.0997, "step": 560 }, { "epoch": 1.0, "eval_accuracy": 0.9906749555950266, "eval_loss": 0.031034117564558983, "eval_runtime": 124.3045, "eval_samples_per_second": 18.117, "eval_steps_per_second": 2.269, "step": 565 }, { "epoch": 1.01, "grad_norm": 0.08757388591766357, "learning_rate": 3.6885245901639346e-05, "loss": 0.0243, "step": 570 }, { "epoch": 1.03, "grad_norm": 0.03581409156322479, "learning_rate": 3.655737704918033e-05, "loss": 0.0101, "step": 580 }, { "epoch": 1.04, "grad_norm": 0.01882335916161537, "learning_rate": 3.622950819672131e-05, "loss": 0.0034, "step": 590 }, { "epoch": 1.06, "grad_norm": 0.12845346331596375, "learning_rate": 3.590163934426229e-05, "loss": 0.006, "step": 600 }, { "epoch": 1.08, "grad_norm": 6.836170196533203, "learning_rate": 3.557377049180328e-05, "loss": 0.0054, "step": 610 }, { "epoch": 1.1, "grad_norm": 0.014669831842184067, "learning_rate": 3.524590163934427e-05, "loss": 0.0007, "step": 620 }, { "epoch": 1.11, "grad_norm": 0.0303161833435297, "learning_rate": 3.491803278688525e-05, "loss": 0.0272, "step": 630 }, { "epoch": 1.13, "grad_norm": 0.010958039201796055, "learning_rate": 3.459016393442623e-05, "loss": 0.0016, "step": 640 }, { "epoch": 1.15, "grad_norm": 0.10876976698637009, "learning_rate": 3.4262295081967214e-05, "loss": 0.0113, "step": 650 }, { "epoch": 1.17, "grad_norm": 0.024475092068314552, "learning_rate": 3.39344262295082e-05, "loss": 0.0199, "step": 660 }, { "epoch": 1.18, "grad_norm": 3.7424721717834473, "learning_rate": 3.360655737704918e-05, "loss": 0.0153, "step": 670 }, { "epoch": 1.2, "grad_norm": 3.548234224319458, "learning_rate": 3.327868852459017e-05, "loss": 0.0124, "step": 680 }, { "epoch": 1.22, "grad_norm": 0.018901199102401733, "learning_rate": 3.295081967213115e-05, "loss": 0.0059, "step": 690 }, { "epoch": 1.24, "grad_norm": 0.01598968915641308, "learning_rate": 3.2622950819672136e-05, "loss": 0.0272, "step": 700 }, { "epoch": 1.26, "grad_norm": 0.01613859459757805, "learning_rate": 3.2295081967213116e-05, "loss": 0.013, "step": 710 }, { "epoch": 1.27, "grad_norm": 0.030256085097789764, "learning_rate": 3.19672131147541e-05, "loss": 0.0061, "step": 720 }, { "epoch": 1.29, "grad_norm": 0.08354157209396362, "learning_rate": 3.163934426229508e-05, "loss": 0.0008, "step": 730 }, { "epoch": 1.31, "grad_norm": 0.1932225227355957, "learning_rate": 3.131147540983606e-05, "loss": 0.0019, "step": 740 }, { "epoch": 1.33, "grad_norm": 0.4999471604824066, "learning_rate": 3.098360655737705e-05, "loss": 0.012, "step": 750 }, { "epoch": 1.34, "grad_norm": 0.016076169908046722, "learning_rate": 3.065573770491804e-05, "loss": 0.0006, "step": 760 }, { "epoch": 1.36, "grad_norm": 0.010563875548541546, "learning_rate": 3.0327868852459017e-05, "loss": 0.0034, "step": 770 }, { "epoch": 1.38, "grad_norm": 0.020218759775161743, "learning_rate": 3e-05, "loss": 0.0016, "step": 780 }, { "epoch": 1.4, "grad_norm": 0.016868168488144875, "learning_rate": 2.967213114754098e-05, "loss": 0.0033, "step": 790 }, { "epoch": 1.41, "grad_norm": 0.013298319652676582, "learning_rate": 2.934426229508197e-05, "loss": 0.0037, "step": 800 }, { "epoch": 1.43, "grad_norm": 0.062481291592121124, "learning_rate": 2.901639344262295e-05, "loss": 0.0118, "step": 810 }, { "epoch": 1.45, "grad_norm": 1.4717094898223877, "learning_rate": 2.8688524590163935e-05, "loss": 0.0104, "step": 820 }, { "epoch": 1.47, "grad_norm": 1.5437506437301636, "learning_rate": 2.8360655737704922e-05, "loss": 0.0062, "step": 830 }, { "epoch": 1.49, "grad_norm": 0.03338282182812691, "learning_rate": 2.8032786885245906e-05, "loss": 0.0025, "step": 840 }, { "epoch": 1.5, "grad_norm": 1.8838506937026978, "learning_rate": 2.7704918032786886e-05, "loss": 0.0193, "step": 850 }, { "epoch": 1.52, "grad_norm": 0.007599662058055401, "learning_rate": 2.737704918032787e-05, "loss": 0.0083, "step": 860 }, { "epoch": 1.54, "grad_norm": 0.0383157841861248, "learning_rate": 2.7049180327868856e-05, "loss": 0.0059, "step": 870 }, { "epoch": 1.56, "grad_norm": 0.004423599224537611, "learning_rate": 2.6721311475409837e-05, "loss": 0.0019, "step": 880 }, { "epoch": 1.57, "grad_norm": 7.753259658813477, "learning_rate": 2.639344262295082e-05, "loss": 0.0197, "step": 890 }, { "epoch": 1.59, "grad_norm": 0.0051222569309175014, "learning_rate": 2.6065573770491804e-05, "loss": 0.0128, "step": 900 }, { "epoch": 1.61, "grad_norm": 0.006367386784404516, "learning_rate": 2.573770491803279e-05, "loss": 0.0097, "step": 910 }, { "epoch": 1.63, "grad_norm": 0.016590917482972145, "learning_rate": 2.540983606557377e-05, "loss": 0.0163, "step": 920 }, { "epoch": 1.64, "grad_norm": 0.006148540414869785, "learning_rate": 2.5081967213114754e-05, "loss": 0.0107, "step": 930 }, { "epoch": 1.66, "grad_norm": 0.021444685757160187, "learning_rate": 2.4754098360655738e-05, "loss": 0.008, "step": 940 }, { "epoch": 1.68, "grad_norm": 0.27239102125167847, "learning_rate": 2.442622950819672e-05, "loss": 0.0009, "step": 950 }, { "epoch": 1.7, "grad_norm": 2.461951971054077, "learning_rate": 2.4098360655737705e-05, "loss": 0.0224, "step": 960 }, { "epoch": 1.72, "grad_norm": 0.009482527151703835, "learning_rate": 2.377049180327869e-05, "loss": 0.0032, "step": 970 }, { "epoch": 1.73, "grad_norm": 0.0694417804479599, "learning_rate": 2.3442622950819672e-05, "loss": 0.001, "step": 980 }, { "epoch": 1.75, "grad_norm": 0.008940008468925953, "learning_rate": 2.311475409836066e-05, "loss": 0.0063, "step": 990 }, { "epoch": 1.77, "grad_norm": 0.008408569730818272, "learning_rate": 2.278688524590164e-05, "loss": 0.0026, "step": 1000 }, { "epoch": 1.79, "grad_norm": 0.00473896786570549, "learning_rate": 2.2459016393442626e-05, "loss": 0.0162, "step": 1010 }, { "epoch": 1.8, "grad_norm": 0.0444784052670002, "learning_rate": 2.2131147540983607e-05, "loss": 0.0004, "step": 1020 }, { "epoch": 1.82, "grad_norm": 0.002952872309833765, "learning_rate": 2.1803278688524594e-05, "loss": 0.0079, "step": 1030 }, { "epoch": 1.84, "grad_norm": 0.009093045257031918, "learning_rate": 2.1475409836065574e-05, "loss": 0.0049, "step": 1040 }, { "epoch": 1.86, "grad_norm": 5.8584818840026855, "learning_rate": 2.114754098360656e-05, "loss": 0.0048, "step": 1050 }, { "epoch": 1.87, "grad_norm": 0.025469429790973663, "learning_rate": 2.081967213114754e-05, "loss": 0.0118, "step": 1060 }, { "epoch": 1.89, "grad_norm": 0.005341151263564825, "learning_rate": 2.0491803278688525e-05, "loss": 0.0016, "step": 1070 }, { "epoch": 1.91, "grad_norm": 0.006102704908698797, "learning_rate": 2.0163934426229508e-05, "loss": 0.001, "step": 1080 }, { "epoch": 1.93, "grad_norm": 0.009585659019649029, "learning_rate": 1.9836065573770492e-05, "loss": 0.0003, "step": 1090 }, { "epoch": 1.95, "grad_norm": 0.003977091982960701, "learning_rate": 1.9508196721311475e-05, "loss": 0.0034, "step": 1100 }, { "epoch": 1.96, "grad_norm": 0.008223881945014, "learning_rate": 1.918032786885246e-05, "loss": 0.0016, "step": 1110 }, { "epoch": 1.98, "grad_norm": 0.014407954178750515, "learning_rate": 1.8852459016393442e-05, "loss": 0.0013, "step": 1120 }, { "epoch": 2.0, "grad_norm": 0.3026231825351715, "learning_rate": 1.8524590163934426e-05, "loss": 0.0005, "step": 1130 }, { "epoch": 2.0, "eval_accuracy": 0.9951154529307282, "eval_loss": 0.01963997818529606, "eval_runtime": 122.5197, "eval_samples_per_second": 18.381, "eval_steps_per_second": 2.302, "step": 1131 }, { "epoch": 2.02, "grad_norm": 0.005760945845395327, "learning_rate": 1.8196721311475413e-05, "loss": 0.0028, "step": 1140 }, { "epoch": 2.03, "grad_norm": 0.00677720969542861, "learning_rate": 1.7868852459016393e-05, "loss": 0.0008, "step": 1150 }, { "epoch": 2.05, "grad_norm": 0.004843315575271845, "learning_rate": 1.754098360655738e-05, "loss": 0.002, "step": 1160 }, { "epoch": 2.07, "grad_norm": 0.003445906564593315, "learning_rate": 1.721311475409836e-05, "loss": 0.0002, "step": 1170 }, { "epoch": 2.09, "grad_norm": 0.0024833856150507927, "learning_rate": 1.6885245901639347e-05, "loss": 0.0028, "step": 1180 }, { "epoch": 2.1, "grad_norm": 0.024947261437773705, "learning_rate": 1.6557377049180328e-05, "loss": 0.0003, "step": 1190 }, { "epoch": 2.12, "grad_norm": 0.0022959029302001, "learning_rate": 1.6229508196721314e-05, "loss": 0.0002, "step": 1200 }, { "epoch": 2.14, "grad_norm": 0.01967487297952175, "learning_rate": 1.5901639344262295e-05, "loss": 0.0003, "step": 1210 }, { "epoch": 2.16, "grad_norm": 0.357111394405365, "learning_rate": 1.557377049180328e-05, "loss": 0.0005, "step": 1220 }, { "epoch": 2.18, "grad_norm": 0.0058966344222426414, "learning_rate": 1.5245901639344262e-05, "loss": 0.0001, "step": 1230 }, { "epoch": 2.19, "grad_norm": 0.0033721905201673508, "learning_rate": 1.4918032786885247e-05, "loss": 0.0002, "step": 1240 }, { "epoch": 2.21, "grad_norm": 0.0022145358379930258, "learning_rate": 1.4590163934426229e-05, "loss": 0.0002, "step": 1250 }, { "epoch": 2.23, "grad_norm": 0.035282738506793976, "learning_rate": 1.4262295081967214e-05, "loss": 0.0002, "step": 1260 }, { "epoch": 2.25, "grad_norm": 0.0015229666605591774, "learning_rate": 1.3934426229508196e-05, "loss": 0.0002, "step": 1270 }, { "epoch": 2.26, "grad_norm": 0.005502559244632721, "learning_rate": 1.3606557377049181e-05, "loss": 0.0021, "step": 1280 }, { "epoch": 2.28, "grad_norm": 0.0072832051664590836, "learning_rate": 1.3278688524590163e-05, "loss": 0.0001, "step": 1290 }, { "epoch": 2.3, "grad_norm": 0.001554520451463759, "learning_rate": 1.2950819672131149e-05, "loss": 0.0002, "step": 1300 }, { "epoch": 2.32, "grad_norm": 0.0054123131558299065, "learning_rate": 1.2622950819672132e-05, "loss": 0.0002, "step": 1310 }, { "epoch": 2.33, "grad_norm": 0.013851379975676537, "learning_rate": 1.2295081967213116e-05, "loss": 0.0066, "step": 1320 }, { "epoch": 2.35, "grad_norm": 0.0056920950300991535, "learning_rate": 1.19672131147541e-05, "loss": 0.0002, "step": 1330 }, { "epoch": 2.37, "grad_norm": 0.00168787338770926, "learning_rate": 1.1639344262295083e-05, "loss": 0.0002, "step": 1340 }, { "epoch": 2.39, "grad_norm": 0.005322947632521391, "learning_rate": 1.1311475409836065e-05, "loss": 0.0002, "step": 1350 }, { "epoch": 2.4, "grad_norm": 0.002933880779892206, "learning_rate": 1.0983606557377048e-05, "loss": 0.0002, "step": 1360 }, { "epoch": 2.42, "grad_norm": 0.0015972905093804002, "learning_rate": 1.0655737704918032e-05, "loss": 0.0003, "step": 1370 }, { "epoch": 2.44, "grad_norm": 0.0015742258401587605, "learning_rate": 1.0327868852459017e-05, "loss": 0.0003, "step": 1380 }, { "epoch": 2.46, "grad_norm": 0.003087802790105343, "learning_rate": 1e-05, "loss": 0.0002, "step": 1390 }, { "epoch": 2.48, "grad_norm": 0.002106766914948821, "learning_rate": 9.672131147540984e-06, "loss": 0.0008, "step": 1400 }, { "epoch": 2.49, "grad_norm": 0.058892298489809036, "learning_rate": 9.344262295081968e-06, "loss": 0.0003, "step": 1410 }, { "epoch": 2.51, "grad_norm": 0.0024353417102247477, "learning_rate": 9.016393442622952e-06, "loss": 0.0002, "step": 1420 }, { "epoch": 2.53, "grad_norm": 0.003295810893177986, "learning_rate": 8.688524590163935e-06, "loss": 0.0028, "step": 1430 }, { "epoch": 2.55, "grad_norm": 0.006564658600836992, "learning_rate": 8.360655737704919e-06, "loss": 0.0001, "step": 1440 }, { "epoch": 2.56, "grad_norm": 0.17080430686473846, "learning_rate": 8.032786885245902e-06, "loss": 0.0005, "step": 1450 }, { "epoch": 2.58, "grad_norm": 0.012207315303385258, "learning_rate": 7.704918032786886e-06, "loss": 0.0008, "step": 1460 }, { "epoch": 2.6, "grad_norm": 0.07983198761940002, "learning_rate": 7.3770491803278695e-06, "loss": 0.0003, "step": 1470 }, { "epoch": 2.62, "grad_norm": 0.0021029170602560043, "learning_rate": 7.049180327868852e-06, "loss": 0.0002, "step": 1480 }, { "epoch": 2.63, "grad_norm": 0.001665195683017373, "learning_rate": 6.721311475409836e-06, "loss": 0.0002, "step": 1490 }, { "epoch": 2.65, "grad_norm": 0.0021821956615895033, "learning_rate": 6.393442622950819e-06, "loss": 0.0003, "step": 1500 }, { "epoch": 2.67, "grad_norm": 0.0023076001089066267, "learning_rate": 6.065573770491804e-06, "loss": 0.0007, "step": 1510 }, { "epoch": 2.69, "grad_norm": 0.002858547493815422, "learning_rate": 5.737704918032787e-06, "loss": 0.0002, "step": 1520 }, { "epoch": 2.71, "grad_norm": 0.014774510636925697, "learning_rate": 5.409836065573771e-06, "loss": 0.0002, "step": 1530 }, { "epoch": 2.72, "grad_norm": 0.0014713788405060768, "learning_rate": 5.0819672131147545e-06, "loss": 0.0001, "step": 1540 }, { "epoch": 2.74, "grad_norm": 0.0027496495749801397, "learning_rate": 4.754098360655738e-06, "loss": 0.0002, "step": 1550 }, { "epoch": 2.76, "grad_norm": 0.002773697255179286, "learning_rate": 4.426229508196722e-06, "loss": 0.0002, "step": 1560 }, { "epoch": 2.78, "grad_norm": 0.002084126928821206, "learning_rate": 4.098360655737704e-06, "loss": 0.0001, "step": 1570 }, { "epoch": 2.79, "grad_norm": 0.0016491630813106894, "learning_rate": 3.770491803278689e-06, "loss": 0.0002, "step": 1580 }, { "epoch": 2.81, "grad_norm": 0.003069406608119607, "learning_rate": 3.4426229508196724e-06, "loss": 0.0074, "step": 1590 }, { "epoch": 2.83, "grad_norm": 0.001330082886852324, "learning_rate": 3.114754098360656e-06, "loss": 0.0003, "step": 1600 }, { "epoch": 2.85, "grad_norm": 0.9783486127853394, "learning_rate": 2.7868852459016396e-06, "loss": 0.0007, "step": 1610 }, { "epoch": 2.86, "grad_norm": 0.001621302799321711, "learning_rate": 2.459016393442623e-06, "loss": 0.0001, "step": 1620 }, { "epoch": 2.88, "grad_norm": 0.05432431399822235, "learning_rate": 2.1311475409836067e-06, "loss": 0.0002, "step": 1630 }, { "epoch": 2.9, "grad_norm": 0.001987756695598364, "learning_rate": 1.8032786885245903e-06, "loss": 0.0003, "step": 1640 }, { "epoch": 2.92, "grad_norm": 0.0034532160498201847, "learning_rate": 1.4754098360655739e-06, "loss": 0.0001, "step": 1650 }, { "epoch": 2.94, "grad_norm": 0.0017541700508445501, "learning_rate": 1.1475409836065575e-06, "loss": 0.0112, "step": 1660 }, { "epoch": 2.95, "grad_norm": 0.02740754559636116, "learning_rate": 8.19672131147541e-07, "loss": 0.0002, "step": 1670 }, { "epoch": 2.97, "grad_norm": 0.0015147700905799866, "learning_rate": 4.918032786885246e-07, "loss": 0.0001, "step": 1680 }, { "epoch": 2.99, "grad_norm": 2.0458450317382812, "learning_rate": 1.639344262295082e-07, "loss": 0.0014, "step": 1690 }, { "epoch": 3.0, "eval_accuracy": 0.9942273534635879, "eval_loss": 0.02003500424325466, "eval_runtime": 123.0739, "eval_samples_per_second": 18.298, "eval_steps_per_second": 2.291, "step": 1695 }, { "epoch": 3.0, "step": 1695, "total_flos": 1.4134330246261506e+19, "train_loss": 0.10345972847079468, "train_runtime": 4872.3509, "train_samples_per_second": 11.138, "train_steps_per_second": 0.348 } ], "logging_steps": 10, "max_steps": 1695, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.4134330246261506e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }