{ "best_metric": 0.9576789992014906, "best_model_checkpoint": "checkpoints/checkpoint-331000", "epoch": 14.126584439417865, "eval_steps": 500, "global_step": 331000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.0669625709530112e-07, "loss": 0.6022, "step": 500 }, { "epoch": 0.02, "eval_accuracy": 0.543784934788395, "eval_f1": 0.5427684499683697, "eval_loss": 0.9619719982147217, "eval_runtime": 29.8388, "eval_samples_per_second": 251.82, "eval_steps_per_second": 3.955, "step": 500 }, { "epoch": 0.04, "learning_rate": 2.1339251419060223e-07, "loss": 0.485, "step": 1000 }, { "epoch": 0.04, "eval_accuracy": 0.5958211338834176, "eval_f1": 0.5978559682284419, "eval_loss": 0.9289390444755554, "eval_runtime": 29.8421, "eval_samples_per_second": 251.792, "eval_steps_per_second": 3.954, "step": 1000 }, { "epoch": 0.06, "learning_rate": 3.200887712859033e-07, "loss": 0.392, "step": 1500 }, { "epoch": 0.06, "eval_accuracy": 0.6265637476710141, "eval_f1": 0.6288184460866618, "eval_loss": 0.9415813088417053, "eval_runtime": 29.8528, "eval_samples_per_second": 251.701, "eval_steps_per_second": 3.953, "step": 1500 }, { "epoch": 0.09, "learning_rate": 4.2678502838120447e-07, "loss": 0.341, "step": 2000 }, { "epoch": 0.09, "eval_accuracy": 0.6396060686718126, "eval_f1": 0.6420018684811181, "eval_loss": 0.9537326097488403, "eval_runtime": 29.6716, "eval_samples_per_second": 253.239, "eval_steps_per_second": 3.977, "step": 2000 }, { "epoch": 0.11, "learning_rate": 5.332678929623149e-07, "loss": 0.3014, "step": 2500 }, { "epoch": 0.11, "eval_accuracy": 0.6586372105403248, "eval_f1": 0.6606654532497396, "eval_loss": 0.9310646057128906, "eval_runtime": 29.6727, "eval_samples_per_second": 253.229, "eval_steps_per_second": 3.977, "step": 2500 }, { "epoch": 0.13, "learning_rate": 6.39964150057616e-07, "loss": 0.2904, "step": 3000 }, { "epoch": 0.13, "eval_accuracy": 0.659701889805696, "eval_f1": 0.6629432187101901, "eval_loss": 0.9263201951980591, "eval_runtime": 29.8167, "eval_samples_per_second": 252.007, "eval_steps_per_second": 3.958, "step": 3000 }, { "epoch": 0.15, "learning_rate": 7.466604071529171e-07, "loss": 0.2777, "step": 3500 }, { "epoch": 0.15, "eval_accuracy": 0.6752728240617514, "eval_f1": 0.6780225280179651, "eval_loss": 0.874735951423645, "eval_runtime": 29.7236, "eval_samples_per_second": 252.795, "eval_steps_per_second": 3.97, "step": 3500 }, { "epoch": 0.17, "learning_rate": 8.533566642482183e-07, "loss": 0.2592, "step": 4000 }, { "epoch": 0.17, "eval_accuracy": 0.6756720787862657, "eval_f1": 0.6786108310156049, "eval_loss": 0.8717394471168518, "eval_runtime": 29.7655, "eval_samples_per_second": 252.44, "eval_steps_per_second": 3.964, "step": 4000 }, { "epoch": 0.19, "learning_rate": 9.598395288293286e-07, "loss": 0.2517, "step": 4500 }, { "epoch": 0.19, "eval_accuracy": 0.6734096353473517, "eval_f1": 0.6769719434965177, "eval_loss": 0.8735432624816895, "eval_runtime": 29.6907, "eval_samples_per_second": 253.076, "eval_steps_per_second": 3.974, "step": 4500 }, { "epoch": 0.21, "learning_rate": 1.0665357859246298e-06, "loss": 0.248, "step": 5000 }, { "epoch": 0.21, "eval_accuracy": 0.6948363055629492, "eval_f1": 0.6978001595314681, "eval_loss": 0.813610315322876, "eval_runtime": 29.6586, "eval_samples_per_second": 253.35, "eval_steps_per_second": 3.979, "step": 5000 }, { "epoch": 0.23, "learning_rate": 1.173232043019931e-06, "loss": 0.2356, "step": 5500 }, { "epoch": 0.23, "eval_accuracy": 0.6996273622571201, "eval_f1": 0.7034185823176623, "eval_loss": 0.7985087633132935, "eval_runtime": 29.648, "eval_samples_per_second": 253.441, "eval_steps_per_second": 3.98, "step": 5500 }, { "epoch": 0.26, "learning_rate": 1.279928300115232e-06, "loss": 0.2309, "step": 6000 }, { "epoch": 0.26, "eval_accuracy": 0.6981634282672345, "eval_f1": 0.7017567836313319, "eval_loss": 0.8125708103179932, "eval_runtime": 29.8184, "eval_samples_per_second": 251.992, "eval_steps_per_second": 3.957, "step": 6000 }, { "epoch": 0.28, "learning_rate": 1.3864111646963426e-06, "loss": 0.2361, "step": 6500 }, { "epoch": 0.28, "eval_accuracy": 0.7104072398190046, "eval_f1": 0.7139759041126813, "eval_loss": 0.7547827363014221, "eval_runtime": 29.9387, "eval_samples_per_second": 250.98, "eval_steps_per_second": 3.941, "step": 6500 }, { "epoch": 0.3, "learning_rate": 1.4931074217916436e-06, "loss": 0.228, "step": 7000 }, { "epoch": 0.3, "eval_accuracy": 0.7069470322065478, "eval_f1": 0.7106086090282727, "eval_loss": 0.7759659290313721, "eval_runtime": 29.786, "eval_samples_per_second": 252.267, "eval_steps_per_second": 3.962, "step": 7000 }, { "epoch": 0.32, "learning_rate": 1.5998036788869448e-06, "loss": 0.2208, "step": 7500 }, { "epoch": 0.32, "eval_accuracy": 0.7167953154112323, "eval_f1": 0.7197346181733503, "eval_loss": 0.7356535792350769, "eval_runtime": 29.8209, "eval_samples_per_second": 251.971, "eval_steps_per_second": 3.957, "step": 7500 }, { "epoch": 0.34, "learning_rate": 1.706499935982246e-06, "loss": 0.2163, "step": 8000 }, { "epoch": 0.34, "eval_accuracy": 0.7199893532073462, "eval_f1": 0.7244465646996111, "eval_loss": 0.7504526972770691, "eval_runtime": 29.7061, "eval_samples_per_second": 252.944, "eval_steps_per_second": 3.972, "step": 8000 }, { "epoch": 0.36, "learning_rate": 1.8129828005633564e-06, "loss": 0.2107, "step": 8500 }, { "epoch": 0.36, "eval_accuracy": 0.7410167686984296, "eval_f1": 0.7423783524470383, "eval_loss": 0.6787217855453491, "eval_runtime": 29.6863, "eval_samples_per_second": 253.113, "eval_steps_per_second": 3.975, "step": 8500 }, { "epoch": 0.38, "learning_rate": 1.9196790576586572e-06, "loss": 0.2149, "step": 9000 }, { "epoch": 0.38, "eval_accuracy": 0.7233164759116316, "eval_f1": 0.7276929881395522, "eval_loss": 0.726280927658081, "eval_runtime": 29.7736, "eval_samples_per_second": 252.372, "eval_steps_per_second": 3.963, "step": 9000 }, { "epoch": 0.41, "learning_rate": 2.0263753147539587e-06, "loss": 0.2136, "step": 9500 }, { "epoch": 0.41, "eval_accuracy": 0.7230503060952888, "eval_f1": 0.7276284165227827, "eval_loss": 0.73542720079422, "eval_runtime": 29.6589, "eval_samples_per_second": 253.348, "eval_steps_per_second": 3.979, "step": 9500 }, { "epoch": 0.43, "learning_rate": 2.1330715718492596e-06, "loss": 0.2048, "step": 10000 }, { "epoch": 0.43, "eval_accuracy": 0.7463401650252861, "eval_f1": 0.7498995143342098, "eval_loss": 0.6706867218017578, "eval_runtime": 29.649, "eval_samples_per_second": 253.432, "eval_steps_per_second": 3.98, "step": 10000 }, { "epoch": 0.45, "learning_rate": 2.2395544364303703e-06, "loss": 0.2061, "step": 10500 }, { "epoch": 0.45, "eval_accuracy": 0.7655043918019696, "eval_f1": 0.7677479761415009, "eval_loss": 0.6240710020065308, "eval_runtime": 29.7677, "eval_samples_per_second": 252.421, "eval_steps_per_second": 3.964, "step": 10500 }, { "epoch": 0.47, "learning_rate": 2.3462506935256712e-06, "loss": 0.2023, "step": 11000 }, { "epoch": 0.47, "eval_accuracy": 0.7657705616183125, "eval_f1": 0.7675978015109074, "eval_loss": 0.6099062561988831, "eval_runtime": 28.8989, "eval_samples_per_second": 260.01, "eval_steps_per_second": 4.083, "step": 11000 }, { "epoch": 0.49, "learning_rate": 2.4529469506209722e-06, "loss": 0.1971, "step": 11500 }, { "epoch": 0.49, "eval_accuracy": 0.766569071067341, "eval_f1": 0.7688363381777508, "eval_loss": 0.6124671697616577, "eval_runtime": 29.7028, "eval_samples_per_second": 252.973, "eval_steps_per_second": 3.973, "step": 11500 }, { "epoch": 0.51, "learning_rate": 2.5596432077162737e-06, "loss": 0.2007, "step": 12000 }, { "epoch": 0.51, "eval_accuracy": 0.7802768166089965, "eval_f1": 0.7815354154818521, "eval_loss": 0.5773194432258606, "eval_runtime": 29.6902, "eval_samples_per_second": 253.08, "eval_steps_per_second": 3.974, "step": 12000 }, { "epoch": 0.53, "learning_rate": 2.6663394648115747e-06, "loss": 0.1966, "step": 12500 }, { "epoch": 0.53, "eval_accuracy": 0.7766835240883684, "eval_f1": 0.7787527362371823, "eval_loss": 0.5817181468009949, "eval_runtime": 29.6898, "eval_samples_per_second": 253.083, "eval_steps_per_second": 3.974, "step": 12500 }, { "epoch": 0.55, "learning_rate": 2.7728223293926853e-06, "loss": 0.1904, "step": 13000 }, { "epoch": 0.55, "eval_accuracy": 0.763375033271227, "eval_f1": 0.7673421658493134, "eval_loss": 0.6202647089958191, "eval_runtime": 29.6897, "eval_samples_per_second": 253.084, "eval_steps_per_second": 3.974, "step": 13000 }, { "epoch": 0.58, "learning_rate": 2.879518586487986e-06, "loss": 0.1951, "step": 13500 }, { "epoch": 0.58, "eval_accuracy": 0.7721586372105403, "eval_f1": 0.7752940740863317, "eval_loss": 0.6014246344566345, "eval_runtime": 28.9444, "eval_samples_per_second": 259.601, "eval_steps_per_second": 4.077, "step": 13500 }, { "epoch": 0.6, "learning_rate": 2.9862148435832873e-06, "loss": 0.1876, "step": 14000 }, { "epoch": 0.6, "eval_accuracy": 0.7704285334043119, "eval_f1": 0.7738889189274433, "eval_loss": 0.5962545871734619, "eval_runtime": 29.7266, "eval_samples_per_second": 252.77, "eval_steps_per_second": 3.97, "step": 14000 }, { "epoch": 0.62, "learning_rate": 3.0929111006785883e-06, "loss": 0.1887, "step": 14500 }, { "epoch": 0.62, "eval_accuracy": 0.7913228639872238, "eval_f1": 0.7934762289692718, "eval_loss": 0.5552248954772949, "eval_runtime": 29.7855, "eval_samples_per_second": 252.271, "eval_steps_per_second": 3.962, "step": 14500 }, { "epoch": 0.64, "learning_rate": 3.199393965259699e-06, "loss": 0.1869, "step": 15000 }, { "epoch": 0.64, "eval_accuracy": 0.7954484961405377, "eval_f1": 0.7971630354645414, "eval_loss": 0.5504642724990845, "eval_runtime": 29.8102, "eval_samples_per_second": 252.061, "eval_steps_per_second": 3.958, "step": 15000 }, { "epoch": 0.66, "learning_rate": 3.3060902223550003e-06, "loss": 0.1885, "step": 15500 }, { "epoch": 0.66, "eval_accuracy": 0.7981101943039659, "eval_f1": 0.8001115884271371, "eval_loss": 0.5248883962631226, "eval_runtime": 29.8233, "eval_samples_per_second": 251.951, "eval_steps_per_second": 3.957, "step": 15500 }, { "epoch": 0.68, "learning_rate": 3.4127864794503013e-06, "loss": 0.1841, "step": 16000 }, { "epoch": 0.68, "eval_accuracy": 0.76177801437317, "eval_f1": 0.7663511235727536, "eval_loss": 0.6254962682723999, "eval_runtime": 28.8714, "eval_samples_per_second": 260.258, "eval_steps_per_second": 4.087, "step": 16000 }, { "epoch": 0.7, "learning_rate": 3.5194827365456023e-06, "loss": 0.1888, "step": 16500 }, { "epoch": 0.7, "eval_accuracy": 0.8039659302635082, "eval_f1": 0.8057213697184751, "eval_loss": 0.513132631778717, "eval_runtime": 29.7082, "eval_samples_per_second": 252.927, "eval_steps_per_second": 3.972, "step": 16500 }, { "epoch": 0.73, "learning_rate": 3.625965601126713e-06, "loss": 0.1822, "step": 17000 }, { "epoch": 0.73, "eval_accuracy": 0.7708277881288262, "eval_f1": 0.7751738701524662, "eval_loss": 0.6065024733543396, "eval_runtime": 29.6783, "eval_samples_per_second": 253.182, "eval_steps_per_second": 3.976, "step": 17000 }, { "epoch": 0.75, "learning_rate": 3.7326618582220135e-06, "loss": 0.1824, "step": 17500 }, { "epoch": 0.75, "eval_accuracy": 0.8144796380090498, "eval_f1": 0.815863436789794, "eval_loss": 0.48926714062690735, "eval_runtime": 29.8006, "eval_samples_per_second": 252.143, "eval_steps_per_second": 3.96, "step": 17500 }, { "epoch": 0.77, "learning_rate": 3.8393581153173145e-06, "loss": 0.1762, "step": 18000 }, { "epoch": 0.77, "eval_accuracy": 0.8018365717327655, "eval_f1": 0.8042255377709717, "eval_loss": 0.5292934775352478, "eval_runtime": 29.6674, "eval_samples_per_second": 253.275, "eval_steps_per_second": 3.977, "step": 18000 }, { "epoch": 0.79, "learning_rate": 3.946054372412616e-06, "loss": 0.1794, "step": 18500 }, { "epoch": 0.79, "eval_accuracy": 0.7919882885280809, "eval_f1": 0.7951855368343619, "eval_loss": 0.5379685163497925, "eval_runtime": 29.6555, "eval_samples_per_second": 253.376, "eval_steps_per_second": 3.979, "step": 18500 }, { "epoch": 0.81, "learning_rate": 4.052750629507917e-06, "loss": 0.178, "step": 19000 }, { "epoch": 0.81, "eval_accuracy": 0.7966462603140804, "eval_f1": 0.7997248083930035, "eval_loss": 0.547903299331665, "eval_runtime": 29.6966, "eval_samples_per_second": 253.026, "eval_steps_per_second": 3.974, "step": 19000 }, { "epoch": 0.83, "learning_rate": 4.159446886603218e-06, "loss": 0.178, "step": 19500 }, { "epoch": 0.83, "eval_accuracy": 0.8191376097950492, "eval_f1": 0.820192566679803, "eval_loss": 0.48710888624191284, "eval_runtime": 29.6619, "eval_samples_per_second": 253.322, "eval_steps_per_second": 3.978, "step": 19500 }, { "epoch": 0.85, "learning_rate": 4.266143143698519e-06, "loss": 0.1804, "step": 20000 }, { "epoch": 0.85, "eval_accuracy": 0.7983763641203088, "eval_f1": 0.80157249996382, "eval_loss": 0.5329124331474304, "eval_runtime": 29.6874, "eval_samples_per_second": 253.104, "eval_steps_per_second": 3.975, "step": 20000 }, { "epoch": 0.87, "learning_rate": 4.372839400793821e-06, "loss": 0.1735, "step": 20500 }, { "epoch": 0.87, "eval_accuracy": 0.8005057226510514, "eval_f1": 0.8025166030334374, "eval_loss": 0.5359117388725281, "eval_runtime": 29.7951, "eval_samples_per_second": 252.189, "eval_steps_per_second": 3.96, "step": 20500 }, { "epoch": 0.9, "learning_rate": 4.479322265374931e-06, "loss": 0.1757, "step": 21000 }, { "epoch": 0.9, "eval_accuracy": 0.8148788927335641, "eval_f1": 0.8169785681348869, "eval_loss": 0.49457210302352905, "eval_runtime": 29.7201, "eval_samples_per_second": 252.825, "eval_steps_per_second": 3.97, "step": 21000 }, { "epoch": 0.92, "learning_rate": 4.5858051299560415e-06, "loss": 0.1784, "step": 21500 }, { "epoch": 0.92, "eval_accuracy": 0.7914559488953953, "eval_f1": 0.7952801320443871, "eval_loss": 0.5443964600563049, "eval_runtime": 29.7969, "eval_samples_per_second": 252.174, "eval_steps_per_second": 3.96, "step": 21500 }, { "epoch": 0.94, "learning_rate": 4.6925013870513425e-06, "loss": 0.1727, "step": 22000 }, { "epoch": 0.94, "eval_accuracy": 0.7762842693638541, "eval_f1": 0.7809969777740893, "eval_loss": 0.6244210004806519, "eval_runtime": 29.7843, "eval_samples_per_second": 252.281, "eval_steps_per_second": 3.962, "step": 22000 }, { "epoch": 0.96, "learning_rate": 4.7991976441466435e-06, "loss": 0.1661, "step": 22500 }, { "epoch": 0.96, "eval_accuracy": 0.8233963268565345, "eval_f1": 0.8247457942936195, "eval_loss": 0.4613490104675293, "eval_runtime": 29.7228, "eval_samples_per_second": 252.802, "eval_steps_per_second": 3.97, "step": 22500 }, { "epoch": 0.98, "learning_rate": 4.9058939012419445e-06, "loss": 0.1765, "step": 23000 }, { "epoch": 0.98, "eval_accuracy": 0.8155443172744211, "eval_f1": 0.817901282131261, "eval_loss": 0.4752632677555084, "eval_runtime": 29.6943, "eval_samples_per_second": 253.045, "eval_steps_per_second": 3.974, "step": 23000 }, { "epoch": 1.0, "learning_rate": 5.012376765823055e-06, "loss": 0.1691, "step": 23500 }, { "epoch": 1.0, "eval_accuracy": 0.8148788927335641, "eval_f1": 0.8171852369583342, "eval_loss": 0.4768976867198944, "eval_runtime": 29.8514, "eval_samples_per_second": 251.713, "eval_steps_per_second": 3.953, "step": 23500 }, { "epoch": 1.02, "learning_rate": 5.119073022918356e-06, "loss": 0.1602, "step": 24000 }, { "epoch": 1.02, "eval_accuracy": 0.8349747138674475, "eval_f1": 0.8360082038928794, "eval_loss": 0.4381480813026428, "eval_runtime": 29.7058, "eval_samples_per_second": 252.947, "eval_steps_per_second": 3.972, "step": 24000 }, { "epoch": 1.05, "learning_rate": 5.225769280013657e-06, "loss": 0.1598, "step": 24500 }, { "epoch": 1.05, "eval_accuracy": 0.8154112323662497, "eval_f1": 0.817754338851287, "eval_loss": 0.4830179512500763, "eval_runtime": 29.7125, "eval_samples_per_second": 252.89, "eval_steps_per_second": 3.971, "step": 24500 }, { "epoch": 1.07, "learning_rate": 5.332465537108958e-06, "loss": 0.1572, "step": 25000 }, { "epoch": 1.07, "eval_accuracy": 0.8293851477242481, "eval_f1": 0.8311224826812309, "eval_loss": 0.46394336223602295, "eval_runtime": 29.7759, "eval_samples_per_second": 252.352, "eval_steps_per_second": 3.963, "step": 25000 }, { "epoch": 1.09, "learning_rate": 5.43916179420426e-06, "loss": 0.1595, "step": 25500 }, { "epoch": 1.09, "eval_accuracy": 0.8148788927335641, "eval_f1": 0.8178297752035011, "eval_loss": 0.5001041889190674, "eval_runtime": 29.6893, "eval_samples_per_second": 253.088, "eval_steps_per_second": 3.974, "step": 25500 }, { "epoch": 1.11, "learning_rate": 5.545858051299561e-06, "loss": 0.161, "step": 26000 }, { "epoch": 1.11, "eval_accuracy": 0.8259249401117913, "eval_f1": 0.8286636751609872, "eval_loss": 0.48775410652160645, "eval_runtime": 29.8731, "eval_samples_per_second": 251.531, "eval_steps_per_second": 3.95, "step": 26000 }, { "epoch": 1.13, "learning_rate": 5.652554308394862e-06, "loss": 0.1579, "step": 26500 }, { "epoch": 1.13, "eval_accuracy": 0.8295182326324195, "eval_f1": 0.8312661426787813, "eval_loss": 0.46488162875175476, "eval_runtime": 29.804, "eval_samples_per_second": 252.114, "eval_steps_per_second": 3.959, "step": 26500 }, { "epoch": 1.15, "learning_rate": 5.759250565490163e-06, "loss": 0.158, "step": 27000 }, { "epoch": 1.15, "eval_accuracy": 0.8315145062549907, "eval_f1": 0.8338268703712137, "eval_loss": 0.468116819858551, "eval_runtime": 29.7512, "eval_samples_per_second": 252.561, "eval_steps_per_second": 3.966, "step": 27000 }, { "epoch": 1.17, "learning_rate": 5.8657334300712735e-06, "loss": 0.1564, "step": 27500 }, { "epoch": 1.17, "eval_accuracy": 0.8424274687250466, "eval_f1": 0.8440072450050907, "eval_loss": 0.43628567457199097, "eval_runtime": 29.7871, "eval_samples_per_second": 252.256, "eval_steps_per_second": 3.961, "step": 27500 }, { "epoch": 1.19, "learning_rate": 5.9724296871665745e-06, "loss": 0.1563, "step": 28000 }, { "epoch": 1.19, "eval_accuracy": 0.8393665158371041, "eval_f1": 0.8406275107384483, "eval_loss": 0.4493769705295563, "eval_runtime": 29.8008, "eval_samples_per_second": 252.141, "eval_steps_per_second": 3.96, "step": 28000 }, { "epoch": 1.22, "learning_rate": 6.0791259442618755e-06, "loss": 0.1537, "step": 28500 }, { "epoch": 1.22, "eval_accuracy": 0.8558690444503594, "eval_f1": 0.8564129141434538, "eval_loss": 0.40108588337898254, "eval_runtime": 29.7407, "eval_samples_per_second": 252.65, "eval_steps_per_second": 3.968, "step": 28500 }, { "epoch": 1.24, "learning_rate": 6.1858222013571765e-06, "loss": 0.1563, "step": 29000 }, { "epoch": 1.24, "eval_accuracy": 0.8498802235826457, "eval_f1": 0.8508552401873143, "eval_loss": 0.39731621742248535, "eval_runtime": 29.6785, "eval_samples_per_second": 253.18, "eval_steps_per_second": 3.976, "step": 29000 }, { "epoch": 1.26, "learning_rate": 6.292305065938288e-06, "loss": 0.1561, "step": 29500 }, { "epoch": 1.26, "eval_accuracy": 0.8237955815810487, "eval_f1": 0.8271945122035734, "eval_loss": 0.46914729475975037, "eval_runtime": 29.7151, "eval_samples_per_second": 252.868, "eval_steps_per_second": 3.971, "step": 29500 }, { "epoch": 1.28, "learning_rate": 6.399001323033589e-06, "loss": 0.1528, "step": 30000 }, { "epoch": 1.28, "eval_accuracy": 0.8251264306627628, "eval_f1": 0.8285461748150632, "eval_loss": 0.48026174306869507, "eval_runtime": 29.6374, "eval_samples_per_second": 253.531, "eval_steps_per_second": 3.981, "step": 30000 }, { "epoch": 1.3, "learning_rate": 6.505697580128889e-06, "loss": 0.1544, "step": 30500 }, { "epoch": 1.3, "eval_accuracy": 0.848682459409103, "eval_f1": 0.8501262102833276, "eval_loss": 0.41893479228019714, "eval_runtime": 29.6645, "eval_samples_per_second": 253.3, "eval_steps_per_second": 3.978, "step": 30500 }, { "epoch": 1.32, "learning_rate": 6.61239383722419e-06, "loss": 0.1539, "step": 31000 }, { "epoch": 1.32, "eval_accuracy": 0.851876497205217, "eval_f1": 0.8531827777031692, "eval_loss": 0.4037366509437561, "eval_runtime": 29.7077, "eval_samples_per_second": 252.931, "eval_steps_per_second": 3.972, "step": 31000 }, { "epoch": 1.34, "learning_rate": 6.7188767018053016e-06, "loss": 0.151, "step": 31500 }, { "epoch": 1.34, "eval_accuracy": 0.852541921746074, "eval_f1": 0.8535937254114443, "eval_loss": 0.39944180846214294, "eval_runtime": 29.7895, "eval_samples_per_second": 252.237, "eval_steps_per_second": 3.961, "step": 31500 }, { "epoch": 1.37, "learning_rate": 6.8255729589006026e-06, "loss": 0.152, "step": 32000 }, { "epoch": 1.37, "eval_accuracy": 0.8347085440511046, "eval_f1": 0.8372269081601834, "eval_loss": 0.45202094316482544, "eval_runtime": 29.6443, "eval_samples_per_second": 253.472, "eval_steps_per_second": 3.981, "step": 32000 }, { "epoch": 1.39, "learning_rate": 6.9322692159959036e-06, "loss": 0.1518, "step": 32500 }, { "epoch": 1.39, "eval_accuracy": 0.8540058557359596, "eval_f1": 0.8549640879293109, "eval_loss": 0.4035201668739319, "eval_runtime": 29.6728, "eval_samples_per_second": 253.229, "eval_steps_per_second": 3.977, "step": 32500 }, { "epoch": 1.41, "learning_rate": 7.0389654730912046e-06, "loss": 0.1525, "step": 33000 }, { "epoch": 1.41, "eval_accuracy": 0.8542720255523024, "eval_f1": 0.8556504958817148, "eval_loss": 0.3930774927139282, "eval_runtime": 29.4043, "eval_samples_per_second": 255.541, "eval_steps_per_second": 4.013, "step": 33000 }, { "epoch": 1.43, "learning_rate": 7.145661730186506e-06, "loss": 0.1531, "step": 33500 }, { "epoch": 1.43, "eval_accuracy": 0.8571998935320735, "eval_f1": 0.8578085198022855, "eval_loss": 0.39213454723358154, "eval_runtime": 29.717, "eval_samples_per_second": 252.852, "eval_steps_per_second": 3.971, "step": 33500 }, { "epoch": 1.45, "learning_rate": 7.252357987281807e-06, "loss": 0.1508, "step": 34000 }, { "epoch": 1.45, "eval_accuracy": 0.8645195634815012, "eval_f1": 0.8653816271187702, "eval_loss": 0.3561285138130188, "eval_runtime": 29.654, "eval_samples_per_second": 253.389, "eval_steps_per_second": 3.979, "step": 34000 }, { "epoch": 1.47, "learning_rate": 7.3590542443771076e-06, "loss": 0.1484, "step": 34500 }, { "epoch": 1.47, "eval_accuracy": 0.8627894596752729, "eval_f1": 0.8636694754124691, "eval_loss": 0.3743633031845093, "eval_runtime": 29.8096, "eval_samples_per_second": 252.066, "eval_steps_per_second": 3.958, "step": 34500 }, { "epoch": 1.49, "learning_rate": 7.4657505014724086e-06, "loss": 0.1506, "step": 35000 }, { "epoch": 1.49, "eval_accuracy": 0.8727708277881289, "eval_f1": 0.8731333607131244, "eval_loss": 0.3446885347366333, "eval_runtime": 29.6928, "eval_samples_per_second": 253.058, "eval_steps_per_second": 3.974, "step": 35000 }, { "epoch": 1.52, "learning_rate": 7.57223336605352e-06, "loss": 0.1498, "step": 35500 }, { "epoch": 1.52, "eval_accuracy": 0.8578653180729305, "eval_f1": 0.8594115234962781, "eval_loss": 0.3940994441509247, "eval_runtime": 28.8268, "eval_samples_per_second": 260.66, "eval_steps_per_second": 4.093, "step": 35500 }, { "epoch": 1.54, "learning_rate": 7.67892962314882e-06, "loss": 0.1492, "step": 36000 }, { "epoch": 1.54, "eval_accuracy": 0.8517434122970455, "eval_f1": 0.8539360175778631, "eval_loss": 0.42693892121315, "eval_runtime": 29.7835, "eval_samples_per_second": 252.287, "eval_steps_per_second": 3.962, "step": 36000 }, { "epoch": 1.56, "learning_rate": 7.785625880244122e-06, "loss": 0.1512, "step": 36500 }, { "epoch": 1.56, "eval_accuracy": 0.8623902049507586, "eval_f1": 0.8638277895919978, "eval_loss": 0.38825100660324097, "eval_runtime": 29.7295, "eval_samples_per_second": 252.745, "eval_steps_per_second": 3.969, "step": 36500 }, { "epoch": 1.58, "learning_rate": 7.892322137339424e-06, "loss": 0.1488, "step": 37000 }, { "epoch": 1.58, "eval_accuracy": 0.8622571200425871, "eval_f1": 0.8625953129968353, "eval_loss": 0.3947807848453522, "eval_runtime": 29.8226, "eval_samples_per_second": 251.957, "eval_steps_per_second": 3.957, "step": 37000 }, { "epoch": 1.6, "learning_rate": 7.998805001920533e-06, "loss": 0.1462, "step": 37500 }, { "epoch": 1.6, "eval_accuracy": 0.857998402981102, "eval_f1": 0.85965218608276, "eval_loss": 0.39402276277542114, "eval_runtime": 29.6801, "eval_samples_per_second": 253.167, "eval_steps_per_second": 3.976, "step": 37500 }, { "epoch": 1.62, "learning_rate": 8.105501259015835e-06, "loss": 0.1444, "step": 38000 }, { "epoch": 1.62, "eval_accuracy": 0.8752994410433856, "eval_f1": 0.8754268843244172, "eval_loss": 0.3435206115245819, "eval_runtime": 28.916, "eval_samples_per_second": 259.856, "eval_steps_per_second": 4.081, "step": 38000 }, { "epoch": 1.64, "learning_rate": 8.212197516111135e-06, "loss": 0.1479, "step": 38500 }, { "epoch": 1.64, "eval_accuracy": 0.8776949693904711, "eval_f1": 0.8777389705505072, "eval_loss": 0.34012433886528015, "eval_runtime": 29.8124, "eval_samples_per_second": 252.043, "eval_steps_per_second": 3.958, "step": 38500 }, { "epoch": 1.66, "learning_rate": 8.318893773206437e-06, "loss": 0.1454, "step": 39000 }, { "epoch": 1.66, "eval_accuracy": 0.8727708277881289, "eval_f1": 0.8738380396408167, "eval_loss": 0.3515642583370209, "eval_runtime": 29.6541, "eval_samples_per_second": 253.388, "eval_steps_per_second": 3.979, "step": 39000 }, { "epoch": 1.69, "learning_rate": 8.425376637787547e-06, "loss": 0.1455, "step": 39500 }, { "epoch": 1.69, "eval_accuracy": 0.8675805163694437, "eval_f1": 0.8691207089918026, "eval_loss": 0.3673495352268219, "eval_runtime": 29.6701, "eval_samples_per_second": 253.251, "eval_steps_per_second": 3.977, "step": 39500 }, { "epoch": 1.71, "learning_rate": 8.532072894882847e-06, "loss": 0.1453, "step": 40000 }, { "epoch": 1.71, "eval_accuracy": 0.8581314878892734, "eval_f1": 0.8595605264062034, "eval_loss": 0.39343753457069397, "eval_runtime": 29.6716, "eval_samples_per_second": 253.239, "eval_steps_per_second": 3.977, "step": 40000 }, { "epoch": 1.73, "learning_rate": 8.63876915197815e-06, "loss": 0.147, "step": 40500 }, { "epoch": 1.73, "eval_accuracy": 0.8658504125632154, "eval_f1": 0.8668749400965359, "eval_loss": 0.36048775911331177, "eval_runtime": 27.3595, "eval_samples_per_second": 274.639, "eval_steps_per_second": 4.313, "step": 40500 }, { "epoch": 1.75, "learning_rate": 8.745465409073451e-06, "loss": 0.1516, "step": 41000 }, { "epoch": 1.75, "eval_accuracy": 0.8758317806760714, "eval_f1": 0.8766235747210261, "eval_loss": 0.35379454493522644, "eval_runtime": 29.6799, "eval_samples_per_second": 253.168, "eval_steps_per_second": 3.976, "step": 41000 }, { "epoch": 1.77, "learning_rate": 8.851948273654562e-06, "loss": 0.1395, "step": 41500 }, { "epoch": 1.77, "eval_accuracy": 0.8714399787064147, "eval_f1": 0.8728150152532584, "eval_loss": 0.3659563362598419, "eval_runtime": 29.6835, "eval_samples_per_second": 253.137, "eval_steps_per_second": 3.975, "step": 41500 }, { "epoch": 1.79, "learning_rate": 8.958644530749862e-06, "loss": 0.1407, "step": 42000 }, { "epoch": 1.79, "eval_accuracy": 0.8734362523289859, "eval_f1": 0.8743262687390195, "eval_loss": 0.3471178710460663, "eval_runtime": 29.6846, "eval_samples_per_second": 253.128, "eval_steps_per_second": 3.975, "step": 42000 }, { "epoch": 1.81, "learning_rate": 9.065340787845162e-06, "loss": 0.1399, "step": 42500 }, { "epoch": 1.81, "eval_accuracy": 0.8856800638807559, "eval_f1": 0.8861514607271497, "eval_loss": 0.32689064741134644, "eval_runtime": 29.7995, "eval_samples_per_second": 252.152, "eval_steps_per_second": 3.96, "step": 42500 }, { "epoch": 1.84, "learning_rate": 9.172037044940464e-06, "loss": 0.1447, "step": 43000 }, { "epoch": 1.84, "eval_accuracy": 0.8697098749001864, "eval_f1": 0.8708193235034514, "eval_loss": 0.3712153732776642, "eval_runtime": 29.722, "eval_samples_per_second": 252.809, "eval_steps_per_second": 3.97, "step": 43000 }, { "epoch": 1.86, "learning_rate": 9.278733302035766e-06, "loss": 0.1453, "step": 43500 }, { "epoch": 1.86, "eval_accuracy": 0.8903380356667554, "eval_f1": 0.8906690257334923, "eval_loss": 0.30124372243881226, "eval_runtime": 29.8038, "eval_samples_per_second": 252.115, "eval_steps_per_second": 3.959, "step": 43500 }, { "epoch": 1.88, "learning_rate": 9.385429559131066e-06, "loss": 0.1451, "step": 44000 }, { "epoch": 1.88, "eval_accuracy": 0.8871439978706415, "eval_f1": 0.8877045418770082, "eval_loss": 0.31062984466552734, "eval_runtime": 29.6847, "eval_samples_per_second": 253.127, "eval_steps_per_second": 3.975, "step": 44000 }, { "epoch": 1.9, "learning_rate": 9.491912423712178e-06, "loss": 0.1409, "step": 44500 }, { "epoch": 1.9, "eval_accuracy": 0.8790258184721853, "eval_f1": 0.8799332093833431, "eval_loss": 0.3516901135444641, "eval_runtime": 29.7103, "eval_samples_per_second": 252.909, "eval_steps_per_second": 3.972, "step": 44500 }, { "epoch": 1.92, "learning_rate": 9.598608680807478e-06, "loss": 0.1417, "step": 45000 }, { "epoch": 1.92, "eval_accuracy": 0.8727708277881289, "eval_f1": 0.8742134115368855, "eval_loss": 0.3526758849620819, "eval_runtime": 29.6753, "eval_samples_per_second": 253.207, "eval_steps_per_second": 3.976, "step": 45000 }, { "epoch": 1.94, "learning_rate": 9.705304937902778e-06, "loss": 0.1409, "step": 45500 }, { "epoch": 1.94, "eval_accuracy": 0.8880755922278414, "eval_f1": 0.8886875427320827, "eval_loss": 0.3103240430355072, "eval_runtime": 29.7312, "eval_samples_per_second": 252.731, "eval_steps_per_second": 3.969, "step": 45500 }, { "epoch": 1.96, "learning_rate": 9.81200119499808e-06, "loss": 0.1479, "step": 46000 }, { "epoch": 1.96, "eval_accuracy": 0.8900718658504125, "eval_f1": 0.8906162569128221, "eval_loss": 0.30096864700317383, "eval_runtime": 29.8, "eval_samples_per_second": 252.148, "eval_steps_per_second": 3.96, "step": 46000 }, { "epoch": 1.98, "learning_rate": 9.91869745209338e-06, "loss": 0.1385, "step": 46500 }, { "epoch": 1.98, "eval_accuracy": 0.8846153846153846, "eval_f1": 0.885573613420597, "eval_loss": 0.320277601480484, "eval_runtime": 29.687, "eval_samples_per_second": 253.107, "eval_steps_per_second": 3.975, "step": 46500 }, { "epoch": 2.01, "learning_rate": 1.0025180316674493e-05, "loss": 0.1334, "step": 47000 }, { "epoch": 2.01, "eval_accuracy": 0.889273356401384, "eval_f1": 0.8898038480316975, "eval_loss": 0.3248152732849121, "eval_runtime": 29.6988, "eval_samples_per_second": 253.007, "eval_steps_per_second": 3.973, "step": 47000 }, { "epoch": 2.03, "learning_rate": 1.0131876573769793e-05, "loss": 0.121, "step": 47500 }, { "epoch": 2.03, "eval_accuracy": 0.8856800638807559, "eval_f1": 0.8866614530137226, "eval_loss": 0.32954302430152893, "eval_runtime": 29.7176, "eval_samples_per_second": 252.847, "eval_steps_per_second": 3.971, "step": 47500 }, { "epoch": 2.05, "learning_rate": 1.0238572830865095e-05, "loss": 0.1261, "step": 48000 }, { "epoch": 2.05, "eval_accuracy": 0.8984562150652116, "eval_f1": 0.8988722889159447, "eval_loss": 0.2932971119880676, "eval_runtime": 29.7394, "eval_samples_per_second": 252.662, "eval_steps_per_second": 3.968, "step": 48000 }, { "epoch": 2.07, "learning_rate": 1.0345269087960395e-05, "loss": 0.1181, "step": 48500 }, { "epoch": 2.07, "eval_accuracy": 0.8951290923609263, "eval_f1": 0.8958668889166673, "eval_loss": 0.3214079439640045, "eval_runtime": 29.6828, "eval_samples_per_second": 253.143, "eval_steps_per_second": 3.975, "step": 48500 }, { "epoch": 2.09, "learning_rate": 1.0451751952541507e-05, "loss": 0.1267, "step": 49000 }, { "epoch": 2.09, "eval_accuracy": 0.896992281075326, "eval_f1": 0.8978174395785067, "eval_loss": 0.31766536831855774, "eval_runtime": 29.7643, "eval_samples_per_second": 252.45, "eval_steps_per_second": 3.964, "step": 49000 }, { "epoch": 2.11, "learning_rate": 1.0558448209636807e-05, "loss": 0.124, "step": 49500 }, { "epoch": 2.11, "eval_accuracy": 0.8655842427468725, "eval_f1": 0.8674991335930158, "eval_loss": 0.3998588025569916, "eval_runtime": 29.6648, "eval_samples_per_second": 253.297, "eval_steps_per_second": 3.978, "step": 49500 }, { "epoch": 2.13, "learning_rate": 1.0665144466732107e-05, "loss": 0.1238, "step": 50000 }, { "epoch": 2.13, "eval_accuracy": 0.8912696300239553, "eval_f1": 0.8920994435131362, "eval_loss": 0.32742416858673096, "eval_runtime": 29.7221, "eval_samples_per_second": 252.808, "eval_steps_per_second": 3.97, "step": 50000 }, { "epoch": 2.16, "learning_rate": 1.077184072382741e-05, "loss": 0.1206, "step": 50500 }, { "epoch": 2.16, "eval_accuracy": 0.9039126963002395, "eval_f1": 0.9041946980678703, "eval_loss": 0.29819589853286743, "eval_runtime": 29.6654, "eval_samples_per_second": 253.292, "eval_steps_per_second": 3.978, "step": 50500 }, { "epoch": 2.18, "learning_rate": 1.087832358840852e-05, "loss": 0.1231, "step": 51000 }, { "epoch": 2.18, "eval_accuracy": 0.864785733297844, "eval_f1": 0.8675346101327358, "eval_loss": 0.40950268507003784, "eval_runtime": 29.6781, "eval_samples_per_second": 253.183, "eval_steps_per_second": 3.976, "step": 51000 }, { "epoch": 2.2, "learning_rate": 1.0985019845503822e-05, "loss": 0.1211, "step": 51500 }, { "epoch": 2.2, "eval_accuracy": 0.8814213468192706, "eval_f1": 0.8826759157361712, "eval_loss": 0.376949667930603, "eval_runtime": 29.6468, "eval_samples_per_second": 253.45, "eval_steps_per_second": 3.98, "step": 51500 }, { "epoch": 2.22, "learning_rate": 1.1091716102599122e-05, "loss": 0.129, "step": 52000 }, { "epoch": 2.22, "eval_accuracy": 0.9021825924940112, "eval_f1": 0.9026398982081912, "eval_loss": 0.28755125403404236, "eval_runtime": 29.6857, "eval_samples_per_second": 253.119, "eval_steps_per_second": 3.975, "step": 52000 }, { "epoch": 2.24, "learning_rate": 1.1198412359694424e-05, "loss": 0.1193, "step": 52500 }, { "epoch": 2.24, "eval_accuracy": 0.883151450625499, "eval_f1": 0.8842688770624403, "eval_loss": 0.3539595901966095, "eval_runtime": 29.6767, "eval_samples_per_second": 253.195, "eval_steps_per_second": 3.976, "step": 52500 }, { "epoch": 2.26, "learning_rate": 1.1304895224275534e-05, "loss": 0.12, "step": 53000 }, { "epoch": 2.26, "eval_accuracy": 0.860260846420016, "eval_f1": 0.8633802325021412, "eval_loss": 0.4444720149040222, "eval_runtime": 29.8492, "eval_samples_per_second": 251.732, "eval_steps_per_second": 3.953, "step": 53000 }, { "epoch": 2.28, "learning_rate": 1.1411591481370834e-05, "loss": 0.1205, "step": 53500 }, { "epoch": 2.28, "eval_accuracy": 0.9056428001064679, "eval_f1": 0.9060288772812102, "eval_loss": 0.28057876229286194, "eval_runtime": 29.7764, "eval_samples_per_second": 252.348, "eval_steps_per_second": 3.963, "step": 53500 }, { "epoch": 2.3, "learning_rate": 1.1518287738466136e-05, "loss": 0.1225, "step": 54000 }, { "epoch": 2.3, "eval_accuracy": 0.90018631887144, "eval_f1": 0.9011140009401021, "eval_loss": 0.3092280924320221, "eval_runtime": 29.7057, "eval_samples_per_second": 252.948, "eval_steps_per_second": 3.972, "step": 54000 }, { "epoch": 2.33, "learning_rate": 1.1624983995561436e-05, "loss": 0.1242, "step": 54500 }, { "epoch": 2.33, "eval_accuracy": 0.8866116582379558, "eval_f1": 0.8877422144240315, "eval_loss": 0.3376993238925934, "eval_runtime": 29.662, "eval_samples_per_second": 253.321, "eval_steps_per_second": 3.978, "step": 54500 }, { "epoch": 2.35, "learning_rate": 1.1731466860142547e-05, "loss": 0.1208, "step": 55000 }, { "epoch": 2.35, "eval_accuracy": 0.8895395262177269, "eval_f1": 0.8908095697962214, "eval_loss": 0.3369642496109009, "eval_runtime": 29.6995, "eval_samples_per_second": 253.001, "eval_steps_per_second": 3.973, "step": 55000 }, { "epoch": 2.37, "learning_rate": 1.1838163117237849e-05, "loss": 0.1214, "step": 55500 }, { "epoch": 2.37, "eval_accuracy": 0.8932659036465265, "eval_f1": 0.8942589850549713, "eval_loss": 0.3164837658405304, "eval_runtime": 29.7307, "eval_samples_per_second": 252.736, "eval_steps_per_second": 3.969, "step": 55500 }, { "epoch": 2.39, "learning_rate": 1.1944645981818958e-05, "loss": 0.1252, "step": 56000 }, { "epoch": 2.39, "eval_accuracy": 0.9021825924940112, "eval_f1": 0.902743360666717, "eval_loss": 0.29441598057746887, "eval_runtime": 29.7002, "eval_samples_per_second": 252.995, "eval_steps_per_second": 3.973, "step": 56000 }, { "epoch": 2.41, "learning_rate": 1.2051342238914261e-05, "loss": 0.1219, "step": 56500 }, { "epoch": 2.41, "eval_accuracy": 0.8979238754325259, "eval_f1": 0.8984367636555247, "eval_loss": 0.32014647126197815, "eval_runtime": 29.7756, "eval_samples_per_second": 252.354, "eval_steps_per_second": 3.963, "step": 56500 }, { "epoch": 2.43, "learning_rate": 1.2158038496009562e-05, "loss": 0.1195, "step": 57000 }, { "epoch": 2.43, "eval_accuracy": 0.9109661964333244, "eval_f1": 0.9108760160900574, "eval_loss": 0.2840426564216614, "eval_runtime": 29.7122, "eval_samples_per_second": 252.893, "eval_steps_per_second": 3.971, "step": 57000 }, { "epoch": 2.45, "learning_rate": 1.2264734753104862e-05, "loss": 0.126, "step": 57500 }, { "epoch": 2.45, "eval_accuracy": 0.9041788661165824, "eval_f1": 0.9046980888295584, "eval_loss": 0.2939707338809967, "eval_runtime": 28.8636, "eval_samples_per_second": 260.327, "eval_steps_per_second": 4.088, "step": 57500 }, { "epoch": 2.48, "learning_rate": 1.2371431010200164e-05, "loss": 0.1282, "step": 58000 }, { "epoch": 2.48, "eval_accuracy": 0.894064413095555, "eval_f1": 0.8951953592249402, "eval_loss": 0.3158866763114929, "eval_runtime": 29.738, "eval_samples_per_second": 252.674, "eval_steps_per_second": 3.968, "step": 58000 }, { "epoch": 2.5, "learning_rate": 1.2478127267295464e-05, "loss": 0.1226, "step": 58500 }, { "epoch": 2.5, "eval_accuracy": 0.8977907905243545, "eval_f1": 0.8987052263199368, "eval_loss": 0.31644803285598755, "eval_runtime": 29.779, "eval_samples_per_second": 252.325, "eval_steps_per_second": 3.963, "step": 58500 }, { "epoch": 2.52, "learning_rate": 1.2584823524390765e-05, "loss": 0.1204, "step": 59000 }, { "epoch": 2.52, "eval_accuracy": 0.9188182060154378, "eval_f1": 0.9190661023623332, "eval_loss": 0.2515789568424225, "eval_runtime": 29.6787, "eval_samples_per_second": 253.178, "eval_steps_per_second": 3.976, "step": 59000 }, { "epoch": 2.54, "learning_rate": 1.2691519781486066e-05, "loss": 0.1217, "step": 59500 }, { "epoch": 2.54, "eval_accuracy": 0.886345488421613, "eval_f1": 0.8883012828628285, "eval_loss": 0.3361978232860565, "eval_runtime": 29.7086, "eval_samples_per_second": 252.923, "eval_steps_per_second": 3.972, "step": 59500 }, { "epoch": 2.56, "learning_rate": 1.2798002646067178e-05, "loss": 0.1196, "step": 60000 }, { "epoch": 2.56, "eval_accuracy": 0.9121639606068672, "eval_f1": 0.912425036592791, "eval_loss": 0.2658819854259491, "eval_runtime": 28.9201, "eval_samples_per_second": 259.82, "eval_steps_per_second": 4.08, "step": 60000 }, { "epoch": 2.58, "learning_rate": 1.2904698903162478e-05, "loss": 0.1251, "step": 60500 }, { "epoch": 2.58, "eval_accuracy": 0.9209475645461804, "eval_f1": 0.9211303421839581, "eval_loss": 0.23802870512008667, "eval_runtime": 29.6859, "eval_samples_per_second": 253.117, "eval_steps_per_second": 3.975, "step": 60500 }, { "epoch": 2.6, "learning_rate": 1.3011395160257778e-05, "loss": 0.1189, "step": 61000 }, { "epoch": 2.6, "eval_accuracy": 0.9095022624434389, "eval_f1": 0.9100155518175987, "eval_loss": 0.2792932987213135, "eval_runtime": 29.677, "eval_samples_per_second": 253.193, "eval_steps_per_second": 3.976, "step": 61000 }, { "epoch": 2.62, "learning_rate": 1.311809141735308e-05, "loss": 0.1236, "step": 61500 }, { "epoch": 2.62, "eval_accuracy": 0.9140271493212669, "eval_f1": 0.9145109646655343, "eval_loss": 0.25876516103744507, "eval_runtime": 29.7058, "eval_samples_per_second": 252.947, "eval_steps_per_second": 3.972, "step": 61500 }, { "epoch": 2.65, "learning_rate": 1.322457428193419e-05, "loss": 0.1214, "step": 62000 }, { "epoch": 2.65, "eval_accuracy": 0.9057758850146393, "eval_f1": 0.9067875023670071, "eval_loss": 0.2852957546710968, "eval_runtime": 29.6747, "eval_samples_per_second": 253.212, "eval_steps_per_second": 3.976, "step": 62000 }, { "epoch": 2.67, "learning_rate": 1.3331270539029493e-05, "loss": 0.118, "step": 62500 }, { "epoch": 2.67, "eval_accuracy": 0.9136278945967528, "eval_f1": 0.9141599682657664, "eval_loss": 0.27117565274238586, "eval_runtime": 28.9704, "eval_samples_per_second": 259.368, "eval_steps_per_second": 4.073, "step": 62500 }, { "epoch": 2.69, "learning_rate": 1.3437966796124793e-05, "loss": 0.1185, "step": 63000 }, { "epoch": 2.69, "eval_accuracy": 0.896992281075326, "eval_f1": 0.8987913278259848, "eval_loss": 0.3407359719276428, "eval_runtime": 29.6984, "eval_samples_per_second": 253.01, "eval_steps_per_second": 3.973, "step": 63000 }, { "epoch": 2.71, "learning_rate": 1.3544663053220095e-05, "loss": 0.1205, "step": 63500 }, { "epoch": 2.71, "eval_accuracy": 0.9174873569337237, "eval_f1": 0.9178361952808003, "eval_loss": 0.2523012161254883, "eval_runtime": 29.8027, "eval_samples_per_second": 252.125, "eval_steps_per_second": 3.959, "step": 63500 }, { "epoch": 2.73, "learning_rate": 1.3651145917801205e-05, "loss": 0.1237, "step": 64000 }, { "epoch": 2.73, "eval_accuracy": 0.9218791589033803, "eval_f1": 0.9220981450875413, "eval_loss": 0.24377423524856567, "eval_runtime": 29.4972, "eval_samples_per_second": 254.736, "eval_steps_per_second": 4.0, "step": 64000 }, { "epoch": 2.75, "learning_rate": 1.3757842174896505e-05, "loss": 0.1209, "step": 64500 }, { "epoch": 2.75, "eval_accuracy": 0.9113654511578387, "eval_f1": 0.9120163666906858, "eval_loss": 0.2814728319644928, "eval_runtime": 29.6606, "eval_samples_per_second": 253.332, "eval_steps_per_second": 3.978, "step": 64500 }, { "epoch": 2.77, "learning_rate": 1.3864538431991807e-05, "loss": 0.1181, "step": 65000 }, { "epoch": 2.77, "eval_accuracy": 0.9112323662496673, "eval_f1": 0.9117679194164329, "eval_loss": 0.27400583028793335, "eval_runtime": 29.6997, "eval_samples_per_second": 252.999, "eval_steps_per_second": 3.973, "step": 65000 }, { "epoch": 2.8, "learning_rate": 1.3971234689087107e-05, "loss": 0.1247, "step": 65500 }, { "epoch": 2.8, "eval_accuracy": 0.8949960074527549, "eval_f1": 0.8963813695574201, "eval_loss": 0.3465683162212372, "eval_runtime": 29.8235, "eval_samples_per_second": 251.949, "eval_steps_per_second": 3.957, "step": 65500 }, { "epoch": 2.82, "learning_rate": 1.4077717553668218e-05, "loss": 0.1214, "step": 66000 }, { "epoch": 2.82, "eval_accuracy": 0.9019164226776684, "eval_f1": 0.9023520343501465, "eval_loss": 0.2800099849700928, "eval_runtime": 29.685, "eval_samples_per_second": 253.125, "eval_steps_per_second": 3.975, "step": 66000 }, { "epoch": 2.84, "learning_rate": 1.418441381076352e-05, "loss": 0.1232, "step": 66500 }, { "epoch": 2.84, "eval_accuracy": 0.8988554697897259, "eval_f1": 0.8998071192198867, "eval_loss": 0.31529906392097473, "eval_runtime": 29.7055, "eval_samples_per_second": 252.95, "eval_steps_per_second": 3.972, "step": 66500 }, { "epoch": 2.86, "learning_rate": 1.429111006785882e-05, "loss": 0.1235, "step": 67000 }, { "epoch": 2.86, "eval_accuracy": 0.9121639606068672, "eval_f1": 0.9130752200374649, "eval_loss": 0.2936250567436218, "eval_runtime": 29.6997, "eval_samples_per_second": 253.0, "eval_steps_per_second": 3.973, "step": 67000 }, { "epoch": 2.88, "learning_rate": 1.4397806324954122e-05, "loss": 0.1194, "step": 67500 }, { "epoch": 2.88, "eval_accuracy": 0.9176204418418952, "eval_f1": 0.9179026321234128, "eval_loss": 0.23929628729820251, "eval_runtime": 29.742, "eval_samples_per_second": 252.639, "eval_steps_per_second": 3.967, "step": 67500 }, { "epoch": 2.9, "learning_rate": 1.4504289189535232e-05, "loss": 0.1212, "step": 68000 }, { "epoch": 2.9, "eval_accuracy": 0.9214799041788662, "eval_f1": 0.9217761013099526, "eval_loss": 0.2274727076292038, "eval_runtime": 29.6815, "eval_samples_per_second": 253.154, "eval_steps_per_second": 3.976, "step": 68000 }, { "epoch": 2.92, "learning_rate": 1.4610985446630532e-05, "loss": 0.1179, "step": 68500 }, { "epoch": 2.92, "eval_accuracy": 0.9103007718924674, "eval_f1": 0.9111492564025686, "eval_loss": 0.26987186074256897, "eval_runtime": 29.6917, "eval_samples_per_second": 253.067, "eval_steps_per_second": 3.974, "step": 68500 }, { "epoch": 2.94, "learning_rate": 1.4717681703725834e-05, "loss": 0.1248, "step": 69000 }, { "epoch": 2.94, "eval_accuracy": 0.9265371306893798, "eval_f1": 0.926618029973173, "eval_loss": 0.22422577440738678, "eval_runtime": 29.8231, "eval_samples_per_second": 251.952, "eval_steps_per_second": 3.957, "step": 69000 }, { "epoch": 2.97, "learning_rate": 1.4824377960821134e-05, "loss": 0.1193, "step": 69500 }, { "epoch": 2.97, "eval_accuracy": 0.9174873569337237, "eval_f1": 0.9179152081146503, "eval_loss": 0.2527088224887848, "eval_runtime": 29.7228, "eval_samples_per_second": 252.803, "eval_steps_per_second": 3.97, "step": 69500 }, { "epoch": 2.99, "learning_rate": 1.4930860825402247e-05, "loss": 0.1191, "step": 70000 }, { "epoch": 2.99, "eval_accuracy": 0.9103007718924674, "eval_f1": 0.9114240853347546, "eval_loss": 0.2774083614349365, "eval_runtime": 29.8732, "eval_samples_per_second": 251.53, "eval_steps_per_second": 3.95, "step": 70000 }, { "epoch": 3.01, "learning_rate": 1.5037557082497547e-05, "loss": 0.1134, "step": 70500 }, { "epoch": 3.01, "eval_accuracy": 0.9244077721586372, "eval_f1": 0.924962986957909, "eval_loss": 0.26340293884277344, "eval_runtime": 29.7837, "eval_samples_per_second": 252.286, "eval_steps_per_second": 3.962, "step": 70500 }, { "epoch": 3.03, "learning_rate": 1.5144253339592849e-05, "loss": 0.0997, "step": 71000 }, { "epoch": 3.03, "eval_accuracy": 0.9166888474846953, "eval_f1": 0.9174450725360211, "eval_loss": 0.2868385314941406, "eval_runtime": 29.6773, "eval_samples_per_second": 253.19, "eval_steps_per_second": 3.976, "step": 71000 }, { "epoch": 3.05, "learning_rate": 1.5250949596688149e-05, "loss": 0.0997, "step": 71500 }, { "epoch": 3.05, "eval_accuracy": 0.9075059888208677, "eval_f1": 0.9083709688675202, "eval_loss": 0.3178350329399109, "eval_runtime": 29.7056, "eval_samples_per_second": 252.949, "eval_steps_per_second": 3.972, "step": 71500 }, { "epoch": 3.07, "learning_rate": 1.5357432461269258e-05, "loss": 0.103, "step": 72000 }, { "epoch": 3.07, "eval_accuracy": 0.9055097151982965, "eval_f1": 0.9066829178335443, "eval_loss": 0.3471682071685791, "eval_runtime": 29.8055, "eval_samples_per_second": 252.101, "eval_steps_per_second": 3.959, "step": 72000 }, { "epoch": 3.09, "learning_rate": 1.5464128718364563e-05, "loss": 0.1001, "step": 72500 }, { "epoch": 3.09, "eval_accuracy": 0.9209475645461804, "eval_f1": 0.9213604258381931, "eval_loss": 0.2840951681137085, "eval_runtime": 29.7444, "eval_samples_per_second": 252.619, "eval_steps_per_second": 3.967, "step": 72500 }, { "epoch": 3.12, "learning_rate": 1.557082497545986e-05, "loss": 0.1026, "step": 73000 }, { "epoch": 3.12, "eval_accuracy": 0.9126963002395528, "eval_f1": 0.9135468482778786, "eval_loss": 0.31749972701072693, "eval_runtime": 29.6905, "eval_samples_per_second": 253.078, "eval_steps_per_second": 3.974, "step": 73000 }, { "epoch": 3.14, "learning_rate": 1.5677521232555163e-05, "loss": 0.0992, "step": 73500 }, { "epoch": 3.14, "eval_accuracy": 0.91243013042321, "eval_f1": 0.9134755081204278, "eval_loss": 0.3127423822879791, "eval_runtime": 29.6796, "eval_samples_per_second": 253.171, "eval_steps_per_second": 3.976, "step": 73500 }, { "epoch": 3.16, "learning_rate": 1.5784004097136276e-05, "loss": 0.0962, "step": 74000 }, { "epoch": 3.16, "eval_accuracy": 0.9240085174341229, "eval_f1": 0.9243631550194533, "eval_loss": 0.27587592601776123, "eval_runtime": 29.6853, "eval_samples_per_second": 253.122, "eval_steps_per_second": 3.975, "step": 74000 }, { "epoch": 3.18, "learning_rate": 1.5890700354231574e-05, "loss": 0.1029, "step": 74500 }, { "epoch": 3.18, "eval_accuracy": 0.9244077721586372, "eval_f1": 0.9248144452947205, "eval_loss": 0.25898730754852295, "eval_runtime": 29.716, "eval_samples_per_second": 252.86, "eval_steps_per_second": 3.971, "step": 74500 }, { "epoch": 3.2, "learning_rate": 1.5997396611326876e-05, "loss": 0.1017, "step": 75000 }, { "epoch": 3.2, "eval_accuracy": 0.9157572531274953, "eval_f1": 0.9160725646478424, "eval_loss": 0.29474782943725586, "eval_runtime": 29.7179, "eval_samples_per_second": 252.844, "eval_steps_per_second": 3.971, "step": 75000 }, { "epoch": 3.22, "learning_rate": 1.6103879475907985e-05, "loss": 0.0975, "step": 75500 }, { "epoch": 3.22, "eval_accuracy": 0.9104338568006388, "eval_f1": 0.9114419436408188, "eval_loss": 0.3418111205101013, "eval_runtime": 29.7159, "eval_samples_per_second": 252.861, "eval_steps_per_second": 3.971, "step": 75500 }, { "epoch": 3.24, "learning_rate": 1.6210575733003287e-05, "loss": 0.1017, "step": 76000 }, { "epoch": 3.24, "eval_accuracy": 0.920814479638009, "eval_f1": 0.9212922615435651, "eval_loss": 0.2858801484107971, "eval_runtime": 29.7053, "eval_samples_per_second": 252.952, "eval_steps_per_second": 3.972, "step": 76000 }, { "epoch": 3.26, "learning_rate": 1.631727199009859e-05, "loss": 0.1056, "step": 76500 }, { "epoch": 3.26, "eval_accuracy": 0.9258717061485228, "eval_f1": 0.9258138327069322, "eval_loss": 0.26912811398506165, "eval_runtime": 29.8329, "eval_samples_per_second": 251.87, "eval_steps_per_second": 3.955, "step": 76500 }, { "epoch": 3.29, "learning_rate": 1.642396824719389e-05, "loss": 0.1005, "step": 77000 }, { "epoch": 3.29, "eval_accuracy": 0.8961937716262975, "eval_f1": 0.898532464780015, "eval_loss": 0.3822651505470276, "eval_runtime": 29.6919, "eval_samples_per_second": 253.065, "eval_steps_per_second": 3.974, "step": 77000 }, { "epoch": 3.31, "learning_rate": 1.6530664504289192e-05, "loss": 0.1016, "step": 77500 }, { "epoch": 3.31, "eval_accuracy": 0.8926004791056694, "eval_f1": 0.89485432153672, "eval_loss": 0.3906500041484833, "eval_runtime": 29.7236, "eval_samples_per_second": 252.796, "eval_steps_per_second": 3.97, "step": 77500 }, { "epoch": 3.33, "learning_rate": 1.663736076138449e-05, "loss": 0.1015, "step": 78000 }, { "epoch": 3.33, "eval_accuracy": 0.9012509981368113, "eval_f1": 0.9025446954153953, "eval_loss": 0.36229291558265686, "eval_runtime": 29.7176, "eval_samples_per_second": 252.847, "eval_steps_per_second": 3.971, "step": 78000 }, { "epoch": 3.35, "learning_rate": 1.6744057018479793e-05, "loss": 0.1068, "step": 78500 }, { "epoch": 3.35, "eval_accuracy": 0.9196167154644663, "eval_f1": 0.9202631935738008, "eval_loss": 0.28303927183151245, "eval_runtime": 29.6695, "eval_samples_per_second": 253.257, "eval_steps_per_second": 3.977, "step": 78500 }, { "epoch": 3.37, "learning_rate": 1.6850753275575094e-05, "loss": 0.1023, "step": 79000 }, { "epoch": 3.37, "eval_accuracy": 0.9190843758317807, "eval_f1": 0.919786947414631, "eval_loss": 0.285210520029068, "eval_runtime": 29.66, "eval_samples_per_second": 253.338, "eval_steps_per_second": 3.978, "step": 79000 }, { "epoch": 3.39, "learning_rate": 1.6957236140156207e-05, "loss": 0.104, "step": 79500 }, { "epoch": 3.39, "eval_accuracy": 0.914692573862124, "eval_f1": 0.9155263369499395, "eval_loss": 0.2765791714191437, "eval_runtime": 29.2294, "eval_samples_per_second": 257.07, "eval_steps_per_second": 4.037, "step": 79500 }, { "epoch": 3.41, "learning_rate": 1.7063932397251505e-05, "loss": 0.1057, "step": 80000 }, { "epoch": 3.41, "eval_accuracy": 0.9245408570668087, "eval_f1": 0.9249301872373819, "eval_loss": 0.2611972391605377, "eval_runtime": 29.7005, "eval_samples_per_second": 252.992, "eval_steps_per_second": 3.973, "step": 80000 }, { "epoch": 3.44, "learning_rate": 1.7170628654346807e-05, "loss": 0.1052, "step": 80500 }, { "epoch": 3.44, "eval_accuracy": 0.9154910833111525, "eval_f1": 0.9164321348413044, "eval_loss": 0.3005645275115967, "eval_runtime": 29.7061, "eval_samples_per_second": 252.944, "eval_steps_per_second": 3.972, "step": 80500 }, { "epoch": 3.46, "learning_rate": 1.727711151892792e-05, "loss": 0.1009, "step": 81000 }, { "epoch": 3.46, "eval_accuracy": 0.9065743944636678, "eval_f1": 0.907476050006008, "eval_loss": 0.30318447947502136, "eval_runtime": 29.7072, "eval_samples_per_second": 252.935, "eval_steps_per_second": 3.972, "step": 81000 }, { "epoch": 3.48, "learning_rate": 1.7383807776023218e-05, "loss": 0.1033, "step": 81500 }, { "epoch": 3.48, "eval_accuracy": 0.906973649188182, "eval_f1": 0.9082369305977184, "eval_loss": 0.31744304299354553, "eval_runtime": 29.6831, "eval_samples_per_second": 253.141, "eval_steps_per_second": 3.975, "step": 81500 }, { "epoch": 3.5, "learning_rate": 1.749050403311852e-05, "loss": 0.1099, "step": 82000 }, { "epoch": 3.5, "eval_accuracy": 0.9252062816076657, "eval_f1": 0.9255446305995462, "eval_loss": 0.23546352982521057, "eval_runtime": 28.9475, "eval_samples_per_second": 259.574, "eval_steps_per_second": 4.076, "step": 82000 }, { "epoch": 3.52, "learning_rate": 1.759720029021382e-05, "loss": 0.1075, "step": 82500 }, { "epoch": 3.52, "eval_accuracy": 0.9303965930263508, "eval_f1": 0.9305716790224627, "eval_loss": 0.23489102721214294, "eval_runtime": 29.7169, "eval_samples_per_second": 252.853, "eval_steps_per_second": 3.971, "step": 82500 }, { "epoch": 3.54, "learning_rate": 1.7703896547309123e-05, "loss": 0.1082, "step": 83000 }, { "epoch": 3.54, "eval_accuracy": 0.9017833377694969, "eval_f1": 0.9031724871437866, "eval_loss": 0.3438743054866791, "eval_runtime": 29.683, "eval_samples_per_second": 253.141, "eval_steps_per_second": 3.975, "step": 83000 }, { "epoch": 3.56, "learning_rate": 1.7810379411890232e-05, "loss": 0.107, "step": 83500 }, { "epoch": 3.56, "eval_accuracy": 0.9190843758317807, "eval_f1": 0.919808533673745, "eval_loss": 0.2954671084880829, "eval_runtime": 29.6856, "eval_samples_per_second": 253.119, "eval_steps_per_second": 3.975, "step": 83500 }, { "epoch": 3.58, "learning_rate": 1.7917075668985534e-05, "loss": 0.1069, "step": 84000 }, { "epoch": 3.58, "eval_accuracy": 0.9269363854138941, "eval_f1": 0.9274280575614218, "eval_loss": 0.2474394142627716, "eval_runtime": 29.7052, "eval_samples_per_second": 252.952, "eval_steps_per_second": 3.972, "step": 84000 }, { "epoch": 3.61, "learning_rate": 1.8023771926080836e-05, "loss": 0.1101, "step": 84500 }, { "epoch": 3.61, "eval_accuracy": 0.9232100079850944, "eval_f1": 0.9238938753643797, "eval_loss": 0.28328654170036316, "eval_runtime": 28.9009, "eval_samples_per_second": 259.992, "eval_steps_per_second": 4.083, "step": 84500 }, { "epoch": 3.63, "learning_rate": 1.8130468183176134e-05, "loss": 0.1121, "step": 85000 }, { "epoch": 3.63, "eval_accuracy": 0.9190843758317807, "eval_f1": 0.9199827093655857, "eval_loss": 0.2627813220024109, "eval_runtime": 29.7188, "eval_samples_per_second": 252.837, "eval_steps_per_second": 3.971, "step": 85000 }, { "epoch": 3.65, "learning_rate": 1.8237164440271436e-05, "loss": 0.1083, "step": 85500 }, { "epoch": 3.65, "eval_accuracy": 0.9326590364652648, "eval_f1": 0.9329981540836807, "eval_loss": 0.24135735630989075, "eval_runtime": 29.6566, "eval_samples_per_second": 253.367, "eval_steps_per_second": 3.979, "step": 85500 }, { "epoch": 3.67, "learning_rate": 1.8343860697366738e-05, "loss": 0.1039, "step": 86000 }, { "epoch": 3.67, "eval_accuracy": 0.9241416023422944, "eval_f1": 0.924742970053259, "eval_loss": 0.26063743233680725, "eval_runtime": 29.6683, "eval_samples_per_second": 253.267, "eval_steps_per_second": 3.977, "step": 86000 }, { "epoch": 3.69, "learning_rate": 1.845055695446204e-05, "loss": 0.1055, "step": 86500 }, { "epoch": 3.69, "eval_accuracy": 0.9122970455150385, "eval_f1": 0.9130212242543791, "eval_loss": 0.2822701036930084, "eval_runtime": 29.685, "eval_samples_per_second": 253.124, "eval_steps_per_second": 3.975, "step": 86500 }, { "epoch": 3.71, "learning_rate": 1.8557253211557342e-05, "loss": 0.107, "step": 87000 }, { "epoch": 3.71, "eval_accuracy": 0.9278679797710939, "eval_f1": 0.9283307100808318, "eval_loss": 0.24245958030223846, "eval_runtime": 27.1463, "eval_samples_per_second": 276.797, "eval_steps_per_second": 4.347, "step": 87000 }, { "epoch": 3.73, "learning_rate": 1.866373607613845e-05, "loss": 0.1075, "step": 87500 }, { "epoch": 3.73, "eval_accuracy": 0.9148256587702954, "eval_f1": 0.9157022847611576, "eval_loss": 0.2839546799659729, "eval_runtime": 29.7126, "eval_samples_per_second": 252.889, "eval_steps_per_second": 3.971, "step": 87500 }, { "epoch": 3.76, "learning_rate": 1.8770432333233752e-05, "loss": 0.1078, "step": 88000 }, { "epoch": 3.76, "eval_accuracy": 0.9299973383018366, "eval_f1": 0.9302152729819625, "eval_loss": 0.22580114006996155, "eval_runtime": 29.6944, "eval_samples_per_second": 253.044, "eval_steps_per_second": 3.974, "step": 88000 }, { "epoch": 3.78, "learning_rate": 1.887712859032905e-05, "loss": 0.1072, "step": 88500 }, { "epoch": 3.78, "eval_accuracy": 0.9287995741282938, "eval_f1": 0.9292731554080146, "eval_loss": 0.2478398084640503, "eval_runtime": 29.7139, "eval_samples_per_second": 252.878, "eval_steps_per_second": 3.971, "step": 88500 }, { "epoch": 3.8, "learning_rate": 1.8983824847424356e-05, "loss": 0.1097, "step": 89000 }, { "epoch": 3.8, "eval_accuracy": 0.9060420548309822, "eval_f1": 0.9077407417767054, "eval_loss": 0.31326791644096375, "eval_runtime": 29.7069, "eval_samples_per_second": 252.938, "eval_steps_per_second": 3.972, "step": 89000 }, { "epoch": 3.82, "learning_rate": 1.9090307712005465e-05, "loss": 0.1037, "step": 89500 }, { "epoch": 3.82, "eval_accuracy": 0.9298642533936652, "eval_f1": 0.9303728668629602, "eval_loss": 0.2570092976093292, "eval_runtime": 29.7155, "eval_samples_per_second": 252.864, "eval_steps_per_second": 3.971, "step": 89500 }, { "epoch": 3.84, "learning_rate": 1.9197003969100764e-05, "loss": 0.1096, "step": 90000 }, { "epoch": 3.84, "eval_accuracy": 0.9269363854138941, "eval_f1": 0.9273172425282155, "eval_loss": 0.24834661185741425, "eval_runtime": 29.7079, "eval_samples_per_second": 252.929, "eval_steps_per_second": 3.972, "step": 90000 }, { "epoch": 3.86, "learning_rate": 1.930370022619607e-05, "loss": 0.1082, "step": 90500 }, { "epoch": 3.86, "eval_accuracy": 0.9241416023422944, "eval_f1": 0.9247359215150383, "eval_loss": 0.2676146328449249, "eval_runtime": 29.6791, "eval_samples_per_second": 253.175, "eval_steps_per_second": 3.976, "step": 90500 }, { "epoch": 3.88, "learning_rate": 1.9410183090777178e-05, "loss": 0.1096, "step": 91000 }, { "epoch": 3.88, "eval_accuracy": 0.9311951024753793, "eval_f1": 0.9314316700507581, "eval_loss": 0.23314546048641205, "eval_runtime": 29.6746, "eval_samples_per_second": 253.214, "eval_steps_per_second": 3.976, "step": 91000 }, { "epoch": 3.91, "learning_rate": 1.9516879347872476e-05, "loss": 0.1035, "step": 91500 }, { "epoch": 3.91, "eval_accuracy": 0.9250731966994943, "eval_f1": 0.9256904714759885, "eval_loss": 0.27229878306388855, "eval_runtime": 29.8012, "eval_samples_per_second": 252.137, "eval_steps_per_second": 3.96, "step": 91500 }, { "epoch": 3.93, "learning_rate": 1.9623575604967778e-05, "loss": 0.1089, "step": 92000 }, { "epoch": 3.93, "eval_accuracy": 0.9250731966994943, "eval_f1": 0.9255950104312578, "eval_loss": 0.2421479970216751, "eval_runtime": 29.7466, "eval_samples_per_second": 252.6, "eval_steps_per_second": 3.967, "step": 92000 }, { "epoch": 3.95, "learning_rate": 1.973027186206308e-05, "loss": 0.1074, "step": 92500 }, { "epoch": 3.95, "eval_accuracy": 0.9317274421080649, "eval_f1": 0.9321005917005625, "eval_loss": 0.23583532869815826, "eval_runtime": 29.682, "eval_samples_per_second": 253.15, "eval_steps_per_second": 3.975, "step": 92500 }, { "epoch": 3.97, "learning_rate": 1.983696811915838e-05, "loss": 0.1098, "step": 93000 }, { "epoch": 3.97, "eval_accuracy": 0.9274687250465797, "eval_f1": 0.9278264551595683, "eval_loss": 0.23873497545719147, "eval_runtime": 29.6836, "eval_samples_per_second": 253.136, "eval_steps_per_second": 3.975, "step": 93000 }, { "epoch": 3.99, "learning_rate": 1.9943664376253684e-05, "loss": 0.1082, "step": 93500 }, { "epoch": 3.99, "eval_accuracy": 0.9343891402714932, "eval_f1": 0.9346466592321752, "eval_loss": 0.22431565821170807, "eval_runtime": 29.6604, "eval_samples_per_second": 253.334, "eval_steps_per_second": 3.978, "step": 93500 }, { "epoch": 4.01, "learning_rate": 1.9987409841662757e-05, "loss": 0.0967, "step": 94000 }, { "epoch": 4.01, "eval_accuracy": 0.9284003194037797, "eval_f1": 0.9290340605178463, "eval_loss": 0.29876431822776794, "eval_runtime": 29.7877, "eval_samples_per_second": 252.251, "eval_steps_per_second": 3.961, "step": 94000 }, { "epoch": 4.03, "learning_rate": 1.996078912551748e-05, "loss": 0.0841, "step": 94500 }, { "epoch": 4.03, "eval_accuracy": 0.9225445834442374, "eval_f1": 0.9227958817621394, "eval_loss": 0.3175296187400818, "eval_runtime": 29.7507, "eval_samples_per_second": 252.565, "eval_steps_per_second": 3.966, "step": 94500 }, { "epoch": 4.05, "learning_rate": 1.9934115061243654e-05, "loss": 0.0882, "step": 95000 }, { "epoch": 4.05, "eval_accuracy": 0.9233430928932659, "eval_f1": 0.9241105837969426, "eval_loss": 0.3155466616153717, "eval_runtime": 29.6776, "eval_samples_per_second": 253.187, "eval_steps_per_second": 3.976, "step": 95000 }, { "epoch": 4.08, "learning_rate": 1.990744099696983e-05, "loss": 0.0883, "step": 95500 }, { "epoch": 4.08, "eval_accuracy": 0.9313281873835507, "eval_f1": 0.9316087493957224, "eval_loss": 0.24962832033634186, "eval_runtime": 29.6708, "eval_samples_per_second": 253.245, "eval_steps_per_second": 3.977, "step": 95500 }, { "epoch": 4.1, "learning_rate": 1.9880766932696003e-05, "loss": 0.0899, "step": 96000 }, { "epoch": 4.1, "eval_accuracy": 0.9112323662496673, "eval_f1": 0.912925405010681, "eval_loss": 0.3676290512084961, "eval_runtime": 29.6697, "eval_samples_per_second": 253.255, "eval_steps_per_second": 3.977, "step": 96000 }, { "epoch": 4.12, "learning_rate": 1.9854146216550725e-05, "loss": 0.0859, "step": 96500 }, { "epoch": 4.12, "eval_accuracy": 0.9353207346286931, "eval_f1": 0.9356799670819131, "eval_loss": 0.25173771381378174, "eval_runtime": 29.6986, "eval_samples_per_second": 253.008, "eval_steps_per_second": 3.973, "step": 96500 }, { "epoch": 4.14, "learning_rate": 1.98274721522769e-05, "loss": 0.0852, "step": 97000 }, { "epoch": 4.14, "eval_accuracy": 0.9256055363321799, "eval_f1": 0.9263085710587642, "eval_loss": 0.30905455350875854, "eval_runtime": 29.7296, "eval_samples_per_second": 252.744, "eval_steps_per_second": 3.969, "step": 97000 }, { "epoch": 4.16, "learning_rate": 1.9800798088003074e-05, "loss": 0.0908, "step": 97500 }, { "epoch": 4.16, "eval_accuracy": 0.9284003194037797, "eval_f1": 0.9289808331035979, "eval_loss": 0.2927681505680084, "eval_runtime": 29.6939, "eval_samples_per_second": 253.048, "eval_steps_per_second": 3.974, "step": 97500 }, { "epoch": 4.18, "learning_rate": 1.977412402372925e-05, "loss": 0.0876, "step": 98000 }, { "epoch": 4.18, "eval_accuracy": 0.9378493478839499, "eval_f1": 0.9379757198421206, "eval_loss": 0.23220977187156677, "eval_runtime": 29.7676, "eval_samples_per_second": 252.422, "eval_steps_per_second": 3.964, "step": 98000 }, { "epoch": 4.2, "learning_rate": 1.974750330758397e-05, "loss": 0.0884, "step": 98500 }, { "epoch": 4.2, "eval_accuracy": 0.9299973383018366, "eval_f1": 0.9304508259060222, "eval_loss": 0.2828425467014313, "eval_runtime": 29.6703, "eval_samples_per_second": 253.25, "eval_steps_per_second": 3.977, "step": 98500 }, { "epoch": 4.23, "learning_rate": 1.972082924331015e-05, "loss": 0.0906, "step": 99000 }, { "epoch": 4.23, "eval_accuracy": 0.9338568006388076, "eval_f1": 0.934280526666979, "eval_loss": 0.25503915548324585, "eval_runtime": 29.6986, "eval_samples_per_second": 253.008, "eval_steps_per_second": 3.973, "step": 99000 }, { "epoch": 4.25, "learning_rate": 1.969415517903632e-05, "loss": 0.0923, "step": 99500 }, { "epoch": 4.25, "eval_accuracy": 0.9330582911897791, "eval_f1": 0.9332689003068755, "eval_loss": 0.25108087062835693, "eval_runtime": 29.8057, "eval_samples_per_second": 252.1, "eval_steps_per_second": 3.959, "step": 99500 }, { "epoch": 4.27, "learning_rate": 1.9667481114762497e-05, "loss": 0.0929, "step": 100000 }, { "epoch": 4.27, "eval_accuracy": 0.9353207346286931, "eval_f1": 0.935553232165074, "eval_loss": 0.26846399903297424, "eval_runtime": 29.7753, "eval_samples_per_second": 252.357, "eval_steps_per_second": 3.963, "step": 100000 }, { "epoch": 4.29, "learning_rate": 1.9640807050488672e-05, "loss": 0.091, "step": 100500 }, { "epoch": 4.29, "eval_accuracy": 0.9272025552302369, "eval_f1": 0.9279322378363214, "eval_loss": 0.30113697052001953, "eval_runtime": 29.8112, "eval_samples_per_second": 252.053, "eval_steps_per_second": 3.958, "step": 100500 }, { "epoch": 4.31, "learning_rate": 1.9614132986214843e-05, "loss": 0.093, "step": 101000 }, { "epoch": 4.31, "eval_accuracy": 0.92467394197498, "eval_f1": 0.92528965941888, "eval_loss": 0.28048303723335266, "eval_runtime": 29.6933, "eval_samples_per_second": 253.054, "eval_steps_per_second": 3.974, "step": 101000 }, { "epoch": 4.33, "learning_rate": 1.958745892194102e-05, "loss": 0.0894, "step": 101500 }, { "epoch": 4.33, "eval_accuracy": 0.920149055097152, "eval_f1": 0.9212457425349848, "eval_loss": 0.30429819226264954, "eval_runtime": 29.6772, "eval_samples_per_second": 253.191, "eval_steps_per_second": 3.976, "step": 101500 }, { "epoch": 4.35, "learning_rate": 1.9560784857667196e-05, "loss": 0.0912, "step": 102000 }, { "epoch": 4.35, "eval_accuracy": 0.9229438381687517, "eval_f1": 0.9239101512746192, "eval_loss": 0.30318892002105713, "eval_runtime": 29.7216, "eval_samples_per_second": 252.812, "eval_steps_per_second": 3.97, "step": 102000 }, { "epoch": 4.37, "learning_rate": 1.953411079339337e-05, "loss": 0.0914, "step": 102500 }, { "epoch": 4.37, "eval_accuracy": 0.9164226776683524, "eval_f1": 0.9172366101282634, "eval_loss": 0.31719163060188293, "eval_runtime": 29.6376, "eval_samples_per_second": 253.529, "eval_steps_per_second": 3.981, "step": 102500 }, { "epoch": 4.4, "learning_rate": 1.9507490077248092e-05, "loss": 0.0902, "step": 103000 }, { "epoch": 4.4, "eval_accuracy": 0.9337237157306362, "eval_f1": 0.9340855551795185, "eval_loss": 0.25142449140548706, "eval_runtime": 29.7958, "eval_samples_per_second": 252.183, "eval_steps_per_second": 3.96, "step": 103000 }, { "epoch": 4.42, "learning_rate": 1.9480869361102814e-05, "loss": 0.0948, "step": 103500 }, { "epoch": 4.42, "eval_accuracy": 0.9225445834442374, "eval_f1": 0.923140532339757, "eval_loss": 0.2710263133049011, "eval_runtime": 29.694, "eval_samples_per_second": 253.048, "eval_steps_per_second": 3.974, "step": 103500 }, { "epoch": 4.44, "learning_rate": 1.945419529682899e-05, "loss": 0.0926, "step": 104000 }, { "epoch": 4.44, "eval_accuracy": 0.9319936119244078, "eval_f1": 0.9323841800449281, "eval_loss": 0.2564203441143036, "eval_runtime": 28.8753, "eval_samples_per_second": 260.222, "eval_steps_per_second": 4.087, "step": 104000 }, { "epoch": 4.46, "learning_rate": 1.9427521232555163e-05, "loss": 0.0918, "step": 104500 }, { "epoch": 4.46, "eval_accuracy": 0.9375831780676072, "eval_f1": 0.9377531909606569, "eval_loss": 0.2197369635105133, "eval_runtime": 29.7897, "eval_samples_per_second": 252.235, "eval_steps_per_second": 3.961, "step": 104500 }, { "epoch": 4.48, "learning_rate": 1.9400847168281338e-05, "loss": 0.0928, "step": 105000 }, { "epoch": 4.48, "eval_accuracy": 0.9230769230769231, "eval_f1": 0.9238162163050091, "eval_loss": 0.2949956953525543, "eval_runtime": 29.7038, "eval_samples_per_second": 252.964, "eval_steps_per_second": 3.973, "step": 105000 }, { "epoch": 4.5, "learning_rate": 1.9374173104007512e-05, "loss": 0.0911, "step": 105500 }, { "epoch": 4.5, "eval_accuracy": 0.9261378759648656, "eval_f1": 0.9270039183662078, "eval_loss": 0.3085399568080902, "eval_runtime": 29.6996, "eval_samples_per_second": 253.0, "eval_steps_per_second": 3.973, "step": 105500 }, { "epoch": 4.52, "learning_rate": 1.9347499039733687e-05, "loss": 0.0916, "step": 106000 }, { "epoch": 4.52, "eval_accuracy": 0.9256055363321799, "eval_f1": 0.9265535422815637, "eval_loss": 0.2896316647529602, "eval_runtime": 29.7002, "eval_samples_per_second": 252.995, "eval_steps_per_second": 3.973, "step": 106000 }, { "epoch": 4.55, "learning_rate": 1.932087832358841e-05, "loss": 0.0903, "step": 106500 }, { "epoch": 4.55, "eval_accuracy": 0.9277348948629226, "eval_f1": 0.9284942853366587, "eval_loss": 0.30977749824523926, "eval_runtime": 28.875, "eval_samples_per_second": 260.225, "eval_steps_per_second": 4.087, "step": 106500 }, { "epoch": 4.57, "learning_rate": 1.9294204259314583e-05, "loss": 0.0909, "step": 107000 }, { "epoch": 4.57, "eval_accuracy": 0.9168219323928667, "eval_f1": 0.9182154468537094, "eval_loss": 0.33723878860473633, "eval_runtime": 29.6597, "eval_samples_per_second": 253.34, "eval_steps_per_second": 3.978, "step": 107000 }, { "epoch": 4.59, "learning_rate": 1.926753019504076e-05, "loss": 0.0974, "step": 107500 }, { "epoch": 4.59, "eval_accuracy": 0.9206813947298377, "eval_f1": 0.9215757708309522, "eval_loss": 0.2856293022632599, "eval_runtime": 29.641, "eval_samples_per_second": 253.5, "eval_steps_per_second": 3.981, "step": 107500 }, { "epoch": 4.61, "learning_rate": 1.9240856130766936e-05, "loss": 0.0947, "step": 108000 }, { "epoch": 4.61, "eval_accuracy": 0.9347883949960074, "eval_f1": 0.9353102062008903, "eval_loss": 0.23915627598762512, "eval_runtime": 29.6912, "eval_samples_per_second": 253.071, "eval_steps_per_second": 3.974, "step": 108000 }, { "epoch": 4.63, "learning_rate": 1.921418206649311e-05, "loss": 0.0923, "step": 108500 }, { "epoch": 4.63, "eval_accuracy": 0.9389140271493213, "eval_f1": 0.939221752097426, "eval_loss": 0.23398292064666748, "eval_runtime": 29.6922, "eval_samples_per_second": 253.063, "eval_steps_per_second": 3.974, "step": 108500 }, { "epoch": 4.65, "learning_rate": 1.9187508002219285e-05, "loss": 0.0888, "step": 109000 }, { "epoch": 4.65, "eval_accuracy": 0.9249401117913228, "eval_f1": 0.9261639270408716, "eval_loss": 0.3061336278915405, "eval_runtime": 28.8839, "eval_samples_per_second": 260.145, "eval_steps_per_second": 4.085, "step": 109000 }, { "epoch": 4.67, "learning_rate": 1.916083393794546e-05, "loss": 0.0936, "step": 109500 }, { "epoch": 4.67, "eval_accuracy": 0.9330582911897791, "eval_f1": 0.9333501847126282, "eval_loss": 0.24740859866142273, "eval_runtime": 29.7064, "eval_samples_per_second": 252.942, "eval_steps_per_second": 3.972, "step": 109500 }, { "epoch": 4.69, "learning_rate": 1.9134159873671634e-05, "loss": 0.0943, "step": 110000 }, { "epoch": 4.69, "eval_accuracy": 0.9383816875166356, "eval_f1": 0.9386827805161855, "eval_loss": 0.21956907212734222, "eval_runtime": 29.6931, "eval_samples_per_second": 253.056, "eval_steps_per_second": 3.974, "step": 110000 }, { "epoch": 4.72, "learning_rate": 1.9107539157526356e-05, "loss": 0.0912, "step": 110500 }, { "epoch": 4.72, "eval_accuracy": 0.9184189512909237, "eval_f1": 0.9196958151079796, "eval_loss": 0.2968537509441376, "eval_runtime": 29.65, "eval_samples_per_second": 253.423, "eval_steps_per_second": 3.98, "step": 110500 }, { "epoch": 4.74, "learning_rate": 1.9080918441381078e-05, "loss": 0.0885, "step": 111000 }, { "epoch": 4.74, "eval_accuracy": 0.9330582911897791, "eval_f1": 0.9335718524554016, "eval_loss": 0.25288963317871094, "eval_runtime": 29.6594, "eval_samples_per_second": 253.343, "eval_steps_per_second": 3.979, "step": 111000 }, { "epoch": 4.76, "learning_rate": 1.9054244377107252e-05, "loss": 0.0939, "step": 111500 }, { "epoch": 4.76, "eval_accuracy": 0.9333244610061219, "eval_f1": 0.9340340926527145, "eval_loss": 0.2551884949207306, "eval_runtime": 29.6834, "eval_samples_per_second": 253.138, "eval_steps_per_second": 3.975, "step": 111500 }, { "epoch": 4.78, "learning_rate": 1.9027570312833427e-05, "loss": 0.0901, "step": 112000 }, { "epoch": 4.78, "eval_accuracy": 0.9357199893532073, "eval_f1": 0.9362421857356324, "eval_loss": 0.25156065821647644, "eval_runtime": 29.7058, "eval_samples_per_second": 252.947, "eval_steps_per_second": 3.972, "step": 112000 }, { "epoch": 4.8, "learning_rate": 1.90008962485596e-05, "loss": 0.0961, "step": 112500 }, { "epoch": 4.8, "eval_accuracy": 0.9329252062816077, "eval_f1": 0.933612036082154, "eval_loss": 0.2644532322883606, "eval_runtime": 29.6722, "eval_samples_per_second": 253.234, "eval_steps_per_second": 3.977, "step": 112500 }, { "epoch": 4.82, "learning_rate": 1.897422218428578e-05, "loss": 0.0941, "step": 113000 }, { "epoch": 4.82, "eval_accuracy": 0.9307958477508651, "eval_f1": 0.931396585946887, "eval_loss": 0.2659129202365875, "eval_runtime": 29.7902, "eval_samples_per_second": 252.231, "eval_steps_per_second": 3.961, "step": 113000 }, { "epoch": 4.84, "learning_rate": 1.89476014681405e-05, "loss": 0.0917, "step": 113500 }, { "epoch": 4.84, "eval_accuracy": 0.9394463667820069, "eval_f1": 0.9397464897507521, "eval_loss": 0.24435795843601227, "eval_runtime": 29.6919, "eval_samples_per_second": 253.065, "eval_steps_per_second": 3.974, "step": 113500 }, { "epoch": 4.87, "learning_rate": 1.8920927403866672e-05, "loss": 0.0961, "step": 114000 }, { "epoch": 4.87, "eval_accuracy": 0.9337237157306362, "eval_f1": 0.9339511492582226, "eval_loss": 0.2280196100473404, "eval_runtime": 29.7315, "eval_samples_per_second": 252.728, "eval_steps_per_second": 3.969, "step": 114000 }, { "epoch": 4.89, "learning_rate": 1.889425333959285e-05, "loss": 0.0968, "step": 114500 }, { "epoch": 4.89, "eval_accuracy": 0.9357199893532073, "eval_f1": 0.9363108940519986, "eval_loss": 0.2443583458662033, "eval_runtime": 29.7211, "eval_samples_per_second": 252.817, "eval_steps_per_second": 3.97, "step": 114500 }, { "epoch": 4.91, "learning_rate": 1.8867579275319025e-05, "loss": 0.0945, "step": 115000 }, { "epoch": 4.91, "eval_accuracy": 0.9309289326590364, "eval_f1": 0.931598757443973, "eval_loss": 0.264121949672699, "eval_runtime": 29.6755, "eval_samples_per_second": 253.205, "eval_steps_per_second": 3.976, "step": 115000 }, { "epoch": 4.93, "learning_rate": 1.8840958559173747e-05, "loss": 0.0943, "step": 115500 }, { "epoch": 4.93, "eval_accuracy": 0.9355869044450359, "eval_f1": 0.9360854367526686, "eval_loss": 0.23954808712005615, "eval_runtime": 29.6726, "eval_samples_per_second": 253.23, "eval_steps_per_second": 3.977, "step": 115500 }, { "epoch": 4.95, "learning_rate": 1.881428449489992e-05, "loss": 0.092, "step": 116000 }, { "epoch": 4.95, "eval_accuracy": 0.9401117913228639, "eval_f1": 0.940554328007054, "eval_loss": 0.21225783228874207, "eval_runtime": 29.6796, "eval_samples_per_second": 253.171, "eval_steps_per_second": 3.976, "step": 116000 }, { "epoch": 4.97, "learning_rate": 1.8787610430626096e-05, "loss": 0.0921, "step": 116500 }, { "epoch": 4.97, "eval_accuracy": 0.9387809422411498, "eval_f1": 0.9391814867688993, "eval_loss": 0.24649563431739807, "eval_runtime": 29.7414, "eval_samples_per_second": 252.644, "eval_steps_per_second": 3.968, "step": 116500 }, { "epoch": 4.99, "learning_rate": 1.876093636635227e-05, "loss": 0.0924, "step": 117000 }, { "epoch": 4.99, "eval_accuracy": 0.9330582911897791, "eval_f1": 0.9336951473017778, "eval_loss": 0.2614225745201111, "eval_runtime": 29.7333, "eval_samples_per_second": 252.713, "eval_steps_per_second": 3.969, "step": 117000 }, { "epoch": 5.01, "learning_rate": 1.8734262302078445e-05, "loss": 0.081, "step": 117500 }, { "epoch": 5.01, "eval_accuracy": 0.9291988288528081, "eval_f1": 0.9299493314090986, "eval_loss": 0.3145081698894501, "eval_runtime": 29.7729, "eval_samples_per_second": 252.378, "eval_steps_per_second": 3.963, "step": 117500 }, { "epoch": 5.04, "learning_rate": 1.870758823780462e-05, "loss": 0.0718, "step": 118000 }, { "epoch": 5.04, "eval_accuracy": 0.9407772158637211, "eval_f1": 0.9410248102577136, "eval_loss": 0.24476242065429688, "eval_runtime": 29.6709, "eval_samples_per_second": 253.245, "eval_steps_per_second": 3.977, "step": 118000 }, { "epoch": 5.06, "learning_rate": 1.868096752165934e-05, "loss": 0.0725, "step": 118500 }, { "epoch": 5.06, "eval_accuracy": 0.9322597817407506, "eval_f1": 0.932907820406911, "eval_loss": 0.2969840466976166, "eval_runtime": 29.6827, "eval_samples_per_second": 253.144, "eval_steps_per_second": 3.975, "step": 118500 }, { "epoch": 5.08, "learning_rate": 1.8654293457385516e-05, "loss": 0.0715, "step": 119000 }, { "epoch": 5.08, "eval_accuracy": 0.9319936119244078, "eval_f1": 0.932401349859726, "eval_loss": 0.3180652856826782, "eval_runtime": 29.8767, "eval_samples_per_second": 251.5, "eval_steps_per_second": 3.95, "step": 119000 }, { "epoch": 5.1, "learning_rate": 1.862761939311169e-05, "loss": 0.0707, "step": 119500 }, { "epoch": 5.1, "eval_accuracy": 0.9220122438115518, "eval_f1": 0.9233960607632528, "eval_loss": 0.3678698241710663, "eval_runtime": 29.7048, "eval_samples_per_second": 252.956, "eval_steps_per_second": 3.972, "step": 119500 }, { "epoch": 5.12, "learning_rate": 1.8600945328837865e-05, "loss": 0.0739, "step": 120000 }, { "epoch": 5.12, "eval_accuracy": 0.9419749800372638, "eval_f1": 0.9422688639940412, "eval_loss": 0.23977598547935486, "eval_runtime": 29.7047, "eval_samples_per_second": 252.957, "eval_steps_per_second": 3.972, "step": 120000 }, { "epoch": 5.14, "learning_rate": 1.857427126456404e-05, "loss": 0.0732, "step": 120500 }, { "epoch": 5.14, "eval_accuracy": 0.9409103007718924, "eval_f1": 0.9413533843553499, "eval_loss": 0.2784920334815979, "eval_runtime": 29.6623, "eval_samples_per_second": 253.319, "eval_steps_per_second": 3.978, "step": 120500 }, { "epoch": 5.16, "learning_rate": 1.8547650548418765e-05, "loss": 0.0729, "step": 121000 }, { "epoch": 5.16, "eval_accuracy": 0.9407772158637211, "eval_f1": 0.9411080827560356, "eval_loss": 0.2614119350910187, "eval_runtime": 29.6988, "eval_samples_per_second": 253.007, "eval_steps_per_second": 3.973, "step": 121000 }, { "epoch": 5.19, "learning_rate": 1.8520976484144936e-05, "loss": 0.0721, "step": 121500 }, { "epoch": 5.19, "eval_accuracy": 0.9354538195368646, "eval_f1": 0.9360616006194592, "eval_loss": 0.2742285430431366, "eval_runtime": 29.7205, "eval_samples_per_second": 252.822, "eval_steps_per_second": 3.97, "step": 121500 }, { "epoch": 5.21, "learning_rate": 1.8494302419871114e-05, "loss": 0.0755, "step": 122000 }, { "epoch": 5.21, "eval_accuracy": 0.9212137343625233, "eval_f1": 0.922317359924068, "eval_loss": 0.3301926255226135, "eval_runtime": 29.7206, "eval_samples_per_second": 252.821, "eval_steps_per_second": 3.97, "step": 122000 }, { "epoch": 5.23, "learning_rate": 1.846762835559729e-05, "loss": 0.0778, "step": 122500 }, { "epoch": 5.23, "eval_accuracy": 0.9303965930263508, "eval_f1": 0.9308745735895487, "eval_loss": 0.2771119773387909, "eval_runtime": 29.7333, "eval_samples_per_second": 252.713, "eval_steps_per_second": 3.969, "step": 122500 }, { "epoch": 5.25, "learning_rate": 1.8440954291323463e-05, "loss": 0.0778, "step": 123000 }, { "epoch": 5.25, "eval_accuracy": 0.9346553100878361, "eval_f1": 0.9351655806765509, "eval_loss": 0.26929518580436707, "eval_runtime": 29.8793, "eval_samples_per_second": 251.479, "eval_steps_per_second": 3.949, "step": 123000 }, { "epoch": 5.27, "learning_rate": 1.8414333575178185e-05, "loss": 0.0742, "step": 123500 }, { "epoch": 5.27, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9451744443467338, "eval_loss": 0.23084259033203125, "eval_runtime": 29.7304, "eval_samples_per_second": 252.738, "eval_steps_per_second": 3.969, "step": 123500 }, { "epoch": 5.29, "learning_rate": 1.838765951090436e-05, "loss": 0.0732, "step": 124000 }, { "epoch": 5.29, "eval_accuracy": 0.9378493478839499, "eval_f1": 0.9383794117060509, "eval_loss": 0.294933557510376, "eval_runtime": 29.7559, "eval_samples_per_second": 252.522, "eval_steps_per_second": 3.966, "step": 124000 }, { "epoch": 5.31, "learning_rate": 1.8360985446630534e-05, "loss": 0.0742, "step": 124500 }, { "epoch": 5.31, "eval_accuracy": 0.9254724514240085, "eval_f1": 0.9263212611151752, "eval_loss": 0.32705116271972656, "eval_runtime": 29.7061, "eval_samples_per_second": 252.944, "eval_steps_per_second": 3.972, "step": 124500 }, { "epoch": 5.33, "learning_rate": 1.833431138235671e-05, "loss": 0.0769, "step": 125000 }, { "epoch": 5.33, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9449892328941387, "eval_loss": 0.23981066048145294, "eval_runtime": 29.7033, "eval_samples_per_second": 252.969, "eval_steps_per_second": 3.973, "step": 125000 }, { "epoch": 5.36, "learning_rate": 1.830769066621143e-05, "loss": 0.0764, "step": 125500 }, { "epoch": 5.36, "eval_accuracy": 0.9334575459142933, "eval_f1": 0.9338798349360586, "eval_loss": 0.2838508188724518, "eval_runtime": 29.8318, "eval_samples_per_second": 251.879, "eval_steps_per_second": 3.956, "step": 125500 }, { "epoch": 5.38, "learning_rate": 1.8281016601937605e-05, "loss": 0.0765, "step": 126000 }, { "epoch": 5.38, "eval_accuracy": 0.9341229704551504, "eval_f1": 0.9347863135821975, "eval_loss": 0.27602633833885193, "eval_runtime": 28.984, "eval_samples_per_second": 259.246, "eval_steps_per_second": 4.071, "step": 126000 }, { "epoch": 5.4, "learning_rate": 1.825439588579233e-05, "loss": 0.0781, "step": 126500 }, { "epoch": 5.4, "eval_accuracy": 0.9327921213734363, "eval_f1": 0.9334944076626738, "eval_loss": 0.27298033237457275, "eval_runtime": 29.7395, "eval_samples_per_second": 252.661, "eval_steps_per_second": 3.968, "step": 126500 }, { "epoch": 5.42, "learning_rate": 1.82277218215185e-05, "loss": 0.0791, "step": 127000 }, { "epoch": 5.42, "eval_accuracy": 0.9361192440777216, "eval_f1": 0.9366051447103599, "eval_loss": 0.279489666223526, "eval_runtime": 29.6861, "eval_samples_per_second": 253.115, "eval_steps_per_second": 3.975, "step": 127000 }, { "epoch": 5.44, "learning_rate": 1.8201047757244676e-05, "loss": 0.0755, "step": 127500 }, { "epoch": 5.44, "eval_accuracy": 0.9410433856800638, "eval_f1": 0.9414163947590737, "eval_loss": 0.27021223306655884, "eval_runtime": 29.7025, "eval_samples_per_second": 252.975, "eval_steps_per_second": 3.973, "step": 127500 }, { "epoch": 5.46, "learning_rate": 1.8174373692970854e-05, "loss": 0.0808, "step": 128000 }, { "epoch": 5.46, "eval_accuracy": 0.9319936119244078, "eval_f1": 0.9325696896758618, "eval_loss": 0.28826984763145447, "eval_runtime": 29.7213, "eval_samples_per_second": 252.815, "eval_steps_per_second": 3.97, "step": 128000 }, { "epoch": 5.48, "learning_rate": 1.8147699628697025e-05, "loss": 0.0815, "step": 128500 }, { "epoch": 5.48, "eval_accuracy": 0.9303965930263508, "eval_f1": 0.9312246929240023, "eval_loss": 0.28006842732429504, "eval_runtime": 28.8693, "eval_samples_per_second": 260.276, "eval_steps_per_second": 4.087, "step": 128500 }, { "epoch": 5.51, "learning_rate": 1.8121025564423203e-05, "loss": 0.0781, "step": 129000 }, { "epoch": 5.51, "eval_accuracy": 0.9342560553633218, "eval_f1": 0.934916102930222, "eval_loss": 0.28170469403266907, "eval_runtime": 29.7318, "eval_samples_per_second": 252.726, "eval_steps_per_second": 3.969, "step": 129000 }, { "epoch": 5.53, "learning_rate": 1.8094351500149377e-05, "loss": 0.0799, "step": 129500 }, { "epoch": 5.53, "eval_accuracy": 0.9367846686185787, "eval_f1": 0.9373211035369594, "eval_loss": 0.26480868458747864, "eval_runtime": 29.6578, "eval_samples_per_second": 253.356, "eval_steps_per_second": 3.979, "step": 129500 }, { "epoch": 5.55, "learning_rate": 1.806767743587555e-05, "loss": 0.0795, "step": 130000 }, { "epoch": 5.55, "eval_accuracy": 0.9418418951290923, "eval_f1": 0.9421717077366013, "eval_loss": 0.24259281158447266, "eval_runtime": 29.6526, "eval_samples_per_second": 253.401, "eval_steps_per_second": 3.979, "step": 130000 }, { "epoch": 5.57, "learning_rate": 1.8041003371601726e-05, "loss": 0.0779, "step": 130500 }, { "epoch": 5.57, "eval_accuracy": 0.932392866648922, "eval_f1": 0.9327021166238731, "eval_loss": 0.2653105854988098, "eval_runtime": 29.6697, "eval_samples_per_second": 253.255, "eval_steps_per_second": 3.977, "step": 130500 }, { "epoch": 5.59, "learning_rate": 1.80143293073279e-05, "loss": 0.0805, "step": 131000 }, { "epoch": 5.59, "eval_accuracy": 0.9347883949960074, "eval_f1": 0.9353918774215707, "eval_loss": 0.27313682436943054, "eval_runtime": 28.8119, "eval_samples_per_second": 260.795, "eval_steps_per_second": 4.096, "step": 131000 }, { "epoch": 5.61, "learning_rate": 1.7987655243054076e-05, "loss": 0.0744, "step": 131500 }, { "epoch": 5.61, "eval_accuracy": 0.9407772158637211, "eval_f1": 0.9411238964325214, "eval_loss": 0.2567736804485321, "eval_runtime": 29.8525, "eval_samples_per_second": 251.705, "eval_steps_per_second": 3.953, "step": 131500 }, { "epoch": 5.63, "learning_rate": 1.796098117878025e-05, "loss": 0.0795, "step": 132000 }, { "epoch": 5.63, "eval_accuracy": 0.9342560553633218, "eval_f1": 0.9348669837102812, "eval_loss": 0.279877632856369, "eval_runtime": 29.7073, "eval_samples_per_second": 252.935, "eval_steps_per_second": 3.972, "step": 132000 }, { "epoch": 5.65, "learning_rate": 1.7934360462634972e-05, "loss": 0.0799, "step": 132500 }, { "epoch": 5.65, "eval_accuracy": 0.9399787064146926, "eval_f1": 0.9403908704532573, "eval_loss": 0.27136918902397156, "eval_runtime": 29.6962, "eval_samples_per_second": 253.029, "eval_steps_per_second": 3.974, "step": 132500 }, { "epoch": 5.68, "learning_rate": 1.7907686398361147e-05, "loss": 0.0795, "step": 133000 }, { "epoch": 5.68, "eval_accuracy": 0.9445035932925206, "eval_f1": 0.9446204625899423, "eval_loss": 0.24645930528640747, "eval_runtime": 29.8001, "eval_samples_per_second": 252.147, "eval_steps_per_second": 3.96, "step": 133000 }, { "epoch": 5.7, "learning_rate": 1.788101233408732e-05, "loss": 0.0773, "step": 133500 }, { "epoch": 5.7, "eval_accuracy": 0.9367846686185787, "eval_f1": 0.9372887189762769, "eval_loss": 0.23624612390995026, "eval_runtime": 29.0666, "eval_samples_per_second": 258.51, "eval_steps_per_second": 4.06, "step": 133500 }, { "epoch": 5.72, "learning_rate": 1.7854338269813496e-05, "loss": 0.0773, "step": 134000 }, { "epoch": 5.72, "eval_accuracy": 0.9454351876497206, "eval_f1": 0.9457146735470864, "eval_loss": 0.2246805727481842, "eval_runtime": 29.7092, "eval_samples_per_second": 252.918, "eval_steps_per_second": 3.972, "step": 134000 }, { "epoch": 5.74, "learning_rate": 1.7827770901796765e-05, "loss": 0.0802, "step": 134500 }, { "epoch": 5.74, "eval_accuracy": 0.9433058291189779, "eval_f1": 0.9436458127410952, "eval_loss": 0.219477578997612, "eval_runtime": 29.7077, "eval_samples_per_second": 252.931, "eval_steps_per_second": 3.972, "step": 134500 }, { "epoch": 5.76, "learning_rate": 1.7801096837522943e-05, "loss": 0.0805, "step": 135000 }, { "epoch": 5.76, "eval_accuracy": 0.9355869044450359, "eval_f1": 0.9362078931632605, "eval_loss": 0.25872257351875305, "eval_runtime": 29.7076, "eval_samples_per_second": 252.932, "eval_steps_per_second": 3.972, "step": 135000 }, { "epoch": 5.78, "learning_rate": 1.7774422773249117e-05, "loss": 0.0796, "step": 135500 }, { "epoch": 5.78, "eval_accuracy": 0.9276018099547512, "eval_f1": 0.9286700082110425, "eval_loss": 0.2882857024669647, "eval_runtime": 29.6463, "eval_samples_per_second": 253.455, "eval_steps_per_second": 3.98, "step": 135500 }, { "epoch": 5.8, "learning_rate": 1.774774870897529e-05, "loss": 0.0797, "step": 136000 }, { "epoch": 5.8, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9454800904796664, "eval_loss": 0.22322283685207367, "eval_runtime": 29.7184, "eval_samples_per_second": 252.84, "eval_steps_per_second": 3.971, "step": 136000 }, { "epoch": 5.83, "learning_rate": 1.7721074644701466e-05, "loss": 0.0783, "step": 136500 }, { "epoch": 5.83, "eval_accuracy": 0.9492946499866916, "eval_f1": 0.9494469008050342, "eval_loss": 0.20879070460796356, "eval_runtime": 29.697, "eval_samples_per_second": 253.022, "eval_steps_per_second": 3.973, "step": 136500 }, { "epoch": 5.85, "learning_rate": 1.769440058042764e-05, "loss": 0.0806, "step": 137000 }, { "epoch": 5.85, "eval_accuracy": 0.9418418951290923, "eval_f1": 0.942174291286256, "eval_loss": 0.2473965436220169, "eval_runtime": 29.8072, "eval_samples_per_second": 252.086, "eval_steps_per_second": 3.959, "step": 137000 }, { "epoch": 5.87, "learning_rate": 1.7667726516153816e-05, "loss": 0.0818, "step": 137500 }, { "epoch": 5.87, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9397529317052404, "eval_loss": 0.2630070745944977, "eval_runtime": 29.6999, "eval_samples_per_second": 252.997, "eval_steps_per_second": 3.973, "step": 137500 }, { "epoch": 5.89, "learning_rate": 1.764105245187999e-05, "loss": 0.0773, "step": 138000 }, { "epoch": 5.89, "eval_accuracy": 0.9476976310886346, "eval_f1": 0.9478580726278416, "eval_loss": 0.234640434384346, "eval_runtime": 29.8086, "eval_samples_per_second": 252.075, "eval_steps_per_second": 3.959, "step": 138000 }, { "epoch": 5.91, "learning_rate": 1.7614431735734712e-05, "loss": 0.0795, "step": 138500 }, { "epoch": 5.91, "eval_accuracy": 0.9373170082512643, "eval_f1": 0.9377090107319418, "eval_loss": 0.28119930624961853, "eval_runtime": 29.7146, "eval_samples_per_second": 252.872, "eval_steps_per_second": 3.971, "step": 138500 }, { "epoch": 5.93, "learning_rate": 1.7587757671460887e-05, "loss": 0.0816, "step": 139000 }, { "epoch": 5.93, "eval_accuracy": 0.9274687250465797, "eval_f1": 0.9281982795988495, "eval_loss": 0.32029488682746887, "eval_runtime": 29.7504, "eval_samples_per_second": 252.568, "eval_steps_per_second": 3.966, "step": 139000 }, { "epoch": 5.95, "learning_rate": 1.756108360718706e-05, "loss": 0.0773, "step": 139500 }, { "epoch": 5.95, "eval_accuracy": 0.9375831780676072, "eval_f1": 0.9379461602562825, "eval_loss": 0.2567751705646515, "eval_runtime": 29.8294, "eval_samples_per_second": 251.899, "eval_steps_per_second": 3.956, "step": 139500 }, { "epoch": 5.97, "learning_rate": 1.7534409542913236e-05, "loss": 0.08, "step": 140000 }, { "epoch": 5.97, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.9466704964218854, "eval_loss": 0.21513184905052185, "eval_runtime": 29.6771, "eval_samples_per_second": 253.192, "eval_steps_per_second": 3.976, "step": 140000 }, { "epoch": 6.0, "learning_rate": 1.750773547863941e-05, "loss": 0.0782, "step": 140500 }, { "epoch": 6.0, "eval_accuracy": 0.9387809422411498, "eval_f1": 0.9394699729362729, "eval_loss": 0.28363919258117676, "eval_runtime": 29.7313, "eval_samples_per_second": 252.73, "eval_steps_per_second": 3.969, "step": 140500 }, { "epoch": 6.02, "learning_rate": 1.7481061414365585e-05, "loss": 0.0618, "step": 141000 }, { "epoch": 6.02, "eval_accuracy": 0.9318605270162363, "eval_f1": 0.9324640088965003, "eval_loss": 0.3425421416759491, "eval_runtime": 29.7537, "eval_samples_per_second": 252.54, "eval_steps_per_second": 3.966, "step": 141000 }, { "epoch": 6.04, "learning_rate": 1.7454440698220307e-05, "loss": 0.0645, "step": 141500 }, { "epoch": 6.04, "eval_accuracy": 0.9341229704551504, "eval_f1": 0.934781602448178, "eval_loss": 0.3139813542366028, "eval_runtime": 29.7013, "eval_samples_per_second": 252.985, "eval_steps_per_second": 3.973, "step": 141500 }, { "epoch": 6.06, "learning_rate": 1.7427766633946485e-05, "loss": 0.0576, "step": 142000 }, { "epoch": 6.06, "eval_accuracy": 0.9390471120574927, "eval_f1": 0.9395110845241024, "eval_loss": 0.29140228033065796, "eval_runtime": 29.7171, "eval_samples_per_second": 252.851, "eval_steps_per_second": 3.971, "step": 142000 }, { "epoch": 6.08, "learning_rate": 1.7401092569672656e-05, "loss": 0.0651, "step": 142500 }, { "epoch": 6.08, "eval_accuracy": 0.9395794516901783, "eval_f1": 0.9400294457871766, "eval_loss": 0.2746965289115906, "eval_runtime": 29.689, "eval_samples_per_second": 253.09, "eval_steps_per_second": 3.975, "step": 142500 }, { "epoch": 6.1, "learning_rate": 1.737441850539883e-05, "loss": 0.0638, "step": 143000 }, { "epoch": 6.1, "eval_accuracy": 0.926270960873037, "eval_f1": 0.9275467084065636, "eval_loss": 0.3684926927089691, "eval_runtime": 29.7002, "eval_samples_per_second": 252.995, "eval_steps_per_second": 3.973, "step": 143000 }, { "epoch": 6.12, "learning_rate": 1.7347744441125008e-05, "loss": 0.062, "step": 143500 }, { "epoch": 6.12, "eval_accuracy": 0.9361192440777216, "eval_f1": 0.9367507413022468, "eval_loss": 0.28878238797187805, "eval_runtime": 29.7487, "eval_samples_per_second": 252.583, "eval_steps_per_second": 3.967, "step": 143500 }, { "epoch": 6.15, "learning_rate": 1.7321070376851183e-05, "loss": 0.0602, "step": 144000 }, { "epoch": 6.15, "eval_accuracy": 0.9413095554964067, "eval_f1": 0.9414811417776511, "eval_loss": 0.2957022190093994, "eval_runtime": 29.7114, "eval_samples_per_second": 252.9, "eval_steps_per_second": 3.972, "step": 144000 }, { "epoch": 6.17, "learning_rate": 1.7294449660705905e-05, "loss": 0.0629, "step": 144500 }, { "epoch": 6.17, "eval_accuracy": 0.9418418951290923, "eval_f1": 0.9423131558938396, "eval_loss": 0.2822032570838928, "eval_runtime": 29.7961, "eval_samples_per_second": 252.181, "eval_steps_per_second": 3.96, "step": 144500 }, { "epoch": 6.19, "learning_rate": 1.726777559643208e-05, "loss": 0.0647, "step": 145000 }, { "epoch": 6.19, "eval_accuracy": 0.9375831780676072, "eval_f1": 0.9381601008634161, "eval_loss": 0.2937462031841278, "eval_runtime": 29.7028, "eval_samples_per_second": 252.973, "eval_steps_per_second": 3.973, "step": 145000 }, { "epoch": 6.21, "learning_rate": 1.7241101532158254e-05, "loss": 0.0641, "step": 145500 }, { "epoch": 6.21, "eval_accuracy": 0.9359861591695502, "eval_f1": 0.9364402700524492, "eval_loss": 0.29298967123031616, "eval_runtime": 29.7381, "eval_samples_per_second": 252.673, "eval_steps_per_second": 3.968, "step": 145500 }, { "epoch": 6.23, "learning_rate": 1.7214427467884428e-05, "loss": 0.0619, "step": 146000 }, { "epoch": 6.23, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9396488023591716, "eval_loss": 0.3040963113307953, "eval_runtime": 29.6999, "eval_samples_per_second": 252.997, "eval_steps_per_second": 3.973, "step": 146000 }, { "epoch": 6.25, "learning_rate": 1.718780675173915e-05, "loss": 0.0662, "step": 146500 }, { "epoch": 6.25, "eval_accuracy": 0.9459675272824062, "eval_f1": 0.9461600576362096, "eval_loss": 0.24619624018669128, "eval_runtime": 29.6621, "eval_samples_per_second": 253.32, "eval_steps_per_second": 3.978, "step": 146500 }, { "epoch": 6.27, "learning_rate": 1.7161132687465325e-05, "loss": 0.0642, "step": 147000 }, { "epoch": 6.27, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9393517216774104, "eval_loss": 0.29791733622550964, "eval_runtime": 29.7053, "eval_samples_per_second": 252.952, "eval_steps_per_second": 3.972, "step": 147000 }, { "epoch": 6.3, "learning_rate": 1.71344586231915e-05, "loss": 0.0638, "step": 147500 }, { "epoch": 6.3, "eval_accuracy": 0.9295980835773223, "eval_f1": 0.9306057177926395, "eval_loss": 0.3107518255710602, "eval_runtime": 29.699, "eval_samples_per_second": 253.005, "eval_steps_per_second": 3.973, "step": 147500 }, { "epoch": 6.32, "learning_rate": 1.7107784558917674e-05, "loss": 0.0646, "step": 148000 }, { "epoch": 6.32, "eval_accuracy": 0.9399787064146926, "eval_f1": 0.940479475585594, "eval_loss": 0.2788504362106323, "eval_runtime": 27.5638, "eval_samples_per_second": 272.604, "eval_steps_per_second": 4.281, "step": 148000 }, { "epoch": 6.34, "learning_rate": 1.7081163842772396e-05, "loss": 0.0644, "step": 148500 }, { "epoch": 6.34, "eval_accuracy": 0.9381155177002928, "eval_f1": 0.9382586915112797, "eval_loss": 0.2806726396083832, "eval_runtime": 29.7107, "eval_samples_per_second": 252.906, "eval_steps_per_second": 3.972, "step": 148500 }, { "epoch": 6.36, "learning_rate": 1.705448977849857e-05, "loss": 0.0649, "step": 149000 }, { "epoch": 6.36, "eval_accuracy": 0.9419749800372638, "eval_f1": 0.9424753113790683, "eval_loss": 0.27096980810165405, "eval_runtime": 29.7017, "eval_samples_per_second": 252.982, "eval_steps_per_second": 3.973, "step": 149000 }, { "epoch": 6.38, "learning_rate": 1.7027815714224748e-05, "loss": 0.0634, "step": 149500 }, { "epoch": 6.38, "eval_accuracy": 0.9346553100878361, "eval_f1": 0.9353827036795522, "eval_loss": 0.29936379194259644, "eval_runtime": 29.6752, "eval_samples_per_second": 253.208, "eval_steps_per_second": 3.976, "step": 149500 }, { "epoch": 6.4, "learning_rate": 1.700114164995092e-05, "loss": 0.0607, "step": 150000 }, { "epoch": 6.4, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9513936901264027, "eval_loss": 0.23151962459087372, "eval_runtime": 29.7021, "eval_samples_per_second": 252.979, "eval_steps_per_second": 3.973, "step": 150000 }, { "epoch": 6.42, "learning_rate": 1.6974467585677097e-05, "loss": 0.0653, "step": 150500 }, { "epoch": 6.42, "eval_accuracy": 0.9184189512909237, "eval_f1": 0.9201895395649388, "eval_loss": 0.3934233486652374, "eval_runtime": 28.8229, "eval_samples_per_second": 260.695, "eval_steps_per_second": 4.094, "step": 150500 }, { "epoch": 6.44, "learning_rate": 1.6947793521403272e-05, "loss": 0.0678, "step": 151000 }, { "epoch": 6.44, "eval_accuracy": 0.9209475645461804, "eval_f1": 0.9226098175059657, "eval_loss": 0.3584538996219635, "eval_runtime": 29.7316, "eval_samples_per_second": 252.728, "eval_steps_per_second": 3.969, "step": 151000 }, { "epoch": 6.47, "learning_rate": 1.6921119457129443e-05, "loss": 0.064, "step": 151500 }, { "epoch": 6.47, "eval_accuracy": 0.95009315943572, "eval_f1": 0.950299824662573, "eval_loss": 0.22422486543655396, "eval_runtime": 29.6281, "eval_samples_per_second": 253.611, "eval_steps_per_second": 3.983, "step": 151500 }, { "epoch": 6.49, "learning_rate": 1.689444539285562e-05, "loss": 0.0718, "step": 152000 }, { "epoch": 6.49, "eval_accuracy": 0.9387809422411498, "eval_f1": 0.9393330915737532, "eval_loss": 0.2708144783973694, "eval_runtime": 29.7081, "eval_samples_per_second": 252.928, "eval_steps_per_second": 3.972, "step": 152000 }, { "epoch": 6.51, "learning_rate": 1.6867771328581795e-05, "loss": 0.0677, "step": 152500 }, { "epoch": 6.51, "eval_accuracy": 0.9371839233430929, "eval_f1": 0.9377985107844401, "eval_loss": 0.3036825358867645, "eval_runtime": 29.6939, "eval_samples_per_second": 253.048, "eval_steps_per_second": 3.974, "step": 152500 }, { "epoch": 6.53, "learning_rate": 1.684109726430797e-05, "loss": 0.0623, "step": 153000 }, { "epoch": 6.53, "eval_accuracy": 0.9480968858131488, "eval_f1": 0.9483669366706274, "eval_loss": 0.2451292872428894, "eval_runtime": 28.8917, "eval_samples_per_second": 260.075, "eval_steps_per_second": 4.084, "step": 153000 }, { "epoch": 6.55, "learning_rate": 1.6814476548162692e-05, "loss": 0.0648, "step": 153500 }, { "epoch": 6.55, "eval_accuracy": 0.9351876497205217, "eval_f1": 0.9358404642854584, "eval_loss": 0.33823534846305847, "eval_runtime": 29.725, "eval_samples_per_second": 252.784, "eval_steps_per_second": 3.97, "step": 153500 }, { "epoch": 6.57, "learning_rate": 1.6787802483888866e-05, "loss": 0.0684, "step": 154000 }, { "epoch": 6.57, "eval_accuracy": 0.9389140271493213, "eval_f1": 0.9393959959904314, "eval_loss": 0.288310170173645, "eval_runtime": 29.667, "eval_samples_per_second": 253.278, "eval_steps_per_second": 3.977, "step": 154000 }, { "epoch": 6.59, "learning_rate": 1.676112841961504e-05, "loss": 0.0696, "step": 154500 }, { "epoch": 6.59, "eval_accuracy": 0.9322597817407506, "eval_f1": 0.933185090369889, "eval_loss": 0.32053282856941223, "eval_runtime": 29.7076, "eval_samples_per_second": 252.932, "eval_steps_per_second": 3.972, "step": 154500 }, { "epoch": 6.62, "learning_rate": 1.6734454355341216e-05, "loss": 0.067, "step": 155000 }, { "epoch": 6.62, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9460017411974384, "eval_loss": 0.24401821196079254, "eval_runtime": 29.6951, "eval_samples_per_second": 253.038, "eval_steps_per_second": 3.974, "step": 155000 }, { "epoch": 6.64, "learning_rate": 1.670778029106739e-05, "loss": 0.0675, "step": 155500 }, { "epoch": 6.64, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.9499514061301335, "eval_loss": 0.23875835537910461, "eval_runtime": 27.5871, "eval_samples_per_second": 272.374, "eval_steps_per_second": 4.277, "step": 155500 }, { "epoch": 6.66, "learning_rate": 1.6681159574922112e-05, "loss": 0.0663, "step": 156000 }, { "epoch": 6.66, "eval_accuracy": 0.9415757253127496, "eval_f1": 0.9417655310043141, "eval_loss": 0.2690986394882202, "eval_runtime": 29.6963, "eval_samples_per_second": 253.028, "eval_steps_per_second": 3.974, "step": 156000 }, { "epoch": 6.68, "learning_rate": 1.665448551064829e-05, "loss": 0.0657, "step": 156500 }, { "epoch": 6.68, "eval_accuracy": 0.9379824327921213, "eval_f1": 0.9386089434837862, "eval_loss": 0.27384960651397705, "eval_runtime": 29.7752, "eval_samples_per_second": 252.358, "eval_steps_per_second": 3.963, "step": 156500 }, { "epoch": 6.7, "learning_rate": 1.662781144637446e-05, "loss": 0.0649, "step": 157000 }, { "epoch": 6.7, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9454631168972781, "eval_loss": 0.24599717557430267, "eval_runtime": 29.6815, "eval_samples_per_second": 253.154, "eval_steps_per_second": 3.976, "step": 157000 }, { "epoch": 6.72, "learning_rate": 1.660113738210064e-05, "loss": 0.0666, "step": 157500 }, { "epoch": 6.72, "eval_accuracy": 0.9295980835773223, "eval_f1": 0.9306336795200063, "eval_loss": 0.3208254873752594, "eval_runtime": 29.6781, "eval_samples_per_second": 253.184, "eval_steps_per_second": 3.976, "step": 157500 }, { "epoch": 6.74, "learning_rate": 1.657451666595536e-05, "loss": 0.0665, "step": 158000 }, { "epoch": 6.74, "eval_accuracy": 0.9429065743944637, "eval_f1": 0.9432725369505285, "eval_loss": 0.2655596435070038, "eval_runtime": 29.7332, "eval_samples_per_second": 252.714, "eval_steps_per_second": 3.969, "step": 158000 }, { "epoch": 6.76, "learning_rate": 1.6547842601681535e-05, "loss": 0.0693, "step": 158500 }, { "epoch": 6.76, "eval_accuracy": 0.9365184988022358, "eval_f1": 0.9370932056261295, "eval_loss": 0.27700603008270264, "eval_runtime": 29.7205, "eval_samples_per_second": 252.822, "eval_steps_per_second": 3.97, "step": 158500 }, { "epoch": 6.79, "learning_rate": 1.6521168537407707e-05, "loss": 0.0657, "step": 159000 }, { "epoch": 6.79, "eval_accuracy": 0.9374500931594357, "eval_f1": 0.9380905652232998, "eval_loss": 0.29713597893714905, "eval_runtime": 29.7259, "eval_samples_per_second": 252.776, "eval_steps_per_second": 3.97, "step": 159000 }, { "epoch": 6.81, "learning_rate": 1.6494494473133885e-05, "loss": 0.069, "step": 159500 }, { "epoch": 6.81, "eval_accuracy": 0.9378493478839499, "eval_f1": 0.9384099076790622, "eval_loss": 0.28762125968933105, "eval_runtime": 29.817, "eval_samples_per_second": 252.004, "eval_steps_per_second": 3.957, "step": 159500 }, { "epoch": 6.83, "learning_rate": 1.646782040886006e-05, "loss": 0.0699, "step": 160000 }, { "epoch": 6.83, "eval_accuracy": 0.9393132818738356, "eval_f1": 0.9397331535036828, "eval_loss": 0.29436245560646057, "eval_runtime": 29.7168, "eval_samples_per_second": 252.853, "eval_steps_per_second": 3.971, "step": 160000 }, { "epoch": 6.85, "learning_rate": 1.6441146344586234e-05, "loss": 0.0653, "step": 160500 }, { "epoch": 6.85, "eval_accuracy": 0.9406441309555497, "eval_f1": 0.9411619893438787, "eval_loss": 0.31076741218566895, "eval_runtime": 29.8798, "eval_samples_per_second": 251.474, "eval_steps_per_second": 3.949, "step": 160500 }, { "epoch": 6.87, "learning_rate": 1.6414525628440956e-05, "loss": 0.0686, "step": 161000 }, { "epoch": 6.87, "eval_accuracy": 0.9402448762310354, "eval_f1": 0.9405518842645035, "eval_loss": 0.2986462712287903, "eval_runtime": 29.6951, "eval_samples_per_second": 253.038, "eval_steps_per_second": 3.974, "step": 161000 }, { "epoch": 6.89, "learning_rate": 1.638785156416713e-05, "loss": 0.0655, "step": 161500 }, { "epoch": 6.89, "eval_accuracy": 0.9411764705882353, "eval_f1": 0.9416928170807948, "eval_loss": 0.28847745060920715, "eval_runtime": 29.6973, "eval_samples_per_second": 253.02, "eval_steps_per_second": 3.973, "step": 161500 }, { "epoch": 6.91, "learning_rate": 1.6361177499893305e-05, "loss": 0.0678, "step": 162000 }, { "epoch": 6.91, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9475552479617785, "eval_loss": 0.2651350796222687, "eval_runtime": 29.67, "eval_samples_per_second": 253.253, "eval_steps_per_second": 3.977, "step": 162000 }, { "epoch": 6.94, "learning_rate": 1.633450343561948e-05, "loss": 0.0698, "step": 162500 }, { "epoch": 6.94, "eval_accuracy": 0.9482299707213202, "eval_f1": 0.9484219919203383, "eval_loss": 0.2195654958486557, "eval_runtime": 29.7092, "eval_samples_per_second": 252.919, "eval_steps_per_second": 3.972, "step": 162500 }, { "epoch": 6.96, "learning_rate": 1.6307829371345654e-05, "loss": 0.0687, "step": 163000 }, { "epoch": 6.96, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.9466828668162871, "eval_loss": 0.2680336534976959, "eval_runtime": 29.7341, "eval_samples_per_second": 252.707, "eval_steps_per_second": 3.969, "step": 163000 }, { "epoch": 6.98, "learning_rate": 1.6281155307071828e-05, "loss": 0.0672, "step": 163500 }, { "epoch": 6.98, "eval_accuracy": 0.9438381687516636, "eval_f1": 0.9442382604190801, "eval_loss": 0.26866111159324646, "eval_runtime": 29.6987, "eval_samples_per_second": 253.007, "eval_steps_per_second": 3.973, "step": 163500 }, { "epoch": 7.0, "learning_rate": 1.6254481242798003e-05, "loss": 0.0706, "step": 164000 }, { "epoch": 7.0, "eval_accuracy": 0.9383816875166356, "eval_f1": 0.9387976146150384, "eval_loss": 0.28669700026512146, "eval_runtime": 29.6298, "eval_samples_per_second": 253.596, "eval_steps_per_second": 3.982, "step": 164000 }, { "epoch": 7.02, "learning_rate": 1.622780717852418e-05, "loss": 0.0502, "step": 164500 }, { "epoch": 7.02, "eval_accuracy": 0.9495608198030343, "eval_f1": 0.949692327575141, "eval_loss": 0.25467291474342346, "eval_runtime": 29.675, "eval_samples_per_second": 253.21, "eval_steps_per_second": 3.976, "step": 164500 }, { "epoch": 7.04, "learning_rate": 1.6201186462378903e-05, "loss": 0.0519, "step": 165000 }, { "epoch": 7.04, "eval_accuracy": 0.9389140271493213, "eval_f1": 0.9394556541905762, "eval_loss": 0.3056061565876007, "eval_runtime": 29.802, "eval_samples_per_second": 252.131, "eval_steps_per_second": 3.959, "step": 165000 }, { "epoch": 7.06, "learning_rate": 1.6174512398105077e-05, "loss": 0.0512, "step": 165500 }, { "epoch": 7.06, "eval_accuracy": 0.9387809422411498, "eval_f1": 0.9393428013619174, "eval_loss": 0.2825697362422943, "eval_runtime": 29.6865, "eval_samples_per_second": 253.112, "eval_steps_per_second": 3.975, "step": 165500 }, { "epoch": 7.08, "learning_rate": 1.614783833383125e-05, "loss": 0.0507, "step": 166000 }, { "epoch": 7.08, "eval_accuracy": 0.9435719989353207, "eval_f1": 0.9440688946956373, "eval_loss": 0.31017938256263733, "eval_runtime": 29.6679, "eval_samples_per_second": 253.271, "eval_steps_per_second": 3.977, "step": 166000 }, { "epoch": 7.11, "learning_rate": 1.6121164269557426e-05, "loss": 0.0527, "step": 166500 }, { "epoch": 7.11, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.947657360420552, "eval_loss": 0.27538731694221497, "eval_runtime": 29.6711, "eval_samples_per_second": 253.243, "eval_steps_per_second": 3.977, "step": 166500 }, { "epoch": 7.13, "learning_rate": 1.60944902052836e-05, "loss": 0.0498, "step": 167000 }, { "epoch": 7.13, "eval_accuracy": 0.9375831780676072, "eval_f1": 0.9384167986894664, "eval_loss": 0.3295161724090576, "eval_runtime": 29.6457, "eval_samples_per_second": 253.46, "eval_steps_per_second": 3.98, "step": 167000 }, { "epoch": 7.15, "learning_rate": 1.6067816141009775e-05, "loss": 0.0531, "step": 167500 }, { "epoch": 7.15, "eval_accuracy": 0.9374500931594357, "eval_f1": 0.9381703971218172, "eval_loss": 0.30897414684295654, "eval_runtime": 29.6649, "eval_samples_per_second": 253.296, "eval_steps_per_second": 3.978, "step": 167500 }, { "epoch": 7.17, "learning_rate": 1.604114207673595e-05, "loss": 0.0521, "step": 168000 }, { "epoch": 7.17, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9461902179062976, "eval_loss": 0.2677062451839447, "eval_runtime": 29.7336, "eval_samples_per_second": 252.711, "eval_steps_per_second": 3.969, "step": 168000 }, { "epoch": 7.19, "learning_rate": 1.6014521360590672e-05, "loss": 0.0521, "step": 168500 }, { "epoch": 7.19, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.9466521291682617, "eval_loss": 0.25630277395248413, "eval_runtime": 29.6838, "eval_samples_per_second": 253.135, "eval_steps_per_second": 3.975, "step": 168500 }, { "epoch": 7.21, "learning_rate": 1.5987847296316846e-05, "loss": 0.0538, "step": 169000 }, { "epoch": 7.21, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9431255075942407, "eval_loss": 0.297467440366745, "eval_runtime": 29.6889, "eval_samples_per_second": 253.091, "eval_steps_per_second": 3.975, "step": 169000 }, { "epoch": 7.23, "learning_rate": 1.596117323204302e-05, "loss": 0.0544, "step": 169500 }, { "epoch": 7.23, "eval_accuracy": 0.9429065743944637, "eval_f1": 0.9434603808144678, "eval_loss": 0.3097640872001648, "eval_runtime": 29.669, "eval_samples_per_second": 253.261, "eval_steps_per_second": 3.977, "step": 169500 }, { "epoch": 7.26, "learning_rate": 1.5934499167769195e-05, "loss": 0.0558, "step": 170000 }, { "epoch": 7.26, "eval_accuracy": 0.9487623103540058, "eval_f1": 0.9489934319226285, "eval_loss": 0.2626325786113739, "eval_runtime": 29.5649, "eval_samples_per_second": 254.153, "eval_steps_per_second": 3.991, "step": 170000 }, { "epoch": 7.28, "learning_rate": 1.590782510349537e-05, "loss": 0.0529, "step": 170500 }, { "epoch": 7.28, "eval_accuracy": 0.953952621772691, "eval_f1": 0.95408693304066, "eval_loss": 0.25428780913352966, "eval_runtime": 29.728, "eval_samples_per_second": 252.758, "eval_steps_per_second": 3.969, "step": 170500 }, { "epoch": 7.3, "learning_rate": 1.5881151039221544e-05, "loss": 0.0539, "step": 171000 }, { "epoch": 7.3, "eval_accuracy": 0.9322597817407506, "eval_f1": 0.9330774684249004, "eval_loss": 0.35910770297050476, "eval_runtime": 29.6509, "eval_samples_per_second": 253.415, "eval_steps_per_second": 3.98, "step": 171000 }, { "epoch": 7.32, "learning_rate": 1.5854476974947722e-05, "loss": 0.0556, "step": 171500 }, { "epoch": 7.32, "eval_accuracy": 0.9454351876497206, "eval_f1": 0.9457784140036132, "eval_loss": 0.2696589529514313, "eval_runtime": 29.7053, "eval_samples_per_second": 252.952, "eval_steps_per_second": 3.972, "step": 171500 }, { "epoch": 7.34, "learning_rate": 1.5827802910673894e-05, "loss": 0.0552, "step": 172000 }, { "epoch": 7.34, "eval_accuracy": 0.9315943571998936, "eval_f1": 0.9324976127211432, "eval_loss": 0.31420910358428955, "eval_runtime": 29.7064, "eval_samples_per_second": 252.942, "eval_steps_per_second": 3.972, "step": 172000 }, { "epoch": 7.36, "learning_rate": 1.5801182194528615e-05, "loss": 0.0564, "step": 172500 }, { "epoch": 7.36, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9455382053945202, "eval_loss": 0.2848692834377289, "eval_runtime": 28.8185, "eval_samples_per_second": 260.735, "eval_steps_per_second": 4.095, "step": 172500 }, { "epoch": 7.38, "learning_rate": 1.577450813025479e-05, "loss": 0.0578, "step": 173000 }, { "epoch": 7.38, "eval_accuracy": 0.9434389140271493, "eval_f1": 0.9436506939263858, "eval_loss": 0.3079562485218048, "eval_runtime": 29.6972, "eval_samples_per_second": 253.021, "eval_steps_per_second": 3.973, "step": 173000 }, { "epoch": 7.4, "learning_rate": 1.5747834065980968e-05, "loss": 0.056, "step": 173500 }, { "epoch": 7.4, "eval_accuracy": 0.932392866648922, "eval_f1": 0.9333872565432142, "eval_loss": 0.3507286608219147, "eval_runtime": 29.6862, "eval_samples_per_second": 253.114, "eval_steps_per_second": 3.975, "step": 173500 }, { "epoch": 7.43, "learning_rate": 1.5721160001707142e-05, "loss": 0.0556, "step": 174000 }, { "epoch": 7.43, "eval_accuracy": 0.9476976310886346, "eval_f1": 0.9479980723041073, "eval_loss": 0.2626301944255829, "eval_runtime": 29.656, "eval_samples_per_second": 253.372, "eval_steps_per_second": 3.979, "step": 174000 }, { "epoch": 7.45, "learning_rate": 1.5694539285561864e-05, "loss": 0.0559, "step": 174500 }, { "epoch": 7.45, "eval_accuracy": 0.9421080649454352, "eval_f1": 0.942717103046345, "eval_loss": 0.30394506454467773, "eval_runtime": 29.7575, "eval_samples_per_second": 252.508, "eval_steps_per_second": 3.965, "step": 174500 }, { "epoch": 7.47, "learning_rate": 1.566786522128804e-05, "loss": 0.0566, "step": 175000 }, { "epoch": 7.47, "eval_accuracy": 0.9411764705882353, "eval_f1": 0.9416901978862157, "eval_loss": 0.2973109185695648, "eval_runtime": 28.9023, "eval_samples_per_second": 259.979, "eval_steps_per_second": 4.083, "step": 175000 }, { "epoch": 7.49, "learning_rate": 1.5641191157014213e-05, "loss": 0.0547, "step": 175500 }, { "epoch": 7.49, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9431437005424792, "eval_loss": 0.3120613098144531, "eval_runtime": 29.7436, "eval_samples_per_second": 252.626, "eval_steps_per_second": 3.967, "step": 175500 }, { "epoch": 7.51, "learning_rate": 1.5614517092740388e-05, "loss": 0.0603, "step": 176000 }, { "epoch": 7.51, "eval_accuracy": 0.9370508384349214, "eval_f1": 0.9378835010471506, "eval_loss": 0.33420896530151367, "eval_runtime": 29.6953, "eval_samples_per_second": 253.037, "eval_steps_per_second": 3.974, "step": 176000 }, { "epoch": 7.53, "learning_rate": 1.5587843028466563e-05, "loss": 0.0587, "step": 176500 }, { "epoch": 7.53, "eval_accuracy": 0.9442374234761778, "eval_f1": 0.944739177561909, "eval_loss": 0.27532902359962463, "eval_runtime": 29.7222, "eval_samples_per_second": 252.808, "eval_steps_per_second": 3.97, "step": 176500 }, { "epoch": 7.55, "learning_rate": 1.5561222312321284e-05, "loss": 0.0551, "step": 177000 }, { "epoch": 7.55, "eval_accuracy": 0.9429065743944637, "eval_f1": 0.9434823852597485, "eval_loss": 0.2956356704235077, "eval_runtime": 29.6918, "eval_samples_per_second": 253.067, "eval_steps_per_second": 3.974, "step": 177000 }, { "epoch": 7.58, "learning_rate": 1.5534548248047462e-05, "loss": 0.0562, "step": 177500 }, { "epoch": 7.58, "eval_accuracy": 0.9522225179664626, "eval_f1": 0.9524300805690111, "eval_loss": 0.24791304767131805, "eval_runtime": 27.4327, "eval_samples_per_second": 273.907, "eval_steps_per_second": 4.301, "step": 177500 }, { "epoch": 7.6, "learning_rate": 1.5507874183773634e-05, "loss": 0.0585, "step": 178000 }, { "epoch": 7.6, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9460760121457206, "eval_loss": 0.2704208791255951, "eval_runtime": 29.7221, "eval_samples_per_second": 252.808, "eval_steps_per_second": 3.97, "step": 178000 }, { "epoch": 7.62, "learning_rate": 1.5481253467628355e-05, "loss": 0.0585, "step": 178500 }, { "epoch": 7.62, "eval_accuracy": 0.9438381687516636, "eval_f1": 0.9442962119660283, "eval_loss": 0.27086368203163147, "eval_runtime": 29.6917, "eval_samples_per_second": 253.067, "eval_steps_per_second": 3.974, "step": 178500 }, { "epoch": 7.64, "learning_rate": 1.5454579403354533e-05, "loss": 0.0557, "step": 179000 }, { "epoch": 7.64, "eval_accuracy": 0.9430396593026351, "eval_f1": 0.9433439009698508, "eval_loss": 0.27691367268562317, "eval_runtime": 29.7194, "eval_samples_per_second": 252.832, "eval_steps_per_second": 3.97, "step": 179000 }, { "epoch": 7.66, "learning_rate": 1.5427905339080708e-05, "loss": 0.0567, "step": 179500 }, { "epoch": 7.66, "eval_accuracy": 0.9453021027415491, "eval_f1": 0.9455333997360763, "eval_loss": 0.2835189998149872, "eval_runtime": 29.7023, "eval_samples_per_second": 252.977, "eval_steps_per_second": 3.973, "step": 179500 }, { "epoch": 7.68, "learning_rate": 1.540123127480688e-05, "loss": 0.0582, "step": 180000 }, { "epoch": 7.68, "eval_accuracy": 0.9437050838434922, "eval_f1": 0.9440707056312977, "eval_loss": 0.2772218883037567, "eval_runtime": 29.729, "eval_samples_per_second": 252.75, "eval_steps_per_second": 3.969, "step": 180000 }, { "epoch": 7.7, "learning_rate": 1.5374557210533057e-05, "loss": 0.057, "step": 180500 }, { "epoch": 7.7, "eval_accuracy": 0.9423742347617781, "eval_f1": 0.9429248070232732, "eval_loss": 0.28114861249923706, "eval_runtime": 29.6798, "eval_samples_per_second": 253.168, "eval_steps_per_second": 3.976, "step": 180500 }, { "epoch": 7.72, "learning_rate": 1.534788314625923e-05, "loss": 0.0583, "step": 181000 }, { "epoch": 7.72, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.9466935511032313, "eval_loss": 0.25516778230667114, "eval_runtime": 29.6849, "eval_samples_per_second": 253.126, "eval_steps_per_second": 3.975, "step": 181000 }, { "epoch": 7.75, "learning_rate": 1.5321209081985403e-05, "loss": 0.056, "step": 181500 }, { "epoch": 7.75, "eval_accuracy": 0.9494277348948629, "eval_f1": 0.9496087114889121, "eval_loss": 0.26424041390419006, "eval_runtime": 29.7218, "eval_samples_per_second": 252.811, "eval_steps_per_second": 3.97, "step": 181500 }, { "epoch": 7.77, "learning_rate": 1.529453501771158e-05, "loss": 0.055, "step": 182000 }, { "epoch": 7.77, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.94691961434201, "eval_loss": 0.26326122879981995, "eval_runtime": 29.7953, "eval_samples_per_second": 252.187, "eval_steps_per_second": 3.96, "step": 182000 }, { "epoch": 7.79, "learning_rate": 1.5267914301566303e-05, "loss": 0.0561, "step": 182500 }, { "epoch": 7.79, "eval_accuracy": 0.9470322065477775, "eval_f1": 0.9472489943845809, "eval_loss": 0.25921469926834106, "eval_runtime": 29.7325, "eval_samples_per_second": 252.72, "eval_steps_per_second": 3.969, "step": 182500 }, { "epoch": 7.81, "learning_rate": 1.5241240237292475e-05, "loss": 0.0619, "step": 183000 }, { "epoch": 7.81, "eval_accuracy": 0.9249401117913228, "eval_f1": 0.9262515351007677, "eval_loss": 0.366608202457428, "eval_runtime": 29.6783, "eval_samples_per_second": 253.181, "eval_steps_per_second": 3.976, "step": 183000 }, { "epoch": 7.83, "learning_rate": 1.5214566173018652e-05, "loss": 0.0581, "step": 183500 }, { "epoch": 7.83, "eval_accuracy": 0.9402448762310354, "eval_f1": 0.9409055984690178, "eval_loss": 0.3166221082210541, "eval_runtime": 29.6828, "eval_samples_per_second": 253.143, "eval_steps_per_second": 3.975, "step": 183500 }, { "epoch": 7.85, "learning_rate": 1.5187892108744826e-05, "loss": 0.0608, "step": 184000 }, { "epoch": 7.85, "eval_accuracy": 0.9338568006388076, "eval_f1": 0.9346581461981202, "eval_loss": 0.3185623288154602, "eval_runtime": 29.6715, "eval_samples_per_second": 253.239, "eval_steps_per_second": 3.977, "step": 184000 }, { "epoch": 7.87, "learning_rate": 1.5161271392599548e-05, "loss": 0.0575, "step": 184500 }, { "epoch": 7.87, "eval_accuracy": 0.9431727442108065, "eval_f1": 0.9435642942522529, "eval_loss": 0.28907355666160583, "eval_runtime": 29.6659, "eval_samples_per_second": 253.288, "eval_steps_per_second": 3.978, "step": 184500 }, { "epoch": 7.9, "learning_rate": 1.5134597328325724e-05, "loss": 0.0576, "step": 185000 }, { "epoch": 7.9, "eval_accuracy": 0.9494277348948629, "eval_f1": 0.949495115749563, "eval_loss": 0.24902021884918213, "eval_runtime": 29.1132, "eval_samples_per_second": 258.096, "eval_steps_per_second": 4.053, "step": 185000 }, { "epoch": 7.92, "learning_rate": 1.5107923264051897e-05, "loss": 0.0589, "step": 185500 }, { "epoch": 7.92, "eval_accuracy": 0.9401117913228639, "eval_f1": 0.940751907329565, "eval_loss": 0.29299965500831604, "eval_runtime": 13.6025, "eval_samples_per_second": 552.4, "eval_steps_per_second": 8.675, "step": 185500 }, { "epoch": 7.94, "learning_rate": 1.5081249199778073e-05, "loss": 0.0585, "step": 186000 }, { "epoch": 7.94, "eval_accuracy": 0.9237423476177802, "eval_f1": 0.9250711808500939, "eval_loss": 0.35780373215675354, "eval_runtime": 13.5373, "eval_samples_per_second": 555.059, "eval_steps_per_second": 8.717, "step": 186000 }, { "epoch": 7.96, "learning_rate": 1.5054575135504248e-05, "loss": 0.06, "step": 186500 }, { "epoch": 7.96, "eval_accuracy": 0.9383816875166356, "eval_f1": 0.9388439209070308, "eval_loss": 0.28486374020576477, "eval_runtime": 13.5297, "eval_samples_per_second": 555.37, "eval_steps_per_second": 8.722, "step": 186500 }, { "epoch": 7.98, "learning_rate": 1.502795441935897e-05, "loss": 0.0591, "step": 187000 }, { "epoch": 7.98, "eval_accuracy": 0.9446366782006921, "eval_f1": 0.9448190570182394, "eval_loss": 0.25872182846069336, "eval_runtime": 13.5273, "eval_samples_per_second": 555.47, "eval_steps_per_second": 8.723, "step": 187000 }, { "epoch": 8.0, "learning_rate": 1.5001333703213692e-05, "loss": 0.0549, "step": 187500 }, { "epoch": 8.0, "eval_accuracy": 0.9486292254458344, "eval_f1": 0.9489107961009511, "eval_loss": 0.26960158348083496, "eval_runtime": 13.5289, "eval_samples_per_second": 555.406, "eval_steps_per_second": 8.722, "step": 187500 }, { "epoch": 8.02, "learning_rate": 1.4974659638939868e-05, "loss": 0.0426, "step": 188000 }, { "epoch": 8.02, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9398611153432043, "eval_loss": 0.32398930191993713, "eval_runtime": 13.5276, "eval_samples_per_second": 555.456, "eval_steps_per_second": 8.723, "step": 188000 }, { "epoch": 8.04, "learning_rate": 1.4947985574666043e-05, "loss": 0.04, "step": 188500 }, { "epoch": 8.04, "eval_accuracy": 0.9405110460473782, "eval_f1": 0.9408104589990299, "eval_loss": 0.3479633629322052, "eval_runtime": 13.5408, "eval_samples_per_second": 554.917, "eval_steps_per_second": 8.714, "step": 188500 }, { "epoch": 8.07, "learning_rate": 1.4921311510392215e-05, "loss": 0.0452, "step": 189000 }, { "epoch": 8.07, "eval_accuracy": 0.9457013574660633, "eval_f1": 0.9459910257850512, "eval_loss": 0.3050368130207062, "eval_runtime": 13.5374, "eval_samples_per_second": 555.056, "eval_steps_per_second": 8.717, "step": 189000 }, { "epoch": 8.09, "learning_rate": 1.4894637446118392e-05, "loss": 0.0459, "step": 189500 }, { "epoch": 8.09, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9431772082967884, "eval_loss": 0.30137473344802856, "eval_runtime": 13.5381, "eval_samples_per_second": 555.027, "eval_steps_per_second": 8.716, "step": 189500 }, { "epoch": 8.11, "learning_rate": 1.4868016729973114e-05, "loss": 0.0459, "step": 190000 }, { "epoch": 8.11, "eval_accuracy": 0.943971253659835, "eval_f1": 0.9444235461365681, "eval_loss": 0.30643758177757263, "eval_runtime": 13.5406, "eval_samples_per_second": 554.926, "eval_steps_per_second": 8.715, "step": 190000 }, { "epoch": 8.13, "learning_rate": 1.4841342665699288e-05, "loss": 0.0483, "step": 190500 }, { "epoch": 8.13, "eval_accuracy": 0.9381155177002928, "eval_f1": 0.9387478969810729, "eval_loss": 0.32927992939949036, "eval_runtime": 13.5327, "eval_samples_per_second": 555.246, "eval_steps_per_second": 8.72, "step": 190500 }, { "epoch": 8.15, "learning_rate": 1.4814668601425464e-05, "loss": 0.0489, "step": 191000 }, { "epoch": 8.15, "eval_accuracy": 0.9410433856800638, "eval_f1": 0.9413968738750169, "eval_loss": 0.33153387904167175, "eval_runtime": 13.5315, "eval_samples_per_second": 555.295, "eval_steps_per_second": 8.72, "step": 191000 }, { "epoch": 8.17, "learning_rate": 1.4787994537151637e-05, "loss": 0.0473, "step": 191500 }, { "epoch": 8.17, "eval_accuracy": 0.9486292254458344, "eval_f1": 0.9489248949784995, "eval_loss": 0.27476081252098083, "eval_runtime": 13.5362, "eval_samples_per_second": 555.103, "eval_steps_per_second": 8.717, "step": 191500 }, { "epoch": 8.19, "learning_rate": 1.4761320472877813e-05, "loss": 0.045, "step": 192000 }, { "epoch": 8.19, "eval_accuracy": 0.9442374234761778, "eval_f1": 0.9447492534031262, "eval_loss": 0.3161003291606903, "eval_runtime": 13.536, "eval_samples_per_second": 555.114, "eval_steps_per_second": 8.718, "step": 192000 }, { "epoch": 8.22, "learning_rate": 1.4734646408603988e-05, "loss": 0.0467, "step": 192500 }, { "epoch": 8.22, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9471827896586111, "eval_loss": 0.2846441864967346, "eval_runtime": 13.5383, "eval_samples_per_second": 555.016, "eval_steps_per_second": 8.716, "step": 192500 }, { "epoch": 8.24, "learning_rate": 1.470797234433016e-05, "loss": 0.046, "step": 193000 }, { "epoch": 8.24, "eval_accuracy": 0.9423742347617781, "eval_f1": 0.9428938683607367, "eval_loss": 0.3229263722896576, "eval_runtime": 13.5303, "eval_samples_per_second": 555.347, "eval_steps_per_second": 8.721, "step": 193000 }, { "epoch": 8.26, "learning_rate": 1.4681298280056337e-05, "loss": 0.0457, "step": 193500 }, { "epoch": 8.26, "eval_accuracy": 0.946233697098749, "eval_f1": 0.946569454970224, "eval_loss": 0.30373692512512207, "eval_runtime": 13.5229, "eval_samples_per_second": 555.649, "eval_steps_per_second": 8.726, "step": 193500 }, { "epoch": 8.28, "learning_rate": 1.4654677563911059e-05, "loss": 0.0455, "step": 194000 }, { "epoch": 8.28, "eval_accuracy": 0.9437050838434922, "eval_f1": 0.9440899632750663, "eval_loss": 0.31653735041618347, "eval_runtime": 13.5251, "eval_samples_per_second": 555.559, "eval_steps_per_second": 8.725, "step": 194000 }, { "epoch": 8.3, "learning_rate": 1.4628003499637234e-05, "loss": 0.047, "step": 194500 }, { "epoch": 8.3, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.946954671999758, "eval_loss": 0.29640811681747437, "eval_runtime": 13.5268, "eval_samples_per_second": 555.49, "eval_steps_per_second": 8.723, "step": 194500 }, { "epoch": 8.32, "learning_rate": 1.460132943536341e-05, "loss": 0.0468, "step": 195000 }, { "epoch": 8.32, "eval_accuracy": 0.946233697098749, "eval_f1": 0.9465351520904887, "eval_loss": 0.2944372594356537, "eval_runtime": 13.5261, "eval_samples_per_second": 555.519, "eval_steps_per_second": 8.724, "step": 195000 }, { "epoch": 8.34, "learning_rate": 1.4574708719218132e-05, "loss": 0.0459, "step": 195500 }, { "epoch": 8.34, "eval_accuracy": 0.9389140271493213, "eval_f1": 0.9394949185938651, "eval_loss": 0.36057648062705994, "eval_runtime": 13.5297, "eval_samples_per_second": 555.371, "eval_steps_per_second": 8.722, "step": 195500 }, { "epoch": 8.36, "learning_rate": 1.4548034654944305e-05, "loss": 0.0493, "step": 196000 }, { "epoch": 8.36, "eval_accuracy": 0.9417088102209209, "eval_f1": 0.9422040130273488, "eval_loss": 0.3340831398963928, "eval_runtime": 13.5271, "eval_samples_per_second": 555.478, "eval_steps_per_second": 8.723, "step": 196000 }, { "epoch": 8.39, "learning_rate": 1.452136059067048e-05, "loss": 0.0492, "step": 196500 }, { "epoch": 8.39, "eval_accuracy": 0.9423742347617781, "eval_f1": 0.9428594973324005, "eval_loss": 0.2942802608013153, "eval_runtime": 13.5234, "eval_samples_per_second": 555.631, "eval_steps_per_second": 8.726, "step": 196500 }, { "epoch": 8.41, "learning_rate": 1.4494686526396655e-05, "loss": 0.0458, "step": 197000 }, { "epoch": 8.41, "eval_accuracy": 0.9515570934256056, "eval_f1": 0.9516996748274084, "eval_loss": 0.2723489999771118, "eval_runtime": 13.523, "eval_samples_per_second": 555.644, "eval_steps_per_second": 8.726, "step": 197000 }, { "epoch": 8.43, "learning_rate": 1.4468065810251377e-05, "loss": 0.0494, "step": 197500 }, { "epoch": 8.43, "eval_accuracy": 0.9510247537929198, "eval_f1": 0.9511898273887466, "eval_loss": 0.24962320923805237, "eval_runtime": 13.5287, "eval_samples_per_second": 555.411, "eval_steps_per_second": 8.722, "step": 197500 }, { "epoch": 8.45, "learning_rate": 1.4441391745977553e-05, "loss": 0.0472, "step": 198000 }, { "epoch": 8.45, "eval_accuracy": 0.9366515837104072, "eval_f1": 0.9374091182440442, "eval_loss": 0.33135172724723816, "eval_runtime": 13.5362, "eval_samples_per_second": 555.105, "eval_steps_per_second": 8.717, "step": 198000 }, { "epoch": 8.47, "learning_rate": 1.4414717681703726e-05, "loss": 0.0501, "step": 198500 }, { "epoch": 8.47, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.946517345274512, "eval_loss": 0.2846202850341797, "eval_runtime": 13.527, "eval_samples_per_second": 555.48, "eval_steps_per_second": 8.723, "step": 198500 }, { "epoch": 8.49, "learning_rate": 1.43880436174299e-05, "loss": 0.0467, "step": 199000 }, { "epoch": 8.49, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9454834491860682, "eval_loss": 0.29985642433166504, "eval_runtime": 13.5281, "eval_samples_per_second": 555.437, "eval_steps_per_second": 8.723, "step": 199000 }, { "epoch": 8.51, "learning_rate": 1.4361369553156077e-05, "loss": 0.0467, "step": 199500 }, { "epoch": 8.51, "eval_accuracy": 0.9393132818738356, "eval_f1": 0.939983462083631, "eval_loss": 0.3112838566303253, "eval_runtime": 13.5306, "eval_samples_per_second": 555.334, "eval_steps_per_second": 8.721, "step": 199500 }, { "epoch": 8.54, "learning_rate": 1.4334695488882252e-05, "loss": 0.0507, "step": 200000 }, { "epoch": 8.54, "eval_accuracy": 0.9425073196699494, "eval_f1": 0.9429940607490731, "eval_loss": 0.31961777806282043, "eval_runtime": 13.5305, "eval_samples_per_second": 555.337, "eval_steps_per_second": 8.721, "step": 200000 }, { "epoch": 8.56, "learning_rate": 1.4308021424608424e-05, "loss": 0.0489, "step": 200500 }, { "epoch": 8.56, "eval_accuracy": 0.9280010646792654, "eval_f1": 0.9293048945182593, "eval_loss": 0.4206550121307373, "eval_runtime": 13.5338, "eval_samples_per_second": 555.202, "eval_steps_per_second": 8.719, "step": 200500 }, { "epoch": 8.58, "learning_rate": 1.42813473603346e-05, "loss": 0.048, "step": 201000 }, { "epoch": 8.58, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9432724652442028, "eval_loss": 0.3334466516971588, "eval_runtime": 13.5332, "eval_samples_per_second": 555.225, "eval_steps_per_second": 8.719, "step": 201000 }, { "epoch": 8.6, "learning_rate": 1.4254673296060775e-05, "loss": 0.0464, "step": 201500 }, { "epoch": 8.6, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9455351354611425, "eval_loss": 0.3089136481285095, "eval_runtime": 13.5321, "eval_samples_per_second": 555.272, "eval_steps_per_second": 8.72, "step": 201500 }, { "epoch": 8.62, "learning_rate": 1.4228052579915497e-05, "loss": 0.0511, "step": 202000 }, { "epoch": 8.62, "eval_accuracy": 0.9476976310886346, "eval_f1": 0.9480208872624477, "eval_loss": 0.2791631817817688, "eval_runtime": 13.5329, "eval_samples_per_second": 555.241, "eval_steps_per_second": 8.72, "step": 202000 }, { "epoch": 8.64, "learning_rate": 1.4201378515641673e-05, "loss": 0.0475, "step": 202500 }, { "epoch": 8.64, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.9465029093795351, "eval_loss": 0.2907649874687195, "eval_runtime": 13.5424, "eval_samples_per_second": 554.849, "eval_steps_per_second": 8.713, "step": 202500 }, { "epoch": 8.66, "learning_rate": 1.4174704451367846e-05, "loss": 0.0456, "step": 203000 }, { "epoch": 8.66, "eval_accuracy": 0.9445035932925206, "eval_f1": 0.9449522232609514, "eval_loss": 0.3088465929031372, "eval_runtime": 13.542, "eval_samples_per_second": 554.868, "eval_steps_per_second": 8.714, "step": 203000 }, { "epoch": 8.69, "learning_rate": 1.4148030387094022e-05, "loss": 0.0477, "step": 203500 }, { "epoch": 8.69, "eval_accuracy": 0.9442374234761778, "eval_f1": 0.9446454812862733, "eval_loss": 0.29683443903923035, "eval_runtime": 13.5391, "eval_samples_per_second": 554.985, "eval_steps_per_second": 8.716, "step": 203500 }, { "epoch": 8.71, "learning_rate": 1.4121356322820197e-05, "loss": 0.0489, "step": 204000 }, { "epoch": 8.71, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.9463714591570415, "eval_loss": 0.2853368818759918, "eval_runtime": 13.5424, "eval_samples_per_second": 554.85, "eval_steps_per_second": 8.713, "step": 204000 }, { "epoch": 8.73, "learning_rate": 1.409468225854637e-05, "loss": 0.0505, "step": 204500 }, { "epoch": 8.73, "eval_accuracy": 0.9417088102209209, "eval_f1": 0.9422965965625818, "eval_loss": 0.31411096453666687, "eval_runtime": 13.5401, "eval_samples_per_second": 554.944, "eval_steps_per_second": 8.715, "step": 204500 }, { "epoch": 8.75, "learning_rate": 1.4068008194272546e-05, "loss": 0.0511, "step": 205000 }, { "epoch": 8.75, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9491206021399284, "eval_loss": 0.25752925872802734, "eval_runtime": 13.5421, "eval_samples_per_second": 554.863, "eval_steps_per_second": 8.714, "step": 205000 }, { "epoch": 8.77, "learning_rate": 1.404133412999872e-05, "loss": 0.0498, "step": 205500 }, { "epoch": 8.77, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9476323959588058, "eval_loss": 0.2809707820415497, "eval_runtime": 13.5386, "eval_samples_per_second": 555.006, "eval_steps_per_second": 8.716, "step": 205500 }, { "epoch": 8.79, "learning_rate": 1.4014713413853443e-05, "loss": 0.0514, "step": 206000 }, { "epoch": 8.79, "eval_accuracy": 0.9471652914559489, "eval_f1": 0.9474699446592635, "eval_loss": 0.29401782155036926, "eval_runtime": 13.5388, "eval_samples_per_second": 554.998, "eval_steps_per_second": 8.716, "step": 206000 }, { "epoch": 8.81, "learning_rate": 1.3988039349579619e-05, "loss": 0.0488, "step": 206500 }, { "epoch": 8.81, "eval_accuracy": 0.9490284801703487, "eval_f1": 0.9493332336992797, "eval_loss": 0.28929102420806885, "eval_runtime": 13.5372, "eval_samples_per_second": 555.061, "eval_steps_per_second": 8.717, "step": 206500 }, { "epoch": 8.83, "learning_rate": 1.3961365285305792e-05, "loss": 0.0492, "step": 207000 }, { "epoch": 8.83, "eval_accuracy": 0.9438381687516636, "eval_f1": 0.9441721189079093, "eval_loss": 0.27806001901626587, "eval_runtime": 13.5306, "eval_samples_per_second": 555.336, "eval_steps_per_second": 8.721, "step": 207000 }, { "epoch": 8.86, "learning_rate": 1.3934691221031966e-05, "loss": 0.0506, "step": 207500 }, { "epoch": 8.86, "eval_accuracy": 0.9347883949960074, "eval_f1": 0.9354718798641715, "eval_loss": 0.36486494541168213, "eval_runtime": 13.5304, "eval_samples_per_second": 555.343, "eval_steps_per_second": 8.721, "step": 207500 }, { "epoch": 8.88, "learning_rate": 1.3908017156758142e-05, "loss": 0.0474, "step": 208000 }, { "epoch": 8.88, "eval_accuracy": 0.9495608198030343, "eval_f1": 0.9498771170325628, "eval_loss": 0.2834174633026123, "eval_runtime": 13.5288, "eval_samples_per_second": 555.408, "eval_steps_per_second": 8.722, "step": 208000 }, { "epoch": 8.9, "learning_rate": 1.3881343092484317e-05, "loss": 0.0513, "step": 208500 }, { "epoch": 8.9, "eval_accuracy": 0.9455682725578919, "eval_f1": 0.9459814913067134, "eval_loss": 0.2689332365989685, "eval_runtime": 13.5299, "eval_samples_per_second": 555.364, "eval_steps_per_second": 8.721, "step": 208500 }, { "epoch": 8.92, "learning_rate": 1.3854669028210491e-05, "loss": 0.0489, "step": 209000 }, { "epoch": 8.92, "eval_accuracy": 0.9426404045781208, "eval_f1": 0.9431762099216892, "eval_loss": 0.31225860118865967, "eval_runtime": 13.5161, "eval_samples_per_second": 555.929, "eval_steps_per_second": 8.73, "step": 209000 }, { "epoch": 8.94, "learning_rate": 1.3827994963936666e-05, "loss": 0.0499, "step": 209500 }, { "epoch": 8.94, "eval_accuracy": 0.9389140271493213, "eval_f1": 0.9396302937762872, "eval_loss": 0.32025986909866333, "eval_runtime": 13.5316, "eval_samples_per_second": 555.292, "eval_steps_per_second": 8.72, "step": 209500 }, { "epoch": 8.96, "learning_rate": 1.3801374247791388e-05, "loss": 0.0508, "step": 210000 }, { "epoch": 8.96, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.946708426144522, "eval_loss": 0.2876236140727997, "eval_runtime": 13.53, "eval_samples_per_second": 555.36, "eval_steps_per_second": 8.721, "step": 210000 }, { "epoch": 8.98, "learning_rate": 1.3774700183517564e-05, "loss": 0.0513, "step": 210500 }, { "epoch": 8.98, "eval_accuracy": 0.9446366782006921, "eval_f1": 0.9450770442358372, "eval_loss": 0.31053757667541504, "eval_runtime": 13.5348, "eval_samples_per_second": 555.16, "eval_steps_per_second": 8.718, "step": 210500 }, { "epoch": 9.01, "learning_rate": 1.3748026119243739e-05, "loss": 0.045, "step": 211000 }, { "epoch": 9.01, "eval_accuracy": 0.9483630556294916, "eval_f1": 0.9486779121833928, "eval_loss": 0.31297969818115234, "eval_runtime": 13.5337, "eval_samples_per_second": 555.209, "eval_steps_per_second": 8.719, "step": 211000 }, { "epoch": 9.03, "learning_rate": 1.3721352054969912e-05, "loss": 0.0386, "step": 211500 }, { "epoch": 9.03, "eval_accuracy": 0.9401117913228639, "eval_f1": 0.9406984058449347, "eval_loss": 0.35765621066093445, "eval_runtime": 13.5363, "eval_samples_per_second": 555.101, "eval_steps_per_second": 8.717, "step": 211500 }, { "epoch": 9.05, "learning_rate": 1.3694731338824637e-05, "loss": 0.0373, "step": 212000 }, { "epoch": 9.05, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.9464518539873553, "eval_loss": 0.3229225277900696, "eval_runtime": 13.5205, "eval_samples_per_second": 555.749, "eval_steps_per_second": 8.728, "step": 212000 }, { "epoch": 9.07, "learning_rate": 1.366805727455081e-05, "loss": 0.0367, "step": 212500 }, { "epoch": 9.07, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9461143122974073, "eval_loss": 0.33056244254112244, "eval_runtime": 13.5161, "eval_samples_per_second": 555.93, "eval_steps_per_second": 8.73, "step": 212500 }, { "epoch": 9.09, "learning_rate": 1.3641383210276984e-05, "loss": 0.0363, "step": 213000 }, { "epoch": 9.09, "eval_accuracy": 0.9455682725578919, "eval_f1": 0.9459606462006873, "eval_loss": 0.3319728374481201, "eval_runtime": 13.5062, "eval_samples_per_second": 556.336, "eval_steps_per_second": 8.737, "step": 213000 }, { "epoch": 9.11, "learning_rate": 1.3614762494131708e-05, "loss": 0.039, "step": 213500 }, { "epoch": 9.11, "eval_accuracy": 0.9510247537929198, "eval_f1": 0.951212137839212, "eval_loss": 0.2826208770275116, "eval_runtime": 13.5238, "eval_samples_per_second": 555.613, "eval_steps_per_second": 8.725, "step": 213500 }, { "epoch": 9.13, "learning_rate": 1.3588088429857882e-05, "loss": 0.0392, "step": 214000 }, { "epoch": 9.13, "eval_accuracy": 0.9423742347617781, "eval_f1": 0.9428820676687188, "eval_loss": 0.33029282093048096, "eval_runtime": 13.5236, "eval_samples_per_second": 555.62, "eval_steps_per_second": 8.725, "step": 214000 }, { "epoch": 9.15, "learning_rate": 1.3561414365584055e-05, "loss": 0.0387, "step": 214500 }, { "epoch": 9.15, "eval_accuracy": 0.946233697098749, "eval_f1": 0.9464641559514156, "eval_loss": 0.3023781180381775, "eval_runtime": 13.5258, "eval_samples_per_second": 555.531, "eval_steps_per_second": 8.724, "step": 214500 }, { "epoch": 9.18, "learning_rate": 1.3534740301310231e-05, "loss": 0.0385, "step": 215000 }, { "epoch": 9.18, "eval_accuracy": 0.936252328985893, "eval_f1": 0.9368822232154065, "eval_loss": 0.3795050084590912, "eval_runtime": 13.5245, "eval_samples_per_second": 555.584, "eval_steps_per_second": 8.725, "step": 215000 }, { "epoch": 9.2, "learning_rate": 1.3508066237036406e-05, "loss": 0.0388, "step": 215500 }, { "epoch": 9.2, "eval_accuracy": 0.9435719989353207, "eval_f1": 0.9439760866552441, "eval_loss": 0.3283912241458893, "eval_runtime": 13.5235, "eval_samples_per_second": 555.623, "eval_steps_per_second": 8.726, "step": 215500 }, { "epoch": 9.22, "learning_rate": 1.3481392172762579e-05, "loss": 0.0409, "step": 216000 }, { "epoch": 9.22, "eval_accuracy": 0.9413095554964067, "eval_f1": 0.9418583676160031, "eval_loss": 0.3618067800998688, "eval_runtime": 13.5245, "eval_samples_per_second": 555.584, "eval_steps_per_second": 8.725, "step": 216000 }, { "epoch": 9.24, "learning_rate": 1.3454718108488755e-05, "loss": 0.0431, "step": 216500 }, { "epoch": 9.24, "eval_accuracy": 0.9361192440777216, "eval_f1": 0.9369543755725985, "eval_loss": 0.3455849289894104, "eval_runtime": 13.5333, "eval_samples_per_second": 555.222, "eval_steps_per_second": 8.719, "step": 216500 }, { "epoch": 9.26, "learning_rate": 1.342804404421493e-05, "loss": 0.0383, "step": 217000 }, { "epoch": 9.26, "eval_accuracy": 0.9297311684854938, "eval_f1": 0.9305976202705633, "eval_loss": 0.41196706891059875, "eval_runtime": 13.534, "eval_samples_per_second": 555.196, "eval_steps_per_second": 8.719, "step": 217000 }, { "epoch": 9.28, "learning_rate": 1.3401369979941106e-05, "loss": 0.0396, "step": 217500 }, { "epoch": 9.28, "eval_accuracy": 0.9508916688847485, "eval_f1": 0.9510984681604356, "eval_loss": 0.29180270433425903, "eval_runtime": 13.5363, "eval_samples_per_second": 555.098, "eval_steps_per_second": 8.717, "step": 217500 }, { "epoch": 9.3, "learning_rate": 1.3374749263795828e-05, "loss": 0.0399, "step": 218000 }, { "epoch": 9.3, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9515091138685884, "eval_loss": 0.29510194063186646, "eval_runtime": 13.536, "eval_samples_per_second": 555.111, "eval_steps_per_second": 8.717, "step": 218000 }, { "epoch": 9.33, "learning_rate": 1.3348075199522e-05, "loss": 0.0386, "step": 218500 }, { "epoch": 9.33, "eval_accuracy": 0.9434389140271493, "eval_f1": 0.9439485797046033, "eval_loss": 0.33171477913856506, "eval_runtime": 13.535, "eval_samples_per_second": 555.153, "eval_steps_per_second": 8.718, "step": 218500 }, { "epoch": 9.35, "learning_rate": 1.3321401135248177e-05, "loss": 0.0408, "step": 219000 }, { "epoch": 9.35, "eval_accuracy": 0.9419749800372638, "eval_f1": 0.9424593134472877, "eval_loss": 0.33151671290397644, "eval_runtime": 13.5389, "eval_samples_per_second": 554.995, "eval_steps_per_second": 8.716, "step": 219000 }, { "epoch": 9.37, "learning_rate": 1.3294727070974351e-05, "loss": 0.0424, "step": 219500 }, { "epoch": 9.37, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.9469195787369524, "eval_loss": 0.3001687526702881, "eval_runtime": 13.5369, "eval_samples_per_second": 555.073, "eval_steps_per_second": 8.717, "step": 219500 }, { "epoch": 9.39, "learning_rate": 1.3268053006700526e-05, "loss": 0.0409, "step": 220000 }, { "epoch": 9.39, "eval_accuracy": 0.9455682725578919, "eval_f1": 0.9459156136981188, "eval_loss": 0.3342529237270355, "eval_runtime": 13.5382, "eval_samples_per_second": 555.023, "eval_steps_per_second": 8.716, "step": 220000 }, { "epoch": 9.41, "learning_rate": 1.3241378942426702e-05, "loss": 0.0408, "step": 220500 }, { "epoch": 9.41, "eval_accuracy": 0.9367846686185787, "eval_f1": 0.9375661881227262, "eval_loss": 0.3659190237522125, "eval_runtime": 13.534, "eval_samples_per_second": 555.196, "eval_steps_per_second": 8.719, "step": 220500 }, { "epoch": 9.43, "learning_rate": 1.3214704878152875e-05, "loss": 0.0402, "step": 221000 }, { "epoch": 9.43, "eval_accuracy": 0.9442374234761778, "eval_f1": 0.9447072226618535, "eval_loss": 0.32796511054039, "eval_runtime": 13.5246, "eval_samples_per_second": 555.582, "eval_steps_per_second": 8.725, "step": 221000 }, { "epoch": 9.45, "learning_rate": 1.318803081387905e-05, "loss": 0.041, "step": 221500 }, { "epoch": 9.45, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.9501153797703192, "eval_loss": 0.2731388509273529, "eval_runtime": 13.5271, "eval_samples_per_second": 555.478, "eval_steps_per_second": 8.723, "step": 221500 }, { "epoch": 9.47, "learning_rate": 1.3161410097733773e-05, "loss": 0.0435, "step": 222000 }, { "epoch": 9.47, "eval_accuracy": 0.9467660367314347, "eval_f1": 0.9469677220421611, "eval_loss": 0.32359492778778076, "eval_runtime": 13.53, "eval_samples_per_second": 555.358, "eval_steps_per_second": 8.721, "step": 222000 }, { "epoch": 9.5, "learning_rate": 1.3134736033459948e-05, "loss": 0.0424, "step": 222500 }, { "epoch": 9.5, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.9500851711345424, "eval_loss": 0.28242748975753784, "eval_runtime": 13.5277, "eval_samples_per_second": 555.451, "eval_steps_per_second": 8.723, "step": 222500 }, { "epoch": 9.52, "learning_rate": 1.310811531731467e-05, "loss": 0.0409, "step": 223000 }, { "epoch": 9.52, "eval_accuracy": 0.9407772158637211, "eval_f1": 0.9414327506311181, "eval_loss": 0.35967016220092773, "eval_runtime": 13.5314, "eval_samples_per_second": 555.303, "eval_steps_per_second": 8.72, "step": 223000 }, { "epoch": 9.54, "learning_rate": 1.3081441253040846e-05, "loss": 0.0405, "step": 223500 }, { "epoch": 9.54, "eval_accuracy": 0.9409103007718924, "eval_f1": 0.9415566706878806, "eval_loss": 0.3606089651584625, "eval_runtime": 13.5349, "eval_samples_per_second": 555.157, "eval_steps_per_second": 8.718, "step": 223500 }, { "epoch": 9.56, "learning_rate": 1.3054767188767019e-05, "loss": 0.0419, "step": 224000 }, { "epoch": 9.56, "eval_accuracy": 0.9471652914559489, "eval_f1": 0.9475127712539337, "eval_loss": 0.3135475516319275, "eval_runtime": 13.5324, "eval_samples_per_second": 555.259, "eval_steps_per_second": 8.72, "step": 224000 }, { "epoch": 9.58, "learning_rate": 1.3028093124493193e-05, "loss": 0.039, "step": 224500 }, { "epoch": 9.58, "eval_accuracy": 0.9527548575991482, "eval_f1": 0.9530442815959349, "eval_loss": 0.2737436890602112, "eval_runtime": 13.5316, "eval_samples_per_second": 555.292, "eval_steps_per_second": 8.72, "step": 224500 }, { "epoch": 9.6, "learning_rate": 1.3001472408347917e-05, "loss": 0.0425, "step": 225000 }, { "epoch": 9.6, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9462683305954652, "eval_loss": 0.2910870313644409, "eval_runtime": 13.5324, "eval_samples_per_second": 555.26, "eval_steps_per_second": 8.72, "step": 225000 }, { "epoch": 9.62, "learning_rate": 1.2974798344074091e-05, "loss": 0.0411, "step": 225500 }, { "epoch": 9.62, "eval_accuracy": 0.9507585839765771, "eval_f1": 0.9509916378732837, "eval_loss": 0.2779683768749237, "eval_runtime": 13.5184, "eval_samples_per_second": 555.837, "eval_steps_per_second": 8.729, "step": 225500 }, { "epoch": 9.65, "learning_rate": 1.2948124279800264e-05, "loss": 0.0446, "step": 226000 }, { "epoch": 9.65, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9472618308735409, "eval_loss": 0.2749107778072357, "eval_runtime": 13.5289, "eval_samples_per_second": 555.404, "eval_steps_per_second": 8.722, "step": 226000 }, { "epoch": 9.67, "learning_rate": 1.292145021552644e-05, "loss": 0.0417, "step": 226500 }, { "epoch": 9.67, "eval_accuracy": 0.9455682725578919, "eval_f1": 0.9459565056263725, "eval_loss": 0.3201073408126831, "eval_runtime": 13.5287, "eval_samples_per_second": 555.41, "eval_steps_per_second": 8.722, "step": 226500 }, { "epoch": 9.69, "learning_rate": 1.2894776151252615e-05, "loss": 0.0426, "step": 227000 }, { "epoch": 9.69, "eval_accuracy": 0.9464998669150918, "eval_f1": 0.9468397463588856, "eval_loss": 0.2988472282886505, "eval_runtime": 13.5281, "eval_samples_per_second": 555.438, "eval_steps_per_second": 8.723, "step": 227000 }, { "epoch": 9.71, "learning_rate": 1.2868102086978791e-05, "loss": 0.0434, "step": 227500 }, { "epoch": 9.71, "eval_accuracy": 0.9433058291189779, "eval_f1": 0.9435376363499867, "eval_loss": 0.32506898045539856, "eval_runtime": 13.5305, "eval_samples_per_second": 555.337, "eval_steps_per_second": 8.721, "step": 227500 }, { "epoch": 9.73, "learning_rate": 1.2841481370833513e-05, "loss": 0.0424, "step": 228000 }, { "epoch": 9.73, "eval_accuracy": 0.9511578387010913, "eval_f1": 0.951360071589869, "eval_loss": 0.283211886882782, "eval_runtime": 13.5317, "eval_samples_per_second": 555.29, "eval_steps_per_second": 8.72, "step": 228000 }, { "epoch": 9.75, "learning_rate": 1.2814807306559686e-05, "loss": 0.0405, "step": 228500 }, { "epoch": 9.75, "eval_accuracy": 0.9527548575991482, "eval_f1": 0.9527633365669924, "eval_loss": 0.2723678946495056, "eval_runtime": 13.5292, "eval_samples_per_second": 555.391, "eval_steps_per_second": 8.722, "step": 228500 }, { "epoch": 9.77, "learning_rate": 1.278813324228586e-05, "loss": 0.0434, "step": 229000 }, { "epoch": 9.77, "eval_accuracy": 0.9421080649454352, "eval_f1": 0.9426021558819764, "eval_loss": 0.3235361576080322, "eval_runtime": 13.5321, "eval_samples_per_second": 555.27, "eval_steps_per_second": 8.72, "step": 229000 }, { "epoch": 9.79, "learning_rate": 1.2761459178012037e-05, "loss": 0.0411, "step": 229500 }, { "epoch": 9.79, "eval_accuracy": 0.9507585839765771, "eval_f1": 0.950927039725016, "eval_loss": 0.2742987275123596, "eval_runtime": 13.5327, "eval_samples_per_second": 555.247, "eval_steps_per_second": 8.72, "step": 229500 }, { "epoch": 9.82, "learning_rate": 1.2734785113738211e-05, "loss": 0.0431, "step": 230000 }, { "epoch": 9.82, "eval_accuracy": 0.95009315943572, "eval_f1": 0.9503696415522803, "eval_loss": 0.2664912939071655, "eval_runtime": 13.5414, "eval_samples_per_second": 554.89, "eval_steps_per_second": 8.714, "step": 230000 }, { "epoch": 9.84, "learning_rate": 1.2708111049464386e-05, "loss": 0.0395, "step": 230500 }, { "epoch": 9.84, "eval_accuracy": 0.9453021027415491, "eval_f1": 0.9456690842646758, "eval_loss": 0.32275640964508057, "eval_runtime": 13.542, "eval_samples_per_second": 554.865, "eval_steps_per_second": 8.714, "step": 230500 }, { "epoch": 9.86, "learning_rate": 1.2681490333319108e-05, "loss": 0.0441, "step": 231000 }, { "epoch": 9.86, "eval_accuracy": 0.9425073196699494, "eval_f1": 0.9430064792809049, "eval_loss": 0.30706483125686646, "eval_runtime": 13.5412, "eval_samples_per_second": 554.898, "eval_steps_per_second": 8.714, "step": 231000 }, { "epoch": 9.88, "learning_rate": 1.2654816269045282e-05, "loss": 0.0442, "step": 231500 }, { "epoch": 9.88, "eval_accuracy": 0.9397125365983497, "eval_f1": 0.9403983821206406, "eval_loss": 0.32434916496276855, "eval_runtime": 13.5379, "eval_samples_per_second": 555.033, "eval_steps_per_second": 8.716, "step": 231500 }, { "epoch": 9.9, "learning_rate": 1.2628142204771459e-05, "loss": 0.0429, "step": 232000 }, { "epoch": 9.9, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9488449570964996, "eval_loss": 0.2867507040500641, "eval_runtime": 13.542, "eval_samples_per_second": 554.867, "eval_steps_per_second": 8.714, "step": 232000 }, { "epoch": 9.92, "learning_rate": 1.2601468140497633e-05, "loss": 0.044, "step": 232500 }, { "epoch": 9.92, "eval_accuracy": 0.9434389140271493, "eval_f1": 0.9440101830061064, "eval_loss": 0.30820271372795105, "eval_runtime": 13.54, "eval_samples_per_second": 554.947, "eval_steps_per_second": 8.715, "step": 232500 }, { "epoch": 9.94, "learning_rate": 1.2574794076223806e-05, "loss": 0.0428, "step": 233000 }, { "epoch": 9.94, "eval_accuracy": 0.9341229704551504, "eval_f1": 0.9350479509633295, "eval_loss": 0.368161141872406, "eval_runtime": 13.541, "eval_samples_per_second": 554.909, "eval_steps_per_second": 8.714, "step": 233000 }, { "epoch": 9.97, "learning_rate": 1.2548120011949982e-05, "loss": 0.0443, "step": 233500 }, { "epoch": 9.97, "eval_accuracy": 0.9467660367314347, "eval_f1": 0.9471681860906885, "eval_loss": 0.27612563967704773, "eval_runtime": 13.5379, "eval_samples_per_second": 555.033, "eval_steps_per_second": 8.716, "step": 233500 }, { "epoch": 9.99, "learning_rate": 1.2521445947676157e-05, "loss": 0.0432, "step": 234000 }, { "epoch": 9.99, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9432861941973121, "eval_loss": 0.29645493626594543, "eval_runtime": 13.5391, "eval_samples_per_second": 554.983, "eval_steps_per_second": 8.715, "step": 234000 }, { "epoch": 10.01, "learning_rate": 1.2494825231530879e-05, "loss": 0.0383, "step": 234500 }, { "epoch": 10.01, "eval_accuracy": 0.9527548575991482, "eval_f1": 0.9527383304408453, "eval_loss": 0.26099878549575806, "eval_runtime": 13.5389, "eval_samples_per_second": 554.992, "eval_steps_per_second": 8.716, "step": 234500 }, { "epoch": 10.03, "learning_rate": 1.2468151167257055e-05, "loss": 0.0319, "step": 235000 }, { "epoch": 10.03, "eval_accuracy": 0.9350545648123503, "eval_f1": 0.9359789154619395, "eval_loss": 0.38348767161369324, "eval_runtime": 13.528, "eval_samples_per_second": 555.44, "eval_steps_per_second": 8.723, "step": 235000 }, { "epoch": 10.05, "learning_rate": 1.2441477102983228e-05, "loss": 0.0319, "step": 235500 }, { "epoch": 10.05, "eval_accuracy": 0.9386478573329784, "eval_f1": 0.9393782650425354, "eval_loss": 0.3721713721752167, "eval_runtime": 13.5326, "eval_samples_per_second": 555.251, "eval_steps_per_second": 8.72, "step": 235500 }, { "epoch": 10.07, "learning_rate": 1.2414803038709402e-05, "loss": 0.0312, "step": 236000 }, { "epoch": 10.07, "eval_accuracy": 0.9453021027415491, "eval_f1": 0.9457109004028486, "eval_loss": 0.3375546336174011, "eval_runtime": 13.5304, "eval_samples_per_second": 555.341, "eval_steps_per_second": 8.721, "step": 236000 }, { "epoch": 10.09, "learning_rate": 1.2388128974435579e-05, "loss": 0.0315, "step": 236500 }, { "epoch": 10.09, "eval_accuracy": 0.9306627628426937, "eval_f1": 0.9316744044650579, "eval_loss": 0.4443936347961426, "eval_runtime": 13.5272, "eval_samples_per_second": 555.472, "eval_steps_per_second": 8.723, "step": 236500 }, { "epoch": 10.11, "learning_rate": 1.2361454910161751e-05, "loss": 0.0345, "step": 237000 }, { "epoch": 10.11, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9453803935392843, "eval_loss": 0.34678158164024353, "eval_runtime": 13.53, "eval_samples_per_second": 555.36, "eval_steps_per_second": 8.721, "step": 237000 }, { "epoch": 10.14, "learning_rate": 1.2334834194016473e-05, "loss": 0.0326, "step": 237500 }, { "epoch": 10.14, "eval_accuracy": 0.932392866648922, "eval_f1": 0.9335176394538748, "eval_loss": 0.42278626561164856, "eval_runtime": 13.5277, "eval_samples_per_second": 555.453, "eval_steps_per_second": 8.723, "step": 237500 }, { "epoch": 10.16, "learning_rate": 1.230816012974265e-05, "loss": 0.0329, "step": 238000 }, { "epoch": 10.16, "eval_accuracy": 0.9447697631088634, "eval_f1": 0.945271521756412, "eval_loss": 0.3269987106323242, "eval_runtime": 13.5308, "eval_samples_per_second": 555.324, "eval_steps_per_second": 8.721, "step": 238000 }, { "epoch": 10.18, "learning_rate": 1.2281486065468824e-05, "loss": 0.0349, "step": 238500 }, { "epoch": 10.18, "eval_accuracy": 0.9447697631088634, "eval_f1": 0.9452700002965628, "eval_loss": 0.3369627892971039, "eval_runtime": 13.5278, "eval_samples_per_second": 555.448, "eval_steps_per_second": 8.723, "step": 238500 }, { "epoch": 10.2, "learning_rate": 1.2254812001195e-05, "loss": 0.03, "step": 239000 }, { "epoch": 10.2, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9492453592745367, "eval_loss": 0.32061174511909485, "eval_runtime": 13.5307, "eval_samples_per_second": 555.331, "eval_steps_per_second": 8.721, "step": 239000 }, { "epoch": 10.22, "learning_rate": 1.2228191285049722e-05, "loss": 0.0358, "step": 239500 }, { "epoch": 10.22, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.9465327811229762, "eval_loss": 0.33827266097068787, "eval_runtime": 13.5261, "eval_samples_per_second": 555.518, "eval_steps_per_second": 8.724, "step": 239500 }, { "epoch": 10.24, "learning_rate": 1.2201517220775895e-05, "loss": 0.0367, "step": 240000 }, { "epoch": 10.24, "eval_accuracy": 0.9530210274154911, "eval_f1": 0.9531614604691627, "eval_loss": 0.25948166847229004, "eval_runtime": 13.5325, "eval_samples_per_second": 555.256, "eval_steps_per_second": 8.72, "step": 240000 }, { "epoch": 10.26, "learning_rate": 1.2174843156502071e-05, "loss": 0.0317, "step": 240500 }, { "epoch": 10.26, "eval_accuracy": 0.9526217726909768, "eval_f1": 0.9528273246855208, "eval_loss": 0.30694305896759033, "eval_runtime": 13.5328, "eval_samples_per_second": 555.243, "eval_steps_per_second": 8.72, "step": 240500 }, { "epoch": 10.29, "learning_rate": 1.2148169092228246e-05, "loss": 0.033, "step": 241000 }, { "epoch": 10.29, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9488020635699753, "eval_loss": 0.3017740249633789, "eval_runtime": 13.5313, "eval_samples_per_second": 555.307, "eval_steps_per_second": 8.721, "step": 241000 }, { "epoch": 10.31, "learning_rate": 1.212149502795442e-05, "loss": 0.036, "step": 241500 }, { "epoch": 10.31, "eval_accuracy": 0.9480968858131488, "eval_f1": 0.9484357589115017, "eval_loss": 0.3117830753326416, "eval_runtime": 13.5326, "eval_samples_per_second": 555.252, "eval_steps_per_second": 8.72, "step": 241500 }, { "epoch": 10.33, "learning_rate": 1.2094820963680595e-05, "loss": 0.0343, "step": 242000 }, { "epoch": 10.33, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9504874707212669, "eval_loss": 0.29442715644836426, "eval_runtime": 13.514, "eval_samples_per_second": 556.017, "eval_steps_per_second": 8.732, "step": 242000 }, { "epoch": 10.35, "learning_rate": 1.2068200247535319e-05, "loss": 0.0341, "step": 242500 }, { "epoch": 10.35, "eval_accuracy": 0.9450359329252063, "eval_f1": 0.9455823002502098, "eval_loss": 0.33251458406448364, "eval_runtime": 13.5, "eval_samples_per_second": 556.593, "eval_steps_per_second": 8.741, "step": 242500 }, { "epoch": 10.37, "learning_rate": 1.2041526183261491e-05, "loss": 0.0376, "step": 243000 }, { "epoch": 10.37, "eval_accuracy": 0.9346553100878361, "eval_f1": 0.9354847028775204, "eval_loss": 0.3757520020008087, "eval_runtime": 13.4999, "eval_samples_per_second": 556.598, "eval_steps_per_second": 8.741, "step": 243000 }, { "epoch": 10.39, "learning_rate": 1.2014852118987668e-05, "loss": 0.033, "step": 243500 }, { "epoch": 10.39, "eval_accuracy": 0.947830715996806, "eval_f1": 0.9482354473268947, "eval_loss": 0.29782551527023315, "eval_runtime": 13.5045, "eval_samples_per_second": 556.409, "eval_steps_per_second": 8.738, "step": 243500 }, { "epoch": 10.41, "learning_rate": 1.1988178054713842e-05, "loss": 0.0347, "step": 244000 }, { "epoch": 10.41, "eval_accuracy": 0.9503593292520628, "eval_f1": 0.9506089663521747, "eval_loss": 0.28753605484962463, "eval_runtime": 13.5173, "eval_samples_per_second": 555.881, "eval_steps_per_second": 8.73, "step": 244000 }, { "epoch": 10.43, "learning_rate": 1.1961503990440015e-05, "loss": 0.0357, "step": 244500 }, { "epoch": 10.43, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9453483829345272, "eval_loss": 0.3388841450214386, "eval_runtime": 13.5245, "eval_samples_per_second": 555.584, "eval_steps_per_second": 8.725, "step": 244500 }, { "epoch": 10.46, "learning_rate": 1.1934829926166191e-05, "loss": 0.034, "step": 245000 }, { "epoch": 10.46, "eval_accuracy": 0.9474314612722917, "eval_f1": 0.9476865138649213, "eval_loss": 0.3259483575820923, "eval_runtime": 13.5224, "eval_samples_per_second": 555.672, "eval_steps_per_second": 8.726, "step": 245000 }, { "epoch": 10.48, "learning_rate": 1.1908209210020913e-05, "loss": 0.0388, "step": 245500 }, { "epoch": 10.48, "eval_accuracy": 0.9454351876497206, "eval_f1": 0.9458281591899665, "eval_loss": 0.32985618710517883, "eval_runtime": 13.5192, "eval_samples_per_second": 555.8, "eval_steps_per_second": 8.728, "step": 245500 }, { "epoch": 10.5, "learning_rate": 1.1881535145747088e-05, "loss": 0.0364, "step": 246000 }, { "epoch": 10.5, "eval_accuracy": 0.9491615650785201, "eval_f1": 0.9494692785609192, "eval_loss": 0.2861853539943695, "eval_runtime": 13.5275, "eval_samples_per_second": 555.462, "eval_steps_per_second": 8.723, "step": 246000 }, { "epoch": 10.52, "learning_rate": 1.1854861081473264e-05, "loss": 0.0359, "step": 246500 }, { "epoch": 10.52, "eval_accuracy": 0.9419749800372638, "eval_f1": 0.9426052474217692, "eval_loss": 0.3198188245296478, "eval_runtime": 13.5288, "eval_samples_per_second": 555.408, "eval_steps_per_second": 8.722, "step": 246500 }, { "epoch": 10.54, "learning_rate": 1.1828187017199437e-05, "loss": 0.0366, "step": 247000 }, { "epoch": 10.54, "eval_accuracy": 0.9429065743944637, "eval_f1": 0.9435218841102918, "eval_loss": 0.3460228443145752, "eval_runtime": 13.5192, "eval_samples_per_second": 555.802, "eval_steps_per_second": 8.728, "step": 247000 }, { "epoch": 10.56, "learning_rate": 1.1801512952925613e-05, "loss": 0.0365, "step": 247500 }, { "epoch": 10.56, "eval_accuracy": 0.9442374234761778, "eval_f1": 0.9447131929091213, "eval_loss": 0.30125898122787476, "eval_runtime": 13.5139, "eval_samples_per_second": 556.019, "eval_steps_per_second": 8.732, "step": 247500 }, { "epoch": 10.58, "learning_rate": 1.1774838888651788e-05, "loss": 0.0344, "step": 248000 }, { "epoch": 10.58, "eval_accuracy": 0.9434389140271493, "eval_f1": 0.94390686705927, "eval_loss": 0.3141534626483917, "eval_runtime": 13.5171, "eval_samples_per_second": 555.889, "eval_steps_per_second": 8.73, "step": 248000 }, { "epoch": 10.61, "learning_rate": 1.174816482437796e-05, "loss": 0.0348, "step": 248500 }, { "epoch": 10.61, "eval_accuracy": 0.9474314612722917, "eval_f1": 0.9477872430849247, "eval_loss": 0.2989385426044464, "eval_runtime": 13.515, "eval_samples_per_second": 555.973, "eval_steps_per_second": 8.731, "step": 248500 }, { "epoch": 10.63, "learning_rate": 1.1721490760104137e-05, "loss": 0.0357, "step": 249000 }, { "epoch": 10.63, "eval_accuracy": 0.9455682725578919, "eval_f1": 0.9459662824329179, "eval_loss": 0.29782894253730774, "eval_runtime": 13.5043, "eval_samples_per_second": 556.414, "eval_steps_per_second": 8.738, "step": 249000 }, { "epoch": 10.65, "learning_rate": 1.1694870043958859e-05, "loss": 0.0335, "step": 249500 }, { "epoch": 10.65, "eval_accuracy": 0.9415757253127496, "eval_f1": 0.9420959106843136, "eval_loss": 0.3017115890979767, "eval_runtime": 13.515, "eval_samples_per_second": 555.976, "eval_steps_per_second": 8.731, "step": 249500 }, { "epoch": 10.67, "learning_rate": 1.1668195979685033e-05, "loss": 0.035, "step": 250000 }, { "epoch": 10.67, "eval_accuracy": 0.9430396593026351, "eval_f1": 0.9436321487656903, "eval_loss": 0.3339087665081024, "eval_runtime": 13.5029, "eval_samples_per_second": 556.474, "eval_steps_per_second": 8.739, "step": 250000 }, { "epoch": 10.69, "learning_rate": 1.1641575263539755e-05, "loss": 0.0365, "step": 250500 }, { "epoch": 10.69, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.946504806534825, "eval_loss": 0.3119305670261383, "eval_runtime": 13.5164, "eval_samples_per_second": 555.917, "eval_steps_per_second": 8.73, "step": 250500 }, { "epoch": 10.71, "learning_rate": 1.1614901199265931e-05, "loss": 0.0349, "step": 251000 }, { "epoch": 10.71, "eval_accuracy": 0.9431727442108065, "eval_f1": 0.9436855116972849, "eval_loss": 0.3208593726158142, "eval_runtime": 13.5183, "eval_samples_per_second": 555.84, "eval_steps_per_second": 8.729, "step": 251000 }, { "epoch": 10.73, "learning_rate": 1.1588227134992104e-05, "loss": 0.0378, "step": 251500 }, { "epoch": 10.73, "eval_accuracy": 0.9494277348948629, "eval_f1": 0.9497391996371748, "eval_loss": 0.29452651739120483, "eval_runtime": 13.504, "eval_samples_per_second": 556.426, "eval_steps_per_second": 8.738, "step": 251500 }, { "epoch": 10.75, "learning_rate": 1.156155307071828e-05, "loss": 0.0379, "step": 252000 }, { "epoch": 10.75, "eval_accuracy": 0.9441043385680064, "eval_f1": 0.944606696971724, "eval_loss": 0.3278190493583679, "eval_runtime": 13.5057, "eval_samples_per_second": 556.359, "eval_steps_per_second": 8.737, "step": 252000 }, { "epoch": 10.78, "learning_rate": 1.1534879006444455e-05, "loss": 0.0357, "step": 252500 }, { "epoch": 10.78, "eval_accuracy": 0.9490284801703487, "eval_f1": 0.9493800643943184, "eval_loss": 0.29757875204086304, "eval_runtime": 13.5211, "eval_samples_per_second": 555.723, "eval_steps_per_second": 8.727, "step": 252500 }, { "epoch": 10.8, "learning_rate": 1.1508258290299177e-05, "loss": 0.0387, "step": 253000 }, { "epoch": 10.8, "eval_accuracy": 0.9393132818738356, "eval_f1": 0.9400745889233175, "eval_loss": 0.3252202868461609, "eval_runtime": 13.5093, "eval_samples_per_second": 556.208, "eval_steps_per_second": 8.735, "step": 253000 }, { "epoch": 10.82, "learning_rate": 1.1481584226025353e-05, "loss": 0.0376, "step": 253500 }, { "epoch": 10.82, "eval_accuracy": 0.9527548575991482, "eval_f1": 0.9529652109380102, "eval_loss": 0.25803959369659424, "eval_runtime": 13.5164, "eval_samples_per_second": 555.919, "eval_steps_per_second": 8.73, "step": 253500 }, { "epoch": 10.84, "learning_rate": 1.1454910161751528e-05, "loss": 0.0405, "step": 254000 }, { "epoch": 10.84, "eval_accuracy": 0.9427734894862922, "eval_f1": 0.9433379426738755, "eval_loss": 0.3349682092666626, "eval_runtime": 13.5239, "eval_samples_per_second": 555.611, "eval_steps_per_second": 8.725, "step": 254000 }, { "epoch": 10.86, "learning_rate": 1.14282360974777e-05, "loss": 0.0363, "step": 254500 }, { "epoch": 10.86, "eval_accuracy": 0.9487623103540058, "eval_f1": 0.9491131402425562, "eval_loss": 0.3017532229423523, "eval_runtime": 13.5269, "eval_samples_per_second": 555.486, "eval_steps_per_second": 8.723, "step": 254500 }, { "epoch": 10.88, "learning_rate": 1.1401562033203877e-05, "loss": 0.0347, "step": 255000 }, { "epoch": 10.88, "eval_accuracy": 0.9480968858131488, "eval_f1": 0.9484905581037079, "eval_loss": 0.30602821707725525, "eval_runtime": 13.5262, "eval_samples_per_second": 555.513, "eval_steps_per_second": 8.724, "step": 255000 }, { "epoch": 10.9, "learning_rate": 1.1374941317058599e-05, "loss": 0.0348, "step": 255500 }, { "epoch": 10.9, "eval_accuracy": 0.9457013574660633, "eval_f1": 0.9461498483987765, "eval_loss": 0.3226492404937744, "eval_runtime": 13.5325, "eval_samples_per_second": 555.257, "eval_steps_per_second": 8.72, "step": 255500 }, { "epoch": 10.93, "learning_rate": 1.1348267252784773e-05, "loss": 0.0357, "step": 256000 }, { "epoch": 10.93, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9487462035465501, "eval_loss": 0.30912718176841736, "eval_runtime": 13.533, "eval_samples_per_second": 555.236, "eval_steps_per_second": 8.719, "step": 256000 }, { "epoch": 10.95, "learning_rate": 1.132159318851095e-05, "loss": 0.0345, "step": 256500 }, { "epoch": 10.95, "eval_accuracy": 0.9467660367314347, "eval_f1": 0.9472597356450273, "eval_loss": 0.32901766896247864, "eval_runtime": 13.5296, "eval_samples_per_second": 555.374, "eval_steps_per_second": 8.722, "step": 256500 }, { "epoch": 10.97, "learning_rate": 1.1294919124237122e-05, "loss": 0.0391, "step": 257000 }, { "epoch": 10.97, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9505242429513786, "eval_loss": 0.2757878005504608, "eval_runtime": 13.5316, "eval_samples_per_second": 555.291, "eval_steps_per_second": 8.72, "step": 257000 }, { "epoch": 10.99, "learning_rate": 1.1268245059963297e-05, "loss": 0.035, "step": 257500 }, { "epoch": 10.99, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9476722219527903, "eval_loss": 0.30939844250679016, "eval_runtime": 13.5323, "eval_samples_per_second": 555.265, "eval_steps_per_second": 8.72, "step": 257500 }, { "epoch": 11.01, "learning_rate": 1.124162434381802e-05, "loss": 0.0292, "step": 258000 }, { "epoch": 11.01, "eval_accuracy": 0.9522225179664626, "eval_f1": 0.9525102493286882, "eval_loss": 0.29116666316986084, "eval_runtime": 13.5378, "eval_samples_per_second": 555.04, "eval_steps_per_second": 8.716, "step": 258000 }, { "epoch": 11.03, "learning_rate": 1.1214950279544195e-05, "loss": 0.0254, "step": 258500 }, { "epoch": 11.03, "eval_accuracy": 0.9471652914559489, "eval_f1": 0.9475831203573146, "eval_loss": 0.3402584195137024, "eval_runtime": 13.5397, "eval_samples_per_second": 554.96, "eval_steps_per_second": 8.715, "step": 258500 }, { "epoch": 11.05, "learning_rate": 1.1188276215270368e-05, "loss": 0.028, "step": 259000 }, { "epoch": 11.05, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9506309679003032, "eval_loss": 0.2876528799533844, "eval_runtime": 13.539, "eval_samples_per_second": 554.991, "eval_steps_per_second": 8.716, "step": 259000 }, { "epoch": 11.08, "learning_rate": 1.1161602150996544e-05, "loss": 0.0291, "step": 259500 }, { "epoch": 11.08, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9516158915240817, "eval_loss": 0.2986809313297272, "eval_runtime": 13.5362, "eval_samples_per_second": 555.103, "eval_steps_per_second": 8.717, "step": 259500 }, { "epoch": 11.1, "learning_rate": 1.1134928086722719e-05, "loss": 0.0279, "step": 260000 }, { "epoch": 11.1, "eval_accuracy": 0.9461006121905776, "eval_f1": 0.9466771033072814, "eval_loss": 0.3648306429386139, "eval_runtime": 13.5352, "eval_samples_per_second": 555.144, "eval_steps_per_second": 8.718, "step": 260000 }, { "epoch": 11.12, "learning_rate": 1.1108254022448895e-05, "loss": 0.0297, "step": 260500 }, { "epoch": 11.12, "eval_accuracy": 0.9483630556294916, "eval_f1": 0.948826460056088, "eval_loss": 0.3425619900226593, "eval_runtime": 13.5252, "eval_samples_per_second": 555.554, "eval_steps_per_second": 8.724, "step": 260500 }, { "epoch": 11.14, "learning_rate": 1.1081579958175068e-05, "loss": 0.0301, "step": 261000 }, { "epoch": 11.14, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9493716498936634, "eval_loss": 0.31174421310424805, "eval_runtime": 13.5347, "eval_samples_per_second": 555.166, "eval_steps_per_second": 8.718, "step": 261000 }, { "epoch": 11.16, "learning_rate": 1.1054905893901242e-05, "loss": 0.0284, "step": 261500 }, { "epoch": 11.16, "eval_accuracy": 0.9510247537929198, "eval_f1": 0.9514079176915496, "eval_loss": 0.30318567156791687, "eval_runtime": 13.5381, "eval_samples_per_second": 555.026, "eval_steps_per_second": 8.716, "step": 261500 }, { "epoch": 11.18, "learning_rate": 1.1028231829627418e-05, "loss": 0.0299, "step": 262000 }, { "epoch": 11.18, "eval_accuracy": 0.9542187915890338, "eval_f1": 0.9544176427194278, "eval_loss": 0.2822323441505432, "eval_runtime": 13.5372, "eval_samples_per_second": 555.062, "eval_steps_per_second": 8.717, "step": 262000 }, { "epoch": 11.2, "learning_rate": 1.100161111348214e-05, "loss": 0.0279, "step": 262500 }, { "epoch": 11.2, "eval_accuracy": 0.9445035932925206, "eval_f1": 0.9450459515410018, "eval_loss": 0.3772587180137634, "eval_runtime": 13.5407, "eval_samples_per_second": 554.918, "eval_steps_per_second": 8.714, "step": 262500 }, { "epoch": 11.22, "learning_rate": 1.0974937049208315e-05, "loss": 0.0287, "step": 263000 }, { "epoch": 11.22, "eval_accuracy": 0.9459675272824062, "eval_f1": 0.9464438417497797, "eval_loss": 0.3681629002094269, "eval_runtime": 13.5273, "eval_samples_per_second": 555.47, "eval_steps_per_second": 8.723, "step": 263000 }, { "epoch": 11.25, "learning_rate": 1.094826298493449e-05, "loss": 0.0296, "step": 263500 }, { "epoch": 11.25, "eval_accuracy": 0.946233697098749, "eval_f1": 0.9467587683332466, "eval_loss": 0.3240737318992615, "eval_runtime": 13.5306, "eval_samples_per_second": 555.335, "eval_steps_per_second": 8.721, "step": 263500 }, { "epoch": 11.27, "learning_rate": 1.0921588920660664e-05, "loss": 0.029, "step": 264000 }, { "epoch": 11.27, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9493013988508006, "eval_loss": 0.3337612748146057, "eval_runtime": 13.535, "eval_samples_per_second": 555.152, "eval_steps_per_second": 8.718, "step": 264000 }, { "epoch": 11.29, "learning_rate": 1.0894968204515386e-05, "loss": 0.0305, "step": 264500 }, { "epoch": 11.29, "eval_accuracy": 0.9511578387010913, "eval_f1": 0.9513546227894613, "eval_loss": 0.28592097759246826, "eval_runtime": 13.5321, "eval_samples_per_second": 555.271, "eval_steps_per_second": 8.72, "step": 264500 }, { "epoch": 11.31, "learning_rate": 1.0868294140241562e-05, "loss": 0.0289, "step": 265000 }, { "epoch": 11.31, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9478212294888846, "eval_loss": 0.3027932941913605, "eval_runtime": 13.5302, "eval_samples_per_second": 555.348, "eval_steps_per_second": 8.721, "step": 265000 }, { "epoch": 11.33, "learning_rate": 1.0841620075967737e-05, "loss": 0.0294, "step": 265500 }, { "epoch": 11.33, "eval_accuracy": 0.9437050838434922, "eval_f1": 0.944358158604522, "eval_loss": 0.34016963839530945, "eval_runtime": 13.5339, "eval_samples_per_second": 555.198, "eval_steps_per_second": 8.719, "step": 265500 }, { "epoch": 11.35, "learning_rate": 1.081494601169391e-05, "loss": 0.0301, "step": 266000 }, { "epoch": 11.35, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9516383347086875, "eval_loss": 0.29323649406433105, "eval_runtime": 13.5315, "eval_samples_per_second": 555.299, "eval_steps_per_second": 8.72, "step": 266000 }, { "epoch": 11.37, "learning_rate": 1.0788325295548635e-05, "loss": 0.0323, "step": 266500 }, { "epoch": 11.37, "eval_accuracy": 0.9474314612722917, "eval_f1": 0.9479135405572265, "eval_loss": 0.32282477617263794, "eval_runtime": 13.5167, "eval_samples_per_second": 555.906, "eval_steps_per_second": 8.73, "step": 266500 }, { "epoch": 11.4, "learning_rate": 1.0761651231274808e-05, "loss": 0.0294, "step": 267000 }, { "epoch": 11.4, "eval_accuracy": 0.9471652914559489, "eval_f1": 0.9474788361666246, "eval_loss": 0.34356236457824707, "eval_runtime": 13.5297, "eval_samples_per_second": 555.372, "eval_steps_per_second": 8.722, "step": 267000 }, { "epoch": 11.42, "learning_rate": 1.073503051512953e-05, "loss": 0.0299, "step": 267500 }, { "epoch": 11.42, "eval_accuracy": 0.9496939047112057, "eval_f1": 0.9500661332245157, "eval_loss": 0.3229745328426361, "eval_runtime": 13.5139, "eval_samples_per_second": 556.019, "eval_steps_per_second": 8.732, "step": 267500 }, { "epoch": 11.44, "learning_rate": 1.0708356450855706e-05, "loss": 0.0295, "step": 268000 }, { "epoch": 11.44, "eval_accuracy": 0.9532871972318339, "eval_f1": 0.9535328056655195, "eval_loss": 0.3073120415210724, "eval_runtime": 13.5288, "eval_samples_per_second": 555.407, "eval_steps_per_second": 8.722, "step": 268000 }, { "epoch": 11.46, "learning_rate": 1.068168238658188e-05, "loss": 0.0285, "step": 268500 }, { "epoch": 11.46, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9489967962192396, "eval_loss": 0.3616872727870941, "eval_runtime": 13.5307, "eval_samples_per_second": 555.331, "eval_steps_per_second": 8.721, "step": 268500 }, { "epoch": 11.48, "learning_rate": 1.0655008322308053e-05, "loss": 0.0311, "step": 269000 }, { "epoch": 11.48, "eval_accuracy": 0.9546180463135481, "eval_f1": 0.9547804895488143, "eval_loss": 0.2949013411998749, "eval_runtime": 13.5302, "eval_samples_per_second": 555.35, "eval_steps_per_second": 8.721, "step": 269000 }, { "epoch": 11.5, "learning_rate": 1.0628387606162778e-05, "loss": 0.0327, "step": 269500 }, { "epoch": 11.5, "eval_accuracy": 0.9538195368645196, "eval_f1": 0.954041082728273, "eval_loss": 0.2957130968570709, "eval_runtime": 13.5313, "eval_samples_per_second": 555.306, "eval_steps_per_second": 8.721, "step": 269500 }, { "epoch": 11.52, "learning_rate": 1.0601713541888951e-05, "loss": 0.0277, "step": 270000 }, { "epoch": 11.52, "eval_accuracy": 0.9518232632419483, "eval_f1": 0.9521390780449444, "eval_loss": 0.3210020661354065, "eval_runtime": 13.5284, "eval_samples_per_second": 555.426, "eval_steps_per_second": 8.722, "step": 270000 }, { "epoch": 11.54, "learning_rate": 1.0575039477615126e-05, "loss": 0.0312, "step": 270500 }, { "epoch": 11.54, "eval_accuracy": 0.9494277348948629, "eval_f1": 0.9497943099716304, "eval_loss": 0.33388105034828186, "eval_runtime": 13.5275, "eval_samples_per_second": 555.462, "eval_steps_per_second": 8.723, "step": 270500 }, { "epoch": 11.57, "learning_rate": 1.0548365413341302e-05, "loss": 0.0307, "step": 271000 }, { "epoch": 11.57, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9400404568776457, "eval_loss": 0.4182300567626953, "eval_runtime": 13.5305, "eval_samples_per_second": 555.338, "eval_steps_per_second": 8.721, "step": 271000 }, { "epoch": 11.59, "learning_rate": 1.0521691349067475e-05, "loss": 0.0286, "step": 271500 }, { "epoch": 11.59, "eval_accuracy": 0.9342560553633218, "eval_f1": 0.9353321721209846, "eval_loss": 0.42980387806892395, "eval_runtime": 13.5324, "eval_samples_per_second": 555.262, "eval_steps_per_second": 8.72, "step": 271500 }, { "epoch": 11.61, "learning_rate": 1.0495070632922197e-05, "loss": 0.0322, "step": 272000 }, { "epoch": 11.61, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.9467999447575667, "eval_loss": 0.3350697457790375, "eval_runtime": 13.5467, "eval_samples_per_second": 554.674, "eval_steps_per_second": 8.711, "step": 272000 }, { "epoch": 11.63, "learning_rate": 1.0468396568648373e-05, "loss": 0.0322, "step": 272500 }, { "epoch": 11.63, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9474194226678742, "eval_loss": 0.3376242518424988, "eval_runtime": 13.5463, "eval_samples_per_second": 554.688, "eval_steps_per_second": 8.711, "step": 272500 }, { "epoch": 11.65, "learning_rate": 1.0441722504374548e-05, "loss": 0.0284, "step": 273000 }, { "epoch": 11.65, "eval_accuracy": 0.9519563481501198, "eval_f1": 0.9522952967136102, "eval_loss": 0.30849677324295044, "eval_runtime": 13.5447, "eval_samples_per_second": 554.756, "eval_steps_per_second": 8.712, "step": 273000 }, { "epoch": 11.67, "learning_rate": 1.041504844010072e-05, "loss": 0.0291, "step": 273500 }, { "epoch": 11.67, "eval_accuracy": 0.9540857066808623, "eval_f1": 0.9543742598604628, "eval_loss": 0.3051382899284363, "eval_runtime": 13.5382, "eval_samples_per_second": 555.021, "eval_steps_per_second": 8.716, "step": 273500 }, { "epoch": 11.69, "learning_rate": 1.0388374375826897e-05, "loss": 0.0315, "step": 274000 }, { "epoch": 11.69, "eval_accuracy": 0.9463667820069204, "eval_f1": 0.9468753229061821, "eval_loss": 0.33453667163848877, "eval_runtime": 13.5381, "eval_samples_per_second": 555.027, "eval_steps_per_second": 8.716, "step": 274000 }, { "epoch": 11.72, "learning_rate": 1.0361753659681619e-05, "loss": 0.0296, "step": 274500 }, { "epoch": 11.72, "eval_accuracy": 0.9532871972318339, "eval_f1": 0.9535879021225674, "eval_loss": 0.2986421585083008, "eval_runtime": 13.5415, "eval_samples_per_second": 554.886, "eval_steps_per_second": 8.714, "step": 274500 }, { "epoch": 11.74, "learning_rate": 1.0335079595407793e-05, "loss": 0.0317, "step": 275000 }, { "epoch": 11.74, "eval_accuracy": 0.9467660367314347, "eval_f1": 0.9470580392199042, "eval_loss": 0.3374924957752228, "eval_runtime": 13.5405, "eval_samples_per_second": 554.927, "eval_steps_per_second": 8.715, "step": 275000 }, { "epoch": 11.76, "learning_rate": 1.030840553113397e-05, "loss": 0.0301, "step": 275500 }, { "epoch": 11.76, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9477488712814109, "eval_loss": 0.3174923062324524, "eval_runtime": 13.5443, "eval_samples_per_second": 554.77, "eval_steps_per_second": 8.712, "step": 275500 }, { "epoch": 11.78, "learning_rate": 1.0281731466860144e-05, "loss": 0.0328, "step": 276000 }, { "epoch": 11.78, "eval_accuracy": 0.9506254990684057, "eval_f1": 0.9509012299136492, "eval_loss": 0.30687013268470764, "eval_runtime": 13.5377, "eval_samples_per_second": 555.043, "eval_steps_per_second": 8.716, "step": 276000 }, { "epoch": 11.8, "learning_rate": 1.0255057402586318e-05, "loss": 0.0308, "step": 276500 }, { "epoch": 11.8, "eval_accuracy": 0.9496939047112057, "eval_f1": 0.9499248413227012, "eval_loss": 0.3148394227027893, "eval_runtime": 13.54, "eval_samples_per_second": 554.947, "eval_steps_per_second": 8.715, "step": 276500 }, { "epoch": 11.82, "learning_rate": 1.0228383338312493e-05, "loss": 0.0284, "step": 277000 }, { "epoch": 11.82, "eval_accuracy": 0.9504924141602342, "eval_f1": 0.9507888811610475, "eval_loss": 0.28838837146759033, "eval_runtime": 13.5292, "eval_samples_per_second": 555.39, "eval_steps_per_second": 8.722, "step": 277000 }, { "epoch": 11.84, "learning_rate": 1.0201709274038668e-05, "loss": 0.0301, "step": 277500 }, { "epoch": 11.84, "eval_accuracy": 0.9457013574660633, "eval_f1": 0.9461555213511837, "eval_loss": 0.3430428206920624, "eval_runtime": 13.5298, "eval_samples_per_second": 555.366, "eval_steps_per_second": 8.721, "step": 277500 }, { "epoch": 11.86, "learning_rate": 1.0175035209764844e-05, "loss": 0.0288, "step": 278000 }, { "epoch": 11.86, "eval_accuracy": 0.9491615650785201, "eval_f1": 0.9494279298765108, "eval_loss": 0.30778148770332336, "eval_runtime": 13.5324, "eval_samples_per_second": 555.26, "eval_steps_per_second": 8.72, "step": 278000 }, { "epoch": 11.89, "learning_rate": 1.0148414493619566e-05, "loss": 0.0302, "step": 278500 }, { "epoch": 11.89, "eval_accuracy": 0.9475645461804632, "eval_f1": 0.9479488373562613, "eval_loss": 0.32820576429367065, "eval_runtime": 13.5289, "eval_samples_per_second": 555.402, "eval_steps_per_second": 8.722, "step": 278500 }, { "epoch": 11.91, "learning_rate": 1.0121740429345739e-05, "loss": 0.0322, "step": 279000 }, { "epoch": 11.91, "eval_accuracy": 0.9506254990684057, "eval_f1": 0.9509672286308062, "eval_loss": 0.30490967631340027, "eval_runtime": 13.531, "eval_samples_per_second": 555.318, "eval_steps_per_second": 8.721, "step": 279000 }, { "epoch": 11.93, "learning_rate": 1.0095066365071915e-05, "loss": 0.0291, "step": 279500 }, { "epoch": 11.93, "eval_accuracy": 0.9377162629757786, "eval_f1": 0.9385255738922734, "eval_loss": 0.42235612869262695, "eval_runtime": 13.5309, "eval_samples_per_second": 555.322, "eval_steps_per_second": 8.721, "step": 279500 }, { "epoch": 11.95, "learning_rate": 1.006839230079809e-05, "loss": 0.0303, "step": 280000 }, { "epoch": 11.95, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9515785237481514, "eval_loss": 0.31639474630355835, "eval_runtime": 13.5318, "eval_samples_per_second": 555.286, "eval_steps_per_second": 8.72, "step": 280000 }, { "epoch": 11.97, "learning_rate": 1.0041771584652811e-05, "loss": 0.0316, "step": 280500 }, { "epoch": 11.97, "eval_accuracy": 0.951690178333777, "eval_f1": 0.951872103274024, "eval_loss": 0.3232201635837555, "eval_runtime": 13.5298, "eval_samples_per_second": 555.368, "eval_steps_per_second": 8.722, "step": 280500 }, { "epoch": 11.99, "learning_rate": 1.0015097520378987e-05, "loss": 0.0313, "step": 281000 }, { "epoch": 11.99, "eval_accuracy": 0.9457013574660633, "eval_f1": 0.9462042561834819, "eval_loss": 0.35591208934783936, "eval_runtime": 13.5346, "eval_samples_per_second": 555.169, "eval_steps_per_second": 8.718, "step": 281000 }, { "epoch": 12.01, "learning_rate": 9.98842345610516e-06, "loss": 0.0235, "step": 281500 }, { "epoch": 12.01, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9488481933097076, "eval_loss": 0.3524312674999237, "eval_runtime": 13.533, "eval_samples_per_second": 555.237, "eval_steps_per_second": 8.719, "step": 281500 }, { "epoch": 12.04, "learning_rate": 9.961749391831337e-06, "loss": 0.0236, "step": 282000 }, { "epoch": 12.04, "eval_accuracy": 0.9492946499866916, "eval_f1": 0.9495979257310748, "eval_loss": 0.34940576553344727, "eval_runtime": 13.5339, "eval_samples_per_second": 555.197, "eval_steps_per_second": 8.719, "step": 282000 }, { "epoch": 12.06, "learning_rate": 9.935128675686058e-06, "loss": 0.0228, "step": 282500 }, { "epoch": 12.06, "eval_accuracy": 0.9515570934256056, "eval_f1": 0.9519025620870449, "eval_loss": 0.34196367859840393, "eval_runtime": 13.5343, "eval_samples_per_second": 555.182, "eval_steps_per_second": 8.719, "step": 282500 }, { "epoch": 12.08, "learning_rate": 9.908454611412233e-06, "loss": 0.0228, "step": 283000 }, { "epoch": 12.08, "eval_accuracy": 0.9459675272824062, "eval_f1": 0.9463649056755801, "eval_loss": 0.3793661892414093, "eval_runtime": 13.5368, "eval_samples_per_second": 555.078, "eval_steps_per_second": 8.717, "step": 283000 }, { "epoch": 12.1, "learning_rate": 9.881780547138408e-06, "loss": 0.0255, "step": 283500 }, { "epoch": 12.1, "eval_accuracy": 0.9496939047112057, "eval_f1": 0.9499314374127825, "eval_loss": 0.3424386978149414, "eval_runtime": 13.5163, "eval_samples_per_second": 555.921, "eval_steps_per_second": 8.73, "step": 283500 }, { "epoch": 12.12, "learning_rate": 9.855106482864582e-06, "loss": 0.0225, "step": 284000 }, { "epoch": 12.12, "eval_accuracy": 0.9540857066808623, "eval_f1": 0.954274654474955, "eval_loss": 0.30393466353416443, "eval_runtime": 13.5359, "eval_samples_per_second": 555.118, "eval_steps_per_second": 8.718, "step": 284000 }, { "epoch": 12.14, "learning_rate": 9.828485766719304e-06, "loss": 0.0233, "step": 284500 }, { "epoch": 12.14, "eval_accuracy": 0.9490284801703487, "eval_f1": 0.9493907632773001, "eval_loss": 0.34559109807014465, "eval_runtime": 13.5353, "eval_samples_per_second": 555.14, "eval_steps_per_second": 8.718, "step": 284500 }, { "epoch": 12.16, "learning_rate": 9.80181170244548e-06, "loss": 0.0234, "step": 285000 }, { "epoch": 12.16, "eval_accuracy": 0.9373170082512643, "eval_f1": 0.9381188612046385, "eval_loss": 0.4422382116317749, "eval_runtime": 13.5325, "eval_samples_per_second": 555.254, "eval_steps_per_second": 8.72, "step": 285000 }, { "epoch": 12.18, "learning_rate": 9.775190986300202e-06, "loss": 0.0248, "step": 285500 }, { "epoch": 12.18, "eval_accuracy": 0.9518232632419483, "eval_f1": 0.9520718964883907, "eval_loss": 0.30406928062438965, "eval_runtime": 13.5351, "eval_samples_per_second": 555.151, "eval_steps_per_second": 8.718, "step": 285500 }, { "epoch": 12.21, "learning_rate": 9.748516922026377e-06, "loss": 0.0233, "step": 286000 }, { "epoch": 12.21, "eval_accuracy": 0.9507585839765771, "eval_f1": 0.9510769606493671, "eval_loss": 0.3260180354118347, "eval_runtime": 13.5389, "eval_samples_per_second": 554.992, "eval_steps_per_second": 8.716, "step": 286000 }, { "epoch": 12.23, "learning_rate": 9.72184285775255e-06, "loss": 0.0257, "step": 286500 }, { "epoch": 12.23, "eval_accuracy": 0.9467660367314347, "eval_f1": 0.9471589365766602, "eval_loss": 0.35731199383735657, "eval_runtime": 13.539, "eval_samples_per_second": 554.988, "eval_steps_per_second": 8.716, "step": 286500 }, { "epoch": 12.25, "learning_rate": 9.695168793478726e-06, "loss": 0.0232, "step": 287000 }, { "epoch": 12.25, "eval_accuracy": 0.9458344423742348, "eval_f1": 0.9464105058302559, "eval_loss": 0.3893645703792572, "eval_runtime": 13.5441, "eval_samples_per_second": 554.779, "eval_steps_per_second": 8.712, "step": 287000 }, { "epoch": 12.27, "learning_rate": 9.6684947292049e-06, "loss": 0.0252, "step": 287500 }, { "epoch": 12.27, "eval_accuracy": 0.9495608198030343, "eval_f1": 0.9499226526270541, "eval_loss": 0.35022905468940735, "eval_runtime": 13.5389, "eval_samples_per_second": 554.993, "eval_steps_per_second": 8.716, "step": 287500 }, { "epoch": 12.29, "learning_rate": 9.641820664931075e-06, "loss": 0.0253, "step": 288000 }, { "epoch": 12.29, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9515790174427777, "eval_loss": 0.3345930278301239, "eval_runtime": 13.5468, "eval_samples_per_second": 554.669, "eval_steps_per_second": 8.711, "step": 288000 }, { "epoch": 12.31, "learning_rate": 9.61514660065725e-06, "loss": 0.0247, "step": 288500 }, { "epoch": 12.31, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9493082796480175, "eval_loss": 0.3517289161682129, "eval_runtime": 13.5314, "eval_samples_per_second": 555.302, "eval_steps_per_second": 8.72, "step": 288500 }, { "epoch": 12.33, "learning_rate": 9.588525884511973e-06, "loss": 0.0263, "step": 289000 }, { "epoch": 12.33, "eval_accuracy": 0.9487623103540058, "eval_f1": 0.9491705634218461, "eval_loss": 0.34944280982017517, "eval_runtime": 13.5432, "eval_samples_per_second": 554.816, "eval_steps_per_second": 8.713, "step": 289000 }, { "epoch": 12.36, "learning_rate": 9.561851820238148e-06, "loss": 0.0245, "step": 289500 }, { "epoch": 12.36, "eval_accuracy": 0.9496939047112057, "eval_f1": 0.9500608964308354, "eval_loss": 0.3433271646499634, "eval_runtime": 13.5444, "eval_samples_per_second": 554.767, "eval_steps_per_second": 8.712, "step": 289500 }, { "epoch": 12.38, "learning_rate": 9.53517775596432e-06, "loss": 0.0227, "step": 290000 }, { "epoch": 12.38, "eval_accuracy": 0.9530210274154911, "eval_f1": 0.9533301903565214, "eval_loss": 0.31356149911880493, "eval_runtime": 13.5415, "eval_samples_per_second": 554.885, "eval_steps_per_second": 8.714, "step": 290000 }, { "epoch": 12.4, "learning_rate": 9.508503691690497e-06, "loss": 0.0267, "step": 290500 }, { "epoch": 12.4, "eval_accuracy": 0.9484961405376631, "eval_f1": 0.9488992009331783, "eval_loss": 0.34211859107017517, "eval_runtime": 13.5416, "eval_samples_per_second": 554.882, "eval_steps_per_second": 8.714, "step": 290500 }, { "epoch": 12.42, "learning_rate": 9.481829627416671e-06, "loss": 0.0243, "step": 291000 }, { "epoch": 12.42, "eval_accuracy": 0.9443705083843492, "eval_f1": 0.9448289751919863, "eval_loss": 0.35784754157066345, "eval_runtime": 13.537, "eval_samples_per_second": 555.07, "eval_steps_per_second": 8.717, "step": 291000 }, { "epoch": 12.44, "learning_rate": 9.455155563142846e-06, "loss": 0.0269, "step": 291500 }, { "epoch": 12.44, "eval_accuracy": 0.9479638009049773, "eval_f1": 0.9482919091970918, "eval_loss": 0.3485228717327118, "eval_runtime": 13.5293, "eval_samples_per_second": 555.389, "eval_steps_per_second": 8.722, "step": 291500 }, { "epoch": 12.46, "learning_rate": 9.428534846997568e-06, "loss": 0.0245, "step": 292000 }, { "epoch": 12.46, "eval_accuracy": 0.9417088102209209, "eval_f1": 0.9424175859815097, "eval_loss": 0.4244661331176758, "eval_runtime": 13.5348, "eval_samples_per_second": 555.162, "eval_steps_per_second": 8.718, "step": 292000 }, { "epoch": 12.48, "learning_rate": 9.401860782723742e-06, "loss": 0.0242, "step": 292500 }, { "epoch": 12.48, "eval_accuracy": 0.9464998669150918, "eval_f1": 0.9469866857888503, "eval_loss": 0.38829919695854187, "eval_runtime": 13.5322, "eval_samples_per_second": 555.27, "eval_steps_per_second": 8.72, "step": 292500 }, { "epoch": 12.5, "learning_rate": 9.375186718449918e-06, "loss": 0.0255, "step": 293000 }, { "epoch": 12.5, "eval_accuracy": 0.9391801969656641, "eval_f1": 0.9399025777758279, "eval_loss": 0.4465163052082062, "eval_runtime": 13.5383, "eval_samples_per_second": 555.02, "eval_steps_per_second": 8.716, "step": 293000 }, { "epoch": 12.53, "learning_rate": 9.348512654176091e-06, "loss": 0.0276, "step": 293500 }, { "epoch": 12.53, "eval_accuracy": 0.9423742347617781, "eval_f1": 0.9429647320426405, "eval_loss": 0.4152087867259979, "eval_runtime": 13.5354, "eval_samples_per_second": 555.137, "eval_steps_per_second": 8.718, "step": 293500 }, { "epoch": 12.55, "learning_rate": 9.321838589902267e-06, "loss": 0.0246, "step": 294000 }, { "epoch": 12.55, "eval_accuracy": 0.9397125365983497, "eval_f1": 0.9404570177915673, "eval_loss": 0.42639487981796265, "eval_runtime": 13.5309, "eval_samples_per_second": 555.321, "eval_steps_per_second": 8.721, "step": 294000 }, { "epoch": 12.57, "learning_rate": 9.295164525628442e-06, "loss": 0.0259, "step": 294500 }, { "epoch": 12.57, "eval_accuracy": 0.9496939047112057, "eval_f1": 0.9500699644825673, "eval_loss": 0.34086450934410095, "eval_runtime": 13.5329, "eval_samples_per_second": 555.238, "eval_steps_per_second": 8.719, "step": 294500 }, { "epoch": 12.59, "learning_rate": 9.268543809483164e-06, "loss": 0.028, "step": 295000 }, { "epoch": 12.59, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9454380735919529, "eval_loss": 0.35605818033218384, "eval_runtime": 13.5307, "eval_samples_per_second": 555.331, "eval_steps_per_second": 8.721, "step": 295000 }, { "epoch": 12.61, "learning_rate": 9.241869745209339e-06, "loss": 0.0259, "step": 295500 }, { "epoch": 12.61, "eval_accuracy": 0.9476976310886346, "eval_f1": 0.9481629327735969, "eval_loss": 0.3794984817504883, "eval_runtime": 13.5317, "eval_samples_per_second": 555.287, "eval_steps_per_second": 8.72, "step": 295500 }, { "epoch": 12.63, "learning_rate": 9.215195680935513e-06, "loss": 0.0235, "step": 296000 }, { "epoch": 12.63, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.95056367027671, "eval_loss": 0.3418872356414795, "eval_runtime": 13.5317, "eval_samples_per_second": 555.289, "eval_steps_per_second": 8.72, "step": 296000 }, { "epoch": 12.65, "learning_rate": 9.18852161666169e-06, "loss": 0.0239, "step": 296500 }, { "epoch": 12.65, "eval_accuracy": 0.9504924141602342, "eval_f1": 0.9509260011654419, "eval_loss": 0.3470332622528076, "eval_runtime": 13.5278, "eval_samples_per_second": 555.448, "eval_steps_per_second": 8.723, "step": 296500 }, { "epoch": 12.68, "learning_rate": 9.161847552387862e-06, "loss": 0.0237, "step": 297000 }, { "epoch": 12.68, "eval_accuracy": 0.9470322065477775, "eval_f1": 0.9476021217895277, "eval_loss": 0.3777107298374176, "eval_runtime": 13.5316, "eval_samples_per_second": 555.292, "eval_steps_per_second": 8.72, "step": 297000 }, { "epoch": 12.7, "learning_rate": 9.135173488114037e-06, "loss": 0.0283, "step": 297500 }, { "epoch": 12.7, "eval_accuracy": 0.9470322065477775, "eval_f1": 0.9475167386604156, "eval_loss": 0.360249787569046, "eval_runtime": 13.5329, "eval_samples_per_second": 555.241, "eval_steps_per_second": 8.72, "step": 297500 }, { "epoch": 12.72, "learning_rate": 9.108499423840213e-06, "loss": 0.0233, "step": 298000 }, { "epoch": 12.72, "eval_accuracy": 0.9434389140271493, "eval_f1": 0.9440649766238651, "eval_loss": 0.3837593197822571, "eval_runtime": 13.533, "eval_samples_per_second": 555.235, "eval_steps_per_second": 8.719, "step": 298000 }, { "epoch": 12.74, "learning_rate": 9.081825359566387e-06, "loss": 0.0261, "step": 298500 }, { "epoch": 12.74, "eval_accuracy": 0.943971253659835, "eval_f1": 0.9446036441422058, "eval_loss": 0.37317943572998047, "eval_runtime": 13.5218, "eval_samples_per_second": 555.696, "eval_steps_per_second": 8.727, "step": 298500 }, { "epoch": 12.76, "learning_rate": 9.05520464342111e-06, "loss": 0.0273, "step": 299000 }, { "epoch": 12.76, "eval_accuracy": 0.9451690178333777, "eval_f1": 0.9455156483734646, "eval_loss": 0.36402907967567444, "eval_runtime": 13.5359, "eval_samples_per_second": 555.117, "eval_steps_per_second": 8.718, "step": 299000 }, { "epoch": 12.78, "learning_rate": 9.028530579147284e-06, "loss": 0.0281, "step": 299500 }, { "epoch": 12.78, "eval_accuracy": 0.9486292254458344, "eval_f1": 0.9490071762088005, "eval_loss": 0.3424794673919678, "eval_runtime": 13.5348, "eval_samples_per_second": 555.161, "eval_steps_per_second": 8.718, "step": 299500 }, { "epoch": 12.8, "learning_rate": 9.001909863002006e-06, "loss": 0.0258, "step": 300000 }, { "epoch": 12.8, "eval_accuracy": 0.9480968858131488, "eval_f1": 0.948523862823844, "eval_loss": 0.3134201765060425, "eval_runtime": 13.533, "eval_samples_per_second": 555.234, "eval_steps_per_second": 8.719, "step": 300000 }, { "epoch": 12.82, "learning_rate": 8.97528914685673e-06, "loss": 0.0274, "step": 300500 }, { "epoch": 12.82, "eval_accuracy": 0.9490284801703487, "eval_f1": 0.949378066518175, "eval_loss": 0.3174980878829956, "eval_runtime": 13.5411, "eval_samples_per_second": 554.902, "eval_steps_per_second": 8.714, "step": 300500 }, { "epoch": 12.85, "learning_rate": 8.948615082582904e-06, "loss": 0.0287, "step": 301000 }, { "epoch": 12.85, "eval_accuracy": 0.9492946499866916, "eval_f1": 0.9496516160818783, "eval_loss": 0.30968043208122253, "eval_runtime": 13.543, "eval_samples_per_second": 554.827, "eval_steps_per_second": 8.713, "step": 301000 }, { "epoch": 12.87, "learning_rate": 8.921941018309079e-06, "loss": 0.0239, "step": 301500 }, { "epoch": 12.87, "eval_accuracy": 0.9450359329252063, "eval_f1": 0.9456431323564933, "eval_loss": 0.3644786775112152, "eval_runtime": 13.5436, "eval_samples_per_second": 554.799, "eval_steps_per_second": 8.713, "step": 301500 }, { "epoch": 12.89, "learning_rate": 8.895266954035253e-06, "loss": 0.0267, "step": 302000 }, { "epoch": 12.89, "eval_accuracy": 0.9486292254458344, "eval_f1": 0.9490490952355025, "eval_loss": 0.3264901340007782, "eval_runtime": 13.5378, "eval_samples_per_second": 555.038, "eval_steps_per_second": 8.716, "step": 302000 }, { "epoch": 12.91, "learning_rate": 8.868592889761428e-06, "loss": 0.0256, "step": 302500 }, { "epoch": 12.91, "eval_accuracy": 0.9410433856800638, "eval_f1": 0.9418673516898102, "eval_loss": 0.39492905139923096, "eval_runtime": 13.5485, "eval_samples_per_second": 554.599, "eval_steps_per_second": 8.709, "step": 302500 }, { "epoch": 12.93, "learning_rate": 8.841918825487604e-06, "loss": 0.0266, "step": 303000 }, { "epoch": 12.93, "eval_accuracy": 0.9474314612722917, "eval_f1": 0.9479752478937516, "eval_loss": 0.33922043442726135, "eval_runtime": 13.5432, "eval_samples_per_second": 554.819, "eval_steps_per_second": 8.713, "step": 303000 }, { "epoch": 12.95, "learning_rate": 8.815244761213777e-06, "loss": 0.0309, "step": 303500 }, { "epoch": 12.95, "eval_accuracy": 0.9518232632419483, "eval_f1": 0.9521082903193254, "eval_loss": 0.2975204885005951, "eval_runtime": 13.5374, "eval_samples_per_second": 555.056, "eval_steps_per_second": 8.717, "step": 303500 }, { "epoch": 12.97, "learning_rate": 8.788570696939951e-06, "loss": 0.0285, "step": 304000 }, { "epoch": 12.97, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9474010746701429, "eval_loss": 0.3319370150566101, "eval_runtime": 13.5414, "eval_samples_per_second": 554.892, "eval_steps_per_second": 8.714, "step": 304000 }, { "epoch": 13.0, "learning_rate": 8.761896632666127e-06, "loss": 0.0277, "step": 304500 }, { "epoch": 13.0, "eval_accuracy": 0.9471652914559489, "eval_f1": 0.9475862830905143, "eval_loss": 0.3333088755607605, "eval_runtime": 13.5417, "eval_samples_per_second": 554.877, "eval_steps_per_second": 8.714, "step": 304500 }, { "epoch": 13.02, "learning_rate": 8.735222568392302e-06, "loss": 0.0195, "step": 305000 }, { "epoch": 13.02, "eval_accuracy": 0.9470322065477775, "eval_f1": 0.9474741308095099, "eval_loss": 0.3394637405872345, "eval_runtime": 13.5268, "eval_samples_per_second": 555.489, "eval_steps_per_second": 8.723, "step": 305000 }, { "epoch": 13.04, "learning_rate": 8.708601852247024e-06, "loss": 0.0177, "step": 305500 }, { "epoch": 13.04, "eval_accuracy": 0.9507585839765771, "eval_f1": 0.9511306898962475, "eval_loss": 0.33947256207466125, "eval_runtime": 13.529, "eval_samples_per_second": 555.399, "eval_steps_per_second": 8.722, "step": 305500 }, { "epoch": 13.06, "learning_rate": 8.681927787973198e-06, "loss": 0.0203, "step": 306000 }, { "epoch": 13.06, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.9470831681704304, "eval_loss": 0.38961419463157654, "eval_runtime": 13.5335, "eval_samples_per_second": 555.216, "eval_steps_per_second": 8.719, "step": 306000 }, { "epoch": 13.08, "learning_rate": 8.655253723699375e-06, "loss": 0.0187, "step": 306500 }, { "epoch": 13.08, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.9500627333947544, "eval_loss": 0.3579484224319458, "eval_runtime": 13.5321, "eval_samples_per_second": 555.273, "eval_steps_per_second": 8.72, "step": 306500 }, { "epoch": 13.1, "learning_rate": 8.628579659425548e-06, "loss": 0.0217, "step": 307000 }, { "epoch": 13.1, "eval_accuracy": 0.9524886877828054, "eval_f1": 0.952847940349729, "eval_loss": 0.3058973550796509, "eval_runtime": 13.5306, "eval_samples_per_second": 555.333, "eval_steps_per_second": 8.721, "step": 307000 }, { "epoch": 13.12, "learning_rate": 8.601905595151722e-06, "loss": 0.0176, "step": 307500 }, { "epoch": 13.12, "eval_accuracy": 0.9515570934256056, "eval_f1": 0.9519249944919858, "eval_loss": 0.34081974625587463, "eval_runtime": 13.5326, "eval_samples_per_second": 555.253, "eval_steps_per_second": 8.72, "step": 307500 }, { "epoch": 13.14, "learning_rate": 8.575231530877898e-06, "loss": 0.0216, "step": 308000 }, { "epoch": 13.14, "eval_accuracy": 0.9490284801703487, "eval_f1": 0.9494784208638937, "eval_loss": 0.37706053256988525, "eval_runtime": 13.5281, "eval_samples_per_second": 555.436, "eval_steps_per_second": 8.723, "step": 308000 }, { "epoch": 13.17, "learning_rate": 8.548557466604073e-06, "loss": 0.0198, "step": 308500 }, { "epoch": 13.17, "eval_accuracy": 0.9474314612722917, "eval_f1": 0.9478336601536719, "eval_loss": 0.35731518268585205, "eval_runtime": 13.5317, "eval_samples_per_second": 555.288, "eval_steps_per_second": 8.72, "step": 308500 }, { "epoch": 13.19, "learning_rate": 8.521883402330247e-06, "loss": 0.0189, "step": 309000 }, { "epoch": 13.19, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9506267413803191, "eval_loss": 0.3440324068069458, "eval_runtime": 13.5286, "eval_samples_per_second": 555.415, "eval_steps_per_second": 8.722, "step": 309000 }, { "epoch": 13.21, "learning_rate": 8.495316034313517e-06, "loss": 0.0209, "step": 309500 }, { "epoch": 13.21, "eval_accuracy": 0.95009315943572, "eval_f1": 0.9505051712846733, "eval_loss": 0.3339444398880005, "eval_runtime": 13.5263, "eval_samples_per_second": 555.511, "eval_steps_per_second": 8.724, "step": 309500 }, { "epoch": 13.23, "learning_rate": 8.468641970039691e-06, "loss": 0.0192, "step": 310000 }, { "epoch": 13.23, "eval_accuracy": 0.9459675272824062, "eval_f1": 0.9464722133111272, "eval_loss": 0.37150633335113525, "eval_runtime": 13.5326, "eval_samples_per_second": 555.253, "eval_steps_per_second": 8.72, "step": 310000 }, { "epoch": 13.25, "learning_rate": 8.441967905765866e-06, "loss": 0.021, "step": 310500 }, { "epoch": 13.25, "eval_accuracy": 0.9504924141602342, "eval_f1": 0.950787518846855, "eval_loss": 0.35833537578582764, "eval_runtime": 13.5315, "eval_samples_per_second": 555.299, "eval_steps_per_second": 8.72, "step": 310500 }, { "epoch": 13.27, "learning_rate": 8.415293841492042e-06, "loss": 0.0211, "step": 311000 }, { "epoch": 13.27, "eval_accuracy": 0.9499600745275486, "eval_f1": 0.9503528961719908, "eval_loss": 0.3444558084011078, "eval_runtime": 13.5321, "eval_samples_per_second": 555.273, "eval_steps_per_second": 8.72, "step": 311000 }, { "epoch": 13.29, "learning_rate": 8.388619777218215e-06, "loss": 0.0174, "step": 311500 }, { "epoch": 13.29, "eval_accuracy": 0.9483630556294916, "eval_f1": 0.948639503786924, "eval_loss": 0.3671543002128601, "eval_runtime": 13.5177, "eval_samples_per_second": 555.864, "eval_steps_per_second": 8.729, "step": 311500 }, { "epoch": 13.32, "learning_rate": 8.361945712944391e-06, "loss": 0.0198, "step": 312000 }, { "epoch": 13.32, "eval_accuracy": 0.9506254990684057, "eval_f1": 0.9509650396188268, "eval_loss": 0.35667410492897034, "eval_runtime": 13.5286, "eval_samples_per_second": 555.416, "eval_steps_per_second": 8.722, "step": 312000 }, { "epoch": 13.34, "learning_rate": 8.335324996799113e-06, "loss": 0.024, "step": 312500 }, { "epoch": 13.34, "eval_accuracy": 0.9512909236092627, "eval_f1": 0.9516829387166317, "eval_loss": 0.342978298664093, "eval_runtime": 13.5287, "eval_samples_per_second": 555.411, "eval_steps_per_second": 8.722, "step": 312500 }, { "epoch": 13.36, "learning_rate": 8.308650932525288e-06, "loss": 0.0218, "step": 313000 }, { "epoch": 13.36, "eval_accuracy": 0.9488953952621773, "eval_f1": 0.9491380374650025, "eval_loss": 0.3575313985347748, "eval_runtime": 13.53, "eval_samples_per_second": 555.36, "eval_steps_per_second": 8.721, "step": 313000 }, { "epoch": 13.38, "learning_rate": 8.281976868251462e-06, "loss": 0.0216, "step": 313500 }, { "epoch": 13.38, "eval_accuracy": 0.9515570934256056, "eval_f1": 0.9518313582997843, "eval_loss": 0.3285492956638336, "eval_runtime": 13.5303, "eval_samples_per_second": 555.346, "eval_steps_per_second": 8.721, "step": 313500 }, { "epoch": 13.4, "learning_rate": 8.255302803977637e-06, "loss": 0.0219, "step": 314000 }, { "epoch": 13.4, "eval_accuracy": 0.9520894330582912, "eval_f1": 0.9523844337000912, "eval_loss": 0.33300209045410156, "eval_runtime": 13.549, "eval_samples_per_second": 554.581, "eval_steps_per_second": 8.709, "step": 314000 }, { "epoch": 13.42, "learning_rate": 8.228628739703813e-06, "loss": 0.0222, "step": 314500 }, { "epoch": 13.42, "eval_accuracy": 0.9422411498536066, "eval_f1": 0.9429110456077876, "eval_loss": 0.4017827808856964, "eval_runtime": 13.5434, "eval_samples_per_second": 554.808, "eval_steps_per_second": 8.713, "step": 314500 }, { "epoch": 13.44, "learning_rate": 8.202008023558535e-06, "loss": 0.0223, "step": 315000 }, { "epoch": 13.44, "eval_accuracy": 0.9542187915890338, "eval_f1": 0.954503749292435, "eval_loss": 0.3192913234233856, "eval_runtime": 13.5411, "eval_samples_per_second": 554.904, "eval_steps_per_second": 8.714, "step": 315000 }, { "epoch": 13.47, "learning_rate": 8.17533395928471e-06, "loss": 0.0208, "step": 315500 }, { "epoch": 13.47, "eval_accuracy": 0.9491615650785201, "eval_f1": 0.9495546249079766, "eval_loss": 0.34587451815605164, "eval_runtime": 13.5465, "eval_samples_per_second": 554.682, "eval_steps_per_second": 8.711, "step": 315500 }, { "epoch": 13.49, "learning_rate": 8.148659895010884e-06, "loss": 0.021, "step": 316000 }, { "epoch": 13.49, "eval_accuracy": 0.9480968858131488, "eval_f1": 0.9485904316567751, "eval_loss": 0.36984121799468994, "eval_runtime": 13.5392, "eval_samples_per_second": 554.979, "eval_steps_per_second": 8.715, "step": 316000 }, { "epoch": 13.51, "learning_rate": 8.121985830737058e-06, "loss": 0.0229, "step": 316500 }, { "epoch": 13.51, "eval_accuracy": 0.9503593292520628, "eval_f1": 0.9507095888927306, "eval_loss": 0.36136379837989807, "eval_runtime": 13.542, "eval_samples_per_second": 554.865, "eval_steps_per_second": 8.714, "step": 316500 }, { "epoch": 13.53, "learning_rate": 8.095311766463233e-06, "loss": 0.0216, "step": 317000 }, { "epoch": 13.53, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.9502031930904051, "eval_loss": 0.3747590482234955, "eval_runtime": 13.5418, "eval_samples_per_second": 554.873, "eval_steps_per_second": 8.714, "step": 317000 }, { "epoch": 13.55, "learning_rate": 8.068637702189407e-06, "loss": 0.0215, "step": 317500 }, { "epoch": 13.55, "eval_accuracy": 0.9487623103540058, "eval_f1": 0.9490990236660635, "eval_loss": 0.3659830689430237, "eval_runtime": 13.5408, "eval_samples_per_second": 554.916, "eval_steps_per_second": 8.714, "step": 317500 }, { "epoch": 13.57, "learning_rate": 8.041963637915584e-06, "loss": 0.0212, "step": 318000 }, { "epoch": 13.57, "eval_accuracy": 0.9546180463135481, "eval_f1": 0.9548286526427764, "eval_loss": 0.32127419114112854, "eval_runtime": 13.5416, "eval_samples_per_second": 554.881, "eval_steps_per_second": 8.714, "step": 318000 }, { "epoch": 13.59, "learning_rate": 8.015342921770306e-06, "loss": 0.0221, "step": 318500 }, { "epoch": 13.59, "eval_accuracy": 0.9520894330582912, "eval_f1": 0.9524872864210052, "eval_loss": 0.3612636923789978, "eval_runtime": 13.543, "eval_samples_per_second": 554.827, "eval_steps_per_second": 8.713, "step": 318500 }, { "epoch": 13.61, "learning_rate": 7.98866885749648e-06, "loss": 0.0224, "step": 319000 }, { "epoch": 13.61, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9474092608026106, "eval_loss": 0.3816893398761749, "eval_runtime": 13.5345, "eval_samples_per_second": 555.175, "eval_steps_per_second": 8.718, "step": 319000 }, { "epoch": 13.64, "learning_rate": 7.961994793222655e-06, "loss": 0.021, "step": 319500 }, { "epoch": 13.64, "eval_accuracy": 0.9449028480170348, "eval_f1": 0.9454074908591944, "eval_loss": 0.41708582639694214, "eval_runtime": 13.5337, "eval_samples_per_second": 555.205, "eval_steps_per_second": 8.719, "step": 319500 }, { "epoch": 13.66, "learning_rate": 7.93532072894883e-06, "loss": 0.0212, "step": 320000 }, { "epoch": 13.66, "eval_accuracy": 0.9466329518232632, "eval_f1": 0.9471857115940103, "eval_loss": 0.41624367237091064, "eval_runtime": 13.5161, "eval_samples_per_second": 555.931, "eval_steps_per_second": 8.73, "step": 320000 }, { "epoch": 13.68, "learning_rate": 7.908646664675004e-06, "loss": 0.025, "step": 320500 }, { "epoch": 13.68, "eval_accuracy": 0.9524886877828054, "eval_f1": 0.9527800688729483, "eval_loss": 0.3502090275287628, "eval_runtime": 13.515, "eval_samples_per_second": 555.974, "eval_steps_per_second": 8.731, "step": 320500 }, { "epoch": 13.7, "learning_rate": 7.881972600401178e-06, "loss": 0.0206, "step": 321000 }, { "epoch": 13.7, "eval_accuracy": 0.9482299707213202, "eval_f1": 0.9486548518369584, "eval_loss": 0.3662354648113251, "eval_runtime": 13.5152, "eval_samples_per_second": 555.965, "eval_steps_per_second": 8.731, "step": 321000 }, { "epoch": 13.72, "learning_rate": 7.8553518842559e-06, "loss": 0.0238, "step": 321500 }, { "epoch": 13.72, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9505464950704269, "eval_loss": 0.341840535402298, "eval_runtime": 13.52, "eval_samples_per_second": 555.769, "eval_steps_per_second": 8.728, "step": 321500 }, { "epoch": 13.74, "learning_rate": 7.828677819982076e-06, "loss": 0.021, "step": 322000 }, { "epoch": 13.74, "eval_accuracy": 0.9507585839765771, "eval_f1": 0.9510538753270081, "eval_loss": 0.3421614170074463, "eval_runtime": 13.5138, "eval_samples_per_second": 556.023, "eval_steps_per_second": 8.732, "step": 322000 }, { "epoch": 13.76, "learning_rate": 7.802003755708251e-06, "loss": 0.02, "step": 322500 }, { "epoch": 13.76, "eval_accuracy": 0.9522225179664626, "eval_f1": 0.9525058871622871, "eval_loss": 0.33706653118133545, "eval_runtime": 13.514, "eval_samples_per_second": 556.017, "eval_steps_per_second": 8.732, "step": 322500 }, { "epoch": 13.79, "learning_rate": 7.775329691434426e-06, "loss": 0.0207, "step": 323000 }, { "epoch": 13.79, "eval_accuracy": 0.9472983763641203, "eval_f1": 0.9477291310319618, "eval_loss": 0.3597688376903534, "eval_runtime": 13.5206, "eval_samples_per_second": 555.743, "eval_steps_per_second": 8.727, "step": 323000 }, { "epoch": 13.81, "learning_rate": 7.7486556271606e-06, "loss": 0.0203, "step": 323500 }, { "epoch": 13.81, "eval_accuracy": 0.9495608198030343, "eval_f1": 0.9498900072393941, "eval_loss": 0.3365311920642853, "eval_runtime": 13.5216, "eval_samples_per_second": 555.703, "eval_steps_per_second": 8.727, "step": 323500 }, { "epoch": 13.83, "learning_rate": 7.722034911015322e-06, "loss": 0.0235, "step": 324000 }, { "epoch": 13.83, "eval_accuracy": 0.9499600745275486, "eval_f1": 0.9503223785334844, "eval_loss": 0.32426974177360535, "eval_runtime": 13.5237, "eval_samples_per_second": 555.619, "eval_steps_per_second": 8.725, "step": 324000 }, { "epoch": 13.85, "learning_rate": 7.695360846741497e-06, "loss": 0.0211, "step": 324500 }, { "epoch": 13.85, "eval_accuracy": 0.9479638009049773, "eval_f1": 0.9484043560686116, "eval_loss": 0.35503652691841125, "eval_runtime": 13.5258, "eval_samples_per_second": 555.532, "eval_steps_per_second": 8.724, "step": 324500 }, { "epoch": 13.87, "learning_rate": 7.668686782467671e-06, "loss": 0.0206, "step": 325000 }, { "epoch": 13.87, "eval_accuracy": 0.9519563481501198, "eval_f1": 0.952238975411297, "eval_loss": 0.31724080443382263, "eval_runtime": 13.5262, "eval_samples_per_second": 555.515, "eval_steps_per_second": 8.724, "step": 325000 }, { "epoch": 13.89, "learning_rate": 7.642012718193846e-06, "loss": 0.0244, "step": 325500 }, { "epoch": 13.89, "eval_accuracy": 0.95009315943572, "eval_f1": 0.9504515199397509, "eval_loss": 0.3432355225086212, "eval_runtime": 13.5167, "eval_samples_per_second": 555.907, "eval_steps_per_second": 8.73, "step": 325500 }, { "epoch": 13.91, "learning_rate": 7.615338653920021e-06, "loss": 0.0205, "step": 326000 }, { "epoch": 13.91, "eval_accuracy": 0.9530210274154911, "eval_f1": 0.9533589200873134, "eval_loss": 0.3301331698894501, "eval_runtime": 13.529, "eval_samples_per_second": 555.398, "eval_steps_per_second": 8.722, "step": 326000 }, { "epoch": 13.93, "learning_rate": 7.588664589646196e-06, "loss": 0.0205, "step": 326500 }, { "epoch": 13.93, "eval_accuracy": 0.9502262443438914, "eval_f1": 0.9504567720611751, "eval_loss": 0.3518039882183075, "eval_runtime": 13.5331, "eval_samples_per_second": 555.229, "eval_steps_per_second": 8.719, "step": 326500 }, { "epoch": 13.96, "learning_rate": 7.56199052537237e-06, "loss": 0.0235, "step": 327000 }, { "epoch": 13.96, "eval_accuracy": 0.9532871972318339, "eval_f1": 0.9535624980400332, "eval_loss": 0.3395467698574066, "eval_runtime": 13.5318, "eval_samples_per_second": 555.284, "eval_steps_per_second": 8.72, "step": 327000 }, { "epoch": 13.98, "learning_rate": 7.535369809227093e-06, "loss": 0.0205, "step": 327500 }, { "epoch": 13.98, "eval_accuracy": 0.9511578387010913, "eval_f1": 0.9514405872868344, "eval_loss": 0.3324923813343048, "eval_runtime": 13.5349, "eval_samples_per_second": 555.157, "eval_steps_per_second": 8.718, "step": 327500 }, { "epoch": 14.0, "learning_rate": 7.508695744953267e-06, "loss": 0.0213, "step": 328000 }, { "epoch": 14.0, "eval_accuracy": 0.9468991216396061, "eval_f1": 0.9473362888156124, "eval_loss": 0.3898778557777405, "eval_runtime": 13.541, "eval_samples_per_second": 554.909, "eval_steps_per_second": 8.714, "step": 328000 }, { "epoch": 14.02, "learning_rate": 7.48207502880799e-06, "loss": 0.0179, "step": 328500 }, { "epoch": 14.02, "eval_accuracy": 0.9498269896193772, "eval_f1": 0.950198372051693, "eval_loss": 0.35235846042633057, "eval_runtime": 13.5403, "eval_samples_per_second": 554.937, "eval_steps_per_second": 8.715, "step": 328500 }, { "epoch": 14.04, "learning_rate": 7.4554009645341656e-06, "loss": 0.0163, "step": 329000 }, { "epoch": 14.04, "eval_accuracy": 0.9518232632419483, "eval_f1": 0.9521459996214305, "eval_loss": 0.3546800911426544, "eval_runtime": 13.5254, "eval_samples_per_second": 555.546, "eval_steps_per_second": 8.724, "step": 329000 }, { "epoch": 14.06, "learning_rate": 7.428726900260339e-06, "loss": 0.0141, "step": 329500 }, { "epoch": 14.06, "eval_accuracy": 0.9558158104870907, "eval_f1": 0.9559601479074069, "eval_loss": 0.3263898193836212, "eval_runtime": 13.5426, "eval_samples_per_second": 554.842, "eval_steps_per_second": 8.713, "step": 329500 }, { "epoch": 14.08, "learning_rate": 7.402052835986514e-06, "loss": 0.0172, "step": 330000 }, { "epoch": 14.08, "eval_accuracy": 0.9550173010380623, "eval_f1": 0.9552692103290779, "eval_loss": 0.3311573565006256, "eval_runtime": 13.5321, "eval_samples_per_second": 555.273, "eval_steps_per_second": 8.72, "step": 330000 }, { "epoch": 14.11, "learning_rate": 7.375378771712689e-06, "loss": 0.0167, "step": 330500 }, { "epoch": 14.11, "eval_accuracy": 0.9538195368645196, "eval_f1": 0.9540514746090495, "eval_loss": 0.34398627281188965, "eval_runtime": 13.5429, "eval_samples_per_second": 554.831, "eval_steps_per_second": 8.713, "step": 330500 }, { "epoch": 14.13, "learning_rate": 7.348704707438864e-06, "loss": 0.0171, "step": 331000 }, { "epoch": 14.13, "eval_accuracy": 0.9576789992014906, "eval_f1": 0.9578177765352257, "eval_loss": 0.3113496005535126, "eval_runtime": 13.5401, "eval_samples_per_second": 554.945, "eval_steps_per_second": 8.715, "step": 331000 } ], "logging_steps": 500, "max_steps": 468620, "num_train_epochs": 20, "save_steps": 500, "total_flos": 5.573569942291212e+18, "trial_name": null, "trial_params": null }