diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,10611 +0,0 @@ -{ - "best_metric": 0.9576789992014906, - "best_model_checkpoint": "checkpoints/checkpoint-331000", - "epoch": 14.126584439417865, - "eval_steps": 500, - "global_step": 331000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.02, - "learning_rate": 1.0669625709530112e-07, - "loss": 0.6022, - "step": 500 - }, - { - "epoch": 0.02, - "eval_accuracy": 0.543784934788395, - "eval_f1": 0.5427684499683697, - "eval_loss": 0.9619719982147217, - "eval_runtime": 29.8388, - "eval_samples_per_second": 251.82, - "eval_steps_per_second": 3.955, - "step": 500 - }, - { - "epoch": 0.04, - "learning_rate": 2.1339251419060223e-07, - "loss": 0.485, - "step": 1000 - }, - { - "epoch": 0.04, - "eval_accuracy": 0.5958211338834176, - "eval_f1": 0.5978559682284419, - "eval_loss": 0.9289390444755554, - "eval_runtime": 29.8421, - "eval_samples_per_second": 251.792, - "eval_steps_per_second": 3.954, - "step": 1000 - }, - { - "epoch": 0.06, - "learning_rate": 3.200887712859033e-07, - "loss": 0.392, - "step": 1500 - }, - { - "epoch": 0.06, - "eval_accuracy": 0.6265637476710141, - "eval_f1": 0.6288184460866618, - "eval_loss": 0.9415813088417053, - "eval_runtime": 29.8528, - "eval_samples_per_second": 251.701, - "eval_steps_per_second": 3.953, - "step": 1500 - }, - { - "epoch": 0.09, - "learning_rate": 4.2678502838120447e-07, - "loss": 0.341, - "step": 2000 - }, - { - "epoch": 0.09, - "eval_accuracy": 0.6396060686718126, - "eval_f1": 0.6420018684811181, - "eval_loss": 0.9537326097488403, - "eval_runtime": 29.6716, - "eval_samples_per_second": 253.239, - "eval_steps_per_second": 3.977, - "step": 2000 - }, - { - "epoch": 0.11, - "learning_rate": 5.332678929623149e-07, - "loss": 0.3014, - "step": 2500 - }, - { - "epoch": 0.11, - "eval_accuracy": 0.6586372105403248, - "eval_f1": 0.6606654532497396, - "eval_loss": 0.9310646057128906, - "eval_runtime": 29.6727, - "eval_samples_per_second": 253.229, - "eval_steps_per_second": 3.977, - "step": 2500 - }, - { - "epoch": 0.13, - "learning_rate": 6.39964150057616e-07, - "loss": 0.2904, - "step": 3000 - }, - { - "epoch": 0.13, - "eval_accuracy": 0.659701889805696, - "eval_f1": 0.6629432187101901, - "eval_loss": 0.9263201951980591, - "eval_runtime": 29.8167, - "eval_samples_per_second": 252.007, - "eval_steps_per_second": 3.958, - "step": 3000 - }, - { - "epoch": 0.15, - "learning_rate": 7.466604071529171e-07, - "loss": 0.2777, - "step": 3500 - }, - { - "epoch": 0.15, - "eval_accuracy": 0.6752728240617514, - "eval_f1": 0.6780225280179651, - "eval_loss": 0.874735951423645, - "eval_runtime": 29.7236, - "eval_samples_per_second": 252.795, - "eval_steps_per_second": 3.97, - "step": 3500 - }, - { - "epoch": 0.17, - "learning_rate": 8.533566642482183e-07, - "loss": 0.2592, - "step": 4000 - }, - { - "epoch": 0.17, - "eval_accuracy": 0.6756720787862657, - "eval_f1": 0.6786108310156049, - "eval_loss": 0.8717394471168518, - "eval_runtime": 29.7655, - "eval_samples_per_second": 252.44, - "eval_steps_per_second": 3.964, - "step": 4000 - }, - { - "epoch": 0.19, - "learning_rate": 9.598395288293286e-07, - "loss": 0.2517, - "step": 4500 - }, - { - "epoch": 0.19, - "eval_accuracy": 0.6734096353473517, - "eval_f1": 0.6769719434965177, - "eval_loss": 0.8735432624816895, - "eval_runtime": 29.6907, - "eval_samples_per_second": 253.076, - "eval_steps_per_second": 3.974, - "step": 4500 - }, - { - "epoch": 0.21, - "learning_rate": 1.0665357859246298e-06, - "loss": 0.248, - "step": 5000 - }, - { - "epoch": 0.21, - "eval_accuracy": 0.6948363055629492, - "eval_f1": 0.6978001595314681, - "eval_loss": 0.813610315322876, - "eval_runtime": 29.6586, - "eval_samples_per_second": 253.35, - "eval_steps_per_second": 3.979, - "step": 5000 - }, - { - "epoch": 0.23, - "learning_rate": 1.173232043019931e-06, - "loss": 0.2356, - "step": 5500 - }, - { - "epoch": 0.23, - "eval_accuracy": 0.6996273622571201, - "eval_f1": 0.7034185823176623, - "eval_loss": 0.7985087633132935, - "eval_runtime": 29.648, - "eval_samples_per_second": 253.441, - "eval_steps_per_second": 3.98, - "step": 5500 - }, - { - "epoch": 0.26, - "learning_rate": 1.279928300115232e-06, - "loss": 0.2309, - "step": 6000 - }, - { - "epoch": 0.26, - "eval_accuracy": 0.6981634282672345, - "eval_f1": 0.7017567836313319, - "eval_loss": 0.8125708103179932, - "eval_runtime": 29.8184, - "eval_samples_per_second": 251.992, - "eval_steps_per_second": 3.957, - "step": 6000 - }, - { - "epoch": 0.28, - "learning_rate": 1.3864111646963426e-06, - "loss": 0.2361, - "step": 6500 - }, - { - "epoch": 0.28, - "eval_accuracy": 0.7104072398190046, - "eval_f1": 0.7139759041126813, - "eval_loss": 0.7547827363014221, - "eval_runtime": 29.9387, - "eval_samples_per_second": 250.98, - "eval_steps_per_second": 3.941, - "step": 6500 - }, - { - "epoch": 0.3, - "learning_rate": 1.4931074217916436e-06, - "loss": 0.228, - "step": 7000 - }, - { - "epoch": 0.3, - "eval_accuracy": 0.7069470322065478, - "eval_f1": 0.7106086090282727, - "eval_loss": 0.7759659290313721, - "eval_runtime": 29.786, - "eval_samples_per_second": 252.267, - "eval_steps_per_second": 3.962, - "step": 7000 - }, - { - "epoch": 0.32, - "learning_rate": 1.5998036788869448e-06, - "loss": 0.2208, - "step": 7500 - }, - { - "epoch": 0.32, - "eval_accuracy": 0.7167953154112323, - "eval_f1": 0.7197346181733503, - "eval_loss": 0.7356535792350769, - "eval_runtime": 29.8209, - "eval_samples_per_second": 251.971, - "eval_steps_per_second": 3.957, - "step": 7500 - }, - { - "epoch": 0.34, - "learning_rate": 1.706499935982246e-06, - "loss": 0.2163, - "step": 8000 - }, - { - "epoch": 0.34, - "eval_accuracy": 0.7199893532073462, - "eval_f1": 0.7244465646996111, - "eval_loss": 0.7504526972770691, - "eval_runtime": 29.7061, - "eval_samples_per_second": 252.944, - "eval_steps_per_second": 3.972, - "step": 8000 - }, - { - "epoch": 0.36, - "learning_rate": 1.8129828005633564e-06, - "loss": 0.2107, - "step": 8500 - }, - { - "epoch": 0.36, - "eval_accuracy": 0.7410167686984296, - "eval_f1": 0.7423783524470383, - "eval_loss": 0.6787217855453491, - "eval_runtime": 29.6863, - "eval_samples_per_second": 253.113, - "eval_steps_per_second": 3.975, - "step": 8500 - }, - { - "epoch": 0.38, - "learning_rate": 1.9196790576586572e-06, - "loss": 0.2149, - "step": 9000 - }, - { - "epoch": 0.38, - "eval_accuracy": 0.7233164759116316, - "eval_f1": 0.7276929881395522, - "eval_loss": 0.726280927658081, - "eval_runtime": 29.7736, - "eval_samples_per_second": 252.372, - "eval_steps_per_second": 3.963, - "step": 9000 - }, - { - "epoch": 0.41, - "learning_rate": 2.0263753147539587e-06, - "loss": 0.2136, - "step": 9500 - }, - { - "epoch": 0.41, - "eval_accuracy": 0.7230503060952888, - "eval_f1": 0.7276284165227827, - "eval_loss": 0.73542720079422, - "eval_runtime": 29.6589, - "eval_samples_per_second": 253.348, - "eval_steps_per_second": 3.979, - "step": 9500 - }, - { - "epoch": 0.43, - "learning_rate": 2.1330715718492596e-06, - "loss": 0.2048, - "step": 10000 - }, - { - "epoch": 0.43, - "eval_accuracy": 0.7463401650252861, - "eval_f1": 0.7498995143342098, - "eval_loss": 0.6706867218017578, - "eval_runtime": 29.649, - "eval_samples_per_second": 253.432, - "eval_steps_per_second": 3.98, - "step": 10000 - }, - { - "epoch": 0.45, - "learning_rate": 2.2395544364303703e-06, - "loss": 0.2061, - "step": 10500 - }, - { - "epoch": 0.45, - "eval_accuracy": 0.7655043918019696, - "eval_f1": 0.7677479761415009, - "eval_loss": 0.6240710020065308, - "eval_runtime": 29.7677, - "eval_samples_per_second": 252.421, - "eval_steps_per_second": 3.964, - "step": 10500 - }, - { - "epoch": 0.47, - "learning_rate": 2.3462506935256712e-06, - "loss": 0.2023, - "step": 11000 - }, - { - "epoch": 0.47, - "eval_accuracy": 0.7657705616183125, - "eval_f1": 0.7675978015109074, - "eval_loss": 0.6099062561988831, - "eval_runtime": 28.8989, - "eval_samples_per_second": 260.01, - "eval_steps_per_second": 4.083, - "step": 11000 - }, - { - "epoch": 0.49, - "learning_rate": 2.4529469506209722e-06, - "loss": 0.1971, - "step": 11500 - }, - { - "epoch": 0.49, - "eval_accuracy": 0.766569071067341, - "eval_f1": 0.7688363381777508, - "eval_loss": 0.6124671697616577, - "eval_runtime": 29.7028, - "eval_samples_per_second": 252.973, - "eval_steps_per_second": 3.973, - "step": 11500 - }, - { - "epoch": 0.51, - "learning_rate": 2.5596432077162737e-06, - "loss": 0.2007, - "step": 12000 - }, - { - "epoch": 0.51, - "eval_accuracy": 0.7802768166089965, - "eval_f1": 0.7815354154818521, - "eval_loss": 0.5773194432258606, - "eval_runtime": 29.6902, - "eval_samples_per_second": 253.08, - "eval_steps_per_second": 3.974, - "step": 12000 - }, - { - "epoch": 0.53, - "learning_rate": 2.6663394648115747e-06, - "loss": 0.1966, - "step": 12500 - }, - { - "epoch": 0.53, - "eval_accuracy": 0.7766835240883684, - "eval_f1": 0.7787527362371823, - "eval_loss": 0.5817181468009949, - "eval_runtime": 29.6898, - "eval_samples_per_second": 253.083, - "eval_steps_per_second": 3.974, - "step": 12500 - }, - { - "epoch": 0.55, - "learning_rate": 2.7728223293926853e-06, - "loss": 0.1904, - "step": 13000 - }, - { - "epoch": 0.55, - "eval_accuracy": 0.763375033271227, - "eval_f1": 0.7673421658493134, - "eval_loss": 0.6202647089958191, - "eval_runtime": 29.6897, - "eval_samples_per_second": 253.084, - "eval_steps_per_second": 3.974, - "step": 13000 - }, - { - "epoch": 0.58, - "learning_rate": 2.879518586487986e-06, - "loss": 0.1951, - "step": 13500 - }, - { - "epoch": 0.58, - "eval_accuracy": 0.7721586372105403, - "eval_f1": 0.7752940740863317, - "eval_loss": 0.6014246344566345, - "eval_runtime": 28.9444, - "eval_samples_per_second": 259.601, - "eval_steps_per_second": 4.077, - "step": 13500 - }, - { - "epoch": 0.6, - "learning_rate": 2.9862148435832873e-06, - "loss": 0.1876, - "step": 14000 - }, - { - "epoch": 0.6, - "eval_accuracy": 0.7704285334043119, - "eval_f1": 0.7738889189274433, - "eval_loss": 0.5962545871734619, - "eval_runtime": 29.7266, - "eval_samples_per_second": 252.77, - "eval_steps_per_second": 3.97, - "step": 14000 - }, - { - "epoch": 0.62, - "learning_rate": 3.0929111006785883e-06, - "loss": 0.1887, - "step": 14500 - }, - { - "epoch": 0.62, - "eval_accuracy": 0.7913228639872238, - "eval_f1": 0.7934762289692718, - "eval_loss": 0.5552248954772949, - "eval_runtime": 29.7855, - "eval_samples_per_second": 252.271, - "eval_steps_per_second": 3.962, - "step": 14500 - }, - { - "epoch": 0.64, - "learning_rate": 3.199393965259699e-06, - "loss": 0.1869, - "step": 15000 - }, - { - "epoch": 0.64, - "eval_accuracy": 0.7954484961405377, - "eval_f1": 0.7971630354645414, - "eval_loss": 0.5504642724990845, - "eval_runtime": 29.8102, - "eval_samples_per_second": 252.061, - "eval_steps_per_second": 3.958, - "step": 15000 - }, - { - "epoch": 0.66, - "learning_rate": 3.3060902223550003e-06, - "loss": 0.1885, - "step": 15500 - }, - { - "epoch": 0.66, - "eval_accuracy": 0.7981101943039659, - "eval_f1": 0.8001115884271371, - "eval_loss": 0.5248883962631226, - "eval_runtime": 29.8233, - "eval_samples_per_second": 251.951, - "eval_steps_per_second": 3.957, - "step": 15500 - }, - { - "epoch": 0.68, - "learning_rate": 3.4127864794503013e-06, - "loss": 0.1841, - "step": 16000 - }, - { - "epoch": 0.68, - "eval_accuracy": 0.76177801437317, - "eval_f1": 0.7663511235727536, - "eval_loss": 0.6254962682723999, - "eval_runtime": 28.8714, - "eval_samples_per_second": 260.258, - "eval_steps_per_second": 4.087, - "step": 16000 - }, - { - "epoch": 0.7, - "learning_rate": 3.5194827365456023e-06, - "loss": 0.1888, - "step": 16500 - }, - { - "epoch": 0.7, - "eval_accuracy": 0.8039659302635082, - "eval_f1": 0.8057213697184751, - "eval_loss": 0.513132631778717, - "eval_runtime": 29.7082, - "eval_samples_per_second": 252.927, - "eval_steps_per_second": 3.972, - "step": 16500 - }, - { - "epoch": 0.73, - "learning_rate": 3.625965601126713e-06, - "loss": 0.1822, - "step": 17000 - }, - { - "epoch": 0.73, - "eval_accuracy": 0.7708277881288262, - "eval_f1": 0.7751738701524662, - "eval_loss": 0.6065024733543396, - "eval_runtime": 29.6783, - "eval_samples_per_second": 253.182, - "eval_steps_per_second": 3.976, - "step": 17000 - }, - { - "epoch": 0.75, - "learning_rate": 3.7326618582220135e-06, - "loss": 0.1824, - "step": 17500 - }, - { - "epoch": 0.75, - "eval_accuracy": 0.8144796380090498, - "eval_f1": 0.815863436789794, - "eval_loss": 0.48926714062690735, - "eval_runtime": 29.8006, - "eval_samples_per_second": 252.143, - "eval_steps_per_second": 3.96, - "step": 17500 - }, - { - "epoch": 0.77, - "learning_rate": 3.8393581153173145e-06, - "loss": 0.1762, - "step": 18000 - }, - { - "epoch": 0.77, - "eval_accuracy": 0.8018365717327655, - "eval_f1": 0.8042255377709717, - "eval_loss": 0.5292934775352478, - "eval_runtime": 29.6674, - "eval_samples_per_second": 253.275, - "eval_steps_per_second": 3.977, - "step": 18000 - }, - { - "epoch": 0.79, - "learning_rate": 3.946054372412616e-06, - "loss": 0.1794, - "step": 18500 - }, - { - "epoch": 0.79, - "eval_accuracy": 0.7919882885280809, - "eval_f1": 0.7951855368343619, - "eval_loss": 0.5379685163497925, - "eval_runtime": 29.6555, - "eval_samples_per_second": 253.376, - "eval_steps_per_second": 3.979, - "step": 18500 - }, - { - "epoch": 0.81, - "learning_rate": 4.052750629507917e-06, - "loss": 0.178, - "step": 19000 - }, - { - "epoch": 0.81, - "eval_accuracy": 0.7966462603140804, - "eval_f1": 0.7997248083930035, - "eval_loss": 0.547903299331665, - "eval_runtime": 29.6966, - "eval_samples_per_second": 253.026, - "eval_steps_per_second": 3.974, - "step": 19000 - }, - { - "epoch": 0.83, - "learning_rate": 4.159446886603218e-06, - "loss": 0.178, - "step": 19500 - }, - { - "epoch": 0.83, - "eval_accuracy": 0.8191376097950492, - "eval_f1": 0.820192566679803, - "eval_loss": 0.48710888624191284, - "eval_runtime": 29.6619, - "eval_samples_per_second": 253.322, - "eval_steps_per_second": 3.978, - "step": 19500 - }, - { - "epoch": 0.85, - "learning_rate": 4.266143143698519e-06, - "loss": 0.1804, - "step": 20000 - }, - { - "epoch": 0.85, - "eval_accuracy": 0.7983763641203088, - "eval_f1": 0.80157249996382, - "eval_loss": 0.5329124331474304, - "eval_runtime": 29.6874, - "eval_samples_per_second": 253.104, - "eval_steps_per_second": 3.975, - "step": 20000 - }, - { - "epoch": 0.87, - "learning_rate": 4.372839400793821e-06, - "loss": 0.1735, - "step": 20500 - }, - { - "epoch": 0.87, - "eval_accuracy": 0.8005057226510514, - "eval_f1": 0.8025166030334374, - "eval_loss": 0.5359117388725281, - "eval_runtime": 29.7951, - "eval_samples_per_second": 252.189, - "eval_steps_per_second": 3.96, - "step": 20500 - }, - { - "epoch": 0.9, - "learning_rate": 4.479322265374931e-06, - "loss": 0.1757, - "step": 21000 - }, - { - "epoch": 0.9, - "eval_accuracy": 0.8148788927335641, - "eval_f1": 0.8169785681348869, - "eval_loss": 0.49457210302352905, - "eval_runtime": 29.7201, - "eval_samples_per_second": 252.825, - "eval_steps_per_second": 3.97, - "step": 21000 - }, - { - "epoch": 0.92, - "learning_rate": 4.5858051299560415e-06, - "loss": 0.1784, - "step": 21500 - }, - { - "epoch": 0.92, - "eval_accuracy": 0.7914559488953953, - "eval_f1": 0.7952801320443871, - "eval_loss": 0.5443964600563049, - "eval_runtime": 29.7969, - "eval_samples_per_second": 252.174, - "eval_steps_per_second": 3.96, - "step": 21500 - }, - { - "epoch": 0.94, - "learning_rate": 4.6925013870513425e-06, - "loss": 0.1727, - "step": 22000 - }, - { - "epoch": 0.94, - "eval_accuracy": 0.7762842693638541, - "eval_f1": 0.7809969777740893, - "eval_loss": 0.6244210004806519, - "eval_runtime": 29.7843, - "eval_samples_per_second": 252.281, - "eval_steps_per_second": 3.962, - "step": 22000 - }, - { - "epoch": 0.96, - "learning_rate": 4.7991976441466435e-06, - "loss": 0.1661, - "step": 22500 - }, - { - "epoch": 0.96, - "eval_accuracy": 0.8233963268565345, - "eval_f1": 0.8247457942936195, - "eval_loss": 0.4613490104675293, - "eval_runtime": 29.7228, - "eval_samples_per_second": 252.802, - "eval_steps_per_second": 3.97, - "step": 22500 - }, - { - "epoch": 0.98, - "learning_rate": 4.9058939012419445e-06, - "loss": 0.1765, - "step": 23000 - }, - { - "epoch": 0.98, - "eval_accuracy": 0.8155443172744211, - "eval_f1": 0.817901282131261, - "eval_loss": 0.4752632677555084, - "eval_runtime": 29.6943, - "eval_samples_per_second": 253.045, - "eval_steps_per_second": 3.974, - "step": 23000 - }, - { - "epoch": 1.0, - "learning_rate": 5.012376765823055e-06, - "loss": 0.1691, - "step": 23500 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.8148788927335641, - "eval_f1": 0.8171852369583342, - "eval_loss": 0.4768976867198944, - "eval_runtime": 29.8514, - "eval_samples_per_second": 251.713, - "eval_steps_per_second": 3.953, - "step": 23500 - }, - { - "epoch": 1.02, - "learning_rate": 5.119073022918356e-06, - "loss": 0.1602, - "step": 24000 - }, - { - "epoch": 1.02, - "eval_accuracy": 0.8349747138674475, - "eval_f1": 0.8360082038928794, - "eval_loss": 0.4381480813026428, - "eval_runtime": 29.7058, - "eval_samples_per_second": 252.947, - "eval_steps_per_second": 3.972, - "step": 24000 - }, - { - "epoch": 1.05, - "learning_rate": 5.225769280013657e-06, - "loss": 0.1598, - "step": 24500 - }, - { - "epoch": 1.05, - "eval_accuracy": 0.8154112323662497, - "eval_f1": 0.817754338851287, - "eval_loss": 0.4830179512500763, - "eval_runtime": 29.7125, - "eval_samples_per_second": 252.89, - "eval_steps_per_second": 3.971, - "step": 24500 - }, - { - "epoch": 1.07, - "learning_rate": 5.332465537108958e-06, - "loss": 0.1572, - "step": 25000 - }, - { - "epoch": 1.07, - "eval_accuracy": 0.8293851477242481, - "eval_f1": 0.8311224826812309, - "eval_loss": 0.46394336223602295, - "eval_runtime": 29.7759, - "eval_samples_per_second": 252.352, - "eval_steps_per_second": 3.963, - "step": 25000 - }, - { - "epoch": 1.09, - "learning_rate": 5.43916179420426e-06, - "loss": 0.1595, - "step": 25500 - }, - { - "epoch": 1.09, - "eval_accuracy": 0.8148788927335641, - "eval_f1": 0.8178297752035011, - "eval_loss": 0.5001041889190674, - "eval_runtime": 29.6893, - "eval_samples_per_second": 253.088, - "eval_steps_per_second": 3.974, - "step": 25500 - }, - { - "epoch": 1.11, - "learning_rate": 5.545858051299561e-06, - "loss": 0.161, - "step": 26000 - }, - { - "epoch": 1.11, - "eval_accuracy": 0.8259249401117913, - "eval_f1": 0.8286636751609872, - "eval_loss": 0.48775410652160645, - "eval_runtime": 29.8731, - "eval_samples_per_second": 251.531, - "eval_steps_per_second": 3.95, - "step": 26000 - }, - { - "epoch": 1.13, - "learning_rate": 5.652554308394862e-06, - "loss": 0.1579, - "step": 26500 - }, - { - "epoch": 1.13, - "eval_accuracy": 0.8295182326324195, - "eval_f1": 0.8312661426787813, - "eval_loss": 0.46488162875175476, - "eval_runtime": 29.804, - "eval_samples_per_second": 252.114, - "eval_steps_per_second": 3.959, - "step": 26500 - }, - { - "epoch": 1.15, - "learning_rate": 5.759250565490163e-06, - "loss": 0.158, - "step": 27000 - }, - { - "epoch": 1.15, - "eval_accuracy": 0.8315145062549907, - "eval_f1": 0.8338268703712137, - "eval_loss": 0.468116819858551, - "eval_runtime": 29.7512, - "eval_samples_per_second": 252.561, - "eval_steps_per_second": 3.966, - "step": 27000 - }, - { - "epoch": 1.17, - "learning_rate": 5.8657334300712735e-06, - "loss": 0.1564, - "step": 27500 - }, - { - "epoch": 1.17, - "eval_accuracy": 0.8424274687250466, - "eval_f1": 0.8440072450050907, - "eval_loss": 0.43628567457199097, - "eval_runtime": 29.7871, - "eval_samples_per_second": 252.256, - "eval_steps_per_second": 3.961, - "step": 27500 - }, - { - "epoch": 1.19, - "learning_rate": 5.9724296871665745e-06, - "loss": 0.1563, - "step": 28000 - }, - { - "epoch": 1.19, - "eval_accuracy": 0.8393665158371041, - "eval_f1": 0.8406275107384483, - "eval_loss": 0.4493769705295563, - "eval_runtime": 29.8008, - "eval_samples_per_second": 252.141, - "eval_steps_per_second": 3.96, - "step": 28000 - }, - { - "epoch": 1.22, - "learning_rate": 6.0791259442618755e-06, - "loss": 0.1537, - "step": 28500 - }, - { - "epoch": 1.22, - "eval_accuracy": 0.8558690444503594, - "eval_f1": 0.8564129141434538, - "eval_loss": 0.40108588337898254, - "eval_runtime": 29.7407, - "eval_samples_per_second": 252.65, - "eval_steps_per_second": 3.968, - "step": 28500 - }, - { - "epoch": 1.24, - "learning_rate": 6.1858222013571765e-06, - "loss": 0.1563, - "step": 29000 - }, - { - "epoch": 1.24, - "eval_accuracy": 0.8498802235826457, - "eval_f1": 0.8508552401873143, - "eval_loss": 0.39731621742248535, - "eval_runtime": 29.6785, - "eval_samples_per_second": 253.18, - "eval_steps_per_second": 3.976, - "step": 29000 - }, - { - "epoch": 1.26, - "learning_rate": 6.292305065938288e-06, - "loss": 0.1561, - "step": 29500 - }, - { - "epoch": 1.26, - "eval_accuracy": 0.8237955815810487, - "eval_f1": 0.8271945122035734, - "eval_loss": 0.46914729475975037, - "eval_runtime": 29.7151, - "eval_samples_per_second": 252.868, - "eval_steps_per_second": 3.971, - "step": 29500 - }, - { - "epoch": 1.28, - "learning_rate": 6.399001323033589e-06, - "loss": 0.1528, - "step": 30000 - }, - { - "epoch": 1.28, - "eval_accuracy": 0.8251264306627628, - "eval_f1": 0.8285461748150632, - "eval_loss": 0.48026174306869507, - "eval_runtime": 29.6374, - "eval_samples_per_second": 253.531, - "eval_steps_per_second": 3.981, - "step": 30000 - }, - { - "epoch": 1.3, - "learning_rate": 6.505697580128889e-06, - "loss": 0.1544, - "step": 30500 - }, - { - "epoch": 1.3, - "eval_accuracy": 0.848682459409103, - "eval_f1": 0.8501262102833276, - "eval_loss": 0.41893479228019714, - "eval_runtime": 29.6645, - "eval_samples_per_second": 253.3, - "eval_steps_per_second": 3.978, - "step": 30500 - }, - { - "epoch": 1.32, - "learning_rate": 6.61239383722419e-06, - "loss": 0.1539, - "step": 31000 - }, - { - "epoch": 1.32, - "eval_accuracy": 0.851876497205217, - "eval_f1": 0.8531827777031692, - "eval_loss": 0.4037366509437561, - "eval_runtime": 29.7077, - "eval_samples_per_second": 252.931, - "eval_steps_per_second": 3.972, - "step": 31000 - }, - { - "epoch": 1.34, - "learning_rate": 6.7188767018053016e-06, - "loss": 0.151, - "step": 31500 - }, - { - "epoch": 1.34, - "eval_accuracy": 0.852541921746074, - "eval_f1": 0.8535937254114443, - "eval_loss": 0.39944180846214294, - "eval_runtime": 29.7895, - "eval_samples_per_second": 252.237, - "eval_steps_per_second": 3.961, - "step": 31500 - }, - { - "epoch": 1.37, - "learning_rate": 6.8255729589006026e-06, - "loss": 0.152, - "step": 32000 - }, - { - "epoch": 1.37, - "eval_accuracy": 0.8347085440511046, - "eval_f1": 0.8372269081601834, - "eval_loss": 0.45202094316482544, - "eval_runtime": 29.6443, - "eval_samples_per_second": 253.472, - "eval_steps_per_second": 3.981, - "step": 32000 - }, - { - "epoch": 1.39, - "learning_rate": 6.9322692159959036e-06, - "loss": 0.1518, - "step": 32500 - }, - { - "epoch": 1.39, - "eval_accuracy": 0.8540058557359596, - "eval_f1": 0.8549640879293109, - "eval_loss": 0.4035201668739319, - "eval_runtime": 29.6728, - "eval_samples_per_second": 253.229, - "eval_steps_per_second": 3.977, - "step": 32500 - }, - { - "epoch": 1.41, - "learning_rate": 7.0389654730912046e-06, - "loss": 0.1525, - "step": 33000 - }, - { - "epoch": 1.41, - "eval_accuracy": 0.8542720255523024, - "eval_f1": 0.8556504958817148, - "eval_loss": 0.3930774927139282, - "eval_runtime": 29.4043, - "eval_samples_per_second": 255.541, - "eval_steps_per_second": 4.013, - "step": 33000 - }, - { - "epoch": 1.43, - "learning_rate": 7.145661730186506e-06, - "loss": 0.1531, - "step": 33500 - }, - { - "epoch": 1.43, - "eval_accuracy": 0.8571998935320735, - "eval_f1": 0.8578085198022855, - "eval_loss": 0.39213454723358154, - "eval_runtime": 29.717, - "eval_samples_per_second": 252.852, - "eval_steps_per_second": 3.971, - "step": 33500 - }, - { - "epoch": 1.45, - "learning_rate": 7.252357987281807e-06, - "loss": 0.1508, - "step": 34000 - }, - { - "epoch": 1.45, - "eval_accuracy": 0.8645195634815012, - "eval_f1": 0.8653816271187702, - "eval_loss": 0.3561285138130188, - "eval_runtime": 29.654, - "eval_samples_per_second": 253.389, - "eval_steps_per_second": 3.979, - "step": 34000 - }, - { - "epoch": 1.47, - "learning_rate": 7.3590542443771076e-06, - "loss": 0.1484, - "step": 34500 - }, - { - "epoch": 1.47, - "eval_accuracy": 0.8627894596752729, - "eval_f1": 0.8636694754124691, - "eval_loss": 0.3743633031845093, - "eval_runtime": 29.8096, - "eval_samples_per_second": 252.066, - "eval_steps_per_second": 3.958, - "step": 34500 - }, - { - "epoch": 1.49, - "learning_rate": 7.4657505014724086e-06, - "loss": 0.1506, - "step": 35000 - }, - { - "epoch": 1.49, - "eval_accuracy": 0.8727708277881289, - "eval_f1": 0.8731333607131244, - "eval_loss": 0.3446885347366333, - "eval_runtime": 29.6928, - "eval_samples_per_second": 253.058, - "eval_steps_per_second": 3.974, - "step": 35000 - }, - { - "epoch": 1.52, - "learning_rate": 7.57223336605352e-06, - "loss": 0.1498, - "step": 35500 - }, - { - "epoch": 1.52, - "eval_accuracy": 0.8578653180729305, - "eval_f1": 0.8594115234962781, - "eval_loss": 0.3940994441509247, - "eval_runtime": 28.8268, - "eval_samples_per_second": 260.66, - "eval_steps_per_second": 4.093, - "step": 35500 - }, - { - "epoch": 1.54, - "learning_rate": 7.67892962314882e-06, - "loss": 0.1492, - "step": 36000 - }, - { - "epoch": 1.54, - "eval_accuracy": 0.8517434122970455, - "eval_f1": 0.8539360175778631, - "eval_loss": 0.42693892121315, - "eval_runtime": 29.7835, - "eval_samples_per_second": 252.287, - "eval_steps_per_second": 3.962, - "step": 36000 - }, - { - "epoch": 1.56, - "learning_rate": 7.785625880244122e-06, - "loss": 0.1512, - "step": 36500 - }, - { - "epoch": 1.56, - "eval_accuracy": 0.8623902049507586, - "eval_f1": 0.8638277895919978, - "eval_loss": 0.38825100660324097, - "eval_runtime": 29.7295, - "eval_samples_per_second": 252.745, - "eval_steps_per_second": 3.969, - "step": 36500 - }, - { - "epoch": 1.58, - "learning_rate": 7.892322137339424e-06, - "loss": 0.1488, - "step": 37000 - }, - { - "epoch": 1.58, - "eval_accuracy": 0.8622571200425871, - "eval_f1": 0.8625953129968353, - "eval_loss": 0.3947807848453522, - "eval_runtime": 29.8226, - "eval_samples_per_second": 251.957, - "eval_steps_per_second": 3.957, - "step": 37000 - }, - { - "epoch": 1.6, - "learning_rate": 7.998805001920533e-06, - "loss": 0.1462, - "step": 37500 - }, - { - "epoch": 1.6, - "eval_accuracy": 0.857998402981102, - "eval_f1": 0.85965218608276, - "eval_loss": 0.39402276277542114, - "eval_runtime": 29.6801, - "eval_samples_per_second": 253.167, - "eval_steps_per_second": 3.976, - "step": 37500 - }, - { - "epoch": 1.62, - "learning_rate": 8.105501259015835e-06, - "loss": 0.1444, - "step": 38000 - }, - { - "epoch": 1.62, - "eval_accuracy": 0.8752994410433856, - "eval_f1": 0.8754268843244172, - "eval_loss": 0.3435206115245819, - "eval_runtime": 28.916, - "eval_samples_per_second": 259.856, - "eval_steps_per_second": 4.081, - "step": 38000 - }, - { - "epoch": 1.64, - "learning_rate": 8.212197516111135e-06, - "loss": 0.1479, - "step": 38500 - }, - { - "epoch": 1.64, - "eval_accuracy": 0.8776949693904711, - "eval_f1": 0.8777389705505072, - "eval_loss": 0.34012433886528015, - "eval_runtime": 29.8124, - "eval_samples_per_second": 252.043, - "eval_steps_per_second": 3.958, - "step": 38500 - }, - { - "epoch": 1.66, - "learning_rate": 8.318893773206437e-06, - "loss": 0.1454, - "step": 39000 - }, - { - "epoch": 1.66, - "eval_accuracy": 0.8727708277881289, - "eval_f1": 0.8738380396408167, - "eval_loss": 0.3515642583370209, - "eval_runtime": 29.6541, - "eval_samples_per_second": 253.388, - "eval_steps_per_second": 3.979, - "step": 39000 - }, - { - "epoch": 1.69, - "learning_rate": 8.425376637787547e-06, - "loss": 0.1455, - "step": 39500 - }, - { - "epoch": 1.69, - "eval_accuracy": 0.8675805163694437, - "eval_f1": 0.8691207089918026, - "eval_loss": 0.3673495352268219, - "eval_runtime": 29.6701, - "eval_samples_per_second": 253.251, - "eval_steps_per_second": 3.977, - "step": 39500 - }, - { - "epoch": 1.71, - "learning_rate": 8.532072894882847e-06, - "loss": 0.1453, - "step": 40000 - }, - { - "epoch": 1.71, - "eval_accuracy": 0.8581314878892734, - "eval_f1": 0.8595605264062034, - "eval_loss": 0.39343753457069397, - "eval_runtime": 29.6716, - "eval_samples_per_second": 253.239, - "eval_steps_per_second": 3.977, - "step": 40000 - }, - { - "epoch": 1.73, - "learning_rate": 8.63876915197815e-06, - "loss": 0.147, - "step": 40500 - }, - { - "epoch": 1.73, - "eval_accuracy": 0.8658504125632154, - "eval_f1": 0.8668749400965359, - "eval_loss": 0.36048775911331177, - "eval_runtime": 27.3595, - "eval_samples_per_second": 274.639, - "eval_steps_per_second": 4.313, - "step": 40500 - }, - { - "epoch": 1.75, - "learning_rate": 8.745465409073451e-06, - "loss": 0.1516, - "step": 41000 - }, - { - "epoch": 1.75, - "eval_accuracy": 0.8758317806760714, - "eval_f1": 0.8766235747210261, - "eval_loss": 0.35379454493522644, - "eval_runtime": 29.6799, - "eval_samples_per_second": 253.168, - "eval_steps_per_second": 3.976, - "step": 41000 - }, - { - "epoch": 1.77, - "learning_rate": 8.851948273654562e-06, - "loss": 0.1395, - "step": 41500 - }, - { - "epoch": 1.77, - "eval_accuracy": 0.8714399787064147, - "eval_f1": 0.8728150152532584, - "eval_loss": 0.3659563362598419, - "eval_runtime": 29.6835, - "eval_samples_per_second": 253.137, - "eval_steps_per_second": 3.975, - "step": 41500 - }, - { - "epoch": 1.79, - "learning_rate": 8.958644530749862e-06, - "loss": 0.1407, - "step": 42000 - }, - { - "epoch": 1.79, - "eval_accuracy": 0.8734362523289859, - "eval_f1": 0.8743262687390195, - "eval_loss": 0.3471178710460663, - "eval_runtime": 29.6846, - "eval_samples_per_second": 253.128, - "eval_steps_per_second": 3.975, - "step": 42000 - }, - { - "epoch": 1.81, - "learning_rate": 9.065340787845162e-06, - "loss": 0.1399, - "step": 42500 - }, - { - "epoch": 1.81, - "eval_accuracy": 0.8856800638807559, - "eval_f1": 0.8861514607271497, - "eval_loss": 0.32689064741134644, - "eval_runtime": 29.7995, - "eval_samples_per_second": 252.152, - "eval_steps_per_second": 3.96, - "step": 42500 - }, - { - "epoch": 1.84, - "learning_rate": 9.172037044940464e-06, - "loss": 0.1447, - "step": 43000 - }, - { - "epoch": 1.84, - "eval_accuracy": 0.8697098749001864, - "eval_f1": 0.8708193235034514, - "eval_loss": 0.3712153732776642, - "eval_runtime": 29.722, - "eval_samples_per_second": 252.809, - "eval_steps_per_second": 3.97, - "step": 43000 - }, - { - "epoch": 1.86, - "learning_rate": 9.278733302035766e-06, - "loss": 0.1453, - "step": 43500 - }, - { - "epoch": 1.86, - "eval_accuracy": 0.8903380356667554, - "eval_f1": 0.8906690257334923, - "eval_loss": 0.30124372243881226, - "eval_runtime": 29.8038, - "eval_samples_per_second": 252.115, - "eval_steps_per_second": 3.959, - "step": 43500 - }, - { - "epoch": 1.88, - "learning_rate": 9.385429559131066e-06, - "loss": 0.1451, - "step": 44000 - }, - { - "epoch": 1.88, - "eval_accuracy": 0.8871439978706415, - "eval_f1": 0.8877045418770082, - "eval_loss": 0.31062984466552734, - "eval_runtime": 29.6847, - "eval_samples_per_second": 253.127, - "eval_steps_per_second": 3.975, - "step": 44000 - }, - { - "epoch": 1.9, - "learning_rate": 9.491912423712178e-06, - "loss": 0.1409, - "step": 44500 - }, - { - "epoch": 1.9, - "eval_accuracy": 0.8790258184721853, - "eval_f1": 0.8799332093833431, - "eval_loss": 0.3516901135444641, - "eval_runtime": 29.7103, - "eval_samples_per_second": 252.909, - "eval_steps_per_second": 3.972, - "step": 44500 - }, - { - "epoch": 1.92, - "learning_rate": 9.598608680807478e-06, - "loss": 0.1417, - "step": 45000 - }, - { - "epoch": 1.92, - "eval_accuracy": 0.8727708277881289, - "eval_f1": 0.8742134115368855, - "eval_loss": 0.3526758849620819, - "eval_runtime": 29.6753, - "eval_samples_per_second": 253.207, - "eval_steps_per_second": 3.976, - "step": 45000 - }, - { - "epoch": 1.94, - "learning_rate": 9.705304937902778e-06, - "loss": 0.1409, - "step": 45500 - }, - { - "epoch": 1.94, - "eval_accuracy": 0.8880755922278414, - "eval_f1": 0.8886875427320827, - "eval_loss": 0.3103240430355072, - "eval_runtime": 29.7312, - "eval_samples_per_second": 252.731, - "eval_steps_per_second": 3.969, - "step": 45500 - }, - { - "epoch": 1.96, - "learning_rate": 9.81200119499808e-06, - "loss": 0.1479, - "step": 46000 - }, - { - "epoch": 1.96, - "eval_accuracy": 0.8900718658504125, - "eval_f1": 0.8906162569128221, - "eval_loss": 0.30096864700317383, - "eval_runtime": 29.8, - "eval_samples_per_second": 252.148, - "eval_steps_per_second": 3.96, - "step": 46000 - }, - { - "epoch": 1.98, - "learning_rate": 9.91869745209338e-06, - "loss": 0.1385, - "step": 46500 - }, - { - "epoch": 1.98, - "eval_accuracy": 0.8846153846153846, - "eval_f1": 0.885573613420597, - "eval_loss": 0.320277601480484, - "eval_runtime": 29.687, - "eval_samples_per_second": 253.107, - "eval_steps_per_second": 3.975, - "step": 46500 - }, - { - "epoch": 2.01, - "learning_rate": 1.0025180316674493e-05, - "loss": 0.1334, - "step": 47000 - }, - { - "epoch": 2.01, - "eval_accuracy": 0.889273356401384, - "eval_f1": 0.8898038480316975, - "eval_loss": 0.3248152732849121, - "eval_runtime": 29.6988, - "eval_samples_per_second": 253.007, - "eval_steps_per_second": 3.973, - "step": 47000 - }, - { - "epoch": 2.03, - "learning_rate": 1.0131876573769793e-05, - "loss": 0.121, - "step": 47500 - }, - { - "epoch": 2.03, - "eval_accuracy": 0.8856800638807559, - "eval_f1": 0.8866614530137226, - "eval_loss": 0.32954302430152893, - "eval_runtime": 29.7176, - "eval_samples_per_second": 252.847, - "eval_steps_per_second": 3.971, - "step": 47500 - }, - { - "epoch": 2.05, - "learning_rate": 1.0238572830865095e-05, - "loss": 0.1261, - "step": 48000 - }, - { - "epoch": 2.05, - "eval_accuracy": 0.8984562150652116, - "eval_f1": 0.8988722889159447, - "eval_loss": 0.2932971119880676, - "eval_runtime": 29.7394, - "eval_samples_per_second": 252.662, - "eval_steps_per_second": 3.968, - "step": 48000 - }, - { - "epoch": 2.07, - "learning_rate": 1.0345269087960395e-05, - "loss": 0.1181, - "step": 48500 - }, - { - "epoch": 2.07, - "eval_accuracy": 0.8951290923609263, - "eval_f1": 0.8958668889166673, - "eval_loss": 0.3214079439640045, - "eval_runtime": 29.6828, - "eval_samples_per_second": 253.143, - "eval_steps_per_second": 3.975, - "step": 48500 - }, - { - "epoch": 2.09, - "learning_rate": 1.0451751952541507e-05, - "loss": 0.1267, - "step": 49000 - }, - { - "epoch": 2.09, - "eval_accuracy": 0.896992281075326, - "eval_f1": 0.8978174395785067, - "eval_loss": 0.31766536831855774, - "eval_runtime": 29.7643, - "eval_samples_per_second": 252.45, - "eval_steps_per_second": 3.964, - "step": 49000 - }, - { - "epoch": 2.11, - "learning_rate": 1.0558448209636807e-05, - "loss": 0.124, - "step": 49500 - }, - { - "epoch": 2.11, - "eval_accuracy": 0.8655842427468725, - "eval_f1": 0.8674991335930158, - "eval_loss": 0.3998588025569916, - "eval_runtime": 29.6648, - "eval_samples_per_second": 253.297, - "eval_steps_per_second": 3.978, - "step": 49500 - }, - { - "epoch": 2.13, - "learning_rate": 1.0665144466732107e-05, - "loss": 0.1238, - "step": 50000 - }, - { - "epoch": 2.13, - "eval_accuracy": 0.8912696300239553, - "eval_f1": 0.8920994435131362, - "eval_loss": 0.32742416858673096, - "eval_runtime": 29.7221, - "eval_samples_per_second": 252.808, - "eval_steps_per_second": 3.97, - "step": 50000 - }, - { - "epoch": 2.16, - "learning_rate": 1.077184072382741e-05, - "loss": 0.1206, - "step": 50500 - }, - { - "epoch": 2.16, - "eval_accuracy": 0.9039126963002395, - "eval_f1": 0.9041946980678703, - "eval_loss": 0.29819589853286743, - "eval_runtime": 29.6654, - "eval_samples_per_second": 253.292, - "eval_steps_per_second": 3.978, - "step": 50500 - }, - { - "epoch": 2.18, - "learning_rate": 1.087832358840852e-05, - "loss": 0.1231, - "step": 51000 - }, - { - "epoch": 2.18, - "eval_accuracy": 0.864785733297844, - "eval_f1": 0.8675346101327358, - "eval_loss": 0.40950268507003784, - "eval_runtime": 29.6781, - "eval_samples_per_second": 253.183, - "eval_steps_per_second": 3.976, - "step": 51000 - }, - { - "epoch": 2.2, - "learning_rate": 1.0985019845503822e-05, - "loss": 0.1211, - "step": 51500 - }, - { - "epoch": 2.2, - "eval_accuracy": 0.8814213468192706, - "eval_f1": 0.8826759157361712, - "eval_loss": 0.376949667930603, - "eval_runtime": 29.6468, - "eval_samples_per_second": 253.45, - "eval_steps_per_second": 3.98, - "step": 51500 - }, - { - "epoch": 2.22, - "learning_rate": 1.1091716102599122e-05, - "loss": 0.129, - "step": 52000 - }, - { - "epoch": 2.22, - "eval_accuracy": 0.9021825924940112, - "eval_f1": 0.9026398982081912, - "eval_loss": 0.28755125403404236, - "eval_runtime": 29.6857, - "eval_samples_per_second": 253.119, - "eval_steps_per_second": 3.975, - "step": 52000 - }, - { - "epoch": 2.24, - "learning_rate": 1.1198412359694424e-05, - "loss": 0.1193, - "step": 52500 - }, - { - "epoch": 2.24, - "eval_accuracy": 0.883151450625499, - "eval_f1": 0.8842688770624403, - "eval_loss": 0.3539595901966095, - "eval_runtime": 29.6767, - "eval_samples_per_second": 253.195, - "eval_steps_per_second": 3.976, - "step": 52500 - }, - { - "epoch": 2.26, - "learning_rate": 1.1304895224275534e-05, - "loss": 0.12, - "step": 53000 - }, - { - "epoch": 2.26, - "eval_accuracy": 0.860260846420016, - "eval_f1": 0.8633802325021412, - "eval_loss": 0.4444720149040222, - "eval_runtime": 29.8492, - "eval_samples_per_second": 251.732, - "eval_steps_per_second": 3.953, - "step": 53000 - }, - { - "epoch": 2.28, - "learning_rate": 1.1411591481370834e-05, - "loss": 0.1205, - "step": 53500 - }, - { - "epoch": 2.28, - "eval_accuracy": 0.9056428001064679, - "eval_f1": 0.9060288772812102, - "eval_loss": 0.28057876229286194, - "eval_runtime": 29.7764, - "eval_samples_per_second": 252.348, - "eval_steps_per_second": 3.963, - "step": 53500 - }, - { - "epoch": 2.3, - "learning_rate": 1.1518287738466136e-05, - "loss": 0.1225, - "step": 54000 - }, - { - "epoch": 2.3, - "eval_accuracy": 0.90018631887144, - "eval_f1": 0.9011140009401021, - "eval_loss": 0.3092280924320221, - "eval_runtime": 29.7057, - "eval_samples_per_second": 252.948, - "eval_steps_per_second": 3.972, - "step": 54000 - }, - { - "epoch": 2.33, - "learning_rate": 1.1624983995561436e-05, - "loss": 0.1242, - "step": 54500 - }, - { - "epoch": 2.33, - "eval_accuracy": 0.8866116582379558, - "eval_f1": 0.8877422144240315, - "eval_loss": 0.3376993238925934, - "eval_runtime": 29.662, - "eval_samples_per_second": 253.321, - "eval_steps_per_second": 3.978, - "step": 54500 - }, - { - "epoch": 2.35, - "learning_rate": 1.1731466860142547e-05, - "loss": 0.1208, - "step": 55000 - }, - { - "epoch": 2.35, - "eval_accuracy": 0.8895395262177269, - "eval_f1": 0.8908095697962214, - "eval_loss": 0.3369642496109009, - "eval_runtime": 29.6995, - "eval_samples_per_second": 253.001, - "eval_steps_per_second": 3.973, - "step": 55000 - }, - { - "epoch": 2.37, - "learning_rate": 1.1838163117237849e-05, - "loss": 0.1214, - "step": 55500 - }, - { - "epoch": 2.37, - "eval_accuracy": 0.8932659036465265, - "eval_f1": 0.8942589850549713, - "eval_loss": 0.3164837658405304, - "eval_runtime": 29.7307, - "eval_samples_per_second": 252.736, - "eval_steps_per_second": 3.969, - "step": 55500 - }, - { - "epoch": 2.39, - "learning_rate": 1.1944645981818958e-05, - "loss": 0.1252, - "step": 56000 - }, - { - "epoch": 2.39, - "eval_accuracy": 0.9021825924940112, - "eval_f1": 0.902743360666717, - "eval_loss": 0.29441598057746887, - "eval_runtime": 29.7002, - "eval_samples_per_second": 252.995, - "eval_steps_per_second": 3.973, - "step": 56000 - }, - { - "epoch": 2.41, - "learning_rate": 1.2051342238914261e-05, - "loss": 0.1219, - "step": 56500 - }, - { - "epoch": 2.41, - "eval_accuracy": 0.8979238754325259, - "eval_f1": 0.8984367636555247, - "eval_loss": 0.32014647126197815, - "eval_runtime": 29.7756, - "eval_samples_per_second": 252.354, - "eval_steps_per_second": 3.963, - "step": 56500 - }, - { - "epoch": 2.43, - "learning_rate": 1.2158038496009562e-05, - "loss": 0.1195, - "step": 57000 - }, - { - "epoch": 2.43, - "eval_accuracy": 0.9109661964333244, - "eval_f1": 0.9108760160900574, - "eval_loss": 0.2840426564216614, - "eval_runtime": 29.7122, - "eval_samples_per_second": 252.893, - "eval_steps_per_second": 3.971, - "step": 57000 - }, - { - "epoch": 2.45, - "learning_rate": 1.2264734753104862e-05, - "loss": 0.126, - "step": 57500 - }, - { - "epoch": 2.45, - "eval_accuracy": 0.9041788661165824, - "eval_f1": 0.9046980888295584, - "eval_loss": 0.2939707338809967, - "eval_runtime": 28.8636, - "eval_samples_per_second": 260.327, - "eval_steps_per_second": 4.088, - "step": 57500 - }, - { - "epoch": 2.48, - "learning_rate": 1.2371431010200164e-05, - "loss": 0.1282, - "step": 58000 - }, - { - "epoch": 2.48, - "eval_accuracy": 0.894064413095555, - "eval_f1": 0.8951953592249402, - "eval_loss": 0.3158866763114929, - "eval_runtime": 29.738, - "eval_samples_per_second": 252.674, - "eval_steps_per_second": 3.968, - "step": 58000 - }, - { - "epoch": 2.5, - "learning_rate": 1.2478127267295464e-05, - "loss": 0.1226, - "step": 58500 - }, - { - "epoch": 2.5, - "eval_accuracy": 0.8977907905243545, - "eval_f1": 0.8987052263199368, - "eval_loss": 0.31644803285598755, - "eval_runtime": 29.779, - "eval_samples_per_second": 252.325, - "eval_steps_per_second": 3.963, - "step": 58500 - }, - { - "epoch": 2.52, - "learning_rate": 1.2584823524390765e-05, - "loss": 0.1204, - "step": 59000 - }, - { - "epoch": 2.52, - "eval_accuracy": 0.9188182060154378, - "eval_f1": 0.9190661023623332, - "eval_loss": 0.2515789568424225, - "eval_runtime": 29.6787, - "eval_samples_per_second": 253.178, - "eval_steps_per_second": 3.976, - "step": 59000 - }, - { - "epoch": 2.54, - "learning_rate": 1.2691519781486066e-05, - "loss": 0.1217, - "step": 59500 - }, - { - "epoch": 2.54, - "eval_accuracy": 0.886345488421613, - "eval_f1": 0.8883012828628285, - "eval_loss": 0.3361978232860565, - "eval_runtime": 29.7086, - "eval_samples_per_second": 252.923, - "eval_steps_per_second": 3.972, - "step": 59500 - }, - { - "epoch": 2.56, - "learning_rate": 1.2798002646067178e-05, - "loss": 0.1196, - "step": 60000 - }, - { - "epoch": 2.56, - "eval_accuracy": 0.9121639606068672, - "eval_f1": 0.912425036592791, - "eval_loss": 0.2658819854259491, - "eval_runtime": 28.9201, - "eval_samples_per_second": 259.82, - "eval_steps_per_second": 4.08, - "step": 60000 - }, - { - "epoch": 2.58, - "learning_rate": 1.2904698903162478e-05, - "loss": 0.1251, - "step": 60500 - }, - { - "epoch": 2.58, - "eval_accuracy": 0.9209475645461804, - "eval_f1": 0.9211303421839581, - "eval_loss": 0.23802870512008667, - "eval_runtime": 29.6859, - "eval_samples_per_second": 253.117, - "eval_steps_per_second": 3.975, - "step": 60500 - }, - { - "epoch": 2.6, - "learning_rate": 1.3011395160257778e-05, - "loss": 0.1189, - "step": 61000 - }, - { - "epoch": 2.6, - "eval_accuracy": 0.9095022624434389, - "eval_f1": 0.9100155518175987, - "eval_loss": 0.2792932987213135, - "eval_runtime": 29.677, - "eval_samples_per_second": 253.193, - "eval_steps_per_second": 3.976, - "step": 61000 - }, - { - "epoch": 2.62, - "learning_rate": 1.311809141735308e-05, - "loss": 0.1236, - "step": 61500 - }, - { - "epoch": 2.62, - "eval_accuracy": 0.9140271493212669, - "eval_f1": 0.9145109646655343, - "eval_loss": 0.25876516103744507, - "eval_runtime": 29.7058, - "eval_samples_per_second": 252.947, - "eval_steps_per_second": 3.972, - "step": 61500 - }, - { - "epoch": 2.65, - "learning_rate": 1.322457428193419e-05, - "loss": 0.1214, - "step": 62000 - }, - { - "epoch": 2.65, - "eval_accuracy": 0.9057758850146393, - "eval_f1": 0.9067875023670071, - "eval_loss": 0.2852957546710968, - "eval_runtime": 29.6747, - "eval_samples_per_second": 253.212, - "eval_steps_per_second": 3.976, - "step": 62000 - }, - { - "epoch": 2.67, - "learning_rate": 1.3331270539029493e-05, - "loss": 0.118, - "step": 62500 - }, - { - "epoch": 2.67, - "eval_accuracy": 0.9136278945967528, - "eval_f1": 0.9141599682657664, - "eval_loss": 0.27117565274238586, - "eval_runtime": 28.9704, - "eval_samples_per_second": 259.368, - "eval_steps_per_second": 4.073, - "step": 62500 - }, - { - "epoch": 2.69, - "learning_rate": 1.3437966796124793e-05, - "loss": 0.1185, - "step": 63000 - }, - { - "epoch": 2.69, - "eval_accuracy": 0.896992281075326, - "eval_f1": 0.8987913278259848, - "eval_loss": 0.3407359719276428, - "eval_runtime": 29.6984, - "eval_samples_per_second": 253.01, - "eval_steps_per_second": 3.973, - "step": 63000 - }, - { - "epoch": 2.71, - "learning_rate": 1.3544663053220095e-05, - "loss": 0.1205, - "step": 63500 - }, - { - "epoch": 2.71, - "eval_accuracy": 0.9174873569337237, - "eval_f1": 0.9178361952808003, - "eval_loss": 0.2523012161254883, - "eval_runtime": 29.8027, - "eval_samples_per_second": 252.125, - "eval_steps_per_second": 3.959, - "step": 63500 - }, - { - "epoch": 2.73, - "learning_rate": 1.3651145917801205e-05, - "loss": 0.1237, - "step": 64000 - }, - { - "epoch": 2.73, - "eval_accuracy": 0.9218791589033803, - "eval_f1": 0.9220981450875413, - "eval_loss": 0.24377423524856567, - "eval_runtime": 29.4972, - "eval_samples_per_second": 254.736, - "eval_steps_per_second": 4.0, - "step": 64000 - }, - { - "epoch": 2.75, - "learning_rate": 1.3757842174896505e-05, - "loss": 0.1209, - "step": 64500 - }, - { - "epoch": 2.75, - "eval_accuracy": 0.9113654511578387, - "eval_f1": 0.9120163666906858, - "eval_loss": 0.2814728319644928, - "eval_runtime": 29.6606, - "eval_samples_per_second": 253.332, - "eval_steps_per_second": 3.978, - "step": 64500 - }, - { - "epoch": 2.77, - "learning_rate": 1.3864538431991807e-05, - "loss": 0.1181, - "step": 65000 - }, - { - "epoch": 2.77, - "eval_accuracy": 0.9112323662496673, - "eval_f1": 0.9117679194164329, - "eval_loss": 0.27400583028793335, - "eval_runtime": 29.6997, - "eval_samples_per_second": 252.999, - "eval_steps_per_second": 3.973, - "step": 65000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3971234689087107e-05, - "loss": 0.1247, - "step": 65500 - }, - { - "epoch": 2.8, - "eval_accuracy": 0.8949960074527549, - "eval_f1": 0.8963813695574201, - "eval_loss": 0.3465683162212372, - "eval_runtime": 29.8235, - "eval_samples_per_second": 251.949, - "eval_steps_per_second": 3.957, - "step": 65500 - }, - { - "epoch": 2.82, - "learning_rate": 1.4077717553668218e-05, - "loss": 0.1214, - "step": 66000 - }, - { - "epoch": 2.82, - "eval_accuracy": 0.9019164226776684, - "eval_f1": 0.9023520343501465, - "eval_loss": 0.2800099849700928, - "eval_runtime": 29.685, - "eval_samples_per_second": 253.125, - "eval_steps_per_second": 3.975, - "step": 66000 - }, - { - "epoch": 2.84, - "learning_rate": 1.418441381076352e-05, - "loss": 0.1232, - "step": 66500 - }, - { - "epoch": 2.84, - "eval_accuracy": 0.8988554697897259, - "eval_f1": 0.8998071192198867, - "eval_loss": 0.31529906392097473, - "eval_runtime": 29.7055, - "eval_samples_per_second": 252.95, - "eval_steps_per_second": 3.972, - "step": 66500 - }, - { - "epoch": 2.86, - "learning_rate": 1.429111006785882e-05, - "loss": 0.1235, - "step": 67000 - }, - { - "epoch": 2.86, - "eval_accuracy": 0.9121639606068672, - "eval_f1": 0.9130752200374649, - "eval_loss": 0.2936250567436218, - "eval_runtime": 29.6997, - "eval_samples_per_second": 253.0, - "eval_steps_per_second": 3.973, - "step": 67000 - }, - { - "epoch": 2.88, - "learning_rate": 1.4397806324954122e-05, - "loss": 0.1194, - "step": 67500 - }, - { - "epoch": 2.88, - "eval_accuracy": 0.9176204418418952, - "eval_f1": 0.9179026321234128, - "eval_loss": 0.23929628729820251, - "eval_runtime": 29.742, - "eval_samples_per_second": 252.639, - "eval_steps_per_second": 3.967, - "step": 67500 - }, - { - "epoch": 2.9, - "learning_rate": 1.4504289189535232e-05, - "loss": 0.1212, - "step": 68000 - }, - { - "epoch": 2.9, - "eval_accuracy": 0.9214799041788662, - "eval_f1": 0.9217761013099526, - "eval_loss": 0.2274727076292038, - "eval_runtime": 29.6815, - "eval_samples_per_second": 253.154, - "eval_steps_per_second": 3.976, - "step": 68000 - }, - { - "epoch": 2.92, - "learning_rate": 1.4610985446630532e-05, - "loss": 0.1179, - "step": 68500 - }, - { - "epoch": 2.92, - "eval_accuracy": 0.9103007718924674, - "eval_f1": 0.9111492564025686, - "eval_loss": 0.26987186074256897, - "eval_runtime": 29.6917, - "eval_samples_per_second": 253.067, - "eval_steps_per_second": 3.974, - "step": 68500 - }, - { - "epoch": 2.94, - "learning_rate": 1.4717681703725834e-05, - "loss": 0.1248, - "step": 69000 - }, - { - "epoch": 2.94, - "eval_accuracy": 0.9265371306893798, - "eval_f1": 0.926618029973173, - "eval_loss": 0.22422577440738678, - "eval_runtime": 29.8231, - "eval_samples_per_second": 251.952, - "eval_steps_per_second": 3.957, - "step": 69000 - }, - { - "epoch": 2.97, - "learning_rate": 1.4824377960821134e-05, - "loss": 0.1193, - "step": 69500 - }, - { - "epoch": 2.97, - "eval_accuracy": 0.9174873569337237, - "eval_f1": 0.9179152081146503, - "eval_loss": 0.2527088224887848, - "eval_runtime": 29.7228, - "eval_samples_per_second": 252.803, - "eval_steps_per_second": 3.97, - "step": 69500 - }, - { - "epoch": 2.99, - "learning_rate": 1.4930860825402247e-05, - "loss": 0.1191, - "step": 70000 - }, - { - "epoch": 2.99, - "eval_accuracy": 0.9103007718924674, - "eval_f1": 0.9114240853347546, - "eval_loss": 0.2774083614349365, - "eval_runtime": 29.8732, - "eval_samples_per_second": 251.53, - "eval_steps_per_second": 3.95, - "step": 70000 - }, - { - "epoch": 3.01, - "learning_rate": 1.5037557082497547e-05, - "loss": 0.1134, - "step": 70500 - }, - { - "epoch": 3.01, - "eval_accuracy": 0.9244077721586372, - "eval_f1": 0.924962986957909, - "eval_loss": 0.26340293884277344, - "eval_runtime": 29.7837, - "eval_samples_per_second": 252.286, - "eval_steps_per_second": 3.962, - "step": 70500 - }, - { - "epoch": 3.03, - "learning_rate": 1.5144253339592849e-05, - "loss": 0.0997, - "step": 71000 - }, - { - "epoch": 3.03, - "eval_accuracy": 0.9166888474846953, - "eval_f1": 0.9174450725360211, - "eval_loss": 0.2868385314941406, - "eval_runtime": 29.6773, - "eval_samples_per_second": 253.19, - "eval_steps_per_second": 3.976, - "step": 71000 - }, - { - "epoch": 3.05, - "learning_rate": 1.5250949596688149e-05, - "loss": 0.0997, - "step": 71500 - }, - { - "epoch": 3.05, - "eval_accuracy": 0.9075059888208677, - "eval_f1": 0.9083709688675202, - "eval_loss": 0.3178350329399109, - "eval_runtime": 29.7056, - "eval_samples_per_second": 252.949, - "eval_steps_per_second": 3.972, - "step": 71500 - }, - { - "epoch": 3.07, - "learning_rate": 1.5357432461269258e-05, - "loss": 0.103, - "step": 72000 - }, - { - "epoch": 3.07, - "eval_accuracy": 0.9055097151982965, - "eval_f1": 0.9066829178335443, - "eval_loss": 0.3471682071685791, - "eval_runtime": 29.8055, - "eval_samples_per_second": 252.101, - "eval_steps_per_second": 3.959, - "step": 72000 - }, - { - "epoch": 3.09, - "learning_rate": 1.5464128718364563e-05, - "loss": 0.1001, - "step": 72500 - }, - { - "epoch": 3.09, - "eval_accuracy": 0.9209475645461804, - "eval_f1": 0.9213604258381931, - "eval_loss": 0.2840951681137085, - "eval_runtime": 29.7444, - "eval_samples_per_second": 252.619, - "eval_steps_per_second": 3.967, - "step": 72500 - }, - { - "epoch": 3.12, - "learning_rate": 1.557082497545986e-05, - "loss": 0.1026, - "step": 73000 - }, - { - "epoch": 3.12, - "eval_accuracy": 0.9126963002395528, - "eval_f1": 0.9135468482778786, - "eval_loss": 0.31749972701072693, - "eval_runtime": 29.6905, - "eval_samples_per_second": 253.078, - "eval_steps_per_second": 3.974, - "step": 73000 - }, - { - "epoch": 3.14, - "learning_rate": 1.5677521232555163e-05, - "loss": 0.0992, - "step": 73500 - }, - { - "epoch": 3.14, - "eval_accuracy": 0.91243013042321, - "eval_f1": 0.9134755081204278, - "eval_loss": 0.3127423822879791, - "eval_runtime": 29.6796, - "eval_samples_per_second": 253.171, - "eval_steps_per_second": 3.976, - "step": 73500 - }, - { - "epoch": 3.16, - "learning_rate": 1.5784004097136276e-05, - "loss": 0.0962, - "step": 74000 - }, - { - "epoch": 3.16, - "eval_accuracy": 0.9240085174341229, - "eval_f1": 0.9243631550194533, - "eval_loss": 0.27587592601776123, - "eval_runtime": 29.6853, - "eval_samples_per_second": 253.122, - "eval_steps_per_second": 3.975, - "step": 74000 - }, - { - "epoch": 3.18, - "learning_rate": 1.5890700354231574e-05, - "loss": 0.1029, - "step": 74500 - }, - { - "epoch": 3.18, - "eval_accuracy": 0.9244077721586372, - "eval_f1": 0.9248144452947205, - "eval_loss": 0.25898730754852295, - "eval_runtime": 29.716, - "eval_samples_per_second": 252.86, - "eval_steps_per_second": 3.971, - "step": 74500 - }, - { - "epoch": 3.2, - "learning_rate": 1.5997396611326876e-05, - "loss": 0.1017, - "step": 75000 - }, - { - "epoch": 3.2, - "eval_accuracy": 0.9157572531274953, - "eval_f1": 0.9160725646478424, - "eval_loss": 0.29474782943725586, - "eval_runtime": 29.7179, - "eval_samples_per_second": 252.844, - "eval_steps_per_second": 3.971, - "step": 75000 - }, - { - "epoch": 3.22, - "learning_rate": 1.6103879475907985e-05, - "loss": 0.0975, - "step": 75500 - }, - { - "epoch": 3.22, - "eval_accuracy": 0.9104338568006388, - "eval_f1": 0.9114419436408188, - "eval_loss": 0.3418111205101013, - "eval_runtime": 29.7159, - "eval_samples_per_second": 252.861, - "eval_steps_per_second": 3.971, - "step": 75500 - }, - { - "epoch": 3.24, - "learning_rate": 1.6210575733003287e-05, - "loss": 0.1017, - "step": 76000 - }, - { - "epoch": 3.24, - "eval_accuracy": 0.920814479638009, - "eval_f1": 0.9212922615435651, - "eval_loss": 0.2858801484107971, - "eval_runtime": 29.7053, - "eval_samples_per_second": 252.952, - "eval_steps_per_second": 3.972, - "step": 76000 - }, - { - "epoch": 3.26, - "learning_rate": 1.631727199009859e-05, - "loss": 0.1056, - "step": 76500 - }, - { - "epoch": 3.26, - "eval_accuracy": 0.9258717061485228, - "eval_f1": 0.9258138327069322, - "eval_loss": 0.26912811398506165, - "eval_runtime": 29.8329, - "eval_samples_per_second": 251.87, - "eval_steps_per_second": 3.955, - "step": 76500 - }, - { - "epoch": 3.29, - "learning_rate": 1.642396824719389e-05, - "loss": 0.1005, - "step": 77000 - }, - { - "epoch": 3.29, - "eval_accuracy": 0.8961937716262975, - "eval_f1": 0.898532464780015, - "eval_loss": 0.3822651505470276, - "eval_runtime": 29.6919, - "eval_samples_per_second": 253.065, - "eval_steps_per_second": 3.974, - "step": 77000 - }, - { - "epoch": 3.31, - "learning_rate": 1.6530664504289192e-05, - "loss": 0.1016, - "step": 77500 - }, - { - "epoch": 3.31, - "eval_accuracy": 0.8926004791056694, - "eval_f1": 0.89485432153672, - "eval_loss": 0.3906500041484833, - "eval_runtime": 29.7236, - "eval_samples_per_second": 252.796, - "eval_steps_per_second": 3.97, - "step": 77500 - }, - { - "epoch": 3.33, - "learning_rate": 1.663736076138449e-05, - "loss": 0.1015, - "step": 78000 - }, - { - "epoch": 3.33, - "eval_accuracy": 0.9012509981368113, - "eval_f1": 0.9025446954153953, - "eval_loss": 0.36229291558265686, - "eval_runtime": 29.7176, - "eval_samples_per_second": 252.847, - "eval_steps_per_second": 3.971, - "step": 78000 - }, - { - "epoch": 3.35, - "learning_rate": 1.6744057018479793e-05, - "loss": 0.1068, - "step": 78500 - }, - { - "epoch": 3.35, - "eval_accuracy": 0.9196167154644663, - "eval_f1": 0.9202631935738008, - "eval_loss": 0.28303927183151245, - "eval_runtime": 29.6695, - "eval_samples_per_second": 253.257, - "eval_steps_per_second": 3.977, - "step": 78500 - }, - { - "epoch": 3.37, - "learning_rate": 1.6850753275575094e-05, - "loss": 0.1023, - "step": 79000 - }, - { - "epoch": 3.37, - "eval_accuracy": 0.9190843758317807, - "eval_f1": 0.919786947414631, - "eval_loss": 0.285210520029068, - "eval_runtime": 29.66, - "eval_samples_per_second": 253.338, - "eval_steps_per_second": 3.978, - "step": 79000 - }, - { - "epoch": 3.39, - "learning_rate": 1.6957236140156207e-05, - "loss": 0.104, - "step": 79500 - }, - { - "epoch": 3.39, - "eval_accuracy": 0.914692573862124, - "eval_f1": 0.9155263369499395, - "eval_loss": 0.2765791714191437, - "eval_runtime": 29.2294, - "eval_samples_per_second": 257.07, - "eval_steps_per_second": 4.037, - "step": 79500 - }, - { - "epoch": 3.41, - "learning_rate": 1.7063932397251505e-05, - "loss": 0.1057, - "step": 80000 - }, - { - "epoch": 3.41, - "eval_accuracy": 0.9245408570668087, - "eval_f1": 0.9249301872373819, - "eval_loss": 0.2611972391605377, - "eval_runtime": 29.7005, - "eval_samples_per_second": 252.992, - "eval_steps_per_second": 3.973, - "step": 80000 - }, - { - "epoch": 3.44, - "learning_rate": 1.7170628654346807e-05, - "loss": 0.1052, - "step": 80500 - }, - { - "epoch": 3.44, - "eval_accuracy": 0.9154910833111525, - "eval_f1": 0.9164321348413044, - "eval_loss": 0.3005645275115967, - "eval_runtime": 29.7061, - "eval_samples_per_second": 252.944, - "eval_steps_per_second": 3.972, - "step": 80500 - }, - { - "epoch": 3.46, - "learning_rate": 1.727711151892792e-05, - "loss": 0.1009, - "step": 81000 - }, - { - "epoch": 3.46, - "eval_accuracy": 0.9065743944636678, - "eval_f1": 0.907476050006008, - "eval_loss": 0.30318447947502136, - "eval_runtime": 29.7072, - "eval_samples_per_second": 252.935, - "eval_steps_per_second": 3.972, - "step": 81000 - }, - { - "epoch": 3.48, - "learning_rate": 1.7383807776023218e-05, - "loss": 0.1033, - "step": 81500 - }, - { - "epoch": 3.48, - "eval_accuracy": 0.906973649188182, - "eval_f1": 0.9082369305977184, - "eval_loss": 0.31744304299354553, - "eval_runtime": 29.6831, - "eval_samples_per_second": 253.141, - "eval_steps_per_second": 3.975, - "step": 81500 - }, - { - "epoch": 3.5, - "learning_rate": 1.749050403311852e-05, - "loss": 0.1099, - "step": 82000 - }, - { - "epoch": 3.5, - "eval_accuracy": 0.9252062816076657, - "eval_f1": 0.9255446305995462, - "eval_loss": 0.23546352982521057, - "eval_runtime": 28.9475, - "eval_samples_per_second": 259.574, - "eval_steps_per_second": 4.076, - "step": 82000 - }, - { - "epoch": 3.52, - "learning_rate": 1.759720029021382e-05, - "loss": 0.1075, - "step": 82500 - }, - { - "epoch": 3.52, - "eval_accuracy": 0.9303965930263508, - "eval_f1": 0.9305716790224627, - "eval_loss": 0.23489102721214294, - "eval_runtime": 29.7169, - "eval_samples_per_second": 252.853, - "eval_steps_per_second": 3.971, - "step": 82500 - }, - { - "epoch": 3.54, - "learning_rate": 1.7703896547309123e-05, - "loss": 0.1082, - "step": 83000 - }, - { - "epoch": 3.54, - "eval_accuracy": 0.9017833377694969, - "eval_f1": 0.9031724871437866, - "eval_loss": 0.3438743054866791, - "eval_runtime": 29.683, - "eval_samples_per_second": 253.141, - "eval_steps_per_second": 3.975, - "step": 83000 - }, - { - "epoch": 3.56, - "learning_rate": 1.7810379411890232e-05, - "loss": 0.107, - "step": 83500 - }, - { - "epoch": 3.56, - "eval_accuracy": 0.9190843758317807, - "eval_f1": 0.919808533673745, - "eval_loss": 0.2954671084880829, - "eval_runtime": 29.6856, - "eval_samples_per_second": 253.119, - "eval_steps_per_second": 3.975, - "step": 83500 - }, - { - "epoch": 3.58, - "learning_rate": 1.7917075668985534e-05, - "loss": 0.1069, - "step": 84000 - }, - { - "epoch": 3.58, - "eval_accuracy": 0.9269363854138941, - "eval_f1": 0.9274280575614218, - "eval_loss": 0.2474394142627716, - "eval_runtime": 29.7052, - "eval_samples_per_second": 252.952, - "eval_steps_per_second": 3.972, - "step": 84000 - }, - { - "epoch": 3.61, - "learning_rate": 1.8023771926080836e-05, - "loss": 0.1101, - "step": 84500 - }, - { - "epoch": 3.61, - "eval_accuracy": 0.9232100079850944, - "eval_f1": 0.9238938753643797, - "eval_loss": 0.28328654170036316, - "eval_runtime": 28.9009, - "eval_samples_per_second": 259.992, - "eval_steps_per_second": 4.083, - "step": 84500 - }, - { - "epoch": 3.63, - "learning_rate": 1.8130468183176134e-05, - "loss": 0.1121, - "step": 85000 - }, - { - "epoch": 3.63, - "eval_accuracy": 0.9190843758317807, - "eval_f1": 0.9199827093655857, - "eval_loss": 0.2627813220024109, - "eval_runtime": 29.7188, - "eval_samples_per_second": 252.837, - "eval_steps_per_second": 3.971, - "step": 85000 - }, - { - "epoch": 3.65, - "learning_rate": 1.8237164440271436e-05, - "loss": 0.1083, - "step": 85500 - }, - { - "epoch": 3.65, - "eval_accuracy": 0.9326590364652648, - "eval_f1": 0.9329981540836807, - "eval_loss": 0.24135735630989075, - "eval_runtime": 29.6566, - "eval_samples_per_second": 253.367, - "eval_steps_per_second": 3.979, - "step": 85500 - }, - { - "epoch": 3.67, - "learning_rate": 1.8343860697366738e-05, - "loss": 0.1039, - "step": 86000 - }, - { - "epoch": 3.67, - "eval_accuracy": 0.9241416023422944, - "eval_f1": 0.924742970053259, - "eval_loss": 0.26063743233680725, - "eval_runtime": 29.6683, - "eval_samples_per_second": 253.267, - "eval_steps_per_second": 3.977, - "step": 86000 - }, - { - "epoch": 3.69, - "learning_rate": 1.845055695446204e-05, - "loss": 0.1055, - "step": 86500 - }, - { - "epoch": 3.69, - "eval_accuracy": 0.9122970455150385, - "eval_f1": 0.9130212242543791, - "eval_loss": 0.2822701036930084, - "eval_runtime": 29.685, - "eval_samples_per_second": 253.124, - "eval_steps_per_second": 3.975, - "step": 86500 - }, - { - "epoch": 3.71, - "learning_rate": 1.8557253211557342e-05, - "loss": 0.107, - "step": 87000 - }, - { - "epoch": 3.71, - "eval_accuracy": 0.9278679797710939, - "eval_f1": 0.9283307100808318, - "eval_loss": 0.24245958030223846, - "eval_runtime": 27.1463, - "eval_samples_per_second": 276.797, - "eval_steps_per_second": 4.347, - "step": 87000 - }, - { - "epoch": 3.73, - "learning_rate": 1.866373607613845e-05, - "loss": 0.1075, - "step": 87500 - }, - { - "epoch": 3.73, - "eval_accuracy": 0.9148256587702954, - "eval_f1": 0.9157022847611576, - "eval_loss": 0.2839546799659729, - "eval_runtime": 29.7126, - "eval_samples_per_second": 252.889, - "eval_steps_per_second": 3.971, - "step": 87500 - }, - { - "epoch": 3.76, - "learning_rate": 1.8770432333233752e-05, - "loss": 0.1078, - "step": 88000 - }, - { - "epoch": 3.76, - "eval_accuracy": 0.9299973383018366, - "eval_f1": 0.9302152729819625, - "eval_loss": 0.22580114006996155, - "eval_runtime": 29.6944, - "eval_samples_per_second": 253.044, - "eval_steps_per_second": 3.974, - "step": 88000 - }, - { - "epoch": 3.78, - "learning_rate": 1.887712859032905e-05, - "loss": 0.1072, - "step": 88500 - }, - { - "epoch": 3.78, - "eval_accuracy": 0.9287995741282938, - "eval_f1": 0.9292731554080146, - "eval_loss": 0.2478398084640503, - "eval_runtime": 29.7139, - "eval_samples_per_second": 252.878, - "eval_steps_per_second": 3.971, - "step": 88500 - }, - { - "epoch": 3.8, - "learning_rate": 1.8983824847424356e-05, - "loss": 0.1097, - "step": 89000 - }, - { - "epoch": 3.8, - "eval_accuracy": 0.9060420548309822, - "eval_f1": 0.9077407417767054, - "eval_loss": 0.31326791644096375, - "eval_runtime": 29.7069, - "eval_samples_per_second": 252.938, - "eval_steps_per_second": 3.972, - "step": 89000 - }, - { - "epoch": 3.82, - "learning_rate": 1.9090307712005465e-05, - "loss": 0.1037, - "step": 89500 - }, - { - "epoch": 3.82, - "eval_accuracy": 0.9298642533936652, - "eval_f1": 0.9303728668629602, - "eval_loss": 0.2570092976093292, - "eval_runtime": 29.7155, - "eval_samples_per_second": 252.864, - "eval_steps_per_second": 3.971, - "step": 89500 - }, - { - "epoch": 3.84, - "learning_rate": 1.9197003969100764e-05, - "loss": 0.1096, - "step": 90000 - }, - { - "epoch": 3.84, - "eval_accuracy": 0.9269363854138941, - "eval_f1": 0.9273172425282155, - "eval_loss": 0.24834661185741425, - "eval_runtime": 29.7079, - "eval_samples_per_second": 252.929, - "eval_steps_per_second": 3.972, - "step": 90000 - }, - { - "epoch": 3.86, - "learning_rate": 1.930370022619607e-05, - "loss": 0.1082, - "step": 90500 - }, - { - "epoch": 3.86, - "eval_accuracy": 0.9241416023422944, - "eval_f1": 0.9247359215150383, - "eval_loss": 0.2676146328449249, - "eval_runtime": 29.6791, - "eval_samples_per_second": 253.175, - "eval_steps_per_second": 3.976, - "step": 90500 - }, - { - "epoch": 3.88, - "learning_rate": 1.9410183090777178e-05, - "loss": 0.1096, - "step": 91000 - }, - { - "epoch": 3.88, - "eval_accuracy": 0.9311951024753793, - "eval_f1": 0.9314316700507581, - "eval_loss": 0.23314546048641205, - "eval_runtime": 29.6746, - "eval_samples_per_second": 253.214, - "eval_steps_per_second": 3.976, - "step": 91000 - }, - { - "epoch": 3.91, - "learning_rate": 1.9516879347872476e-05, - "loss": 0.1035, - "step": 91500 - }, - { - "epoch": 3.91, - "eval_accuracy": 0.9250731966994943, - "eval_f1": 0.9256904714759885, - "eval_loss": 0.27229878306388855, - "eval_runtime": 29.8012, - "eval_samples_per_second": 252.137, - "eval_steps_per_second": 3.96, - "step": 91500 - }, - { - "epoch": 3.93, - "learning_rate": 1.9623575604967778e-05, - "loss": 0.1089, - "step": 92000 - }, - { - "epoch": 3.93, - "eval_accuracy": 0.9250731966994943, - "eval_f1": 0.9255950104312578, - "eval_loss": 0.2421479970216751, - "eval_runtime": 29.7466, - "eval_samples_per_second": 252.6, - "eval_steps_per_second": 3.967, - "step": 92000 - }, - { - "epoch": 3.95, - "learning_rate": 1.973027186206308e-05, - "loss": 0.1074, - "step": 92500 - }, - { - "epoch": 3.95, - "eval_accuracy": 0.9317274421080649, - "eval_f1": 0.9321005917005625, - "eval_loss": 0.23583532869815826, - "eval_runtime": 29.682, - "eval_samples_per_second": 253.15, - "eval_steps_per_second": 3.975, - "step": 92500 - }, - { - "epoch": 3.97, - "learning_rate": 1.983696811915838e-05, - "loss": 0.1098, - "step": 93000 - }, - { - "epoch": 3.97, - "eval_accuracy": 0.9274687250465797, - "eval_f1": 0.9278264551595683, - "eval_loss": 0.23873497545719147, - "eval_runtime": 29.6836, - "eval_samples_per_second": 253.136, - "eval_steps_per_second": 3.975, - "step": 93000 - }, - { - "epoch": 3.99, - "learning_rate": 1.9943664376253684e-05, - "loss": 0.1082, - "step": 93500 - }, - { - "epoch": 3.99, - "eval_accuracy": 0.9343891402714932, - "eval_f1": 0.9346466592321752, - "eval_loss": 0.22431565821170807, - "eval_runtime": 29.6604, - "eval_samples_per_second": 253.334, - "eval_steps_per_second": 3.978, - "step": 93500 - }, - { - "epoch": 4.01, - "learning_rate": 1.9987409841662757e-05, - "loss": 0.0967, - "step": 94000 - }, - { - "epoch": 4.01, - "eval_accuracy": 0.9284003194037797, - "eval_f1": 0.9290340605178463, - "eval_loss": 0.29876431822776794, - "eval_runtime": 29.7877, - "eval_samples_per_second": 252.251, - "eval_steps_per_second": 3.961, - "step": 94000 - }, - { - "epoch": 4.03, - "learning_rate": 1.996078912551748e-05, - "loss": 0.0841, - "step": 94500 - }, - { - "epoch": 4.03, - "eval_accuracy": 0.9225445834442374, - "eval_f1": 0.9227958817621394, - "eval_loss": 0.3175296187400818, - "eval_runtime": 29.7507, - "eval_samples_per_second": 252.565, - "eval_steps_per_second": 3.966, - "step": 94500 - }, - { - "epoch": 4.05, - "learning_rate": 1.9934115061243654e-05, - "loss": 0.0882, - "step": 95000 - }, - { - "epoch": 4.05, - "eval_accuracy": 0.9233430928932659, - "eval_f1": 0.9241105837969426, - "eval_loss": 0.3155466616153717, - "eval_runtime": 29.6776, - "eval_samples_per_second": 253.187, - "eval_steps_per_second": 3.976, - "step": 95000 - }, - { - "epoch": 4.08, - "learning_rate": 1.990744099696983e-05, - "loss": 0.0883, - "step": 95500 - }, - { - "epoch": 4.08, - "eval_accuracy": 0.9313281873835507, - "eval_f1": 0.9316087493957224, - "eval_loss": 0.24962832033634186, - "eval_runtime": 29.6708, - "eval_samples_per_second": 253.245, - "eval_steps_per_second": 3.977, - "step": 95500 - }, - { - "epoch": 4.1, - "learning_rate": 1.9880766932696003e-05, - "loss": 0.0899, - "step": 96000 - }, - { - "epoch": 4.1, - "eval_accuracy": 0.9112323662496673, - "eval_f1": 0.912925405010681, - "eval_loss": 0.3676290512084961, - "eval_runtime": 29.6697, - "eval_samples_per_second": 253.255, - "eval_steps_per_second": 3.977, - "step": 96000 - }, - { - "epoch": 4.12, - "learning_rate": 1.9854146216550725e-05, - "loss": 0.0859, - "step": 96500 - }, - { - "epoch": 4.12, - "eval_accuracy": 0.9353207346286931, - "eval_f1": 0.9356799670819131, - "eval_loss": 0.25173771381378174, - "eval_runtime": 29.6986, - "eval_samples_per_second": 253.008, - "eval_steps_per_second": 3.973, - "step": 96500 - }, - { - "epoch": 4.14, - "learning_rate": 1.98274721522769e-05, - "loss": 0.0852, - "step": 97000 - }, - { - "epoch": 4.14, - "eval_accuracy": 0.9256055363321799, - "eval_f1": 0.9263085710587642, - "eval_loss": 0.30905455350875854, - "eval_runtime": 29.7296, - "eval_samples_per_second": 252.744, - "eval_steps_per_second": 3.969, - "step": 97000 - }, - { - "epoch": 4.16, - "learning_rate": 1.9800798088003074e-05, - "loss": 0.0908, - "step": 97500 - }, - { - "epoch": 4.16, - "eval_accuracy": 0.9284003194037797, - "eval_f1": 0.9289808331035979, - "eval_loss": 0.2927681505680084, - "eval_runtime": 29.6939, - "eval_samples_per_second": 253.048, - "eval_steps_per_second": 3.974, - "step": 97500 - }, - { - "epoch": 4.18, - "learning_rate": 1.977412402372925e-05, - "loss": 0.0876, - "step": 98000 - }, - { - "epoch": 4.18, - "eval_accuracy": 0.9378493478839499, - "eval_f1": 0.9379757198421206, - "eval_loss": 0.23220977187156677, - "eval_runtime": 29.7676, - "eval_samples_per_second": 252.422, - "eval_steps_per_second": 3.964, - "step": 98000 - }, - { - "epoch": 4.2, - "learning_rate": 1.974750330758397e-05, - "loss": 0.0884, - "step": 98500 - }, - { - "epoch": 4.2, - "eval_accuracy": 0.9299973383018366, - "eval_f1": 0.9304508259060222, - "eval_loss": 0.2828425467014313, - "eval_runtime": 29.6703, - "eval_samples_per_second": 253.25, - "eval_steps_per_second": 3.977, - "step": 98500 - }, - { - "epoch": 4.23, - "learning_rate": 1.972082924331015e-05, - "loss": 0.0906, - "step": 99000 - }, - { - "epoch": 4.23, - "eval_accuracy": 0.9338568006388076, - "eval_f1": 0.934280526666979, - "eval_loss": 0.25503915548324585, - "eval_runtime": 29.6986, - "eval_samples_per_second": 253.008, - "eval_steps_per_second": 3.973, - "step": 99000 - }, - { - "epoch": 4.25, - "learning_rate": 1.969415517903632e-05, - "loss": 0.0923, - "step": 99500 - }, - { - "epoch": 4.25, - "eval_accuracy": 0.9330582911897791, - "eval_f1": 0.9332689003068755, - "eval_loss": 0.25108087062835693, - "eval_runtime": 29.8057, - "eval_samples_per_second": 252.1, - "eval_steps_per_second": 3.959, - "step": 99500 - }, - { - "epoch": 4.27, - "learning_rate": 1.9667481114762497e-05, - "loss": 0.0929, - "step": 100000 - }, - { - "epoch": 4.27, - "eval_accuracy": 0.9353207346286931, - "eval_f1": 0.935553232165074, - "eval_loss": 0.26846399903297424, - "eval_runtime": 29.7753, - "eval_samples_per_second": 252.357, - "eval_steps_per_second": 3.963, - "step": 100000 - }, - { - "epoch": 4.29, - "learning_rate": 1.9640807050488672e-05, - "loss": 0.091, - "step": 100500 - }, - { - "epoch": 4.29, - "eval_accuracy": 0.9272025552302369, - "eval_f1": 0.9279322378363214, - "eval_loss": 0.30113697052001953, - "eval_runtime": 29.8112, - "eval_samples_per_second": 252.053, - "eval_steps_per_second": 3.958, - "step": 100500 - }, - { - "epoch": 4.31, - "learning_rate": 1.9614132986214843e-05, - "loss": 0.093, - "step": 101000 - }, - { - "epoch": 4.31, - "eval_accuracy": 0.92467394197498, - "eval_f1": 0.92528965941888, - "eval_loss": 0.28048303723335266, - "eval_runtime": 29.6933, - "eval_samples_per_second": 253.054, - "eval_steps_per_second": 3.974, - "step": 101000 - }, - { - "epoch": 4.33, - "learning_rate": 1.958745892194102e-05, - "loss": 0.0894, - "step": 101500 - }, - { - "epoch": 4.33, - "eval_accuracy": 0.920149055097152, - "eval_f1": 0.9212457425349848, - "eval_loss": 0.30429819226264954, - "eval_runtime": 29.6772, - "eval_samples_per_second": 253.191, - "eval_steps_per_second": 3.976, - "step": 101500 - }, - { - "epoch": 4.35, - "learning_rate": 1.9560784857667196e-05, - "loss": 0.0912, - "step": 102000 - }, - { - "epoch": 4.35, - "eval_accuracy": 0.9229438381687517, - "eval_f1": 0.9239101512746192, - "eval_loss": 0.30318892002105713, - "eval_runtime": 29.7216, - "eval_samples_per_second": 252.812, - "eval_steps_per_second": 3.97, - "step": 102000 - }, - { - "epoch": 4.37, - "learning_rate": 1.953411079339337e-05, - "loss": 0.0914, - "step": 102500 - }, - { - "epoch": 4.37, - "eval_accuracy": 0.9164226776683524, - "eval_f1": 0.9172366101282634, - "eval_loss": 0.31719163060188293, - "eval_runtime": 29.6376, - "eval_samples_per_second": 253.529, - "eval_steps_per_second": 3.981, - "step": 102500 - }, - { - "epoch": 4.4, - "learning_rate": 1.9507490077248092e-05, - "loss": 0.0902, - "step": 103000 - }, - { - "epoch": 4.4, - "eval_accuracy": 0.9337237157306362, - "eval_f1": 0.9340855551795185, - "eval_loss": 0.25142449140548706, - "eval_runtime": 29.7958, - "eval_samples_per_second": 252.183, - "eval_steps_per_second": 3.96, - "step": 103000 - }, - { - "epoch": 4.42, - "learning_rate": 1.9480869361102814e-05, - "loss": 0.0948, - "step": 103500 - }, - { - "epoch": 4.42, - "eval_accuracy": 0.9225445834442374, - "eval_f1": 0.923140532339757, - "eval_loss": 0.2710263133049011, - "eval_runtime": 29.694, - "eval_samples_per_second": 253.048, - "eval_steps_per_second": 3.974, - "step": 103500 - }, - { - "epoch": 4.44, - "learning_rate": 1.945419529682899e-05, - "loss": 0.0926, - "step": 104000 - }, - { - "epoch": 4.44, - "eval_accuracy": 0.9319936119244078, - "eval_f1": 0.9323841800449281, - "eval_loss": 0.2564203441143036, - "eval_runtime": 28.8753, - "eval_samples_per_second": 260.222, - "eval_steps_per_second": 4.087, - "step": 104000 - }, - { - "epoch": 4.46, - "learning_rate": 1.9427521232555163e-05, - "loss": 0.0918, - "step": 104500 - }, - { - "epoch": 4.46, - "eval_accuracy": 0.9375831780676072, - "eval_f1": 0.9377531909606569, - "eval_loss": 0.2197369635105133, - "eval_runtime": 29.7897, - "eval_samples_per_second": 252.235, - "eval_steps_per_second": 3.961, - "step": 104500 - }, - { - "epoch": 4.48, - "learning_rate": 1.9400847168281338e-05, - "loss": 0.0928, - "step": 105000 - }, - { - "epoch": 4.48, - "eval_accuracy": 0.9230769230769231, - "eval_f1": 0.9238162163050091, - "eval_loss": 0.2949956953525543, - "eval_runtime": 29.7038, - "eval_samples_per_second": 252.964, - "eval_steps_per_second": 3.973, - "step": 105000 - }, - { - "epoch": 4.5, - "learning_rate": 1.9374173104007512e-05, - "loss": 0.0911, - "step": 105500 - }, - { - "epoch": 4.5, - "eval_accuracy": 0.9261378759648656, - "eval_f1": 0.9270039183662078, - "eval_loss": 0.3085399568080902, - "eval_runtime": 29.6996, - "eval_samples_per_second": 253.0, - "eval_steps_per_second": 3.973, - "step": 105500 - }, - { - "epoch": 4.52, - "learning_rate": 1.9347499039733687e-05, - "loss": 0.0916, - "step": 106000 - }, - { - "epoch": 4.52, - "eval_accuracy": 0.9256055363321799, - "eval_f1": 0.9265535422815637, - "eval_loss": 0.2896316647529602, - "eval_runtime": 29.7002, - "eval_samples_per_second": 252.995, - "eval_steps_per_second": 3.973, - "step": 106000 - }, - { - "epoch": 4.55, - "learning_rate": 1.932087832358841e-05, - "loss": 0.0903, - "step": 106500 - }, - { - "epoch": 4.55, - "eval_accuracy": 0.9277348948629226, - "eval_f1": 0.9284942853366587, - "eval_loss": 0.30977749824523926, - "eval_runtime": 28.875, - "eval_samples_per_second": 260.225, - "eval_steps_per_second": 4.087, - "step": 106500 - }, - { - "epoch": 4.57, - "learning_rate": 1.9294204259314583e-05, - "loss": 0.0909, - "step": 107000 - }, - { - "epoch": 4.57, - "eval_accuracy": 0.9168219323928667, - "eval_f1": 0.9182154468537094, - "eval_loss": 0.33723878860473633, - "eval_runtime": 29.6597, - "eval_samples_per_second": 253.34, - "eval_steps_per_second": 3.978, - "step": 107000 - }, - { - "epoch": 4.59, - "learning_rate": 1.926753019504076e-05, - "loss": 0.0974, - "step": 107500 - }, - { - "epoch": 4.59, - "eval_accuracy": 0.9206813947298377, - "eval_f1": 0.9215757708309522, - "eval_loss": 0.2856293022632599, - "eval_runtime": 29.641, - "eval_samples_per_second": 253.5, - "eval_steps_per_second": 3.981, - "step": 107500 - }, - { - "epoch": 4.61, - "learning_rate": 1.9240856130766936e-05, - "loss": 0.0947, - "step": 108000 - }, - { - "epoch": 4.61, - "eval_accuracy": 0.9347883949960074, - "eval_f1": 0.9353102062008903, - "eval_loss": 0.23915627598762512, - "eval_runtime": 29.6912, - "eval_samples_per_second": 253.071, - "eval_steps_per_second": 3.974, - "step": 108000 - }, - { - "epoch": 4.63, - "learning_rate": 1.921418206649311e-05, - "loss": 0.0923, - "step": 108500 - }, - { - "epoch": 4.63, - "eval_accuracy": 0.9389140271493213, - "eval_f1": 0.939221752097426, - "eval_loss": 0.23398292064666748, - "eval_runtime": 29.6922, - "eval_samples_per_second": 253.063, - "eval_steps_per_second": 3.974, - "step": 108500 - }, - { - "epoch": 4.65, - "learning_rate": 1.9187508002219285e-05, - "loss": 0.0888, - "step": 109000 - }, - { - "epoch": 4.65, - "eval_accuracy": 0.9249401117913228, - "eval_f1": 0.9261639270408716, - "eval_loss": 0.3061336278915405, - "eval_runtime": 28.8839, - "eval_samples_per_second": 260.145, - "eval_steps_per_second": 4.085, - "step": 109000 - }, - { - "epoch": 4.67, - "learning_rate": 1.916083393794546e-05, - "loss": 0.0936, - "step": 109500 - }, - { - "epoch": 4.67, - "eval_accuracy": 0.9330582911897791, - "eval_f1": 0.9333501847126282, - "eval_loss": 0.24740859866142273, - "eval_runtime": 29.7064, - "eval_samples_per_second": 252.942, - "eval_steps_per_second": 3.972, - "step": 109500 - }, - { - "epoch": 4.69, - "learning_rate": 1.9134159873671634e-05, - "loss": 0.0943, - "step": 110000 - }, - { - "epoch": 4.69, - "eval_accuracy": 0.9383816875166356, - "eval_f1": 0.9386827805161855, - "eval_loss": 0.21956907212734222, - "eval_runtime": 29.6931, - "eval_samples_per_second": 253.056, - "eval_steps_per_second": 3.974, - "step": 110000 - }, - { - "epoch": 4.72, - "learning_rate": 1.9107539157526356e-05, - "loss": 0.0912, - "step": 110500 - }, - { - "epoch": 4.72, - "eval_accuracy": 0.9184189512909237, - "eval_f1": 0.9196958151079796, - "eval_loss": 0.2968537509441376, - "eval_runtime": 29.65, - "eval_samples_per_second": 253.423, - "eval_steps_per_second": 3.98, - "step": 110500 - }, - { - "epoch": 4.74, - "learning_rate": 1.9080918441381078e-05, - "loss": 0.0885, - "step": 111000 - }, - { - "epoch": 4.74, - "eval_accuracy": 0.9330582911897791, - "eval_f1": 0.9335718524554016, - "eval_loss": 0.25288963317871094, - "eval_runtime": 29.6594, - "eval_samples_per_second": 253.343, - "eval_steps_per_second": 3.979, - "step": 111000 - }, - { - "epoch": 4.76, - "learning_rate": 1.9054244377107252e-05, - "loss": 0.0939, - "step": 111500 - }, - { - "epoch": 4.76, - "eval_accuracy": 0.9333244610061219, - "eval_f1": 0.9340340926527145, - "eval_loss": 0.2551884949207306, - "eval_runtime": 29.6834, - "eval_samples_per_second": 253.138, - "eval_steps_per_second": 3.975, - "step": 111500 - }, - { - "epoch": 4.78, - "learning_rate": 1.9027570312833427e-05, - "loss": 0.0901, - "step": 112000 - }, - { - "epoch": 4.78, - "eval_accuracy": 0.9357199893532073, - "eval_f1": 0.9362421857356324, - "eval_loss": 0.25156065821647644, - "eval_runtime": 29.7058, - "eval_samples_per_second": 252.947, - "eval_steps_per_second": 3.972, - "step": 112000 - }, - { - "epoch": 4.8, - "learning_rate": 1.90008962485596e-05, - "loss": 0.0961, - "step": 112500 - }, - { - "epoch": 4.8, - "eval_accuracy": 0.9329252062816077, - "eval_f1": 0.933612036082154, - "eval_loss": 0.2644532322883606, - "eval_runtime": 29.6722, - "eval_samples_per_second": 253.234, - "eval_steps_per_second": 3.977, - "step": 112500 - }, - { - "epoch": 4.82, - "learning_rate": 1.897422218428578e-05, - "loss": 0.0941, - "step": 113000 - }, - { - "epoch": 4.82, - "eval_accuracy": 0.9307958477508651, - "eval_f1": 0.931396585946887, - "eval_loss": 0.2659129202365875, - "eval_runtime": 29.7902, - "eval_samples_per_second": 252.231, - "eval_steps_per_second": 3.961, - "step": 113000 - }, - { - "epoch": 4.84, - "learning_rate": 1.89476014681405e-05, - "loss": 0.0917, - "step": 113500 - }, - { - "epoch": 4.84, - "eval_accuracy": 0.9394463667820069, - "eval_f1": 0.9397464897507521, - "eval_loss": 0.24435795843601227, - "eval_runtime": 29.6919, - "eval_samples_per_second": 253.065, - "eval_steps_per_second": 3.974, - "step": 113500 - }, - { - "epoch": 4.87, - "learning_rate": 1.8920927403866672e-05, - "loss": 0.0961, - "step": 114000 - }, - { - "epoch": 4.87, - "eval_accuracy": 0.9337237157306362, - "eval_f1": 0.9339511492582226, - "eval_loss": 0.2280196100473404, - "eval_runtime": 29.7315, - "eval_samples_per_second": 252.728, - "eval_steps_per_second": 3.969, - "step": 114000 - }, - { - "epoch": 4.89, - "learning_rate": 1.889425333959285e-05, - "loss": 0.0968, - "step": 114500 - }, - { - "epoch": 4.89, - "eval_accuracy": 0.9357199893532073, - "eval_f1": 0.9363108940519986, - "eval_loss": 0.2443583458662033, - "eval_runtime": 29.7211, - "eval_samples_per_second": 252.817, - "eval_steps_per_second": 3.97, - "step": 114500 - }, - { - "epoch": 4.91, - "learning_rate": 1.8867579275319025e-05, - "loss": 0.0945, - "step": 115000 - }, - { - "epoch": 4.91, - "eval_accuracy": 0.9309289326590364, - "eval_f1": 0.931598757443973, - "eval_loss": 0.264121949672699, - "eval_runtime": 29.6755, - "eval_samples_per_second": 253.205, - "eval_steps_per_second": 3.976, - "step": 115000 - }, - { - "epoch": 4.93, - "learning_rate": 1.8840958559173747e-05, - "loss": 0.0943, - "step": 115500 - }, - { - "epoch": 4.93, - "eval_accuracy": 0.9355869044450359, - "eval_f1": 0.9360854367526686, - "eval_loss": 0.23954808712005615, - "eval_runtime": 29.6726, - "eval_samples_per_second": 253.23, - "eval_steps_per_second": 3.977, - "step": 115500 - }, - { - "epoch": 4.95, - "learning_rate": 1.881428449489992e-05, - "loss": 0.092, - "step": 116000 - }, - { - "epoch": 4.95, - "eval_accuracy": 0.9401117913228639, - "eval_f1": 0.940554328007054, - "eval_loss": 0.21225783228874207, - "eval_runtime": 29.6796, - "eval_samples_per_second": 253.171, - "eval_steps_per_second": 3.976, - "step": 116000 - }, - { - "epoch": 4.97, - "learning_rate": 1.8787610430626096e-05, - "loss": 0.0921, - "step": 116500 - }, - { - "epoch": 4.97, - "eval_accuracy": 0.9387809422411498, - "eval_f1": 0.9391814867688993, - "eval_loss": 0.24649563431739807, - "eval_runtime": 29.7414, - "eval_samples_per_second": 252.644, - "eval_steps_per_second": 3.968, - "step": 116500 - }, - { - "epoch": 4.99, - "learning_rate": 1.876093636635227e-05, - "loss": 0.0924, - "step": 117000 - }, - { - "epoch": 4.99, - "eval_accuracy": 0.9330582911897791, - "eval_f1": 0.9336951473017778, - "eval_loss": 0.2614225745201111, - "eval_runtime": 29.7333, - "eval_samples_per_second": 252.713, - "eval_steps_per_second": 3.969, - "step": 117000 - }, - { - "epoch": 5.01, - "learning_rate": 1.8734262302078445e-05, - "loss": 0.081, - "step": 117500 - }, - { - "epoch": 5.01, - "eval_accuracy": 0.9291988288528081, - "eval_f1": 0.9299493314090986, - "eval_loss": 0.3145081698894501, - "eval_runtime": 29.7729, - "eval_samples_per_second": 252.378, - "eval_steps_per_second": 3.963, - "step": 117500 - }, - { - "epoch": 5.04, - "learning_rate": 1.870758823780462e-05, - "loss": 0.0718, - "step": 118000 - }, - { - "epoch": 5.04, - "eval_accuracy": 0.9407772158637211, - "eval_f1": 0.9410248102577136, - "eval_loss": 0.24476242065429688, - "eval_runtime": 29.6709, - "eval_samples_per_second": 253.245, - "eval_steps_per_second": 3.977, - "step": 118000 - }, - { - "epoch": 5.06, - "learning_rate": 1.868096752165934e-05, - "loss": 0.0725, - "step": 118500 - }, - { - "epoch": 5.06, - "eval_accuracy": 0.9322597817407506, - "eval_f1": 0.932907820406911, - "eval_loss": 0.2969840466976166, - "eval_runtime": 29.6827, - "eval_samples_per_second": 253.144, - "eval_steps_per_second": 3.975, - "step": 118500 - }, - { - "epoch": 5.08, - "learning_rate": 1.8654293457385516e-05, - "loss": 0.0715, - "step": 119000 - }, - { - "epoch": 5.08, - "eval_accuracy": 0.9319936119244078, - "eval_f1": 0.932401349859726, - "eval_loss": 0.3180652856826782, - "eval_runtime": 29.8767, - "eval_samples_per_second": 251.5, - "eval_steps_per_second": 3.95, - "step": 119000 - }, - { - "epoch": 5.1, - "learning_rate": 1.862761939311169e-05, - "loss": 0.0707, - "step": 119500 - }, - { - "epoch": 5.1, - "eval_accuracy": 0.9220122438115518, - "eval_f1": 0.9233960607632528, - "eval_loss": 0.3678698241710663, - "eval_runtime": 29.7048, - "eval_samples_per_second": 252.956, - "eval_steps_per_second": 3.972, - "step": 119500 - }, - { - "epoch": 5.12, - "learning_rate": 1.8600945328837865e-05, - "loss": 0.0739, - "step": 120000 - }, - { - "epoch": 5.12, - "eval_accuracy": 0.9419749800372638, - "eval_f1": 0.9422688639940412, - "eval_loss": 0.23977598547935486, - "eval_runtime": 29.7047, - "eval_samples_per_second": 252.957, - "eval_steps_per_second": 3.972, - "step": 120000 - }, - { - "epoch": 5.14, - "learning_rate": 1.857427126456404e-05, - "loss": 0.0732, - "step": 120500 - }, - { - "epoch": 5.14, - "eval_accuracy": 0.9409103007718924, - "eval_f1": 0.9413533843553499, - "eval_loss": 0.2784920334815979, - "eval_runtime": 29.6623, - "eval_samples_per_second": 253.319, - "eval_steps_per_second": 3.978, - "step": 120500 - }, - { - "epoch": 5.16, - "learning_rate": 1.8547650548418765e-05, - "loss": 0.0729, - "step": 121000 - }, - { - "epoch": 5.16, - "eval_accuracy": 0.9407772158637211, - "eval_f1": 0.9411080827560356, - "eval_loss": 0.2614119350910187, - "eval_runtime": 29.6988, - "eval_samples_per_second": 253.007, - "eval_steps_per_second": 3.973, - "step": 121000 - }, - { - "epoch": 5.19, - "learning_rate": 1.8520976484144936e-05, - "loss": 0.0721, - "step": 121500 - }, - { - "epoch": 5.19, - "eval_accuracy": 0.9354538195368646, - "eval_f1": 0.9360616006194592, - "eval_loss": 0.2742285430431366, - "eval_runtime": 29.7205, - "eval_samples_per_second": 252.822, - "eval_steps_per_second": 3.97, - "step": 121500 - }, - { - "epoch": 5.21, - "learning_rate": 1.8494302419871114e-05, - "loss": 0.0755, - "step": 122000 - }, - { - "epoch": 5.21, - "eval_accuracy": 0.9212137343625233, - "eval_f1": 0.922317359924068, - "eval_loss": 0.3301926255226135, - "eval_runtime": 29.7206, - "eval_samples_per_second": 252.821, - "eval_steps_per_second": 3.97, - "step": 122000 - }, - { - "epoch": 5.23, - "learning_rate": 1.846762835559729e-05, - "loss": 0.0778, - "step": 122500 - }, - { - "epoch": 5.23, - "eval_accuracy": 0.9303965930263508, - "eval_f1": 0.9308745735895487, - "eval_loss": 0.2771119773387909, - "eval_runtime": 29.7333, - "eval_samples_per_second": 252.713, - "eval_steps_per_second": 3.969, - "step": 122500 - }, - { - "epoch": 5.25, - "learning_rate": 1.8440954291323463e-05, - "loss": 0.0778, - "step": 123000 - }, - { - "epoch": 5.25, - "eval_accuracy": 0.9346553100878361, - "eval_f1": 0.9351655806765509, - "eval_loss": 0.26929518580436707, - "eval_runtime": 29.8793, - "eval_samples_per_second": 251.479, - "eval_steps_per_second": 3.949, - "step": 123000 - }, - { - "epoch": 5.27, - "learning_rate": 1.8414333575178185e-05, - "loss": 0.0742, - "step": 123500 - }, - { - "epoch": 5.27, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9451744443467338, - "eval_loss": 0.23084259033203125, - "eval_runtime": 29.7304, - "eval_samples_per_second": 252.738, - "eval_steps_per_second": 3.969, - "step": 123500 - }, - { - "epoch": 5.29, - "learning_rate": 1.838765951090436e-05, - "loss": 0.0732, - "step": 124000 - }, - { - "epoch": 5.29, - "eval_accuracy": 0.9378493478839499, - "eval_f1": 0.9383794117060509, - "eval_loss": 0.294933557510376, - "eval_runtime": 29.7559, - "eval_samples_per_second": 252.522, - "eval_steps_per_second": 3.966, - "step": 124000 - }, - { - "epoch": 5.31, - "learning_rate": 1.8360985446630534e-05, - "loss": 0.0742, - "step": 124500 - }, - { - "epoch": 5.31, - "eval_accuracy": 0.9254724514240085, - "eval_f1": 0.9263212611151752, - "eval_loss": 0.32705116271972656, - "eval_runtime": 29.7061, - "eval_samples_per_second": 252.944, - "eval_steps_per_second": 3.972, - "step": 124500 - }, - { - "epoch": 5.33, - "learning_rate": 1.833431138235671e-05, - "loss": 0.0769, - "step": 125000 - }, - { - "epoch": 5.33, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9449892328941387, - "eval_loss": 0.23981066048145294, - "eval_runtime": 29.7033, - "eval_samples_per_second": 252.969, - "eval_steps_per_second": 3.973, - "step": 125000 - }, - { - "epoch": 5.36, - "learning_rate": 1.830769066621143e-05, - "loss": 0.0764, - "step": 125500 - }, - { - "epoch": 5.36, - "eval_accuracy": 0.9334575459142933, - "eval_f1": 0.9338798349360586, - "eval_loss": 0.2838508188724518, - "eval_runtime": 29.8318, - "eval_samples_per_second": 251.879, - "eval_steps_per_second": 3.956, - "step": 125500 - }, - { - "epoch": 5.38, - "learning_rate": 1.8281016601937605e-05, - "loss": 0.0765, - "step": 126000 - }, - { - "epoch": 5.38, - "eval_accuracy": 0.9341229704551504, - "eval_f1": 0.9347863135821975, - "eval_loss": 0.27602633833885193, - "eval_runtime": 28.984, - "eval_samples_per_second": 259.246, - "eval_steps_per_second": 4.071, - "step": 126000 - }, - { - "epoch": 5.4, - "learning_rate": 1.825439588579233e-05, - "loss": 0.0781, - "step": 126500 - }, - { - "epoch": 5.4, - "eval_accuracy": 0.9327921213734363, - "eval_f1": 0.9334944076626738, - "eval_loss": 0.27298033237457275, - "eval_runtime": 29.7395, - "eval_samples_per_second": 252.661, - "eval_steps_per_second": 3.968, - "step": 126500 - }, - { - "epoch": 5.42, - "learning_rate": 1.82277218215185e-05, - "loss": 0.0791, - "step": 127000 - }, - { - "epoch": 5.42, - "eval_accuracy": 0.9361192440777216, - "eval_f1": 0.9366051447103599, - "eval_loss": 0.279489666223526, - "eval_runtime": 29.6861, - "eval_samples_per_second": 253.115, - "eval_steps_per_second": 3.975, - "step": 127000 - }, - { - "epoch": 5.44, - "learning_rate": 1.8201047757244676e-05, - "loss": 0.0755, - "step": 127500 - }, - { - "epoch": 5.44, - "eval_accuracy": 0.9410433856800638, - "eval_f1": 0.9414163947590737, - "eval_loss": 0.27021223306655884, - "eval_runtime": 29.7025, - "eval_samples_per_second": 252.975, - "eval_steps_per_second": 3.973, - "step": 127500 - }, - { - "epoch": 5.46, - "learning_rate": 1.8174373692970854e-05, - "loss": 0.0808, - "step": 128000 - }, - { - "epoch": 5.46, - "eval_accuracy": 0.9319936119244078, - "eval_f1": 0.9325696896758618, - "eval_loss": 0.28826984763145447, - "eval_runtime": 29.7213, - "eval_samples_per_second": 252.815, - "eval_steps_per_second": 3.97, - "step": 128000 - }, - { - "epoch": 5.48, - "learning_rate": 1.8147699628697025e-05, - "loss": 0.0815, - "step": 128500 - }, - { - "epoch": 5.48, - "eval_accuracy": 0.9303965930263508, - "eval_f1": 0.9312246929240023, - "eval_loss": 0.28006842732429504, - "eval_runtime": 28.8693, - "eval_samples_per_second": 260.276, - "eval_steps_per_second": 4.087, - "step": 128500 - }, - { - "epoch": 5.51, - "learning_rate": 1.8121025564423203e-05, - "loss": 0.0781, - "step": 129000 - }, - { - "epoch": 5.51, - "eval_accuracy": 0.9342560553633218, - "eval_f1": 0.934916102930222, - "eval_loss": 0.28170469403266907, - "eval_runtime": 29.7318, - "eval_samples_per_second": 252.726, - "eval_steps_per_second": 3.969, - "step": 129000 - }, - { - "epoch": 5.53, - "learning_rate": 1.8094351500149377e-05, - "loss": 0.0799, - "step": 129500 - }, - { - "epoch": 5.53, - "eval_accuracy": 0.9367846686185787, - "eval_f1": 0.9373211035369594, - "eval_loss": 0.26480868458747864, - "eval_runtime": 29.6578, - "eval_samples_per_second": 253.356, - "eval_steps_per_second": 3.979, - "step": 129500 - }, - { - "epoch": 5.55, - "learning_rate": 1.806767743587555e-05, - "loss": 0.0795, - "step": 130000 - }, - { - "epoch": 5.55, - "eval_accuracy": 0.9418418951290923, - "eval_f1": 0.9421717077366013, - "eval_loss": 0.24259281158447266, - "eval_runtime": 29.6526, - "eval_samples_per_second": 253.401, - "eval_steps_per_second": 3.979, - "step": 130000 - }, - { - "epoch": 5.57, - "learning_rate": 1.8041003371601726e-05, - "loss": 0.0779, - "step": 130500 - }, - { - "epoch": 5.57, - "eval_accuracy": 0.932392866648922, - "eval_f1": 0.9327021166238731, - "eval_loss": 0.2653105854988098, - "eval_runtime": 29.6697, - "eval_samples_per_second": 253.255, - "eval_steps_per_second": 3.977, - "step": 130500 - }, - { - "epoch": 5.59, - "learning_rate": 1.80143293073279e-05, - "loss": 0.0805, - "step": 131000 - }, - { - "epoch": 5.59, - "eval_accuracy": 0.9347883949960074, - "eval_f1": 0.9353918774215707, - "eval_loss": 0.27313682436943054, - "eval_runtime": 28.8119, - "eval_samples_per_second": 260.795, - "eval_steps_per_second": 4.096, - "step": 131000 - }, - { - "epoch": 5.61, - "learning_rate": 1.7987655243054076e-05, - "loss": 0.0744, - "step": 131500 - }, - { - "epoch": 5.61, - "eval_accuracy": 0.9407772158637211, - "eval_f1": 0.9411238964325214, - "eval_loss": 0.2567736804485321, - "eval_runtime": 29.8525, - "eval_samples_per_second": 251.705, - "eval_steps_per_second": 3.953, - "step": 131500 - }, - { - "epoch": 5.63, - "learning_rate": 1.796098117878025e-05, - "loss": 0.0795, - "step": 132000 - }, - { - "epoch": 5.63, - "eval_accuracy": 0.9342560553633218, - "eval_f1": 0.9348669837102812, - "eval_loss": 0.279877632856369, - "eval_runtime": 29.7073, - "eval_samples_per_second": 252.935, - "eval_steps_per_second": 3.972, - "step": 132000 - }, - { - "epoch": 5.65, - "learning_rate": 1.7934360462634972e-05, - "loss": 0.0799, - "step": 132500 - }, - { - "epoch": 5.65, - "eval_accuracy": 0.9399787064146926, - "eval_f1": 0.9403908704532573, - "eval_loss": 0.27136918902397156, - "eval_runtime": 29.6962, - "eval_samples_per_second": 253.029, - "eval_steps_per_second": 3.974, - "step": 132500 - }, - { - "epoch": 5.68, - "learning_rate": 1.7907686398361147e-05, - "loss": 0.0795, - "step": 133000 - }, - { - "epoch": 5.68, - "eval_accuracy": 0.9445035932925206, - "eval_f1": 0.9446204625899423, - "eval_loss": 0.24645930528640747, - "eval_runtime": 29.8001, - "eval_samples_per_second": 252.147, - "eval_steps_per_second": 3.96, - "step": 133000 - }, - { - "epoch": 5.7, - "learning_rate": 1.788101233408732e-05, - "loss": 0.0773, - "step": 133500 - }, - { - "epoch": 5.7, - "eval_accuracy": 0.9367846686185787, - "eval_f1": 0.9372887189762769, - "eval_loss": 0.23624612390995026, - "eval_runtime": 29.0666, - "eval_samples_per_second": 258.51, - "eval_steps_per_second": 4.06, - "step": 133500 - }, - { - "epoch": 5.72, - "learning_rate": 1.7854338269813496e-05, - "loss": 0.0773, - "step": 134000 - }, - { - "epoch": 5.72, - "eval_accuracy": 0.9454351876497206, - "eval_f1": 0.9457146735470864, - "eval_loss": 0.2246805727481842, - "eval_runtime": 29.7092, - "eval_samples_per_second": 252.918, - "eval_steps_per_second": 3.972, - "step": 134000 - }, - { - "epoch": 5.74, - "learning_rate": 1.7827770901796765e-05, - "loss": 0.0802, - "step": 134500 - }, - { - "epoch": 5.74, - "eval_accuracy": 0.9433058291189779, - "eval_f1": 0.9436458127410952, - "eval_loss": 0.219477578997612, - "eval_runtime": 29.7077, - "eval_samples_per_second": 252.931, - "eval_steps_per_second": 3.972, - "step": 134500 - }, - { - "epoch": 5.76, - "learning_rate": 1.7801096837522943e-05, - "loss": 0.0805, - "step": 135000 - }, - { - "epoch": 5.76, - "eval_accuracy": 0.9355869044450359, - "eval_f1": 0.9362078931632605, - "eval_loss": 0.25872257351875305, - "eval_runtime": 29.7076, - "eval_samples_per_second": 252.932, - "eval_steps_per_second": 3.972, - "step": 135000 - }, - { - "epoch": 5.78, - "learning_rate": 1.7774422773249117e-05, - "loss": 0.0796, - "step": 135500 - }, - { - "epoch": 5.78, - "eval_accuracy": 0.9276018099547512, - "eval_f1": 0.9286700082110425, - "eval_loss": 0.2882857024669647, - "eval_runtime": 29.6463, - "eval_samples_per_second": 253.455, - "eval_steps_per_second": 3.98, - "step": 135500 - }, - { - "epoch": 5.8, - "learning_rate": 1.774774870897529e-05, - "loss": 0.0797, - "step": 136000 - }, - { - "epoch": 5.8, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9454800904796664, - "eval_loss": 0.22322283685207367, - "eval_runtime": 29.7184, - "eval_samples_per_second": 252.84, - "eval_steps_per_second": 3.971, - "step": 136000 - }, - { - "epoch": 5.83, - "learning_rate": 1.7721074644701466e-05, - "loss": 0.0783, - "step": 136500 - }, - { - "epoch": 5.83, - "eval_accuracy": 0.9492946499866916, - "eval_f1": 0.9494469008050342, - "eval_loss": 0.20879070460796356, - "eval_runtime": 29.697, - "eval_samples_per_second": 253.022, - "eval_steps_per_second": 3.973, - "step": 136500 - }, - { - "epoch": 5.85, - "learning_rate": 1.769440058042764e-05, - "loss": 0.0806, - "step": 137000 - }, - { - "epoch": 5.85, - "eval_accuracy": 0.9418418951290923, - "eval_f1": 0.942174291286256, - "eval_loss": 0.2473965436220169, - "eval_runtime": 29.8072, - "eval_samples_per_second": 252.086, - "eval_steps_per_second": 3.959, - "step": 137000 - }, - { - "epoch": 5.87, - "learning_rate": 1.7667726516153816e-05, - "loss": 0.0818, - "step": 137500 - }, - { - "epoch": 5.87, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9397529317052404, - "eval_loss": 0.2630070745944977, - "eval_runtime": 29.6999, - "eval_samples_per_second": 252.997, - "eval_steps_per_second": 3.973, - "step": 137500 - }, - { - "epoch": 5.89, - "learning_rate": 1.764105245187999e-05, - "loss": 0.0773, - "step": 138000 - }, - { - "epoch": 5.89, - "eval_accuracy": 0.9476976310886346, - "eval_f1": 0.9478580726278416, - "eval_loss": 0.234640434384346, - "eval_runtime": 29.8086, - "eval_samples_per_second": 252.075, - "eval_steps_per_second": 3.959, - "step": 138000 - }, - { - "epoch": 5.91, - "learning_rate": 1.7614431735734712e-05, - "loss": 0.0795, - "step": 138500 - }, - { - "epoch": 5.91, - "eval_accuracy": 0.9373170082512643, - "eval_f1": 0.9377090107319418, - "eval_loss": 0.28119930624961853, - "eval_runtime": 29.7146, - "eval_samples_per_second": 252.872, - "eval_steps_per_second": 3.971, - "step": 138500 - }, - { - "epoch": 5.93, - "learning_rate": 1.7587757671460887e-05, - "loss": 0.0816, - "step": 139000 - }, - { - "epoch": 5.93, - "eval_accuracy": 0.9274687250465797, - "eval_f1": 0.9281982795988495, - "eval_loss": 0.32029488682746887, - "eval_runtime": 29.7504, - "eval_samples_per_second": 252.568, - "eval_steps_per_second": 3.966, - "step": 139000 - }, - { - "epoch": 5.95, - "learning_rate": 1.756108360718706e-05, - "loss": 0.0773, - "step": 139500 - }, - { - "epoch": 5.95, - "eval_accuracy": 0.9375831780676072, - "eval_f1": 0.9379461602562825, - "eval_loss": 0.2567751705646515, - "eval_runtime": 29.8294, - "eval_samples_per_second": 251.899, - "eval_steps_per_second": 3.956, - "step": 139500 - }, - { - "epoch": 5.97, - "learning_rate": 1.7534409542913236e-05, - "loss": 0.08, - "step": 140000 - }, - { - "epoch": 5.97, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.9466704964218854, - "eval_loss": 0.21513184905052185, - "eval_runtime": 29.6771, - "eval_samples_per_second": 253.192, - "eval_steps_per_second": 3.976, - "step": 140000 - }, - { - "epoch": 6.0, - "learning_rate": 1.750773547863941e-05, - "loss": 0.0782, - "step": 140500 - }, - { - "epoch": 6.0, - "eval_accuracy": 0.9387809422411498, - "eval_f1": 0.9394699729362729, - "eval_loss": 0.28363919258117676, - "eval_runtime": 29.7313, - "eval_samples_per_second": 252.73, - "eval_steps_per_second": 3.969, - "step": 140500 - }, - { - "epoch": 6.02, - "learning_rate": 1.7481061414365585e-05, - "loss": 0.0618, - "step": 141000 - }, - { - "epoch": 6.02, - "eval_accuracy": 0.9318605270162363, - "eval_f1": 0.9324640088965003, - "eval_loss": 0.3425421416759491, - "eval_runtime": 29.7537, - "eval_samples_per_second": 252.54, - "eval_steps_per_second": 3.966, - "step": 141000 - }, - { - "epoch": 6.04, - "learning_rate": 1.7454440698220307e-05, - "loss": 0.0645, - "step": 141500 - }, - { - "epoch": 6.04, - "eval_accuracy": 0.9341229704551504, - "eval_f1": 0.934781602448178, - "eval_loss": 0.3139813542366028, - "eval_runtime": 29.7013, - "eval_samples_per_second": 252.985, - "eval_steps_per_second": 3.973, - "step": 141500 - }, - { - "epoch": 6.06, - "learning_rate": 1.7427766633946485e-05, - "loss": 0.0576, - "step": 142000 - }, - { - "epoch": 6.06, - "eval_accuracy": 0.9390471120574927, - "eval_f1": 0.9395110845241024, - "eval_loss": 0.29140228033065796, - "eval_runtime": 29.7171, - "eval_samples_per_second": 252.851, - "eval_steps_per_second": 3.971, - "step": 142000 - }, - { - "epoch": 6.08, - "learning_rate": 1.7401092569672656e-05, - "loss": 0.0651, - "step": 142500 - }, - { - "epoch": 6.08, - "eval_accuracy": 0.9395794516901783, - "eval_f1": 0.9400294457871766, - "eval_loss": 0.2746965289115906, - "eval_runtime": 29.689, - "eval_samples_per_second": 253.09, - "eval_steps_per_second": 3.975, - "step": 142500 - }, - { - "epoch": 6.1, - "learning_rate": 1.737441850539883e-05, - "loss": 0.0638, - "step": 143000 - }, - { - "epoch": 6.1, - "eval_accuracy": 0.926270960873037, - "eval_f1": 0.9275467084065636, - "eval_loss": 0.3684926927089691, - "eval_runtime": 29.7002, - "eval_samples_per_second": 252.995, - "eval_steps_per_second": 3.973, - "step": 143000 - }, - { - "epoch": 6.12, - "learning_rate": 1.7347744441125008e-05, - "loss": 0.062, - "step": 143500 - }, - { - "epoch": 6.12, - "eval_accuracy": 0.9361192440777216, - "eval_f1": 0.9367507413022468, - "eval_loss": 0.28878238797187805, - "eval_runtime": 29.7487, - "eval_samples_per_second": 252.583, - "eval_steps_per_second": 3.967, - "step": 143500 - }, - { - "epoch": 6.15, - "learning_rate": 1.7321070376851183e-05, - "loss": 0.0602, - "step": 144000 - }, - { - "epoch": 6.15, - "eval_accuracy": 0.9413095554964067, - "eval_f1": 0.9414811417776511, - "eval_loss": 0.2957022190093994, - "eval_runtime": 29.7114, - "eval_samples_per_second": 252.9, - "eval_steps_per_second": 3.972, - "step": 144000 - }, - { - "epoch": 6.17, - "learning_rate": 1.7294449660705905e-05, - "loss": 0.0629, - "step": 144500 - }, - { - "epoch": 6.17, - "eval_accuracy": 0.9418418951290923, - "eval_f1": 0.9423131558938396, - "eval_loss": 0.2822032570838928, - "eval_runtime": 29.7961, - "eval_samples_per_second": 252.181, - "eval_steps_per_second": 3.96, - "step": 144500 - }, - { - "epoch": 6.19, - "learning_rate": 1.726777559643208e-05, - "loss": 0.0647, - "step": 145000 - }, - { - "epoch": 6.19, - "eval_accuracy": 0.9375831780676072, - "eval_f1": 0.9381601008634161, - "eval_loss": 0.2937462031841278, - "eval_runtime": 29.7028, - "eval_samples_per_second": 252.973, - "eval_steps_per_second": 3.973, - "step": 145000 - }, - { - "epoch": 6.21, - "learning_rate": 1.7241101532158254e-05, - "loss": 0.0641, - "step": 145500 - }, - { - "epoch": 6.21, - "eval_accuracy": 0.9359861591695502, - "eval_f1": 0.9364402700524492, - "eval_loss": 0.29298967123031616, - "eval_runtime": 29.7381, - "eval_samples_per_second": 252.673, - "eval_steps_per_second": 3.968, - "step": 145500 - }, - { - "epoch": 6.23, - "learning_rate": 1.7214427467884428e-05, - "loss": 0.0619, - "step": 146000 - }, - { - "epoch": 6.23, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9396488023591716, - "eval_loss": 0.3040963113307953, - "eval_runtime": 29.6999, - "eval_samples_per_second": 252.997, - "eval_steps_per_second": 3.973, - "step": 146000 - }, - { - "epoch": 6.25, - "learning_rate": 1.718780675173915e-05, - "loss": 0.0662, - "step": 146500 - }, - { - "epoch": 6.25, - "eval_accuracy": 0.9459675272824062, - "eval_f1": 0.9461600576362096, - "eval_loss": 0.24619624018669128, - "eval_runtime": 29.6621, - "eval_samples_per_second": 253.32, - "eval_steps_per_second": 3.978, - "step": 146500 - }, - { - "epoch": 6.27, - "learning_rate": 1.7161132687465325e-05, - "loss": 0.0642, - "step": 147000 - }, - { - "epoch": 6.27, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9393517216774104, - "eval_loss": 0.29791733622550964, - "eval_runtime": 29.7053, - "eval_samples_per_second": 252.952, - "eval_steps_per_second": 3.972, - "step": 147000 - }, - { - "epoch": 6.3, - "learning_rate": 1.71344586231915e-05, - "loss": 0.0638, - "step": 147500 - }, - { - "epoch": 6.3, - "eval_accuracy": 0.9295980835773223, - "eval_f1": 0.9306057177926395, - "eval_loss": 0.3107518255710602, - "eval_runtime": 29.699, - "eval_samples_per_second": 253.005, - "eval_steps_per_second": 3.973, - "step": 147500 - }, - { - "epoch": 6.32, - "learning_rate": 1.7107784558917674e-05, - "loss": 0.0646, - "step": 148000 - }, - { - "epoch": 6.32, - "eval_accuracy": 0.9399787064146926, - "eval_f1": 0.940479475585594, - "eval_loss": 0.2788504362106323, - "eval_runtime": 27.5638, - "eval_samples_per_second": 272.604, - "eval_steps_per_second": 4.281, - "step": 148000 - }, - { - "epoch": 6.34, - "learning_rate": 1.7081163842772396e-05, - "loss": 0.0644, - "step": 148500 - }, - { - "epoch": 6.34, - "eval_accuracy": 0.9381155177002928, - "eval_f1": 0.9382586915112797, - "eval_loss": 0.2806726396083832, - "eval_runtime": 29.7107, - "eval_samples_per_second": 252.906, - "eval_steps_per_second": 3.972, - "step": 148500 - }, - { - "epoch": 6.36, - "learning_rate": 1.705448977849857e-05, - "loss": 0.0649, - "step": 149000 - }, - { - "epoch": 6.36, - "eval_accuracy": 0.9419749800372638, - "eval_f1": 0.9424753113790683, - "eval_loss": 0.27096980810165405, - "eval_runtime": 29.7017, - "eval_samples_per_second": 252.982, - "eval_steps_per_second": 3.973, - "step": 149000 - }, - { - "epoch": 6.38, - "learning_rate": 1.7027815714224748e-05, - "loss": 0.0634, - "step": 149500 - }, - { - "epoch": 6.38, - "eval_accuracy": 0.9346553100878361, - "eval_f1": 0.9353827036795522, - "eval_loss": 0.29936379194259644, - "eval_runtime": 29.6752, - "eval_samples_per_second": 253.208, - "eval_steps_per_second": 3.976, - "step": 149500 - }, - { - "epoch": 6.4, - "learning_rate": 1.700114164995092e-05, - "loss": 0.0607, - "step": 150000 - }, - { - "epoch": 6.4, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9513936901264027, - "eval_loss": 0.23151962459087372, - "eval_runtime": 29.7021, - "eval_samples_per_second": 252.979, - "eval_steps_per_second": 3.973, - "step": 150000 - }, - { - "epoch": 6.42, - "learning_rate": 1.6974467585677097e-05, - "loss": 0.0653, - "step": 150500 - }, - { - "epoch": 6.42, - "eval_accuracy": 0.9184189512909237, - "eval_f1": 0.9201895395649388, - "eval_loss": 0.3934233486652374, - "eval_runtime": 28.8229, - "eval_samples_per_second": 260.695, - "eval_steps_per_second": 4.094, - "step": 150500 - }, - { - "epoch": 6.44, - "learning_rate": 1.6947793521403272e-05, - "loss": 0.0678, - "step": 151000 - }, - { - "epoch": 6.44, - "eval_accuracy": 0.9209475645461804, - "eval_f1": 0.9226098175059657, - "eval_loss": 0.3584538996219635, - "eval_runtime": 29.7316, - "eval_samples_per_second": 252.728, - "eval_steps_per_second": 3.969, - "step": 151000 - }, - { - "epoch": 6.47, - "learning_rate": 1.6921119457129443e-05, - "loss": 0.064, - "step": 151500 - }, - { - "epoch": 6.47, - "eval_accuracy": 0.95009315943572, - "eval_f1": 0.950299824662573, - "eval_loss": 0.22422486543655396, - "eval_runtime": 29.6281, - "eval_samples_per_second": 253.611, - "eval_steps_per_second": 3.983, - "step": 151500 - }, - { - "epoch": 6.49, - "learning_rate": 1.689444539285562e-05, - "loss": 0.0718, - "step": 152000 - }, - { - "epoch": 6.49, - "eval_accuracy": 0.9387809422411498, - "eval_f1": 0.9393330915737532, - "eval_loss": 0.2708144783973694, - "eval_runtime": 29.7081, - "eval_samples_per_second": 252.928, - "eval_steps_per_second": 3.972, - "step": 152000 - }, - { - "epoch": 6.51, - "learning_rate": 1.6867771328581795e-05, - "loss": 0.0677, - "step": 152500 - }, - { - "epoch": 6.51, - "eval_accuracy": 0.9371839233430929, - "eval_f1": 0.9377985107844401, - "eval_loss": 0.3036825358867645, - "eval_runtime": 29.6939, - "eval_samples_per_second": 253.048, - "eval_steps_per_second": 3.974, - "step": 152500 - }, - { - "epoch": 6.53, - "learning_rate": 1.684109726430797e-05, - "loss": 0.0623, - "step": 153000 - }, - { - "epoch": 6.53, - "eval_accuracy": 0.9480968858131488, - "eval_f1": 0.9483669366706274, - "eval_loss": 0.2451292872428894, - "eval_runtime": 28.8917, - "eval_samples_per_second": 260.075, - "eval_steps_per_second": 4.084, - "step": 153000 - }, - { - "epoch": 6.55, - "learning_rate": 1.6814476548162692e-05, - "loss": 0.0648, - "step": 153500 - }, - { - "epoch": 6.55, - "eval_accuracy": 0.9351876497205217, - "eval_f1": 0.9358404642854584, - "eval_loss": 0.33823534846305847, - "eval_runtime": 29.725, - "eval_samples_per_second": 252.784, - "eval_steps_per_second": 3.97, - "step": 153500 - }, - { - "epoch": 6.57, - "learning_rate": 1.6787802483888866e-05, - "loss": 0.0684, - "step": 154000 - }, - { - "epoch": 6.57, - "eval_accuracy": 0.9389140271493213, - "eval_f1": 0.9393959959904314, - "eval_loss": 0.288310170173645, - "eval_runtime": 29.667, - "eval_samples_per_second": 253.278, - "eval_steps_per_second": 3.977, - "step": 154000 - }, - { - "epoch": 6.59, - "learning_rate": 1.676112841961504e-05, - "loss": 0.0696, - "step": 154500 - }, - { - "epoch": 6.59, - "eval_accuracy": 0.9322597817407506, - "eval_f1": 0.933185090369889, - "eval_loss": 0.32053282856941223, - "eval_runtime": 29.7076, - "eval_samples_per_second": 252.932, - "eval_steps_per_second": 3.972, - "step": 154500 - }, - { - "epoch": 6.62, - "learning_rate": 1.6734454355341216e-05, - "loss": 0.067, - "step": 155000 - }, - { - "epoch": 6.62, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9460017411974384, - "eval_loss": 0.24401821196079254, - "eval_runtime": 29.6951, - "eval_samples_per_second": 253.038, - "eval_steps_per_second": 3.974, - "step": 155000 - }, - { - "epoch": 6.64, - "learning_rate": 1.670778029106739e-05, - "loss": 0.0675, - "step": 155500 - }, - { - "epoch": 6.64, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.9499514061301335, - "eval_loss": 0.23875835537910461, - "eval_runtime": 27.5871, - "eval_samples_per_second": 272.374, - "eval_steps_per_second": 4.277, - "step": 155500 - }, - { - "epoch": 6.66, - "learning_rate": 1.6681159574922112e-05, - "loss": 0.0663, - "step": 156000 - }, - { - "epoch": 6.66, - "eval_accuracy": 0.9415757253127496, - "eval_f1": 0.9417655310043141, - "eval_loss": 0.2690986394882202, - "eval_runtime": 29.6963, - "eval_samples_per_second": 253.028, - "eval_steps_per_second": 3.974, - "step": 156000 - }, - { - "epoch": 6.68, - "learning_rate": 1.665448551064829e-05, - "loss": 0.0657, - "step": 156500 - }, - { - "epoch": 6.68, - "eval_accuracy": 0.9379824327921213, - "eval_f1": 0.9386089434837862, - "eval_loss": 0.27384960651397705, - "eval_runtime": 29.7752, - "eval_samples_per_second": 252.358, - "eval_steps_per_second": 3.963, - "step": 156500 - }, - { - "epoch": 6.7, - "learning_rate": 1.662781144637446e-05, - "loss": 0.0649, - "step": 157000 - }, - { - "epoch": 6.7, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9454631168972781, - "eval_loss": 0.24599717557430267, - "eval_runtime": 29.6815, - "eval_samples_per_second": 253.154, - "eval_steps_per_second": 3.976, - "step": 157000 - }, - { - "epoch": 6.72, - "learning_rate": 1.660113738210064e-05, - "loss": 0.0666, - "step": 157500 - }, - { - "epoch": 6.72, - "eval_accuracy": 0.9295980835773223, - "eval_f1": 0.9306336795200063, - "eval_loss": 0.3208254873752594, - "eval_runtime": 29.6781, - "eval_samples_per_second": 253.184, - "eval_steps_per_second": 3.976, - "step": 157500 - }, - { - "epoch": 6.74, - "learning_rate": 1.657451666595536e-05, - "loss": 0.0665, - "step": 158000 - }, - { - "epoch": 6.74, - "eval_accuracy": 0.9429065743944637, - "eval_f1": 0.9432725369505285, - "eval_loss": 0.2655596435070038, - "eval_runtime": 29.7332, - "eval_samples_per_second": 252.714, - "eval_steps_per_second": 3.969, - "step": 158000 - }, - { - "epoch": 6.76, - "learning_rate": 1.6547842601681535e-05, - "loss": 0.0693, - "step": 158500 - }, - { - "epoch": 6.76, - "eval_accuracy": 0.9365184988022358, - "eval_f1": 0.9370932056261295, - "eval_loss": 0.27700603008270264, - "eval_runtime": 29.7205, - "eval_samples_per_second": 252.822, - "eval_steps_per_second": 3.97, - "step": 158500 - }, - { - "epoch": 6.79, - "learning_rate": 1.6521168537407707e-05, - "loss": 0.0657, - "step": 159000 - }, - { - "epoch": 6.79, - "eval_accuracy": 0.9374500931594357, - "eval_f1": 0.9380905652232998, - "eval_loss": 0.29713597893714905, - "eval_runtime": 29.7259, - "eval_samples_per_second": 252.776, - "eval_steps_per_second": 3.97, - "step": 159000 - }, - { - "epoch": 6.81, - "learning_rate": 1.6494494473133885e-05, - "loss": 0.069, - "step": 159500 - }, - { - "epoch": 6.81, - "eval_accuracy": 0.9378493478839499, - "eval_f1": 0.9384099076790622, - "eval_loss": 0.28762125968933105, - "eval_runtime": 29.817, - "eval_samples_per_second": 252.004, - "eval_steps_per_second": 3.957, - "step": 159500 - }, - { - "epoch": 6.83, - "learning_rate": 1.646782040886006e-05, - "loss": 0.0699, - "step": 160000 - }, - { - "epoch": 6.83, - "eval_accuracy": 0.9393132818738356, - "eval_f1": 0.9397331535036828, - "eval_loss": 0.29436245560646057, - "eval_runtime": 29.7168, - "eval_samples_per_second": 252.853, - "eval_steps_per_second": 3.971, - "step": 160000 - }, - { - "epoch": 6.85, - "learning_rate": 1.6441146344586234e-05, - "loss": 0.0653, - "step": 160500 - }, - { - "epoch": 6.85, - "eval_accuracy": 0.9406441309555497, - "eval_f1": 0.9411619893438787, - "eval_loss": 0.31076741218566895, - "eval_runtime": 29.8798, - "eval_samples_per_second": 251.474, - "eval_steps_per_second": 3.949, - "step": 160500 - }, - { - "epoch": 6.87, - "learning_rate": 1.6414525628440956e-05, - "loss": 0.0686, - "step": 161000 - }, - { - "epoch": 6.87, - "eval_accuracy": 0.9402448762310354, - "eval_f1": 0.9405518842645035, - "eval_loss": 0.2986462712287903, - "eval_runtime": 29.6951, - "eval_samples_per_second": 253.038, - "eval_steps_per_second": 3.974, - "step": 161000 - }, - { - "epoch": 6.89, - "learning_rate": 1.638785156416713e-05, - "loss": 0.0655, - "step": 161500 - }, - { - "epoch": 6.89, - "eval_accuracy": 0.9411764705882353, - "eval_f1": 0.9416928170807948, - "eval_loss": 0.28847745060920715, - "eval_runtime": 29.6973, - "eval_samples_per_second": 253.02, - "eval_steps_per_second": 3.973, - "step": 161500 - }, - { - "epoch": 6.91, - "learning_rate": 1.6361177499893305e-05, - "loss": 0.0678, - "step": 162000 - }, - { - "epoch": 6.91, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9475552479617785, - "eval_loss": 0.2651350796222687, - "eval_runtime": 29.67, - "eval_samples_per_second": 253.253, - "eval_steps_per_second": 3.977, - "step": 162000 - }, - { - "epoch": 6.94, - "learning_rate": 1.633450343561948e-05, - "loss": 0.0698, - "step": 162500 - }, - { - "epoch": 6.94, - "eval_accuracy": 0.9482299707213202, - "eval_f1": 0.9484219919203383, - "eval_loss": 0.2195654958486557, - "eval_runtime": 29.7092, - "eval_samples_per_second": 252.919, - "eval_steps_per_second": 3.972, - "step": 162500 - }, - { - "epoch": 6.96, - "learning_rate": 1.6307829371345654e-05, - "loss": 0.0687, - "step": 163000 - }, - { - "epoch": 6.96, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.9466828668162871, - "eval_loss": 0.2680336534976959, - "eval_runtime": 29.7341, - "eval_samples_per_second": 252.707, - "eval_steps_per_second": 3.969, - "step": 163000 - }, - { - "epoch": 6.98, - "learning_rate": 1.6281155307071828e-05, - "loss": 0.0672, - "step": 163500 - }, - { - "epoch": 6.98, - "eval_accuracy": 0.9438381687516636, - "eval_f1": 0.9442382604190801, - "eval_loss": 0.26866111159324646, - "eval_runtime": 29.6987, - "eval_samples_per_second": 253.007, - "eval_steps_per_second": 3.973, - "step": 163500 - }, - { - "epoch": 7.0, - "learning_rate": 1.6254481242798003e-05, - "loss": 0.0706, - "step": 164000 - }, - { - "epoch": 7.0, - "eval_accuracy": 0.9383816875166356, - "eval_f1": 0.9387976146150384, - "eval_loss": 0.28669700026512146, - "eval_runtime": 29.6298, - "eval_samples_per_second": 253.596, - "eval_steps_per_second": 3.982, - "step": 164000 - }, - { - "epoch": 7.02, - "learning_rate": 1.622780717852418e-05, - "loss": 0.0502, - "step": 164500 - }, - { - "epoch": 7.02, - "eval_accuracy": 0.9495608198030343, - "eval_f1": 0.949692327575141, - "eval_loss": 0.25467291474342346, - "eval_runtime": 29.675, - "eval_samples_per_second": 253.21, - "eval_steps_per_second": 3.976, - "step": 164500 - }, - { - "epoch": 7.04, - "learning_rate": 1.6201186462378903e-05, - "loss": 0.0519, - "step": 165000 - }, - { - "epoch": 7.04, - "eval_accuracy": 0.9389140271493213, - "eval_f1": 0.9394556541905762, - "eval_loss": 0.3056061565876007, - "eval_runtime": 29.802, - "eval_samples_per_second": 252.131, - "eval_steps_per_second": 3.959, - "step": 165000 - }, - { - "epoch": 7.06, - "learning_rate": 1.6174512398105077e-05, - "loss": 0.0512, - "step": 165500 - }, - { - "epoch": 7.06, - "eval_accuracy": 0.9387809422411498, - "eval_f1": 0.9393428013619174, - "eval_loss": 0.2825697362422943, - "eval_runtime": 29.6865, - "eval_samples_per_second": 253.112, - "eval_steps_per_second": 3.975, - "step": 165500 - }, - { - "epoch": 7.08, - "learning_rate": 1.614783833383125e-05, - "loss": 0.0507, - "step": 166000 - }, - { - "epoch": 7.08, - "eval_accuracy": 0.9435719989353207, - "eval_f1": 0.9440688946956373, - "eval_loss": 0.31017938256263733, - "eval_runtime": 29.6679, - "eval_samples_per_second": 253.271, - "eval_steps_per_second": 3.977, - "step": 166000 - }, - { - "epoch": 7.11, - "learning_rate": 1.6121164269557426e-05, - "loss": 0.0527, - "step": 166500 - }, - { - "epoch": 7.11, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.947657360420552, - "eval_loss": 0.27538731694221497, - "eval_runtime": 29.6711, - "eval_samples_per_second": 253.243, - "eval_steps_per_second": 3.977, - "step": 166500 - }, - { - "epoch": 7.13, - "learning_rate": 1.60944902052836e-05, - "loss": 0.0498, - "step": 167000 - }, - { - "epoch": 7.13, - "eval_accuracy": 0.9375831780676072, - "eval_f1": 0.9384167986894664, - "eval_loss": 0.3295161724090576, - "eval_runtime": 29.6457, - "eval_samples_per_second": 253.46, - "eval_steps_per_second": 3.98, - "step": 167000 - }, - { - "epoch": 7.15, - "learning_rate": 1.6067816141009775e-05, - "loss": 0.0531, - "step": 167500 - }, - { - "epoch": 7.15, - "eval_accuracy": 0.9374500931594357, - "eval_f1": 0.9381703971218172, - "eval_loss": 0.30897414684295654, - "eval_runtime": 29.6649, - "eval_samples_per_second": 253.296, - "eval_steps_per_second": 3.978, - "step": 167500 - }, - { - "epoch": 7.17, - "learning_rate": 1.604114207673595e-05, - "loss": 0.0521, - "step": 168000 - }, - { - "epoch": 7.17, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9461902179062976, - "eval_loss": 0.2677062451839447, - "eval_runtime": 29.7336, - "eval_samples_per_second": 252.711, - "eval_steps_per_second": 3.969, - "step": 168000 - }, - { - "epoch": 7.19, - "learning_rate": 1.6014521360590672e-05, - "loss": 0.0521, - "step": 168500 - }, - { - "epoch": 7.19, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.9466521291682617, - "eval_loss": 0.25630277395248413, - "eval_runtime": 29.6838, - "eval_samples_per_second": 253.135, - "eval_steps_per_second": 3.975, - "step": 168500 - }, - { - "epoch": 7.21, - "learning_rate": 1.5987847296316846e-05, - "loss": 0.0538, - "step": 169000 - }, - { - "epoch": 7.21, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9431255075942407, - "eval_loss": 0.297467440366745, - "eval_runtime": 29.6889, - "eval_samples_per_second": 253.091, - "eval_steps_per_second": 3.975, - "step": 169000 - }, - { - "epoch": 7.23, - "learning_rate": 1.596117323204302e-05, - "loss": 0.0544, - "step": 169500 - }, - { - "epoch": 7.23, - "eval_accuracy": 0.9429065743944637, - "eval_f1": 0.9434603808144678, - "eval_loss": 0.3097640872001648, - "eval_runtime": 29.669, - "eval_samples_per_second": 253.261, - "eval_steps_per_second": 3.977, - "step": 169500 - }, - { - "epoch": 7.26, - "learning_rate": 1.5934499167769195e-05, - "loss": 0.0558, - "step": 170000 - }, - { - "epoch": 7.26, - "eval_accuracy": 0.9487623103540058, - "eval_f1": 0.9489934319226285, - "eval_loss": 0.2626325786113739, - "eval_runtime": 29.5649, - "eval_samples_per_second": 254.153, - "eval_steps_per_second": 3.991, - "step": 170000 - }, - { - "epoch": 7.28, - "learning_rate": 1.590782510349537e-05, - "loss": 0.0529, - "step": 170500 - }, - { - "epoch": 7.28, - "eval_accuracy": 0.953952621772691, - "eval_f1": 0.95408693304066, - "eval_loss": 0.25428780913352966, - "eval_runtime": 29.728, - "eval_samples_per_second": 252.758, - "eval_steps_per_second": 3.969, - "step": 170500 - }, - { - "epoch": 7.3, - "learning_rate": 1.5881151039221544e-05, - "loss": 0.0539, - "step": 171000 - }, - { - "epoch": 7.3, - "eval_accuracy": 0.9322597817407506, - "eval_f1": 0.9330774684249004, - "eval_loss": 0.35910770297050476, - "eval_runtime": 29.6509, - "eval_samples_per_second": 253.415, - "eval_steps_per_second": 3.98, - "step": 171000 - }, - { - "epoch": 7.32, - "learning_rate": 1.5854476974947722e-05, - "loss": 0.0556, - "step": 171500 - }, - { - "epoch": 7.32, - "eval_accuracy": 0.9454351876497206, - "eval_f1": 0.9457784140036132, - "eval_loss": 0.2696589529514313, - "eval_runtime": 29.7053, - "eval_samples_per_second": 252.952, - "eval_steps_per_second": 3.972, - "step": 171500 - }, - { - "epoch": 7.34, - "learning_rate": 1.5827802910673894e-05, - "loss": 0.0552, - "step": 172000 - }, - { - "epoch": 7.34, - "eval_accuracy": 0.9315943571998936, - "eval_f1": 0.9324976127211432, - "eval_loss": 0.31420910358428955, - "eval_runtime": 29.7064, - "eval_samples_per_second": 252.942, - "eval_steps_per_second": 3.972, - "step": 172000 - }, - { - "epoch": 7.36, - "learning_rate": 1.5801182194528615e-05, - "loss": 0.0564, - "step": 172500 - }, - { - "epoch": 7.36, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9455382053945202, - "eval_loss": 0.2848692834377289, - "eval_runtime": 28.8185, - "eval_samples_per_second": 260.735, - "eval_steps_per_second": 4.095, - "step": 172500 - }, - { - "epoch": 7.38, - "learning_rate": 1.577450813025479e-05, - "loss": 0.0578, - "step": 173000 - }, - { - "epoch": 7.38, - "eval_accuracy": 0.9434389140271493, - "eval_f1": 0.9436506939263858, - "eval_loss": 0.3079562485218048, - "eval_runtime": 29.6972, - "eval_samples_per_second": 253.021, - "eval_steps_per_second": 3.973, - "step": 173000 - }, - { - "epoch": 7.4, - "learning_rate": 1.5747834065980968e-05, - "loss": 0.056, - "step": 173500 - }, - { - "epoch": 7.4, - "eval_accuracy": 0.932392866648922, - "eval_f1": 0.9333872565432142, - "eval_loss": 0.3507286608219147, - "eval_runtime": 29.6862, - "eval_samples_per_second": 253.114, - "eval_steps_per_second": 3.975, - "step": 173500 - }, - { - "epoch": 7.43, - "learning_rate": 1.5721160001707142e-05, - "loss": 0.0556, - "step": 174000 - }, - { - "epoch": 7.43, - "eval_accuracy": 0.9476976310886346, - "eval_f1": 0.9479980723041073, - "eval_loss": 0.2626301944255829, - "eval_runtime": 29.656, - "eval_samples_per_second": 253.372, - "eval_steps_per_second": 3.979, - "step": 174000 - }, - { - "epoch": 7.45, - "learning_rate": 1.5694539285561864e-05, - "loss": 0.0559, - "step": 174500 - }, - { - "epoch": 7.45, - "eval_accuracy": 0.9421080649454352, - "eval_f1": 0.942717103046345, - "eval_loss": 0.30394506454467773, - "eval_runtime": 29.7575, - "eval_samples_per_second": 252.508, - "eval_steps_per_second": 3.965, - "step": 174500 - }, - { - "epoch": 7.47, - "learning_rate": 1.566786522128804e-05, - "loss": 0.0566, - "step": 175000 - }, - { - "epoch": 7.47, - "eval_accuracy": 0.9411764705882353, - "eval_f1": 0.9416901978862157, - "eval_loss": 0.2973109185695648, - "eval_runtime": 28.9023, - "eval_samples_per_second": 259.979, - "eval_steps_per_second": 4.083, - "step": 175000 - }, - { - "epoch": 7.49, - "learning_rate": 1.5641191157014213e-05, - "loss": 0.0547, - "step": 175500 - }, - { - "epoch": 7.49, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9431437005424792, - "eval_loss": 0.3120613098144531, - "eval_runtime": 29.7436, - "eval_samples_per_second": 252.626, - "eval_steps_per_second": 3.967, - "step": 175500 - }, - { - "epoch": 7.51, - "learning_rate": 1.5614517092740388e-05, - "loss": 0.0603, - "step": 176000 - }, - { - "epoch": 7.51, - "eval_accuracy": 0.9370508384349214, - "eval_f1": 0.9378835010471506, - "eval_loss": 0.33420896530151367, - "eval_runtime": 29.6953, - "eval_samples_per_second": 253.037, - "eval_steps_per_second": 3.974, - "step": 176000 - }, - { - "epoch": 7.53, - "learning_rate": 1.5587843028466563e-05, - "loss": 0.0587, - "step": 176500 - }, - { - "epoch": 7.53, - "eval_accuracy": 0.9442374234761778, - "eval_f1": 0.944739177561909, - "eval_loss": 0.27532902359962463, - "eval_runtime": 29.7222, - "eval_samples_per_second": 252.808, - "eval_steps_per_second": 3.97, - "step": 176500 - }, - { - "epoch": 7.55, - "learning_rate": 1.5561222312321284e-05, - "loss": 0.0551, - "step": 177000 - }, - { - "epoch": 7.55, - "eval_accuracy": 0.9429065743944637, - "eval_f1": 0.9434823852597485, - "eval_loss": 0.2956356704235077, - "eval_runtime": 29.6918, - "eval_samples_per_second": 253.067, - "eval_steps_per_second": 3.974, - "step": 177000 - }, - { - "epoch": 7.58, - "learning_rate": 1.5534548248047462e-05, - "loss": 0.0562, - "step": 177500 - }, - { - "epoch": 7.58, - "eval_accuracy": 0.9522225179664626, - "eval_f1": 0.9524300805690111, - "eval_loss": 0.24791304767131805, - "eval_runtime": 27.4327, - "eval_samples_per_second": 273.907, - "eval_steps_per_second": 4.301, - "step": 177500 - }, - { - "epoch": 7.6, - "learning_rate": 1.5507874183773634e-05, - "loss": 0.0585, - "step": 178000 - }, - { - "epoch": 7.6, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9460760121457206, - "eval_loss": 0.2704208791255951, - "eval_runtime": 29.7221, - "eval_samples_per_second": 252.808, - "eval_steps_per_second": 3.97, - "step": 178000 - }, - { - "epoch": 7.62, - "learning_rate": 1.5481253467628355e-05, - "loss": 0.0585, - "step": 178500 - }, - { - "epoch": 7.62, - "eval_accuracy": 0.9438381687516636, - "eval_f1": 0.9442962119660283, - "eval_loss": 0.27086368203163147, - "eval_runtime": 29.6917, - "eval_samples_per_second": 253.067, - "eval_steps_per_second": 3.974, - "step": 178500 - }, - { - "epoch": 7.64, - "learning_rate": 1.5454579403354533e-05, - "loss": 0.0557, - "step": 179000 - }, - { - "epoch": 7.64, - "eval_accuracy": 0.9430396593026351, - "eval_f1": 0.9433439009698508, - "eval_loss": 0.27691367268562317, - "eval_runtime": 29.7194, - "eval_samples_per_second": 252.832, - "eval_steps_per_second": 3.97, - "step": 179000 - }, - { - "epoch": 7.66, - "learning_rate": 1.5427905339080708e-05, - "loss": 0.0567, - "step": 179500 - }, - { - "epoch": 7.66, - "eval_accuracy": 0.9453021027415491, - "eval_f1": 0.9455333997360763, - "eval_loss": 0.2835189998149872, - "eval_runtime": 29.7023, - "eval_samples_per_second": 252.977, - "eval_steps_per_second": 3.973, - "step": 179500 - }, - { - "epoch": 7.68, - "learning_rate": 1.540123127480688e-05, - "loss": 0.0582, - "step": 180000 - }, - { - "epoch": 7.68, - "eval_accuracy": 0.9437050838434922, - "eval_f1": 0.9440707056312977, - "eval_loss": 0.2772218883037567, - "eval_runtime": 29.729, - "eval_samples_per_second": 252.75, - "eval_steps_per_second": 3.969, - "step": 180000 - }, - { - "epoch": 7.7, - "learning_rate": 1.5374557210533057e-05, - "loss": 0.057, - "step": 180500 - }, - { - "epoch": 7.7, - "eval_accuracy": 0.9423742347617781, - "eval_f1": 0.9429248070232732, - "eval_loss": 0.28114861249923706, - "eval_runtime": 29.6798, - "eval_samples_per_second": 253.168, - "eval_steps_per_second": 3.976, - "step": 180500 - }, - { - "epoch": 7.72, - "learning_rate": 1.534788314625923e-05, - "loss": 0.0583, - "step": 181000 - }, - { - "epoch": 7.72, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.9466935511032313, - "eval_loss": 0.25516778230667114, - "eval_runtime": 29.6849, - "eval_samples_per_second": 253.126, - "eval_steps_per_second": 3.975, - "step": 181000 - }, - { - "epoch": 7.75, - "learning_rate": 1.5321209081985403e-05, - "loss": 0.056, - "step": 181500 - }, - { - "epoch": 7.75, - "eval_accuracy": 0.9494277348948629, - "eval_f1": 0.9496087114889121, - "eval_loss": 0.26424041390419006, - "eval_runtime": 29.7218, - "eval_samples_per_second": 252.811, - "eval_steps_per_second": 3.97, - "step": 181500 - }, - { - "epoch": 7.77, - "learning_rate": 1.529453501771158e-05, - "loss": 0.055, - "step": 182000 - }, - { - "epoch": 7.77, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.94691961434201, - "eval_loss": 0.26326122879981995, - "eval_runtime": 29.7953, - "eval_samples_per_second": 252.187, - "eval_steps_per_second": 3.96, - "step": 182000 - }, - { - "epoch": 7.79, - "learning_rate": 1.5267914301566303e-05, - "loss": 0.0561, - "step": 182500 - }, - { - "epoch": 7.79, - "eval_accuracy": 0.9470322065477775, - "eval_f1": 0.9472489943845809, - "eval_loss": 0.25921469926834106, - "eval_runtime": 29.7325, - "eval_samples_per_second": 252.72, - "eval_steps_per_second": 3.969, - "step": 182500 - }, - { - "epoch": 7.81, - "learning_rate": 1.5241240237292475e-05, - "loss": 0.0619, - "step": 183000 - }, - { - "epoch": 7.81, - "eval_accuracy": 0.9249401117913228, - "eval_f1": 0.9262515351007677, - "eval_loss": 0.366608202457428, - "eval_runtime": 29.6783, - "eval_samples_per_second": 253.181, - "eval_steps_per_second": 3.976, - "step": 183000 - }, - { - "epoch": 7.83, - "learning_rate": 1.5214566173018652e-05, - "loss": 0.0581, - "step": 183500 - }, - { - "epoch": 7.83, - "eval_accuracy": 0.9402448762310354, - "eval_f1": 0.9409055984690178, - "eval_loss": 0.3166221082210541, - "eval_runtime": 29.6828, - "eval_samples_per_second": 253.143, - "eval_steps_per_second": 3.975, - "step": 183500 - }, - { - "epoch": 7.85, - "learning_rate": 1.5187892108744826e-05, - "loss": 0.0608, - "step": 184000 - }, - { - "epoch": 7.85, - "eval_accuracy": 0.9338568006388076, - "eval_f1": 0.9346581461981202, - "eval_loss": 0.3185623288154602, - "eval_runtime": 29.6715, - "eval_samples_per_second": 253.239, - "eval_steps_per_second": 3.977, - "step": 184000 - }, - { - "epoch": 7.87, - "learning_rate": 1.5161271392599548e-05, - "loss": 0.0575, - "step": 184500 - }, - { - "epoch": 7.87, - "eval_accuracy": 0.9431727442108065, - "eval_f1": 0.9435642942522529, - "eval_loss": 0.28907355666160583, - "eval_runtime": 29.6659, - "eval_samples_per_second": 253.288, - "eval_steps_per_second": 3.978, - "step": 184500 - }, - { - "epoch": 7.9, - "learning_rate": 1.5134597328325724e-05, - "loss": 0.0576, - "step": 185000 - }, - { - "epoch": 7.9, - "eval_accuracy": 0.9494277348948629, - "eval_f1": 0.949495115749563, - "eval_loss": 0.24902021884918213, - "eval_runtime": 29.1132, - "eval_samples_per_second": 258.096, - "eval_steps_per_second": 4.053, - "step": 185000 - }, - { - "epoch": 7.92, - "learning_rate": 1.5107923264051897e-05, - "loss": 0.0589, - "step": 185500 - }, - { - "epoch": 7.92, - "eval_accuracy": 0.9401117913228639, - "eval_f1": 0.940751907329565, - "eval_loss": 0.29299965500831604, - "eval_runtime": 13.6025, - "eval_samples_per_second": 552.4, - "eval_steps_per_second": 8.675, - "step": 185500 - }, - { - "epoch": 7.94, - "learning_rate": 1.5081249199778073e-05, - "loss": 0.0585, - "step": 186000 - }, - { - "epoch": 7.94, - "eval_accuracy": 0.9237423476177802, - "eval_f1": 0.9250711808500939, - "eval_loss": 0.35780373215675354, - "eval_runtime": 13.5373, - "eval_samples_per_second": 555.059, - "eval_steps_per_second": 8.717, - "step": 186000 - }, - { - "epoch": 7.96, - "learning_rate": 1.5054575135504248e-05, - "loss": 0.06, - "step": 186500 - }, - { - "epoch": 7.96, - "eval_accuracy": 0.9383816875166356, - "eval_f1": 0.9388439209070308, - "eval_loss": 0.28486374020576477, - "eval_runtime": 13.5297, - "eval_samples_per_second": 555.37, - "eval_steps_per_second": 8.722, - "step": 186500 - }, - { - "epoch": 7.98, - "learning_rate": 1.502795441935897e-05, - "loss": 0.0591, - "step": 187000 - }, - { - "epoch": 7.98, - "eval_accuracy": 0.9446366782006921, - "eval_f1": 0.9448190570182394, - "eval_loss": 0.25872182846069336, - "eval_runtime": 13.5273, - "eval_samples_per_second": 555.47, - "eval_steps_per_second": 8.723, - "step": 187000 - }, - { - "epoch": 8.0, - "learning_rate": 1.5001333703213692e-05, - "loss": 0.0549, - "step": 187500 - }, - { - "epoch": 8.0, - "eval_accuracy": 0.9486292254458344, - "eval_f1": 0.9489107961009511, - "eval_loss": 0.26960158348083496, - "eval_runtime": 13.5289, - "eval_samples_per_second": 555.406, - "eval_steps_per_second": 8.722, - "step": 187500 - }, - { - "epoch": 8.02, - "learning_rate": 1.4974659638939868e-05, - "loss": 0.0426, - "step": 188000 - }, - { - "epoch": 8.02, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9398611153432043, - "eval_loss": 0.32398930191993713, - "eval_runtime": 13.5276, - "eval_samples_per_second": 555.456, - "eval_steps_per_second": 8.723, - "step": 188000 - }, - { - "epoch": 8.04, - "learning_rate": 1.4947985574666043e-05, - "loss": 0.04, - "step": 188500 - }, - { - "epoch": 8.04, - "eval_accuracy": 0.9405110460473782, - "eval_f1": 0.9408104589990299, - "eval_loss": 0.3479633629322052, - "eval_runtime": 13.5408, - "eval_samples_per_second": 554.917, - "eval_steps_per_second": 8.714, - "step": 188500 - }, - { - "epoch": 8.07, - "learning_rate": 1.4921311510392215e-05, - "loss": 0.0452, - "step": 189000 - }, - { - "epoch": 8.07, - "eval_accuracy": 0.9457013574660633, - "eval_f1": 0.9459910257850512, - "eval_loss": 0.3050368130207062, - "eval_runtime": 13.5374, - "eval_samples_per_second": 555.056, - "eval_steps_per_second": 8.717, - "step": 189000 - }, - { - "epoch": 8.09, - "learning_rate": 1.4894637446118392e-05, - "loss": 0.0459, - "step": 189500 - }, - { - "epoch": 8.09, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9431772082967884, - "eval_loss": 0.30137473344802856, - "eval_runtime": 13.5381, - "eval_samples_per_second": 555.027, - "eval_steps_per_second": 8.716, - "step": 189500 - }, - { - "epoch": 8.11, - "learning_rate": 1.4868016729973114e-05, - "loss": 0.0459, - "step": 190000 - }, - { - "epoch": 8.11, - "eval_accuracy": 0.943971253659835, - "eval_f1": 0.9444235461365681, - "eval_loss": 0.30643758177757263, - "eval_runtime": 13.5406, - "eval_samples_per_second": 554.926, - "eval_steps_per_second": 8.715, - "step": 190000 - }, - { - "epoch": 8.13, - "learning_rate": 1.4841342665699288e-05, - "loss": 0.0483, - "step": 190500 - }, - { - "epoch": 8.13, - "eval_accuracy": 0.9381155177002928, - "eval_f1": 0.9387478969810729, - "eval_loss": 0.32927992939949036, - "eval_runtime": 13.5327, - "eval_samples_per_second": 555.246, - "eval_steps_per_second": 8.72, - "step": 190500 - }, - { - "epoch": 8.15, - "learning_rate": 1.4814668601425464e-05, - "loss": 0.0489, - "step": 191000 - }, - { - "epoch": 8.15, - "eval_accuracy": 0.9410433856800638, - "eval_f1": 0.9413968738750169, - "eval_loss": 0.33153387904167175, - "eval_runtime": 13.5315, - "eval_samples_per_second": 555.295, - "eval_steps_per_second": 8.72, - "step": 191000 - }, - { - "epoch": 8.17, - "learning_rate": 1.4787994537151637e-05, - "loss": 0.0473, - "step": 191500 - }, - { - "epoch": 8.17, - "eval_accuracy": 0.9486292254458344, - "eval_f1": 0.9489248949784995, - "eval_loss": 0.27476081252098083, - "eval_runtime": 13.5362, - "eval_samples_per_second": 555.103, - "eval_steps_per_second": 8.717, - "step": 191500 - }, - { - "epoch": 8.19, - "learning_rate": 1.4761320472877813e-05, - "loss": 0.045, - "step": 192000 - }, - { - "epoch": 8.19, - "eval_accuracy": 0.9442374234761778, - "eval_f1": 0.9447492534031262, - "eval_loss": 0.3161003291606903, - "eval_runtime": 13.536, - "eval_samples_per_second": 555.114, - "eval_steps_per_second": 8.718, - "step": 192000 - }, - { - "epoch": 8.22, - "learning_rate": 1.4734646408603988e-05, - "loss": 0.0467, - "step": 192500 - }, - { - "epoch": 8.22, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9471827896586111, - "eval_loss": 0.2846441864967346, - "eval_runtime": 13.5383, - "eval_samples_per_second": 555.016, - "eval_steps_per_second": 8.716, - "step": 192500 - }, - { - "epoch": 8.24, - "learning_rate": 1.470797234433016e-05, - "loss": 0.046, - "step": 193000 - }, - { - "epoch": 8.24, - "eval_accuracy": 0.9423742347617781, - "eval_f1": 0.9428938683607367, - "eval_loss": 0.3229263722896576, - "eval_runtime": 13.5303, - "eval_samples_per_second": 555.347, - "eval_steps_per_second": 8.721, - "step": 193000 - }, - { - "epoch": 8.26, - "learning_rate": 1.4681298280056337e-05, - "loss": 0.0457, - "step": 193500 - }, - { - "epoch": 8.26, - "eval_accuracy": 0.946233697098749, - "eval_f1": 0.946569454970224, - "eval_loss": 0.30373692512512207, - "eval_runtime": 13.5229, - "eval_samples_per_second": 555.649, - "eval_steps_per_second": 8.726, - "step": 193500 - }, - { - "epoch": 8.28, - "learning_rate": 1.4654677563911059e-05, - "loss": 0.0455, - "step": 194000 - }, - { - "epoch": 8.28, - "eval_accuracy": 0.9437050838434922, - "eval_f1": 0.9440899632750663, - "eval_loss": 0.31653735041618347, - "eval_runtime": 13.5251, - "eval_samples_per_second": 555.559, - "eval_steps_per_second": 8.725, - "step": 194000 - }, - { - "epoch": 8.3, - "learning_rate": 1.4628003499637234e-05, - "loss": 0.047, - "step": 194500 - }, - { - "epoch": 8.3, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.946954671999758, - "eval_loss": 0.29640811681747437, - "eval_runtime": 13.5268, - "eval_samples_per_second": 555.49, - "eval_steps_per_second": 8.723, - "step": 194500 - }, - { - "epoch": 8.32, - "learning_rate": 1.460132943536341e-05, - "loss": 0.0468, - "step": 195000 - }, - { - "epoch": 8.32, - "eval_accuracy": 0.946233697098749, - "eval_f1": 0.9465351520904887, - "eval_loss": 0.2944372594356537, - "eval_runtime": 13.5261, - "eval_samples_per_second": 555.519, - "eval_steps_per_second": 8.724, - "step": 195000 - }, - { - "epoch": 8.34, - "learning_rate": 1.4574708719218132e-05, - "loss": 0.0459, - "step": 195500 - }, - { - "epoch": 8.34, - "eval_accuracy": 0.9389140271493213, - "eval_f1": 0.9394949185938651, - "eval_loss": 0.36057648062705994, - "eval_runtime": 13.5297, - "eval_samples_per_second": 555.371, - "eval_steps_per_second": 8.722, - "step": 195500 - }, - { - "epoch": 8.36, - "learning_rate": 1.4548034654944305e-05, - "loss": 0.0493, - "step": 196000 - }, - { - "epoch": 8.36, - "eval_accuracy": 0.9417088102209209, - "eval_f1": 0.9422040130273488, - "eval_loss": 0.3340831398963928, - "eval_runtime": 13.5271, - "eval_samples_per_second": 555.478, - "eval_steps_per_second": 8.723, - "step": 196000 - }, - { - "epoch": 8.39, - "learning_rate": 1.452136059067048e-05, - "loss": 0.0492, - "step": 196500 - }, - { - "epoch": 8.39, - "eval_accuracy": 0.9423742347617781, - "eval_f1": 0.9428594973324005, - "eval_loss": 0.2942802608013153, - "eval_runtime": 13.5234, - "eval_samples_per_second": 555.631, - "eval_steps_per_second": 8.726, - "step": 196500 - }, - { - "epoch": 8.41, - "learning_rate": 1.4494686526396655e-05, - "loss": 0.0458, - "step": 197000 - }, - { - "epoch": 8.41, - "eval_accuracy": 0.9515570934256056, - "eval_f1": 0.9516996748274084, - "eval_loss": 0.2723489999771118, - "eval_runtime": 13.523, - "eval_samples_per_second": 555.644, - "eval_steps_per_second": 8.726, - "step": 197000 - }, - { - "epoch": 8.43, - "learning_rate": 1.4468065810251377e-05, - "loss": 0.0494, - "step": 197500 - }, - { - "epoch": 8.43, - "eval_accuracy": 0.9510247537929198, - "eval_f1": 0.9511898273887466, - "eval_loss": 0.24962320923805237, - "eval_runtime": 13.5287, - "eval_samples_per_second": 555.411, - "eval_steps_per_second": 8.722, - "step": 197500 - }, - { - "epoch": 8.45, - "learning_rate": 1.4441391745977553e-05, - "loss": 0.0472, - "step": 198000 - }, - { - "epoch": 8.45, - "eval_accuracy": 0.9366515837104072, - "eval_f1": 0.9374091182440442, - "eval_loss": 0.33135172724723816, - "eval_runtime": 13.5362, - "eval_samples_per_second": 555.105, - "eval_steps_per_second": 8.717, - "step": 198000 - }, - { - "epoch": 8.47, - "learning_rate": 1.4414717681703726e-05, - "loss": 0.0501, - "step": 198500 - }, - { - "epoch": 8.47, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.946517345274512, - "eval_loss": 0.2846202850341797, - "eval_runtime": 13.527, - "eval_samples_per_second": 555.48, - "eval_steps_per_second": 8.723, - "step": 198500 - }, - { - "epoch": 8.49, - "learning_rate": 1.43880436174299e-05, - "loss": 0.0467, - "step": 199000 - }, - { - "epoch": 8.49, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9454834491860682, - "eval_loss": 0.29985642433166504, - "eval_runtime": 13.5281, - "eval_samples_per_second": 555.437, - "eval_steps_per_second": 8.723, - "step": 199000 - }, - { - "epoch": 8.51, - "learning_rate": 1.4361369553156077e-05, - "loss": 0.0467, - "step": 199500 - }, - { - "epoch": 8.51, - "eval_accuracy": 0.9393132818738356, - "eval_f1": 0.939983462083631, - "eval_loss": 0.3112838566303253, - "eval_runtime": 13.5306, - "eval_samples_per_second": 555.334, - "eval_steps_per_second": 8.721, - "step": 199500 - }, - { - "epoch": 8.54, - "learning_rate": 1.4334695488882252e-05, - "loss": 0.0507, - "step": 200000 - }, - { - "epoch": 8.54, - "eval_accuracy": 0.9425073196699494, - "eval_f1": 0.9429940607490731, - "eval_loss": 0.31961777806282043, - "eval_runtime": 13.5305, - "eval_samples_per_second": 555.337, - "eval_steps_per_second": 8.721, - "step": 200000 - }, - { - "epoch": 8.56, - "learning_rate": 1.4308021424608424e-05, - "loss": 0.0489, - "step": 200500 - }, - { - "epoch": 8.56, - "eval_accuracy": 0.9280010646792654, - "eval_f1": 0.9293048945182593, - "eval_loss": 0.4206550121307373, - "eval_runtime": 13.5338, - "eval_samples_per_second": 555.202, - "eval_steps_per_second": 8.719, - "step": 200500 - }, - { - "epoch": 8.58, - "learning_rate": 1.42813473603346e-05, - "loss": 0.048, - "step": 201000 - }, - { - "epoch": 8.58, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9432724652442028, - "eval_loss": 0.3334466516971588, - "eval_runtime": 13.5332, - "eval_samples_per_second": 555.225, - "eval_steps_per_second": 8.719, - "step": 201000 - }, - { - "epoch": 8.6, - "learning_rate": 1.4254673296060775e-05, - "loss": 0.0464, - "step": 201500 - }, - { - "epoch": 8.6, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9455351354611425, - "eval_loss": 0.3089136481285095, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.272, - "eval_steps_per_second": 8.72, - "step": 201500 - }, - { - "epoch": 8.62, - "learning_rate": 1.4228052579915497e-05, - "loss": 0.0511, - "step": 202000 - }, - { - "epoch": 8.62, - "eval_accuracy": 0.9476976310886346, - "eval_f1": 0.9480208872624477, - "eval_loss": 0.2791631817817688, - "eval_runtime": 13.5329, - "eval_samples_per_second": 555.241, - "eval_steps_per_second": 8.72, - "step": 202000 - }, - { - "epoch": 8.64, - "learning_rate": 1.4201378515641673e-05, - "loss": 0.0475, - "step": 202500 - }, - { - "epoch": 8.64, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.9465029093795351, - "eval_loss": 0.2907649874687195, - "eval_runtime": 13.5424, - "eval_samples_per_second": 554.849, - "eval_steps_per_second": 8.713, - "step": 202500 - }, - { - "epoch": 8.66, - "learning_rate": 1.4174704451367846e-05, - "loss": 0.0456, - "step": 203000 - }, - { - "epoch": 8.66, - "eval_accuracy": 0.9445035932925206, - "eval_f1": 0.9449522232609514, - "eval_loss": 0.3088465929031372, - "eval_runtime": 13.542, - "eval_samples_per_second": 554.868, - "eval_steps_per_second": 8.714, - "step": 203000 - }, - { - "epoch": 8.69, - "learning_rate": 1.4148030387094022e-05, - "loss": 0.0477, - "step": 203500 - }, - { - "epoch": 8.69, - "eval_accuracy": 0.9442374234761778, - "eval_f1": 0.9446454812862733, - "eval_loss": 0.29683443903923035, - "eval_runtime": 13.5391, - "eval_samples_per_second": 554.985, - "eval_steps_per_second": 8.716, - "step": 203500 - }, - { - "epoch": 8.71, - "learning_rate": 1.4121356322820197e-05, - "loss": 0.0489, - "step": 204000 - }, - { - "epoch": 8.71, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.9463714591570415, - "eval_loss": 0.2853368818759918, - "eval_runtime": 13.5424, - "eval_samples_per_second": 554.85, - "eval_steps_per_second": 8.713, - "step": 204000 - }, - { - "epoch": 8.73, - "learning_rate": 1.409468225854637e-05, - "loss": 0.0505, - "step": 204500 - }, - { - "epoch": 8.73, - "eval_accuracy": 0.9417088102209209, - "eval_f1": 0.9422965965625818, - "eval_loss": 0.31411096453666687, - "eval_runtime": 13.5401, - "eval_samples_per_second": 554.944, - "eval_steps_per_second": 8.715, - "step": 204500 - }, - { - "epoch": 8.75, - "learning_rate": 1.4068008194272546e-05, - "loss": 0.0511, - "step": 205000 - }, - { - "epoch": 8.75, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9491206021399284, - "eval_loss": 0.25752925872802734, - "eval_runtime": 13.5421, - "eval_samples_per_second": 554.863, - "eval_steps_per_second": 8.714, - "step": 205000 - }, - { - "epoch": 8.77, - "learning_rate": 1.404133412999872e-05, - "loss": 0.0498, - "step": 205500 - }, - { - "epoch": 8.77, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9476323959588058, - "eval_loss": 0.2809707820415497, - "eval_runtime": 13.5386, - "eval_samples_per_second": 555.006, - "eval_steps_per_second": 8.716, - "step": 205500 - }, - { - "epoch": 8.79, - "learning_rate": 1.4014713413853443e-05, - "loss": 0.0514, - "step": 206000 - }, - { - "epoch": 8.79, - "eval_accuracy": 0.9471652914559489, - "eval_f1": 0.9474699446592635, - "eval_loss": 0.29401782155036926, - "eval_runtime": 13.5388, - "eval_samples_per_second": 554.998, - "eval_steps_per_second": 8.716, - "step": 206000 - }, - { - "epoch": 8.81, - "learning_rate": 1.3988039349579619e-05, - "loss": 0.0488, - "step": 206500 - }, - { - "epoch": 8.81, - "eval_accuracy": 0.9490284801703487, - "eval_f1": 0.9493332336992797, - "eval_loss": 0.28929102420806885, - "eval_runtime": 13.5372, - "eval_samples_per_second": 555.061, - "eval_steps_per_second": 8.717, - "step": 206500 - }, - { - "epoch": 8.83, - "learning_rate": 1.3961365285305792e-05, - "loss": 0.0492, - "step": 207000 - }, - { - "epoch": 8.83, - "eval_accuracy": 0.9438381687516636, - "eval_f1": 0.9441721189079093, - "eval_loss": 0.27806001901626587, - "eval_runtime": 13.5306, - "eval_samples_per_second": 555.336, - "eval_steps_per_second": 8.721, - "step": 207000 - }, - { - "epoch": 8.86, - "learning_rate": 1.3934691221031966e-05, - "loss": 0.0506, - "step": 207500 - }, - { - "epoch": 8.86, - "eval_accuracy": 0.9347883949960074, - "eval_f1": 0.9354718798641715, - "eval_loss": 0.36486494541168213, - "eval_runtime": 13.5304, - "eval_samples_per_second": 555.343, - "eval_steps_per_second": 8.721, - "step": 207500 - }, - { - "epoch": 8.88, - "learning_rate": 1.3908017156758142e-05, - "loss": 0.0474, - "step": 208000 - }, - { - "epoch": 8.88, - "eval_accuracy": 0.9495608198030343, - "eval_f1": 0.9498771170325628, - "eval_loss": 0.2834174633026123, - "eval_runtime": 13.5288, - "eval_samples_per_second": 555.408, - "eval_steps_per_second": 8.722, - "step": 208000 - }, - { - "epoch": 8.9, - "learning_rate": 1.3881343092484317e-05, - "loss": 0.0513, - "step": 208500 - }, - { - "epoch": 8.9, - "eval_accuracy": 0.9455682725578919, - "eval_f1": 0.9459814913067134, - "eval_loss": 0.2689332365989685, - "eval_runtime": 13.5299, - "eval_samples_per_second": 555.364, - "eval_steps_per_second": 8.721, - "step": 208500 - }, - { - "epoch": 8.92, - "learning_rate": 1.3854669028210491e-05, - "loss": 0.0489, - "step": 209000 - }, - { - "epoch": 8.92, - "eval_accuracy": 0.9426404045781208, - "eval_f1": 0.9431762099216892, - "eval_loss": 0.31225860118865967, - "eval_runtime": 13.5161, - "eval_samples_per_second": 555.929, - "eval_steps_per_second": 8.73, - "step": 209000 - }, - { - "epoch": 8.94, - "learning_rate": 1.3827994963936666e-05, - "loss": 0.0499, - "step": 209500 - }, - { - "epoch": 8.94, - "eval_accuracy": 0.9389140271493213, - "eval_f1": 0.9396302937762872, - "eval_loss": 0.32025986909866333, - "eval_runtime": 13.5316, - "eval_samples_per_second": 555.292, - "eval_steps_per_second": 8.72, - "step": 209500 - }, - { - "epoch": 8.96, - "learning_rate": 1.3801374247791388e-05, - "loss": 0.0508, - "step": 210000 - }, - { - "epoch": 8.96, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.946708426144522, - "eval_loss": 0.2876236140727997, - "eval_runtime": 13.53, - "eval_samples_per_second": 555.36, - "eval_steps_per_second": 8.721, - "step": 210000 - }, - { - "epoch": 8.98, - "learning_rate": 1.3774700183517564e-05, - "loss": 0.0513, - "step": 210500 - }, - { - "epoch": 8.98, - "eval_accuracy": 0.9446366782006921, - "eval_f1": 0.9450770442358372, - "eval_loss": 0.31053757667541504, - "eval_runtime": 13.5348, - "eval_samples_per_second": 555.16, - "eval_steps_per_second": 8.718, - "step": 210500 - }, - { - "epoch": 9.01, - "learning_rate": 1.3748026119243739e-05, - "loss": 0.045, - "step": 211000 - }, - { - "epoch": 9.01, - "eval_accuracy": 0.9483630556294916, - "eval_f1": 0.9486779121833928, - "eval_loss": 0.31297969818115234, - "eval_runtime": 13.5337, - "eval_samples_per_second": 555.209, - "eval_steps_per_second": 8.719, - "step": 211000 - }, - { - "epoch": 9.03, - "learning_rate": 1.3721352054969912e-05, - "loss": 0.0386, - "step": 211500 - }, - { - "epoch": 9.03, - "eval_accuracy": 0.9401117913228639, - "eval_f1": 0.9406984058449347, - "eval_loss": 0.35765621066093445, - "eval_runtime": 13.5363, - "eval_samples_per_second": 555.101, - "eval_steps_per_second": 8.717, - "step": 211500 - }, - { - "epoch": 9.05, - "learning_rate": 1.3694731338824637e-05, - "loss": 0.0373, - "step": 212000 - }, - { - "epoch": 9.05, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.9464518539873553, - "eval_loss": 0.3229225277900696, - "eval_runtime": 13.5205, - "eval_samples_per_second": 555.749, - "eval_steps_per_second": 8.728, - "step": 212000 - }, - { - "epoch": 9.07, - "learning_rate": 1.366805727455081e-05, - "loss": 0.0367, - "step": 212500 - }, - { - "epoch": 9.07, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9461143122974073, - "eval_loss": 0.33056244254112244, - "eval_runtime": 13.5161, - "eval_samples_per_second": 555.93, - "eval_steps_per_second": 8.73, - "step": 212500 - }, - { - "epoch": 9.09, - "learning_rate": 1.3641383210276984e-05, - "loss": 0.0363, - "step": 213000 - }, - { - "epoch": 9.09, - "eval_accuracy": 0.9455682725578919, - "eval_f1": 0.9459606462006873, - "eval_loss": 0.3319728374481201, - "eval_runtime": 13.5062, - "eval_samples_per_second": 556.336, - "eval_steps_per_second": 8.737, - "step": 213000 - }, - { - "epoch": 9.11, - "learning_rate": 1.3614762494131708e-05, - "loss": 0.039, - "step": 213500 - }, - { - "epoch": 9.11, - "eval_accuracy": 0.9510247537929198, - "eval_f1": 0.951212137839212, - "eval_loss": 0.2826208770275116, - "eval_runtime": 13.5238, - "eval_samples_per_second": 555.613, - "eval_steps_per_second": 8.725, - "step": 213500 - }, - { - "epoch": 9.13, - "learning_rate": 1.3588088429857882e-05, - "loss": 0.0392, - "step": 214000 - }, - { - "epoch": 9.13, - "eval_accuracy": 0.9423742347617781, - "eval_f1": 0.9428820676687188, - "eval_loss": 0.33029282093048096, - "eval_runtime": 13.5236, - "eval_samples_per_second": 555.62, - "eval_steps_per_second": 8.725, - "step": 214000 - }, - { - "epoch": 9.15, - "learning_rate": 1.3561414365584055e-05, - "loss": 0.0387, - "step": 214500 - }, - { - "epoch": 9.15, - "eval_accuracy": 0.946233697098749, - "eval_f1": 0.9464641559514156, - "eval_loss": 0.3023781180381775, - "eval_runtime": 13.5258, - "eval_samples_per_second": 555.531, - "eval_steps_per_second": 8.724, - "step": 214500 - }, - { - "epoch": 9.18, - "learning_rate": 1.3534740301310231e-05, - "loss": 0.0385, - "step": 215000 - }, - { - "epoch": 9.18, - "eval_accuracy": 0.936252328985893, - "eval_f1": 0.9368822232154065, - "eval_loss": 0.3795050084590912, - "eval_runtime": 13.5245, - "eval_samples_per_second": 555.584, - "eval_steps_per_second": 8.725, - "step": 215000 - }, - { - "epoch": 9.2, - "learning_rate": 1.3508066237036406e-05, - "loss": 0.0388, - "step": 215500 - }, - { - "epoch": 9.2, - "eval_accuracy": 0.9435719989353207, - "eval_f1": 0.9439760866552441, - "eval_loss": 0.3283912241458893, - "eval_runtime": 13.5235, - "eval_samples_per_second": 555.623, - "eval_steps_per_second": 8.726, - "step": 215500 - }, - { - "epoch": 9.22, - "learning_rate": 1.3481392172762579e-05, - "loss": 0.0409, - "step": 216000 - }, - { - "epoch": 9.22, - "eval_accuracy": 0.9413095554964067, - "eval_f1": 0.9418583676160031, - "eval_loss": 0.3618067800998688, - "eval_runtime": 13.5245, - "eval_samples_per_second": 555.584, - "eval_steps_per_second": 8.725, - "step": 216000 - }, - { - "epoch": 9.24, - "learning_rate": 1.3454718108488755e-05, - "loss": 0.0431, - "step": 216500 - }, - { - "epoch": 9.24, - "eval_accuracy": 0.9361192440777216, - "eval_f1": 0.9369543755725985, - "eval_loss": 0.3455849289894104, - "eval_runtime": 13.5333, - "eval_samples_per_second": 555.222, - "eval_steps_per_second": 8.719, - "step": 216500 - }, - { - "epoch": 9.26, - "learning_rate": 1.342804404421493e-05, - "loss": 0.0383, - "step": 217000 - }, - { - "epoch": 9.26, - "eval_accuracy": 0.9297311684854938, - "eval_f1": 0.9305976202705633, - "eval_loss": 0.41196706891059875, - "eval_runtime": 13.534, - "eval_samples_per_second": 555.196, - "eval_steps_per_second": 8.719, - "step": 217000 - }, - { - "epoch": 9.28, - "learning_rate": 1.3401369979941106e-05, - "loss": 0.0396, - "step": 217500 - }, - { - "epoch": 9.28, - "eval_accuracy": 0.9508916688847485, - "eval_f1": 0.9510984681604356, - "eval_loss": 0.29180270433425903, - "eval_runtime": 13.5363, - "eval_samples_per_second": 555.098, - "eval_steps_per_second": 8.717, - "step": 217500 - }, - { - "epoch": 9.3, - "learning_rate": 1.3374749263795828e-05, - "loss": 0.0399, - "step": 218000 - }, - { - "epoch": 9.3, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9515091138685884, - "eval_loss": 0.29510194063186646, - "eval_runtime": 13.536, - "eval_samples_per_second": 555.111, - "eval_steps_per_second": 8.717, - "step": 218000 - }, - { - "epoch": 9.33, - "learning_rate": 1.3348075199522e-05, - "loss": 0.0386, - "step": 218500 - }, - { - "epoch": 9.33, - "eval_accuracy": 0.9434389140271493, - "eval_f1": 0.9439485797046033, - "eval_loss": 0.33171477913856506, - "eval_runtime": 13.535, - "eval_samples_per_second": 555.153, - "eval_steps_per_second": 8.718, - "step": 218500 - }, - { - "epoch": 9.35, - "learning_rate": 1.3321401135248177e-05, - "loss": 0.0408, - "step": 219000 - }, - { - "epoch": 9.35, - "eval_accuracy": 0.9419749800372638, - "eval_f1": 0.9424593134472877, - "eval_loss": 0.33151671290397644, - "eval_runtime": 13.5389, - "eval_samples_per_second": 554.995, - "eval_steps_per_second": 8.716, - "step": 219000 - }, - { - "epoch": 9.37, - "learning_rate": 1.3294727070974351e-05, - "loss": 0.0424, - "step": 219500 - }, - { - "epoch": 9.37, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.9469195787369524, - "eval_loss": 0.3001687526702881, - "eval_runtime": 13.5369, - "eval_samples_per_second": 555.073, - "eval_steps_per_second": 8.717, - "step": 219500 - }, - { - "epoch": 9.39, - "learning_rate": 1.3268053006700526e-05, - "loss": 0.0409, - "step": 220000 - }, - { - "epoch": 9.39, - "eval_accuracy": 0.9455682725578919, - "eval_f1": 0.9459156136981188, - "eval_loss": 0.3342529237270355, - "eval_runtime": 13.5382, - "eval_samples_per_second": 555.023, - "eval_steps_per_second": 8.716, - "step": 220000 - }, - { - "epoch": 9.41, - "learning_rate": 1.3241378942426702e-05, - "loss": 0.0408, - "step": 220500 - }, - { - "epoch": 9.41, - "eval_accuracy": 0.9367846686185787, - "eval_f1": 0.9375661881227262, - "eval_loss": 0.3659190237522125, - "eval_runtime": 13.534, - "eval_samples_per_second": 555.196, - "eval_steps_per_second": 8.719, - "step": 220500 - }, - { - "epoch": 9.43, - "learning_rate": 1.3214704878152875e-05, - "loss": 0.0402, - "step": 221000 - }, - { - "epoch": 9.43, - "eval_accuracy": 0.9442374234761778, - "eval_f1": 0.9447072226618535, - "eval_loss": 0.32796511054039, - "eval_runtime": 13.5246, - "eval_samples_per_second": 555.582, - "eval_steps_per_second": 8.725, - "step": 221000 - }, - { - "epoch": 9.45, - "learning_rate": 1.318803081387905e-05, - "loss": 0.041, - "step": 221500 - }, - { - "epoch": 9.45, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.9501153797703192, - "eval_loss": 0.2731388509273529, - "eval_runtime": 13.5271, - "eval_samples_per_second": 555.478, - "eval_steps_per_second": 8.723, - "step": 221500 - }, - { - "epoch": 9.47, - "learning_rate": 1.3161410097733773e-05, - "loss": 0.0435, - "step": 222000 - }, - { - "epoch": 9.47, - "eval_accuracy": 0.9467660367314347, - "eval_f1": 0.9469677220421611, - "eval_loss": 0.32359492778778076, - "eval_runtime": 13.53, - "eval_samples_per_second": 555.358, - "eval_steps_per_second": 8.721, - "step": 222000 - }, - { - "epoch": 9.5, - "learning_rate": 1.3134736033459948e-05, - "loss": 0.0424, - "step": 222500 - }, - { - "epoch": 9.5, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.9500851711345424, - "eval_loss": 0.28242748975753784, - "eval_runtime": 13.5277, - "eval_samples_per_second": 555.451, - "eval_steps_per_second": 8.723, - "step": 222500 - }, - { - "epoch": 9.52, - "learning_rate": 1.310811531731467e-05, - "loss": 0.0409, - "step": 223000 - }, - { - "epoch": 9.52, - "eval_accuracy": 0.9407772158637211, - "eval_f1": 0.9414327506311181, - "eval_loss": 0.35967016220092773, - "eval_runtime": 13.5314, - "eval_samples_per_second": 555.303, - "eval_steps_per_second": 8.72, - "step": 223000 - }, - { - "epoch": 9.54, - "learning_rate": 1.3081441253040846e-05, - "loss": 0.0405, - "step": 223500 - }, - { - "epoch": 9.54, - "eval_accuracy": 0.9409103007718924, - "eval_f1": 0.9415566706878806, - "eval_loss": 0.3606089651584625, - "eval_runtime": 13.5349, - "eval_samples_per_second": 555.157, - "eval_steps_per_second": 8.718, - "step": 223500 - }, - { - "epoch": 9.56, - "learning_rate": 1.3054767188767019e-05, - "loss": 0.0419, - "step": 224000 - }, - { - "epoch": 9.56, - "eval_accuracy": 0.9471652914559489, - "eval_f1": 0.9475127712539337, - "eval_loss": 0.3135475516319275, - "eval_runtime": 13.5324, - "eval_samples_per_second": 555.259, - "eval_steps_per_second": 8.72, - "step": 224000 - }, - { - "epoch": 9.58, - "learning_rate": 1.3028093124493193e-05, - "loss": 0.039, - "step": 224500 - }, - { - "epoch": 9.58, - "eval_accuracy": 0.9527548575991482, - "eval_f1": 0.9530442815959349, - "eval_loss": 0.2737436890602112, - "eval_runtime": 13.5316, - "eval_samples_per_second": 555.292, - "eval_steps_per_second": 8.72, - "step": 224500 - }, - { - "epoch": 9.6, - "learning_rate": 1.3001472408347917e-05, - "loss": 0.0425, - "step": 225000 - }, - { - "epoch": 9.6, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9462683305954652, - "eval_loss": 0.2910870313644409, - "eval_runtime": 13.5324, - "eval_samples_per_second": 555.26, - "eval_steps_per_second": 8.72, - "step": 225000 - }, - { - "epoch": 9.62, - "learning_rate": 1.2974798344074091e-05, - "loss": 0.0411, - "step": 225500 - }, - { - "epoch": 9.62, - "eval_accuracy": 0.9507585839765771, - "eval_f1": 0.9509916378732837, - "eval_loss": 0.2779683768749237, - "eval_runtime": 13.5184, - "eval_samples_per_second": 555.837, - "eval_steps_per_second": 8.729, - "step": 225500 - }, - { - "epoch": 9.65, - "learning_rate": 1.2948124279800264e-05, - "loss": 0.0446, - "step": 226000 - }, - { - "epoch": 9.65, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9472618308735409, - "eval_loss": 0.2749107778072357, - "eval_runtime": 13.5289, - "eval_samples_per_second": 555.404, - "eval_steps_per_second": 8.722, - "step": 226000 - }, - { - "epoch": 9.67, - "learning_rate": 1.292145021552644e-05, - "loss": 0.0417, - "step": 226500 - }, - { - "epoch": 9.67, - "eval_accuracy": 0.9455682725578919, - "eval_f1": 0.9459565056263725, - "eval_loss": 0.3201073408126831, - "eval_runtime": 13.5287, - "eval_samples_per_second": 555.41, - "eval_steps_per_second": 8.722, - "step": 226500 - }, - { - "epoch": 9.69, - "learning_rate": 1.2894776151252615e-05, - "loss": 0.0426, - "step": 227000 - }, - { - "epoch": 9.69, - "eval_accuracy": 0.9464998669150918, - "eval_f1": 0.9468397463588856, - "eval_loss": 0.2988472282886505, - "eval_runtime": 13.5281, - "eval_samples_per_second": 555.438, - "eval_steps_per_second": 8.723, - "step": 227000 - }, - { - "epoch": 9.71, - "learning_rate": 1.2868102086978791e-05, - "loss": 0.0434, - "step": 227500 - }, - { - "epoch": 9.71, - "eval_accuracy": 0.9433058291189779, - "eval_f1": 0.9435376363499867, - "eval_loss": 0.32506898045539856, - "eval_runtime": 13.5305, - "eval_samples_per_second": 555.337, - "eval_steps_per_second": 8.721, - "step": 227500 - }, - { - "epoch": 9.73, - "learning_rate": 1.2841481370833513e-05, - "loss": 0.0424, - "step": 228000 - }, - { - "epoch": 9.73, - "eval_accuracy": 0.9511578387010913, - "eval_f1": 0.951360071589869, - "eval_loss": 0.283211886882782, - "eval_runtime": 13.5317, - "eval_samples_per_second": 555.29, - "eval_steps_per_second": 8.72, - "step": 228000 - }, - { - "epoch": 9.75, - "learning_rate": 1.2814807306559686e-05, - "loss": 0.0405, - "step": 228500 - }, - { - "epoch": 9.75, - "eval_accuracy": 0.9527548575991482, - "eval_f1": 0.9527633365669924, - "eval_loss": 0.2723678946495056, - "eval_runtime": 13.5292, - "eval_samples_per_second": 555.391, - "eval_steps_per_second": 8.722, - "step": 228500 - }, - { - "epoch": 9.77, - "learning_rate": 1.278813324228586e-05, - "loss": 0.0434, - "step": 229000 - }, - { - "epoch": 9.77, - "eval_accuracy": 0.9421080649454352, - "eval_f1": 0.9426021558819764, - "eval_loss": 0.3235361576080322, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.27, - "eval_steps_per_second": 8.72, - "step": 229000 - }, - { - "epoch": 9.79, - "learning_rate": 1.2761459178012037e-05, - "loss": 0.0411, - "step": 229500 - }, - { - "epoch": 9.79, - "eval_accuracy": 0.9507585839765771, - "eval_f1": 0.950927039725016, - "eval_loss": 0.2742987275123596, - "eval_runtime": 13.5327, - "eval_samples_per_second": 555.247, - "eval_steps_per_second": 8.72, - "step": 229500 - }, - { - "epoch": 9.82, - "learning_rate": 1.2734785113738211e-05, - "loss": 0.0431, - "step": 230000 - }, - { - "epoch": 9.82, - "eval_accuracy": 0.95009315943572, - "eval_f1": 0.9503696415522803, - "eval_loss": 0.2664912939071655, - "eval_runtime": 13.5414, - "eval_samples_per_second": 554.89, - "eval_steps_per_second": 8.714, - "step": 230000 - }, - { - "epoch": 9.84, - "learning_rate": 1.2708111049464386e-05, - "loss": 0.0395, - "step": 230500 - }, - { - "epoch": 9.84, - "eval_accuracy": 0.9453021027415491, - "eval_f1": 0.9456690842646758, - "eval_loss": 0.32275640964508057, - "eval_runtime": 13.542, - "eval_samples_per_second": 554.865, - "eval_steps_per_second": 8.714, - "step": 230500 - }, - { - "epoch": 9.86, - "learning_rate": 1.2681490333319108e-05, - "loss": 0.0441, - "step": 231000 - }, - { - "epoch": 9.86, - "eval_accuracy": 0.9425073196699494, - "eval_f1": 0.9430064792809049, - "eval_loss": 0.30706483125686646, - "eval_runtime": 13.5412, - "eval_samples_per_second": 554.898, - "eval_steps_per_second": 8.714, - "step": 231000 - }, - { - "epoch": 9.88, - "learning_rate": 1.2654816269045282e-05, - "loss": 0.0442, - "step": 231500 - }, - { - "epoch": 9.88, - "eval_accuracy": 0.9397125365983497, - "eval_f1": 0.9403983821206406, - "eval_loss": 0.32434916496276855, - "eval_runtime": 13.5379, - "eval_samples_per_second": 555.033, - "eval_steps_per_second": 8.716, - "step": 231500 - }, - { - "epoch": 9.9, - "learning_rate": 1.2628142204771459e-05, - "loss": 0.0429, - "step": 232000 - }, - { - "epoch": 9.9, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9488449570964996, - "eval_loss": 0.2867507040500641, - "eval_runtime": 13.542, - "eval_samples_per_second": 554.867, - "eval_steps_per_second": 8.714, - "step": 232000 - }, - { - "epoch": 9.92, - "learning_rate": 1.2601468140497633e-05, - "loss": 0.044, - "step": 232500 - }, - { - "epoch": 9.92, - "eval_accuracy": 0.9434389140271493, - "eval_f1": 0.9440101830061064, - "eval_loss": 0.30820271372795105, - "eval_runtime": 13.54, - "eval_samples_per_second": 554.947, - "eval_steps_per_second": 8.715, - "step": 232500 - }, - { - "epoch": 9.94, - "learning_rate": 1.2574794076223806e-05, - "loss": 0.0428, - "step": 233000 - }, - { - "epoch": 9.94, - "eval_accuracy": 0.9341229704551504, - "eval_f1": 0.9350479509633295, - "eval_loss": 0.368161141872406, - "eval_runtime": 13.541, - "eval_samples_per_second": 554.909, - "eval_steps_per_second": 8.714, - "step": 233000 - }, - { - "epoch": 9.97, - "learning_rate": 1.2548120011949982e-05, - "loss": 0.0443, - "step": 233500 - }, - { - "epoch": 9.97, - "eval_accuracy": 0.9467660367314347, - "eval_f1": 0.9471681860906885, - "eval_loss": 0.27612563967704773, - "eval_runtime": 13.5379, - "eval_samples_per_second": 555.033, - "eval_steps_per_second": 8.716, - "step": 233500 - }, - { - "epoch": 9.99, - "learning_rate": 1.2521445947676157e-05, - "loss": 0.0432, - "step": 234000 - }, - { - "epoch": 9.99, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9432861941973121, - "eval_loss": 0.29645493626594543, - "eval_runtime": 13.5391, - "eval_samples_per_second": 554.983, - "eval_steps_per_second": 8.715, - "step": 234000 - }, - { - "epoch": 10.01, - "learning_rate": 1.2494825231530879e-05, - "loss": 0.0383, - "step": 234500 - }, - { - "epoch": 10.01, - "eval_accuracy": 0.9527548575991482, - "eval_f1": 0.9527383304408453, - "eval_loss": 0.26099878549575806, - "eval_runtime": 13.5389, - "eval_samples_per_second": 554.992, - "eval_steps_per_second": 8.716, - "step": 234500 - }, - { - "epoch": 10.03, - "learning_rate": 1.2468151167257055e-05, - "loss": 0.0319, - "step": 235000 - }, - { - "epoch": 10.03, - "eval_accuracy": 0.9350545648123503, - "eval_f1": 0.9359789154619395, - "eval_loss": 0.38348767161369324, - "eval_runtime": 13.528, - "eval_samples_per_second": 555.44, - "eval_steps_per_second": 8.723, - "step": 235000 - }, - { - "epoch": 10.05, - "learning_rate": 1.2441477102983228e-05, - "loss": 0.0319, - "step": 235500 - }, - { - "epoch": 10.05, - "eval_accuracy": 0.9386478573329784, - "eval_f1": 0.9393782650425354, - "eval_loss": 0.3721713721752167, - "eval_runtime": 13.5326, - "eval_samples_per_second": 555.251, - "eval_steps_per_second": 8.72, - "step": 235500 - }, - { - "epoch": 10.07, - "learning_rate": 1.2414803038709402e-05, - "loss": 0.0312, - "step": 236000 - }, - { - "epoch": 10.07, - "eval_accuracy": 0.9453021027415491, - "eval_f1": 0.9457109004028486, - "eval_loss": 0.3375546336174011, - "eval_runtime": 13.5304, - "eval_samples_per_second": 555.341, - "eval_steps_per_second": 8.721, - "step": 236000 - }, - { - "epoch": 10.09, - "learning_rate": 1.2388128974435579e-05, - "loss": 0.0315, - "step": 236500 - }, - { - "epoch": 10.09, - "eval_accuracy": 0.9306627628426937, - "eval_f1": 0.9316744044650579, - "eval_loss": 0.4443936347961426, - "eval_runtime": 13.5272, - "eval_samples_per_second": 555.472, - "eval_steps_per_second": 8.723, - "step": 236500 - }, - { - "epoch": 10.11, - "learning_rate": 1.2361454910161751e-05, - "loss": 0.0345, - "step": 237000 - }, - { - "epoch": 10.11, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9453803935392843, - "eval_loss": 0.34678158164024353, - "eval_runtime": 13.53, - "eval_samples_per_second": 555.36, - "eval_steps_per_second": 8.721, - "step": 237000 - }, - { - "epoch": 10.14, - "learning_rate": 1.2334834194016473e-05, - "loss": 0.0326, - "step": 237500 - }, - { - "epoch": 10.14, - "eval_accuracy": 0.932392866648922, - "eval_f1": 0.9335176394538748, - "eval_loss": 0.42278626561164856, - "eval_runtime": 13.5277, - "eval_samples_per_second": 555.453, - "eval_steps_per_second": 8.723, - "step": 237500 - }, - { - "epoch": 10.16, - "learning_rate": 1.230816012974265e-05, - "loss": 0.0329, - "step": 238000 - }, - { - "epoch": 10.16, - "eval_accuracy": 0.9447697631088634, - "eval_f1": 0.945271521756412, - "eval_loss": 0.3269987106323242, - "eval_runtime": 13.5308, - "eval_samples_per_second": 555.324, - "eval_steps_per_second": 8.721, - "step": 238000 - }, - { - "epoch": 10.18, - "learning_rate": 1.2281486065468824e-05, - "loss": 0.0349, - "step": 238500 - }, - { - "epoch": 10.18, - "eval_accuracy": 0.9447697631088634, - "eval_f1": 0.9452700002965628, - "eval_loss": 0.3369627892971039, - "eval_runtime": 13.5278, - "eval_samples_per_second": 555.448, - "eval_steps_per_second": 8.723, - "step": 238500 - }, - { - "epoch": 10.2, - "learning_rate": 1.2254812001195e-05, - "loss": 0.03, - "step": 239000 - }, - { - "epoch": 10.2, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9492453592745367, - "eval_loss": 0.32061174511909485, - "eval_runtime": 13.5307, - "eval_samples_per_second": 555.331, - "eval_steps_per_second": 8.721, - "step": 239000 - }, - { - "epoch": 10.22, - "learning_rate": 1.2228191285049722e-05, - "loss": 0.0358, - "step": 239500 - }, - { - "epoch": 10.22, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.9465327811229762, - "eval_loss": 0.33827266097068787, - "eval_runtime": 13.5261, - "eval_samples_per_second": 555.518, - "eval_steps_per_second": 8.724, - "step": 239500 - }, - { - "epoch": 10.24, - "learning_rate": 1.2201517220775895e-05, - "loss": 0.0367, - "step": 240000 - }, - { - "epoch": 10.24, - "eval_accuracy": 0.9530210274154911, - "eval_f1": 0.9531614604691627, - "eval_loss": 0.25948166847229004, - "eval_runtime": 13.5325, - "eval_samples_per_second": 555.256, - "eval_steps_per_second": 8.72, - "step": 240000 - }, - { - "epoch": 10.26, - "learning_rate": 1.2174843156502071e-05, - "loss": 0.0317, - "step": 240500 - }, - { - "epoch": 10.26, - "eval_accuracy": 0.9526217726909768, - "eval_f1": 0.9528273246855208, - "eval_loss": 0.30694305896759033, - "eval_runtime": 13.5328, - "eval_samples_per_second": 555.243, - "eval_steps_per_second": 8.72, - "step": 240500 - }, - { - "epoch": 10.29, - "learning_rate": 1.2148169092228246e-05, - "loss": 0.033, - "step": 241000 - }, - { - "epoch": 10.29, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9488020635699753, - "eval_loss": 0.3017740249633789, - "eval_runtime": 13.5313, - "eval_samples_per_second": 555.307, - "eval_steps_per_second": 8.721, - "step": 241000 - }, - { - "epoch": 10.31, - "learning_rate": 1.212149502795442e-05, - "loss": 0.036, - "step": 241500 - }, - { - "epoch": 10.31, - "eval_accuracy": 0.9480968858131488, - "eval_f1": 0.9484357589115017, - "eval_loss": 0.3117830753326416, - "eval_runtime": 13.5326, - "eval_samples_per_second": 555.252, - "eval_steps_per_second": 8.72, - "step": 241500 - }, - { - "epoch": 10.33, - "learning_rate": 1.2094820963680595e-05, - "loss": 0.0343, - "step": 242000 - }, - { - "epoch": 10.33, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9504874707212669, - "eval_loss": 0.29442715644836426, - "eval_runtime": 13.514, - "eval_samples_per_second": 556.017, - "eval_steps_per_second": 8.732, - "step": 242000 - }, - { - "epoch": 10.35, - "learning_rate": 1.2068200247535319e-05, - "loss": 0.0341, - "step": 242500 - }, - { - "epoch": 10.35, - "eval_accuracy": 0.9450359329252063, - "eval_f1": 0.9455823002502098, - "eval_loss": 0.33251458406448364, - "eval_runtime": 13.5, - "eval_samples_per_second": 556.593, - "eval_steps_per_second": 8.741, - "step": 242500 - }, - { - "epoch": 10.37, - "learning_rate": 1.2041526183261491e-05, - "loss": 0.0376, - "step": 243000 - }, - { - "epoch": 10.37, - "eval_accuracy": 0.9346553100878361, - "eval_f1": 0.9354847028775204, - "eval_loss": 0.3757520020008087, - "eval_runtime": 13.4999, - "eval_samples_per_second": 556.598, - "eval_steps_per_second": 8.741, - "step": 243000 - }, - { - "epoch": 10.39, - "learning_rate": 1.2014852118987668e-05, - "loss": 0.033, - "step": 243500 - }, - { - "epoch": 10.39, - "eval_accuracy": 0.947830715996806, - "eval_f1": 0.9482354473268947, - "eval_loss": 0.29782551527023315, - "eval_runtime": 13.5045, - "eval_samples_per_second": 556.409, - "eval_steps_per_second": 8.738, - "step": 243500 - }, - { - "epoch": 10.41, - "learning_rate": 1.1988178054713842e-05, - "loss": 0.0347, - "step": 244000 - }, - { - "epoch": 10.41, - "eval_accuracy": 0.9503593292520628, - "eval_f1": 0.9506089663521747, - "eval_loss": 0.28753605484962463, - "eval_runtime": 13.5173, - "eval_samples_per_second": 555.881, - "eval_steps_per_second": 8.73, - "step": 244000 - }, - { - "epoch": 10.43, - "learning_rate": 1.1961503990440015e-05, - "loss": 0.0357, - "step": 244500 - }, - { - "epoch": 10.43, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9453483829345272, - "eval_loss": 0.3388841450214386, - "eval_runtime": 13.5245, - "eval_samples_per_second": 555.584, - "eval_steps_per_second": 8.725, - "step": 244500 - }, - { - "epoch": 10.46, - "learning_rate": 1.1934829926166191e-05, - "loss": 0.034, - "step": 245000 - }, - { - "epoch": 10.46, - "eval_accuracy": 0.9474314612722917, - "eval_f1": 0.9476865138649213, - "eval_loss": 0.3259483575820923, - "eval_runtime": 13.5224, - "eval_samples_per_second": 555.672, - "eval_steps_per_second": 8.726, - "step": 245000 - }, - { - "epoch": 10.48, - "learning_rate": 1.1908209210020913e-05, - "loss": 0.0388, - "step": 245500 - }, - { - "epoch": 10.48, - "eval_accuracy": 0.9454351876497206, - "eval_f1": 0.9458281591899665, - "eval_loss": 0.32985618710517883, - "eval_runtime": 13.5192, - "eval_samples_per_second": 555.8, - "eval_steps_per_second": 8.728, - "step": 245500 - }, - { - "epoch": 10.5, - "learning_rate": 1.1881535145747088e-05, - "loss": 0.0364, - "step": 246000 - }, - { - "epoch": 10.5, - "eval_accuracy": 0.9491615650785201, - "eval_f1": 0.9494692785609192, - "eval_loss": 0.2861853539943695, - "eval_runtime": 13.5275, - "eval_samples_per_second": 555.462, - "eval_steps_per_second": 8.723, - "step": 246000 - }, - { - "epoch": 10.52, - "learning_rate": 1.1854861081473264e-05, - "loss": 0.0359, - "step": 246500 - }, - { - "epoch": 10.52, - "eval_accuracy": 0.9419749800372638, - "eval_f1": 0.9426052474217692, - "eval_loss": 0.3198188245296478, - "eval_runtime": 13.5288, - "eval_samples_per_second": 555.408, - "eval_steps_per_second": 8.722, - "step": 246500 - }, - { - "epoch": 10.54, - "learning_rate": 1.1828187017199437e-05, - "loss": 0.0366, - "step": 247000 - }, - { - "epoch": 10.54, - "eval_accuracy": 0.9429065743944637, - "eval_f1": 0.9435218841102918, - "eval_loss": 0.3460228443145752, - "eval_runtime": 13.5192, - "eval_samples_per_second": 555.802, - "eval_steps_per_second": 8.728, - "step": 247000 - }, - { - "epoch": 10.56, - "learning_rate": 1.1801512952925613e-05, - "loss": 0.0365, - "step": 247500 - }, - { - "epoch": 10.56, - "eval_accuracy": 0.9442374234761778, - "eval_f1": 0.9447131929091213, - "eval_loss": 0.30125898122787476, - "eval_runtime": 13.5139, - "eval_samples_per_second": 556.019, - "eval_steps_per_second": 8.732, - "step": 247500 - }, - { - "epoch": 10.58, - "learning_rate": 1.1774838888651788e-05, - "loss": 0.0344, - "step": 248000 - }, - { - "epoch": 10.58, - "eval_accuracy": 0.9434389140271493, - "eval_f1": 0.94390686705927, - "eval_loss": 0.3141534626483917, - "eval_runtime": 13.5171, - "eval_samples_per_second": 555.889, - "eval_steps_per_second": 8.73, - "step": 248000 - }, - { - "epoch": 10.61, - "learning_rate": 1.174816482437796e-05, - "loss": 0.0348, - "step": 248500 - }, - { - "epoch": 10.61, - "eval_accuracy": 0.9474314612722917, - "eval_f1": 0.9477872430849247, - "eval_loss": 0.2989385426044464, - "eval_runtime": 13.515, - "eval_samples_per_second": 555.973, - "eval_steps_per_second": 8.731, - "step": 248500 - }, - { - "epoch": 10.63, - "learning_rate": 1.1721490760104137e-05, - "loss": 0.0357, - "step": 249000 - }, - { - "epoch": 10.63, - "eval_accuracy": 0.9455682725578919, - "eval_f1": 0.9459662824329179, - "eval_loss": 0.29782894253730774, - "eval_runtime": 13.5043, - "eval_samples_per_second": 556.414, - "eval_steps_per_second": 8.738, - "step": 249000 - }, - { - "epoch": 10.65, - "learning_rate": 1.1694870043958859e-05, - "loss": 0.0335, - "step": 249500 - }, - { - "epoch": 10.65, - "eval_accuracy": 0.9415757253127496, - "eval_f1": 0.9420959106843136, - "eval_loss": 0.3017115890979767, - "eval_runtime": 13.515, - "eval_samples_per_second": 555.976, - "eval_steps_per_second": 8.731, - "step": 249500 - }, - { - "epoch": 10.67, - "learning_rate": 1.1668195979685033e-05, - "loss": 0.035, - "step": 250000 - }, - { - "epoch": 10.67, - "eval_accuracy": 0.9430396593026351, - "eval_f1": 0.9436321487656903, - "eval_loss": 0.3339087665081024, - "eval_runtime": 13.5029, - "eval_samples_per_second": 556.474, - "eval_steps_per_second": 8.739, - "step": 250000 - }, - { - "epoch": 10.69, - "learning_rate": 1.1641575263539755e-05, - "loss": 0.0365, - "step": 250500 - }, - { - "epoch": 10.69, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.946504806534825, - "eval_loss": 0.3119305670261383, - "eval_runtime": 13.5164, - "eval_samples_per_second": 555.917, - "eval_steps_per_second": 8.73, - "step": 250500 - }, - { - "epoch": 10.71, - "learning_rate": 1.1614901199265931e-05, - "loss": 0.0349, - "step": 251000 - }, - { - "epoch": 10.71, - "eval_accuracy": 0.9431727442108065, - "eval_f1": 0.9436855116972849, - "eval_loss": 0.3208593726158142, - "eval_runtime": 13.5183, - "eval_samples_per_second": 555.84, - "eval_steps_per_second": 8.729, - "step": 251000 - }, - { - "epoch": 10.73, - "learning_rate": 1.1588227134992104e-05, - "loss": 0.0378, - "step": 251500 - }, - { - "epoch": 10.73, - "eval_accuracy": 0.9494277348948629, - "eval_f1": 0.9497391996371748, - "eval_loss": 0.29452651739120483, - "eval_runtime": 13.504, - "eval_samples_per_second": 556.426, - "eval_steps_per_second": 8.738, - "step": 251500 - }, - { - "epoch": 10.75, - "learning_rate": 1.156155307071828e-05, - "loss": 0.0379, - "step": 252000 - }, - { - "epoch": 10.75, - "eval_accuracy": 0.9441043385680064, - "eval_f1": 0.944606696971724, - "eval_loss": 0.3278190493583679, - "eval_runtime": 13.5057, - "eval_samples_per_second": 556.359, - "eval_steps_per_second": 8.737, - "step": 252000 - }, - { - "epoch": 10.78, - "learning_rate": 1.1534879006444455e-05, - "loss": 0.0357, - "step": 252500 - }, - { - "epoch": 10.78, - "eval_accuracy": 0.9490284801703487, - "eval_f1": 0.9493800643943184, - "eval_loss": 0.29757875204086304, - "eval_runtime": 13.5211, - "eval_samples_per_second": 555.723, - "eval_steps_per_second": 8.727, - "step": 252500 - }, - { - "epoch": 10.8, - "learning_rate": 1.1508258290299177e-05, - "loss": 0.0387, - "step": 253000 - }, - { - "epoch": 10.8, - "eval_accuracy": 0.9393132818738356, - "eval_f1": 0.9400745889233175, - "eval_loss": 0.3252202868461609, - "eval_runtime": 13.5093, - "eval_samples_per_second": 556.208, - "eval_steps_per_second": 8.735, - "step": 253000 - }, - { - "epoch": 10.82, - "learning_rate": 1.1481584226025353e-05, - "loss": 0.0376, - "step": 253500 - }, - { - "epoch": 10.82, - "eval_accuracy": 0.9527548575991482, - "eval_f1": 0.9529652109380102, - "eval_loss": 0.25803959369659424, - "eval_runtime": 13.5164, - "eval_samples_per_second": 555.919, - "eval_steps_per_second": 8.73, - "step": 253500 - }, - { - "epoch": 10.84, - "learning_rate": 1.1454910161751528e-05, - "loss": 0.0405, - "step": 254000 - }, - { - "epoch": 10.84, - "eval_accuracy": 0.9427734894862922, - "eval_f1": 0.9433379426738755, - "eval_loss": 0.3349682092666626, - "eval_runtime": 13.5239, - "eval_samples_per_second": 555.611, - "eval_steps_per_second": 8.725, - "step": 254000 - }, - { - "epoch": 10.86, - "learning_rate": 1.14282360974777e-05, - "loss": 0.0363, - "step": 254500 - }, - { - "epoch": 10.86, - "eval_accuracy": 0.9487623103540058, - "eval_f1": 0.9491131402425562, - "eval_loss": 0.3017532229423523, - "eval_runtime": 13.5269, - "eval_samples_per_second": 555.486, - "eval_steps_per_second": 8.723, - "step": 254500 - }, - { - "epoch": 10.88, - "learning_rate": 1.1401562033203877e-05, - "loss": 0.0347, - "step": 255000 - }, - { - "epoch": 10.88, - "eval_accuracy": 0.9480968858131488, - "eval_f1": 0.9484905581037079, - "eval_loss": 0.30602821707725525, - "eval_runtime": 13.5262, - "eval_samples_per_second": 555.513, - "eval_steps_per_second": 8.724, - "step": 255000 - }, - { - "epoch": 10.9, - "learning_rate": 1.1374941317058599e-05, - "loss": 0.0348, - "step": 255500 - }, - { - "epoch": 10.9, - "eval_accuracy": 0.9457013574660633, - "eval_f1": 0.9461498483987765, - "eval_loss": 0.3226492404937744, - "eval_runtime": 13.5325, - "eval_samples_per_second": 555.257, - "eval_steps_per_second": 8.72, - "step": 255500 - }, - { - "epoch": 10.93, - "learning_rate": 1.1348267252784773e-05, - "loss": 0.0357, - "step": 256000 - }, - { - "epoch": 10.93, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9487462035465501, - "eval_loss": 0.30912718176841736, - "eval_runtime": 13.533, - "eval_samples_per_second": 555.236, - "eval_steps_per_second": 8.719, - "step": 256000 - }, - { - "epoch": 10.95, - "learning_rate": 1.132159318851095e-05, - "loss": 0.0345, - "step": 256500 - }, - { - "epoch": 10.95, - "eval_accuracy": 0.9467660367314347, - "eval_f1": 0.9472597356450273, - "eval_loss": 0.32901766896247864, - "eval_runtime": 13.5296, - "eval_samples_per_second": 555.374, - "eval_steps_per_second": 8.722, - "step": 256500 - }, - { - "epoch": 10.97, - "learning_rate": 1.1294919124237122e-05, - "loss": 0.0391, - "step": 257000 - }, - { - "epoch": 10.97, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9505242429513786, - "eval_loss": 0.2757878005504608, - "eval_runtime": 13.5316, - "eval_samples_per_second": 555.291, - "eval_steps_per_second": 8.72, - "step": 257000 - }, - { - "epoch": 10.99, - "learning_rate": 1.1268245059963297e-05, - "loss": 0.035, - "step": 257500 - }, - { - "epoch": 10.99, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9476722219527903, - "eval_loss": 0.30939844250679016, - "eval_runtime": 13.5323, - "eval_samples_per_second": 555.265, - "eval_steps_per_second": 8.72, - "step": 257500 - }, - { - "epoch": 11.01, - "learning_rate": 1.124162434381802e-05, - "loss": 0.0292, - "step": 258000 - }, - { - "epoch": 11.01, - "eval_accuracy": 0.9522225179664626, - "eval_f1": 0.9525102493286882, - "eval_loss": 0.29116666316986084, - "eval_runtime": 13.5378, - "eval_samples_per_second": 555.04, - "eval_steps_per_second": 8.716, - "step": 258000 - }, - { - "epoch": 11.03, - "learning_rate": 1.1214950279544195e-05, - "loss": 0.0254, - "step": 258500 - }, - { - "epoch": 11.03, - "eval_accuracy": 0.9471652914559489, - "eval_f1": 0.9475831203573146, - "eval_loss": 0.3402584195137024, - "eval_runtime": 13.5397, - "eval_samples_per_second": 554.96, - "eval_steps_per_second": 8.715, - "step": 258500 - }, - { - "epoch": 11.05, - "learning_rate": 1.1188276215270368e-05, - "loss": 0.028, - "step": 259000 - }, - { - "epoch": 11.05, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9506309679003032, - "eval_loss": 0.2876528799533844, - "eval_runtime": 13.539, - "eval_samples_per_second": 554.991, - "eval_steps_per_second": 8.716, - "step": 259000 - }, - { - "epoch": 11.08, - "learning_rate": 1.1161602150996544e-05, - "loss": 0.0291, - "step": 259500 - }, - { - "epoch": 11.08, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9516158915240817, - "eval_loss": 0.2986809313297272, - "eval_runtime": 13.5362, - "eval_samples_per_second": 555.103, - "eval_steps_per_second": 8.717, - "step": 259500 - }, - { - "epoch": 11.1, - "learning_rate": 1.1134928086722719e-05, - "loss": 0.0279, - "step": 260000 - }, - { - "epoch": 11.1, - "eval_accuracy": 0.9461006121905776, - "eval_f1": 0.9466771033072814, - "eval_loss": 0.3648306429386139, - "eval_runtime": 13.5352, - "eval_samples_per_second": 555.144, - "eval_steps_per_second": 8.718, - "step": 260000 - }, - { - "epoch": 11.12, - "learning_rate": 1.1108254022448895e-05, - "loss": 0.0297, - "step": 260500 - }, - { - "epoch": 11.12, - "eval_accuracy": 0.9483630556294916, - "eval_f1": 0.948826460056088, - "eval_loss": 0.3425619900226593, - "eval_runtime": 13.5252, - "eval_samples_per_second": 555.554, - "eval_steps_per_second": 8.724, - "step": 260500 - }, - { - "epoch": 11.14, - "learning_rate": 1.1081579958175068e-05, - "loss": 0.0301, - "step": 261000 - }, - { - "epoch": 11.14, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9493716498936634, - "eval_loss": 0.31174421310424805, - "eval_runtime": 13.5347, - "eval_samples_per_second": 555.166, - "eval_steps_per_second": 8.718, - "step": 261000 - }, - { - "epoch": 11.16, - "learning_rate": 1.1054905893901242e-05, - "loss": 0.0284, - "step": 261500 - }, - { - "epoch": 11.16, - "eval_accuracy": 0.9510247537929198, - "eval_f1": 0.9514079176915496, - "eval_loss": 0.30318567156791687, - "eval_runtime": 13.5381, - "eval_samples_per_second": 555.026, - "eval_steps_per_second": 8.716, - "step": 261500 - }, - { - "epoch": 11.18, - "learning_rate": 1.1028231829627418e-05, - "loss": 0.0299, - "step": 262000 - }, - { - "epoch": 11.18, - "eval_accuracy": 0.9542187915890338, - "eval_f1": 0.9544176427194278, - "eval_loss": 0.2822323441505432, - "eval_runtime": 13.5372, - "eval_samples_per_second": 555.062, - "eval_steps_per_second": 8.717, - "step": 262000 - }, - { - "epoch": 11.2, - "learning_rate": 1.100161111348214e-05, - "loss": 0.0279, - "step": 262500 - }, - { - "epoch": 11.2, - "eval_accuracy": 0.9445035932925206, - "eval_f1": 0.9450459515410018, - "eval_loss": 0.3772587180137634, - "eval_runtime": 13.5407, - "eval_samples_per_second": 554.918, - "eval_steps_per_second": 8.714, - "step": 262500 - }, - { - "epoch": 11.22, - "learning_rate": 1.0974937049208315e-05, - "loss": 0.0287, - "step": 263000 - }, - { - "epoch": 11.22, - "eval_accuracy": 0.9459675272824062, - "eval_f1": 0.9464438417497797, - "eval_loss": 0.3681629002094269, - "eval_runtime": 13.5273, - "eval_samples_per_second": 555.47, - "eval_steps_per_second": 8.723, - "step": 263000 - }, - { - "epoch": 11.25, - "learning_rate": 1.094826298493449e-05, - "loss": 0.0296, - "step": 263500 - }, - { - "epoch": 11.25, - "eval_accuracy": 0.946233697098749, - "eval_f1": 0.9467587683332466, - "eval_loss": 0.3240737318992615, - "eval_runtime": 13.5306, - "eval_samples_per_second": 555.335, - "eval_steps_per_second": 8.721, - "step": 263500 - }, - { - "epoch": 11.27, - "learning_rate": 1.0921588920660664e-05, - "loss": 0.029, - "step": 264000 - }, - { - "epoch": 11.27, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9493013988508006, - "eval_loss": 0.3337612748146057, - "eval_runtime": 13.535, - "eval_samples_per_second": 555.152, - "eval_steps_per_second": 8.718, - "step": 264000 - }, - { - "epoch": 11.29, - "learning_rate": 1.0894968204515386e-05, - "loss": 0.0305, - "step": 264500 - }, - { - "epoch": 11.29, - "eval_accuracy": 0.9511578387010913, - "eval_f1": 0.9513546227894613, - "eval_loss": 0.28592097759246826, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.271, - "eval_steps_per_second": 8.72, - "step": 264500 - }, - { - "epoch": 11.31, - "learning_rate": 1.0868294140241562e-05, - "loss": 0.0289, - "step": 265000 - }, - { - "epoch": 11.31, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9478212294888846, - "eval_loss": 0.3027932941913605, - "eval_runtime": 13.5302, - "eval_samples_per_second": 555.348, - "eval_steps_per_second": 8.721, - "step": 265000 - }, - { - "epoch": 11.33, - "learning_rate": 1.0841620075967737e-05, - "loss": 0.0294, - "step": 265500 - }, - { - "epoch": 11.33, - "eval_accuracy": 0.9437050838434922, - "eval_f1": 0.944358158604522, - "eval_loss": 0.34016963839530945, - "eval_runtime": 13.5339, - "eval_samples_per_second": 555.198, - "eval_steps_per_second": 8.719, - "step": 265500 - }, - { - "epoch": 11.35, - "learning_rate": 1.081494601169391e-05, - "loss": 0.0301, - "step": 266000 - }, - { - "epoch": 11.35, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9516383347086875, - "eval_loss": 0.29323649406433105, - "eval_runtime": 13.5315, - "eval_samples_per_second": 555.299, - "eval_steps_per_second": 8.72, - "step": 266000 - }, - { - "epoch": 11.37, - "learning_rate": 1.0788325295548635e-05, - "loss": 0.0323, - "step": 266500 - }, - { - "epoch": 11.37, - "eval_accuracy": 0.9474314612722917, - "eval_f1": 0.9479135405572265, - "eval_loss": 0.32282477617263794, - "eval_runtime": 13.5167, - "eval_samples_per_second": 555.906, - "eval_steps_per_second": 8.73, - "step": 266500 - }, - { - "epoch": 11.4, - "learning_rate": 1.0761651231274808e-05, - "loss": 0.0294, - "step": 267000 - }, - { - "epoch": 11.4, - "eval_accuracy": 0.9471652914559489, - "eval_f1": 0.9474788361666246, - "eval_loss": 0.34356236457824707, - "eval_runtime": 13.5297, - "eval_samples_per_second": 555.372, - "eval_steps_per_second": 8.722, - "step": 267000 - }, - { - "epoch": 11.42, - "learning_rate": 1.073503051512953e-05, - "loss": 0.0299, - "step": 267500 - }, - { - "epoch": 11.42, - "eval_accuracy": 0.9496939047112057, - "eval_f1": 0.9500661332245157, - "eval_loss": 0.3229745328426361, - "eval_runtime": 13.5139, - "eval_samples_per_second": 556.019, - "eval_steps_per_second": 8.732, - "step": 267500 - }, - { - "epoch": 11.44, - "learning_rate": 1.0708356450855706e-05, - "loss": 0.0295, - "step": 268000 - }, - { - "epoch": 11.44, - "eval_accuracy": 0.9532871972318339, - "eval_f1": 0.9535328056655195, - "eval_loss": 0.3073120415210724, - "eval_runtime": 13.5288, - "eval_samples_per_second": 555.407, - "eval_steps_per_second": 8.722, - "step": 268000 - }, - { - "epoch": 11.46, - "learning_rate": 1.068168238658188e-05, - "loss": 0.0285, - "step": 268500 - }, - { - "epoch": 11.46, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9489967962192396, - "eval_loss": 0.3616872727870941, - "eval_runtime": 13.5307, - "eval_samples_per_second": 555.331, - "eval_steps_per_second": 8.721, - "step": 268500 - }, - { - "epoch": 11.48, - "learning_rate": 1.0655008322308053e-05, - "loss": 0.0311, - "step": 269000 - }, - { - "epoch": 11.48, - "eval_accuracy": 0.9546180463135481, - "eval_f1": 0.9547804895488143, - "eval_loss": 0.2949013411998749, - "eval_runtime": 13.5302, - "eval_samples_per_second": 555.35, - "eval_steps_per_second": 8.721, - "step": 269000 - }, - { - "epoch": 11.5, - "learning_rate": 1.0628387606162778e-05, - "loss": 0.0327, - "step": 269500 - }, - { - "epoch": 11.5, - "eval_accuracy": 0.9538195368645196, - "eval_f1": 0.954041082728273, - "eval_loss": 0.2957130968570709, - "eval_runtime": 13.5313, - "eval_samples_per_second": 555.306, - "eval_steps_per_second": 8.721, - "step": 269500 - }, - { - "epoch": 11.52, - "learning_rate": 1.0601713541888951e-05, - "loss": 0.0277, - "step": 270000 - }, - { - "epoch": 11.52, - "eval_accuracy": 0.9518232632419483, - "eval_f1": 0.9521390780449444, - "eval_loss": 0.3210020661354065, - "eval_runtime": 13.5284, - "eval_samples_per_second": 555.426, - "eval_steps_per_second": 8.722, - "step": 270000 - }, - { - "epoch": 11.54, - "learning_rate": 1.0575039477615126e-05, - "loss": 0.0312, - "step": 270500 - }, - { - "epoch": 11.54, - "eval_accuracy": 0.9494277348948629, - "eval_f1": 0.9497943099716304, - "eval_loss": 0.33388105034828186, - "eval_runtime": 13.5275, - "eval_samples_per_second": 555.462, - "eval_steps_per_second": 8.723, - "step": 270500 - }, - { - "epoch": 11.57, - "learning_rate": 1.0548365413341302e-05, - "loss": 0.0307, - "step": 271000 - }, - { - "epoch": 11.57, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9400404568776457, - "eval_loss": 0.4182300567626953, - "eval_runtime": 13.5305, - "eval_samples_per_second": 555.338, - "eval_steps_per_second": 8.721, - "step": 271000 - }, - { - "epoch": 11.59, - "learning_rate": 1.0521691349067475e-05, - "loss": 0.0286, - "step": 271500 - }, - { - "epoch": 11.59, - "eval_accuracy": 0.9342560553633218, - "eval_f1": 0.9353321721209846, - "eval_loss": 0.42980387806892395, - "eval_runtime": 13.5324, - "eval_samples_per_second": 555.262, - "eval_steps_per_second": 8.72, - "step": 271500 - }, - { - "epoch": 11.61, - "learning_rate": 1.0495070632922197e-05, - "loss": 0.0322, - "step": 272000 - }, - { - "epoch": 11.61, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.9467999447575667, - "eval_loss": 0.3350697457790375, - "eval_runtime": 13.5467, - "eval_samples_per_second": 554.674, - "eval_steps_per_second": 8.711, - "step": 272000 - }, - { - "epoch": 11.63, - "learning_rate": 1.0468396568648373e-05, - "loss": 0.0322, - "step": 272500 - }, - { - "epoch": 11.63, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9474194226678742, - "eval_loss": 0.3376242518424988, - "eval_runtime": 13.5463, - "eval_samples_per_second": 554.688, - "eval_steps_per_second": 8.711, - "step": 272500 - }, - { - "epoch": 11.65, - "learning_rate": 1.0441722504374548e-05, - "loss": 0.0284, - "step": 273000 - }, - { - "epoch": 11.65, - "eval_accuracy": 0.9519563481501198, - "eval_f1": 0.9522952967136102, - "eval_loss": 0.30849677324295044, - "eval_runtime": 13.5447, - "eval_samples_per_second": 554.756, - "eval_steps_per_second": 8.712, - "step": 273000 - }, - { - "epoch": 11.67, - "learning_rate": 1.041504844010072e-05, - "loss": 0.0291, - "step": 273500 - }, - { - "epoch": 11.67, - "eval_accuracy": 0.9540857066808623, - "eval_f1": 0.9543742598604628, - "eval_loss": 0.3051382899284363, - "eval_runtime": 13.5382, - "eval_samples_per_second": 555.021, - "eval_steps_per_second": 8.716, - "step": 273500 - }, - { - "epoch": 11.69, - "learning_rate": 1.0388374375826897e-05, - "loss": 0.0315, - "step": 274000 - }, - { - "epoch": 11.69, - "eval_accuracy": 0.9463667820069204, - "eval_f1": 0.9468753229061821, - "eval_loss": 0.33453667163848877, - "eval_runtime": 13.5381, - "eval_samples_per_second": 555.027, - "eval_steps_per_second": 8.716, - "step": 274000 - }, - { - "epoch": 11.72, - "learning_rate": 1.0361753659681619e-05, - "loss": 0.0296, - "step": 274500 - }, - { - "epoch": 11.72, - "eval_accuracy": 0.9532871972318339, - "eval_f1": 0.9535879021225674, - "eval_loss": 0.2986421585083008, - "eval_runtime": 13.5415, - "eval_samples_per_second": 554.886, - "eval_steps_per_second": 8.714, - "step": 274500 - }, - { - "epoch": 11.74, - "learning_rate": 1.0335079595407793e-05, - "loss": 0.0317, - "step": 275000 - }, - { - "epoch": 11.74, - "eval_accuracy": 0.9467660367314347, - "eval_f1": 0.9470580392199042, - "eval_loss": 0.3374924957752228, - "eval_runtime": 13.5405, - "eval_samples_per_second": 554.927, - "eval_steps_per_second": 8.715, - "step": 275000 - }, - { - "epoch": 11.76, - "learning_rate": 1.030840553113397e-05, - "loss": 0.0301, - "step": 275500 - }, - { - "epoch": 11.76, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9477488712814109, - "eval_loss": 0.3174923062324524, - "eval_runtime": 13.5443, - "eval_samples_per_second": 554.77, - "eval_steps_per_second": 8.712, - "step": 275500 - }, - { - "epoch": 11.78, - "learning_rate": 1.0281731466860144e-05, - "loss": 0.0328, - "step": 276000 - }, - { - "epoch": 11.78, - "eval_accuracy": 0.9506254990684057, - "eval_f1": 0.9509012299136492, - "eval_loss": 0.30687013268470764, - "eval_runtime": 13.5377, - "eval_samples_per_second": 555.043, - "eval_steps_per_second": 8.716, - "step": 276000 - }, - { - "epoch": 11.8, - "learning_rate": 1.0255057402586318e-05, - "loss": 0.0308, - "step": 276500 - }, - { - "epoch": 11.8, - "eval_accuracy": 0.9496939047112057, - "eval_f1": 0.9499248413227012, - "eval_loss": 0.3148394227027893, - "eval_runtime": 13.54, - "eval_samples_per_second": 554.947, - "eval_steps_per_second": 8.715, - "step": 276500 - }, - { - "epoch": 11.82, - "learning_rate": 1.0228383338312493e-05, - "loss": 0.0284, - "step": 277000 - }, - { - "epoch": 11.82, - "eval_accuracy": 0.9504924141602342, - "eval_f1": 0.9507888811610475, - "eval_loss": 0.28838837146759033, - "eval_runtime": 13.5292, - "eval_samples_per_second": 555.39, - "eval_steps_per_second": 8.722, - "step": 277000 - }, - { - "epoch": 11.84, - "learning_rate": 1.0201709274038668e-05, - "loss": 0.0301, - "step": 277500 - }, - { - "epoch": 11.84, - "eval_accuracy": 0.9457013574660633, - "eval_f1": 0.9461555213511837, - "eval_loss": 0.3430428206920624, - "eval_runtime": 13.5298, - "eval_samples_per_second": 555.366, - "eval_steps_per_second": 8.721, - "step": 277500 - }, - { - "epoch": 11.86, - "learning_rate": 1.0175035209764844e-05, - "loss": 0.0288, - "step": 278000 - }, - { - "epoch": 11.86, - "eval_accuracy": 0.9491615650785201, - "eval_f1": 0.9494279298765108, - "eval_loss": 0.30778148770332336, - "eval_runtime": 13.5324, - "eval_samples_per_second": 555.26, - "eval_steps_per_second": 8.72, - "step": 278000 - }, - { - "epoch": 11.89, - "learning_rate": 1.0148414493619566e-05, - "loss": 0.0302, - "step": 278500 - }, - { - "epoch": 11.89, - "eval_accuracy": 0.9475645461804632, - "eval_f1": 0.9479488373562613, - "eval_loss": 0.32820576429367065, - "eval_runtime": 13.5289, - "eval_samples_per_second": 555.402, - "eval_steps_per_second": 8.722, - "step": 278500 - }, - { - "epoch": 11.91, - "learning_rate": 1.0121740429345739e-05, - "loss": 0.0322, - "step": 279000 - }, - { - "epoch": 11.91, - "eval_accuracy": 0.9506254990684057, - "eval_f1": 0.9509672286308062, - "eval_loss": 0.30490967631340027, - "eval_runtime": 13.531, - "eval_samples_per_second": 555.318, - "eval_steps_per_second": 8.721, - "step": 279000 - }, - { - "epoch": 11.93, - "learning_rate": 1.0095066365071915e-05, - "loss": 0.0291, - "step": 279500 - }, - { - "epoch": 11.93, - "eval_accuracy": 0.9377162629757786, - "eval_f1": 0.9385255738922734, - "eval_loss": 0.42235612869262695, - "eval_runtime": 13.5309, - "eval_samples_per_second": 555.322, - "eval_steps_per_second": 8.721, - "step": 279500 - }, - { - "epoch": 11.95, - "learning_rate": 1.006839230079809e-05, - "loss": 0.0303, - "step": 280000 - }, - { - "epoch": 11.95, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9515785237481514, - "eval_loss": 0.31639474630355835, - "eval_runtime": 13.5318, - "eval_samples_per_second": 555.286, - "eval_steps_per_second": 8.72, - "step": 280000 - }, - { - "epoch": 11.97, - "learning_rate": 1.0041771584652811e-05, - "loss": 0.0316, - "step": 280500 - }, - { - "epoch": 11.97, - "eval_accuracy": 0.951690178333777, - "eval_f1": 0.951872103274024, - "eval_loss": 0.3232201635837555, - "eval_runtime": 13.5298, - "eval_samples_per_second": 555.368, - "eval_steps_per_second": 8.722, - "step": 280500 - }, - { - "epoch": 11.99, - "learning_rate": 1.0015097520378987e-05, - "loss": 0.0313, - "step": 281000 - }, - { - "epoch": 11.99, - "eval_accuracy": 0.9457013574660633, - "eval_f1": 0.9462042561834819, - "eval_loss": 0.35591208934783936, - "eval_runtime": 13.5346, - "eval_samples_per_second": 555.169, - "eval_steps_per_second": 8.718, - "step": 281000 - }, - { - "epoch": 12.01, - "learning_rate": 9.98842345610516e-06, - "loss": 0.0235, - "step": 281500 - }, - { - "epoch": 12.01, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9488481933097076, - "eval_loss": 0.3524312674999237, - "eval_runtime": 13.533, - "eval_samples_per_second": 555.237, - "eval_steps_per_second": 8.719, - "step": 281500 - }, - { - "epoch": 12.04, - "learning_rate": 9.961749391831337e-06, - "loss": 0.0236, - "step": 282000 - }, - { - "epoch": 12.04, - "eval_accuracy": 0.9492946499866916, - "eval_f1": 0.9495979257310748, - "eval_loss": 0.34940576553344727, - "eval_runtime": 13.5339, - "eval_samples_per_second": 555.197, - "eval_steps_per_second": 8.719, - "step": 282000 - }, - { - "epoch": 12.06, - "learning_rate": 9.935128675686058e-06, - "loss": 0.0228, - "step": 282500 - }, - { - "epoch": 12.06, - "eval_accuracy": 0.9515570934256056, - "eval_f1": 0.9519025620870449, - "eval_loss": 0.34196367859840393, - "eval_runtime": 13.5343, - "eval_samples_per_second": 555.182, - "eval_steps_per_second": 8.719, - "step": 282500 - }, - { - "epoch": 12.08, - "learning_rate": 9.908454611412233e-06, - "loss": 0.0228, - "step": 283000 - }, - { - "epoch": 12.08, - "eval_accuracy": 0.9459675272824062, - "eval_f1": 0.9463649056755801, - "eval_loss": 0.3793661892414093, - "eval_runtime": 13.5368, - "eval_samples_per_second": 555.078, - "eval_steps_per_second": 8.717, - "step": 283000 - }, - { - "epoch": 12.1, - "learning_rate": 9.881780547138408e-06, - "loss": 0.0255, - "step": 283500 - }, - { - "epoch": 12.1, - "eval_accuracy": 0.9496939047112057, - "eval_f1": 0.9499314374127825, - "eval_loss": 0.3424386978149414, - "eval_runtime": 13.5163, - "eval_samples_per_second": 555.921, - "eval_steps_per_second": 8.73, - "step": 283500 - }, - { - "epoch": 12.12, - "learning_rate": 9.855106482864582e-06, - "loss": 0.0225, - "step": 284000 - }, - { - "epoch": 12.12, - "eval_accuracy": 0.9540857066808623, - "eval_f1": 0.954274654474955, - "eval_loss": 0.30393466353416443, - "eval_runtime": 13.5359, - "eval_samples_per_second": 555.118, - "eval_steps_per_second": 8.718, - "step": 284000 - }, - { - "epoch": 12.14, - "learning_rate": 9.828485766719304e-06, - "loss": 0.0233, - "step": 284500 - }, - { - "epoch": 12.14, - "eval_accuracy": 0.9490284801703487, - "eval_f1": 0.9493907632773001, - "eval_loss": 0.34559109807014465, - "eval_runtime": 13.5353, - "eval_samples_per_second": 555.14, - "eval_steps_per_second": 8.718, - "step": 284500 - }, - { - "epoch": 12.16, - "learning_rate": 9.80181170244548e-06, - "loss": 0.0234, - "step": 285000 - }, - { - "epoch": 12.16, - "eval_accuracy": 0.9373170082512643, - "eval_f1": 0.9381188612046385, - "eval_loss": 0.4422382116317749, - "eval_runtime": 13.5325, - "eval_samples_per_second": 555.254, - "eval_steps_per_second": 8.72, - "step": 285000 - }, - { - "epoch": 12.18, - "learning_rate": 9.775190986300202e-06, - "loss": 0.0248, - "step": 285500 - }, - { - "epoch": 12.18, - "eval_accuracy": 0.9518232632419483, - "eval_f1": 0.9520718964883907, - "eval_loss": 0.30406928062438965, - "eval_runtime": 13.5351, - "eval_samples_per_second": 555.151, - "eval_steps_per_second": 8.718, - "step": 285500 - }, - { - "epoch": 12.21, - "learning_rate": 9.748516922026377e-06, - "loss": 0.0233, - "step": 286000 - }, - { - "epoch": 12.21, - "eval_accuracy": 0.9507585839765771, - "eval_f1": 0.9510769606493671, - "eval_loss": 0.3260180354118347, - "eval_runtime": 13.5389, - "eval_samples_per_second": 554.992, - "eval_steps_per_second": 8.716, - "step": 286000 - }, - { - "epoch": 12.23, - "learning_rate": 9.72184285775255e-06, - "loss": 0.0257, - "step": 286500 - }, - { - "epoch": 12.23, - "eval_accuracy": 0.9467660367314347, - "eval_f1": 0.9471589365766602, - "eval_loss": 0.35731199383735657, - "eval_runtime": 13.539, - "eval_samples_per_second": 554.988, - "eval_steps_per_second": 8.716, - "step": 286500 - }, - { - "epoch": 12.25, - "learning_rate": 9.695168793478726e-06, - "loss": 0.0232, - "step": 287000 - }, - { - "epoch": 12.25, - "eval_accuracy": 0.9458344423742348, - "eval_f1": 0.9464105058302559, - "eval_loss": 0.3893645703792572, - "eval_runtime": 13.5441, - "eval_samples_per_second": 554.779, - "eval_steps_per_second": 8.712, - "step": 287000 - }, - { - "epoch": 12.27, - "learning_rate": 9.6684947292049e-06, - "loss": 0.0252, - "step": 287500 - }, - { - "epoch": 12.27, - "eval_accuracy": 0.9495608198030343, - "eval_f1": 0.9499226526270541, - "eval_loss": 0.35022905468940735, - "eval_runtime": 13.5389, - "eval_samples_per_second": 554.993, - "eval_steps_per_second": 8.716, - "step": 287500 - }, - { - "epoch": 12.29, - "learning_rate": 9.641820664931075e-06, - "loss": 0.0253, - "step": 288000 - }, - { - "epoch": 12.29, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9515790174427777, - "eval_loss": 0.3345930278301239, - "eval_runtime": 13.5468, - "eval_samples_per_second": 554.669, - "eval_steps_per_second": 8.711, - "step": 288000 - }, - { - "epoch": 12.31, - "learning_rate": 9.61514660065725e-06, - "loss": 0.0247, - "step": 288500 - }, - { - "epoch": 12.31, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9493082796480175, - "eval_loss": 0.3517289161682129, - "eval_runtime": 13.5314, - "eval_samples_per_second": 555.302, - "eval_steps_per_second": 8.72, - "step": 288500 - }, - { - "epoch": 12.33, - "learning_rate": 9.588525884511973e-06, - "loss": 0.0263, - "step": 289000 - }, - { - "epoch": 12.33, - "eval_accuracy": 0.9487623103540058, - "eval_f1": 0.9491705634218461, - "eval_loss": 0.34944280982017517, - "eval_runtime": 13.5432, - "eval_samples_per_second": 554.816, - "eval_steps_per_second": 8.713, - "step": 289000 - }, - { - "epoch": 12.36, - "learning_rate": 9.561851820238148e-06, - "loss": 0.0245, - "step": 289500 - }, - { - "epoch": 12.36, - "eval_accuracy": 0.9496939047112057, - "eval_f1": 0.9500608964308354, - "eval_loss": 0.3433271646499634, - "eval_runtime": 13.5444, - "eval_samples_per_second": 554.767, - "eval_steps_per_second": 8.712, - "step": 289500 - }, - { - "epoch": 12.38, - "learning_rate": 9.53517775596432e-06, - "loss": 0.0227, - "step": 290000 - }, - { - "epoch": 12.38, - "eval_accuracy": 0.9530210274154911, - "eval_f1": 0.9533301903565214, - "eval_loss": 0.31356149911880493, - "eval_runtime": 13.5415, - "eval_samples_per_second": 554.885, - "eval_steps_per_second": 8.714, - "step": 290000 - }, - { - "epoch": 12.4, - "learning_rate": 9.508503691690497e-06, - "loss": 0.0267, - "step": 290500 - }, - { - "epoch": 12.4, - "eval_accuracy": 0.9484961405376631, - "eval_f1": 0.9488992009331783, - "eval_loss": 0.34211859107017517, - "eval_runtime": 13.5416, - "eval_samples_per_second": 554.882, - "eval_steps_per_second": 8.714, - "step": 290500 - }, - { - "epoch": 12.42, - "learning_rate": 9.481829627416671e-06, - "loss": 0.0243, - "step": 291000 - }, - { - "epoch": 12.42, - "eval_accuracy": 0.9443705083843492, - "eval_f1": 0.9448289751919863, - "eval_loss": 0.35784754157066345, - "eval_runtime": 13.537, - "eval_samples_per_second": 555.07, - "eval_steps_per_second": 8.717, - "step": 291000 - }, - { - "epoch": 12.44, - "learning_rate": 9.455155563142846e-06, - "loss": 0.0269, - "step": 291500 - }, - { - "epoch": 12.44, - "eval_accuracy": 0.9479638009049773, - "eval_f1": 0.9482919091970918, - "eval_loss": 0.3485228717327118, - "eval_runtime": 13.5293, - "eval_samples_per_second": 555.389, - "eval_steps_per_second": 8.722, - "step": 291500 - }, - { - "epoch": 12.46, - "learning_rate": 9.428534846997568e-06, - "loss": 0.0245, - "step": 292000 - }, - { - "epoch": 12.46, - "eval_accuracy": 0.9417088102209209, - "eval_f1": 0.9424175859815097, - "eval_loss": 0.4244661331176758, - "eval_runtime": 13.5348, - "eval_samples_per_second": 555.162, - "eval_steps_per_second": 8.718, - "step": 292000 - }, - { - "epoch": 12.48, - "learning_rate": 9.401860782723742e-06, - "loss": 0.0242, - "step": 292500 - }, - { - "epoch": 12.48, - "eval_accuracy": 0.9464998669150918, - "eval_f1": 0.9469866857888503, - "eval_loss": 0.38829919695854187, - "eval_runtime": 13.5322, - "eval_samples_per_second": 555.27, - "eval_steps_per_second": 8.72, - "step": 292500 - }, - { - "epoch": 12.5, - "learning_rate": 9.375186718449918e-06, - "loss": 0.0255, - "step": 293000 - }, - { - "epoch": 12.5, - "eval_accuracy": 0.9391801969656641, - "eval_f1": 0.9399025777758279, - "eval_loss": 0.4465163052082062, - "eval_runtime": 13.5383, - "eval_samples_per_second": 555.02, - "eval_steps_per_second": 8.716, - "step": 293000 - }, - { - "epoch": 12.53, - "learning_rate": 9.348512654176091e-06, - "loss": 0.0276, - "step": 293500 - }, - { - "epoch": 12.53, - "eval_accuracy": 0.9423742347617781, - "eval_f1": 0.9429647320426405, - "eval_loss": 0.4152087867259979, - "eval_runtime": 13.5354, - "eval_samples_per_second": 555.137, - "eval_steps_per_second": 8.718, - "step": 293500 - }, - { - "epoch": 12.55, - "learning_rate": 9.321838589902267e-06, - "loss": 0.0246, - "step": 294000 - }, - { - "epoch": 12.55, - "eval_accuracy": 0.9397125365983497, - "eval_f1": 0.9404570177915673, - "eval_loss": 0.42639487981796265, - "eval_runtime": 13.5309, - "eval_samples_per_second": 555.321, - "eval_steps_per_second": 8.721, - "step": 294000 - }, - { - "epoch": 12.57, - "learning_rate": 9.295164525628442e-06, - "loss": 0.0259, - "step": 294500 - }, - { - "epoch": 12.57, - "eval_accuracy": 0.9496939047112057, - "eval_f1": 0.9500699644825673, - "eval_loss": 0.34086450934410095, - "eval_runtime": 13.5329, - "eval_samples_per_second": 555.238, - "eval_steps_per_second": 8.719, - "step": 294500 - }, - { - "epoch": 12.59, - "learning_rate": 9.268543809483164e-06, - "loss": 0.028, - "step": 295000 - }, - { - "epoch": 12.59, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9454380735919529, - "eval_loss": 0.35605818033218384, - "eval_runtime": 13.5307, - "eval_samples_per_second": 555.331, - "eval_steps_per_second": 8.721, - "step": 295000 - }, - { - "epoch": 12.61, - "learning_rate": 9.241869745209339e-06, - "loss": 0.0259, - "step": 295500 - }, - { - "epoch": 12.61, - "eval_accuracy": 0.9476976310886346, - "eval_f1": 0.9481629327735969, - "eval_loss": 0.3794984817504883, - "eval_runtime": 13.5317, - "eval_samples_per_second": 555.287, - "eval_steps_per_second": 8.72, - "step": 295500 - }, - { - "epoch": 12.63, - "learning_rate": 9.215195680935513e-06, - "loss": 0.0235, - "step": 296000 - }, - { - "epoch": 12.63, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.95056367027671, - "eval_loss": 0.3418872356414795, - "eval_runtime": 13.5317, - "eval_samples_per_second": 555.289, - "eval_steps_per_second": 8.72, - "step": 296000 - }, - { - "epoch": 12.65, - "learning_rate": 9.18852161666169e-06, - "loss": 0.0239, - "step": 296500 - }, - { - "epoch": 12.65, - "eval_accuracy": 0.9504924141602342, - "eval_f1": 0.9509260011654419, - "eval_loss": 0.3470332622528076, - "eval_runtime": 13.5278, - "eval_samples_per_second": 555.448, - "eval_steps_per_second": 8.723, - "step": 296500 - }, - { - "epoch": 12.68, - "learning_rate": 9.161847552387862e-06, - "loss": 0.0237, - "step": 297000 - }, - { - "epoch": 12.68, - "eval_accuracy": 0.9470322065477775, - "eval_f1": 0.9476021217895277, - "eval_loss": 0.3777107298374176, - "eval_runtime": 13.5316, - "eval_samples_per_second": 555.292, - "eval_steps_per_second": 8.72, - "step": 297000 - }, - { - "epoch": 12.7, - "learning_rate": 9.135173488114037e-06, - "loss": 0.0283, - "step": 297500 - }, - { - "epoch": 12.7, - "eval_accuracy": 0.9470322065477775, - "eval_f1": 0.9475167386604156, - "eval_loss": 0.360249787569046, - "eval_runtime": 13.5329, - "eval_samples_per_second": 555.241, - "eval_steps_per_second": 8.72, - "step": 297500 - }, - { - "epoch": 12.72, - "learning_rate": 9.108499423840213e-06, - "loss": 0.0233, - "step": 298000 - }, - { - "epoch": 12.72, - "eval_accuracy": 0.9434389140271493, - "eval_f1": 0.9440649766238651, - "eval_loss": 0.3837593197822571, - "eval_runtime": 13.533, - "eval_samples_per_second": 555.235, - "eval_steps_per_second": 8.719, - "step": 298000 - }, - { - "epoch": 12.74, - "learning_rate": 9.081825359566387e-06, - "loss": 0.0261, - "step": 298500 - }, - { - "epoch": 12.74, - "eval_accuracy": 0.943971253659835, - "eval_f1": 0.9446036441422058, - "eval_loss": 0.37317943572998047, - "eval_runtime": 13.5218, - "eval_samples_per_second": 555.696, - "eval_steps_per_second": 8.727, - "step": 298500 - }, - { - "epoch": 12.76, - "learning_rate": 9.05520464342111e-06, - "loss": 0.0273, - "step": 299000 - }, - { - "epoch": 12.76, - "eval_accuracy": 0.9451690178333777, - "eval_f1": 0.9455156483734646, - "eval_loss": 0.36402907967567444, - "eval_runtime": 13.5359, - "eval_samples_per_second": 555.117, - "eval_steps_per_second": 8.718, - "step": 299000 - }, - { - "epoch": 12.78, - "learning_rate": 9.028530579147284e-06, - "loss": 0.0281, - "step": 299500 - }, - { - "epoch": 12.78, - "eval_accuracy": 0.9486292254458344, - "eval_f1": 0.9490071762088005, - "eval_loss": 0.3424794673919678, - "eval_runtime": 13.5348, - "eval_samples_per_second": 555.161, - "eval_steps_per_second": 8.718, - "step": 299500 - }, - { - "epoch": 12.8, - "learning_rate": 9.001909863002006e-06, - "loss": 0.0258, - "step": 300000 - }, - { - "epoch": 12.8, - "eval_accuracy": 0.9480968858131488, - "eval_f1": 0.948523862823844, - "eval_loss": 0.3134201765060425, - "eval_runtime": 13.533, - "eval_samples_per_second": 555.234, - "eval_steps_per_second": 8.719, - "step": 300000 - }, - { - "epoch": 12.82, - "learning_rate": 8.97528914685673e-06, - "loss": 0.0274, - "step": 300500 - }, - { - "epoch": 12.82, - "eval_accuracy": 0.9490284801703487, - "eval_f1": 0.949378066518175, - "eval_loss": 0.3174980878829956, - "eval_runtime": 13.5411, - "eval_samples_per_second": 554.902, - "eval_steps_per_second": 8.714, - "step": 300500 - }, - { - "epoch": 12.85, - "learning_rate": 8.948615082582904e-06, - "loss": 0.0287, - "step": 301000 - }, - { - "epoch": 12.85, - "eval_accuracy": 0.9492946499866916, - "eval_f1": 0.9496516160818783, - "eval_loss": 0.30968043208122253, - "eval_runtime": 13.543, - "eval_samples_per_second": 554.827, - "eval_steps_per_second": 8.713, - "step": 301000 - }, - { - "epoch": 12.87, - "learning_rate": 8.921941018309079e-06, - "loss": 0.0239, - "step": 301500 - }, - { - "epoch": 12.87, - "eval_accuracy": 0.9450359329252063, - "eval_f1": 0.9456431323564933, - "eval_loss": 0.3644786775112152, - "eval_runtime": 13.5436, - "eval_samples_per_second": 554.799, - "eval_steps_per_second": 8.713, - "step": 301500 - }, - { - "epoch": 12.89, - "learning_rate": 8.895266954035253e-06, - "loss": 0.0267, - "step": 302000 - }, - { - "epoch": 12.89, - "eval_accuracy": 0.9486292254458344, - "eval_f1": 0.9490490952355025, - "eval_loss": 0.3264901340007782, - "eval_runtime": 13.5378, - "eval_samples_per_second": 555.038, - "eval_steps_per_second": 8.716, - "step": 302000 - }, - { - "epoch": 12.91, - "learning_rate": 8.868592889761428e-06, - "loss": 0.0256, - "step": 302500 - }, - { - "epoch": 12.91, - "eval_accuracy": 0.9410433856800638, - "eval_f1": 0.9418673516898102, - "eval_loss": 0.39492905139923096, - "eval_runtime": 13.5485, - "eval_samples_per_second": 554.599, - "eval_steps_per_second": 8.709, - "step": 302500 - }, - { - "epoch": 12.93, - "learning_rate": 8.841918825487604e-06, - "loss": 0.0266, - "step": 303000 - }, - { - "epoch": 12.93, - "eval_accuracy": 0.9474314612722917, - "eval_f1": 0.9479752478937516, - "eval_loss": 0.33922043442726135, - "eval_runtime": 13.5432, - "eval_samples_per_second": 554.819, - "eval_steps_per_second": 8.713, - "step": 303000 - }, - { - "epoch": 12.95, - "learning_rate": 8.815244761213777e-06, - "loss": 0.0309, - "step": 303500 - }, - { - "epoch": 12.95, - "eval_accuracy": 0.9518232632419483, - "eval_f1": 0.9521082903193254, - "eval_loss": 0.2975204885005951, - "eval_runtime": 13.5374, - "eval_samples_per_second": 555.056, - "eval_steps_per_second": 8.717, - "step": 303500 - }, - { - "epoch": 12.97, - "learning_rate": 8.788570696939951e-06, - "loss": 0.0285, - "step": 304000 - }, - { - "epoch": 12.97, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9474010746701429, - "eval_loss": 0.3319370150566101, - "eval_runtime": 13.5414, - "eval_samples_per_second": 554.892, - "eval_steps_per_second": 8.714, - "step": 304000 - }, - { - "epoch": 13.0, - "learning_rate": 8.761896632666127e-06, - "loss": 0.0277, - "step": 304500 - }, - { - "epoch": 13.0, - "eval_accuracy": 0.9471652914559489, - "eval_f1": 0.9475862830905143, - "eval_loss": 0.3333088755607605, - "eval_runtime": 13.5417, - "eval_samples_per_second": 554.877, - "eval_steps_per_second": 8.714, - "step": 304500 - }, - { - "epoch": 13.02, - "learning_rate": 8.735222568392302e-06, - "loss": 0.0195, - "step": 305000 - }, - { - "epoch": 13.02, - "eval_accuracy": 0.9470322065477775, - "eval_f1": 0.9474741308095099, - "eval_loss": 0.3394637405872345, - "eval_runtime": 13.5268, - "eval_samples_per_second": 555.489, - "eval_steps_per_second": 8.723, - "step": 305000 - }, - { - "epoch": 13.04, - "learning_rate": 8.708601852247024e-06, - "loss": 0.0177, - "step": 305500 - }, - { - "epoch": 13.04, - "eval_accuracy": 0.9507585839765771, - "eval_f1": 0.9511306898962475, - "eval_loss": 0.33947256207466125, - "eval_runtime": 13.529, - "eval_samples_per_second": 555.399, - "eval_steps_per_second": 8.722, - "step": 305500 - }, - { - "epoch": 13.06, - "learning_rate": 8.681927787973198e-06, - "loss": 0.0203, - "step": 306000 - }, - { - "epoch": 13.06, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.9470831681704304, - "eval_loss": 0.38961419463157654, - "eval_runtime": 13.5335, - "eval_samples_per_second": 555.216, - "eval_steps_per_second": 8.719, - "step": 306000 - }, - { - "epoch": 13.08, - "learning_rate": 8.655253723699375e-06, - "loss": 0.0187, - "step": 306500 - }, - { - "epoch": 13.08, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.9500627333947544, - "eval_loss": 0.3579484224319458, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.273, - "eval_steps_per_second": 8.72, - "step": 306500 - }, - { - "epoch": 13.1, - "learning_rate": 8.628579659425548e-06, - "loss": 0.0217, - "step": 307000 - }, - { - "epoch": 13.1, - "eval_accuracy": 0.9524886877828054, - "eval_f1": 0.952847940349729, - "eval_loss": 0.3058973550796509, - "eval_runtime": 13.5306, - "eval_samples_per_second": 555.333, - "eval_steps_per_second": 8.721, - "step": 307000 - }, - { - "epoch": 13.12, - "learning_rate": 8.601905595151722e-06, - "loss": 0.0176, - "step": 307500 - }, - { - "epoch": 13.12, - "eval_accuracy": 0.9515570934256056, - "eval_f1": 0.9519249944919858, - "eval_loss": 0.34081974625587463, - "eval_runtime": 13.5326, - "eval_samples_per_second": 555.253, - "eval_steps_per_second": 8.72, - "step": 307500 - }, - { - "epoch": 13.14, - "learning_rate": 8.575231530877898e-06, - "loss": 0.0216, - "step": 308000 - }, - { - "epoch": 13.14, - "eval_accuracy": 0.9490284801703487, - "eval_f1": 0.9494784208638937, - "eval_loss": 0.37706053256988525, - "eval_runtime": 13.5281, - "eval_samples_per_second": 555.436, - "eval_steps_per_second": 8.723, - "step": 308000 - }, - { - "epoch": 13.17, - "learning_rate": 8.548557466604073e-06, - "loss": 0.0198, - "step": 308500 - }, - { - "epoch": 13.17, - "eval_accuracy": 0.9474314612722917, - "eval_f1": 0.9478336601536719, - "eval_loss": 0.35731518268585205, - "eval_runtime": 13.5317, - "eval_samples_per_second": 555.288, - "eval_steps_per_second": 8.72, - "step": 308500 - }, - { - "epoch": 13.19, - "learning_rate": 8.521883402330247e-06, - "loss": 0.0189, - "step": 309000 - }, - { - "epoch": 13.19, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9506267413803191, - "eval_loss": 0.3440324068069458, - "eval_runtime": 13.5286, - "eval_samples_per_second": 555.415, - "eval_steps_per_second": 8.722, - "step": 309000 - }, - { - "epoch": 13.21, - "learning_rate": 8.495316034313517e-06, - "loss": 0.0209, - "step": 309500 - }, - { - "epoch": 13.21, - "eval_accuracy": 0.95009315943572, - "eval_f1": 0.9505051712846733, - "eval_loss": 0.3339444398880005, - "eval_runtime": 13.5263, - "eval_samples_per_second": 555.511, - "eval_steps_per_second": 8.724, - "step": 309500 - }, - { - "epoch": 13.23, - "learning_rate": 8.468641970039691e-06, - "loss": 0.0192, - "step": 310000 - }, - { - "epoch": 13.23, - "eval_accuracy": 0.9459675272824062, - "eval_f1": 0.9464722133111272, - "eval_loss": 0.37150633335113525, - "eval_runtime": 13.5326, - "eval_samples_per_second": 555.253, - "eval_steps_per_second": 8.72, - "step": 310000 - }, - { - "epoch": 13.25, - "learning_rate": 8.441967905765866e-06, - "loss": 0.021, - "step": 310500 - }, - { - "epoch": 13.25, - "eval_accuracy": 0.9504924141602342, - "eval_f1": 0.950787518846855, - "eval_loss": 0.35833537578582764, - "eval_runtime": 13.5315, - "eval_samples_per_second": 555.299, - "eval_steps_per_second": 8.72, - "step": 310500 - }, - { - "epoch": 13.27, - "learning_rate": 8.415293841492042e-06, - "loss": 0.0211, - "step": 311000 - }, - { - "epoch": 13.27, - "eval_accuracy": 0.9499600745275486, - "eval_f1": 0.9503528961719908, - "eval_loss": 0.3444558084011078, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.273, - "eval_steps_per_second": 8.72, - "step": 311000 - }, - { - "epoch": 13.29, - "learning_rate": 8.388619777218215e-06, - "loss": 0.0174, - "step": 311500 - }, - { - "epoch": 13.29, - "eval_accuracy": 0.9483630556294916, - "eval_f1": 0.948639503786924, - "eval_loss": 0.3671543002128601, - "eval_runtime": 13.5177, - "eval_samples_per_second": 555.864, - "eval_steps_per_second": 8.729, - "step": 311500 - }, - { - "epoch": 13.32, - "learning_rate": 8.361945712944391e-06, - "loss": 0.0198, - "step": 312000 - }, - { - "epoch": 13.32, - "eval_accuracy": 0.9506254990684057, - "eval_f1": 0.9509650396188268, - "eval_loss": 0.35667410492897034, - "eval_runtime": 13.5286, - "eval_samples_per_second": 555.416, - "eval_steps_per_second": 8.722, - "step": 312000 - }, - { - "epoch": 13.34, - "learning_rate": 8.335324996799113e-06, - "loss": 0.024, - "step": 312500 - }, - { - "epoch": 13.34, - "eval_accuracy": 0.9512909236092627, - "eval_f1": 0.9516829387166317, - "eval_loss": 0.342978298664093, - "eval_runtime": 13.5287, - "eval_samples_per_second": 555.411, - "eval_steps_per_second": 8.722, - "step": 312500 - }, - { - "epoch": 13.36, - "learning_rate": 8.308650932525288e-06, - "loss": 0.0218, - "step": 313000 - }, - { - "epoch": 13.36, - "eval_accuracy": 0.9488953952621773, - "eval_f1": 0.9491380374650025, - "eval_loss": 0.3575313985347748, - "eval_runtime": 13.53, - "eval_samples_per_second": 555.36, - "eval_steps_per_second": 8.721, - "step": 313000 - }, - { - "epoch": 13.38, - "learning_rate": 8.281976868251462e-06, - "loss": 0.0216, - "step": 313500 - }, - { - "epoch": 13.38, - "eval_accuracy": 0.9515570934256056, - "eval_f1": 0.9518313582997843, - "eval_loss": 0.3285492956638336, - "eval_runtime": 13.5303, - "eval_samples_per_second": 555.346, - "eval_steps_per_second": 8.721, - "step": 313500 - }, - { - "epoch": 13.4, - "learning_rate": 8.255302803977637e-06, - "loss": 0.0219, - "step": 314000 - }, - { - "epoch": 13.4, - "eval_accuracy": 0.9520894330582912, - "eval_f1": 0.9523844337000912, - "eval_loss": 0.33300209045410156, - "eval_runtime": 13.549, - "eval_samples_per_second": 554.581, - "eval_steps_per_second": 8.709, - "step": 314000 - }, - { - "epoch": 13.42, - "learning_rate": 8.228628739703813e-06, - "loss": 0.0222, - "step": 314500 - }, - { - "epoch": 13.42, - "eval_accuracy": 0.9422411498536066, - "eval_f1": 0.9429110456077876, - "eval_loss": 0.4017827808856964, - "eval_runtime": 13.5434, - "eval_samples_per_second": 554.808, - "eval_steps_per_second": 8.713, - "step": 314500 - }, - { - "epoch": 13.44, - "learning_rate": 8.202008023558535e-06, - "loss": 0.0223, - "step": 315000 - }, - { - "epoch": 13.44, - "eval_accuracy": 0.9542187915890338, - "eval_f1": 0.954503749292435, - "eval_loss": 0.3192913234233856, - "eval_runtime": 13.5411, - "eval_samples_per_second": 554.904, - "eval_steps_per_second": 8.714, - "step": 315000 - }, - { - "epoch": 13.47, - "learning_rate": 8.17533395928471e-06, - "loss": 0.0208, - "step": 315500 - }, - { - "epoch": 13.47, - "eval_accuracy": 0.9491615650785201, - "eval_f1": 0.9495546249079766, - "eval_loss": 0.34587451815605164, - "eval_runtime": 13.5465, - "eval_samples_per_second": 554.682, - "eval_steps_per_second": 8.711, - "step": 315500 - }, - { - "epoch": 13.49, - "learning_rate": 8.148659895010884e-06, - "loss": 0.021, - "step": 316000 - }, - { - "epoch": 13.49, - "eval_accuracy": 0.9480968858131488, - "eval_f1": 0.9485904316567751, - "eval_loss": 0.36984121799468994, - "eval_runtime": 13.5392, - "eval_samples_per_second": 554.979, - "eval_steps_per_second": 8.715, - "step": 316000 - }, - { - "epoch": 13.51, - "learning_rate": 8.121985830737058e-06, - "loss": 0.0229, - "step": 316500 - }, - { - "epoch": 13.51, - "eval_accuracy": 0.9503593292520628, - "eval_f1": 0.9507095888927306, - "eval_loss": 0.36136379837989807, - "eval_runtime": 13.542, - "eval_samples_per_second": 554.865, - "eval_steps_per_second": 8.714, - "step": 316500 - }, - { - "epoch": 13.53, - "learning_rate": 8.095311766463233e-06, - "loss": 0.0216, - "step": 317000 - }, - { - "epoch": 13.53, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.9502031930904051, - "eval_loss": 0.3747590482234955, - "eval_runtime": 13.5418, - "eval_samples_per_second": 554.873, - "eval_steps_per_second": 8.714, - "step": 317000 - }, - { - "epoch": 13.55, - "learning_rate": 8.068637702189407e-06, - "loss": 0.0215, - "step": 317500 - }, - { - "epoch": 13.55, - "eval_accuracy": 0.9487623103540058, - "eval_f1": 0.9490990236660635, - "eval_loss": 0.3659830689430237, - "eval_runtime": 13.5408, - "eval_samples_per_second": 554.916, - "eval_steps_per_second": 8.714, - "step": 317500 - }, - { - "epoch": 13.57, - "learning_rate": 8.041963637915584e-06, - "loss": 0.0212, - "step": 318000 - }, - { - "epoch": 13.57, - "eval_accuracy": 0.9546180463135481, - "eval_f1": 0.9548286526427764, - "eval_loss": 0.32127419114112854, - "eval_runtime": 13.5416, - "eval_samples_per_second": 554.881, - "eval_steps_per_second": 8.714, - "step": 318000 - }, - { - "epoch": 13.59, - "learning_rate": 8.015342921770306e-06, - "loss": 0.0221, - "step": 318500 - }, - { - "epoch": 13.59, - "eval_accuracy": 0.9520894330582912, - "eval_f1": 0.9524872864210052, - "eval_loss": 0.3612636923789978, - "eval_runtime": 13.543, - "eval_samples_per_second": 554.827, - "eval_steps_per_second": 8.713, - "step": 318500 - }, - { - "epoch": 13.61, - "learning_rate": 7.98866885749648e-06, - "loss": 0.0224, - "step": 319000 - }, - { - "epoch": 13.61, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9474092608026106, - "eval_loss": 0.3816893398761749, - "eval_runtime": 13.5345, - "eval_samples_per_second": 555.175, - "eval_steps_per_second": 8.718, - "step": 319000 - }, - { - "epoch": 13.64, - "learning_rate": 7.961994793222655e-06, - "loss": 0.021, - "step": 319500 - }, - { - "epoch": 13.64, - "eval_accuracy": 0.9449028480170348, - "eval_f1": 0.9454074908591944, - "eval_loss": 0.41708582639694214, - "eval_runtime": 13.5337, - "eval_samples_per_second": 555.205, - "eval_steps_per_second": 8.719, - "step": 319500 - }, - { - "epoch": 13.66, - "learning_rate": 7.93532072894883e-06, - "loss": 0.0212, - "step": 320000 - }, - { - "epoch": 13.66, - "eval_accuracy": 0.9466329518232632, - "eval_f1": 0.9471857115940103, - "eval_loss": 0.41624367237091064, - "eval_runtime": 13.5161, - "eval_samples_per_second": 555.931, - "eval_steps_per_second": 8.73, - "step": 320000 - }, - { - "epoch": 13.68, - "learning_rate": 7.908646664675004e-06, - "loss": 0.025, - "step": 320500 - }, - { - "epoch": 13.68, - "eval_accuracy": 0.9524886877828054, - "eval_f1": 0.9527800688729483, - "eval_loss": 0.3502090275287628, - "eval_runtime": 13.515, - "eval_samples_per_second": 555.974, - "eval_steps_per_second": 8.731, - "step": 320500 - }, - { - "epoch": 13.7, - "learning_rate": 7.881972600401178e-06, - "loss": 0.0206, - "step": 321000 - }, - { - "epoch": 13.7, - "eval_accuracy": 0.9482299707213202, - "eval_f1": 0.9486548518369584, - "eval_loss": 0.3662354648113251, - "eval_runtime": 13.5152, - "eval_samples_per_second": 555.965, - "eval_steps_per_second": 8.731, - "step": 321000 - }, - { - "epoch": 13.72, - "learning_rate": 7.8553518842559e-06, - "loss": 0.0238, - "step": 321500 - }, - { - "epoch": 13.72, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9505464950704269, - "eval_loss": 0.341840535402298, - "eval_runtime": 13.52, - "eval_samples_per_second": 555.769, - "eval_steps_per_second": 8.728, - "step": 321500 - }, - { - "epoch": 13.74, - "learning_rate": 7.828677819982076e-06, - "loss": 0.021, - "step": 322000 - }, - { - "epoch": 13.74, - "eval_accuracy": 0.9507585839765771, - "eval_f1": 0.9510538753270081, - "eval_loss": 0.3421614170074463, - "eval_runtime": 13.5138, - "eval_samples_per_second": 556.023, - "eval_steps_per_second": 8.732, - "step": 322000 - }, - { - "epoch": 13.76, - "learning_rate": 7.802003755708251e-06, - "loss": 0.02, - "step": 322500 - }, - { - "epoch": 13.76, - "eval_accuracy": 0.9522225179664626, - "eval_f1": 0.9525058871622871, - "eval_loss": 0.33706653118133545, - "eval_runtime": 13.514, - "eval_samples_per_second": 556.017, - "eval_steps_per_second": 8.732, - "step": 322500 - }, - { - "epoch": 13.79, - "learning_rate": 7.775329691434426e-06, - "loss": 0.0207, - "step": 323000 - }, - { - "epoch": 13.79, - "eval_accuracy": 0.9472983763641203, - "eval_f1": 0.9477291310319618, - "eval_loss": 0.3597688376903534, - "eval_runtime": 13.5206, - "eval_samples_per_second": 555.743, - "eval_steps_per_second": 8.727, - "step": 323000 - }, - { - "epoch": 13.81, - "learning_rate": 7.7486556271606e-06, - "loss": 0.0203, - "step": 323500 - }, - { - "epoch": 13.81, - "eval_accuracy": 0.9495608198030343, - "eval_f1": 0.9498900072393941, - "eval_loss": 0.3365311920642853, - "eval_runtime": 13.5216, - "eval_samples_per_second": 555.703, - "eval_steps_per_second": 8.727, - "step": 323500 - }, - { - "epoch": 13.83, - "learning_rate": 7.722034911015322e-06, - "loss": 0.0235, - "step": 324000 - }, - { - "epoch": 13.83, - "eval_accuracy": 0.9499600745275486, - "eval_f1": 0.9503223785334844, - "eval_loss": 0.32426974177360535, - "eval_runtime": 13.5237, - "eval_samples_per_second": 555.619, - "eval_steps_per_second": 8.725, - "step": 324000 - }, - { - "epoch": 13.85, - "learning_rate": 7.695360846741497e-06, - "loss": 0.0211, - "step": 324500 - }, - { - "epoch": 13.85, - "eval_accuracy": 0.9479638009049773, - "eval_f1": 0.9484043560686116, - "eval_loss": 0.35503652691841125, - "eval_runtime": 13.5258, - "eval_samples_per_second": 555.532, - "eval_steps_per_second": 8.724, - "step": 324500 - }, - { - "epoch": 13.87, - "learning_rate": 7.668686782467671e-06, - "loss": 0.0206, - "step": 325000 - }, - { - "epoch": 13.87, - "eval_accuracy": 0.9519563481501198, - "eval_f1": 0.952238975411297, - "eval_loss": 0.31724080443382263, - "eval_runtime": 13.5262, - "eval_samples_per_second": 555.515, - "eval_steps_per_second": 8.724, - "step": 325000 - }, - { - "epoch": 13.89, - "learning_rate": 7.642012718193846e-06, - "loss": 0.0244, - "step": 325500 - }, - { - "epoch": 13.89, - "eval_accuracy": 0.95009315943572, - "eval_f1": 0.9504515199397509, - "eval_loss": 0.3432355225086212, - "eval_runtime": 13.5167, - "eval_samples_per_second": 555.907, - "eval_steps_per_second": 8.73, - "step": 325500 - }, - { - "epoch": 13.91, - "learning_rate": 7.615338653920021e-06, - "loss": 0.0205, - "step": 326000 - }, - { - "epoch": 13.91, - "eval_accuracy": 0.9530210274154911, - "eval_f1": 0.9533589200873134, - "eval_loss": 0.3301331698894501, - "eval_runtime": 13.529, - "eval_samples_per_second": 555.398, - "eval_steps_per_second": 8.722, - "step": 326000 - }, - { - "epoch": 13.93, - "learning_rate": 7.588664589646196e-06, - "loss": 0.0205, - "step": 326500 - }, - { - "epoch": 13.93, - "eval_accuracy": 0.9502262443438914, - "eval_f1": 0.9504567720611751, - "eval_loss": 0.3518039882183075, - "eval_runtime": 13.5331, - "eval_samples_per_second": 555.229, - "eval_steps_per_second": 8.719, - "step": 326500 - }, - { - "epoch": 13.96, - "learning_rate": 7.56199052537237e-06, - "loss": 0.0235, - "step": 327000 - }, - { - "epoch": 13.96, - "eval_accuracy": 0.9532871972318339, - "eval_f1": 0.9535624980400332, - "eval_loss": 0.3395467698574066, - "eval_runtime": 13.5318, - "eval_samples_per_second": 555.284, - "eval_steps_per_second": 8.72, - "step": 327000 - }, - { - "epoch": 13.98, - "learning_rate": 7.535369809227093e-06, - "loss": 0.0205, - "step": 327500 - }, - { - "epoch": 13.98, - "eval_accuracy": 0.9511578387010913, - "eval_f1": 0.9514405872868344, - "eval_loss": 0.3324923813343048, - "eval_runtime": 13.5349, - "eval_samples_per_second": 555.157, - "eval_steps_per_second": 8.718, - "step": 327500 - }, - { - "epoch": 14.0, - "learning_rate": 7.508695744953267e-06, - "loss": 0.0213, - "step": 328000 - }, - { - "epoch": 14.0, - "eval_accuracy": 0.9468991216396061, - "eval_f1": 0.9473362888156124, - "eval_loss": 0.3898778557777405, - "eval_runtime": 13.541, - "eval_samples_per_second": 554.909, - "eval_steps_per_second": 8.714, - "step": 328000 - }, - { - "epoch": 14.02, - "learning_rate": 7.48207502880799e-06, - "loss": 0.0179, - "step": 328500 - }, - { - "epoch": 14.02, - "eval_accuracy": 0.9498269896193772, - "eval_f1": 0.950198372051693, - "eval_loss": 0.35235846042633057, - "eval_runtime": 13.5403, - "eval_samples_per_second": 554.937, - "eval_steps_per_second": 8.715, - "step": 328500 - }, - { - "epoch": 14.04, - "learning_rate": 7.4554009645341656e-06, - "loss": 0.0163, - "step": 329000 - }, - { - "epoch": 14.04, - "eval_accuracy": 0.9518232632419483, - "eval_f1": 0.9521459996214305, - "eval_loss": 0.3546800911426544, - "eval_runtime": 13.5254, - "eval_samples_per_second": 555.546, - "eval_steps_per_second": 8.724, - "step": 329000 - }, - { - "epoch": 14.06, - "learning_rate": 7.428726900260339e-06, - "loss": 0.0141, - "step": 329500 - }, - { - "epoch": 14.06, - "eval_accuracy": 0.9558158104870907, - "eval_f1": 0.9559601479074069, - "eval_loss": 0.3263898193836212, - "eval_runtime": 13.5426, - "eval_samples_per_second": 554.842, - "eval_steps_per_second": 8.713, - "step": 329500 - }, - { - "epoch": 14.08, - "learning_rate": 7.402052835986514e-06, - "loss": 0.0172, - "step": 330000 - }, - { - "epoch": 14.08, - "eval_accuracy": 0.9550173010380623, - "eval_f1": 0.9552692103290779, - "eval_loss": 0.3311573565006256, - "eval_runtime": 13.5321, - "eval_samples_per_second": 555.273, - "eval_steps_per_second": 8.72, - "step": 330000 - }, - { - "epoch": 14.11, - "learning_rate": 7.375378771712689e-06, - "loss": 0.0167, - "step": 330500 - }, - { - "epoch": 14.11, - "eval_accuracy": 0.9538195368645196, - "eval_f1": 0.9540514746090495, - "eval_loss": 0.34398627281188965, - "eval_runtime": 13.5429, - "eval_samples_per_second": 554.831, - "eval_steps_per_second": 8.713, - "step": 330500 - }, - { - "epoch": 14.13, - "learning_rate": 7.348704707438864e-06, - "loss": 0.0171, - "step": 331000 - }, - { - "epoch": 14.13, - "eval_accuracy": 0.9576789992014906, - "eval_f1": 0.9578177765352257, - "eval_loss": 0.3113496005535126, - "eval_runtime": 13.5401, - "eval_samples_per_second": 554.945, - "eval_steps_per_second": 8.715, - "step": 331000 - } - ], - "logging_steps": 500, - "max_steps": 468620, - "num_train_epochs": 20, - "save_steps": 500, - "total_flos": 5.573569942291212e+18, - "trial_name": null, - "trial_params": null -}