{ "best_metric": 0.44850993156433105, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__gpt2/checkpoint-1650", "epoch": 2.830188679245283, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 9.5626, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 8.3337, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.4032837352488456, "eval_distillation_accuracy_factual": 0.9027706516162134, "eval_distillation_f1_counterfactual": 0.25332361626563743, "eval_distillation_f1_factual": 0.8923986152500426, "eval_groundtruth_accuracy_counterfactual": 0.3948178553104156, "eval_groundtruth_f1_counterfactual": 0.2496202659272842, "eval_groundtruth_f1_factual": 0.6769021047158763, "eval_icace_cosine": 0.5089748501777649, "eval_icace_l2": 0.6751341819763184, "eval_icace_normdiff": 0.4728732407093048, "eval_loss": 6.914986610412598, "eval_runtime": 8.5076, "eval_samples_per_second": 458.181, "eval_steps_per_second": 7.17, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 8.0708, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 8.0155, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 7.8721, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.4112365315546434, "eval_distillation_accuracy_factual": 0.8499230374551052, "eval_distillation_f1_counterfactual": 0.26628916171325984, "eval_distillation_f1_factual": 0.8285211369866856, "eval_groundtruth_accuracy_counterfactual": 0.38506926629040533, "eval_groundtruth_f1_counterfactual": 0.25271938974464514, "eval_groundtruth_f1_factual": 0.6489095740193955, "eval_icace_cosine": 0.5038185119628906, "eval_icace_l2": 0.6619729399681091, "eval_icace_normdiff": 0.46057918667793274, "eval_loss": 6.711576461791992, "eval_runtime": 8.463, "eval_samples_per_second": 460.596, "eval_steps_per_second": 7.208, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 7.7697, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 7.4746, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.4345818368394048, "eval_distillation_accuracy_factual": 0.8943047716777834, "eval_distillation_f1_counterfactual": 0.36539387414949204, "eval_distillation_f1_factual": 0.8790390714655469, "eval_groundtruth_accuracy_counterfactual": 0.4379168804515136, "eval_groundtruth_f1_counterfactual": 0.37232051642074804, "eval_groundtruth_f1_factual": 0.6805448228169029, "eval_icace_cosine": 0.49212712049484253, "eval_icace_l2": 0.6124334931373596, "eval_icace_normdiff": 0.3626319468021393, "eval_loss": 6.113770484924316, "eval_runtime": 11.9098, "eval_samples_per_second": 327.294, "eval_steps_per_second": 5.122, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 6.935, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 6.4993, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 6.3969, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.4517701385325808, "eval_distillation_accuracy_factual": 0.8819907644946127, "eval_distillation_f1_counterfactual": 0.3672780672425353, "eval_distillation_f1_factual": 0.8740916315844087, "eval_groundtruth_accuracy_counterfactual": 0.4479220112878399, "eval_groundtruth_f1_counterfactual": 0.3685202541209631, "eval_groundtruth_f1_factual": 0.6689431963175946, "eval_icace_cosine": 0.47133857011795044, "eval_icace_l2": 0.5820692777633667, "eval_icace_normdiff": 0.3234429359436035, "eval_loss": 5.622773170471191, "eval_runtime": 8.9261, "eval_samples_per_second": 436.697, "eval_steps_per_second": 6.834, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 6.3015, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 6.4147, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.4943560800410467, "eval_distillation_accuracy_factual": 0.8771164699846075, "eval_distillation_f1_counterfactual": 0.40141613862482084, "eval_distillation_f1_factual": 0.8651475943969047, "eval_groundtruth_accuracy_counterfactual": 0.4933299127757825, "eval_groundtruth_f1_counterfactual": 0.4046242383424789, "eval_groundtruth_f1_factual": 0.6782475825605336, "eval_icace_cosine": 0.4729008674621582, "eval_icace_l2": 0.5720574855804443, "eval_icace_normdiff": 0.32980820536613464, "eval_loss": 5.256165027618408, "eval_runtime": 8.5352, "eval_samples_per_second": 456.695, "eval_steps_per_second": 7.147, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 6.028, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 6.2127, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 6.0754, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.5148794253463315, "eval_distillation_accuracy_factual": 0.8891739353514623, "eval_distillation_f1_counterfactual": 0.46975585794913294, "eval_distillation_f1_factual": 0.8782460233330489, "eval_groundtruth_accuracy_counterfactual": 0.4979476654694715, "eval_groundtruth_f1_counterfactual": 0.4602149484897823, "eval_groundtruth_f1_factual": 0.6837890933812207, "eval_icace_cosine": 0.4736707806587219, "eval_icace_l2": 0.5735559463500977, "eval_icace_normdiff": 0.3409141004085541, "eval_loss": 5.250072479248047, "eval_runtime": 11.0546, "eval_samples_per_second": 352.614, "eval_steps_per_second": 5.518, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 5.9987, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 5.9832, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.517444843509492, "eval_distillation_accuracy_factual": 0.857619291944587, "eval_distillation_f1_counterfactual": 0.44135045481715707, "eval_distillation_f1_factual": 0.8472747551327465, "eval_groundtruth_accuracy_counterfactual": 0.49974345818368393, "eval_groundtruth_f1_counterfactual": 0.42269358576354665, "eval_groundtruth_f1_factual": 0.679666572315821, "eval_icace_cosine": 0.45823103189468384, "eval_icace_l2": 0.543055534362793, "eval_icace_normdiff": 0.31377869844436646, "eval_loss": 5.03781795501709, "eval_runtime": 8.5125, "eval_samples_per_second": 457.913, "eval_steps_per_second": 7.166, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 5.8506, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 5.9995, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 5.7842, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.517188301693176, "eval_distillation_accuracy_factual": 0.8840430990251411, "eval_distillation_f1_counterfactual": 0.4448974695548891, "eval_distillation_f1_factual": 0.8713802743397793, "eval_groundtruth_accuracy_counterfactual": 0.4917906618778861, "eval_groundtruth_f1_counterfactual": 0.4248414092801352, "eval_groundtruth_f1_factual": 0.6686668393362314, "eval_icace_cosine": 0.4501557946205139, "eval_icace_l2": 0.5426948070526123, "eval_icace_normdiff": 0.3083663284778595, "eval_loss": 5.02903938293457, "eval_runtime": 8.5015, "eval_samples_per_second": 458.508, "eval_steps_per_second": 7.175, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 5.6897, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 5.8004, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.5094920472036942, "eval_distillation_accuracy_factual": 0.8442791174961519, "eval_distillation_f1_counterfactual": 0.44401367157402866, "eval_distillation_f1_factual": 0.8355913313428855, "eval_groundtruth_accuracy_counterfactual": 0.49846074910210364, "eval_groundtruth_f1_counterfactual": 0.4334166563409603, "eval_groundtruth_f1_factual": 0.6784960242261955, "eval_icace_cosine": 0.46040675044059753, "eval_icace_l2": 0.5599527359008789, "eval_icace_normdiff": 0.32556313276290894, "eval_loss": 4.990879535675049, "eval_runtime": 10.9171, "eval_samples_per_second": 357.055, "eval_steps_per_second": 5.588, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 5.5857, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 5.6857, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 5.5945, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.5318111852231914, "eval_distillation_accuracy_factual": 0.8478707029245767, "eval_distillation_f1_counterfactual": 0.45551454374802824, "eval_distillation_f1_factual": 0.8370787172997429, "eval_groundtruth_accuracy_counterfactual": 0.521549512570549, "eval_groundtruth_f1_counterfactual": 0.4477727205067822, "eval_groundtruth_f1_factual": 0.668142089075085, "eval_icace_cosine": 0.45585161447525024, "eval_icace_l2": 0.5328731536865234, "eval_icace_normdiff": 0.3072195053100586, "eval_loss": 4.901017665863037, "eval_runtime": 9.9099, "eval_samples_per_second": 393.345, "eval_steps_per_second": 6.155, "step": 500 }, { "epoch": 0.89, "learning_rate": 4.436741767764298e-05, "loss": 5.5083, "step": 520 }, { "epoch": 0.93, "learning_rate": 4.415077989601387e-05, "loss": 5.601, "step": 540 }, { "epoch": 0.94, "eval_distillation_accuracy_counterfactual": 0.5310415597742432, "eval_distillation_accuracy_factual": 0.8553104155977425, "eval_distillation_f1_counterfactual": 0.44816624998010435, "eval_distillation_f1_factual": 0.8381029427228823, "eval_groundtruth_accuracy_counterfactual": 0.5292457670600308, "eval_groundtruth_f1_counterfactual": 0.45516123514488516, "eval_groundtruth_f1_factual": 0.6732840864319071, "eval_icace_cosine": 0.4528113603591919, "eval_icace_l2": 0.5345481634140015, "eval_icace_normdiff": 0.2938852608203888, "eval_loss": 4.935153961181641, "eval_runtime": 8.5303, "eval_samples_per_second": 456.96, "eval_steps_per_second": 7.151, "step": 550 }, { "epoch": 0.96, "learning_rate": 4.393414211438475e-05, "loss": 5.4693, "step": 560 }, { "epoch": 0.99, "learning_rate": 4.371750433275563e-05, "loss": 5.3859, "step": 580 }, { "epoch": 1.03, "learning_rate": 4.3500866551126516e-05, "loss": 5.4444, "step": 600 }, { "epoch": 1.03, "eval_distillation_accuracy_counterfactual": 0.5351462288353002, "eval_distillation_accuracy_factual": 0.8376090302719343, "eval_distillation_f1_counterfactual": 0.4646928383032319, "eval_distillation_f1_factual": 0.8136129774481347, "eval_groundtruth_accuracy_counterfactual": 0.5220625962031811, "eval_groundtruth_f1_counterfactual": 0.45940569881831583, "eval_groundtruth_f1_factual": 0.6464129478426055, "eval_icace_cosine": 0.4499565064907074, "eval_icace_l2": 0.53062504529953, "eval_icace_normdiff": 0.2946617007255554, "eval_loss": 4.922811508178711, "eval_runtime": 8.5635, "eval_samples_per_second": 455.185, "eval_steps_per_second": 7.123, "step": 600 }, { "epoch": 1.06, "learning_rate": 4.3284228769497406e-05, "loss": 5.3757, "step": 620 }, { "epoch": 1.1, "learning_rate": 4.306759098786828e-05, "loss": 5.1859, "step": 640 }, { "epoch": 1.11, "eval_distillation_accuracy_counterfactual": 0.517188301693176, "eval_distillation_accuracy_factual": 0.8509492047203694, "eval_distillation_f1_counterfactual": 0.4290727658266418, "eval_distillation_f1_factual": 0.8384558217767074, "eval_groundtruth_accuracy_counterfactual": 0.5082093381221139, "eval_groundtruth_f1_counterfactual": 0.42269647972268587, "eval_groundtruth_f1_factual": 0.6755980806829072, "eval_icace_cosine": 0.4485946595668793, "eval_icace_l2": 0.5397273898124695, "eval_icace_normdiff": 0.3031124770641327, "eval_loss": 4.948563575744629, "eval_runtime": 13.0275, "eval_samples_per_second": 299.213, "eval_steps_per_second": 4.682, "step": 650 }, { "epoch": 1.13, "learning_rate": 4.285095320623917e-05, "loss": 5.3242, "step": 660 }, { "epoch": 1.17, "learning_rate": 4.2634315424610055e-05, "loss": 5.2305, "step": 680 }, { "epoch": 1.2, "learning_rate": 4.241767764298094e-05, "loss": 5.2249, "step": 700 }, { "epoch": 1.2, "eval_distillation_accuracy_counterfactual": 0.5302719343252951, "eval_distillation_accuracy_factual": 0.8591585428424833, "eval_distillation_f1_counterfactual": 0.46664853039612986, "eval_distillation_f1_factual": 0.8435082433820902, "eval_groundtruth_accuracy_counterfactual": 0.5192406362237044, "eval_groundtruth_f1_counterfactual": 0.4639940942910896, "eval_groundtruth_f1_factual": 0.6622920619946735, "eval_icace_cosine": 0.4588872194290161, "eval_icace_l2": 0.5473797917366028, "eval_icace_normdiff": 0.3158855736255646, "eval_loss": 4.849107265472412, "eval_runtime": 8.5061, "eval_samples_per_second": 458.261, "eval_steps_per_second": 7.171, "step": 700 }, { "epoch": 1.23, "learning_rate": 4.220103986135182e-05, "loss": 5.1991, "step": 720 }, { "epoch": 1.27, "learning_rate": 4.198440207972271e-05, "loss": 5.1411, "step": 740 }, { "epoch": 1.29, "eval_distillation_accuracy_counterfactual": 0.5159055926115957, "eval_distillation_accuracy_factual": 0.8353001539250898, "eval_distillation_f1_counterfactual": 0.4418890285977243, "eval_distillation_f1_factual": 0.8264105873351507, "eval_groundtruth_accuracy_counterfactual": 0.5069266290405336, "eval_groundtruth_f1_counterfactual": 0.4368112939623562, "eval_groundtruth_f1_factual": 0.6655303387754998, "eval_icace_cosine": 0.4616710841655731, "eval_icace_l2": 0.5492509007453918, "eval_icace_normdiff": 0.31496721506118774, "eval_loss": 5.071572303771973, "eval_runtime": 8.5176, "eval_samples_per_second": 457.64, "eval_steps_per_second": 7.162, "step": 750 }, { "epoch": 1.3, "learning_rate": 4.1767764298093586e-05, "loss": 5.0501, "step": 760 }, { "epoch": 1.34, "learning_rate": 4.1551126516464476e-05, "loss": 5.2481, "step": 780 }, { "epoch": 1.37, "learning_rate": 4.133448873483536e-05, "loss": 5.3003, "step": 800 }, { "epoch": 1.37, "eval_distillation_accuracy_counterfactual": 0.5292457670600308, "eval_distillation_accuracy_factual": 0.8542842483324782, "eval_distillation_f1_counterfactual": 0.45336534145034557, "eval_distillation_f1_factual": 0.8351566200793666, "eval_groundtruth_accuracy_counterfactual": 0.5102616726526423, "eval_groundtruth_f1_counterfactual": 0.4470237373445373, "eval_groundtruth_f1_factual": 0.6513563562975246, "eval_icace_cosine": 0.4572685658931732, "eval_icace_l2": 0.5440567135810852, "eval_icace_normdiff": 0.30389901995658875, "eval_loss": 4.905848979949951, "eval_runtime": 12.0231, "eval_samples_per_second": 324.21, "eval_steps_per_second": 5.074, "step": 800 }, { "epoch": 1.41, "learning_rate": 4.111785095320624e-05, "loss": 5.3652, "step": 820 }, { "epoch": 1.44, "learning_rate": 4.0901213171577124e-05, "loss": 5.1777, "step": 840 }, { "epoch": 1.46, "eval_distillation_accuracy_counterfactual": 0.5328373524884557, "eval_distillation_accuracy_factual": 0.848896870189841, "eval_distillation_f1_counterfactual": 0.43122807119418605, "eval_distillation_f1_factual": 0.8380166634503379, "eval_groundtruth_accuracy_counterfactual": 0.525910723447922, "eval_groundtruth_f1_counterfactual": 0.42855980851941905, "eval_groundtruth_f1_factual": 0.6740485107403849, "eval_icace_cosine": 0.45742782950401306, "eval_icace_l2": 0.5312564969062805, "eval_icace_normdiff": 0.29801884293556213, "eval_loss": 4.82774019241333, "eval_runtime": 8.5135, "eval_samples_per_second": 457.863, "eval_steps_per_second": 7.165, "step": 850 }, { "epoch": 1.48, "learning_rate": 4.068457538994801e-05, "loss": 5.082, "step": 860 }, { "epoch": 1.51, "learning_rate": 4.04679376083189e-05, "loss": 5.0848, "step": 880 }, { "epoch": 1.54, "learning_rate": 4.025129982668977e-05, "loss": 5.0317, "step": 900 }, { "epoch": 1.54, "eval_distillation_accuracy_counterfactual": 0.5364289379168804, "eval_distillation_accuracy_factual": 0.8586454592098512, "eval_distillation_f1_counterfactual": 0.47824703705574834, "eval_distillation_f1_factual": 0.8466850762264413, "eval_groundtruth_accuracy_counterfactual": 0.5192406362237044, "eval_groundtruth_f1_counterfactual": 0.4689573317719734, "eval_groundtruth_f1_factual": 0.6614762917109959, "eval_icace_cosine": 0.4618070125579834, "eval_icace_l2": 0.551954448223114, "eval_icace_normdiff": 0.32031771540641785, "eval_loss": 4.871151924133301, "eval_runtime": 8.5291, "eval_samples_per_second": 457.026, "eval_steps_per_second": 7.152, "step": 900 }, { "epoch": 1.58, "learning_rate": 4.003466204506066e-05, "loss": 5.1618, "step": 920 }, { "epoch": 1.61, "learning_rate": 3.9818024263431546e-05, "loss": 5.1056, "step": 940 }, { "epoch": 1.63, "eval_distillation_accuracy_counterfactual": 0.5489994869163674, "eval_distillation_accuracy_factual": 0.849153412006157, "eval_distillation_f1_counterfactual": 0.4620954039021683, "eval_distillation_f1_factual": 0.834974685438975, "eval_groundtruth_accuracy_counterfactual": 0.534376603386352, "eval_groundtruth_f1_counterfactual": 0.45160560697228036, "eval_groundtruth_f1_factual": 0.6623628027633227, "eval_icace_cosine": 0.44925835728645325, "eval_icace_l2": 0.5219926834106445, "eval_icace_normdiff": 0.29042351245880127, "eval_loss": 4.714720726013184, "eval_runtime": 10.8678, "eval_samples_per_second": 358.675, "eval_steps_per_second": 5.613, "step": 950 }, { "epoch": 1.65, "learning_rate": 3.960138648180243e-05, "loss": 5.0922, "step": 960 }, { "epoch": 1.68, "learning_rate": 3.938474870017331e-05, "loss": 5.1155, "step": 980 }, { "epoch": 1.72, "learning_rate": 3.91681109185442e-05, "loss": 5.0007, "step": 1000 }, { "epoch": 1.72, "eval_distillation_accuracy_counterfactual": 0.5413032324268856, "eval_distillation_accuracy_factual": 0.8565931246793227, "eval_distillation_f1_counterfactual": 0.4602705547507884, "eval_distillation_f1_factual": 0.8433764689823576, "eval_groundtruth_accuracy_counterfactual": 0.5277065161621344, "eval_groundtruth_f1_counterfactual": 0.4527054638176642, "eval_groundtruth_f1_factual": 0.6636713893371056, "eval_icace_cosine": 0.45849716663360596, "eval_icace_l2": 0.5370419025421143, "eval_icace_normdiff": 0.2944065034389496, "eval_loss": 4.901066780090332, "eval_runtime": 8.5486, "eval_samples_per_second": 455.979, "eval_steps_per_second": 7.136, "step": 1000 }, { "epoch": 1.75, "learning_rate": 3.895147313691508e-05, "loss": 5.1264, "step": 1020 }, { "epoch": 1.78, "learning_rate": 3.873483535528597e-05, "loss": 5.0684, "step": 1040 }, { "epoch": 1.8, "eval_distillation_accuracy_counterfactual": 0.547203694202155, "eval_distillation_accuracy_factual": 0.8217034376603386, "eval_distillation_f1_counterfactual": 0.4837388698536936, "eval_distillation_f1_factual": 0.810862504949279, "eval_groundtruth_accuracy_counterfactual": 0.525910723447922, "eval_groundtruth_f1_counterfactual": 0.4693969983072536, "eval_groundtruth_f1_factual": 0.6703892266130405, "eval_icace_cosine": 0.46058204770088196, "eval_icace_l2": 0.5416079759597778, "eval_icace_normdiff": 0.3100549578666687, "eval_loss": 4.95220422744751, "eval_runtime": 8.5039, "eval_samples_per_second": 458.38, "eval_steps_per_second": 7.173, "step": 1050 }, { "epoch": 1.82, "learning_rate": 3.851819757365685e-05, "loss": 5.0693, "step": 1060 }, { "epoch": 1.85, "learning_rate": 3.830155979202773e-05, "loss": 5.0042, "step": 1080 }, { "epoch": 1.89, "learning_rate": 3.8084922010398616e-05, "loss": 5.1003, "step": 1100 }, { "epoch": 1.89, "eval_distillation_accuracy_counterfactual": 0.534633145202668, "eval_distillation_accuracy_factual": 0.8293996921498205, "eval_distillation_f1_counterfactual": 0.4464032223299544, "eval_distillation_f1_factual": 0.8221241204436007, "eval_groundtruth_accuracy_counterfactual": 0.521549512570549, "eval_groundtruth_f1_counterfactual": 0.4391932131294249, "eval_groundtruth_f1_factual": 0.6698779807792266, "eval_icace_cosine": 0.4571634531021118, "eval_icace_l2": 0.5411211848258972, "eval_icace_normdiff": 0.3035091757774353, "eval_loss": 4.98823881149292, "eval_runtime": 11.985, "eval_samples_per_second": 325.239, "eval_steps_per_second": 5.09, "step": 1100 }, { "epoch": 1.92, "learning_rate": 3.78682842287695e-05, "loss": 4.816, "step": 1120 }, { "epoch": 1.96, "learning_rate": 3.765164644714038e-05, "loss": 4.8439, "step": 1140 }, { "epoch": 1.97, "eval_distillation_accuracy_counterfactual": 0.5402770651616213, "eval_distillation_accuracy_factual": 0.8560800410466907, "eval_distillation_f1_counterfactual": 0.4267861418445166, "eval_distillation_f1_factual": 0.8421924351489614, "eval_groundtruth_accuracy_counterfactual": 0.534376603386352, "eval_groundtruth_f1_counterfactual": 0.42692253637748045, "eval_groundtruth_f1_factual": 0.6725597052009967, "eval_icace_cosine": 0.45820027589797974, "eval_icace_l2": 0.5365382432937622, "eval_icace_normdiff": 0.30253198742866516, "eval_loss": 4.800932884216309, "eval_runtime": 8.5249, "eval_samples_per_second": 457.25, "eval_steps_per_second": 7.156, "step": 1150 }, { "epoch": 1.99, "learning_rate": 3.7435008665511264e-05, "loss": 4.8136, "step": 1160 }, { "epoch": 2.02, "learning_rate": 3.7218370883882154e-05, "loss": 4.9884, "step": 1180 }, { "epoch": 2.06, "learning_rate": 3.700173310225303e-05, "loss": 4.7164, "step": 1200 }, { "epoch": 2.06, "eval_distillation_accuracy_counterfactual": 0.5402770651616213, "eval_distillation_accuracy_factual": 0.8499230374551052, "eval_distillation_f1_counterfactual": 0.46476851572976663, "eval_distillation_f1_factual": 0.8393420850809745, "eval_groundtruth_accuracy_counterfactual": 0.5261672652642381, "eval_groundtruth_f1_counterfactual": 0.45958049273322404, "eval_groundtruth_f1_factual": 0.6735974648370172, "eval_icace_cosine": 0.4624723792076111, "eval_icace_l2": 0.5405644774436951, "eval_icace_normdiff": 0.2997790575027466, "eval_loss": 4.861130714416504, "eval_runtime": 8.5275, "eval_samples_per_second": 457.109, "eval_steps_per_second": 7.153, "step": 1200 }, { "epoch": 2.09, "learning_rate": 3.678509532062392e-05, "loss": 4.9932, "step": 1220 }, { "epoch": 2.13, "learning_rate": 3.65684575389948e-05, "loss": 4.9391, "step": 1240 }, { "epoch": 2.14, "eval_distillation_accuracy_counterfactual": 0.530528476141611, "eval_distillation_accuracy_factual": 0.8476141611082606, "eval_distillation_f1_counterfactual": 0.44643833430458235, "eval_distillation_f1_factual": 0.8352264339610791, "eval_groundtruth_accuracy_counterfactual": 0.5225756798358132, "eval_groundtruth_f1_counterfactual": 0.4423951307540747, "eval_groundtruth_f1_factual": 0.6764649332002749, "eval_icace_cosine": 0.4605988562107086, "eval_icace_l2": 0.5397510528564453, "eval_icace_normdiff": 0.3071255683898926, "eval_loss": 4.8622236251831055, "eval_runtime": 15.2012, "eval_samples_per_second": 256.428, "eval_steps_per_second": 4.013, "step": 1250 }, { "epoch": 2.16, "learning_rate": 3.6351819757365686e-05, "loss": 4.8551, "step": 1260 }, { "epoch": 2.2, "learning_rate": 3.613518197573657e-05, "loss": 4.7986, "step": 1280 }, { "epoch": 2.23, "learning_rate": 3.591854419410746e-05, "loss": 4.6987, "step": 1300 }, { "epoch": 2.23, "eval_distillation_accuracy_counterfactual": 0.5371985633658286, "eval_distillation_accuracy_factual": 0.8547973319651103, "eval_distillation_f1_counterfactual": 0.4855763758913416, "eval_distillation_f1_factual": 0.8458791097205444, "eval_groundtruth_accuracy_counterfactual": 0.5169317598768599, "eval_groundtruth_f1_counterfactual": 0.4711991182158193, "eval_groundtruth_f1_factual": 0.6786609314773037, "eval_icace_cosine": 0.4552989602088928, "eval_icace_l2": 0.5454961657524109, "eval_icace_normdiff": 0.3164507746696472, "eval_loss": 4.8772292137146, "eval_runtime": 9.0595, "eval_samples_per_second": 430.267, "eval_steps_per_second": 6.733, "step": 1300 }, { "epoch": 2.26, "learning_rate": 3.5701906412478334e-05, "loss": 4.8687, "step": 1320 }, { "epoch": 2.3, "learning_rate": 3.5485268630849224e-05, "loss": 4.8033, "step": 1340 }, { "epoch": 2.32, "eval_distillation_accuracy_counterfactual": 0.5410466906105695, "eval_distillation_accuracy_factual": 0.8437660338635198, "eval_distillation_f1_counterfactual": 0.4750528866298297, "eval_distillation_f1_factual": 0.827952414486583, "eval_groundtruth_accuracy_counterfactual": 0.5110312981015905, "eval_groundtruth_f1_counterfactual": 0.45269348729404746, "eval_groundtruth_f1_factual": 0.6701636104858377, "eval_icace_cosine": 0.4493303596973419, "eval_icace_l2": 0.5253385305404663, "eval_icace_normdiff": 0.2902440130710602, "eval_loss": 4.811532020568848, "eval_runtime": 8.5373, "eval_samples_per_second": 456.585, "eval_steps_per_second": 7.145, "step": 1350 }, { "epoch": 2.33, "learning_rate": 3.526863084922011e-05, "loss": 4.7939, "step": 1360 }, { "epoch": 2.37, "learning_rate": 3.505199306759099e-05, "loss": 4.6752, "step": 1380 }, { "epoch": 2.4, "learning_rate": 3.483535528596187e-05, "loss": 4.8234, "step": 1400 }, { "epoch": 2.4, "eval_distillation_accuracy_counterfactual": 0.5248845561826577, "eval_distillation_accuracy_factual": 0.8414571575166753, "eval_distillation_f1_counterfactual": 0.4604647022927514, "eval_distillation_f1_factual": 0.8284602796331685, "eval_groundtruth_accuracy_counterfactual": 0.5105182144689584, "eval_groundtruth_f1_counterfactual": 0.4530457791487864, "eval_groundtruth_f1_factual": 0.6800273915795553, "eval_icace_cosine": 0.4578195810317993, "eval_icace_l2": 0.5543230772018433, "eval_icace_normdiff": 0.31625741720199585, "eval_loss": 5.0786452293396, "eval_runtime": 10.8971, "eval_samples_per_second": 357.71, "eval_steps_per_second": 5.598, "step": 1400 }, { "epoch": 2.44, "learning_rate": 3.461871750433276e-05, "loss": 4.8927, "step": 1420 }, { "epoch": 2.47, "learning_rate": 3.440207972270364e-05, "loss": 4.7194, "step": 1440 }, { "epoch": 2.49, "eval_distillation_accuracy_counterfactual": 0.5110312981015905, "eval_distillation_accuracy_factual": 0.8545407901487942, "eval_distillation_f1_counterfactual": 0.4681338599260719, "eval_distillation_f1_factual": 0.8430818307918667, "eval_groundtruth_accuracy_counterfactual": 0.5002565418163161, "eval_groundtruth_f1_counterfactual": 0.4593624830351718, "eval_groundtruth_f1_factual": 0.6727565834099151, "eval_icace_cosine": 0.4633564352989197, "eval_icace_l2": 0.5605540871620178, "eval_icace_normdiff": 0.33030107617378235, "eval_loss": 4.9696502685546875, "eval_runtime": 8.5261, "eval_samples_per_second": 457.183, "eval_steps_per_second": 7.154, "step": 1450 }, { "epoch": 2.5, "learning_rate": 3.418544194107453e-05, "loss": 4.8262, "step": 1460 }, { "epoch": 2.54, "learning_rate": 3.396880415944541e-05, "loss": 4.6793, "step": 1480 }, { "epoch": 2.57, "learning_rate": 3.3752166377816294e-05, "loss": 4.6798, "step": 1500 }, { "epoch": 2.57, "eval_distillation_accuracy_counterfactual": 0.5205233453052848, "eval_distillation_accuracy_factual": 0.8370959466393022, "eval_distillation_f1_counterfactual": 0.4579102816234381, "eval_distillation_f1_factual": 0.8252477509887871, "eval_groundtruth_accuracy_counterfactual": 0.5064135454079015, "eval_groundtruth_f1_counterfactual": 0.45042536054871796, "eval_groundtruth_f1_factual": 0.6775387933565993, "eval_icace_cosine": 0.4610835611820221, "eval_icace_l2": 0.5561323165893555, "eval_icace_normdiff": 0.31462231278419495, "eval_loss": 5.108266830444336, "eval_runtime": 8.5521, "eval_samples_per_second": 455.794, "eval_steps_per_second": 7.133, "step": 1500 }, { "epoch": 2.61, "learning_rate": 3.353552859618718e-05, "loss": 4.8829, "step": 1520 }, { "epoch": 2.64, "learning_rate": 3.331889081455806e-05, "loss": 4.7667, "step": 1540 }, { "epoch": 2.66, "eval_distillation_accuracy_counterfactual": 0.5118009235505387, "eval_distillation_accuracy_factual": 0.8455618265777322, "eval_distillation_f1_counterfactual": 0.4526050506600197, "eval_distillation_f1_factual": 0.8370631376149344, "eval_groundtruth_accuracy_counterfactual": 0.5015392508978963, "eval_groundtruth_f1_counterfactual": 0.4450025619140404, "eval_groundtruth_f1_factual": 0.6719413415081574, "eval_icace_cosine": 0.464785635471344, "eval_icace_l2": 0.5620153546333313, "eval_icace_normdiff": 0.32657164335250854, "eval_loss": 5.071800708770752, "eval_runtime": 8.4808, "eval_samples_per_second": 459.629, "eval_steps_per_second": 7.193, "step": 1550 }, { "epoch": 2.68, "learning_rate": 3.310225303292894e-05, "loss": 4.7129, "step": 1560 }, { "epoch": 2.71, "learning_rate": 3.2885615251299825e-05, "loss": 4.7026, "step": 1580 }, { "epoch": 2.74, "learning_rate": 3.2668977469670715e-05, "loss": 4.7816, "step": 1600 }, { "epoch": 2.74, "eval_distillation_accuracy_counterfactual": 0.5500256541816316, "eval_distillation_accuracy_factual": 0.8453052847614161, "eval_distillation_f1_counterfactual": 0.46856745025772817, "eval_distillation_f1_factual": 0.8301459813662639, "eval_groundtruth_accuracy_counterfactual": 0.5328373524884557, "eval_groundtruth_f1_counterfactual": 0.4556992554764913, "eval_groundtruth_f1_factual": 0.6742293632354649, "eval_icace_cosine": 0.4537753760814667, "eval_icace_l2": 0.5347741842269897, "eval_icace_normdiff": 0.30534929037094116, "eval_loss": 4.789421081542969, "eval_runtime": 10.8083, "eval_samples_per_second": 360.649, "eval_steps_per_second": 5.644, "step": 1600 }, { "epoch": 2.78, "learning_rate": 3.245233968804159e-05, "loss": 4.8026, "step": 1620 }, { "epoch": 2.81, "learning_rate": 3.223570190641248e-05, "loss": 4.8066, "step": 1640 }, { "epoch": 2.83, "eval_distillation_accuracy_counterfactual": 0.5492560287326834, "eval_distillation_accuracy_factual": 0.8445356593124679, "eval_distillation_f1_counterfactual": 0.4742631698980107, "eval_distillation_f1_factual": 0.8332463691045845, "eval_groundtruth_accuracy_counterfactual": 0.5271934325295023, "eval_groundtruth_f1_counterfactual": 0.4569299587893097, "eval_groundtruth_f1_factual": 0.6760428226327067, "eval_icace_cosine": 0.44850993156433105, "eval_icace_l2": 0.5251337289810181, "eval_icace_normdiff": 0.29198816418647766, "eval_loss": 4.806829929351807, "eval_runtime": 8.5162, "eval_samples_per_second": 457.713, "eval_steps_per_second": 7.163, "step": 1650 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 1.38860599685376e+16, "trial_name": null, "trial_params": null }