{"best_metric": 4.383237361907959, "best_model_checkpoint": "results/finetuned_models/OLID/finetuned_CMLM_maks_frac-0.2_0.55-explain_wrt-predicted/checkpoint-664", "epoch": 4.0120845921450154, "global_step": 1328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [{"epoch": 0.03, "learning_rate": 4.9848942598187317e-05, "loss": 5.6104, "step": 10}, {"epoch": 0.06, "learning_rate": 4.9697885196374624e-05, "loss": 5.1722, "step": 20}, {"epoch": 0.09, "learning_rate": 4.954682779456193e-05, "loss": 5.2806, "step": 30}, {"epoch": 0.12, "learning_rate": 4.9395770392749245e-05, "loss": 5.1288, "step": 40}, {"epoch": 0.15, "learning_rate": 4.924471299093656e-05, "loss": 5.0946, "step": 50}, {"epoch": 0.18, "learning_rate": 4.9093655589123874e-05, "loss": 4.7467, "step": 60}, {"epoch": 0.21, "learning_rate": 4.894259818731118e-05, "loss": 4.8987, "step": 70}, {"epoch": 0.24, "learning_rate": 4.879154078549849e-05, "loss": 4.8214, "step": 80}, {"epoch": 0.27, "learning_rate": 4.86404833836858e-05, "loss": 4.7891, "step": 90}, {"epoch": 0.3, "learning_rate": 4.848942598187312e-05, "loss": 4.6505, "step": 100}, {"epoch": 0.33, "learning_rate": 4.833836858006043e-05, "loss": 4.8505, "step": 110}, {"epoch": 0.36, "learning_rate": 4.818731117824773e-05, "loss": 4.7141, "step": 120}, {"epoch": 0.39, "learning_rate": 4.8036253776435046e-05, "loss": 4.5672, "step": 130}, {"epoch": 0.42, "learning_rate": 4.788519637462236e-05, "loss": 4.4017, "step": 140}, {"epoch": 0.45, "learning_rate": 4.7734138972809674e-05, "loss": 4.5208, "step": 150}, {"epoch": 0.48, "learning_rate": 4.758308157099698e-05, "loss": 4.5176, "step": 160}, {"epoch": 0.5, "eval_loss": 4.550278663635254, "eval_runtime": 14.7773, "eval_samples_per_second": 179.194, "eval_steps_per_second": 11.233, "step": 166}, {"epoch": 0.51, "learning_rate": 4.743202416918429e-05, "loss": 4.489, "step": 170}, {"epoch": 0.54, "learning_rate": 4.72809667673716e-05, "loss": 4.6648, "step": 180}, {"epoch": 0.57, "learning_rate": 4.712990936555892e-05, "loss": 4.7188, "step": 190}, {"epoch": 0.6, "learning_rate": 4.6978851963746225e-05, "loss": 4.8011, "step": 200}, {"epoch": 0.63, "learning_rate": 4.682779456193354e-05, "loss": 4.698, "step": 210}, {"epoch": 0.66, "learning_rate": 4.6676737160120846e-05, "loss": 4.5085, "step": 220}, {"epoch": 0.69, "learning_rate": 4.652567975830816e-05, "loss": 4.4405, "step": 230}, {"epoch": 0.73, "learning_rate": 4.6374622356495474e-05, "loss": 4.5906, "step": 240}, {"epoch": 0.76, "learning_rate": 4.622356495468278e-05, "loss": 4.4835, "step": 250}, {"epoch": 0.79, "learning_rate": 4.607250755287009e-05, "loss": 4.4923, "step": 260}, {"epoch": 0.82, "learning_rate": 4.5921450151057403e-05, "loss": 4.5259, "step": 270}, {"epoch": 0.85, "learning_rate": 4.577039274924472e-05, "loss": 4.4251, "step": 280}, {"epoch": 0.88, "learning_rate": 4.5619335347432025e-05, "loss": 4.4415, "step": 290}, {"epoch": 0.91, "learning_rate": 4.546827794561934e-05, "loss": 4.5138, "step": 300}, {"epoch": 0.94, "learning_rate": 4.5317220543806646e-05, "loss": 4.4501, "step": 310}, {"epoch": 0.97, "learning_rate": 4.516616314199396e-05, "loss": 4.4595, "step": 320}, {"epoch": 1.0, "learning_rate": 4.5015105740181275e-05, "loss": 4.5988, "step": 330}, {"epoch": 1.0, "eval_loss": 4.4101786613464355, "eval_runtime": 15.3163, "eval_samples_per_second": 172.887, "eval_steps_per_second": 10.838, "step": 332}, {"epoch": 1.03, "learning_rate": 4.486404833836858e-05, "loss": 4.1014, "step": 340}, {"epoch": 1.06, "learning_rate": 4.4712990936555896e-05, "loss": 4.0713, "step": 350}, {"epoch": 1.09, "learning_rate": 4.4561933534743204e-05, "loss": 4.1176, "step": 360}, {"epoch": 1.12, "learning_rate": 4.441087613293052e-05, "loss": 4.0181, "step": 370}, {"epoch": 1.15, "learning_rate": 4.4259818731117825e-05, "loss": 3.9442, "step": 380}, {"epoch": 1.18, "learning_rate": 4.410876132930514e-05, "loss": 4.1475, "step": 390}, {"epoch": 1.21, "learning_rate": 4.395770392749245e-05, "loss": 4.2223, "step": 400}, {"epoch": 1.24, "learning_rate": 4.380664652567976e-05, "loss": 4.1392, "step": 410}, {"epoch": 1.27, "learning_rate": 4.365558912386707e-05, "loss": 4.0068, "step": 420}, {"epoch": 1.3, "learning_rate": 4.350453172205438e-05, "loss": 3.9818, "step": 430}, {"epoch": 1.33, "learning_rate": 4.33534743202417e-05, "loss": 4.2681, "step": 440}, {"epoch": 1.36, "learning_rate": 4.3202416918429004e-05, "loss": 4.1101, "step": 450}, {"epoch": 1.39, "learning_rate": 4.305135951661632e-05, "loss": 3.9186, "step": 460}, {"epoch": 1.42, "learning_rate": 4.2900302114803626e-05, "loss": 3.8203, "step": 470}, {"epoch": 1.45, "learning_rate": 4.274924471299094e-05, "loss": 3.8622, "step": 480}, {"epoch": 1.48, "learning_rate": 4.259818731117825e-05, "loss": 4.1678, "step": 490}, {"epoch": 1.5, "eval_loss": 4.412171840667725, "eval_runtime": 15.6664, "eval_samples_per_second": 169.025, "eval_steps_per_second": 10.596, "step": 498}, {"epoch": 1.51, "learning_rate": 4.244712990936556e-05, "loss": 4.1218, "step": 500}, {"epoch": 1.54, "learning_rate": 4.229607250755287e-05, "loss": 3.9189, "step": 510}, {"epoch": 1.57, "learning_rate": 4.214501510574018e-05, "loss": 4.1192, "step": 520}, {"epoch": 1.6, "learning_rate": 4.19939577039275e-05, "loss": 3.9903, "step": 530}, {"epoch": 1.63, "learning_rate": 4.1842900302114804e-05, "loss": 4.1671, "step": 540}, {"epoch": 1.66, "learning_rate": 4.169184290030212e-05, "loss": 3.945, "step": 550}, {"epoch": 1.69, "learning_rate": 4.1540785498489426e-05, "loss": 3.9957, "step": 560}, {"epoch": 1.72, "learning_rate": 4.138972809667674e-05, "loss": 4.1655, "step": 570}, {"epoch": 1.75, "learning_rate": 4.1238670694864054e-05, "loss": 3.8984, "step": 580}, {"epoch": 1.78, "learning_rate": 4.108761329305136e-05, "loss": 4.0351, "step": 590}, {"epoch": 1.81, "learning_rate": 4.093655589123867e-05, "loss": 3.9757, "step": 600}, {"epoch": 1.84, "learning_rate": 4.078549848942598e-05, "loss": 3.9874, "step": 610}, {"epoch": 1.87, "learning_rate": 4.06344410876133e-05, "loss": 3.8126, "step": 620}, {"epoch": 1.9, "learning_rate": 4.0483383685800605e-05, "loss": 4.2519, "step": 630}, {"epoch": 1.93, "learning_rate": 4.033232628398791e-05, "loss": 4.0979, "step": 640}, {"epoch": 1.96, "learning_rate": 4.0181268882175226e-05, "loss": 3.8422, "step": 650}, {"epoch": 1.99, "learning_rate": 4.003021148036254e-05, "loss": 4.1531, "step": 660}, {"epoch": 2.01, "eval_loss": 4.383237361907959, "eval_runtime": 15.7999, "eval_samples_per_second": 167.596, "eval_steps_per_second": 10.506, "step": 664}, {"epoch": 2.02, "learning_rate": 3.9879154078549855e-05, "loss": 3.7563, "step": 670}, {"epoch": 2.05, "learning_rate": 3.972809667673716e-05, "loss": 3.5979, "step": 680}, {"epoch": 2.08, "learning_rate": 3.957703927492447e-05, "loss": 3.7998, "step": 690}, {"epoch": 2.11, "learning_rate": 3.9425981873111784e-05, "loss": 3.8811, "step": 700}, {"epoch": 2.15, "learning_rate": 3.92749244712991e-05, "loss": 3.687, "step": 710}, {"epoch": 2.18, "learning_rate": 3.9123867069486405e-05, "loss": 3.8717, "step": 720}, {"epoch": 2.21, "learning_rate": 3.897280966767371e-05, "loss": 3.512, "step": 730}, {"epoch": 2.24, "learning_rate": 3.882175226586103e-05, "loss": 3.6362, "step": 740}, {"epoch": 2.27, "learning_rate": 3.867069486404834e-05, "loss": 3.4258, "step": 750}, {"epoch": 2.3, "learning_rate": 3.8519637462235655e-05, "loss": 3.6063, "step": 760}, {"epoch": 2.33, "learning_rate": 3.836858006042296e-05, "loss": 3.3684, "step": 770}, {"epoch": 2.36, "learning_rate": 3.821752265861027e-05, "loss": 3.5997, "step": 780}, {"epoch": 2.39, "learning_rate": 3.8066465256797584e-05, "loss": 3.6768, "step": 790}, {"epoch": 2.42, "learning_rate": 3.79154078549849e-05, "loss": 3.5425, "step": 800}, {"epoch": 2.45, "learning_rate": 3.776435045317221e-05, "loss": 3.6274, "step": 810}, {"epoch": 2.48, "learning_rate": 3.761329305135951e-05, "loss": 3.7632, "step": 820}, {"epoch": 2.51, "learning_rate": 3.746223564954683e-05, "loss": 3.4288, "step": 830}, {"epoch": 2.51, "eval_loss": 4.4401445388793945, "eval_runtime": 15.8646, "eval_samples_per_second": 166.913, "eval_steps_per_second": 10.464, "step": 830}, {"epoch": 2.54, "learning_rate": 3.731117824773414e-05, "loss": 3.7785, "step": 840}, {"epoch": 2.57, "learning_rate": 3.7160120845921455e-05, "loss": 3.51, "step": 850}, {"epoch": 2.6, "learning_rate": 3.700906344410876e-05, "loss": 3.5191, "step": 860}, {"epoch": 2.63, "learning_rate": 3.685800604229607e-05, "loss": 3.4876, "step": 870}, {"epoch": 2.66, "learning_rate": 3.6706948640483384e-05, "loss": 3.5081, "step": 880}, {"epoch": 2.69, "learning_rate": 3.65558912386707e-05, "loss": 3.6952, "step": 890}, {"epoch": 2.72, "learning_rate": 3.640483383685801e-05, "loss": 3.6586, "step": 900}, {"epoch": 2.75, "learning_rate": 3.625377643504532e-05, "loss": 3.5887, "step": 910}, {"epoch": 2.78, "learning_rate": 3.610271903323263e-05, "loss": 3.4526, "step": 920}, {"epoch": 2.81, "learning_rate": 3.595166163141994e-05, "loss": 3.5271, "step": 930}, {"epoch": 2.84, "learning_rate": 3.5800604229607256e-05, "loss": 3.5838, "step": 940}, {"epoch": 2.87, "learning_rate": 3.564954682779456e-05, "loss": 3.6732, "step": 950}, {"epoch": 2.9, "learning_rate": 3.549848942598187e-05, "loss": 3.516, "step": 960}, {"epoch": 2.93, "learning_rate": 3.5347432024169185e-05, "loss": 3.8047, "step": 970}, {"epoch": 2.96, "learning_rate": 3.51963746223565e-05, "loss": 3.8504, "step": 980}, {"epoch": 2.99, "learning_rate": 3.504531722054381e-05, "loss": 3.6579, "step": 990}, {"epoch": 3.01, "eval_loss": 4.3886871337890625, "eval_runtime": 15.5236, "eval_samples_per_second": 170.58, "eval_steps_per_second": 10.693, "step": 996}, {"epoch": 3.02, "learning_rate": 3.489425981873112e-05, "loss": 3.4727, "step": 1000}, {"epoch": 3.05, "learning_rate": 3.474320241691843e-05, "loss": 3.1725, "step": 1010}, {"epoch": 3.08, "learning_rate": 3.459214501510574e-05, "loss": 3.1958, "step": 1020}, {"epoch": 3.11, "learning_rate": 3.4441087613293056e-05, "loss": 3.3109, "step": 1030}, {"epoch": 3.14, "learning_rate": 3.429003021148036e-05, "loss": 3.2058, "step": 1040}, {"epoch": 3.17, "learning_rate": 3.413897280966768e-05, "loss": 3.4052, "step": 1050}, {"epoch": 3.2, "learning_rate": 3.3987915407854985e-05, "loss": 3.2419, "step": 1060}, {"epoch": 3.23, "learning_rate": 3.38368580060423e-05, "loss": 3.1844, "step": 1070}, {"epoch": 3.26, "learning_rate": 3.368580060422961e-05, "loss": 3.1922, "step": 1080}, {"epoch": 3.29, "learning_rate": 3.353474320241692e-05, "loss": 3.2545, "step": 1090}, {"epoch": 3.32, "learning_rate": 3.338368580060423e-05, "loss": 3.126, "step": 1100}, {"epoch": 3.35, "learning_rate": 3.323262839879154e-05, "loss": 3.2755, "step": 1110}, {"epoch": 3.38, "learning_rate": 3.3081570996978856e-05, "loss": 3.2627, "step": 1120}, {"epoch": 3.41, "learning_rate": 3.2930513595166164e-05, "loss": 3.2227, "step": 1130}, {"epoch": 3.44, "learning_rate": 3.277945619335348e-05, "loss": 3.1871, "step": 1140}, {"epoch": 3.47, "learning_rate": 3.2628398791540785e-05, "loss": 3.3655, "step": 1150}, {"epoch": 3.5, "learning_rate": 3.24773413897281e-05, "loss": 3.3232, "step": 1160}, {"epoch": 3.51, "eval_loss": 4.473777770996094, "eval_runtime": 15.5491, "eval_samples_per_second": 170.3, "eval_steps_per_second": 10.676, "step": 1162}, {"epoch": 3.53, "learning_rate": 3.232628398791541e-05, "loss": 3.2528, "step": 1170}, {"epoch": 3.56, "learning_rate": 3.217522658610272e-05, "loss": 3.3312, "step": 1180}, {"epoch": 3.6, "learning_rate": 3.202416918429003e-05, "loss": 3.2352, "step": 1190}, {"epoch": 3.63, "learning_rate": 3.187311178247734e-05, "loss": 3.3988, "step": 1200}, {"epoch": 3.66, "learning_rate": 3.1722054380664657e-05, "loss": 3.3558, "step": 1210}, {"epoch": 3.69, "learning_rate": 3.1570996978851964e-05, "loss": 3.1662, "step": 1220}, {"epoch": 3.72, "learning_rate": 3.141993957703928e-05, "loss": 3.4317, "step": 1230}, {"epoch": 3.75, "learning_rate": 3.1268882175226586e-05, "loss": 3.3406, "step": 1240}, {"epoch": 3.78, "learning_rate": 3.11178247734139e-05, "loss": 3.2896, "step": 1250}, {"epoch": 3.81, "learning_rate": 3.096676737160121e-05, "loss": 3.3931, "step": 1260}, {"epoch": 3.84, "learning_rate": 3.081570996978852e-05, "loss": 3.1645, "step": 1270}, {"epoch": 3.87, "learning_rate": 3.0664652567975835e-05, "loss": 3.3172, "step": 1280}, {"epoch": 3.9, "learning_rate": 3.0513595166163146e-05, "loss": 3.4949, "step": 1290}, {"epoch": 3.93, "learning_rate": 3.0362537764350457e-05, "loss": 3.3462, "step": 1300}, {"epoch": 3.96, "learning_rate": 3.0211480362537764e-05, "loss": 3.0747, "step": 1310}, {"epoch": 3.99, "learning_rate": 3.0060422960725075e-05, "loss": 3.3506, "step": 1320}, {"epoch": 4.01, "eval_loss": 4.468980312347412, "eval_runtime": 15.7958, "eval_samples_per_second": 167.64, "eval_steps_per_second": 10.509, "step": 1328}], "max_steps": 3310, "num_train_epochs": 10, "total_flos": 1789825627641600.0, "trial_name": null, "trial_params": null} |