|
{ |
|
"best_metric": 0.9918838103374626, |
|
"best_model_checkpoint": "resnet-50-finetuned-dog-vs-cat/checkpoint-329", |
|
"epoch": 2.986342943854325, |
|
"eval_steps": 500, |
|
"global_step": 492, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5951987504959106, |
|
"learning_rate": 1e-05, |
|
"loss": 0.693, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.743243932723999, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6912, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.6900031566619873, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6871, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.7302324175834656, |
|
"learning_rate": 4e-05, |
|
"loss": 0.68, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.9748005270957947, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6664, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.131984829902649, |
|
"learning_rate": 4.8868778280542986e-05, |
|
"loss": 0.6408, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.0235670804977417, |
|
"learning_rate": 4.7737556561085976e-05, |
|
"loss": 0.617, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.1163190603256226, |
|
"learning_rate": 4.660633484162896e-05, |
|
"loss": 0.5807, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.282690405845642, |
|
"learning_rate": 4.547511312217195e-05, |
|
"loss": 0.5438, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.068588137626648, |
|
"learning_rate": 4.434389140271493e-05, |
|
"loss": 0.502, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.9256152510643005, |
|
"learning_rate": 4.321266968325792e-05, |
|
"loss": 0.4701, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.9594171643257141, |
|
"learning_rate": 4.2081447963800907e-05, |
|
"loss": 0.447, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0906646251678467, |
|
"learning_rate": 4.095022624434389e-05, |
|
"loss": 0.4238, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.9434269070625305, |
|
"learning_rate": 3.981900452488688e-05, |
|
"loss": 0.4026, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.15977144241333, |
|
"learning_rate": 3.868778280542987e-05, |
|
"loss": 0.3807, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.9563286304473877, |
|
"learning_rate": 3.7556561085972854e-05, |
|
"loss": 0.3357, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9867577958137548, |
|
"eval_loss": 0.22545380890369415, |
|
"eval_runtime": 27.2631, |
|
"eval_samples_per_second": 85.867, |
|
"eval_steps_per_second": 2.714, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.0790256261825562, |
|
"learning_rate": 3.642533936651584e-05, |
|
"loss": 0.3166, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.9720374345779419, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.2961, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.2068029642105103, |
|
"learning_rate": 3.416289592760181e-05, |
|
"loss": 0.2753, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.0407794713974, |
|
"learning_rate": 3.3031674208144794e-05, |
|
"loss": 0.2631, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.9938662052154541, |
|
"learning_rate": 3.1900452488687784e-05, |
|
"loss": 0.2427, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.8150984048843384, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.2163, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.7988576889038086, |
|
"learning_rate": 2.9638009049773758e-05, |
|
"loss": 0.216, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.1607186794281006, |
|
"learning_rate": 2.850678733031674e-05, |
|
"loss": 0.2086, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.0582228899002075, |
|
"learning_rate": 2.737556561085973e-05, |
|
"loss": 0.1857, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.8427866697311401, |
|
"learning_rate": 2.6244343891402718e-05, |
|
"loss": 0.1808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.8713784217834473, |
|
"learning_rate": 2.51131221719457e-05, |
|
"loss": 0.1689, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.8434356451034546, |
|
"learning_rate": 2.3981900452488688e-05, |
|
"loss": 0.1732, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.7676334977149963, |
|
"learning_rate": 2.2850678733031675e-05, |
|
"loss": 0.1678, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.6907545328140259, |
|
"learning_rate": 2.1719457013574662e-05, |
|
"loss": 0.1624, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.7248879075050354, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.1613, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.4968321323394775, |
|
"learning_rate": 1.9457013574660635e-05, |
|
"loss": 0.1683, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9918838103374626, |
|
"eval_loss": 0.05769222229719162, |
|
"eval_runtime": 27.9863, |
|
"eval_samples_per_second": 83.648, |
|
"eval_steps_per_second": 2.644, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.3721855878829956, |
|
"learning_rate": 1.832579185520362e-05, |
|
"loss": 0.1635, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.0472960472106934, |
|
"learning_rate": 1.7194570135746606e-05, |
|
"loss": 0.1576, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.9706544280052185, |
|
"learning_rate": 1.6063348416289596e-05, |
|
"loss": 0.157, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.6168745756149292, |
|
"learning_rate": 1.493212669683258e-05, |
|
"loss": 0.1535, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 2.201444149017334, |
|
"learning_rate": 1.3800904977375568e-05, |
|
"loss": 0.1494, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.6486875414848328, |
|
"learning_rate": 1.2669683257918553e-05, |
|
"loss": 0.1502, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.7056523561477661, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 0.1446, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.3912076950073242, |
|
"learning_rate": 1.0407239819004526e-05, |
|
"loss": 0.1428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.0273276567459106, |
|
"learning_rate": 9.276018099547511e-06, |
|
"loss": 0.144, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 1.1831059455871582, |
|
"learning_rate": 8.144796380090498e-06, |
|
"loss": 0.1572, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.5577871799468994, |
|
"learning_rate": 7.013574660633485e-06, |
|
"loss": 0.1419, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 1.2978945970535278, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.1562, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.5621709823608398, |
|
"learning_rate": 4.751131221719457e-06, |
|
"loss": 0.1352, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 1.0484684705734253, |
|
"learning_rate": 3.619909502262444e-06, |
|
"loss": 0.143, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.9431272149085999, |
|
"learning_rate": 2.48868778280543e-06, |
|
"loss": 0.1517, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.8055468201637268, |
|
"learning_rate": 1.3574660633484164e-06, |
|
"loss": 0.1455, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 1.1355047225952148, |
|
"learning_rate": 2.2624434389140275e-07, |
|
"loss": 0.1448, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9918838103374626, |
|
"eval_loss": 0.04604041948914528, |
|
"eval_runtime": 27.4499, |
|
"eval_samples_per_second": 85.283, |
|
"eval_steps_per_second": 2.696, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 492, |
|
"total_flos": 1.336513820941394e+18, |
|
"train_loss": 0.2994473668617931, |
|
"train_runtime": 953.1608, |
|
"train_samples_per_second": 66.313, |
|
"train_steps_per_second": 0.516 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 492, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.336513820941394e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|