|
{
  "best_metric": 0.03524893894791603,
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-84",
  "epoch": 29.925925925925927,
  "eval_steps": 500,
  "global_step": 101,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8888888888888888,
      "eval_LCC": -0.11469366908669681,
      "eval_SROCC": -0.15860744297719087,
      "eval_loss": 0.4400586783885956,
      "eval_runtime": 42.4739,
      "eval_samples_per_second": 1.177,
      "eval_steps_per_second": 0.047,
      "step": 3
    },
    {
      "epoch": 1.7777777777777777,
      "eval_LCC": -0.1545349416495121,
      "eval_SROCC": -0.2675150060024009,
      "eval_loss": 0.22064876556396484,
      "eval_runtime": 42.0384,
      "eval_samples_per_second": 1.189,
      "eval_steps_per_second": 0.048,
      "step": 6
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 16.114206314086914,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.3084,
      "step": 10
    },
    {
      "epoch": 2.962962962962963,
      "eval_LCC": -0.1813303002138391,
      "eval_SROCC": -0.27539015606242495,
      "eval_loss": 0.1909903734922409,
      "eval_runtime": 42.3941,
      "eval_samples_per_second": 1.179,
      "eval_steps_per_second": 0.047,
      "step": 10
    },
    {
      "epoch": 3.851851851851852,
      "eval_LCC": -0.1510725671786146,
      "eval_SROCC": -0.2169987995198079,
      "eval_loss": 0.23344503343105316,
      "eval_runtime": 42.0983,
      "eval_samples_per_second": 1.188,
      "eval_steps_per_second": 0.048,
      "step": 13
    },
    {
      "epoch": 4.7407407407407405,
      "eval_LCC": -0.1309558195202113,
      "eval_SROCC": -0.2,
      "eval_loss": 0.14843837916851044,
      "eval_runtime": 42.2346,
      "eval_samples_per_second": 1.184,
      "eval_steps_per_second": 0.047,
      "step": 16
    },
    {
      "epoch": 5.925925925925926,
      "grad_norm": 12.447588920593262,
      "learning_rate": 9.966191788709716e-06,
      "loss": 0.0852,
      "step": 20
    },
    {
      "epoch": 5.925925925925926,
      "eval_LCC": -0.08517980393535173,
      "eval_SROCC": -0.10213685474189677,
      "eval_loss": 0.12589265406131744,
      "eval_runtime": 42.2026,
      "eval_samples_per_second": 1.185,
      "eval_steps_per_second": 0.047,
      "step": 20
    },
    {
      "epoch": 6.814814814814815,
      "eval_LCC": -0.05947795499623739,
      "eval_SROCC": -0.07092436974789915,
      "eval_loss": 0.15516823530197144,
      "eval_runtime": 42.1659,
      "eval_samples_per_second": 1.186,
      "eval_steps_per_second": 0.047,
      "step": 23
    },
    {
      "epoch": 8.0,
      "eval_LCC": -0.0584113029101595,
      "eval_SROCC": -0.09483793517406962,
      "eval_loss": 0.09420724213123322,
      "eval_runtime": 42.5672,
      "eval_samples_per_second": 1.175,
      "eval_steps_per_second": 0.047,
      "step": 27
    },
    {
      "epoch": 8.88888888888889,
      "grad_norm": 6.68361759185791,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.0406,
      "step": 30
    },
    {
      "epoch": 8.88888888888889,
      "eval_LCC": -0.05502642204622495,
      "eval_SROCC": -0.04797118847539015,
      "eval_loss": 0.08413399010896683,
      "eval_runtime": 42.0478,
      "eval_samples_per_second": 1.189,
      "eval_steps_per_second": 0.048,
      "step": 30
    },
    {
      "epoch": 9.777777777777779,
      "eval_LCC": -0.044815400478992636,
      "eval_SROCC": -0.0575750300120048,
      "eval_loss": 0.08856825530529022,
      "eval_runtime": 42.1399,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 33
    },
    {
      "epoch": 10.962962962962964,
      "eval_LCC": -0.04737793047406845,
      "eval_SROCC": -0.07726290516206481,
      "eval_loss": 0.07205035537481308,
      "eval_runtime": 42.2456,
      "eval_samples_per_second": 1.184,
      "eval_steps_per_second": 0.047,
      "step": 37
    },
    {
      "epoch": 11.851851851851851,
      "grad_norm": 1.6507503986358643,
      "learning_rate": 9.177439057064684e-06,
      "loss": 0.023,
      "step": 40
    },
    {
      "epoch": 11.851851851851851,
      "eval_LCC": -0.03637477859676811,
      "eval_SROCC": -0.04460984393757503,
      "eval_loss": 0.06969437748193741,
      "eval_runtime": 42.0276,
      "eval_samples_per_second": 1.19,
      "eval_steps_per_second": 0.048,
      "step": 40
    },
    {
      "epoch": 12.74074074074074,
      "eval_LCC": -0.00905185691215031,
      "eval_SROCC": -0.021656662665066023,
      "eval_loss": 0.05769188702106476,
      "eval_runtime": 42.1379,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 43
    },
    {
      "epoch": 13.925925925925926,
      "eval_LCC": 0.011204733733650302,
      "eval_SROCC": -0.03135654261704682,
      "eval_loss": 0.06662916392087936,
      "eval_runtime": 41.9846,
      "eval_samples_per_second": 1.191,
      "eval_steps_per_second": 0.048,
      "step": 47
    },
    {
      "epoch": 14.814814814814815,
      "grad_norm": 0.8441389799118042,
      "learning_rate": 8.43120818934367e-06,
      "loss": 0.0136,
      "step": 50
    },
    {
      "epoch": 14.814814814814815,
      "eval_LCC": 0.006038695767176178,
      "eval_SROCC": -0.05008403361344538,
      "eval_loss": 0.052547648549079895,
      "eval_runtime": 42.0346,
      "eval_samples_per_second": 1.189,
      "eval_steps_per_second": 0.048,
      "step": 50
    },
    {
      "epoch": 16.0,
      "eval_LCC": 0.05035447924360885,
      "eval_SROCC": -0.01781512605042017,
      "eval_loss": 0.06257949769496918,
      "eval_runtime": 42.0741,
      "eval_samples_per_second": 1.188,
      "eval_steps_per_second": 0.048,
      "step": 54
    },
    {
      "epoch": 16.88888888888889,
      "eval_LCC": 0.08269606829608296,
      "eval_SROCC": 0.015894357743097238,
      "eval_loss": 0.043766554445028305,
      "eval_runtime": 42.063,
      "eval_samples_per_second": 1.189,
      "eval_steps_per_second": 0.048,
      "step": 57
    },
    {
      "epoch": 17.77777777777778,
      "grad_norm": 1.5869125127792358,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.0113,
      "step": 60
    },
    {
      "epoch": 17.77777777777778,
      "eval_LCC": 0.10735678232307705,
      "eval_SROCC": 0.074093637454982,
      "eval_loss": 0.05027168616652489,
      "eval_runtime": 42.0969,
      "eval_samples_per_second": 1.188,
      "eval_steps_per_second": 0.048,
      "step": 60
    },
    {
      "epoch": 18.962962962962962,
      "eval_LCC": 0.11292358158916863,
      "eval_SROCC": 0.08177671068427371,
      "eval_loss": 0.04287625476717949,
      "eval_runtime": 42.1066,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 64
    },
    {
      "epoch": 19.85185185185185,
      "eval_LCC": 0.11877825314298533,
      "eval_SROCC": 0.08744297719087635,
      "eval_loss": 0.04554678872227669,
      "eval_runtime": 42.3014,
      "eval_samples_per_second": 1.182,
      "eval_steps_per_second": 0.047,
      "step": 67
    },
    {
      "epoch": 20.74074074074074,
      "grad_norm": 2.4397213459014893,
      "learning_rate": 6.434016163555452e-06,
      "loss": 0.0097,
      "step": 70
    },
    {
      "epoch": 20.74074074074074,
      "eval_LCC": 0.1315746635210365,
      "eval_SROCC": 0.09262905162064825,
      "eval_loss": 0.05967041850090027,
      "eval_runtime": 42.314,
      "eval_samples_per_second": 1.182,
      "eval_steps_per_second": 0.047,
      "step": 70
    },
    {
      "epoch": 21.925925925925927,
      "eval_LCC": 0.14461751047124533,
      "eval_SROCC": 0.06141656662665066,
      "eval_loss": 0.03967233747243881,
      "eval_runtime": 41.6175,
      "eval_samples_per_second": 1.201,
      "eval_steps_per_second": 0.048,
      "step": 74
    },
    {
      "epoch": 22.814814814814813,
      "eval_LCC": 0.1636524681044405,
      "eval_SROCC": 0.07783913565426169,
      "eval_loss": 0.052949074655771255,
      "eval_runtime": 42.4694,
      "eval_samples_per_second": 1.177,
      "eval_steps_per_second": 0.047,
      "step": 77
    },
    {
      "epoch": 23.703703703703702,
      "grad_norm": 1.3437652587890625,
      "learning_rate": 5.290724144552379e-06,
      "loss": 0.0084,
      "step": 80
    },
    {
      "epoch": 24.0,
      "eval_LCC": 0.17607777965634505,
      "eval_SROCC": 0.07159663865546219,
      "eval_loss": 0.03660748153924942,
      "eval_runtime": 41.9971,
      "eval_samples_per_second": 1.191,
      "eval_steps_per_second": 0.048,
      "step": 81
    },
    {
      "epoch": 24.88888888888889,
      "eval_LCC": 0.18198076982766778,
      "eval_SROCC": 0.0683313325330132,
      "eval_loss": 0.03524893894791603,
      "eval_runtime": 42.1358,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 84
    },
    {
      "epoch": 25.77777777777778,
      "eval_LCC": 0.1847607546132782,
      "eval_SROCC": 0.09695078031212485,
      "eval_loss": 0.04908595234155655,
      "eval_runtime": 42.3591,
      "eval_samples_per_second": 1.18,
      "eval_steps_per_second": 0.047,
      "step": 87
    },
    {
      "epoch": 26.666666666666668,
      "grad_norm": 1.1412720680236816,
      "learning_rate": 4.131759111665349e-06,
      "loss": 0.0078,
      "step": 90
    },
    {
      "epoch": 26.962962962962962,
      "eval_LCC": 0.18306996734895817,
      "eval_SROCC": 0.09839135654261706,
      "eval_loss": 0.039556559175252914,
      "eval_runtime": 42.0202,
      "eval_samples_per_second": 1.19,
      "eval_steps_per_second": 0.048,
      "step": 91
    },
    {
      "epoch": 27.85185185185185,
      "eval_LCC": 0.18556425091617126,
      "eval_SROCC": 0.1011764705882353,
      "eval_loss": 0.039471760392189026,
      "eval_runtime": 42.1635,
      "eval_samples_per_second": 1.186,
      "eval_steps_per_second": 0.047,
      "step": 94
    },
    {
      "epoch": 28.74074074074074,
      "eval_LCC": 0.19561082710345667,
      "eval_SROCC": 0.10972388955582232,
      "eval_loss": 0.04257187247276306,
      "eval_runtime": 42.1398,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 97
    },
    {
      "epoch": 29.62962962962963,
      "grad_norm": 0.5356392860412598,
      "learning_rate": 3.019601169804216e-06,
      "loss": 0.0063,
      "step": 100
    },
    {
      "epoch": 29.925925925925927,
      "eval_LCC": 0.19841940652904885,
      "eval_SROCC": 0.10021608643457382,
      "eval_loss": 0.03704619035124779,
      "eval_runtime": 42.1404,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.047,
      "step": 101
    },
    {
      "epoch": 29.925925925925927,
      "step": 101,
      "total_flos": 5.9133993779567e+18,
      "train_loss": 0.050966506113879165,
      "train_runtime": 7148.5533,
      "train_samples_per_second": 1.497,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 10,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.9133993779567e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}