|
{ |
|
"best_metric": 0.37047404050827026, |
|
"best_model_checkpoint": "/home/ahf38/palmer_scratch/brainlm/training-runs/2023-07-19-17_00_00/checkpoint-3250", |
|
"epoch": 13.63040629095675, |
|
"global_step": 3250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 0.0002100840336134454, |
|
"loss": 0.3863, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.3854129910469055, |
|
"eval_mae": 0.4674049913883209, |
|
"eval_mse": 0.38541293144226074, |
|
"eval_r2": 0.4410198587613452, |
|
"eval_runtime": 273.6849, |
|
"eval_samples_per_second": 2.923, |
|
"eval_steps_per_second": 0.731, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 0.0004201680672268908, |
|
"loss": 0.3852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.38619935512542725, |
|
"eval_mae": 0.46789708733558655, |
|
"eval_mse": 0.386198490858078, |
|
"eval_r2": 0.4393806237898299, |
|
"eval_runtime": 280.3385, |
|
"eval_samples_per_second": 2.854, |
|
"eval_steps_per_second": 0.713, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 0.0006302521008403362, |
|
"loss": 0.3851, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.38692620396614075, |
|
"eval_mae": 0.4686347544193268, |
|
"eval_mse": 0.3869261145591736, |
|
"eval_r2": 0.43769447169001496, |
|
"eval_runtime": 264.849, |
|
"eval_samples_per_second": 3.021, |
|
"eval_steps_per_second": 0.755, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 0.0008403361344537816, |
|
"loss": 0.393, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.409867525100708, |
|
"eval_mae": 0.4844866693019867, |
|
"eval_mse": 0.4098674952983856, |
|
"eval_r2": 0.4059509929564361, |
|
"eval_runtime": 264.8497, |
|
"eval_samples_per_second": 3.021, |
|
"eval_steps_per_second": 0.755, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 0.0009999826244478293, |
|
"loss": 0.3975, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.3888590931892395, |
|
"eval_mae": 0.46977394819259644, |
|
"eval_mse": 0.38885924220085144, |
|
"eval_r2": 0.43532054204855475, |
|
"eval_runtime": 263.3448, |
|
"eval_samples_per_second": 3.038, |
|
"eval_steps_per_second": 0.759, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 0.0009995362383107963, |
|
"loss": 0.3883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.3876199424266815, |
|
"eval_mae": 0.46888846158981323, |
|
"eval_mse": 0.3876205086708069, |
|
"eval_r2": 0.4391745591771552, |
|
"eval_runtime": 264.9341, |
|
"eval_samples_per_second": 3.02, |
|
"eval_steps_per_second": 0.755, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 0.000998487151097676, |
|
"loss": 0.3887, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.3875642418861389, |
|
"eval_mae": 0.4687305688858032, |
|
"eval_mse": 0.38756391406059265, |
|
"eval_r2": 0.4382047783977898, |
|
"eval_runtime": 272.0994, |
|
"eval_samples_per_second": 2.94, |
|
"eval_steps_per_second": 0.735, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 0.0009968366285544618, |
|
"loss": 0.386, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.3863183557987213, |
|
"eval_mae": 0.4679754078388214, |
|
"eval_mse": 0.386318176984787, |
|
"eval_r2": 0.4391650800336171, |
|
"eval_runtime": 272.2539, |
|
"eval_samples_per_second": 2.938, |
|
"eval_steps_per_second": 0.735, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 0.000994586662071641, |
|
"loss": 0.3861, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.39024049043655396, |
|
"eval_mae": 0.47065961360931396, |
|
"eval_mse": 0.39024049043655396, |
|
"eval_r2": 0.4331343008067088, |
|
"eval_runtime": 266.2407, |
|
"eval_samples_per_second": 3.005, |
|
"eval_steps_per_second": 0.751, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 0.000991739966281539, |
|
"loss": 0.3876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.3867352604866028, |
|
"eval_mae": 0.46853551268577576, |
|
"eval_mse": 0.3867342174053192, |
|
"eval_r2": 0.43885509094622643, |
|
"eval_runtime": 265.3966, |
|
"eval_samples_per_second": 3.014, |
|
"eval_steps_per_second": 0.754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 0.0009882999757830587, |
|
"loss": 0.386, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.38316670060157776, |
|
"eval_mae": 0.46593964099884033, |
|
"eval_mse": 0.3831673562526703, |
|
"eval_r2": 0.44430950413045944, |
|
"eval_runtime": 262.4987, |
|
"eval_samples_per_second": 3.048, |
|
"eval_steps_per_second": 0.762, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 0.0009842708409977634, |
|
"loss": 0.3851, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.3835921585559845, |
|
"eval_mae": 0.4662681818008423, |
|
"eval_mse": 0.38359254598617554, |
|
"eval_r2": 0.4431584385649353, |
|
"eval_runtime": 262.0832, |
|
"eval_samples_per_second": 3.052, |
|
"eval_steps_per_second": 0.763, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 0.0009796574231623054, |
|
"loss": 0.3828, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.38348111510276794, |
|
"eval_mae": 0.46614208817481995, |
|
"eval_mse": 0.3834820091724396, |
|
"eval_r2": 0.44133966944739267, |
|
"eval_runtime": 267.1672, |
|
"eval_samples_per_second": 2.994, |
|
"eval_steps_per_second": 0.749, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 0.0009744652884632406, |
|
"loss": 0.3839, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.38388848304748535, |
|
"eval_mae": 0.46622949838638306, |
|
"eval_mse": 0.38388875126838684, |
|
"eval_r2": 0.44094833637705777, |
|
"eval_runtime": 264.3781, |
|
"eval_samples_per_second": 3.026, |
|
"eval_steps_per_second": 0.756, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 0.0009687007013213062, |
|
"loss": 0.3848, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.3808014392852783, |
|
"eval_mae": 0.46472200751304626, |
|
"eval_mse": 0.38080185651779175, |
|
"eval_r2": 0.44630267861478945, |
|
"eval_runtime": 265.0946, |
|
"eval_samples_per_second": 3.018, |
|
"eval_steps_per_second": 0.754, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 0.0009623706168332644, |
|
"loss": 0.3821, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.3820662200450897, |
|
"eval_mae": 0.4652169346809387, |
|
"eval_mse": 0.3820664882659912, |
|
"eval_r2": 0.4448990871151196, |
|
"eval_runtime": 267.613, |
|
"eval_samples_per_second": 2.989, |
|
"eval_steps_per_second": 0.747, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 0.0009554826723804303, |
|
"loss": 0.3818, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.3809438645839691, |
|
"eval_mae": 0.46468687057495117, |
|
"eval_mse": 0.3809432089328766, |
|
"eval_r2": 0.4446287307018585, |
|
"eval_runtime": 264.0042, |
|
"eval_samples_per_second": 3.03, |
|
"eval_steps_per_second": 0.758, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 0.0009480451784140092, |
|
"loss": 0.3824, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.38150742650032043, |
|
"eval_mae": 0.4647422134876251, |
|
"eval_mse": 0.3815067708492279, |
|
"eval_r2": 0.44662126614575437, |
|
"eval_runtime": 267.3864, |
|
"eval_samples_per_second": 2.992, |
|
"eval_steps_per_second": 0.748, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 0.0009400671084283606, |
|
"loss": 0.3816, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.3832119107246399, |
|
"eval_mae": 0.46604394912719727, |
|
"eval_mse": 0.3832109272480011, |
|
"eval_r2": 0.4432983612099932, |
|
"eval_runtime": 284.3156, |
|
"eval_samples_per_second": 2.814, |
|
"eval_steps_per_second": 0.703, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 0.0009315580881342876, |
|
"loss": 0.3794, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.3803976774215698, |
|
"eval_mae": 0.4641774296760559, |
|
"eval_mse": 0.380397766828537, |
|
"eval_r2": 0.4470804443192825, |
|
"eval_runtime": 265.2618, |
|
"eval_samples_per_second": 3.016, |
|
"eval_steps_per_second": 0.754, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 0.000922528383845411, |
|
"loss": 0.3819, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.38107830286026, |
|
"eval_mae": 0.4645218849182129, |
|
"eval_mse": 0.38107892870903015, |
|
"eval_r2": 0.4483045268109964, |
|
"eval_runtime": 265.6012, |
|
"eval_samples_per_second": 3.012, |
|
"eval_steps_per_second": 0.753, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 0.0009129888900916457, |
|
"loss": 0.3799, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.38090744614601135, |
|
"eval_mae": 0.46439129114151, |
|
"eval_mse": 0.38090750575065613, |
|
"eval_r2": 0.4488611430910662, |
|
"eval_runtime": 271.1593, |
|
"eval_samples_per_second": 2.95, |
|
"eval_steps_per_second": 0.738, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 0.0009029511164747175, |
|
"loss": 0.383, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.3813271224498749, |
|
"eval_mae": 0.4647519886493683, |
|
"eval_mse": 0.38132649660110474, |
|
"eval_r2": 0.44615534251165345, |
|
"eval_runtime": 272.9035, |
|
"eval_samples_per_second": 2.931, |
|
"eval_steps_per_second": 0.733, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 0.0008924271737815855, |
|
"loss": 0.3809, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.37957870960235596, |
|
"eval_mae": 0.46358469128608704, |
|
"eval_mse": 0.3795786499977112, |
|
"eval_r2": 0.4479695425018687, |
|
"eval_runtime": 265.8282, |
|
"eval_samples_per_second": 3.009, |
|
"eval_steps_per_second": 0.752, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 0.0008814297593725199, |
|
"loss": 0.3805, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.38085976243019104, |
|
"eval_mae": 0.46433085203170776, |
|
"eval_mse": 0.38085973262786865, |
|
"eval_r2": 0.44749606591539537, |
|
"eval_runtime": 270.336, |
|
"eval_samples_per_second": 2.959, |
|
"eval_steps_per_second": 0.74, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 0.0008699721418614672, |
|
"loss": 0.3805, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.3793216049671173, |
|
"eval_mae": 0.46346384286880493, |
|
"eval_mse": 0.3793216049671173, |
|
"eval_r2": 0.4494445278203605, |
|
"eval_runtime": 264.9048, |
|
"eval_samples_per_second": 3.02, |
|
"eval_steps_per_second": 0.755, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 0.0008580681451071866, |
|
"loss": 0.3794, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.37941062450408936, |
|
"eval_mae": 0.4636057913303375, |
|
"eval_mse": 0.37941014766693115, |
|
"eval_r2": 0.4489888493647255, |
|
"eval_runtime": 279.0513, |
|
"eval_samples_per_second": 2.867, |
|
"eval_steps_per_second": 0.717, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 0.0008457321315344694, |
|
"loss": 0.3806, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.37968844175338745, |
|
"eval_mae": 0.4636916518211365, |
|
"eval_mse": 0.3796885013580322, |
|
"eval_r2": 0.4474722627694526, |
|
"eval_runtime": 268.6996, |
|
"eval_samples_per_second": 2.977, |
|
"eval_steps_per_second": 0.744, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 0.0008329789848055705, |
|
"loss": 0.3764, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.3784217834472656, |
|
"eval_mae": 0.4628964364528656, |
|
"eval_mse": 0.37842199206352234, |
|
"eval_r2": 0.45179472514661223, |
|
"eval_runtime": 275.6704, |
|
"eval_samples_per_second": 2.902, |
|
"eval_steps_per_second": 0.726, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 0.0008198240918627525, |
|
"loss": 0.3802, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.3779056966304779, |
|
"eval_mae": 0.4623972177505493, |
|
"eval_mse": 0.37790581583976746, |
|
"eval_r2": 0.45185399059020337, |
|
"eval_runtime": 269.5949, |
|
"eval_samples_per_second": 2.967, |
|
"eval_steps_per_second": 0.742, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 0.0008062833243636134, |
|
"loss": 0.3787, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.3789410889148712, |
|
"eval_mae": 0.46288514137268066, |
|
"eval_mse": 0.37894150614738464, |
|
"eval_r2": 0.4510378453068221, |
|
"eval_runtime": 291.6469, |
|
"eval_samples_per_second": 2.743, |
|
"eval_steps_per_second": 0.686, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 0.0007923730195315961, |
|
"loss": 0.379, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.3779994249343872, |
|
"eval_mae": 0.4624533951282501, |
|
"eval_mse": 0.377999484539032, |
|
"eval_r2": 0.4508841579385672, |
|
"eval_runtime": 278.879, |
|
"eval_samples_per_second": 2.869, |
|
"eval_steps_per_second": 0.717, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 0.0007781099604447793, |
|
"loss": 0.3765, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.37762123346328735, |
|
"eval_mae": 0.4622168242931366, |
|
"eval_mse": 0.37762120366096497, |
|
"eval_r2": 0.452898354940625, |
|
"eval_runtime": 278.1979, |
|
"eval_samples_per_second": 2.876, |
|
"eval_steps_per_second": 0.719, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 0.0007635113557867395, |
|
"loss": 0.3797, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.37847700715065, |
|
"eval_mae": 0.4626946747303009, |
|
"eval_mse": 0.378477543592453, |
|
"eval_r2": 0.4524606040645045, |
|
"eval_runtime": 265.647, |
|
"eval_samples_per_second": 3.012, |
|
"eval_steps_per_second": 0.753, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 0.0007485948190839076, |
|
"loss": 0.3772, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.3774454891681671, |
|
"eval_mae": 0.46223533153533936, |
|
"eval_mse": 0.37744590640068054, |
|
"eval_r2": 0.45290920313283933, |
|
"eval_runtime": 268.9714, |
|
"eval_samples_per_second": 2.974, |
|
"eval_steps_per_second": 0.744, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 0.0007333783474544757, |
|
"loss": 0.3792, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.37726637721061707, |
|
"eval_mae": 0.46205469965934753, |
|
"eval_mse": 0.3772661089897156, |
|
"eval_r2": 0.45215750792291154, |
|
"eval_runtime": 270.804, |
|
"eval_samples_per_second": 2.954, |
|
"eval_steps_per_second": 0.739, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 0.0007178802998944933, |
|
"loss": 0.3753, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.3767879009246826, |
|
"eval_mae": 0.4616534411907196, |
|
"eval_mse": 0.3767889440059662, |
|
"eval_r2": 0.4520469227666626, |
|
"eval_runtime": 263.9276, |
|
"eval_samples_per_second": 3.031, |
|
"eval_steps_per_second": 0.758, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 0.0007021193751273463, |
|
"loss": 0.3758, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.37764108180999756, |
|
"eval_mae": 0.4621761739253998, |
|
"eval_mse": 0.37764135003089905, |
|
"eval_r2": 0.45377418641465317, |
|
"eval_runtime": 283.3511, |
|
"eval_samples_per_second": 2.823, |
|
"eval_steps_per_second": 0.706, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 0.0006861145890433519, |
|
"loss": 0.377, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.3769887685775757, |
|
"eval_mae": 0.46189454197883606, |
|
"eval_mse": 0.37698760628700256, |
|
"eval_r2": 0.45451653345429155, |
|
"eval_runtime": 269.8194, |
|
"eval_samples_per_second": 2.965, |
|
"eval_steps_per_second": 0.741, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 0.0006698852517566836, |
|
"loss": 0.3761, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.3763187527656555, |
|
"eval_mae": 0.4613550901412964, |
|
"eval_mse": 0.3763185143470764, |
|
"eval_r2": 0.45283548813003005, |
|
"eval_runtime": 272.1057, |
|
"eval_samples_per_second": 2.94, |
|
"eval_steps_per_second": 0.735, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 0.0006534509443073072, |
|
"loss": 0.3764, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.3749840557575226, |
|
"eval_mae": 0.46052685379981995, |
|
"eval_mse": 0.37498345971107483, |
|
"eval_r2": 0.45454628406748754, |
|
"eval_runtime": 277.2032, |
|
"eval_samples_per_second": 2.886, |
|
"eval_steps_per_second": 0.721, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 0.0006368314950360416, |
|
"loss": 0.3747, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.37445199489593506, |
|
"eval_mae": 0.46010997891426086, |
|
"eval_mse": 0.3744511008262634, |
|
"eval_r2": 0.4568363191792082, |
|
"eval_runtime": 276.9731, |
|
"eval_samples_per_second": 2.888, |
|
"eval_steps_per_second": 0.722, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 0.0006200469556612435, |
|
"loss": 0.3754, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.3743913769721985, |
|
"eval_mae": 0.4600675106048584, |
|
"eval_mse": 0.3743920922279358, |
|
"eval_r2": 0.45744293340515363, |
|
"eval_runtime": 281.3782, |
|
"eval_samples_per_second": 2.843, |
|
"eval_steps_per_second": 0.711, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 0.0006031175770859848, |
|
"loss": 0.3736, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.3738979399204254, |
|
"eval_mae": 0.45969095826148987, |
|
"eval_mse": 0.37389707565307617, |
|
"eval_r2": 0.4581812073615178, |
|
"eval_runtime": 270.2281, |
|
"eval_samples_per_second": 2.96, |
|
"eval_steps_per_second": 0.74, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 0.0005860637849649073, |
|
"loss": 0.3743, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.3741012513637543, |
|
"eval_mae": 0.4597731828689575, |
|
"eval_mse": 0.37410181760787964, |
|
"eval_r2": 0.4575432685303391, |
|
"eval_runtime": 278.4971, |
|
"eval_samples_per_second": 2.873, |
|
"eval_steps_per_second": 0.718, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 0.000568906155060237, |
|
"loss": 0.3734, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.37282341718673706, |
|
"eval_mae": 0.4590661823749542, |
|
"eval_mse": 0.3728235960006714, |
|
"eval_r2": 0.457423055516365, |
|
"eval_runtime": 279.5267, |
|
"eval_samples_per_second": 2.862, |
|
"eval_steps_per_second": 0.715, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 0.0005516653884166902, |
|
"loss": 0.371, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.3734956681728363, |
|
"eval_mae": 0.45936986804008484, |
|
"eval_mse": 0.3734953701496124, |
|
"eval_r2": 0.4600796269569801, |
|
"eval_runtime": 274.1528, |
|
"eval_samples_per_second": 2.918, |
|
"eval_steps_per_second": 0.73, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 0.0005343622863852232, |
|
"loss": 0.3754, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.3738192021846771, |
|
"eval_mae": 0.45952871441841125, |
|
"eval_mse": 0.37381842732429504, |
|
"eval_r2": 0.4592306947745455, |
|
"eval_runtime": 277.5843, |
|
"eval_samples_per_second": 2.882, |
|
"eval_steps_per_second": 0.721, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 0.0005170177255257618, |
|
"loss": 0.373, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.3726402521133423, |
|
"eval_mae": 0.4587893486022949, |
|
"eval_mse": 0.3726405203342438, |
|
"eval_r2": 0.45936278859727786, |
|
"eval_runtime": 281.2863, |
|
"eval_samples_per_second": 2.844, |
|
"eval_steps_per_second": 0.711, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 0.0004996526324191872, |
|
"loss": 0.3717, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.37096771597862244, |
|
"eval_mae": 0.45787444710731506, |
|
"eval_mse": 0.3709667921066284, |
|
"eval_r2": 0.4609350988985167, |
|
"eval_runtime": 270.5778, |
|
"eval_samples_per_second": 2.957, |
|
"eval_steps_per_second": 0.739, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 0.0004822879584189731, |
|
"loss": 0.3727, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.3726476728916168, |
|
"eval_mae": 0.4586939215660095, |
|
"eval_mse": 0.37264758348464966, |
|
"eval_r2": 0.4587349241462606, |
|
"eval_runtime": 273.9493, |
|
"eval_samples_per_second": 2.92, |
|
"eval_steps_per_second": 0.73, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 0.0004649446543729322, |
|
"loss": 0.372, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.37159234285354614, |
|
"eval_mae": 0.4583428204059601, |
|
"eval_mse": 0.3715912401676178, |
|
"eval_r2": 0.4603821208241091, |
|
"eval_runtime": 278.26, |
|
"eval_samples_per_second": 2.875, |
|
"eval_steps_per_second": 0.719, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 0.0004476436453455742, |
|
"loss": 0.3715, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.3717365562915802, |
|
"eval_mae": 0.4582462012767792, |
|
"eval_mse": 0.3717361092567444, |
|
"eval_r2": 0.45976387138394437, |
|
"eval_runtime": 292.146, |
|
"eval_samples_per_second": 2.738, |
|
"eval_steps_per_second": 0.685, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 0.0004304058053715702, |
|
"loss": 0.3717, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.3717900216579437, |
|
"eval_mae": 0.45822903513908386, |
|
"eval_mse": 0.37178927659988403, |
|
"eval_r2": 0.45998171303819835, |
|
"eval_runtime": 269.82, |
|
"eval_samples_per_second": 2.965, |
|
"eval_steps_per_second": 0.741, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 0.0004132519322707882, |
|
"loss": 0.3707, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.3708600699901581, |
|
"eval_mae": 0.45766767859458923, |
|
"eval_mse": 0.3708597719669342, |
|
"eval_r2": 0.4603013858171726, |
|
"eval_runtime": 269.9605, |
|
"eval_samples_per_second": 2.963, |
|
"eval_steps_per_second": 0.741, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 0.00039620272255528065, |
|
"loss": 0.3739, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.3721630573272705, |
|
"eval_mae": 0.4584360122680664, |
|
"eval_mse": 0.37216344475746155, |
|
"eval_r2": 0.459010225690355, |
|
"eval_runtime": 268.3335, |
|
"eval_samples_per_second": 2.981, |
|
"eval_steps_per_second": 0.745, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 0.000379278746458504, |
|
"loss": 0.3702, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.37071841955184937, |
|
"eval_mae": 0.4574899673461914, |
|
"eval_mse": 0.3707171678543091, |
|
"eval_r2": 0.4613746011063965, |
|
"eval_runtime": 279.1852, |
|
"eval_samples_per_second": 2.865, |
|
"eval_steps_per_second": 0.716, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 0.000362500423116895, |
|
"loss": 0.3712, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.37087440490722656, |
|
"eval_mae": 0.45766788721084595, |
|
"eval_mse": 0.37087327241897583, |
|
"eval_r2": 0.4618904862700307, |
|
"eval_runtime": 292.1987, |
|
"eval_samples_per_second": 2.738, |
|
"eval_steps_per_second": 0.684, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 0.0003458879959337494, |
|
"loss": 0.3693, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.3699745535850525, |
|
"eval_mae": 0.4571460485458374, |
|
"eval_mse": 0.36997491121292114, |
|
"eval_r2": 0.4619671878026411, |
|
"eval_runtime": 273.9573, |
|
"eval_samples_per_second": 2.92, |
|
"eval_steps_per_second": 0.73, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 0.00032946150815512586, |
|
"loss": 0.3719, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.3705122470855713, |
|
"eval_mae": 0.45748522877693176, |
|
"eval_mse": 0.37051278352737427, |
|
"eval_r2": 0.46142887892760176, |
|
"eval_runtime": 268.3928, |
|
"eval_samples_per_second": 2.981, |
|
"eval_steps_per_second": 0.745, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 0.0003132407786872442, |
|
"loss": 0.3716, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.3708195388317108, |
|
"eval_mae": 0.45740067958831787, |
|
"eval_mse": 0.3708195090293884, |
|
"eval_r2": 0.4616999176965736, |
|
"eval_runtime": 273.3209, |
|
"eval_samples_per_second": 2.927, |
|
"eval_steps_per_second": 0.732, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 0.00029724537818455465, |
|
"loss": 0.3693, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.3704887628555298, |
|
"eval_mae": 0.45740318298339844, |
|
"eval_mse": 0.37048864364624023, |
|
"eval_r2": 0.4616717003785853, |
|
"eval_runtime": 273.739, |
|
"eval_samples_per_second": 2.922, |
|
"eval_steps_per_second": 0.731, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 0.00028149460543732666, |
|
"loss": 0.3706, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.37004169821739197, |
|
"eval_mae": 0.45711666345596313, |
|
"eval_mse": 0.37004172801971436, |
|
"eval_r2": 0.46274949898335205, |
|
"eval_runtime": 265.1017, |
|
"eval_samples_per_second": 3.018, |
|
"eval_steps_per_second": 0.754, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 0.00026600746408725063, |
|
"loss": 0.3703, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.3703955113887787, |
|
"eval_mae": 0.45735496282577515, |
|
"eval_mse": 0.37039563059806824, |
|
"eval_r2": 0.4622188399376109, |
|
"eval_runtime": 287.415, |
|
"eval_samples_per_second": 2.783, |
|
"eval_steps_per_second": 0.696, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 0.000250802639699139, |
|
"loss": 0.3693, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.37047404050827026, |
|
"eval_mae": 0.45720237493515015, |
|
"eval_mse": 0.37047335505485535, |
|
"eval_r2": 0.46234450920301984, |
|
"eval_runtime": 265.936, |
|
"eval_samples_per_second": 3.008, |
|
"eval_steps_per_second": 0.752, |
|
"step": 3250 |
|
} |
|
], |
|
"max_steps": 4760, |
|
"num_train_epochs": 20, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|