|
{ |
|
"best_metric": 0.46414923667907715, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-from-probs-large-2024_11_15-batch-size32_freeze_probs/checkpoint-15987", |
|
"epoch": 83.0, |
|
"eval_steps": 500, |
|
"global_step": 18177, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_explained_variance": 0.3432542085647583, |
|
"eval_kl_divergence": 0.31011611223220825, |
|
"eval_loss": 0.4855400025844574, |
|
"eval_mae": 0.1364378184080124, |
|
"eval_rmse": 0.17712123692035675, |
|
"eval_runtime": 55.3387, |
|
"eval_samples_per_second": 42.538, |
|
"eval_steps_per_second": 1.337, |
|
"learning_rate": 0.001, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_explained_variance": 0.38912513852119446, |
|
"eval_kl_divergence": 0.5077245235443115, |
|
"eval_loss": 0.47601452469825745, |
|
"eval_mae": 0.12465938925743103, |
|
"eval_rmse": 0.16875195503234863, |
|
"eval_runtime": 54.843, |
|
"eval_samples_per_second": 42.923, |
|
"eval_steps_per_second": 1.349, |
|
"learning_rate": 0.001, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.2831050228310503, |
|
"grad_norm": 0.35450002551078796, |
|
"learning_rate": 0.001, |
|
"loss": 0.5195, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_explained_variance": 0.3848476707935333, |
|
"eval_kl_divergence": 0.7895973324775696, |
|
"eval_loss": 0.4776814579963684, |
|
"eval_mae": 0.12300346046686172, |
|
"eval_rmse": 0.17065072059631348, |
|
"eval_runtime": 56.2195, |
|
"eval_samples_per_second": 41.872, |
|
"eval_steps_per_second": 1.316, |
|
"learning_rate": 0.001, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_explained_variance": 0.403704434633255, |
|
"eval_kl_divergence": 0.49319207668304443, |
|
"eval_loss": 0.47429159283638, |
|
"eval_mae": 0.12376764416694641, |
|
"eval_rmse": 0.1672389954328537, |
|
"eval_runtime": 54.7793, |
|
"eval_samples_per_second": 42.972, |
|
"eval_steps_per_second": 1.351, |
|
"learning_rate": 0.001, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 4.566210045662101, |
|
"grad_norm": 0.2313629388809204, |
|
"learning_rate": 0.001, |
|
"loss": 0.4742, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_explained_variance": 0.41316938400268555, |
|
"eval_kl_divergence": 0.2900688648223877, |
|
"eval_loss": 0.47457176446914673, |
|
"eval_mae": 0.12771284580230713, |
|
"eval_rmse": 0.16687722504138947, |
|
"eval_runtime": 55.1273, |
|
"eval_samples_per_second": 42.701, |
|
"eval_steps_per_second": 1.342, |
|
"learning_rate": 0.001, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_explained_variance": 0.40222811698913574, |
|
"eval_kl_divergence": 0.43988940119743347, |
|
"eval_loss": 0.4749792814254761, |
|
"eval_mae": 0.1252531260251999, |
|
"eval_rmse": 0.16735166311264038, |
|
"eval_runtime": 53.136, |
|
"eval_samples_per_second": 44.301, |
|
"eval_steps_per_second": 1.393, |
|
"learning_rate": 0.001, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 6.8493150684931505, |
|
"grad_norm": 0.18959695100784302, |
|
"learning_rate": 0.001, |
|
"loss": 0.4706, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_explained_variance": 0.4019981324672699, |
|
"eval_kl_divergence": 0.48684099316596985, |
|
"eval_loss": 0.4744807779788971, |
|
"eval_mae": 0.12594138085842133, |
|
"eval_rmse": 0.16705705225467682, |
|
"eval_runtime": 53.367, |
|
"eval_samples_per_second": 44.11, |
|
"eval_steps_per_second": 1.387, |
|
"learning_rate": 0.001, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_explained_variance": 0.41111621260643005, |
|
"eval_kl_divergence": 0.324148029088974, |
|
"eval_loss": 0.47424906492233276, |
|
"eval_mae": 0.12568950653076172, |
|
"eval_rmse": 0.16722555458545685, |
|
"eval_runtime": 55.5084, |
|
"eval_samples_per_second": 42.408, |
|
"eval_steps_per_second": 1.333, |
|
"learning_rate": 0.001, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_explained_variance": 0.4107116162776947, |
|
"eval_kl_divergence": 0.4560392200946808, |
|
"eval_loss": 0.4729686379432678, |
|
"eval_mae": 0.12355945259332657, |
|
"eval_rmse": 0.16584673523902893, |
|
"eval_runtime": 55.1596, |
|
"eval_samples_per_second": 42.676, |
|
"eval_steps_per_second": 1.342, |
|
"learning_rate": 0.001, |
|
"step": 1971 |
|
}, |
|
{ |
|
"epoch": 9.132420091324201, |
|
"grad_norm": 0.18577350676059723, |
|
"learning_rate": 0.001, |
|
"loss": 0.4678, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_explained_variance": 0.4190339744091034, |
|
"eval_kl_divergence": 0.2140849530696869, |
|
"eval_loss": 0.4750550389289856, |
|
"eval_mae": 0.12685616314411163, |
|
"eval_rmse": 0.1679263859987259, |
|
"eval_runtime": 56.0284, |
|
"eval_samples_per_second": 42.014, |
|
"eval_steps_per_second": 1.321, |
|
"learning_rate": 0.001, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_explained_variance": 0.41887199878692627, |
|
"eval_kl_divergence": 0.2529982030391693, |
|
"eval_loss": 0.4733181595802307, |
|
"eval_mae": 0.12647458910942078, |
|
"eval_rmse": 0.16627688705921173, |
|
"eval_runtime": 55.5532, |
|
"eval_samples_per_second": 42.374, |
|
"eval_steps_per_second": 1.332, |
|
"learning_rate": 0.001, |
|
"step": 2409 |
|
}, |
|
{ |
|
"epoch": 11.415525114155251, |
|
"grad_norm": 0.14618106186389923, |
|
"learning_rate": 0.001, |
|
"loss": 0.4674, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_explained_variance": 0.4073503315448761, |
|
"eval_kl_divergence": 0.3965540826320648, |
|
"eval_loss": 0.4758349061012268, |
|
"eval_mae": 0.1263781040906906, |
|
"eval_rmse": 0.1683548092842102, |
|
"eval_runtime": 53.8367, |
|
"eval_samples_per_second": 43.725, |
|
"eval_steps_per_second": 1.375, |
|
"learning_rate": 0.001, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_explained_variance": 0.41419240832328796, |
|
"eval_kl_divergence": 0.6054547429084778, |
|
"eval_loss": 0.4722050428390503, |
|
"eval_mae": 0.12233959883451462, |
|
"eval_rmse": 0.16495703160762787, |
|
"eval_runtime": 54.7322, |
|
"eval_samples_per_second": 43.009, |
|
"eval_steps_per_second": 1.352, |
|
"learning_rate": 0.001, |
|
"step": 2847 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"grad_norm": 0.15461835265159607, |
|
"learning_rate": 0.001, |
|
"loss": 0.4676, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_explained_variance": 0.40708938241004944, |
|
"eval_kl_divergence": 0.4203389585018158, |
|
"eval_loss": 0.4747372567653656, |
|
"eval_mae": 0.12501581013202667, |
|
"eval_rmse": 0.16655980050563812, |
|
"eval_runtime": 55.2289, |
|
"eval_samples_per_second": 42.623, |
|
"eval_steps_per_second": 1.34, |
|
"learning_rate": 0.001, |
|
"step": 3066 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_explained_variance": 0.41527059674263, |
|
"eval_kl_divergence": 0.6553499102592468, |
|
"eval_loss": 0.47325292229652405, |
|
"eval_mae": 0.12266030162572861, |
|
"eval_rmse": 0.16621644794940948, |
|
"eval_runtime": 54.2502, |
|
"eval_samples_per_second": 43.392, |
|
"eval_steps_per_second": 1.364, |
|
"learning_rate": 0.001, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 15.981735159817351, |
|
"grad_norm": 0.10063416510820389, |
|
"learning_rate": 0.001, |
|
"loss": 0.4663, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_explained_variance": 0.4175969660282135, |
|
"eval_kl_divergence": 0.35757607221603394, |
|
"eval_loss": 0.4734710156917572, |
|
"eval_mae": 0.12411689758300781, |
|
"eval_rmse": 0.16558559238910675, |
|
"eval_runtime": 53.6921, |
|
"eval_samples_per_second": 43.843, |
|
"eval_steps_per_second": 1.378, |
|
"learning_rate": 0.001, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_explained_variance": 0.4231180250644684, |
|
"eval_kl_divergence": 0.4545155465602875, |
|
"eval_loss": 0.4721581041812897, |
|
"eval_mae": 0.12205825001001358, |
|
"eval_rmse": 0.16431300342082977, |
|
"eval_runtime": 54.0719, |
|
"eval_samples_per_second": 43.535, |
|
"eval_steps_per_second": 1.369, |
|
"learning_rate": 0.001, |
|
"step": 3723 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_explained_variance": 0.42092254757881165, |
|
"eval_kl_divergence": 0.49019381403923035, |
|
"eval_loss": 0.4723944365978241, |
|
"eval_mae": 0.12245010584592819, |
|
"eval_rmse": 0.16473934054374695, |
|
"eval_runtime": 53.2446, |
|
"eval_samples_per_second": 44.211, |
|
"eval_steps_per_second": 1.39, |
|
"learning_rate": 0.001, |
|
"step": 3942 |
|
}, |
|
{ |
|
"epoch": 18.264840182648403, |
|
"grad_norm": 0.11052733659744263, |
|
"learning_rate": 0.001, |
|
"loss": 0.4655, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_explained_variance": 0.42237523198127747, |
|
"eval_kl_divergence": 0.3157788813114166, |
|
"eval_loss": 0.47289156913757324, |
|
"eval_mae": 0.12610264122486115, |
|
"eval_rmse": 0.164999321103096, |
|
"eval_runtime": 54.353, |
|
"eval_samples_per_second": 43.309, |
|
"eval_steps_per_second": 1.361, |
|
"learning_rate": 0.001, |
|
"step": 4161 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_explained_variance": 0.43422555923461914, |
|
"eval_kl_divergence": 0.45738106966018677, |
|
"eval_loss": 0.4697262644767761, |
|
"eval_mae": 0.12028751522302628, |
|
"eval_rmse": 0.16227416694164276, |
|
"eval_runtime": 52.1033, |
|
"eval_samples_per_second": 45.179, |
|
"eval_steps_per_second": 1.42, |
|
"learning_rate": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 20.54794520547945, |
|
"grad_norm": 0.10903308540582657, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4635, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_explained_variance": 0.43825283646583557, |
|
"eval_kl_divergence": 0.45688703656196594, |
|
"eval_loss": 0.46890661120414734, |
|
"eval_mae": 0.11968808621168137, |
|
"eval_rmse": 0.16127373278141022, |
|
"eval_runtime": 52.3325, |
|
"eval_samples_per_second": 44.982, |
|
"eval_steps_per_second": 1.414, |
|
"learning_rate": 0.0001, |
|
"step": 4599 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_explained_variance": 0.4373685419559479, |
|
"eval_kl_divergence": 0.45346954464912415, |
|
"eval_loss": 0.46905258297920227, |
|
"eval_mae": 0.12017489224672318, |
|
"eval_rmse": 0.16165030002593994, |
|
"eval_runtime": 51.1815, |
|
"eval_samples_per_second": 45.993, |
|
"eval_steps_per_second": 1.446, |
|
"learning_rate": 0.0001, |
|
"step": 4818 |
|
}, |
|
{ |
|
"epoch": 22.831050228310502, |
|
"grad_norm": 0.09725002944469452, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4615, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_explained_variance": 0.4442131519317627, |
|
"eval_kl_divergence": 0.2970678508281708, |
|
"eval_loss": 0.4691086411476135, |
|
"eval_mae": 0.1210075318813324, |
|
"eval_rmse": 0.1613779515028, |
|
"eval_runtime": 50.785, |
|
"eval_samples_per_second": 46.352, |
|
"eval_steps_per_second": 1.457, |
|
"learning_rate": 0.0001, |
|
"step": 5037 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_explained_variance": 0.4405536353588104, |
|
"eval_kl_divergence": 0.39161574840545654, |
|
"eval_loss": 0.46915334463119507, |
|
"eval_mae": 0.11959254741668701, |
|
"eval_rmse": 0.16161170601844788, |
|
"eval_runtime": 50.8712, |
|
"eval_samples_per_second": 46.274, |
|
"eval_steps_per_second": 1.455, |
|
"learning_rate": 0.0001, |
|
"step": 5256 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_explained_variance": 0.4465361535549164, |
|
"eval_kl_divergence": 0.4515945613384247, |
|
"eval_loss": 0.4676876664161682, |
|
"eval_mae": 0.11813607066869736, |
|
"eval_rmse": 0.16005758941173553, |
|
"eval_runtime": 50.537, |
|
"eval_samples_per_second": 46.58, |
|
"eval_steps_per_second": 1.464, |
|
"learning_rate": 0.0001, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 25.114155251141554, |
|
"grad_norm": 0.10921537131071091, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4601, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_explained_variance": 0.4434172809123993, |
|
"eval_kl_divergence": 0.6089490652084351, |
|
"eval_loss": 0.4679708480834961, |
|
"eval_mae": 0.11711684614419937, |
|
"eval_rmse": 0.1605486422777176, |
|
"eval_runtime": 49.8832, |
|
"eval_samples_per_second": 47.19, |
|
"eval_steps_per_second": 1.483, |
|
"learning_rate": 0.0001, |
|
"step": 5694 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_explained_variance": 0.4460805654525757, |
|
"eval_kl_divergence": 0.4741028845310211, |
|
"eval_loss": 0.4674595892429352, |
|
"eval_mae": 0.11824781447649002, |
|
"eval_rmse": 0.16004686057567596, |
|
"eval_runtime": 49.7793, |
|
"eval_samples_per_second": 47.289, |
|
"eval_steps_per_second": 1.487, |
|
"learning_rate": 0.0001, |
|
"step": 5913 |
|
}, |
|
{ |
|
"epoch": 27.397260273972602, |
|
"grad_norm": 0.11422494053840637, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4585, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_explained_variance": 0.4489245116710663, |
|
"eval_kl_divergence": 0.3355759084224701, |
|
"eval_loss": 0.46810340881347656, |
|
"eval_mae": 0.11996418237686157, |
|
"eval_rmse": 0.16060088574886322, |
|
"eval_runtime": 52.9491, |
|
"eval_samples_per_second": 44.458, |
|
"eval_steps_per_second": 1.398, |
|
"learning_rate": 0.0001, |
|
"step": 6132 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_explained_variance": 0.4459850490093231, |
|
"eval_kl_divergence": 0.43302619457244873, |
|
"eval_loss": 0.4678303897380829, |
|
"eval_mae": 0.11808297038078308, |
|
"eval_rmse": 0.16026519238948822, |
|
"eval_runtime": 50.5506, |
|
"eval_samples_per_second": 46.567, |
|
"eval_steps_per_second": 1.464, |
|
"learning_rate": 0.0001, |
|
"step": 6351 |
|
}, |
|
{ |
|
"epoch": 29.680365296803654, |
|
"grad_norm": 0.11833047866821289, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4578, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_explained_variance": 0.4503695070743561, |
|
"eval_kl_divergence": 0.3159695267677307, |
|
"eval_loss": 0.46800243854522705, |
|
"eval_mae": 0.11937135457992554, |
|
"eval_rmse": 0.160204216837883, |
|
"eval_runtime": 50.0689, |
|
"eval_samples_per_second": 47.015, |
|
"eval_steps_per_second": 1.478, |
|
"learning_rate": 0.0001, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_explained_variance": 0.4467611014842987, |
|
"eval_kl_divergence": 0.419010728597641, |
|
"eval_loss": 0.4676785469055176, |
|
"eval_mae": 0.11789224296808243, |
|
"eval_rmse": 0.1599912792444229, |
|
"eval_runtime": 50.2573, |
|
"eval_samples_per_second": 46.839, |
|
"eval_steps_per_second": 1.472, |
|
"learning_rate": 0.0001, |
|
"step": 6789 |
|
}, |
|
{ |
|
"epoch": 31.963470319634702, |
|
"grad_norm": 0.1234586164355278, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4579, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_explained_variance": 0.4503757953643799, |
|
"eval_kl_divergence": 0.3705631494522095, |
|
"eval_loss": 0.46752873063087463, |
|
"eval_mae": 0.11878199130296707, |
|
"eval_rmse": 0.159804567694664, |
|
"eval_runtime": 50.3085, |
|
"eval_samples_per_second": 46.791, |
|
"eval_steps_per_second": 1.471, |
|
"learning_rate": 0.0001, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_explained_variance": 0.4545632600784302, |
|
"eval_kl_divergence": 0.35043853521347046, |
|
"eval_loss": 0.46710190176963806, |
|
"eval_mae": 0.1181415393948555, |
|
"eval_rmse": 0.1593446284532547, |
|
"eval_runtime": 50.4199, |
|
"eval_samples_per_second": 46.688, |
|
"eval_steps_per_second": 1.468, |
|
"learning_rate": 0.0001, |
|
"step": 7227 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_explained_variance": 0.4532606303691864, |
|
"eval_kl_divergence": 0.3881392180919647, |
|
"eval_loss": 0.4670344293117523, |
|
"eval_mae": 0.11804797500371933, |
|
"eval_rmse": 0.15942266583442688, |
|
"eval_runtime": 50.088, |
|
"eval_samples_per_second": 46.997, |
|
"eval_steps_per_second": 1.477, |
|
"learning_rate": 0.0001, |
|
"step": 7446 |
|
}, |
|
{ |
|
"epoch": 34.24657534246575, |
|
"grad_norm": 0.14323526620864868, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4569, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_explained_variance": 0.4555685818195343, |
|
"eval_kl_divergence": 0.43976902961730957, |
|
"eval_loss": 0.4662601053714752, |
|
"eval_mae": 0.11664538830518723, |
|
"eval_rmse": 0.1586536318063736, |
|
"eval_runtime": 49.8708, |
|
"eval_samples_per_second": 47.202, |
|
"eval_steps_per_second": 1.484, |
|
"learning_rate": 0.0001, |
|
"step": 7665 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_explained_variance": 0.4544428884983063, |
|
"eval_kl_divergence": 0.4382496476173401, |
|
"eval_loss": 0.46657058596611023, |
|
"eval_mae": 0.11700741201639175, |
|
"eval_rmse": 0.15874631702899933, |
|
"eval_runtime": 49.7975, |
|
"eval_samples_per_second": 47.271, |
|
"eval_steps_per_second": 1.486, |
|
"learning_rate": 0.0001, |
|
"step": 7884 |
|
}, |
|
{ |
|
"epoch": 36.529680365296805, |
|
"grad_norm": 0.17629703879356384, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4572, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_explained_variance": 0.45941635966300964, |
|
"eval_kl_divergence": 0.4330490827560425, |
|
"eval_loss": 0.4657588005065918, |
|
"eval_mae": 0.11633748561143875, |
|
"eval_rmse": 0.15810036659240723, |
|
"eval_runtime": 51.4251, |
|
"eval_samples_per_second": 45.775, |
|
"eval_steps_per_second": 1.439, |
|
"learning_rate": 0.0001, |
|
"step": 8103 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_explained_variance": 0.4566784203052521, |
|
"eval_kl_divergence": 0.4877949357032776, |
|
"eval_loss": 0.4659184217453003, |
|
"eval_mae": 0.11623784899711609, |
|
"eval_rmse": 0.15832678973674774, |
|
"eval_runtime": 49.7333, |
|
"eval_samples_per_second": 47.332, |
|
"eval_steps_per_second": 1.488, |
|
"learning_rate": 0.0001, |
|
"step": 8322 |
|
}, |
|
{ |
|
"epoch": 38.81278538812786, |
|
"grad_norm": 0.1781003624200821, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4572, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_explained_variance": 0.45519956946372986, |
|
"eval_kl_divergence": 0.3790707290172577, |
|
"eval_loss": 0.46703553199768066, |
|
"eval_mae": 0.11782807856798172, |
|
"eval_rmse": 0.15946339070796967, |
|
"eval_runtime": 52.4, |
|
"eval_samples_per_second": 44.924, |
|
"eval_steps_per_second": 1.412, |
|
"learning_rate": 0.0001, |
|
"step": 8541 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_explained_variance": 0.45683178305625916, |
|
"eval_kl_divergence": 0.38892972469329834, |
|
"eval_loss": 0.4664987027645111, |
|
"eval_mae": 0.11783644556999207, |
|
"eval_rmse": 0.15876977145671844, |
|
"eval_runtime": 50.7398, |
|
"eval_samples_per_second": 46.394, |
|
"eval_steps_per_second": 1.458, |
|
"learning_rate": 0.0001, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_explained_variance": 0.4591364860534668, |
|
"eval_kl_divergence": 0.3222128450870514, |
|
"eval_loss": 0.46659526228904724, |
|
"eval_mae": 0.11838778108358383, |
|
"eval_rmse": 0.15888933837413788, |
|
"eval_runtime": 50.0159, |
|
"eval_samples_per_second": 47.065, |
|
"eval_steps_per_second": 1.48, |
|
"learning_rate": 0.0001, |
|
"step": 8979 |
|
}, |
|
{ |
|
"epoch": 41.0958904109589, |
|
"grad_norm": 0.13085126876831055, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4559, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_explained_variance": 0.4606964886188507, |
|
"eval_kl_divergence": 0.426244854927063, |
|
"eval_loss": 0.4655005633831024, |
|
"eval_mae": 0.11635158210992813, |
|
"eval_rmse": 0.15787668526172638, |
|
"eval_runtime": 49.9099, |
|
"eval_samples_per_second": 47.165, |
|
"eval_steps_per_second": 1.483, |
|
"learning_rate": 0.0001, |
|
"step": 9198 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_explained_variance": 0.46034756302833557, |
|
"eval_kl_divergence": 0.4611224830150604, |
|
"eval_loss": 0.4656265676021576, |
|
"eval_mae": 0.11616652458906174, |
|
"eval_rmse": 0.1579464077949524, |
|
"eval_runtime": 50.0123, |
|
"eval_samples_per_second": 47.068, |
|
"eval_steps_per_second": 1.48, |
|
"learning_rate": 0.0001, |
|
"step": 9417 |
|
}, |
|
{ |
|
"epoch": 43.37899543378995, |
|
"grad_norm": 0.17523790895938873, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4554, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_explained_variance": 0.4616149961948395, |
|
"eval_kl_divergence": 0.45858410000801086, |
|
"eval_loss": 0.4655725955963135, |
|
"eval_mae": 0.11644264310598373, |
|
"eval_rmse": 0.15800905227661133, |
|
"eval_runtime": 50.6284, |
|
"eval_samples_per_second": 46.496, |
|
"eval_steps_per_second": 1.462, |
|
"learning_rate": 0.0001, |
|
"step": 9636 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_explained_variance": 0.45969870686531067, |
|
"eval_kl_divergence": 0.4367772340774536, |
|
"eval_loss": 0.46600833535194397, |
|
"eval_mae": 0.11579249054193497, |
|
"eval_rmse": 0.15833592414855957, |
|
"eval_runtime": 50.629, |
|
"eval_samples_per_second": 46.495, |
|
"eval_steps_per_second": 1.462, |
|
"learning_rate": 0.0001, |
|
"step": 9855 |
|
}, |
|
{ |
|
"epoch": 45.662100456621005, |
|
"grad_norm": 0.1231347844004631, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4557, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_explained_variance": 0.4603704512119293, |
|
"eval_kl_divergence": 0.41175922751426697, |
|
"eval_loss": 0.4660418927669525, |
|
"eval_mae": 0.11639311909675598, |
|
"eval_rmse": 0.1581837385892868, |
|
"eval_runtime": 50.1537, |
|
"eval_samples_per_second": 46.936, |
|
"eval_steps_per_second": 1.475, |
|
"learning_rate": 0.0001, |
|
"step": 10074 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_explained_variance": 0.4613979756832123, |
|
"eval_kl_divergence": 0.5424114465713501, |
|
"eval_loss": 0.46521857380867004, |
|
"eval_mae": 0.11542114615440369, |
|
"eval_rmse": 0.15771377086639404, |
|
"eval_runtime": 49.6928, |
|
"eval_samples_per_second": 47.371, |
|
"eval_steps_per_second": 1.489, |
|
"learning_rate": 0.0001, |
|
"step": 10293 |
|
}, |
|
{ |
|
"epoch": 47.945205479452056, |
|
"grad_norm": 0.46352267265319824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4551, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_explained_variance": 0.45960724353790283, |
|
"eval_kl_divergence": 0.525124728679657, |
|
"eval_loss": 0.46598610281944275, |
|
"eval_mae": 0.1159835234284401, |
|
"eval_rmse": 0.15856431424617767, |
|
"eval_runtime": 49.9974, |
|
"eval_samples_per_second": 47.082, |
|
"eval_steps_per_second": 1.48, |
|
"learning_rate": 0.0001, |
|
"step": 10512 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_explained_variance": 0.4572352468967438, |
|
"eval_kl_divergence": 0.5006867051124573, |
|
"eval_loss": 0.46604350209236145, |
|
"eval_mae": 0.11609696596860886, |
|
"eval_rmse": 0.15853044390678406, |
|
"eval_runtime": 50.2446, |
|
"eval_samples_per_second": 46.851, |
|
"eval_steps_per_second": 1.473, |
|
"learning_rate": 0.0001, |
|
"step": 10731 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_explained_variance": 0.4658548831939697, |
|
"eval_kl_divergence": 0.24239596724510193, |
|
"eval_loss": 0.46660009026527405, |
|
"eval_mae": 0.11854288727045059, |
|
"eval_rmse": 0.15863054990768433, |
|
"eval_runtime": 50.1897, |
|
"eval_samples_per_second": 46.902, |
|
"eval_steps_per_second": 1.474, |
|
"learning_rate": 0.0001, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 50.22831050228311, |
|
"grad_norm": 0.1688494235277176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4545, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_explained_variance": 0.45888975262641907, |
|
"eval_kl_divergence": 0.4170607030391693, |
|
"eval_loss": 0.4660661220550537, |
|
"eval_mae": 0.11618483066558838, |
|
"eval_rmse": 0.15835459530353546, |
|
"eval_runtime": 49.5535, |
|
"eval_samples_per_second": 47.504, |
|
"eval_steps_per_second": 1.493, |
|
"learning_rate": 0.0001, |
|
"step": 11169 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_explained_variance": 0.46297597885131836, |
|
"eval_kl_divergence": 0.49118655920028687, |
|
"eval_loss": 0.4649689793586731, |
|
"eval_mae": 0.11549883335828781, |
|
"eval_rmse": 0.1575259119272232, |
|
"eval_runtime": 50.3774, |
|
"eval_samples_per_second": 46.727, |
|
"eval_steps_per_second": 1.469, |
|
"learning_rate": 0.0001, |
|
"step": 11388 |
|
}, |
|
{ |
|
"epoch": 52.51141552511415, |
|
"grad_norm": 0.2805333137512207, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4548, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_explained_variance": 0.46440085768699646, |
|
"eval_kl_divergence": 0.4030352830886841, |
|
"eval_loss": 0.4653578996658325, |
|
"eval_mae": 0.11687562614679337, |
|
"eval_rmse": 0.15780305862426758, |
|
"eval_runtime": 51.1877, |
|
"eval_samples_per_second": 45.988, |
|
"eval_steps_per_second": 1.446, |
|
"learning_rate": 0.0001, |
|
"step": 11607 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_explained_variance": 0.4594965875148773, |
|
"eval_kl_divergence": 0.4810858964920044, |
|
"eval_loss": 0.4660585820674896, |
|
"eval_mae": 0.11529505252838135, |
|
"eval_rmse": 0.15853293240070343, |
|
"eval_runtime": 51.2952, |
|
"eval_samples_per_second": 45.891, |
|
"eval_steps_per_second": 1.443, |
|
"learning_rate": 0.0001, |
|
"step": 11826 |
|
}, |
|
{ |
|
"epoch": 54.794520547945204, |
|
"grad_norm": 0.22778521478176117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.455, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_explained_variance": 0.46380600333213806, |
|
"eval_kl_divergence": 0.3773800730705261, |
|
"eval_loss": 0.46527624130249023, |
|
"eval_mae": 0.11668615788221359, |
|
"eval_rmse": 0.1576414853334427, |
|
"eval_runtime": 50.6825, |
|
"eval_samples_per_second": 46.446, |
|
"eval_steps_per_second": 1.46, |
|
"learning_rate": 0.0001, |
|
"step": 12045 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_explained_variance": 0.4669934809207916, |
|
"eval_kl_divergence": 0.32541513442993164, |
|
"eval_loss": 0.4654240906238556, |
|
"eval_mae": 0.11757931858301163, |
|
"eval_rmse": 0.1575363427400589, |
|
"eval_runtime": 50.538, |
|
"eval_samples_per_second": 46.579, |
|
"eval_steps_per_second": 1.464, |
|
"learning_rate": 0.0001, |
|
"step": 12264 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_explained_variance": 0.4661710560321808, |
|
"eval_kl_divergence": 0.3648814857006073, |
|
"eval_loss": 0.4654492139816284, |
|
"eval_mae": 0.11615876108407974, |
|
"eval_rmse": 0.15751774609088898, |
|
"eval_runtime": 51.1673, |
|
"eval_samples_per_second": 46.006, |
|
"eval_steps_per_second": 1.446, |
|
"learning_rate": 0.0001, |
|
"step": 12483 |
|
}, |
|
{ |
|
"epoch": 57.077625570776256, |
|
"grad_norm": 0.16715611517429352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4531, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_explained_variance": 0.4606919586658478, |
|
"eval_kl_divergence": 0.40749335289001465, |
|
"eval_loss": 0.46654412150382996, |
|
"eval_mae": 0.1166309341788292, |
|
"eval_rmse": 0.15835203230381012, |
|
"eval_runtime": 50.603, |
|
"eval_samples_per_second": 46.519, |
|
"eval_steps_per_second": 1.462, |
|
"learning_rate": 0.0001, |
|
"step": 12702 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_explained_variance": 0.4653950035572052, |
|
"eval_kl_divergence": 0.42019784450531006, |
|
"eval_loss": 0.465238481760025, |
|
"eval_mae": 0.11570876836776733, |
|
"eval_rmse": 0.15746039152145386, |
|
"eval_runtime": 50.3267, |
|
"eval_samples_per_second": 46.774, |
|
"eval_steps_per_second": 1.47, |
|
"learning_rate": 1e-05, |
|
"step": 12921 |
|
}, |
|
{ |
|
"epoch": 59.36073059360731, |
|
"grad_norm": 0.19701753556728363, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4538, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_explained_variance": 0.4668855369091034, |
|
"eval_kl_divergence": 0.4084234833717346, |
|
"eval_loss": 0.46530231833457947, |
|
"eval_mae": 0.11569295078516006, |
|
"eval_rmse": 0.15709955990314484, |
|
"eval_runtime": 51.1174, |
|
"eval_samples_per_second": 46.051, |
|
"eval_steps_per_second": 1.448, |
|
"learning_rate": 1e-05, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_explained_variance": 0.4661245346069336, |
|
"eval_kl_divergence": 0.4496937096118927, |
|
"eval_loss": 0.4653523564338684, |
|
"eval_mae": 0.11528477817773819, |
|
"eval_rmse": 0.15729330480098724, |
|
"eval_runtime": 50.8416, |
|
"eval_samples_per_second": 46.301, |
|
"eval_steps_per_second": 1.456, |
|
"learning_rate": 1e-05, |
|
"step": 13359 |
|
}, |
|
{ |
|
"epoch": 61.64383561643836, |
|
"grad_norm": 0.1874207705259323, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4529, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_explained_variance": 0.4681651294231415, |
|
"eval_kl_divergence": 0.411173015832901, |
|
"eval_loss": 0.46477487683296204, |
|
"eval_mae": 0.11529665440320969, |
|
"eval_rmse": 0.15684308111667633, |
|
"eval_runtime": 52.6214, |
|
"eval_samples_per_second": 44.735, |
|
"eval_steps_per_second": 1.406, |
|
"learning_rate": 1e-05, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_explained_variance": 0.47016242146492004, |
|
"eval_kl_divergence": 0.3748082220554352, |
|
"eval_loss": 0.46481335163116455, |
|
"eval_mae": 0.11518841236829758, |
|
"eval_rmse": 0.15671293437480927, |
|
"eval_runtime": 53.2469, |
|
"eval_samples_per_second": 44.209, |
|
"eval_steps_per_second": 1.39, |
|
"learning_rate": 1e-05, |
|
"step": 13797 |
|
}, |
|
{ |
|
"epoch": 63.926940639269404, |
|
"grad_norm": 0.22562281787395477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4527, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_explained_variance": 0.4721170663833618, |
|
"eval_kl_divergence": 0.3044198155403137, |
|
"eval_loss": 0.46523070335388184, |
|
"eval_mae": 0.11618036776781082, |
|
"eval_rmse": 0.15709933638572693, |
|
"eval_runtime": 53.3051, |
|
"eval_samples_per_second": 44.161, |
|
"eval_steps_per_second": 1.388, |
|
"learning_rate": 1e-05, |
|
"step": 14016 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_explained_variance": 0.46695852279663086, |
|
"eval_kl_divergence": 0.46853822469711304, |
|
"eval_loss": 0.46484872698783875, |
|
"eval_mae": 0.11532068997621536, |
|
"eval_rmse": 0.1568661779165268, |
|
"eval_runtime": 52.7599, |
|
"eval_samples_per_second": 44.617, |
|
"eval_steps_per_second": 1.403, |
|
"learning_rate": 1e-05, |
|
"step": 14235 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_explained_variance": 0.46712610125541687, |
|
"eval_kl_divergence": 0.508738100528717, |
|
"eval_loss": 0.46500927209854126, |
|
"eval_mae": 0.11475471407175064, |
|
"eval_rmse": 0.15729309618473053, |
|
"eval_runtime": 54.0149, |
|
"eval_samples_per_second": 43.581, |
|
"eval_steps_per_second": 1.37, |
|
"learning_rate": 1e-05, |
|
"step": 14454 |
|
}, |
|
{ |
|
"epoch": 66.21004566210046, |
|
"grad_norm": 0.18448679149150848, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4531, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_explained_variance": 0.4690088927745819, |
|
"eval_kl_divergence": 0.42743220925331116, |
|
"eval_loss": 0.4645930230617523, |
|
"eval_mae": 0.1155417189002037, |
|
"eval_rmse": 0.1567572057247162, |
|
"eval_runtime": 52.5655, |
|
"eval_samples_per_second": 44.782, |
|
"eval_steps_per_second": 1.408, |
|
"learning_rate": 1e-05, |
|
"step": 14673 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_explained_variance": 0.4680323302745819, |
|
"eval_kl_divergence": 0.49686378240585327, |
|
"eval_loss": 0.46456360816955566, |
|
"eval_mae": 0.11437365412712097, |
|
"eval_rmse": 0.1566230058670044, |
|
"eval_runtime": 50.8799, |
|
"eval_samples_per_second": 46.266, |
|
"eval_steps_per_second": 1.454, |
|
"learning_rate": 1e-05, |
|
"step": 14892 |
|
}, |
|
{ |
|
"epoch": 68.4931506849315, |
|
"grad_norm": 0.21752646565437317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.452, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_explained_variance": 0.4696376323699951, |
|
"eval_kl_divergence": 0.44800856709480286, |
|
"eval_loss": 0.464430034160614, |
|
"eval_mae": 0.11452987045049667, |
|
"eval_rmse": 0.15642575919628143, |
|
"eval_runtime": 61.8405, |
|
"eval_samples_per_second": 38.066, |
|
"eval_steps_per_second": 1.197, |
|
"learning_rate": 1e-05, |
|
"step": 15111 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_explained_variance": 0.4692017734050751, |
|
"eval_kl_divergence": 0.42908576130867004, |
|
"eval_loss": 0.4648461937904358, |
|
"eval_mae": 0.11500384658575058, |
|
"eval_rmse": 0.15674862265586853, |
|
"eval_runtime": 60.5787, |
|
"eval_samples_per_second": 38.859, |
|
"eval_steps_per_second": 1.222, |
|
"learning_rate": 1e-05, |
|
"step": 15330 |
|
}, |
|
{ |
|
"epoch": 70.77625570776256, |
|
"grad_norm": 0.23285503685474396, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4524, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_explained_variance": 0.4711233675479889, |
|
"eval_kl_divergence": 0.37966692447662354, |
|
"eval_loss": 0.4645022749900818, |
|
"eval_mae": 0.11555531620979309, |
|
"eval_rmse": 0.15646833181381226, |
|
"eval_runtime": 61.2584, |
|
"eval_samples_per_second": 38.427, |
|
"eval_steps_per_second": 1.208, |
|
"learning_rate": 1e-05, |
|
"step": 15549 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_explained_variance": 0.4690466821193695, |
|
"eval_kl_divergence": 0.42796915769577026, |
|
"eval_loss": 0.46473589539527893, |
|
"eval_mae": 0.11497951298952103, |
|
"eval_rmse": 0.15693025290966034, |
|
"eval_runtime": 61.782, |
|
"eval_samples_per_second": 38.102, |
|
"eval_steps_per_second": 1.198, |
|
"learning_rate": 1e-05, |
|
"step": 15768 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_explained_variance": 0.4707035720348358, |
|
"eval_kl_divergence": 0.4591566324234009, |
|
"eval_loss": 0.46414923667907715, |
|
"eval_mae": 0.11423368006944656, |
|
"eval_rmse": 0.15631103515625, |
|
"eval_runtime": 62.9115, |
|
"eval_samples_per_second": 37.418, |
|
"eval_steps_per_second": 1.176, |
|
"learning_rate": 1e-05, |
|
"step": 15987 |
|
}, |
|
{ |
|
"epoch": 73.05936073059361, |
|
"grad_norm": 0.1904192417860031, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4515, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_explained_variance": 0.4705829620361328, |
|
"eval_kl_divergence": 0.43208685517311096, |
|
"eval_loss": 0.4641610085964203, |
|
"eval_mae": 0.11505597829818726, |
|
"eval_rmse": 0.1563975065946579, |
|
"eval_runtime": 61.932, |
|
"eval_samples_per_second": 38.009, |
|
"eval_steps_per_second": 1.195, |
|
"learning_rate": 1e-05, |
|
"step": 16206 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_explained_variance": 0.47077181935310364, |
|
"eval_kl_divergence": 0.3843104839324951, |
|
"eval_loss": 0.4644509255886078, |
|
"eval_mae": 0.11519055813550949, |
|
"eval_rmse": 0.15653057396411896, |
|
"eval_runtime": 62.3182, |
|
"eval_samples_per_second": 37.774, |
|
"eval_steps_per_second": 1.187, |
|
"learning_rate": 1e-05, |
|
"step": 16425 |
|
}, |
|
{ |
|
"epoch": 75.34246575342466, |
|
"grad_norm": 0.2563965618610382, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4521, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_explained_variance": 0.4675123989582062, |
|
"eval_kl_divergence": 0.5215911269187927, |
|
"eval_loss": 0.4646488130092621, |
|
"eval_mae": 0.1146780475974083, |
|
"eval_rmse": 0.1569206565618515, |
|
"eval_runtime": 66.0488, |
|
"eval_samples_per_second": 35.64, |
|
"eval_steps_per_second": 1.12, |
|
"learning_rate": 1e-05, |
|
"step": 16644 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_explained_variance": 0.46909868717193604, |
|
"eval_kl_divergence": 0.4094104468822479, |
|
"eval_loss": 0.46475714445114136, |
|
"eval_mae": 0.11523856967687607, |
|
"eval_rmse": 0.15687990188598633, |
|
"eval_runtime": 62.1685, |
|
"eval_samples_per_second": 37.865, |
|
"eval_steps_per_second": 1.19, |
|
"learning_rate": 1e-05, |
|
"step": 16863 |
|
}, |
|
{ |
|
"epoch": 77.62557077625571, |
|
"grad_norm": 0.16491472721099854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4519, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_explained_variance": 0.47086599469184875, |
|
"eval_kl_divergence": 0.43988528847694397, |
|
"eval_loss": 0.46428272128105164, |
|
"eval_mae": 0.11493176966905594, |
|
"eval_rmse": 0.15638257563114166, |
|
"eval_runtime": 61.9923, |
|
"eval_samples_per_second": 37.972, |
|
"eval_steps_per_second": 1.194, |
|
"learning_rate": 1e-05, |
|
"step": 17082 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_explained_variance": 0.4697439670562744, |
|
"eval_kl_divergence": 0.4178011417388916, |
|
"eval_loss": 0.4645934998989105, |
|
"eval_mae": 0.11465150117874146, |
|
"eval_rmse": 0.15666015446186066, |
|
"eval_runtime": 63.0404, |
|
"eval_samples_per_second": 37.341, |
|
"eval_steps_per_second": 1.174, |
|
"learning_rate": 1e-05, |
|
"step": 17301 |
|
}, |
|
{ |
|
"epoch": 79.90867579908675, |
|
"grad_norm": 0.1647184044122696, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.4517, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_explained_variance": 0.4699563980102539, |
|
"eval_kl_divergence": 0.43727052211761475, |
|
"eval_loss": 0.46436014771461487, |
|
"eval_mae": 0.11501001566648483, |
|
"eval_rmse": 0.15643416345119476, |
|
"eval_runtime": 61.5606, |
|
"eval_samples_per_second": 38.239, |
|
"eval_steps_per_second": 1.202, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_explained_variance": 0.468768835067749, |
|
"eval_kl_divergence": 0.47009941935539246, |
|
"eval_loss": 0.46448636054992676, |
|
"eval_mae": 0.11508657783269882, |
|
"eval_rmse": 0.15673168003559113, |
|
"eval_runtime": 62.9178, |
|
"eval_samples_per_second": 37.414, |
|
"eval_steps_per_second": 1.176, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 17739 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_explained_variance": 0.470253586769104, |
|
"eval_kl_divergence": 0.4601159989833832, |
|
"eval_loss": 0.4644375145435333, |
|
"eval_mae": 0.11455937474966049, |
|
"eval_rmse": 0.15652652084827423, |
|
"eval_runtime": 62.6023, |
|
"eval_samples_per_second": 37.602, |
|
"eval_steps_per_second": 1.182, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 17958 |
|
}, |
|
{ |
|
"epoch": 82.1917808219178, |
|
"grad_norm": 0.2432813197374344, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.4514, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_explained_variance": 0.468420147895813, |
|
"eval_kl_divergence": 0.4510715901851654, |
|
"eval_loss": 0.46457409858703613, |
|
"eval_mae": 0.11468392610549927, |
|
"eval_rmse": 0.15669189393520355, |
|
"eval_runtime": 62.7877, |
|
"eval_samples_per_second": 37.491, |
|
"eval_steps_per_second": 1.179, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 18177 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 18177, |
|
"total_flos": 8.603009036605255e+19, |
|
"train_loss": 0.45949580130708517, |
|
"train_runtime": 19431.3015, |
|
"train_samples_per_second": 54.06, |
|
"train_steps_per_second": 1.691 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32850, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.603009036605255e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|