{
  "best_metric": 11.866828918457031,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.006207613638127163,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 3.103806819063581e-05, "grad_norm": 0.06145656853914261, "learning_rate": 1.009e-05, "loss": 11.9393, "step": 1 },
    { "epoch": 3.103806819063581e-05, "eval_loss": 11.937196731567383, "eval_runtime": 414.1282, "eval_samples_per_second": 32.758, "eval_steps_per_second": 8.191, "step": 1 },
    { "epoch": 6.207613638127163e-05, "grad_norm": 0.10884089767932892, "learning_rate": 2.018e-05, "loss": 11.9275, "step": 2 },
    { "epoch": 9.311420457190745e-05, "grad_norm": 0.11498255282640457, "learning_rate": 3.027e-05, "loss": 11.9388, "step": 3 },
    { "epoch": 0.00012415227276254325, "grad_norm": 0.088991180062294, "learning_rate": 4.036e-05, "loss": 11.9282, "step": 4 },
    { "epoch": 0.00015519034095317908, "grad_norm": 0.07985679805278778, "learning_rate": 5.045e-05, "loss": 11.9416, "step": 5 },
    { "epoch": 0.0001862284091438149, "grad_norm": 0.07948625832796097, "learning_rate": 6.054e-05, "loss": 11.9386, "step": 6 },
    { "epoch": 0.0002172664773344507, "grad_norm": 0.08249641954898834, "learning_rate": 7.062999999999999e-05, "loss": 11.9342, "step": 7 },
    { "epoch": 0.0002483045455250865, "grad_norm": 0.06686617434024811, "learning_rate": 8.072e-05, "loss": 11.9395, "step": 8 },
    { "epoch": 0.0002793426137157223, "grad_norm": 0.07867742329835892, "learning_rate": 9.081e-05, "loss": 11.9405, "step": 9 },
    { "epoch": 0.00031038068190635817, "grad_norm": 0.09347719699144363, "learning_rate": 0.0001009, "loss": 11.9347, "step": 10 },
    { "epoch": 0.000341418750096994, "grad_norm": 0.09216347336769104, "learning_rate": 0.00010036894736842106, "loss": 11.9372, "step": 11 },
    { "epoch": 0.0003724568182876298, "grad_norm": 0.08478887379169464, "learning_rate": 9.98378947368421e-05, "loss": 11.9306, "step": 12 },
    { "epoch": 0.0004034948864782656, "grad_norm": 0.10052121430635452, "learning_rate": 9.930684210526315e-05, "loss": 11.9362, "step": 13 },
    { "epoch": 0.0004345329546689014, "grad_norm": 0.09549523890018463, "learning_rate": 9.877578947368421e-05, "loss": 11.933, "step": 14 },
    { "epoch": 0.0004655710228595372, "grad_norm": 0.10671835392713547, "learning_rate": 9.824473684210527e-05, "loss": 11.9354, "step": 15 },
    { "epoch": 0.000496609091050173, "grad_norm": 0.12740136682987213, "learning_rate": 9.771368421052632e-05, "loss": 11.9261, "step": 16 },
    { "epoch": 0.0005276471592408088, "grad_norm": 0.10865606367588043, "learning_rate": 9.718263157894736e-05, "loss": 11.9368, "step": 17 },
    { "epoch": 0.0005586852274314446, "grad_norm": 0.15083859860897064, "learning_rate": 9.665157894736842e-05, "loss": 11.9356, "step": 18 },
    { "epoch": 0.0005897232956220805, "grad_norm": 0.18175148963928223, "learning_rate": 9.612052631578948e-05, "loss": 11.9251, "step": 19 },
    { "epoch": 0.0006207613638127163, "grad_norm": 0.1715611070394516, "learning_rate": 9.558947368421052e-05, "loss": 11.927, "step": 20 },
    { "epoch": 0.0006517994320033521, "grad_norm": 0.17699895799160004, "learning_rate": 9.505842105263159e-05, "loss": 11.9289, "step": 21 },
    { "epoch": 0.000682837500193988, "grad_norm": 0.17426027357578278, "learning_rate": 9.452736842105263e-05, "loss": 11.9333, "step": 22 },
    { "epoch": 0.0007138755683846238, "grad_norm": 0.20959268510341644, "learning_rate": 9.399631578947368e-05, "loss": 11.9314, "step": 23 },
    { "epoch": 0.0007449136365752596, "grad_norm": 0.22871772944927216, "learning_rate": 9.346526315789474e-05, "loss": 11.9286, "step": 24 },
    { "epoch": 0.0007759517047658954, "grad_norm": 0.24363191425800323, "learning_rate": 9.293421052631578e-05, "loss": 11.9192, "step": 25 },
    { "epoch": 0.0008069897729565312, "grad_norm": 0.28882232308387756, "learning_rate": 9.240315789473684e-05, "loss": 11.9225, "step": 26 },
    { "epoch": 0.000838027841147167, "grad_norm": 0.3017212152481079, "learning_rate": 9.18721052631579e-05, "loss": 11.9349, "step": 27 },
    { "epoch": 0.0008690659093378028, "grad_norm": 0.36739078164100647, "learning_rate": 9.134105263157895e-05, "loss": 11.9307, "step": 28 },
    { "epoch": 0.0009001039775284386, "grad_norm": 0.35056230425834656, "learning_rate": 9.081e-05, "loss": 11.9151, "step": 29 },
    { "epoch": 0.0009311420457190744, "grad_norm": 0.3288112282752991, "learning_rate": 9.027894736842105e-05, "loss": 11.9204, "step": 30 },
    { "epoch": 0.0009621801139097103, "grad_norm": 0.43591663241386414, "learning_rate": 8.97478947368421e-05, "loss": 11.9073, "step": 31 },
    { "epoch": 0.000993218182100346, "grad_norm": 0.36357542872428894, "learning_rate": 8.921684210526316e-05, "loss": 11.9184, "step": 32 },
    { "epoch": 0.0010242562502909818, "grad_norm": 0.4034142792224884, "learning_rate": 8.86857894736842e-05, "loss": 11.9087, "step": 33 },
    { "epoch": 0.0010552943184816176, "grad_norm": 0.33256056904792786, "learning_rate": 8.815473684210527e-05, "loss": 11.9187, "step": 34 },
    { "epoch": 0.0010863323866722534, "grad_norm": 0.3923008143901825, "learning_rate": 8.762368421052631e-05, "loss": 11.9074, "step": 35 },
    { "epoch": 0.0011173704548628892, "grad_norm": 0.4441995322704315, "learning_rate": 8.709263157894737e-05, "loss": 11.9153, "step": 36 },
    { "epoch": 0.0011484085230535253, "grad_norm": 0.3605518639087677, "learning_rate": 8.656157894736843e-05, "loss": 11.912, "step": 37 },
    { "epoch": 0.001179446591244161, "grad_norm": 0.4256209135055542, "learning_rate": 8.603052631578947e-05, "loss": 11.9072, "step": 38 },
    { "epoch": 0.0012104846594347969, "grad_norm": 0.35672155022621155, "learning_rate": 8.549947368421052e-05, "loss": 11.9033, "step": 39 },
    { "epoch": 0.0012415227276254327, "grad_norm": 0.3789241909980774, "learning_rate": 8.496842105263158e-05, "loss": 11.8991, "step": 40 },
    { "epoch": 0.0012725607958160685, "grad_norm": 0.353971928358078, "learning_rate": 8.443736842105264e-05, "loss": 11.9023, "step": 41 },
    { "epoch": 0.0013035988640067043, "grad_norm": 0.36267516016960144, "learning_rate": 8.390631578947369e-05, "loss": 11.8962, "step": 42 },
    { "epoch": 0.00133463693219734, "grad_norm": 0.30361491441726685, "learning_rate": 8.337526315789473e-05, "loss": 11.8893, "step": 43 },
    { "epoch": 0.001365675000387976, "grad_norm": 0.3609680235385895, "learning_rate": 8.284421052631579e-05, "loss": 11.898, "step": 44 },
    { "epoch": 0.0013967130685786117, "grad_norm": 0.3087683618068695, "learning_rate": 8.231315789473685e-05, "loss": 11.89, "step": 45 },
    { "epoch": 0.0014277511367692475, "grad_norm": 0.3304439187049866, "learning_rate": 8.178210526315789e-05, "loss": 11.8939, "step": 46 },
    { "epoch": 0.0014587892049598833, "grad_norm": 0.29173874855041504, "learning_rate": 8.125105263157894e-05, "loss": 11.8919, "step": 47 },
    { "epoch": 0.0014898272731505191, "grad_norm": 0.23176749050617218, "learning_rate": 8.072e-05, "loss": 11.895, "step": 48 },
    { "epoch": 0.001520865341341155, "grad_norm": 0.29861122369766235, "learning_rate": 8.018894736842106e-05, "loss": 11.8894, "step": 49 },
    { "epoch": 0.0015519034095317907, "grad_norm": 0.22058968245983124, "learning_rate": 7.965789473684211e-05, "loss": 11.8917, "step": 50 },
    { "epoch": 0.0015519034095317907, "eval_loss": 11.89046573638916, "eval_runtime": 412.729, "eval_samples_per_second": 32.869, "eval_steps_per_second": 8.218, "step": 50 },
    { "epoch": 0.0015829414777224265, "grad_norm": 0.28083565831184387, "learning_rate": 7.912684210526315e-05, "loss": 11.8962, "step": 51 },
    { "epoch": 0.0016139795459130623, "grad_norm": 0.40294113755226135, "learning_rate": 7.859578947368421e-05, "loss": 11.8997, "step": 52 },
    { "epoch": 0.0016450176141036982, "grad_norm": 0.3762059807777405, "learning_rate": 7.806473684210527e-05, "loss": 11.8922, "step": 53 },
    { "epoch": 0.001676055682294334, "grad_norm": 0.3565743863582611, "learning_rate": 7.753368421052631e-05, "loss": 11.8875, "step": 54 },
    { "epoch": 0.0017070937504849698, "grad_norm": 0.23867182433605194, "learning_rate": 7.700263157894738e-05, "loss": 11.8867, "step": 55 },
    { "epoch": 0.0017381318186756056, "grad_norm": 0.25642549991607666, "learning_rate": 7.647157894736842e-05, "loss": 11.8881, "step": 56 },
    { "epoch": 0.0017691698868662414, "grad_norm": 0.21004194021224976, "learning_rate": 7.594052631578948e-05, "loss": 11.8872, "step": 57 },
    { "epoch": 0.0018002079550568772, "grad_norm": 0.21973472833633423, "learning_rate": 7.540947368421053e-05, "loss": 11.8852, "step": 58 },
    { "epoch": 0.001831246023247513, "grad_norm": 0.17524276673793793, "learning_rate": 7.487842105263157e-05, "loss": 11.8839, "step": 59 },
    { "epoch": 0.0018622840914381488, "grad_norm": 0.18540893495082855, "learning_rate": 7.434736842105263e-05, "loss": 11.8891, "step": 60 },
    { "epoch": 0.0018933221596287846, "grad_norm": 0.19046209752559662, "learning_rate": 7.381631578947368e-05, "loss": 11.8908, "step": 61 },
    { "epoch": 0.0019243602278194206, "grad_norm": 0.1590886116027832, "learning_rate": 7.328526315789474e-05, "loss": 11.8905, "step": 62 },
    { "epoch": 0.0019553982960100564, "grad_norm": 0.188175231218338, "learning_rate": 7.27542105263158e-05, "loss": 11.8886, "step": 63 },
    { "epoch": 0.001986436364200692, "grad_norm": 0.1609676331281662, "learning_rate": 7.222315789473684e-05, "loss": 11.8729, "step": 64 },
    { "epoch": 0.002017474432391328, "grad_norm": 0.13807262480258942, "learning_rate": 7.16921052631579e-05, "loss": 11.8826, "step": 65 },
    { "epoch": 0.0020485125005819636, "grad_norm": 0.14472399652004242, "learning_rate": 7.116105263157895e-05, "loss": 11.8801, "step": 66 },
    { "epoch": 0.0020795505687725997, "grad_norm": 0.17055800557136536, "learning_rate": 7.062999999999999e-05, "loss": 11.8832, "step": 67 },
    { "epoch": 0.0021105886369632352, "grad_norm": 0.1315099149942398, "learning_rate": 7.009894736842106e-05, "loss": 11.8775, "step": 68 },
    { "epoch": 0.0021416267051538713, "grad_norm": 0.10270003229379654, "learning_rate": 6.95678947368421e-05, "loss": 11.8751, "step": 69 },
    { "epoch": 0.002172664773344507, "grad_norm": 0.13660825788974762, "learning_rate": 6.903684210526316e-05, "loss": 11.8779, "step": 70 },
    { "epoch": 0.002203702841535143, "grad_norm": 0.11236346513032913, "learning_rate": 6.850578947368422e-05, "loss": 11.8768, "step": 71 },
    { "epoch": 0.0022347409097257785, "grad_norm": 0.20397527515888214, "learning_rate": 6.797473684210526e-05, "loss": 11.8695, "step": 72 },
    { "epoch": 0.0022657789779164145, "grad_norm": 0.186505526304245, "learning_rate": 6.744368421052631e-05, "loss": 11.8669, "step": 73 },
    { "epoch": 0.0022968170461070505, "grad_norm": 0.12963062524795532, "learning_rate": 6.691263157894736e-05, "loss": 11.8862, "step": 74 },
    { "epoch": 0.002327855114297686, "grad_norm": 0.10962740331888199, "learning_rate": 6.638157894736843e-05, "loss": 11.8836, "step": 75 },
    { "epoch": 0.002358893182488322, "grad_norm": 0.13987334072589874, "learning_rate": 6.585052631578948e-05, "loss": 11.8744, "step": 76 },
    { "epoch": 0.0023899312506789577, "grad_norm": 0.19806340336799622, "learning_rate": 6.531947368421052e-05, "loss": 11.8869, "step": 77 },
    { "epoch": 0.0024209693188695937, "grad_norm": 0.11947502940893173, "learning_rate": 6.478842105263158e-05, "loss": 11.8842, "step": 78 },
    { "epoch": 0.0024520073870602293, "grad_norm": 0.17792446911334991, "learning_rate": 6.425736842105264e-05, "loss": 11.8844, "step": 79 },
    { "epoch": 0.0024830454552508653, "grad_norm": 0.13691994547843933, "learning_rate": 6.372631578947368e-05, "loss": 11.8785, "step": 80 },
    { "epoch": 0.002514083523441501, "grad_norm": 0.19070185720920563, "learning_rate": 6.319526315789473e-05, "loss": 11.8824, "step": 81 },
    { "epoch": 0.002545121591632137, "grad_norm": 0.13721631467342377, "learning_rate": 6.266421052631579e-05, "loss": 11.8779, "step": 82 },
    { "epoch": 0.0025761596598227725, "grad_norm": 0.11055205017328262, "learning_rate": 6.213315789473685e-05, "loss": 11.8808, "step": 83 },
    { "epoch": 0.0026071977280134086, "grad_norm": 0.1432357281446457, "learning_rate": 6.16021052631579e-05, "loss": 11.8835, "step": 84 },
    { "epoch": 0.002638235796204044, "grad_norm": 0.15524394810199738, "learning_rate": 6.107105263157894e-05, "loss": 11.8805, "step": 85 },
    { "epoch": 0.00266927386439468, "grad_norm": 0.12444309145212173, "learning_rate": 6.054e-05, "loss": 11.8804, "step": 86 },
    { "epoch": 0.0027003119325853158, "grad_norm": 0.19381719827651978, "learning_rate": 6.000894736842105e-05, "loss": 11.8824, "step": 87 },
    { "epoch": 0.002731350000775952, "grad_norm": 0.1044355109333992, "learning_rate": 5.94778947368421e-05, "loss": 11.8786, "step": 88 },
    { "epoch": 0.0027623880689665874, "grad_norm": 0.1657666563987732, "learning_rate": 5.894684210526316e-05, "loss": 11.8843, "step": 89 },
    { "epoch": 0.0027934261371572234, "grad_norm": 0.15279187262058258, "learning_rate": 5.841578947368421e-05, "loss": 11.8778, "step": 90 },
    { "epoch": 0.002824464205347859, "grad_norm": 0.13506744801998138, "learning_rate": 5.7884736842105265e-05, "loss": 11.8795, "step": 91 },
    { "epoch": 0.002855502273538495, "grad_norm": 0.1315164864063263, "learning_rate": 5.7353684210526314e-05, "loss": 11.8806, "step": 92 },
    { "epoch": 0.0028865403417291306, "grad_norm": 0.1755332350730896, "learning_rate": 5.6822631578947364e-05, "loss": 11.8803, "step": 93 },
    { "epoch": 0.0029175784099197666, "grad_norm": 0.11907346546649933, "learning_rate": 5.629157894736842e-05, "loss": 11.8779, "step": 94 },
    { "epoch": 0.0029486164781104022, "grad_norm": 0.1171933189034462, "learning_rate": 5.576052631578948e-05, "loss": 11.874, "step": 95 },
    { "epoch": 0.0029796545463010382, "grad_norm": 0.13140052556991577, "learning_rate": 5.522947368421053e-05, "loss": 11.8762, "step": 96 },
    { "epoch": 0.003010692614491674, "grad_norm": 0.12108182907104492, "learning_rate": 5.469842105263158e-05, "loss": 11.8796, "step": 97 },
    { "epoch": 0.00304173068268231, "grad_norm": 0.10808394849300385, "learning_rate": 5.416736842105263e-05, "loss": 11.8786, "step": 98 },
    { "epoch": 0.003072768750872946, "grad_norm": 0.08503346145153046, "learning_rate": 5.3636315789473685e-05, "loss": 11.8801, "step": 99 },
    { "epoch": 0.0031038068190635815, "grad_norm": 0.1425827592611313, "learning_rate": 5.3105263157894734e-05, "loss": 11.8832, "step": 100 },
    { "epoch": 0.0031038068190635815, "eval_loss": 11.877439498901367, "eval_runtime": 413.0154, "eval_samples_per_second": 32.846, "eval_steps_per_second": 8.213, "step": 100 },
    { "epoch": 0.0031348448872542175, "grad_norm": 0.2032780945301056, "learning_rate": 5.257421052631578e-05, "loss": 11.8815, "step": 101 },
    { "epoch": 0.003165882955444853, "grad_norm": 0.234865203499794, "learning_rate": 5.2043157894736846e-05, "loss": 11.8732, "step": 102 },
    { "epoch": 0.003196921023635489, "grad_norm": 0.2010481357574463, "learning_rate": 5.1512105263157895e-05, "loss": 11.8762, "step": 103 },
    { "epoch": 0.0032279590918261247, "grad_norm": 0.30129870772361755, "learning_rate": 5.098105263157895e-05, "loss": 11.8719, "step": 104 },
    { "epoch": 0.0032589971600167607, "grad_norm": 0.1704847365617752, "learning_rate": 5.045e-05, "loss": 11.8783, "step": 105 },
    { "epoch": 0.0032900352282073963, "grad_norm": 0.125693678855896, "learning_rate": 4.991894736842105e-05, "loss": 11.8782, "step": 106 },
    { "epoch": 0.0033210732963980323, "grad_norm": 0.1328500360250473, "learning_rate": 4.9387894736842105e-05, "loss": 11.8734, "step": 107 },
    { "epoch": 0.003352111364588668, "grad_norm": 0.10017025470733643, "learning_rate": 4.885684210526316e-05, "loss": 11.8779, "step": 108 },
    { "epoch": 0.003383149432779304, "grad_norm": 0.1521725356578827, "learning_rate": 4.832578947368421e-05, "loss": 11.8802, "step": 109 },
    { "epoch": 0.0034141875009699395, "grad_norm": 0.12134622782468796, "learning_rate": 4.779473684210526e-05, "loss": 11.8718, "step": 110 },
    { "epoch": 0.0034452255691605756, "grad_norm": 0.13203729689121246, "learning_rate": 4.7263684210526315e-05, "loss": 11.8839, "step": 111 },
    { "epoch": 0.003476263637351211, "grad_norm": 0.14758096635341644, "learning_rate": 4.673263157894737e-05, "loss": 11.8757, "step": 112 },
    { "epoch": 0.003507301705541847, "grad_norm": 0.14375559985637665, "learning_rate": 4.620157894736842e-05, "loss": 11.8748, "step": 113 },
    { "epoch": 0.0035383397737324828, "grad_norm": 0.10907881706953049, "learning_rate": 4.5670526315789475e-05, "loss": 11.8673, "step": 114 },
    { "epoch": 0.0035693778419231188, "grad_norm": 0.11960776150226593, "learning_rate": 4.5139473684210524e-05, "loss": 11.8824, "step": 115 },
    { "epoch": 0.0036004159101137544, "grad_norm": 0.12177892029285431, "learning_rate": 4.460842105263158e-05, "loss": 11.8694, "step": 116 },
    { "epoch": 0.0036314539783043904, "grad_norm": 0.11586112529039383, "learning_rate": 4.4077368421052636e-05, "loss": 11.8777, "step": 117 },
    { "epoch": 0.003662492046495026, "grad_norm": 0.10951674729585648, "learning_rate": 4.3546315789473685e-05, "loss": 11.8792, "step": 118 },
    { "epoch": 0.003693530114685662, "grad_norm": 0.13929174840450287, "learning_rate": 4.3015263157894734e-05, "loss": 11.8713, "step": 119 },
    { "epoch": 0.0037245681828762976, "grad_norm": 0.14048022031784058, "learning_rate": 4.248421052631579e-05, "loss": 11.8762, "step": 120 },
    { "epoch": 0.0037556062510669336, "grad_norm": 0.1154230386018753, "learning_rate": 4.1953157894736846e-05, "loss": 11.8679, "step": 121 },
    { "epoch": 0.003786644319257569, "grad_norm": 0.11130271852016449, "learning_rate": 4.1422105263157895e-05, "loss": 11.8732, "step": 122 },
    { "epoch": 0.0038176823874482052, "grad_norm": 0.19456033408641815, "learning_rate": 4.0891052631578944e-05, "loss": 11.8726, "step": 123 },
    { "epoch": 0.0038487204556388412, "grad_norm": 0.17270055413246155, "learning_rate": 4.036e-05, "loss": 11.8599, "step": 124 },
    { "epoch": 0.003879758523829477, "grad_norm": 0.1157611832022667, "learning_rate": 3.9828947368421056e-05, "loss": 11.8741, "step": 125 },
    { "epoch": 0.003910796592020113, "grad_norm": 0.13942551612854004, "learning_rate": 3.9297894736842105e-05, "loss": 11.8738, "step": 126 },
    { "epoch": 0.003941834660210749, "grad_norm": 0.1275920271873474, "learning_rate": 3.8766842105263154e-05, "loss": 11.8615, "step": 127 },
    { "epoch": 0.003972872728401384, "grad_norm": 0.09613733738660812, "learning_rate": 3.823578947368421e-05, "loss": 11.8738, "step": 128 },
    { "epoch": 0.00400391079659202, "grad_norm": 0.16122451424598694, "learning_rate": 3.7704736842105265e-05, "loss": 11.8692, "step": 129 },
    { "epoch": 0.004034948864782656, "grad_norm": 0.13367600739002228, "learning_rate": 3.7173684210526315e-05, "loss": 11.8725, "step": 130 },
    { "epoch": 0.004065986932973292, "grad_norm": 0.11731020361185074, "learning_rate": 3.664263157894737e-05, "loss": 11.8773, "step": 131 },
    { "epoch": 0.004097025001163927, "grad_norm": 0.13289429247379303, "learning_rate": 3.611157894736842e-05, "loss": 11.8661, "step": 132 },
    { "epoch": 0.004128063069354563, "grad_norm": 0.13751353323459625, "learning_rate": 3.5580526315789475e-05, "loss": 11.8654, "step": 133 },
    { "epoch": 0.004159101137545199, "grad_norm": 0.13276422023773193, "learning_rate": 3.504947368421053e-05, "loss": 11.8733, "step": 134 },
    { "epoch": 0.004190139205735835, "grad_norm": 0.12328977137804031, "learning_rate": 3.451842105263158e-05, "loss": 11.8791, "step": 135 },
    { "epoch": 0.0042211772739264705, "grad_norm": 0.14081747829914093, "learning_rate": 3.398736842105263e-05, "loss": 11.8727, "step": 136 },
    { "epoch": 0.0042522153421171065, "grad_norm": 0.13730375468730927, "learning_rate": 3.345631578947368e-05, "loss": 11.8715, "step": 137 },
    { "epoch": 0.0042832534103077425, "grad_norm": 0.1453617960214615, "learning_rate": 3.292526315789474e-05, "loss": 11.8744, "step": 138 },
    { "epoch": 0.0043142914784983786, "grad_norm": 0.18549758195877075, "learning_rate": 3.239421052631579e-05, "loss": 11.8698, "step": 139 },
    { "epoch": 0.004345329546689014, "grad_norm": 0.13958542048931122, "learning_rate": 3.186315789473684e-05, "loss": 11.8703, "step": 140 },
    { "epoch": 0.00437636761487965, "grad_norm": 0.1116572916507721, "learning_rate": 3.1332105263157895e-05, "loss": 11.8733, "step": 141 },
    { "epoch": 0.004407405683070286, "grad_norm": 0.15264497697353363, "learning_rate": 3.080105263157895e-05, "loss": 11.8744, "step": 142 },
    { "epoch": 0.004438443751260922, "grad_norm": 0.16946198046207428, "learning_rate": 3.027e-05, "loss": 11.8748, "step": 143 },
    { "epoch": 0.004469481819451557, "grad_norm": 0.17827944457530975, "learning_rate": 2.973894736842105e-05, "loss": 11.8683, "step": 144 },
    { "epoch": 0.004500519887642193, "grad_norm": 0.15590260922908783, "learning_rate": 2.9207894736842105e-05, "loss": 11.8667, "step": 145 },
    { "epoch": 0.004531557955832829, "grad_norm": 0.17276710271835327, "learning_rate": 2.8676842105263157e-05, "loss": 11.8716, "step": 146 },
    { "epoch": 0.004562596024023465, "grad_norm": 0.15102654695510864, "learning_rate": 2.814578947368421e-05, "loss": 11.8671, "step": 147 },
    { "epoch": 0.004593634092214101, "grad_norm": 0.18881015479564667, "learning_rate": 2.7614736842105266e-05, "loss": 11.8761, "step": 148 },
    { "epoch": 0.004624672160404736, "grad_norm": 0.20375801622867584, "learning_rate": 2.7083684210526315e-05, "loss": 11.8732, "step": 149 },
    { "epoch": 0.004655710228595372, "grad_norm": 0.221470445394516, "learning_rate": 2.6552631578947367e-05, "loss": 11.8677, "step": 150 },
    { "epoch": 0.004655710228595372, "eval_loss": 11.868966102600098, "eval_runtime": 414.3737, "eval_samples_per_second": 32.739, "eval_steps_per_second": 8.186, "step": 150 },
    { "epoch": 0.004686748296786008, "grad_norm": 0.1980566531419754, "learning_rate": 2.6021578947368423e-05, "loss": 11.8806, "step": 151 },
    { "epoch": 0.004717786364976644, "grad_norm": 0.19020256400108337, "learning_rate": 2.5490526315789475e-05, "loss": 11.8685, "step": 152 },
    { "epoch": 0.004748824433167279, "grad_norm": 0.24149803817272186, "learning_rate": 2.4959473684210524e-05, "loss": 11.857, "step": 153 },
    { "epoch": 0.004779862501357915, "grad_norm": 0.2381637543439865, "learning_rate": 2.442842105263158e-05, "loss": 11.8716, "step": 154 },
    { "epoch": 0.0048109005695485514, "grad_norm": 0.18365250527858734, "learning_rate": 2.389736842105263e-05, "loss": 11.8579, "step": 155 },
    { "epoch": 0.0048419386377391875, "grad_norm": 0.13402819633483887, "learning_rate": 2.3366315789473685e-05, "loss": 11.8672, "step": 156 },
    { "epoch": 0.004872976705929823, "grad_norm": 0.13781289756298065, "learning_rate": 2.2835263157894738e-05, "loss": 11.869, "step": 157 },
    { "epoch": 0.004904014774120459, "grad_norm": 0.1478467583656311, "learning_rate": 2.230421052631579e-05, "loss": 11.8676, "step": 158 },
    { "epoch": 0.004935052842311095, "grad_norm": 0.12892381846904755, "learning_rate": 2.1773157894736843e-05, "loss": 11.8707, "step": 159 },
    { "epoch": 0.004966090910501731, "grad_norm": 0.1606762856245041, "learning_rate": 2.1242105263157895e-05, "loss": 11.8716, "step": 160 },
    { "epoch": 0.004997128978692366, "grad_norm": 0.07333854585886002, "learning_rate": 2.0711052631578947e-05, "loss": 11.8637, "step": 161 },
    { "epoch": 0.005028167046883002, "grad_norm": 0.15114228427410126, "learning_rate": 2.018e-05, "loss": 11.8736, "step": 162 },
    { "epoch": 0.005059205115073638, "grad_norm": 0.0825541689991951, "learning_rate": 1.9648947368421052e-05, "loss": 11.859, "step": 163 },
    { "epoch": 0.005090243183264274, "grad_norm": 0.14535300433635712, "learning_rate": 1.9117894736842105e-05, "loss": 11.8684, "step": 164 },
    { "epoch": 0.005121281251454909, "grad_norm": 0.10561253130435944, "learning_rate": 1.8586842105263157e-05, "loss": 11.8702, "step": 165 },
    { "epoch": 0.005152319319645545, "grad_norm": 0.1263810098171234, "learning_rate": 1.805578947368421e-05, "loss": 11.8643, "step": 166 },
    { "epoch": 0.005183357387836181, "grad_norm": 0.07599103450775146, "learning_rate": 1.7524736842105266e-05, "loss": 11.8712, "step": 167 },
    { "epoch": 0.005214395456026817, "grad_norm": 0.10940805822610855, "learning_rate": 1.6993684210526315e-05, "loss": 11.8615, "step": 168 },
    { "epoch": 0.005245433524217452, "grad_norm": 0.07466328144073486, "learning_rate": 1.646263157894737e-05, "loss": 11.8694, "step": 169 },
    { "epoch": 0.005276471592408088, "grad_norm": 0.16310299932956696, "learning_rate": 1.593157894736842e-05, "loss": 11.8719, "step": 170 },
    { "epoch": 0.005307509660598724, "grad_norm": 0.09811323136091232, "learning_rate": 1.5400526315789475e-05, "loss": 11.8716, "step": 171 },
    { "epoch": 0.00533854772878936, "grad_norm": 0.07701898366212845, "learning_rate": 1.4869473684210524e-05, "loss": 11.8667, "step": 172 },
    { "epoch": 0.005369585796979996, "grad_norm": 0.09958688914775848, "learning_rate": 1.4338421052631579e-05, "loss": 11.869, "step": 173 },
    { "epoch": 0.0054006238651706315, "grad_norm": 0.10621592402458191, "learning_rate": 1.3807368421052633e-05, "loss": 11.8738, "step": 174 },
    { "epoch": 0.005431661933361268, "grad_norm": 0.09998718649148941, "learning_rate": 1.3276315789473684e-05, "loss": 11.8742, "step": 175 },
    { "epoch": 0.005462700001551904, "grad_norm": 0.08477786928415298, "learning_rate": 1.2745263157894738e-05, "loss": 11.8695, "step": 176 },
    { "epoch": 0.00549373806974254, "grad_norm": 0.0795200765132904, "learning_rate": 1.221421052631579e-05, "loss": 11.8666, "step": 177 },
    { "epoch": 0.005524776137933175, "grad_norm": 0.13940560817718506, "learning_rate": 1.1683157894736843e-05, "loss": 11.8669, "step": 178 },
    { "epoch": 0.005555814206123811, "grad_norm": 0.09820152819156647, "learning_rate": 1.1152105263157895e-05, "loss": 11.8619, "step": 179 },
    { "epoch": 0.005586852274314447, "grad_norm": 0.10361889004707336, "learning_rate": 1.0621052631578948e-05, "loss": 11.8694, "step": 180 },
    { "epoch": 0.005617890342505083, "grad_norm": 0.1097809374332428, "learning_rate": 1.009e-05, "loss": 11.8732, "step": 181 },
    { "epoch": 0.005648928410695718, "grad_norm": 0.18127013742923737, "learning_rate": 9.558947368421052e-06, "loss": 11.8678, "step": 182 },
    { "epoch": 0.005679966478886354, "grad_norm": 0.12430532276630402, "learning_rate": 9.027894736842105e-06, "loss": 11.8681, "step": 183 },
    { "epoch": 0.00571100454707699, "grad_norm": 0.17203256487846375, "learning_rate": 8.496842105263157e-06, "loss": 11.8631, "step": 184 },
    { "epoch": 0.005742042615267626, "grad_norm": 0.09748750180006027, "learning_rate": 7.96578947368421e-06, "loss": 11.8646, "step": 185 },
    { "epoch": 0.005773080683458261, "grad_norm": 0.14566555619239807, "learning_rate": 7.434736842105262e-06, "loss": 11.8662, "step": 186 },
    { "epoch": 0.005804118751648897, "grad_norm": 0.13249988853931427, "learning_rate": 6.903684210526316e-06, "loss": 11.8669, "step": 187 },
    { "epoch": 0.005835156819839533, "grad_norm": 0.14271654188632965, "learning_rate": 6.372631578947369e-06, "loss": 11.8662, "step": 188 },
    { "epoch": 0.005866194888030169, "grad_norm": 0.10937763005495071, "learning_rate": 5.841578947368421e-06, "loss": 11.8672, "step": 189 },
    { "epoch": 0.0058972329562208044, "grad_norm": 0.1206103041768074, "learning_rate": 5.310526315789474e-06, "loss": 11.862, "step": 190 },
    { "epoch": 0.0059282710244114405, "grad_norm": 0.12608934938907623, "learning_rate": 4.779473684210526e-06, "loss": 11.8674, "step": 191 },
    { "epoch": 0.0059593090926020765, "grad_norm": 0.1504194289445877, "learning_rate": 4.248421052631579e-06, "loss": 11.8704, "step": 192 },
    { "epoch": 0.0059903471607927125, "grad_norm": 0.19314518570899963, "learning_rate": 3.717368421052631e-06, "loss": 11.8675, "step": 193 },
    { "epoch": 0.006021385228983348, "grad_norm": 0.1470961570739746, "learning_rate": 3.1863157894736844e-06, "loss": 11.8726, "step": 194 },
    { "epoch": 0.006052423297173984, "grad_norm": 0.17263321578502655, "learning_rate": 2.655263157894737e-06, "loss": 11.8699, "step": 195 },
    { "epoch": 0.00608346136536462, "grad_norm": 0.18947991728782654, "learning_rate": 2.1242105263157893e-06, "loss": 11.8713, "step": 196 },
    { "epoch": 0.006114499433555256, "grad_norm": 0.19662337005138397, "learning_rate": 1.5931578947368422e-06, "loss": 11.873, "step": 197 },
    { "epoch": 0.006145537501745892, "grad_norm": 0.1542241871356964, "learning_rate": 1.0621052631578947e-06, "loss": 11.8692, "step": 198 },
    { "epoch": 0.006176575569936527, "grad_norm": 0.20754170417785645, "learning_rate": 5.310526315789473e-07, "loss": 11.8685, "step": 199 },
    { "epoch": 0.006207613638127163, "grad_norm": 0.20448267459869385, "learning_rate": 0.0, "loss": 11.8738, "step": 200 },
    { "epoch": 0.006207613638127163, "eval_loss": 11.866828918457031, "eval_runtime": 412.6675, "eval_samples_per_second": 32.874, "eval_steps_per_second": 8.22, "step": 200 }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 261529927680.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}