{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 1025,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 0.4036892056465149,
      "learning_rate": 0.00019999478113897612,
      "loss": 1.0282,
      "step": 10
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 0.3629762828350067,
      "learning_rate": 0.0001999791251006346,
      "loss": 0.7875,
      "step": 20
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 0.4877622425556183,
      "learning_rate": 0.0001999530335191093,
      "loss": 0.5942,
      "step": 30
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 0.4466260075569153,
      "learning_rate": 0.00019991650911776695,
      "loss": 0.3866,
      "step": 40
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.649118959903717,
      "learning_rate": 0.000199869555708923,
      "loss": 0.3928,
      "step": 50
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 0.8762800097465515,
      "learning_rate": 0.0001998121781934438,
      "loss": 0.3258,
      "step": 60
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 0.9195622801780701,
      "learning_rate": 0.0001997443825602349,
      "loss": 0.2885,
      "step": 70
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.5856262445449829,
      "learning_rate": 0.00019966617588561609,
      "loss": 0.2888,
      "step": 80
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 0.5520443320274353,
      "learning_rate": 0.00019957756633258265,
      "loss": 0.2242,
      "step": 90
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.9435800909996033,
      "learning_rate": 0.00019947856314995349,
      "loss": 0.1629,
      "step": 100
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 0.9416623115539551,
      "learning_rate": 0.00019936917667140555,
      "loss": 0.1555,
      "step": 110
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 0.802065372467041,
      "learning_rate": 0.0001992494183143955,
      "loss": 0.1339,
      "step": 120
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 0.7007794380187988,
      "learning_rate": 0.00019911930057896774,
      "loss": 0.1191,
      "step": 130
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 0.6755990386009216,
      "learning_rate": 0.00019897883704644983,
      "loss": 0.1571,
      "step": 140
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 1.6951078176498413,
      "learning_rate": 0.00019882804237803488,
      "loss": 0.1309,
      "step": 150
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.567158579826355,
      "learning_rate": 0.0001986669323132512,
      "loss": 0.0766,
      "step": 160
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 0.8820038437843323,
      "learning_rate": 0.0001984955236683196,
      "loss": 0.0839,
      "step": 170
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 0.6520794034004211,
      "learning_rate": 0.00019831383433439797,
      "loss": 0.0863,
      "step": 180
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 0.45519864559173584,
      "learning_rate": 0.00019812188327571399,
      "loss": 0.0889,
      "step": 190
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.614235520362854,
      "learning_rate": 0.00019791969052758562,
      "loss": 0.0725,
      "step": 200
    },
    {
      "epoch": 1.024390243902439,
      "grad_norm": 0.2764686644077301,
      "learning_rate": 0.00019770727719432994,
      "loss": 0.0407,
      "step": 210
    },
    {
      "epoch": 1.0731707317073171,
      "grad_norm": 0.6082726716995239,
      "learning_rate": 0.00019748466544706022,
      "loss": 0.044,
      "step": 220
    },
    {
      "epoch": 1.1219512195121952,
      "grad_norm": 0.9295619130134583,
      "learning_rate": 0.00019725187852137195,
      "loss": 0.0675,
      "step": 230
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 0.3758924603462219,
      "learning_rate": 0.00019700894071491732,
      "loss": 0.0439,
      "step": 240
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 0.46514585614204407,
      "learning_rate": 0.00019675587738486936,
      "loss": 0.0398,
      "step": 250
    },
    {
      "epoch": 1.2682926829268293,
      "grad_norm": 0.5870018005371094,
      "learning_rate": 0.0001964927149452751,
      "loss": 0.0406,
      "step": 260
    },
    {
      "epoch": 1.3170731707317074,
      "grad_norm": 0.30292996764183044,
      "learning_rate": 0.00019621948086429844,
      "loss": 0.028,
      "step": 270
    },
    {
      "epoch": 1.3658536585365852,
      "grad_norm": 0.47037121653556824,
      "learning_rate": 0.00019593620366135337,
      "loss": 0.0239,
      "step": 280
    },
    {
      "epoch": 1.4146341463414633,
      "grad_norm": 0.4176475405693054,
      "learning_rate": 0.00019564291290412688,
      "loss": 0.0281,
      "step": 290
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.3179157078266144,
      "learning_rate": 0.00019533963920549306,
      "loss": 0.0281,
      "step": 300
    },
    {
      "epoch": 1.5121951219512195,
      "grad_norm": 0.5817562937736511,
      "learning_rate": 0.00019502641422031763,
      "loss": 0.0296,
      "step": 310
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 0.7409655451774597,
      "learning_rate": 0.00019470327064215383,
      "loss": 0.029,
      "step": 320
    },
    {
      "epoch": 1.6097560975609757,
      "grad_norm": 0.4418310225009918,
      "learning_rate": 0.00019437024219983028,
      "loss": 0.0583,
      "step": 330
    },
    {
      "epoch": 1.6585365853658538,
      "grad_norm": 0.31637728214263916,
      "learning_rate": 0.0001940273636539301,
      "loss": 0.0354,
      "step": 340
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 0.22175493836402893,
      "learning_rate": 0.00019367467079316279,
      "loss": 0.0514,
      "step": 350
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 0.6636152267456055,
      "learning_rate": 0.00019331220043062894,
      "loss": 0.034,
      "step": 360
    },
    {
      "epoch": 1.8048780487804879,
      "grad_norm": 0.8424332141876221,
      "learning_rate": 0.00019293999039997746,
      "loss": 0.0299,
      "step": 370
    },
    {
      "epoch": 1.8536585365853657,
      "grad_norm": 0.6435155272483826,
      "learning_rate": 0.00019255807955145677,
      "loss": 0.0508,
      "step": 380
    },
    {
      "epoch": 1.9024390243902438,
      "grad_norm": 0.7734220027923584,
      "learning_rate": 0.00019216650774785972,
      "loss": 0.035,
      "step": 390
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.2854250967502594,
      "learning_rate": 0.0001917653158603628,
      "loss": 0.0339,
      "step": 400
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6165639758110046,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.0323,
      "step": 410
    },
    {
      "epoch": 2.048780487804878,
      "grad_norm": 0.2167205959558487,
      "learning_rate": 0.00019093424033459248,
      "loss": 0.026,
      "step": 420
    },
    {
      "epoch": 2.097560975609756,
      "grad_norm": 0.2723434269428253,
      "learning_rate": 0.0001905044434416725,
      "loss": 0.0176,
      "step": 430
    },
    {
      "epoch": 2.1463414634146343,
      "grad_norm": 0.4085879325866699,
      "learning_rate": 0.00019006519994650513,
      "loss": 0.0138,
      "step": 440
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.17931397259235382,
      "learning_rate": 0.00018961655569610557,
      "loss": 0.0358,
      "step": 450
    },
    {
      "epoch": 2.2439024390243905,
      "grad_norm": 0.3886450231075287,
      "learning_rate": 0.00018915855751871363,
      "loss": 0.0209,
      "step": 460
    },
    {
      "epoch": 2.292682926829268,
      "grad_norm": 0.11022531986236572,
      "learning_rate": 0.0001886912532189061,
      "loss": 0.0101,
      "step": 470
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 0.14626094698905945,
      "learning_rate": 0.00018821469157260685,
      "loss": 0.0156,
      "step": 480
    },
    {
      "epoch": 2.3902439024390243,
      "grad_norm": 0.18015721440315247,
      "learning_rate": 0.00018772892232199592,
      "loss": 0.0156,
      "step": 490
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 0.3262254595756531,
      "learning_rate": 0.00018723399617031751,
      "loss": 0.0295,
      "step": 500
    },
    {
      "epoch": 2.4878048780487805,
      "grad_norm": 0.10063759982585907,
      "learning_rate": 0.00018672996477658767,
      "loss": 0.0164,
      "step": 510
    },
    {
      "epoch": 2.5365853658536586,
      "grad_norm": 0.11382050812244415,
      "learning_rate": 0.00018621688075020227,
      "loss": 0.0207,
      "step": 520
    },
    {
      "epoch": 2.5853658536585367,
      "grad_norm": 0.12307539582252502,
      "learning_rate": 0.0001856947976454459,
      "loss": 0.0326,
      "step": 530
    },
    {
      "epoch": 2.6341463414634148,
      "grad_norm": 0.18902955949306488,
      "learning_rate": 0.00018516376995590187,
      "loss": 0.0144,
      "step": 540
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 0.18087974190711975,
      "learning_rate": 0.00018462385310876443,
      "loss": 0.0111,
      "step": 550
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 0.2894444465637207,
      "learning_rate": 0.00018407510345905332,
      "loss": 0.0081,
      "step": 560
    },
    {
      "epoch": 2.7804878048780486,
      "grad_norm": 0.24273361265659332,
      "learning_rate": 0.0001835175782837318,
      "loss": 0.0301,
      "step": 570
    },
    {
      "epoch": 2.8292682926829267,
      "grad_norm": 0.069428451359272,
      "learning_rate": 0.00018295133577572799,
      "loss": 0.0234,
      "step": 580
    },
    {
      "epoch": 2.8780487804878048,
      "grad_norm": 0.1845165342092514,
      "learning_rate": 0.00018237643503786095,
      "loss": 0.0112,
      "step": 590
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 0.06373828649520874,
      "learning_rate": 0.00018179293607667178,
      "loss": 0.0241,
      "step": 600
    },
    {
      "epoch": 2.975609756097561,
      "grad_norm": 0.09466666728258133,
      "learning_rate": 0.0001812008997961602,
      "loss": 0.0151,
      "step": 610
    },
    {
      "epoch": 3.024390243902439,
      "grad_norm": 0.5068451762199402,
      "learning_rate": 0.00018060038799142759,
      "loss": 0.02,
      "step": 620
    },
    {
      "epoch": 3.073170731707317,
      "grad_norm": 0.17547158896923065,
      "learning_rate": 0.00017999146334222695,
      "loss": 0.0111,
      "step": 630
    },
    {
      "epoch": 3.1219512195121952,
      "grad_norm": 0.24108292162418365,
      "learning_rate": 0.00017937418940642074,
      "loss": 0.0095,
      "step": 640
    },
    {
      "epoch": 3.1707317073170733,
      "grad_norm": 0.24457822740077972,
      "learning_rate": 0.00017874863061334657,
      "loss": 0.0134,
      "step": 650
    },
    {
      "epoch": 3.2195121951219514,
      "grad_norm": 0.2185467779636383,
      "learning_rate": 0.00017811485225709256,
      "loss": 0.0135,
      "step": 660
    },
    {
      "epoch": 3.2682926829268295,
      "grad_norm": 0.12849357724189758,
      "learning_rate": 0.00017747292048968187,
      "loss": 0.0154,
      "step": 670
    },
    {
      "epoch": 3.317073170731707,
      "grad_norm": 0.09158976376056671,
      "learning_rate": 0.0001768229023141682,
      "loss": 0.0137,
      "step": 680
    },
    {
      "epoch": 3.3658536585365852,
      "grad_norm": 0.09520118683576584,
      "learning_rate": 0.00017616486557764187,
      "loss": 0.0147,
      "step": 690
    },
    {
      "epoch": 3.4146341463414633,
      "grad_norm": 0.11151307821273804,
      "learning_rate": 0.00017549887896414851,
      "loss": 0.0168,
      "step": 700
    },
    {
      "epoch": 3.4634146341463414,
      "grad_norm": 0.1175757572054863,
      "learning_rate": 0.00017482501198751965,
      "loss": 0.015,
      "step": 710
    },
    {
      "epoch": 3.5121951219512195,
      "grad_norm": 0.12314116209745407,
      "learning_rate": 0.00017414333498411733,
      "loss": 0.0179,
      "step": 720
    },
    {
      "epoch": 3.5609756097560976,
      "grad_norm": 0.08803991228342056,
      "learning_rate": 0.00017345391910549238,
      "loss": 0.0105,
      "step": 730
    },
    {
      "epoch": 3.6097560975609757,
      "grad_norm": 0.06382381916046143,
      "learning_rate": 0.000172756836310958,
      "loss": 0.0106,
      "step": 740
    },
    {
      "epoch": 3.658536585365854,
      "grad_norm": 0.47523975372314453,
      "learning_rate": 0.0001720521593600787,
      "loss": 0.0085,
      "step": 750
    },
    {
      "epoch": 3.7073170731707314,
      "grad_norm": 0.1268441379070282,
      "learning_rate": 0.000171339961805076,
      "loss": 0.0155,
      "step": 760
    },
    {
      "epoch": 3.7560975609756095,
      "grad_norm": 0.23719309270381927,
      "learning_rate": 0.000170620317983151,
      "loss": 0.015,
      "step": 770
    },
    {
      "epoch": 3.8048780487804876,
      "grad_norm": 0.0664207935333252,
      "learning_rate": 0.00016989330300872576,
      "loss": 0.0179,
      "step": 780
    },
    {
      "epoch": 3.8536585365853657,
      "grad_norm": 0.3121427893638611,
      "learning_rate": 0.00016915899276560237,
      "loss": 0.0138,
      "step": 790
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 0.0784049853682518,
      "learning_rate": 0.00016841746389904304,
      "loss": 0.0114,
      "step": 800
    },
    {
      "epoch": 3.951219512195122,
      "grad_norm": 0.7239044904708862,
      "learning_rate": 0.0001676687938077698,
      "loss": 0.0251,
      "step": 810
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.2333860546350479,
      "learning_rate": 0.00016691306063588583,
      "loss": 0.0135,
      "step": 820
    },
    {
      "epoch": 4.048780487804878,
      "grad_norm": 0.7087911367416382,
      "learning_rate": 0.00016615034326471898,
      "loss": 0.0195,
      "step": 830
    },
    {
      "epoch": 4.097560975609756,
      "grad_norm": 0.07190815359354019,
      "learning_rate": 0.00016538072130458853,
      "loss": 0.0095,
      "step": 840
    },
    {
      "epoch": 4.146341463414634,
      "grad_norm": 0.33951500058174133,
      "learning_rate": 0.00016460427508649546,
      "loss": 0.0131,
      "step": 850
    },
    {
      "epoch": 4.195121951219512,
      "grad_norm": 0.06381627917289734,
      "learning_rate": 0.00016382108565373785,
      "loss": 0.0119,
      "step": 860
    },
    {
      "epoch": 4.2439024390243905,
      "grad_norm": 0.44717633724212646,
      "learning_rate": 0.00016303123475345182,
      "loss": 0.0127,
      "step": 870
    },
    {
      "epoch": 4.2926829268292686,
      "grad_norm": 0.6674973368644714,
      "learning_rate": 0.00016223480482807894,
      "loss": 0.0111,
      "step": 880
    },
    {
      "epoch": 4.341463414634147,
      "grad_norm": 0.3303108215332031,
      "learning_rate": 0.00016143187900676112,
      "loss": 0.0159,
      "step": 890
    },
    {
      "epoch": 4.390243902439025,
      "grad_norm": 0.2972947359085083,
      "learning_rate": 0.0001606225410966638,
      "loss": 0.0086,
      "step": 900
    },
    {
      "epoch": 4.439024390243903,
      "grad_norm": 0.059205561876297,
      "learning_rate": 0.00015980687557422854,
      "loss": 0.0104,
      "step": 910
    },
    {
      "epoch": 4.487804878048781,
      "grad_norm": 0.08515360206365585,
      "learning_rate": 0.00015898496757635536,
      "loss": 0.0079,
      "step": 920
    },
    {
      "epoch": 4.536585365853659,
      "grad_norm": 0.14600469172000885,
      "learning_rate": 0.00015815690289151658,
      "loss": 0.0101,
      "step": 930
    },
    {
      "epoch": 4.585365853658536,
      "grad_norm": 0.05901546776294708,
      "learning_rate": 0.0001573227679508024,
      "loss": 0.0075,
      "step": 940
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 0.3404502868652344,
      "learning_rate": 0.00015648264981889934,
      "loss": 0.0158,
      "step": 950
    },
    {
      "epoch": 4.682926829268292,
      "grad_norm": 0.0968211218714714,
      "learning_rate": 0.00015563663618500302,
      "loss": 0.0092,
      "step": 960
    },
    {
      "epoch": 4.7317073170731705,
      "grad_norm": 0.08040373027324677,
      "learning_rate": 0.00015478481535366494,
      "loss": 0.0088,
      "step": 970
    },
    {
      "epoch": 4.780487804878049,
      "grad_norm": 0.7208348512649536,
      "learning_rate": 0.00015392727623557585,
      "loss": 0.0132,
      "step": 980
    },
    {
      "epoch": 4.829268292682927,
      "grad_norm": 0.26821044087409973,
      "learning_rate": 0.00015306410833828535,
      "loss": 0.0113,
      "step": 990
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 0.09240720421075821,
      "learning_rate": 0.00015219540175685938,
      "loss": 0.0096,
      "step": 1000
    },
    {
      "epoch": 4.926829268292683,
      "grad_norm": 0.3397311270236969,
      "learning_rate": 0.00015132124716447627,
      "loss": 0.0072,
      "step": 1010
    },
    {
      "epoch": 4.975609756097561,
      "grad_norm": 0.07910922169685364,
      "learning_rate": 0.00015044173580296266,
      "loss": 0.0115,
      "step": 1020
    }
  ],
  "logging_steps": 10,
  "max_steps": 3075,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.1079468558336e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}