|
{ |
|
"best_metric": 0.998, |
|
"best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-2404", |
|
"epoch": 4.992506244796004, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01665278934221482, |
|
"grad_norm": 0.7952147126197815, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.1922, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03330557868442964, |
|
"grad_norm": 0.8025399446487427, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.1899, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04995836802664446, |
|
"grad_norm": 1.0220482349395752, |
|
"learning_rate": 6e-06, |
|
"loss": 2.1615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06661115736885928, |
|
"grad_norm": 1.5168348550796509, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.1031, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08326394671107411, |
|
"grad_norm": 2.3170907497406006, |
|
"learning_rate": 1e-05, |
|
"loss": 1.9853, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09991673605328892, |
|
"grad_norm": 2.646939992904663, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.819, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11656952539550375, |
|
"grad_norm": 3.0145034790039062, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.5971, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13322231473771856, |
|
"grad_norm": 6.5615739822387695, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.3221, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1498751040799334, |
|
"grad_norm": 7.5968146324157715, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.0419, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16652789342214822, |
|
"grad_norm": 11.896831512451172, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8648, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18318068276436303, |
|
"grad_norm": 10.90375804901123, |
|
"learning_rate": 2.1800000000000005e-05, |
|
"loss": 0.7764, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19983347210657784, |
|
"grad_norm": 7.848816394805908, |
|
"learning_rate": 2.3800000000000003e-05, |
|
"loss": 0.6304, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21648626144879268, |
|
"grad_norm": 5.973367214202881, |
|
"learning_rate": 2.5800000000000004e-05, |
|
"loss": 0.5106, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2331390507910075, |
|
"grad_norm": 10.721240997314453, |
|
"learning_rate": 2.78e-05, |
|
"loss": 0.4462, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2497918401332223, |
|
"grad_norm": 5.517733097076416, |
|
"learning_rate": 2.9800000000000003e-05, |
|
"loss": 0.5038, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2664446294754371, |
|
"grad_norm": 8.605814933776855, |
|
"learning_rate": 3.180000000000001e-05, |
|
"loss": 0.3833, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28309741881765194, |
|
"grad_norm": 1.7152849435806274, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 0.3112, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2997502081598668, |
|
"grad_norm": 1.6370782852172852, |
|
"learning_rate": 3.5600000000000005e-05, |
|
"loss": 0.3238, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3164029975020816, |
|
"grad_norm": 20.556215286254883, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.3753, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33305578684429643, |
|
"grad_norm": 1.986198902130127, |
|
"learning_rate": 3.96e-05, |
|
"loss": 0.2539, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34970857618651124, |
|
"grad_norm": 9.711119651794434, |
|
"learning_rate": 3.9885714285714284e-05, |
|
"loss": 0.195, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.36636136552872606, |
|
"grad_norm": 22.053802490234375, |
|
"learning_rate": 3.9742857142857146e-05, |
|
"loss": 0.2251, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.38301415487094087, |
|
"grad_norm": 4.119320869445801, |
|
"learning_rate": 3.96e-05, |
|
"loss": 0.2491, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3996669442131557, |
|
"grad_norm": 34.85157012939453, |
|
"learning_rate": 3.945714285714286e-05, |
|
"loss": 0.2676, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4163197335553705, |
|
"grad_norm": 3.7660233974456787, |
|
"learning_rate": 3.9314285714285716e-05, |
|
"loss": 0.2795, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43297252289758537, |
|
"grad_norm": 2.8417813777923584, |
|
"learning_rate": 3.917142857142858e-05, |
|
"loss": 0.4349, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4496253122398002, |
|
"grad_norm": 2.4724223613739014, |
|
"learning_rate": 3.902857142857143e-05, |
|
"loss": 0.2017, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.466278101582015, |
|
"grad_norm": 11.620677947998047, |
|
"learning_rate": 3.8885714285714294e-05, |
|
"loss": 0.1538, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4829308909242298, |
|
"grad_norm": 8.653327941894531, |
|
"learning_rate": 3.874285714285714e-05, |
|
"loss": 0.3721, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4995836802664446, |
|
"grad_norm": 0.48509305715560913, |
|
"learning_rate": 3.86e-05, |
|
"loss": 0.12, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5162364696086594, |
|
"grad_norm": 16.0579833984375, |
|
"learning_rate": 3.845714285714286e-05, |
|
"loss": 0.1726, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5328892589508742, |
|
"grad_norm": 11.486455917358398, |
|
"learning_rate": 3.831428571428572e-05, |
|
"loss": 0.1061, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5495420482930891, |
|
"grad_norm": 9.900602340698242, |
|
"learning_rate": 3.8171428571428574e-05, |
|
"loss": 0.0963, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5661948376353039, |
|
"grad_norm": 0.6496172547340393, |
|
"learning_rate": 3.8028571428571435e-05, |
|
"loss": 0.1222, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5828476269775187, |
|
"grad_norm": 3.4763410091400146, |
|
"learning_rate": 3.788571428571429e-05, |
|
"loss": 0.0951, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5995004163197336, |
|
"grad_norm": 1.4176121950149536, |
|
"learning_rate": 3.7742857142857145e-05, |
|
"loss": 0.1592, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6161532056619484, |
|
"grad_norm": 0.24932004511356354, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.0837, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6328059950041632, |
|
"grad_norm": 0.34060370922088623, |
|
"learning_rate": 3.745714285714286e-05, |
|
"loss": 0.1331, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.649458784346378, |
|
"grad_norm": 32.56100082397461, |
|
"learning_rate": 3.7314285714285715e-05, |
|
"loss": 0.0939, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6661115736885929, |
|
"grad_norm": 27.72275733947754, |
|
"learning_rate": 3.717142857142858e-05, |
|
"loss": 0.2595, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6827643630308077, |
|
"grad_norm": 2.5867972373962402, |
|
"learning_rate": 3.702857142857143e-05, |
|
"loss": 0.1213, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6994171523730225, |
|
"grad_norm": 3.271141290664673, |
|
"learning_rate": 3.6885714285714286e-05, |
|
"loss": 0.0732, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7160699417152373, |
|
"grad_norm": 0.40355125069618225, |
|
"learning_rate": 3.674285714285715e-05, |
|
"loss": 0.0865, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7327227310574521, |
|
"grad_norm": 0.1593771129846573, |
|
"learning_rate": 3.66e-05, |
|
"loss": 0.0725, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7493755203996669, |
|
"grad_norm": 6.7296857833862305, |
|
"learning_rate": 3.645714285714286e-05, |
|
"loss": 0.1532, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7660283097418817, |
|
"grad_norm": 0.15103112161159515, |
|
"learning_rate": 3.631428571428572e-05, |
|
"loss": 0.0429, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7826810990840966, |
|
"grad_norm": 10.738967895507812, |
|
"learning_rate": 3.617142857142857e-05, |
|
"loss": 0.0405, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7993338884263114, |
|
"grad_norm": 0.13244017958641052, |
|
"learning_rate": 3.602857142857143e-05, |
|
"loss": 0.089, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8159866777685262, |
|
"grad_norm": 0.13044649362564087, |
|
"learning_rate": 3.588571428571429e-05, |
|
"loss": 0.1373, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.832639467110741, |
|
"grad_norm": 0.2371596097946167, |
|
"learning_rate": 3.574285714285714e-05, |
|
"loss": 0.0845, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8492922564529559, |
|
"grad_norm": 0.14471735060214996, |
|
"learning_rate": 3.5600000000000005e-05, |
|
"loss": 0.0872, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8659450457951707, |
|
"grad_norm": 0.8515125513076782, |
|
"learning_rate": 3.545714285714286e-05, |
|
"loss": 0.0425, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8825978351373855, |
|
"grad_norm": 1.3742948770523071, |
|
"learning_rate": 3.531428571428572e-05, |
|
"loss": 0.0114, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8992506244796004, |
|
"grad_norm": 8.960271835327148, |
|
"learning_rate": 3.5171428571428575e-05, |
|
"loss": 0.0661, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9159034138218152, |
|
"grad_norm": 0.16396482288837433, |
|
"learning_rate": 3.502857142857143e-05, |
|
"loss": 0.096, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.93255620316403, |
|
"grad_norm": 0.11441925913095474, |
|
"learning_rate": 3.4885714285714285e-05, |
|
"loss": 0.042, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9492089925062448, |
|
"grad_norm": 0.9531962275505066, |
|
"learning_rate": 3.4742857142857146e-05, |
|
"loss": 0.1096, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9658617818484596, |
|
"grad_norm": 20.664161682128906, |
|
"learning_rate": 3.46e-05, |
|
"loss": 0.1895, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9825145711906744, |
|
"grad_norm": 0.10766961425542831, |
|
"learning_rate": 3.445714285714286e-05, |
|
"loss": 0.0951, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9991673605328892, |
|
"grad_norm": 0.06997501850128174, |
|
"learning_rate": 3.431428571428572e-05, |
|
"loss": 0.0677, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9935, |
|
"eval_loss": 0.02969062514603138, |
|
"eval_runtime": 43.3836, |
|
"eval_samples_per_second": 46.1, |
|
"eval_steps_per_second": 11.525, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.0149875104079933, |
|
"grad_norm": 0.9687513709068298, |
|
"learning_rate": 3.417142857142858e-05, |
|
"loss": 0.0525, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.031640299750208, |
|
"grad_norm": 6.680837154388428, |
|
"learning_rate": 3.402857142857143e-05, |
|
"loss": 0.0161, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.048293089092423, |
|
"grad_norm": 24.152660369873047, |
|
"learning_rate": 3.388571428571429e-05, |
|
"loss": 0.0387, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0649458784346377, |
|
"grad_norm": 0.07758224755525589, |
|
"learning_rate": 3.374285714285714e-05, |
|
"loss": 0.0266, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0815986677768525, |
|
"grad_norm": 0.0564408153295517, |
|
"learning_rate": 3.3600000000000004e-05, |
|
"loss": 0.0563, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0982514571190674, |
|
"grad_norm": 0.05482952296733856, |
|
"learning_rate": 3.345714285714286e-05, |
|
"loss": 0.0493, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1149042464612822, |
|
"grad_norm": 0.05704626441001892, |
|
"learning_rate": 3.331428571428572e-05, |
|
"loss": 0.0924, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.131557035803497, |
|
"grad_norm": 0.05357396975159645, |
|
"learning_rate": 3.3171428571428574e-05, |
|
"loss": 0.0814, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1482098251457118, |
|
"grad_norm": 0.47527748346328735, |
|
"learning_rate": 3.302857142857143e-05, |
|
"loss": 0.0544, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1648626144879266, |
|
"grad_norm": 28.22191047668457, |
|
"learning_rate": 3.288571428571429e-05, |
|
"loss": 0.1022, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1815154038301416, |
|
"grad_norm": 0.3312930166721344, |
|
"learning_rate": 3.2742857142857145e-05, |
|
"loss": 0.005, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1981681931723565, |
|
"grad_norm": 35.735198974609375, |
|
"learning_rate": 3.26e-05, |
|
"loss": 0.0072, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2148209825145713, |
|
"grad_norm": 0.04014894366264343, |
|
"learning_rate": 3.245714285714286e-05, |
|
"loss": 0.0938, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.231473771856786, |
|
"grad_norm": 16.871463775634766, |
|
"learning_rate": 3.2314285714285716e-05, |
|
"loss": 0.0241, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.248126561199001, |
|
"grad_norm": 25.51210594177246, |
|
"learning_rate": 3.217142857142857e-05, |
|
"loss": 0.0322, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2647793505412157, |
|
"grad_norm": 0.03736506775021553, |
|
"learning_rate": 3.202857142857143e-05, |
|
"loss": 0.0521, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2814321398834305, |
|
"grad_norm": 3.5099713802337646, |
|
"learning_rate": 3.1885714285714286e-05, |
|
"loss": 0.0582, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2980849292256453, |
|
"grad_norm": 0.054958537220954895, |
|
"learning_rate": 3.174285714285715e-05, |
|
"loss": 0.0836, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3147377185678601, |
|
"grad_norm": 0.7841320633888245, |
|
"learning_rate": 3.16e-05, |
|
"loss": 0.0066, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.331390507910075, |
|
"grad_norm": 0.03516022861003876, |
|
"learning_rate": 3.1457142857142864e-05, |
|
"loss": 0.0657, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3480432972522898, |
|
"grad_norm": 0.03558405488729477, |
|
"learning_rate": 3.131428571428571e-05, |
|
"loss": 0.0347, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3646960865945046, |
|
"grad_norm": 0.14143311977386475, |
|
"learning_rate": 3.117142857142857e-05, |
|
"loss": 0.0028, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3813488759367194, |
|
"grad_norm": 2.7062411308288574, |
|
"learning_rate": 3.102857142857143e-05, |
|
"loss": 0.0898, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3980016652789342, |
|
"grad_norm": 0.03832915052771568, |
|
"learning_rate": 3.088571428571429e-05, |
|
"loss": 0.004, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.414654454621149, |
|
"grad_norm": 19.017581939697266, |
|
"learning_rate": 3.0742857142857144e-05, |
|
"loss": 0.0495, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4313072439633638, |
|
"grad_norm": 19.32662582397461, |
|
"learning_rate": 3.0600000000000005e-05, |
|
"loss": 0.0808, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4479600333055787, |
|
"grad_norm": 0.21114251017570496, |
|
"learning_rate": 3.045714285714286e-05, |
|
"loss": 0.0275, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4646128226477935, |
|
"grad_norm": 4.781786918640137, |
|
"learning_rate": 3.0314285714285718e-05, |
|
"loss": 0.0483, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4812656119900083, |
|
"grad_norm": 25.905826568603516, |
|
"learning_rate": 3.0171428571428576e-05, |
|
"loss": 0.1505, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.497918401332223, |
|
"grad_norm": 8.17207145690918, |
|
"learning_rate": 3.002857142857143e-05, |
|
"loss": 0.0302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5145711906744381, |
|
"grad_norm": Infinity, |
|
"learning_rate": 2.9900000000000005e-05, |
|
"loss": 0.0906, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.531223980016653, |
|
"grad_norm": 3.578585624694824, |
|
"learning_rate": 2.9757142857142856e-05, |
|
"loss": 0.047, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5478767693588678, |
|
"grad_norm": 0.03926655277609825, |
|
"learning_rate": 2.9614285714285714e-05, |
|
"loss": 0.0685, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5645295587010826, |
|
"grad_norm": 30.002620697021484, |
|
"learning_rate": 2.9471428571428572e-05, |
|
"loss": 0.0474, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5811823480432974, |
|
"grad_norm": 0.035517849028110504, |
|
"learning_rate": 2.932857142857143e-05, |
|
"loss": 0.0041, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5978351373855122, |
|
"grad_norm": 4.368102550506592, |
|
"learning_rate": 2.918571428571429e-05, |
|
"loss": 0.0803, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.614487926727727, |
|
"grad_norm": 0.03676709160208702, |
|
"learning_rate": 2.9042857142857146e-05, |
|
"loss": 0.0037, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6311407160699418, |
|
"grad_norm": 0.032644957304000854, |
|
"learning_rate": 2.8900000000000005e-05, |
|
"loss": 0.0379, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.6477935054121566, |
|
"grad_norm": 0.02497854270040989, |
|
"learning_rate": 2.8757142857142863e-05, |
|
"loss": 0.021, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6644462947543714, |
|
"grad_norm": 0.024164369329810143, |
|
"learning_rate": 2.8614285714285714e-05, |
|
"loss": 0.012, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6810990840965863, |
|
"grad_norm": 0.05042396858334541, |
|
"learning_rate": 2.8471428571428572e-05, |
|
"loss": 0.0342, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.697751873438801, |
|
"grad_norm": 0.15826770663261414, |
|
"learning_rate": 2.832857142857143e-05, |
|
"loss": 0.022, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.7144046627810159, |
|
"grad_norm": 13.766159057617188, |
|
"learning_rate": 2.8185714285714288e-05, |
|
"loss": 0.0442, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7310574521232307, |
|
"grad_norm": 0.3997497856616974, |
|
"learning_rate": 2.8042857142857146e-05, |
|
"loss": 0.1228, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7477102414654455, |
|
"grad_norm": 0.04381551966071129, |
|
"learning_rate": 2.7900000000000004e-05, |
|
"loss": 0.0231, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7643630308076603, |
|
"grad_norm": 0.14710094034671783, |
|
"learning_rate": 2.7757142857142862e-05, |
|
"loss": 0.0995, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.7810158201498751, |
|
"grad_norm": 4.448687553405762, |
|
"learning_rate": 2.7614285714285717e-05, |
|
"loss": 0.1546, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.79766860949209, |
|
"grad_norm": 0.03282266855239868, |
|
"learning_rate": 2.7471428571428575e-05, |
|
"loss": 0.005, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.8143213988343048, |
|
"grad_norm": 0.02770112454891205, |
|
"learning_rate": 2.732857142857143e-05, |
|
"loss": 0.009, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.8309741881765196, |
|
"grad_norm": 0.02363790012896061, |
|
"learning_rate": 2.7185714285714287e-05, |
|
"loss": 0.0019, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8476269775187344, |
|
"grad_norm": 14.821802139282227, |
|
"learning_rate": 2.7042857142857145e-05, |
|
"loss": 0.028, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8642797668609492, |
|
"grad_norm": 0.024729197844862938, |
|
"learning_rate": 2.6900000000000003e-05, |
|
"loss": 0.0446, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.880932556203164, |
|
"grad_norm": 0.039613548666238785, |
|
"learning_rate": 2.6757142857142858e-05, |
|
"loss": 0.0019, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8975853455453788, |
|
"grad_norm": 0.029998844489455223, |
|
"learning_rate": 2.6614285714285716e-05, |
|
"loss": 0.069, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.9142381348875936, |
|
"grad_norm": 0.024138517677783966, |
|
"learning_rate": 2.6471428571428574e-05, |
|
"loss": 0.0708, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.9308909242298085, |
|
"grad_norm": 0.02379235252737999, |
|
"learning_rate": 2.6328571428571432e-05, |
|
"loss": 0.0741, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9475437135720233, |
|
"grad_norm": 0.718246579170227, |
|
"learning_rate": 2.618571428571429e-05, |
|
"loss": 0.023, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.964196502914238, |
|
"grad_norm": 0.021009549498558044, |
|
"learning_rate": 2.6042857142857145e-05, |
|
"loss": 0.0765, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.980849292256453, |
|
"grad_norm": 7.245909690856934, |
|
"learning_rate": 2.59e-05, |
|
"loss": 0.086, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9975020815986677, |
|
"grad_norm": 0.024452045559883118, |
|
"learning_rate": 2.5757142857142857e-05, |
|
"loss": 0.0345, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9935, |
|
"eval_loss": 0.03615127503871918, |
|
"eval_runtime": 41.5933, |
|
"eval_samples_per_second": 48.085, |
|
"eval_steps_per_second": 12.021, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.0133222314737718, |
|
"grad_norm": 10.592672348022461, |
|
"learning_rate": 2.5614285714285715e-05, |
|
"loss": 0.0139, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.0299750208159866, |
|
"grad_norm": 0.06319635361433029, |
|
"learning_rate": 2.5471428571428573e-05, |
|
"loss": 0.0204, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.0466278101582014, |
|
"grad_norm": 0.018913447856903076, |
|
"learning_rate": 2.532857142857143e-05, |
|
"loss": 0.0278, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.063280599500416, |
|
"grad_norm": 0.019886815920472145, |
|
"learning_rate": 2.518571428571429e-05, |
|
"loss": 0.0195, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.079933388842631, |
|
"grad_norm": 0.11551292985677719, |
|
"learning_rate": 2.5042857142857148e-05, |
|
"loss": 0.0349, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.096586178184846, |
|
"grad_norm": 0.3485487103462219, |
|
"learning_rate": 2.4900000000000006e-05, |
|
"loss": 0.0084, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.1132389675270606, |
|
"grad_norm": 0.040086254477500916, |
|
"learning_rate": 2.4757142857142857e-05, |
|
"loss": 0.0307, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.1298917568692755, |
|
"grad_norm": 0.017919067293405533, |
|
"learning_rate": 2.4614285714285715e-05, |
|
"loss": 0.041, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.1465445462114903, |
|
"grad_norm": 0.01724259741604328, |
|
"learning_rate": 2.4471428571428573e-05, |
|
"loss": 0.0052, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.163197335553705, |
|
"grad_norm": 0.25569283962249756, |
|
"learning_rate": 2.432857142857143e-05, |
|
"loss": 0.0283, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.17985012489592, |
|
"grad_norm": 0.016946446150541306, |
|
"learning_rate": 2.418571428571429e-05, |
|
"loss": 0.0124, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.1965029142381347, |
|
"grad_norm": 0.023746447637677193, |
|
"learning_rate": 2.4042857142857147e-05, |
|
"loss": 0.0302, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.2131557035803495, |
|
"grad_norm": 1.8454148769378662, |
|
"learning_rate": 2.39e-05, |
|
"loss": 0.0034, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.2298084929225643, |
|
"grad_norm": 0.015741076320409775, |
|
"learning_rate": 2.375714285714286e-05, |
|
"loss": 0.0367, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.246461282264779, |
|
"grad_norm": 0.015740349888801575, |
|
"learning_rate": 2.3614285714285714e-05, |
|
"loss": 0.0108, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.263114071606994, |
|
"grad_norm": 20.47581672668457, |
|
"learning_rate": 2.3471428571428572e-05, |
|
"loss": 0.0652, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.279766860949209, |
|
"grad_norm": 0.017650267109274864, |
|
"learning_rate": 2.332857142857143e-05, |
|
"loss": 0.0058, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2964196502914236, |
|
"grad_norm": 0.1071343645453453, |
|
"learning_rate": 2.318571428571429e-05, |
|
"loss": 0.0279, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.313072439633639, |
|
"grad_norm": 0.16622492671012878, |
|
"learning_rate": 2.3042857142857143e-05, |
|
"loss": 0.0343, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.329725228975853, |
|
"grad_norm": 0.07910218089818954, |
|
"learning_rate": 2.29e-05, |
|
"loss": 0.0023, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.3463780183180685, |
|
"grad_norm": 0.016809897497296333, |
|
"learning_rate": 2.275714285714286e-05, |
|
"loss": 0.0081, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.3630308076602833, |
|
"grad_norm": 0.014708627946674824, |
|
"learning_rate": 2.2614285714285717e-05, |
|
"loss": 0.0628, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.379683597002498, |
|
"grad_norm": 0.014496504329144955, |
|
"learning_rate": 2.2471428571428575e-05, |
|
"loss": 0.0017, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.396336386344713, |
|
"grad_norm": 0.013988097198307514, |
|
"learning_rate": 2.232857142857143e-05, |
|
"loss": 0.049, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.4129891756869277, |
|
"grad_norm": 0.02206108532845974, |
|
"learning_rate": 2.2185714285714284e-05, |
|
"loss": 0.0009, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.4296419650291425, |
|
"grad_norm": 0.01265011541545391, |
|
"learning_rate": 2.2042857142857142e-05, |
|
"loss": 0.0009, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.4462947543713573, |
|
"grad_norm": 0.014575159177184105, |
|
"learning_rate": 2.19e-05, |
|
"loss": 0.0008, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.462947543713572, |
|
"grad_norm": 0.012796576134860516, |
|
"learning_rate": 2.175714285714286e-05, |
|
"loss": 0.0425, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.479600333055787, |
|
"grad_norm": 0.013447301462292671, |
|
"learning_rate": 2.1614285714285716e-05, |
|
"loss": 0.0011, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.496253122398002, |
|
"grad_norm": 0.012052874080836773, |
|
"learning_rate": 2.1471428571428574e-05, |
|
"loss": 0.0153, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.5129059117402166, |
|
"grad_norm": 0.011766649782657623, |
|
"learning_rate": 2.1328571428571432e-05, |
|
"loss": 0.0165, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.5295587010824314, |
|
"grad_norm": 0.01666083373129368, |
|
"learning_rate": 2.118571428571429e-05, |
|
"loss": 0.0039, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.5462114904246462, |
|
"grad_norm": 1.2848758697509766, |
|
"learning_rate": 2.1042857142857142e-05, |
|
"loss": 0.0383, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.562864279766861, |
|
"grad_norm": 0.014356808736920357, |
|
"learning_rate": 2.09e-05, |
|
"loss": 0.0171, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.579517069109076, |
|
"grad_norm": 0.08616916835308075, |
|
"learning_rate": 2.0757142857142858e-05, |
|
"loss": 0.0688, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.5961698584512907, |
|
"grad_norm": 0.02040853165090084, |
|
"learning_rate": 2.0614285714285716e-05, |
|
"loss": 0.0011, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.6128226477935055, |
|
"grad_norm": 15.67353343963623, |
|
"learning_rate": 2.0471428571428574e-05, |
|
"loss": 0.0273, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.6294754371357203, |
|
"grad_norm": 0.014960126020014286, |
|
"learning_rate": 2.0328571428571432e-05, |
|
"loss": 0.0116, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.646128226477935, |
|
"grad_norm": 0.0133629459887743, |
|
"learning_rate": 2.018571428571429e-05, |
|
"loss": 0.0154, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.66278101582015, |
|
"grad_norm": 1.1109951734542847, |
|
"learning_rate": 2.0042857142857145e-05, |
|
"loss": 0.0298, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6794338051623647, |
|
"grad_norm": 0.07497254759073257, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 0.0366, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6960865945045795, |
|
"grad_norm": 0.01391484122723341, |
|
"learning_rate": 1.975714285714286e-05, |
|
"loss": 0.0029, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.7127393838467944, |
|
"grad_norm": 0.011711220256984234, |
|
"learning_rate": 1.9614285714285715e-05, |
|
"loss": 0.0183, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.729392173189009, |
|
"grad_norm": 0.01194568071514368, |
|
"learning_rate": 1.9471428571428573e-05, |
|
"loss": 0.0232, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.746044962531224, |
|
"grad_norm": 0.032725363969802856, |
|
"learning_rate": 1.932857142857143e-05, |
|
"loss": 0.0283, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.762697751873439, |
|
"grad_norm": 0.021190594881772995, |
|
"learning_rate": 1.9185714285714286e-05, |
|
"loss": 0.0026, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.7793505412156536, |
|
"grad_norm": 24.009925842285156, |
|
"learning_rate": 1.9042857142857144e-05, |
|
"loss": 0.0627, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7960033305578684, |
|
"grad_norm": 0.017221566289663315, |
|
"learning_rate": 1.8900000000000002e-05, |
|
"loss": 0.0132, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.8126561199000832, |
|
"grad_norm": 0.010178760625422001, |
|
"learning_rate": 1.8757142857142857e-05, |
|
"loss": 0.0063, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.829308909242298, |
|
"grad_norm": 0.01019757054746151, |
|
"learning_rate": 1.8614285714285715e-05, |
|
"loss": 0.0101, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.845961698584513, |
|
"grad_norm": 0.010430095717310905, |
|
"learning_rate": 1.8471428571428573e-05, |
|
"loss": 0.0312, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.8626144879267277, |
|
"grad_norm": 0.011290703900158405, |
|
"learning_rate": 1.832857142857143e-05, |
|
"loss": 0.0016, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.8792672772689425, |
|
"grad_norm": 0.12969951331615448, |
|
"learning_rate": 1.8185714285714285e-05, |
|
"loss": 0.0354, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.8959200666111573, |
|
"grad_norm": 0.011047742329537868, |
|
"learning_rate": 1.8042857142857143e-05, |
|
"loss": 0.0018, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.912572855953372, |
|
"grad_norm": 0.010961545631289482, |
|
"learning_rate": 1.79e-05, |
|
"loss": 0.0023, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.929225645295587, |
|
"grad_norm": 0.014039566740393639, |
|
"learning_rate": 1.775714285714286e-05, |
|
"loss": 0.0649, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.9458784346378017, |
|
"grad_norm": 0.010026945732533932, |
|
"learning_rate": 1.7614285714285714e-05, |
|
"loss": 0.0039, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.9625312239800166, |
|
"grad_norm": 0.009294411167502403, |
|
"learning_rate": 1.7471428571428572e-05, |
|
"loss": 0.0011, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.9791840133222314, |
|
"grad_norm": 0.010262146592140198, |
|
"learning_rate": 1.732857142857143e-05, |
|
"loss": 0.0006, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.995836802664446, |
|
"grad_norm": 19.167015075683594, |
|
"learning_rate": 1.7185714285714288e-05, |
|
"loss": 0.013, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.997, |
|
"eval_loss": 0.015118513256311417, |
|
"eval_runtime": 42.7084, |
|
"eval_samples_per_second": 46.829, |
|
"eval_steps_per_second": 11.707, |
|
"step": 1803 |
|
}, |
|
{ |
|
"epoch": 3.0116569525395502, |
|
"grad_norm": 0.0086745023727417, |
|
"learning_rate": 1.7042857142857143e-05, |
|
"loss": 0.0008, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.028309741881765, |
|
"grad_norm": 0.17141631245613098, |
|
"learning_rate": 1.69e-05, |
|
"loss": 0.006, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.04496253122398, |
|
"grad_norm": 0.01152071077376604, |
|
"learning_rate": 1.675714285714286e-05, |
|
"loss": 0.0006, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.0616153205661947, |
|
"grad_norm": 0.00846625491976738, |
|
"learning_rate": 1.6614285714285717e-05, |
|
"loss": 0.0006, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.0782681099084095, |
|
"grad_norm": 0.008341658860445023, |
|
"learning_rate": 1.6471428571428575e-05, |
|
"loss": 0.0005, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.0949208992506243, |
|
"grad_norm": 0.008472333662211895, |
|
"learning_rate": 1.632857142857143e-05, |
|
"loss": 0.0005, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.111573688592839, |
|
"grad_norm": 28.69985580444336, |
|
"learning_rate": 1.6185714285714288e-05, |
|
"loss": 0.009, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.128226477935054, |
|
"grad_norm": 0.007999376393854618, |
|
"learning_rate": 1.6042857142857146e-05, |
|
"loss": 0.0005, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.1448792672772687, |
|
"grad_norm": 0.008124138228595257, |
|
"learning_rate": 1.5900000000000004e-05, |
|
"loss": 0.0009, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.1615320566194836, |
|
"grad_norm": 0.04555722326040268, |
|
"learning_rate": 1.5757142857142858e-05, |
|
"loss": 0.0137, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.178184845961699, |
|
"grad_norm": 0.007663598284125328, |
|
"learning_rate": 1.5614285714285716e-05, |
|
"loss": 0.1333, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.194837635303913, |
|
"grad_norm": 0.011646582745015621, |
|
"learning_rate": 1.5471428571428574e-05, |
|
"loss": 0.0506, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.2114904246461284, |
|
"grad_norm": 0.00872163474559784, |
|
"learning_rate": 1.532857142857143e-05, |
|
"loss": 0.0037, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.2281432139883433, |
|
"grad_norm": 0.008789711631834507, |
|
"learning_rate": 1.5185714285714285e-05, |
|
"loss": 0.0007, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.244796003330558, |
|
"grad_norm": 0.008661571890115738, |
|
"learning_rate": 1.5042857142857143e-05, |
|
"loss": 0.0037, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.261448792672773, |
|
"grad_norm": 28.83612060546875, |
|
"learning_rate": 1.4900000000000001e-05, |
|
"loss": 0.0253, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.2781015820149877, |
|
"grad_norm": 0.0573546439409256, |
|
"learning_rate": 1.475714285714286e-05, |
|
"loss": 0.0007, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.2947543713572025, |
|
"grad_norm": 0.009006750769913197, |
|
"learning_rate": 1.4614285714285716e-05, |
|
"loss": 0.0053, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.3114071606994173, |
|
"grad_norm": 0.007715549319982529, |
|
"learning_rate": 1.4471428571428572e-05, |
|
"loss": 0.033, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.328059950041632, |
|
"grad_norm": 0.008084608241915703, |
|
"learning_rate": 1.432857142857143e-05, |
|
"loss": 0.0144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.344712739383847, |
|
"grad_norm": 0.007967551238834858, |
|
"learning_rate": 1.4185714285714286e-05, |
|
"loss": 0.0137, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.3613655287260618, |
|
"grad_norm": 0.008203152567148209, |
|
"learning_rate": 1.4042857142857144e-05, |
|
"loss": 0.0037, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.3780183180682766, |
|
"grad_norm": 6.683016300201416, |
|
"learning_rate": 1.39e-05, |
|
"loss": 0.034, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.3946711074104914, |
|
"grad_norm": 0.011201892048120499, |
|
"learning_rate": 1.3757142857142857e-05, |
|
"loss": 0.0005, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.411323896752706, |
|
"grad_norm": 0.008145448751747608, |
|
"learning_rate": 1.3614285714285715e-05, |
|
"loss": 0.0005, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.427976686094921, |
|
"grad_norm": 0.9828100204467773, |
|
"learning_rate": 1.3471428571428573e-05, |
|
"loss": 0.0007, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.444629475437136, |
|
"grad_norm": 0.007419963832944632, |
|
"learning_rate": 1.3328571428571431e-05, |
|
"loss": 0.0124, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.4612822647793506, |
|
"grad_norm": 0.030570199713110924, |
|
"learning_rate": 1.3185714285714286e-05, |
|
"loss": 0.0005, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.4779350541215655, |
|
"grad_norm": 38.09405517578125, |
|
"learning_rate": 1.3042857142857144e-05, |
|
"loss": 0.0514, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.4945878434637803, |
|
"grad_norm": 0.007145782001316547, |
|
"learning_rate": 1.2900000000000002e-05, |
|
"loss": 0.0005, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.511240632805995, |
|
"grad_norm": 0.01062481477856636, |
|
"learning_rate": 1.275714285714286e-05, |
|
"loss": 0.0326, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.52789342214821, |
|
"grad_norm": 0.00760676059871912, |
|
"learning_rate": 1.2614285714285715e-05, |
|
"loss": 0.0598, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.5445462114904247, |
|
"grad_norm": 0.011588923633098602, |
|
"learning_rate": 1.2471428571428573e-05, |
|
"loss": 0.0317, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.5611990008326395, |
|
"grad_norm": 0.007162110414355993, |
|
"learning_rate": 1.232857142857143e-05, |
|
"loss": 0.0007, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.5778517901748543, |
|
"grad_norm": 0.008215065114200115, |
|
"learning_rate": 1.2185714285714287e-05, |
|
"loss": 0.0006, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.594504579517069, |
|
"grad_norm": 0.34589236974716187, |
|
"learning_rate": 1.2042857142857143e-05, |
|
"loss": 0.0279, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.611157368859284, |
|
"grad_norm": 0.02411896549165249, |
|
"learning_rate": 1.1900000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.6278101582014988, |
|
"grad_norm": 0.008000008761882782, |
|
"learning_rate": 1.1757142857142858e-05, |
|
"loss": 0.0232, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.6444629475437136, |
|
"grad_norm": 0.01416712999343872, |
|
"learning_rate": 1.1614285714285716e-05, |
|
"loss": 0.0374, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.6611157368859284, |
|
"grad_norm": 0.009390470571815968, |
|
"learning_rate": 1.1471428571428574e-05, |
|
"loss": 0.0132, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.677768526228143, |
|
"grad_norm": 0.011863280087709427, |
|
"learning_rate": 1.1328571428571428e-05, |
|
"loss": 0.0008, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.694421315570358, |
|
"grad_norm": 0.01807197742164135, |
|
"learning_rate": 1.1185714285714286e-05, |
|
"loss": 0.0006, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.711074104912573, |
|
"grad_norm": 0.03740830719470978, |
|
"learning_rate": 1.1042857142857144e-05, |
|
"loss": 0.0006, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.7277268942547876, |
|
"grad_norm": 0.007398474961519241, |
|
"learning_rate": 1.0900000000000002e-05, |
|
"loss": 0.0005, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.7443796835970025, |
|
"grad_norm": 0.00720438826829195, |
|
"learning_rate": 1.0757142857142857e-05, |
|
"loss": 0.0005, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.7610324729392173, |
|
"grad_norm": 0.007070284336805344, |
|
"learning_rate": 1.0614285714285715e-05, |
|
"loss": 0.0004, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.777685262281432, |
|
"grad_norm": 0.007917001843452454, |
|
"learning_rate": 1.0471428571428573e-05, |
|
"loss": 0.0004, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.794338051623647, |
|
"grad_norm": 0.006697102449834347, |
|
"learning_rate": 1.032857142857143e-05, |
|
"loss": 0.0138, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.8109908409658617, |
|
"grad_norm": 0.008782276883721352, |
|
"learning_rate": 1.0185714285714286e-05, |
|
"loss": 0.0007, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.8276436303080765, |
|
"grad_norm": 0.061034586280584335, |
|
"learning_rate": 1.0042857142857144e-05, |
|
"loss": 0.0005, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.8442964196502913, |
|
"grad_norm": 0.006082494277507067, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.0004, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.860949208992506, |
|
"grad_norm": 0.006540893577039242, |
|
"learning_rate": 9.757142857142858e-06, |
|
"loss": 0.0004, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.877601998334721, |
|
"grad_norm": 0.0454992949962616, |
|
"learning_rate": 9.614285714285714e-06, |
|
"loss": 0.0004, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.894254787676936, |
|
"grad_norm": 0.025658713653683662, |
|
"learning_rate": 9.471428571428572e-06, |
|
"loss": 0.0005, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.9109075770191506, |
|
"grad_norm": 0.005761799868196249, |
|
"learning_rate": 9.328571428571429e-06, |
|
"loss": 0.0004, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.9275603663613654, |
|
"grad_norm": 0.006150644738227129, |
|
"learning_rate": 9.185714285714287e-06, |
|
"loss": 0.0004, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.94421315570358, |
|
"grad_norm": 0.0057544950395822525, |
|
"learning_rate": 9.042857142857143e-06, |
|
"loss": 0.0003, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.960865945045795, |
|
"grad_norm": 0.006148363929241896, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.0248, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.97751873438801, |
|
"grad_norm": 0.005814474541693926, |
|
"learning_rate": 8.757142857142858e-06, |
|
"loss": 0.0004, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.9941715237302247, |
|
"grad_norm": 0.0060193813405931, |
|
"learning_rate": 8.614285714285716e-06, |
|
"loss": 0.0003, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.998, |
|
"eval_loss": 0.013417072594165802, |
|
"eval_runtime": 41.1291, |
|
"eval_samples_per_second": 48.627, |
|
"eval_steps_per_second": 12.157, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 4.009991673605329, |
|
"grad_norm": 0.006395525299012661, |
|
"learning_rate": 8.471428571428572e-06, |
|
"loss": 0.0003, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 4.0266444629475435, |
|
"grad_norm": 0.005516465287655592, |
|
"learning_rate": 8.32857142857143e-06, |
|
"loss": 0.0293, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 4.043297252289759, |
|
"grad_norm": 0.5823869705200195, |
|
"learning_rate": 8.185714285714286e-06, |
|
"loss": 0.0004, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 4.059950041631973, |
|
"grad_norm": 0.005808957852423191, |
|
"learning_rate": 8.042857142857143e-06, |
|
"loss": 0.0005, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 4.076602830974188, |
|
"grad_norm": 0.00578009570017457, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.0003, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 4.093255620316403, |
|
"grad_norm": 0.005703100468963385, |
|
"learning_rate": 7.757142857142857e-06, |
|
"loss": 0.0003, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 4.109908409658618, |
|
"grad_norm": 0.005927779711782932, |
|
"learning_rate": 7.614285714285715e-06, |
|
"loss": 0.0003, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.126561199000832, |
|
"grad_norm": 0.006832882761955261, |
|
"learning_rate": 7.471428571428571e-06, |
|
"loss": 0.0225, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.143213988343048, |
|
"grad_norm": 0.005367404315620661, |
|
"learning_rate": 7.328571428571429e-06, |
|
"loss": 0.0008, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 4.159866777685262, |
|
"grad_norm": 0.005708944518119097, |
|
"learning_rate": 7.185714285714286e-06, |
|
"loss": 0.038, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.176519567027477, |
|
"grad_norm": 0.007482975255697966, |
|
"learning_rate": 7.042857142857144e-06, |
|
"loss": 0.0004, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 4.193172356369692, |
|
"grad_norm": 0.007174965925514698, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.0006, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 4.209825145711907, |
|
"grad_norm": 0.009364173747599125, |
|
"learning_rate": 6.757142857142858e-06, |
|
"loss": 0.0005, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.226477935054121, |
|
"grad_norm": 0.007082544732838869, |
|
"learning_rate": 6.614285714285715e-06, |
|
"loss": 0.0258, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 4.2431307243963365, |
|
"grad_norm": 0.006570044904947281, |
|
"learning_rate": 6.4714285714285715e-06, |
|
"loss": 0.0004, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.259783513738551, |
|
"grad_norm": 0.006020836066454649, |
|
"learning_rate": 6.3285714285714296e-06, |
|
"loss": 0.0006, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 4.276436303080766, |
|
"grad_norm": 0.005941543262451887, |
|
"learning_rate": 6.185714285714286e-06, |
|
"loss": 0.0009, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 4.2930890924229805, |
|
"grad_norm": 0.0074948640540242195, |
|
"learning_rate": 6.042857142857144e-06, |
|
"loss": 0.0003, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 4.309741881765196, |
|
"grad_norm": 0.0059341308660805225, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.0004, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 4.32639467110741, |
|
"grad_norm": 0.00568350637331605, |
|
"learning_rate": 5.7571428571428574e-06, |
|
"loss": 0.0004, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.343047460449625, |
|
"grad_norm": 0.005876988638192415, |
|
"learning_rate": 5.614285714285715e-06, |
|
"loss": 0.0311, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 4.35970024979184, |
|
"grad_norm": 0.006036726292222738, |
|
"learning_rate": 5.471428571428572e-06, |
|
"loss": 0.0004, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 4.376353039134055, |
|
"grad_norm": 0.013689212501049042, |
|
"learning_rate": 5.328571428571428e-06, |
|
"loss": 0.0017, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 4.393005828476269, |
|
"grad_norm": 0.1334390938282013, |
|
"learning_rate": 5.185714285714286e-06, |
|
"loss": 0.0004, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.409658617818485, |
|
"grad_norm": 0.023301932960748672, |
|
"learning_rate": 5.042857142857144e-06, |
|
"loss": 0.0004, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.426311407160699, |
|
"grad_norm": 1.0275276899337769, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.0004, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.442964196502914, |
|
"grad_norm": 0.005459700245410204, |
|
"learning_rate": 4.757142857142858e-06, |
|
"loss": 0.0003, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.459616985845129, |
|
"grad_norm": 0.019774090498685837, |
|
"learning_rate": 4.614285714285715e-06, |
|
"loss": 0.0327, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.476269775187344, |
|
"grad_norm": 0.009837147779762745, |
|
"learning_rate": 4.471428571428571e-06, |
|
"loss": 0.0125, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.492922564529558, |
|
"grad_norm": 0.005503670312464237, |
|
"learning_rate": 4.328571428571429e-06, |
|
"loss": 0.0003, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.5095753538717736, |
|
"grad_norm": 0.0053068650886416435, |
|
"learning_rate": 4.185714285714286e-06, |
|
"loss": 0.0004, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.526228143213988, |
|
"grad_norm": 0.005588301923125982, |
|
"learning_rate": 4.042857142857144e-06, |
|
"loss": 0.0222, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.542880932556203, |
|
"grad_norm": 0.0052786581218242645, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 0.0003, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.559533721898418, |
|
"grad_norm": 0.005984195042401552, |
|
"learning_rate": 3.7571428571428575e-06, |
|
"loss": 0.0003, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.576186511240633, |
|
"grad_norm": 0.005208215676248074, |
|
"learning_rate": 3.6142857142857147e-06, |
|
"loss": 0.0003, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.592839300582847, |
|
"grad_norm": 0.16855907440185547, |
|
"learning_rate": 3.471428571428572e-06, |
|
"loss": 0.0164, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.609492089925062, |
|
"grad_norm": 0.006120254285633564, |
|
"learning_rate": 3.3285714285714286e-06, |
|
"loss": 0.0003, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.626144879267278, |
|
"grad_norm": 0.013766973279416561, |
|
"learning_rate": 3.185714285714286e-06, |
|
"loss": 0.0003, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.642797668609492, |
|
"grad_norm": 0.10941141843795776, |
|
"learning_rate": 3.042857142857143e-06, |
|
"loss": 0.0003, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.659450457951706, |
|
"grad_norm": 0.005121996160596609, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.0003, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.676103247293922, |
|
"grad_norm": 0.005419144406914711, |
|
"learning_rate": 2.757142857142857e-06, |
|
"loss": 0.0087, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.692756036636137, |
|
"grad_norm": 0.005216961260885, |
|
"learning_rate": 2.614285714285715e-06, |
|
"loss": 0.0038, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.709408825978351, |
|
"grad_norm": 0.005281897261738777, |
|
"learning_rate": 2.4714285714285717e-06, |
|
"loss": 0.0003, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.726061615320567, |
|
"grad_norm": 0.005348918028175831, |
|
"learning_rate": 2.3285714285714285e-06, |
|
"loss": 0.0017, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.742714404662781, |
|
"grad_norm": 0.005255383439362049, |
|
"learning_rate": 2.185714285714286e-06, |
|
"loss": 0.0003, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.759367194004996, |
|
"grad_norm": 0.004928835202008486, |
|
"learning_rate": 2.0428571428571433e-06, |
|
"loss": 0.0003, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.776019983347211, |
|
"grad_norm": 11.944090843200684, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 0.0137, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.792672772689426, |
|
"grad_norm": 0.0049188993871212006, |
|
"learning_rate": 1.7571428571428572e-06, |
|
"loss": 0.0004, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.80932556203164, |
|
"grad_norm": 0.005385698284953833, |
|
"learning_rate": 1.6142857142857144e-06, |
|
"loss": 0.0003, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.8259783513738554, |
|
"grad_norm": 0.0051418510265648365, |
|
"learning_rate": 1.4714285714285713e-06, |
|
"loss": 0.0004, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.84263114071607, |
|
"grad_norm": 0.005018496885895729, |
|
"learning_rate": 1.3285714285714287e-06, |
|
"loss": 0.0004, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 4.859283930058285, |
|
"grad_norm": 0.005061938893049955, |
|
"learning_rate": 1.185714285714286e-06, |
|
"loss": 0.0003, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 4.875936719400499, |
|
"grad_norm": 0.0050169117748737335, |
|
"learning_rate": 1.0428571428571429e-06, |
|
"loss": 0.0003, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.892589508742715, |
|
"grad_norm": 0.009882300160825253, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 0.0003, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.909242298084929, |
|
"grad_norm": 0.005003802943974733, |
|
"learning_rate": 7.571428571428572e-07, |
|
"loss": 0.0003, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.925895087427144, |
|
"grad_norm": 0.005712533835321665, |
|
"learning_rate": 6.142857142857143e-07, |
|
"loss": 0.0091, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.942547876769359, |
|
"grad_norm": 0.005044932942837477, |
|
"learning_rate": 4.714285714285715e-07, |
|
"loss": 0.0004, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.959200666111574, |
|
"grad_norm": 0.004893309436738491, |
|
"learning_rate": 3.2857142857142857e-07, |
|
"loss": 0.0003, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.975853455453788, |
|
"grad_norm": 0.004968111868947744, |
|
"learning_rate": 1.8571428571428572e-07, |
|
"loss": 0.0003, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.992506244796004, |
|
"grad_norm": 0.004901645239442587, |
|
"learning_rate": 4.2857142857142865e-08, |
|
"loss": 0.0003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.992506244796004, |
|
"eval_accuracy": 0.998, |
|
"eval_loss": 0.012748559936881065, |
|
"eval_runtime": 52.3226, |
|
"eval_samples_per_second": 38.224, |
|
"eval_steps_per_second": 9.556, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.992506244796004, |
|
"step": 3000, |
|
"total_flos": 1.2141162127800926e+19, |
|
"train_loss": 0.10829704528961641, |
|
"train_runtime": 6533.0547, |
|
"train_samples_per_second": 22.052, |
|
"train_steps_per_second": 0.459 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2141162127800926e+19, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|