|
{ |
|
"best_metric": 1.0316544771194458, |
|
"best_model_checkpoint": "checkpoints/instrucode/text_models/llama-13b/checkpoint-3000", |
|
"epoch": 0.9599232061435086, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 1.9718, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 1.9714, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.4e-05, |
|
"loss": 1.6976, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 1.4059, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 1.1741, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00017399999999999997, |
|
"loss": 1.1727, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000204, |
|
"loss": 1.1187, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000234, |
|
"loss": 1.1282, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 1.1329, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000294, |
|
"loss": 1.0721, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002998454106280193, |
|
"loss": 1.118, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029965217391304344, |
|
"loss": 1.1217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002994589371980676, |
|
"loss": 1.1033, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002992657004830917, |
|
"loss": 1.1104, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002990724637681159, |
|
"loss": 1.078, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029887922705314007, |
|
"loss": 1.0807, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002986859903381642, |
|
"loss": 1.0875, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029849275362318835, |
|
"loss": 1.0463, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029829951690821255, |
|
"loss": 1.1217, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002981062801932367, |
|
"loss": 1.0452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.0776479244232178, |
|
"eval_runtime": 197.5388, |
|
"eval_samples_per_second": 10.125, |
|
"eval_steps_per_second": 1.266, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029791304347826083, |
|
"loss": 1.0228, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000297719806763285, |
|
"loss": 1.092, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002975265700483091, |
|
"loss": 1.104, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002973333333333333, |
|
"loss": 1.0577, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029714009661835746, |
|
"loss": 1.0892, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002969468599033816, |
|
"loss": 1.0915, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029675362318840574, |
|
"loss": 1.0616, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002965603864734299, |
|
"loss": 1.071, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002963671497584541, |
|
"loss": 1.08, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002961739130434782, |
|
"loss": 1.0947, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029598067632850237, |
|
"loss": 1.0547, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002957874396135265, |
|
"loss": 1.0723, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002955942028985507, |
|
"loss": 1.0367, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029540096618357485, |
|
"loss": 1.0349, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000295207729468599, |
|
"loss": 1.0507, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029501449275362313, |
|
"loss": 1.0568, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002948212560386473, |
|
"loss": 1.0861, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002946280193236715, |
|
"loss": 1.0287, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002944347826086956, |
|
"loss": 1.1086, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00029424154589371976, |
|
"loss": 1.0916, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.0622169971466064, |
|
"eval_runtime": 197.2041, |
|
"eval_samples_per_second": 10.142, |
|
"eval_steps_per_second": 1.268, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002940483091787439, |
|
"loss": 1.075, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002938550724637681, |
|
"loss": 1.0899, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029366183574879224, |
|
"loss": 1.0565, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002934685990338164, |
|
"loss": 1.0414, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002932753623188405, |
|
"loss": 1.1033, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00029308212560386467, |
|
"loss": 1.0518, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00029288888888888886, |
|
"loss": 1.1324, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000292695652173913, |
|
"loss": 1.0275, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00029250241545893715, |
|
"loss": 1.029, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002923091787439613, |
|
"loss": 1.0648, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002921159420289855, |
|
"loss": 1.0746, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00029192270531400963, |
|
"loss": 1.0845, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002917294685990338, |
|
"loss": 1.0186, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002915362318840579, |
|
"loss": 1.0984, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002913429951690821, |
|
"loss": 1.0706, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00029114975845410626, |
|
"loss": 1.0184, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002909565217391304, |
|
"loss": 1.0686, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00029076328502415454, |
|
"loss": 1.0391, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002905700483091787, |
|
"loss": 1.0616, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002903768115942029, |
|
"loss": 1.0565, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.0566095113754272, |
|
"eval_runtime": 196.6646, |
|
"eval_samples_per_second": 10.17, |
|
"eval_steps_per_second": 1.271, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000290183574879227, |
|
"loss": 1.0609, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028999033816425117, |
|
"loss": 1.0633, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002897971014492753, |
|
"loss": 1.0049, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002896038647342995, |
|
"loss": 0.9926, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028941062801932365, |
|
"loss": 1.0949, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002892173913043478, |
|
"loss": 1.0686, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028902415458937193, |
|
"loss": 1.0152, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002888309178743961, |
|
"loss": 1.0462, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028863768115942027, |
|
"loss": 1.0438, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002884444444444444, |
|
"loss": 1.0124, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00028825120772946856, |
|
"loss": 0.9959, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002880579710144927, |
|
"loss": 1.0467, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002878647342995169, |
|
"loss": 1.0502, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00028767149758454104, |
|
"loss": 1.0763, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002874782608695652, |
|
"loss": 1.0821, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002872850241545893, |
|
"loss": 1.0667, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00028709178743961347, |
|
"loss": 1.0604, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00028689855072463766, |
|
"loss": 1.0555, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002867053140096618, |
|
"loss": 1.0861, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00028651207729468595, |
|
"loss": 1.0458, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.0510022640228271, |
|
"eval_runtime": 196.1318, |
|
"eval_samples_per_second": 10.197, |
|
"eval_steps_per_second": 1.275, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002863188405797101, |
|
"loss": 1.0542, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002861256038647343, |
|
"loss": 1.0783, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00028593236714975843, |
|
"loss": 1.0668, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00028573913043478257, |
|
"loss": 1.0369, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002855458937198067, |
|
"loss": 1.0321, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002853526570048309, |
|
"loss": 1.0515, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00028515942028985505, |
|
"loss": 1.0129, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002849661835748792, |
|
"loss": 1.0042, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00028477294685990334, |
|
"loss": 1.0086, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002845797101449275, |
|
"loss": 1.0555, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002843864734299517, |
|
"loss": 0.9976, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002841932367149758, |
|
"loss": 1.0679, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00028399999999999996, |
|
"loss": 1.0039, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002838067632850241, |
|
"loss": 1.0254, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002836135265700483, |
|
"loss": 1.0109, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00028342028985507245, |
|
"loss": 1.0158, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002832270531400966, |
|
"loss": 1.0165, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00028303381642512073, |
|
"loss": 1.031, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002828405797101449, |
|
"loss": 1.0534, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00028264734299516907, |
|
"loss": 1.066, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.047478199005127, |
|
"eval_runtime": 196.1193, |
|
"eval_samples_per_second": 10.198, |
|
"eval_steps_per_second": 1.275, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002824541062801932, |
|
"loss": 1.0527, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00028226086956521736, |
|
"loss": 1.0677, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002820676328502415, |
|
"loss": 1.0409, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002818743961352657, |
|
"loss": 1.0414, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00028168115942028984, |
|
"loss": 1.0466, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000281487922705314, |
|
"loss": 1.0357, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002812946859903381, |
|
"loss": 1.0418, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00028110144927536227, |
|
"loss": 1.0796, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00028090821256038646, |
|
"loss": 1.0579, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002807149758454106, |
|
"loss": 1.0631, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00028052173913043475, |
|
"loss": 1.0221, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002803285024154589, |
|
"loss": 1.049, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002801352657004831, |
|
"loss": 1.0192, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00027994202898550723, |
|
"loss": 0.9978, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00027974879227053137, |
|
"loss": 1.0044, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002795555555555555, |
|
"loss": 1.0802, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002793623188405797, |
|
"loss": 1.1231, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00027916908212560385, |
|
"loss": 1.0323, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000278975845410628, |
|
"loss": 1.0908, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00027878260869565214, |
|
"loss": 1.0456, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.0448572635650635, |
|
"eval_runtime": 197.1129, |
|
"eval_samples_per_second": 10.146, |
|
"eval_steps_per_second": 1.268, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002785893719806763, |
|
"loss": 1.0119, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002783961352657005, |
|
"loss": 1.0101, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002782028985507246, |
|
"loss": 1.0481, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00027800966183574876, |
|
"loss": 1.0478, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002778164251207729, |
|
"loss": 1.0388, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002776231884057971, |
|
"loss": 1.0566, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00027742995169082124, |
|
"loss": 1.084, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002772367149758454, |
|
"loss": 1.0515, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00027704347826086953, |
|
"loss": 1.053, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00027685024154589367, |
|
"loss": 1.0276, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00027665700483091787, |
|
"loss": 1.0394, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000276463768115942, |
|
"loss": 0.9991, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00027627053140096615, |
|
"loss": 1.0272, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002760772946859903, |
|
"loss": 1.0684, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002758840579710145, |
|
"loss": 1.0865, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00027569082125603864, |
|
"loss": 1.0375, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002754975845410628, |
|
"loss": 1.0922, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002753043478260869, |
|
"loss": 1.0611, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00027511111111111106, |
|
"loss": 0.9878, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00027491787439613526, |
|
"loss": 1.0658, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.0436240434646606, |
|
"eval_runtime": 197.0464, |
|
"eval_samples_per_second": 10.15, |
|
"eval_steps_per_second": 1.269, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002747246376811594, |
|
"loss": 1.0201, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00027453140096618355, |
|
"loss": 1.0537, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002743381642512077, |
|
"loss": 1.051, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002741449275362319, |
|
"loss": 1.0028, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00027395169082125603, |
|
"loss": 1.0521, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00027375845410628017, |
|
"loss": 1.0732, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002735652173913043, |
|
"loss": 1.0427, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002733719806763285, |
|
"loss": 1.0333, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00027317874396135265, |
|
"loss": 1.0084, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002729855072463768, |
|
"loss": 1.0555, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00027279227053140094, |
|
"loss": 1.0419, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002725990338164251, |
|
"loss": 1.0846, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002724057971014493, |
|
"loss": 1.0375, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002722125603864734, |
|
"loss": 1.0593, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00027201932367149756, |
|
"loss": 1.017, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002718260869565217, |
|
"loss": 1.0887, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002716328502415459, |
|
"loss": 1.0333, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027143961352657004, |
|
"loss": 1.0439, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002712463768115942, |
|
"loss": 1.0221, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027105314009661833, |
|
"loss": 1.063, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.042041301727295, |
|
"eval_runtime": 196.9233, |
|
"eval_samples_per_second": 10.156, |
|
"eval_steps_per_second": 1.27, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00027085990338164247, |
|
"loss": 1.0347, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00027066666666666667, |
|
"loss": 1.027, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002704734299516908, |
|
"loss": 0.9961, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00027028019323671495, |
|
"loss": 0.9966, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002700869565217391, |
|
"loss": 1.0474, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002698937198067633, |
|
"loss": 1.017, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00026970048309178744, |
|
"loss": 1.0663, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002695072463768116, |
|
"loss": 1.0505, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002693140096618357, |
|
"loss": 1.0954, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00026912077294685986, |
|
"loss": 1.0136, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00026892753623188406, |
|
"loss": 1.0749, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002687342995169082, |
|
"loss": 0.9969, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00026854106280193234, |
|
"loss": 1.0681, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002683478260869565, |
|
"loss": 1.0237, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002681545893719807, |
|
"loss": 1.0473, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002679613526570048, |
|
"loss": 1.0249, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00026776811594202897, |
|
"loss": 0.999, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002675748792270531, |
|
"loss": 1.0299, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002673816425120773, |
|
"loss": 1.0885, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00026718840579710145, |
|
"loss": 1.0552, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.0400397777557373, |
|
"eval_runtime": 196.3928, |
|
"eval_samples_per_second": 10.184, |
|
"eval_steps_per_second": 1.273, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002669951690821256, |
|
"loss": 1.0717, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00026680193236714974, |
|
"loss": 1.0412, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002666086956521739, |
|
"loss": 1.0221, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002664154589371981, |
|
"loss": 1.0729, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002662222222222222, |
|
"loss": 1.0284, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00026602898550724636, |
|
"loss": 1.0319, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002658357487922705, |
|
"loss": 1.0305, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002656425120772947, |
|
"loss": 1.0398, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00026544927536231884, |
|
"loss": 1.0427, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.000265256038647343, |
|
"loss": 0.9895, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00026506280193236713, |
|
"loss": 1.0631, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00026486956521739127, |
|
"loss": 1.0165, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00026467632850241547, |
|
"loss": 1.0257, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002644830917874396, |
|
"loss": 1.0475, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00026428985507246375, |
|
"loss": 1.0641, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002640966183574879, |
|
"loss": 1.014, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002639033816425121, |
|
"loss": 1.021, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00026371014492753623, |
|
"loss": 1.0223, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002635169082125604, |
|
"loss": 1.0313, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002633236714975845, |
|
"loss": 1.069, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.037448525428772, |
|
"eval_runtime": 196.1715, |
|
"eval_samples_per_second": 10.195, |
|
"eval_steps_per_second": 1.274, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00026313043478260866, |
|
"loss": 1.0353, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00026293719806763286, |
|
"loss": 1.0634, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000262743961352657, |
|
"loss": 1.0551, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00026255072463768114, |
|
"loss": 1.0555, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002623574879227053, |
|
"loss": 1.019, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002621642512077295, |
|
"loss": 1.0524, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002619710144927536, |
|
"loss": 1.0285, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00026177777777777777, |
|
"loss": 1.0271, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002615845410628019, |
|
"loss": 1.0253, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002613913043478261, |
|
"loss": 1.0134, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00026119806763285025, |
|
"loss": 1.0424, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002610048309178744, |
|
"loss": 1.0769, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00026081159420289854, |
|
"loss": 1.0464, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002606183574879227, |
|
"loss": 1.0213, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002604251207729469, |
|
"loss": 1.0178, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000260231884057971, |
|
"loss": 1.0113, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00026003864734299516, |
|
"loss": 1.0261, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002598454106280193, |
|
"loss": 1.0522, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00025965217391304344, |
|
"loss": 1.0553, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00025945893719806764, |
|
"loss": 1.036, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.0350252389907837, |
|
"eval_runtime": 195.1168, |
|
"eval_samples_per_second": 10.25, |
|
"eval_steps_per_second": 1.281, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002592657004830918, |
|
"loss": 1.0341, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002590724637681159, |
|
"loss": 1.0552, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00025887922705314007, |
|
"loss": 0.9925, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00025868599033816427, |
|
"loss": 1.0427, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002584927536231884, |
|
"loss": 1.0169, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00025829951690821255, |
|
"loss": 1.028, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002581062801932367, |
|
"loss": 1.0556, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00025791304347826084, |
|
"loss": 1.0242, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00025771980676328503, |
|
"loss": 1.0307, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002575265700483092, |
|
"loss": 1.0083, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002573333333333333, |
|
"loss": 1.0146, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00025714009661835746, |
|
"loss": 0.993, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002569468599033816, |
|
"loss": 1.0726, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002567536231884058, |
|
"loss": 1.028, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00025656038647342994, |
|
"loss": 1.031, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002563671497584541, |
|
"loss": 1.0175, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00025617391304347823, |
|
"loss": 1.0618, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002559806763285024, |
|
"loss": 1.0442, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00025578743961352657, |
|
"loss": 1.0507, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002555942028985507, |
|
"loss": 1.0347, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.0352022647857666, |
|
"eval_runtime": 194.821, |
|
"eval_samples_per_second": 10.266, |
|
"eval_steps_per_second": 1.283, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00025540096618357485, |
|
"loss": 1.0786, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.000255207729468599, |
|
"loss": 1.0542, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002550144927536232, |
|
"loss": 1.0799, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00025482125603864733, |
|
"loss": 1.0518, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002546280193236715, |
|
"loss": 1.005, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002544347826086956, |
|
"loss": 1.0239, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002542415458937198, |
|
"loss": 1.0243, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00025404830917874396, |
|
"loss": 1.0144, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002538550724637681, |
|
"loss": 0.9995, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00025366183574879224, |
|
"loss": 1.0222, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002534685990338164, |
|
"loss": 1.0381, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002532753623188406, |
|
"loss": 1.0309, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002530821256038647, |
|
"loss": 1.0176, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00025288888888888887, |
|
"loss": 1.0085, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.000252695652173913, |
|
"loss": 1.049, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00025250241545893715, |
|
"loss": 1.0146, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00025230917874396135, |
|
"loss": 1.0584, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002521159420289855, |
|
"loss": 1.0562, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00025192270531400963, |
|
"loss": 1.0391, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002517294685990338, |
|
"loss": 1.0388, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.0332427024841309, |
|
"eval_runtime": 194.4995, |
|
"eval_samples_per_second": 10.283, |
|
"eval_steps_per_second": 1.285, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000251536231884058, |
|
"loss": 1.0589, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002513429951690821, |
|
"loss": 1.0123, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00025114975845410626, |
|
"loss": 0.9994, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002509565217391304, |
|
"loss": 1.0468, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00025076328502415454, |
|
"loss": 1.0455, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00025057004830917874, |
|
"loss": 1.0127, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002503768115942029, |
|
"loss": 1.0031, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.000250183574879227, |
|
"loss": 1.0568, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00024999033816425117, |
|
"loss": 1.0687, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00024979710144927537, |
|
"loss": 1.0378, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002496038647342995, |
|
"loss": 1.0761, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00024941062801932365, |
|
"loss": 0.9987, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002492173913043478, |
|
"loss": 1.0179, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00024902415458937194, |
|
"loss": 1.0491, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00024883091787439613, |
|
"loss": 1.0199, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002486376811594203, |
|
"loss": 1.062, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002484444444444444, |
|
"loss": 1.0389, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00024825120772946856, |
|
"loss": 1.027, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002480579710144927, |
|
"loss": 1.0093, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002478647342995169, |
|
"loss": 1.0448, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 1.032317876815796, |
|
"eval_runtime": 196.407, |
|
"eval_samples_per_second": 10.183, |
|
"eval_steps_per_second": 1.273, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00024767149758454104, |
|
"loss": 1.0627, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002474782608695652, |
|
"loss": 1.0202, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00024728502415458933, |
|
"loss": 1.0905, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002470917874396135, |
|
"loss": 1.0552, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00024689855072463767, |
|
"loss": 1.0605, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002467053140096618, |
|
"loss": 1.0194, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00024651207729468595, |
|
"loss": 1.077, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002463188405797101, |
|
"loss": 1.0845, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002461256038647343, |
|
"loss": 1.0225, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00024593236714975843, |
|
"loss": 1.0217, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002457391304347826, |
|
"loss": 1.0415, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002455458937198067, |
|
"loss": 1.0581, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00024535265700483086, |
|
"loss": 1.0459, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00024515942028985506, |
|
"loss": 1.0505, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002449661835748792, |
|
"loss": 1.0562, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00024477294685990334, |
|
"loss": 0.9631, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002445797101449275, |
|
"loss": 1.0287, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002443864734299517, |
|
"loss": 1.0227, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002441932367149758, |
|
"loss": 1.03, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000244, |
|
"loss": 1.0795, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.0316544771194458, |
|
"eval_runtime": 197.1671, |
|
"eval_samples_per_second": 10.144, |
|
"eval_steps_per_second": 1.268, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 15625, |
|
"num_train_epochs": 5, |
|
"save_steps": 200, |
|
"total_flos": 6.448770827226317e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|