|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 0, |
|
"global_step": 452, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004424778761061947, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 0.00039911504424778763, |
|
"loss": 1.3739, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008849557522123894, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 0.00039823008849557525, |
|
"loss": 1.4091, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01327433628318584, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 0.00039734513274336286, |
|
"loss": 1.2628, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.017699115044247787, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 0.0003964601769911505, |
|
"loss": 1.1101, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.022123893805309734, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 0.0003955752212389381, |
|
"loss": 1.344, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02654867256637168, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 0.00039469026548672565, |
|
"loss": 1.1884, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.030973451327433628, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.0003938053097345133, |
|
"loss": 1.1329, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 0.0003929203539823009, |
|
"loss": 1.138, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03982300884955752, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.00039203539823008855, |
|
"loss": 1.0113, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04424778761061947, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.0003911504424778761, |
|
"loss": 1.087, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.048672566371681415, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 0.0003902654867256637, |
|
"loss": 1.1459, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05309734513274336, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 0.00038938053097345134, |
|
"loss": 1.1421, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05752212389380531, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 0.00038849557522123895, |
|
"loss": 1.175, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.061946902654867256, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 0.00038761061946902657, |
|
"loss": 1.2099, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06637168141592921, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 0.0003867256637168142, |
|
"loss": 1.1295, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07079646017699115, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.00038584070796460174, |
|
"loss": 1.0737, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0752212389380531, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 0.0003849557522123894, |
|
"loss": 1.1563, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07964601769911504, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 0.000384070796460177, |
|
"loss": 1.1061, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.084070796460177, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 0.00038318584070796464, |
|
"loss": 1.1052, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08849557522123894, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 0.00038230088495575226, |
|
"loss": 1.0009, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09292035398230089, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.0003814159292035398, |
|
"loss": 0.9805, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09734513274336283, |
|
"grad_norm": 0.03173828125, |
|
"learning_rate": 0.0003805309734513275, |
|
"loss": 1.1098, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10176991150442478, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 0.00037964601769911505, |
|
"loss": 1.0691, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10619469026548672, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 0.00037876106194690266, |
|
"loss": 1.2944, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11061946902654868, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.0003778761061946903, |
|
"loss": 1.0819, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11504424778761062, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 0.0003769911504424779, |
|
"loss": 1.215, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11946902654867257, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.0003761061946902655, |
|
"loss": 1.0624, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12389380530973451, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 0.0003752212389380531, |
|
"loss": 1.0258, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12831858407079647, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 0.00037433628318584073, |
|
"loss": 1.0544, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13274336283185842, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 0.00037345132743362835, |
|
"loss": 1.0203, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13716814159292035, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 0.0003725663716814159, |
|
"loss": 1.1584, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 0.0003716814159292036, |
|
"loss": 0.9215, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14601769911504425, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 0.0003707964601769912, |
|
"loss": 1.1255, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.1504424778761062, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 0.00036991150442477875, |
|
"loss": 1.3504, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15486725663716813, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.0003690265486725664, |
|
"loss": 1.0819, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1592920353982301, |
|
"grad_norm": 0.041259765625, |
|
"learning_rate": 0.000368141592920354, |
|
"loss": 1.2328, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.16371681415929204, |
|
"grad_norm": 0.04345703125, |
|
"learning_rate": 0.00036725663716814165, |
|
"loss": 1.1783, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.168141592920354, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 0.0003663716814159292, |
|
"loss": 1.105, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.17256637168141592, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 0.0003654867256637168, |
|
"loss": 1.1757, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.00036460176991150444, |
|
"loss": 1.1601, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18141592920353983, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 0.00036371681415929205, |
|
"loss": 0.9869, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18584070796460178, |
|
"grad_norm": 0.05419921875, |
|
"learning_rate": 0.00036283185840707967, |
|
"loss": 1.0769, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1902654867256637, |
|
"grad_norm": 0.0361328125, |
|
"learning_rate": 0.0003619469026548673, |
|
"loss": 1.015, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.19469026548672566, |
|
"grad_norm": 0.03564453125, |
|
"learning_rate": 0.00036106194690265484, |
|
"loss": 0.9435, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19911504424778761, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 0.0003601769911504425, |
|
"loss": 1.1832, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.20353982300884957, |
|
"grad_norm": 0.052490234375, |
|
"learning_rate": 0.00035929203539823007, |
|
"loss": 1.1826, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2079646017699115, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.00035840707964601774, |
|
"loss": 1.02, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.21238938053097345, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 0.0003575221238938053, |
|
"loss": 1.0803, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2168141592920354, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.0003566371681415929, |
|
"loss": 1.021, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.22123893805309736, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.0003557522123893806, |
|
"loss": 1.0058, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22566371681415928, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 0.00035486725663716814, |
|
"loss": 1.0489, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.23008849557522124, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 0.0003539823008849558, |
|
"loss": 0.986, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2345132743362832, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 0.00035309734513274337, |
|
"loss": 1.0928, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23893805309734514, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.000352212389380531, |
|
"loss": 1.0037, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.24336283185840707, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 0.0003513274336283186, |
|
"loss": 1.0165, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.24778761061946902, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 0.0003504424778761062, |
|
"loss": 0.9856, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.252212389380531, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 0.00034955752212389383, |
|
"loss": 1.0988, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.25663716814159293, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 0.00034867256637168145, |
|
"loss": 0.9983, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2610619469026549, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 0.000347787610619469, |
|
"loss": 1.0727, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.26548672566371684, |
|
"grad_norm": 0.0380859375, |
|
"learning_rate": 0.0003469026548672567, |
|
"loss": 0.9617, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26991150442477874, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 0.00034601769911504423, |
|
"loss": 1.1435, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.2743362831858407, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.0003451327433628319, |
|
"loss": 1.0895, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.27876106194690264, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 0.00034424778761061946, |
|
"loss": 1.0823, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2831858407079646, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.0003433628318584071, |
|
"loss": 1.1119, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.28761061946902655, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 0.00034247787610619475, |
|
"loss": 1.2428, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2920353982300885, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 0.0003415929203539823, |
|
"loss": 0.9943, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.29646017699115046, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 0.0003407079646017699, |
|
"loss": 1.3215, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3008849557522124, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 0.00033982300884955754, |
|
"loss": 0.9997, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3053097345132743, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 0.00033893805309734515, |
|
"loss": 0.9796, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.30973451327433627, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 0.00033805309734513277, |
|
"loss": 1.1079, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3141592920353982, |
|
"grad_norm": 0.041259765625, |
|
"learning_rate": 0.0003371681415929204, |
|
"loss": 1.0242, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3185840707964602, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 0.000336283185840708, |
|
"loss": 1.0227, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3230088495575221, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.0003353982300884956, |
|
"loss": 0.9375, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3274336283185841, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 0.00033451327433628317, |
|
"loss": 1.0104, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.33185840707964603, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.00033362831858407084, |
|
"loss": 1.1685, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.336283185840708, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 0.0003327433628318584, |
|
"loss": 1.2954, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3407079646017699, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 0.000331858407079646, |
|
"loss": 0.9816, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.34513274336283184, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 0.00033097345132743363, |
|
"loss": 1.0791, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3495575221238938, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 0.00033008849557522124, |
|
"loss": 1.0989, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 0.00032920353982300886, |
|
"loss": 1.1164, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3584070796460177, |
|
"grad_norm": 0.0400390625, |
|
"learning_rate": 0.00032831858407079647, |
|
"loss": 1.2053, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.36283185840707965, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.0003274336283185841, |
|
"loss": 1.0322, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3672566371681416, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 0.0003265486725663717, |
|
"loss": 0.9184, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.37168141592920356, |
|
"grad_norm": 0.037353515625, |
|
"learning_rate": 0.0003256637168141593, |
|
"loss": 1.0874, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.37610619469026546, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 0.00032477876106194693, |
|
"loss": 1.0051, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3805309734513274, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 0.00032389380530973454, |
|
"loss": 1.1232, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.38495575221238937, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 0.0003230088495575221, |
|
"loss": 0.9745, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.3893805309734513, |
|
"grad_norm": 0.037353515625, |
|
"learning_rate": 0.0003221238938053098, |
|
"loss": 0.9092, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3938053097345133, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 0.00032123893805309733, |
|
"loss": 1.0712, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.39823008849557523, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 0.000320353982300885, |
|
"loss": 1.0908, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4026548672566372, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 0.00031946902654867256, |
|
"loss": 1.0897, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.40707964601769914, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 0.0003185840707964602, |
|
"loss": 0.8939, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.41150442477876104, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 0.0003176991150442478, |
|
"loss": 1.0992, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.415929203539823, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 0.0003168141592920354, |
|
"loss": 0.937, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.42035398230088494, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 0.000315929203539823, |
|
"loss": 1.1744, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4247787610619469, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.00031504424778761064, |
|
"loss": 1.0227, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.42920353982300885, |
|
"grad_norm": 0.041259765625, |
|
"learning_rate": 0.00031415929203539825, |
|
"loss": 1.112, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.4336283185840708, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 0.00031327433628318586, |
|
"loss": 0.9122, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.43805309734513276, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 0.0003123893805309735, |
|
"loss": 1.0073, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4424778761061947, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 0.0003115044247787611, |
|
"loss": 1.0326, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4469026548672566, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 0.0003106194690265487, |
|
"loss": 1.0014, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.45132743362831856, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.00030973451327433627, |
|
"loss": 1.1081, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4557522123893805, |
|
"grad_norm": 0.041015625, |
|
"learning_rate": 0.00030884955752212394, |
|
"loss": 1.1268, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.46017699115044247, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 0.0003079646017699115, |
|
"loss": 1.0382, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4646017699115044, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 0.00030707964601769917, |
|
"loss": 0.9887, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4690265486725664, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 0.0003061946902654867, |
|
"loss": 1.0143, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.47345132743362833, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 0.00030530973451327434, |
|
"loss": 1.0332, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4778761061946903, |
|
"grad_norm": 0.044921875, |
|
"learning_rate": 0.00030442477876106196, |
|
"loss": 0.9422, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.4823008849557522, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.00030353982300884957, |
|
"loss": 1.0376, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.48672566371681414, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 0.0003026548672566372, |
|
"loss": 1.1175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4911504424778761, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 0.0003017699115044248, |
|
"loss": 0.9571, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.49557522123893805, |
|
"grad_norm": 0.0478515625, |
|
"learning_rate": 0.00030088495575221236, |
|
"loss": 1.0857, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.00030000000000000003, |
|
"loss": 0.9346, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.504424778761062, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 0.00029911504424778764, |
|
"loss": 1.0317, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5088495575221239, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.00029823008849557526, |
|
"loss": 1.0535, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5132743362831859, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 0.00029734513274336287, |
|
"loss": 1.0437, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5176991150442478, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 0.00029646017699115043, |
|
"loss": 1.0253, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5221238938053098, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 0.0002955752212389381, |
|
"loss": 1.022, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5265486725663717, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 0.00029469026548672566, |
|
"loss": 1.2344, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 0.0002938053097345133, |
|
"loss": 0.9828, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5353982300884956, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 0.0002929203539823009, |
|
"loss": 0.9207, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5398230088495575, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 0.0002920353982300885, |
|
"loss": 0.9794, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5442477876106194, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 0.0002911504424778761, |
|
"loss": 1.0962, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5486725663716814, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.00029026548672566373, |
|
"loss": 1.1614, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5530973451327433, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 0.00028938053097345135, |
|
"loss": 0.9082, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5575221238938053, |
|
"grad_norm": 0.037353515625, |
|
"learning_rate": 0.00028849557522123896, |
|
"loss": 0.9406, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5619469026548672, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 0.0002876106194690265, |
|
"loss": 1.1105, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5663716814159292, |
|
"grad_norm": 0.051025390625, |
|
"learning_rate": 0.0002867256637168142, |
|
"loss": 0.9679, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5707964601769911, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 0.00028584070796460175, |
|
"loss": 0.9529, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5752212389380531, |
|
"grad_norm": 0.056396484375, |
|
"learning_rate": 0.00028495575221238937, |
|
"loss": 1.0341, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5796460176991151, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 0.00028407079646017704, |
|
"loss": 0.9493, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.584070796460177, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 0.0002831858407079646, |
|
"loss": 1.262, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.588495575221239, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 0.00028230088495575226, |
|
"loss": 0.9412, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5929203539823009, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 0.0002814159292035398, |
|
"loss": 1.0563, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5973451327433629, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 0.00028053097345132744, |
|
"loss": 1.0201, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6017699115044248, |
|
"grad_norm": 0.04052734375, |
|
"learning_rate": 0.00027964601769911505, |
|
"loss": 1.0401, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6061946902654868, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 0.00027876106194690267, |
|
"loss": 1.0241, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6106194690265486, |
|
"grad_norm": 0.05810546875, |
|
"learning_rate": 0.0002778761061946903, |
|
"loss": 1.1263, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6150442477876106, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 0.0002769911504424779, |
|
"loss": 1.0869, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6194690265486725, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 0.0002761061946902655, |
|
"loss": 0.9944, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6238938053097345, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 0.0002752212389380531, |
|
"loss": 0.9675, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6283185840707964, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 0.0002743362831858407, |
|
"loss": 1.0227, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6327433628318584, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 0.00027345132743362836, |
|
"loss": 1.0381, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6371681415929203, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 0.0002725663716814159, |
|
"loss": 0.9385, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6415929203539823, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 0.00027168141592920353, |
|
"loss": 1.001, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6460176991150443, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 0.0002707964601769912, |
|
"loss": 1.04, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6504424778761062, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 0.00026991150442477876, |
|
"loss": 0.9735, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6548672566371682, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 0.00026902654867256643, |
|
"loss": 1.0873, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6592920353982301, |
|
"grad_norm": 0.04638671875, |
|
"learning_rate": 0.000268141592920354, |
|
"loss": 1.1032, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6637168141592921, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 0.0002672566371681416, |
|
"loss": 1.0414, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.668141592920354, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 0.0002663716814159292, |
|
"loss": 0.892, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.672566371681416, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 0.00026548672566371683, |
|
"loss": 0.9048, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6769911504424779, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.00026460176991150445, |
|
"loss": 1.0745, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6814159292035398, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.00026371681415929206, |
|
"loss": 1.2796, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6858407079646017, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 0.0002628318584070796, |
|
"loss": 0.9484, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6902654867256637, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 0.0002619469026548673, |
|
"loss": 1.0571, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6946902654867256, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 0.00026106194690265485, |
|
"loss": 1.1435, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6991150442477876, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 0.0002601769911504425, |
|
"loss": 1.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7035398230088495, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 0.0002592920353982301, |
|
"loss": 1.0044, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 0.0002584070796460177, |
|
"loss": 1.001, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7123893805309734, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 0.0002575221238938053, |
|
"loss": 1.0643, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7168141592920354, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 0.0002566371681415929, |
|
"loss": 1.2461, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7212389380530974, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 0.00025575221238938054, |
|
"loss": 1.297, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7256637168141593, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.00025486725663716815, |
|
"loss": 0.9718, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7300884955752213, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 0.00025398230088495577, |
|
"loss": 0.9553, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7345132743362832, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.0002530973451327434, |
|
"loss": 1.074, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7389380530973452, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 0.000252212389380531, |
|
"loss": 1.0015, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7433628318584071, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 0.0002513274336283186, |
|
"loss": 1.021, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7477876106194691, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 0.0002504424778761062, |
|
"loss": 1.063, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.7522123893805309, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 0.0002495575221238938, |
|
"loss": 0.9415, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7566371681415929, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 0.00024867256637168145, |
|
"loss": 1.0556, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7610619469026548, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.000247787610619469, |
|
"loss": 1.1345, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7654867256637168, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 0.00024690265486725663, |
|
"loss": 0.9686, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7699115044247787, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 0.00024601769911504424, |
|
"loss": 0.8729, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7743362831858407, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 0.00024513274336283186, |
|
"loss": 1.0424, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7787610619469026, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 0.00024424778761061947, |
|
"loss": 1.0317, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7831858407079646, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 0.0002433628318584071, |
|
"loss": 1.1979, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7876106194690266, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 0.00024247787610619473, |
|
"loss": 1.0134, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7920353982300885, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 0.00024159292035398232, |
|
"loss": 1.1044, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7964601769911505, |
|
"grad_norm": 0.04443359375, |
|
"learning_rate": 0.00024070796460176993, |
|
"loss": 1.0293, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8008849557522124, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 0.00023982300884955752, |
|
"loss": 0.9629, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8053097345132744, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 0.00023893805309734516, |
|
"loss": 0.9511, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8097345132743363, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 0.00023805309734513275, |
|
"loss": 1.0096, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8141592920353983, |
|
"grad_norm": 0.0498046875, |
|
"learning_rate": 0.0002371681415929204, |
|
"loss": 0.8986, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8185840707964602, |
|
"grad_norm": 0.050048828125, |
|
"learning_rate": 0.00023628318584070798, |
|
"loss": 0.9618, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8230088495575221, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 0.0002353982300884956, |
|
"loss": 1.0183, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.827433628318584, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 0.00023451327433628318, |
|
"loss": 0.9824, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.831858407079646, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 0.00023362831858407082, |
|
"loss": 0.9304, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8362831858407079, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 0.0002327433628318584, |
|
"loss": 0.9942, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8407079646017699, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 0.00023185840707964602, |
|
"loss": 1.1299, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8451327433628318, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 0.0002309734513274336, |
|
"loss": 1.0395, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8495575221238938, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 0.00023008849557522125, |
|
"loss": 0.9442, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8539823008849557, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 0.00022920353982300884, |
|
"loss": 1.0056, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8584070796460177, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 0.00022831858407079648, |
|
"loss": 0.9217, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8628318584070797, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 0.0002274336283185841, |
|
"loss": 0.9522, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8672566371681416, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.00022654867256637168, |
|
"loss": 0.9525, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8716814159292036, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 0.00022566371681415932, |
|
"loss": 1.0493, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8761061946902655, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 0.0002247787610619469, |
|
"loss": 1.1643, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8805309734513275, |
|
"grad_norm": 0.041748046875, |
|
"learning_rate": 0.00022389380530973453, |
|
"loss": 0.8968, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 0.0002230088495575221, |
|
"loss": 0.8145, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8893805309734514, |
|
"grad_norm": 0.0693359375, |
|
"learning_rate": 0.00022212389380530975, |
|
"loss": 1.1892, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.8938053097345132, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 0.00022123893805309734, |
|
"loss": 0.9646, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.8982300884955752, |
|
"grad_norm": 0.046630859375, |
|
"learning_rate": 0.00022035398230088498, |
|
"loss": 1.0692, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.9026548672566371, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 0.00021946902654867257, |
|
"loss": 0.9034, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9070796460176991, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 0.00021858407079646019, |
|
"loss": 1.1094, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.911504424778761, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 0.00021769911504424777, |
|
"loss": 1.1966, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.915929203539823, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 0.00021681415929203541, |
|
"loss": 1.1902, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.9203539823008849, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 0.000215929203539823, |
|
"loss": 1.1077, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.9247787610619469, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.00021504424778761064, |
|
"loss": 0.9293, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.9292035398230089, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 0.00021415929203539826, |
|
"loss": 1.0238, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9336283185840708, |
|
"grad_norm": 0.046142578125, |
|
"learning_rate": 0.00021327433628318585, |
|
"loss": 0.9889, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.9380530973451328, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 0.0002123893805309735, |
|
"loss": 1.0614, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.9424778761061947, |
|
"grad_norm": 0.048095703125, |
|
"learning_rate": 0.00021150442477876107, |
|
"loss": 1.0836, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9469026548672567, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 0.0002106194690265487, |
|
"loss": 1.0815, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9513274336283186, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 0.00020973451327433628, |
|
"loss": 1.0021, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9557522123893806, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 0.00020884955752212392, |
|
"loss": 1.0002, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.9601769911504425, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 0.0002079646017699115, |
|
"loss": 1.2081, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9646017699115044, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 0.00020707964601769915, |
|
"loss": 1.0711, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.9690265486725663, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 0.00020619469026548673, |
|
"loss": 1.0342, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.9734513274336283, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 0.00020530973451327435, |
|
"loss": 1.0103, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9778761061946902, |
|
"grad_norm": 0.04931640625, |
|
"learning_rate": 0.00020442477876106194, |
|
"loss": 0.9692, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9823008849557522, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 0.00020353982300884958, |
|
"loss": 0.9639, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.9867256637168141, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 0.00020265486725663717, |
|
"loss": 0.9039, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.9911504424778761, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 0.00020176991150442478, |
|
"loss": 0.9265, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.995575221238938, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 0.00020088495575221237, |
|
"loss": 0.8961, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0299, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.0044247787610618, |
|
"grad_norm": 0.052978515625, |
|
"learning_rate": 0.00019911504424778762, |
|
"loss": 0.8533, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.008849557522124, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 0.00019823008849557524, |
|
"loss": 0.937, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.0132743362831858, |
|
"grad_norm": 0.05029296875, |
|
"learning_rate": 0.00019734513274336283, |
|
"loss": 0.8202, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.0176991150442478, |
|
"grad_norm": 0.0517578125, |
|
"learning_rate": 0.00019646017699115044, |
|
"loss": 0.8976, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0221238938053097, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 0.00019557522123893806, |
|
"loss": 0.8791, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.0265486725663717, |
|
"grad_norm": 0.050537109375, |
|
"learning_rate": 0.00019469026548672567, |
|
"loss": 1.0753, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.0309734513274336, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 0.00019380530973451328, |
|
"loss": 1.0464, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.0353982300884956, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 0.00019292035398230087, |
|
"loss": 0.8115, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.0398230088495575, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 0.0001920353982300885, |
|
"loss": 0.9851, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.0442477876106195, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 0.00019115044247787613, |
|
"loss": 0.8867, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.0486725663716814, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.00019026548672566374, |
|
"loss": 0.7882, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0530973451327434, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.00018938053097345133, |
|
"loss": 1.0028, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0575221238938053, |
|
"grad_norm": 0.06103515625, |
|
"learning_rate": 0.00018849557522123894, |
|
"loss": 0.9446, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.0619469026548674, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.00018761061946902656, |
|
"loss": 1.0249, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0663716814159292, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 0.00018672566371681417, |
|
"loss": 0.9277, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.0707964601769913, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 0.0001858407079646018, |
|
"loss": 0.8228, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.075221238938053, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 0.00018495575221238938, |
|
"loss": 0.8757, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.079646017699115, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 0.000184070796460177, |
|
"loss": 0.7868, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.084070796460177, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 0.0001831858407079646, |
|
"loss": 0.878, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.0884955752212389, |
|
"grad_norm": 0.05908203125, |
|
"learning_rate": 0.00018230088495575222, |
|
"loss": 0.8944, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.092920353982301, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 0.00018141592920353983, |
|
"loss": 0.8831, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.0973451327433628, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 0.00018053097345132742, |
|
"loss": 0.9312, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.1017699115044248, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 0.00017964601769911504, |
|
"loss": 0.7488, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.1061946902654867, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.00017876106194690265, |
|
"loss": 0.9677, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1106194690265487, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.0001778761061946903, |
|
"loss": 0.8391, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.1150442477876106, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 0.0001769911504424779, |
|
"loss": 0.9225, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.1194690265486726, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001761061946902655, |
|
"loss": 0.7969, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.1238938053097345, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.0001752212389380531, |
|
"loss": 0.8957, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.1283185840707965, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 0.00017433628318584072, |
|
"loss": 0.9192, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.1327433628318584, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 0.00017345132743362834, |
|
"loss": 0.8669, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.1371681415929205, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.00017256637168141595, |
|
"loss": 0.9332, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.1415929203539823, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 0.00017168141592920354, |
|
"loss": 0.8392, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.1460176991150441, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.00017079646017699115, |
|
"loss": 1.1159, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.1504424778761062, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 0.00016991150442477877, |
|
"loss": 0.9649, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.154867256637168, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 0.00016902654867256638, |
|
"loss": 0.9653, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.1592920353982301, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 0.000168141592920354, |
|
"loss": 0.8342, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.163716814159292, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 0.00016725663716814158, |
|
"loss": 0.7385, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.168141592920354, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.0001663716814159292, |
|
"loss": 0.7605, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.1725663716814159, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 0.00016548672566371681, |
|
"loss": 0.8457, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.176991150442478, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.00016460176991150443, |
|
"loss": 0.872, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.1814159292035398, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 0.00016371681415929204, |
|
"loss": 1.0322, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.1858407079646018, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 0.00016283185840707966, |
|
"loss": 1.0532, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.1902654867256637, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 0.00016194690265486727, |
|
"loss": 0.9205, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.1946902654867257, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 0.0001610619469026549, |
|
"loss": 0.8789, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1991150442477876, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 0.0001601769911504425, |
|
"loss": 1.0501, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.2035398230088497, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 0.0001592920353982301, |
|
"loss": 0.8666, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.2079646017699115, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 0.0001584070796460177, |
|
"loss": 0.8761, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.2123893805309733, |
|
"grad_norm": 0.057373046875, |
|
"learning_rate": 0.00015752212389380532, |
|
"loss": 0.8827, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.2168141592920354, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 0.00015663716814159293, |
|
"loss": 0.8162, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.2212389380530975, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.00015575221238938055, |
|
"loss": 0.7613, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.2256637168141593, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 0.00015486725663716813, |
|
"loss": 0.825, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.2300884955752212, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 0.00015398230088495575, |
|
"loss": 0.9633, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.2345132743362832, |
|
"grad_norm": 0.0595703125, |
|
"learning_rate": 0.00015309734513274336, |
|
"loss": 0.9036, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.238938053097345, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.00015221238938053098, |
|
"loss": 0.9527, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2433628318584071, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 0.0001513274336283186, |
|
"loss": 0.9089, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.247787610619469, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 0.00015044247787610618, |
|
"loss": 0.8911, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.252212389380531, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.00014955752212389382, |
|
"loss": 0.7871, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.2566371681415929, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.00014867256637168144, |
|
"loss": 0.8415, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.261061946902655, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 0.00014778761061946905, |
|
"loss": 1.0105, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.2654867256637168, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.00014690265486725664, |
|
"loss": 0.9677, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.2699115044247788, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.00014601769911504425, |
|
"loss": 0.837, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.2743362831858407, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 0.00014513274336283187, |
|
"loss": 0.8605, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.2787610619469025, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.00014424778761061948, |
|
"loss": 0.8717, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.2831858407079646, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001433628318584071, |
|
"loss": 1.0469, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2876106194690267, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 0.00014247787610619468, |
|
"loss": 0.9339, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.2920353982300885, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 0.0001415929203539823, |
|
"loss": 0.7235, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.2964601769911503, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001407079646017699, |
|
"loss": 0.8648, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.3008849557522124, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 0.00013982300884955753, |
|
"loss": 0.8842, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3053097345132743, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.00013893805309734514, |
|
"loss": 0.9593, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.3097345132743363, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.00013805309734513276, |
|
"loss": 0.9122, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.3141592920353982, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 0.00013716814159292034, |
|
"loss": 1.0082, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.3185840707964602, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 0.00013628318584070796, |
|
"loss": 0.884, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.323008849557522, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001353982300884956, |
|
"loss": 0.8348, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.3274336283185841, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 0.00013451327433628321, |
|
"loss": 0.747, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.331858407079646, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 0.0001336283185840708, |
|
"loss": 0.8841, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.336283185840708, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 0.00013274336283185842, |
|
"loss": 0.8985, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.3407079646017699, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 0.00013185840707964603, |
|
"loss": 0.9008, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.3451327433628317, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.00013097345132743365, |
|
"loss": 0.8909, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.3495575221238938, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.00013008849557522126, |
|
"loss": 0.8108, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.3539823008849559, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.00012920353982300885, |
|
"loss": 0.8546, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.3584070796460177, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.00012831858407079646, |
|
"loss": 1.0212, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.3628318584070795, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 0.00012743362831858408, |
|
"loss": 0.974, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.3672566371681416, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 0.0001265486725663717, |
|
"loss": 0.7493, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.3716814159292037, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001256637168141593, |
|
"loss": 1.0118, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.3761061946902655, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001247787610619469, |
|
"loss": 0.8243, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.3805309734513274, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 0.0001238938053097345, |
|
"loss": 0.9024, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.3849557522123894, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.00012300884955752212, |
|
"loss": 0.9018, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.3893805309734513, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.00012212389380530974, |
|
"loss": 1.1168, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.3938053097345133, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.00012123893805309736, |
|
"loss": 0.9847, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.3982300884955752, |
|
"grad_norm": 0.07080078125, |
|
"learning_rate": 0.00012035398230088497, |
|
"loss": 0.9884, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.4026548672566372, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 0.00011946902654867258, |
|
"loss": 0.9483, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.407079646017699, |
|
"grad_norm": 0.06787109375, |
|
"learning_rate": 0.0001185840707964602, |
|
"loss": 0.8768, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.411504424778761, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 0.0001176991150442478, |
|
"loss": 0.9072, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.415929203539823, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.00011681415929203541, |
|
"loss": 0.8627, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.420353982300885, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 0.00011592920353982301, |
|
"loss": 0.9518, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.424778761061947, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.00011504424778761063, |
|
"loss": 0.8705, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.4292035398230087, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 0.00011415929203539824, |
|
"loss": 0.8535, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.4336283185840708, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 0.00011327433628318584, |
|
"loss": 0.8835, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.4380530973451329, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.00011238938053097346, |
|
"loss": 1.1187, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.4424778761061947, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 0.00011150442477876106, |
|
"loss": 0.6991, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.4469026548672566, |
|
"grad_norm": 0.10546875, |
|
"learning_rate": 0.00011061946902654867, |
|
"loss": 0.8172, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.4513274336283186, |
|
"grad_norm": 0.1015625, |
|
"learning_rate": 0.00010973451327433629, |
|
"loss": 0.8526, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.4557522123893805, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 0.00010884955752212389, |
|
"loss": 0.8048, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.4601769911504425, |
|
"grad_norm": 0.0693359375, |
|
"learning_rate": 0.0001079646017699115, |
|
"loss": 0.9438, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4646017699115044, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.00010707964601769913, |
|
"loss": 0.9667, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.4690265486725664, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.00010619469026548674, |
|
"loss": 1.0007, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.4734513274336283, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 0.00010530973451327434, |
|
"loss": 0.971, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.4778761061946903, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.00010442477876106196, |
|
"loss": 0.8334, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.4823008849557522, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 0.00010353982300884957, |
|
"loss": 0.7885, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.4867256637168142, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.00010265486725663717, |
|
"loss": 0.825, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.491150442477876, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.00010176991150442479, |
|
"loss": 0.9044, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.495575221238938, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.00010088495575221239, |
|
"loss": 0.7607, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.0693359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.966, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.504424778761062, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 9.911504424778762e-05, |
|
"loss": 0.7745, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.508849557522124, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 9.823008849557522e-05, |
|
"loss": 0.8849, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.5132743362831858, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 9.734513274336283e-05, |
|
"loss": 0.9905, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.5176991150442478, |
|
"grad_norm": 0.1025390625, |
|
"learning_rate": 9.646017699115044e-05, |
|
"loss": 0.8459, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.5221238938053099, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 9.557522123893806e-05, |
|
"loss": 0.8842, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.5265486725663717, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 9.469026548672566e-05, |
|
"loss": 1.0654, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.5309734513274336, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 9.380530973451328e-05, |
|
"loss": 0.8734, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.5353982300884956, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 9.29203539823009e-05, |
|
"loss": 0.9752, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.5398230088495575, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 9.20353982300885e-05, |
|
"loss": 0.7664, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.5442477876106193, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 9.115044247787611e-05, |
|
"loss": 0.8328, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.5486725663716814, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 9.026548672566371e-05, |
|
"loss": 0.8581, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5530973451327434, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 8.938053097345133e-05, |
|
"loss": 0.7521, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.5575221238938053, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 8.849557522123895e-05, |
|
"loss": 1.1778, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.5619469026548671, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 8.761061946902655e-05, |
|
"loss": 0.8007, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.5663716814159292, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 8.672566371681417e-05, |
|
"loss": 1.1795, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.5707964601769913, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 8.584070796460177e-05, |
|
"loss": 0.9632, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.575221238938053, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 8.495575221238938e-05, |
|
"loss": 0.7671, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.579646017699115, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 8.4070796460177e-05, |
|
"loss": 0.692, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.584070796460177, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 8.31858407079646e-05, |
|
"loss": 0.6548, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.588495575221239, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 8.230088495575221e-05, |
|
"loss": 0.805, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.592920353982301, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 8.141592920353983e-05, |
|
"loss": 0.7988, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.5973451327433628, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 8.053097345132744e-05, |
|
"loss": 0.9695, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.6017699115044248, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 7.964601769911504e-05, |
|
"loss": 1.0397, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.606194690265487, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 7.876106194690266e-05, |
|
"loss": 0.9098, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.6106194690265485, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 7.787610619469027e-05, |
|
"loss": 0.9249, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.6150442477876106, |
|
"grad_norm": 0.0615234375, |
|
"learning_rate": 7.699115044247787e-05, |
|
"loss": 0.7443, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.6194690265486726, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 7.610619469026549e-05, |
|
"loss": 0.8042, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.6238938053097345, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 7.522123893805309e-05, |
|
"loss": 0.8271, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.6283185840707963, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 7.433628318584072e-05, |
|
"loss": 0.9711, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.6327433628318584, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 7.345132743362832e-05, |
|
"loss": 0.8821, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.6371681415929205, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 7.256637168141593e-05, |
|
"loss": 0.7417, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.6415929203539823, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 7.168141592920355e-05, |
|
"loss": 0.9247, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.6460176991150441, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 7.079646017699115e-05, |
|
"loss": 0.9101, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.6504424778761062, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 6.991150442477876e-05, |
|
"loss": 1.0123, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.6548672566371683, |
|
"grad_norm": 0.103515625, |
|
"learning_rate": 6.902654867256638e-05, |
|
"loss": 0.7791, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.6592920353982301, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 6.814159292035398e-05, |
|
"loss": 1.0589, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.663716814159292, |
|
"grad_norm": 0.058349609375, |
|
"learning_rate": 6.725663716814161e-05, |
|
"loss": 0.8401, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.668141592920354, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 6.637168141592921e-05, |
|
"loss": 0.8201, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.672566371681416, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 6.548672566371682e-05, |
|
"loss": 0.913, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.676991150442478, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 6.460176991150442e-05, |
|
"loss": 0.8276, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.6814159292035398, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 6.371681415929204e-05, |
|
"loss": 0.7729, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6858407079646018, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 6.283185840707965e-05, |
|
"loss": 1.0113, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.6902654867256637, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 6.194690265486725e-05, |
|
"loss": 0.8446, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.6946902654867255, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 6.106194690265487e-05, |
|
"loss": 0.8878, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.6991150442477876, |
|
"grad_norm": 0.1103515625, |
|
"learning_rate": 6.017699115044248e-05, |
|
"loss": 0.6718, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.7035398230088497, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 5.92920353982301e-05, |
|
"loss": 0.8153, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.7079646017699115, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 5.8407079646017705e-05, |
|
"loss": 0.9931, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.7123893805309733, |
|
"grad_norm": 0.0556640625, |
|
"learning_rate": 5.752212389380531e-05, |
|
"loss": 0.7466, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.7168141592920354, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 5.663716814159292e-05, |
|
"loss": 0.9364, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.7212389380530975, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 5.575221238938053e-05, |
|
"loss": 0.8851, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.7256637168141593, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 5.486725663716814e-05, |
|
"loss": 0.8714, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.7300884955752212, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 5.398230088495575e-05, |
|
"loss": 0.8885, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.7345132743362832, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 5.309734513274337e-05, |
|
"loss": 0.8724, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.7389380530973453, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 5.221238938053098e-05, |
|
"loss": 1.1328, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.7433628318584071, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 5.132743362831859e-05, |
|
"loss": 0.7735, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.747787610619469, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 5.0442477876106195e-05, |
|
"loss": 0.9325, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.752212389380531, |
|
"grad_norm": 0.07080078125, |
|
"learning_rate": 4.955752212389381e-05, |
|
"loss": 0.9273, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.7566371681415929, |
|
"grad_norm": 0.10009765625, |
|
"learning_rate": 4.867256637168142e-05, |
|
"loss": 0.7756, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.7610619469026547, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 4.778761061946903e-05, |
|
"loss": 1.0591, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.7654867256637168, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 4.690265486725664e-05, |
|
"loss": 0.7867, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 4.601769911504425e-05, |
|
"loss": 0.8369, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7743362831858407, |
|
"grad_norm": 0.06396484375, |
|
"learning_rate": 4.5132743362831855e-05, |
|
"loss": 0.9999, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.7787610619469025, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 4.4247787610619477e-05, |
|
"loss": 0.8612, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.7831858407079646, |
|
"grad_norm": 0.09716796875, |
|
"learning_rate": 4.3362831858407084e-05, |
|
"loss": 0.8529, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.7876106194690267, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 4.247787610619469e-05, |
|
"loss": 0.8809, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.7920353982300885, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 4.15929203539823e-05, |
|
"loss": 0.9739, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.7964601769911503, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 4.0707964601769914e-05, |
|
"loss": 0.9416, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.8008849557522124, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 3.982300884955752e-05, |
|
"loss": 0.8359, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.8053097345132745, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 3.893805309734514e-05, |
|
"loss": 0.9323, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.8097345132743363, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 3.8053097345132744e-05, |
|
"loss": 0.8084, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.8141592920353982, |
|
"grad_norm": 0.06298828125, |
|
"learning_rate": 3.716814159292036e-05, |
|
"loss": 0.9237, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8185840707964602, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 3.628318584070797e-05, |
|
"loss": 1.0047, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.823008849557522, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 3.5398230088495574e-05, |
|
"loss": 0.9763, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.827433628318584, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 3.451327433628319e-05, |
|
"loss": 0.7498, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.831858407079646, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 3.3628318584070804e-05, |
|
"loss": 0.8973, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.836283185840708, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 3.274336283185841e-05, |
|
"loss": 0.9526, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.8407079646017699, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 3.185840707964602e-05, |
|
"loss": 0.9184, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.8451327433628317, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 3.097345132743363e-05, |
|
"loss": 0.9124, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.8495575221238938, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 3.008849557522124e-05, |
|
"loss": 0.8303, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.8539823008849559, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 2.9203539823008852e-05, |
|
"loss": 0.9533, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.8584070796460177, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 2.831858407079646e-05, |
|
"loss": 0.8822, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.8628318584070795, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 2.743362831858407e-05, |
|
"loss": 0.911, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.8672566371681416, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 2.6548672566371686e-05, |
|
"loss": 0.8209, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.8716814159292037, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 2.5663716814159294e-05, |
|
"loss": 0.8294, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.8761061946902655, |
|
"grad_norm": 0.061279296875, |
|
"learning_rate": 2.4778761061946905e-05, |
|
"loss": 0.7602, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.8805309734513274, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 2.3893805309734516e-05, |
|
"loss": 0.8862, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.8849557522123894, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 2.3008849557522124e-05, |
|
"loss": 0.8715, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.8893805309734515, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 2.2123893805309738e-05, |
|
"loss": 0.9235, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.893805309734513, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 2.1238938053097346e-05, |
|
"loss": 0.8975, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.8982300884955752, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 2.0353982300884957e-05, |
|
"loss": 1.0014, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.9026548672566372, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 1.946902654867257e-05, |
|
"loss": 0.8397, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.907079646017699, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 1.858407079646018e-05, |
|
"loss": 1.0832, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.911504424778761, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 1.7699115044247787e-05, |
|
"loss": 0.7726, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.915929203539823, |
|
"grad_norm": 0.06884765625, |
|
"learning_rate": 1.6814159292035402e-05, |
|
"loss": 0.936, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.920353982300885, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 1.592920353982301e-05, |
|
"loss": 1.0048, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.924778761061947, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 1.504424778761062e-05, |
|
"loss": 0.864, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.9292035398230087, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 1.415929203539823e-05, |
|
"loss": 0.9952, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.9336283185840708, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 1.3274336283185843e-05, |
|
"loss": 0.8628, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.9380530973451329, |
|
"grad_norm": 0.060546875, |
|
"learning_rate": 1.2389380530973452e-05, |
|
"loss": 0.8487, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.9424778761061947, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 1.1504424778761062e-05, |
|
"loss": 0.8495, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.9469026548672566, |
|
"grad_norm": 0.06689453125, |
|
"learning_rate": 1.0619469026548673e-05, |
|
"loss": 0.8815, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.9513274336283186, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 9.734513274336284e-06, |
|
"loss": 0.8667, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.9557522123893807, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 8.849557522123894e-06, |
|
"loss": 0.7515, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.9601769911504425, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 7.964601769911505e-06, |
|
"loss": 0.8048, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.9646017699115044, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 7.079646017699115e-06, |
|
"loss": 0.9373, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.9690265486725664, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 6.194690265486726e-06, |
|
"loss": 0.7985, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.9734513274336283, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 5.3097345132743365e-06, |
|
"loss": 0.9149, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.9778761061946901, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 4.424778761061947e-06, |
|
"loss": 0.8296, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.9823008849557522, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 3.5398230088495575e-06, |
|
"loss": 0.8539, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.9867256637168142, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 2.6548672566371683e-06, |
|
"loss": 0.8847, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.991150442477876, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 1.7699115044247788e-06, |
|
"loss": 0.8814, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.995575221238938, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 8.849557522123894e-07, |
|
"loss": 0.8299, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.12451171875, |
|
"learning_rate": 0.0, |
|
"loss": 0.8232, |
|
"step": 452 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 452, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4086515032577802e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|