|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 1960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 9.978917121887207, |
|
"learning_rate": 9.948979591836737e-06, |
|
"loss": 0.2617, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 7.48874044418335, |
|
"learning_rate": 9.89795918367347e-06, |
|
"loss": 0.1563, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 5.081777572631836, |
|
"learning_rate": 9.846938775510205e-06, |
|
"loss": 0.1254, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 4.443576812744141, |
|
"learning_rate": 9.795918367346939e-06, |
|
"loss": 0.1113, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 4.356841087341309, |
|
"learning_rate": 9.744897959183674e-06, |
|
"loss": 0.1257, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 1.98320472240448, |
|
"learning_rate": 9.693877551020408e-06, |
|
"loss": 0.0819, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 3.809190273284912, |
|
"learning_rate": 9.642857142857144e-06, |
|
"loss": 0.1032, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 1.645442247390747, |
|
"learning_rate": 9.591836734693878e-06, |
|
"loss": 0.1124, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 3.7085306644439697, |
|
"learning_rate": 9.540816326530612e-06, |
|
"loss": 0.0847, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 3.9240212440490723, |
|
"learning_rate": 9.489795918367348e-06, |
|
"loss": 0.0753, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 3.737152338027954, |
|
"learning_rate": 9.438775510204082e-06, |
|
"loss": 0.0723, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 2.308751344680786, |
|
"learning_rate": 9.387755102040818e-06, |
|
"loss": 0.0699, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 2.3706369400024414, |
|
"learning_rate": 9.336734693877552e-06, |
|
"loss": 0.0589, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.968673050403595, |
|
"learning_rate": 9.285714285714288e-06, |
|
"loss": 0.0503, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 1.9251790046691895, |
|
"learning_rate": 9.234693877551022e-06, |
|
"loss": 0.0628, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 1.7473604679107666, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.0708, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 1.9279741048812866, |
|
"learning_rate": 9.13265306122449e-06, |
|
"loss": 0.075, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 1.4570097923278809, |
|
"learning_rate": 9.081632653061225e-06, |
|
"loss": 0.0614, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 2.014692544937134, |
|
"learning_rate": 9.03061224489796e-06, |
|
"loss": 0.058, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 1.7634117603302002, |
|
"learning_rate": 8.979591836734695e-06, |
|
"loss": 0.0629, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.7091555595397949, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.0637, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 1.073096752166748, |
|
"learning_rate": 8.877551020408163e-06, |
|
"loss": 0.0603, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 0.7938856482505798, |
|
"learning_rate": 8.826530612244899e-06, |
|
"loss": 0.0538, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 1.192353367805481, |
|
"learning_rate": 8.775510204081633e-06, |
|
"loss": 0.0493, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 0.9369480013847351, |
|
"learning_rate": 8.724489795918369e-06, |
|
"loss": 0.0595, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0612244897959184, |
|
"grad_norm": 1.2866365909576416, |
|
"learning_rate": 8.673469387755103e-06, |
|
"loss": 0.0532, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1020408163265305, |
|
"grad_norm": 16.09465980529785, |
|
"learning_rate": 8.622448979591837e-06, |
|
"loss": 0.0663, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 2.3071987628936768, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.0633, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.183673469387755, |
|
"grad_norm": 1.2905592918395996, |
|
"learning_rate": 8.520408163265307e-06, |
|
"loss": 0.0498, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 1.4856091737747192, |
|
"learning_rate": 8.469387755102042e-06, |
|
"loss": 0.0639, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2653061224489797, |
|
"grad_norm": 0.5537325739860535, |
|
"learning_rate": 8.418367346938776e-06, |
|
"loss": 0.0673, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.306122448979592, |
|
"grad_norm": 1.2954118251800537, |
|
"learning_rate": 8.36734693877551e-06, |
|
"loss": 0.0505, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.346938775510204, |
|
"grad_norm": 0.6809917092323303, |
|
"learning_rate": 8.316326530612246e-06, |
|
"loss": 0.0623, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3877551020408163, |
|
"grad_norm": 2.596815586090088, |
|
"learning_rate": 8.26530612244898e-06, |
|
"loss": 0.0552, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.0378236770629883, |
|
"learning_rate": 8.214285714285714e-06, |
|
"loss": 0.049, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 1.7493040561676025, |
|
"learning_rate": 8.16326530612245e-06, |
|
"loss": 0.0465, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.510204081632653, |
|
"grad_norm": 1.149561882019043, |
|
"learning_rate": 8.112244897959184e-06, |
|
"loss": 0.0584, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5510204081632653, |
|
"grad_norm": 0.8010720014572144, |
|
"learning_rate": 8.06122448979592e-06, |
|
"loss": 0.047, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5918367346938775, |
|
"grad_norm": 0.7010307908058167, |
|
"learning_rate": 8.010204081632654e-06, |
|
"loss": 0.0649, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 2.022503137588501, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.0612, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6734693877551021, |
|
"grad_norm": 1.3006742000579834, |
|
"learning_rate": 7.908163265306124e-06, |
|
"loss": 0.0605, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 1.513334035873413, |
|
"learning_rate": 7.857142857142858e-06, |
|
"loss": 0.054, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7551020408163265, |
|
"grad_norm": 0.28943702578544617, |
|
"learning_rate": 7.806122448979593e-06, |
|
"loss": 0.0673, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 1.2818681001663208, |
|
"learning_rate": 7.755102040816327e-06, |
|
"loss": 0.0614, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.836734693877551, |
|
"grad_norm": 0.5026584267616272, |
|
"learning_rate": 7.704081632653061e-06, |
|
"loss": 0.0443, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8775510204081631, |
|
"grad_norm": 0.400056391954422, |
|
"learning_rate": 7.653061224489796e-06, |
|
"loss": 0.054, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9183673469387754, |
|
"grad_norm": 0.7661588191986084, |
|
"learning_rate": 7.602040816326531e-06, |
|
"loss": 0.0439, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.3066469728946686, |
|
"learning_rate": 7.551020408163265e-06, |
|
"loss": 0.0511, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1751477718353271, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0644, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 0.6497346758842468, |
|
"learning_rate": 7.448979591836736e-06, |
|
"loss": 0.0596, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0816326530612246, |
|
"grad_norm": 0.585145890712738, |
|
"learning_rate": 7.39795918367347e-06, |
|
"loss": 0.0502, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.122448979591837, |
|
"grad_norm": 1.0224946737289429, |
|
"learning_rate": 7.346938775510205e-06, |
|
"loss": 0.0462, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.163265306122449, |
|
"grad_norm": 0.9922281503677368, |
|
"learning_rate": 7.295918367346939e-06, |
|
"loss": 0.063, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.204081632653061, |
|
"grad_norm": 0.7550894618034363, |
|
"learning_rate": 7.244897959183675e-06, |
|
"loss": 0.0595, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2448979591836733, |
|
"grad_norm": 1.000552773475647, |
|
"learning_rate": 7.193877551020409e-06, |
|
"loss": 0.0645, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 0.7375513315200806, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.0597, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.326530612244898, |
|
"grad_norm": 0.7129970192909241, |
|
"learning_rate": 7.091836734693878e-06, |
|
"loss": 0.0603, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.36734693877551, |
|
"grad_norm": 0.8948765993118286, |
|
"learning_rate": 7.0408163265306125e-06, |
|
"loss": 0.0673, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4081632653061225, |
|
"grad_norm": 0.4436047375202179, |
|
"learning_rate": 6.989795918367348e-06, |
|
"loss": 0.0547, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 2.562260627746582, |
|
"learning_rate": 6.938775510204082e-06, |
|
"loss": 0.044, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.489795918367347, |
|
"grad_norm": 2.5978403091430664, |
|
"learning_rate": 6.887755102040817e-06, |
|
"loss": 0.042, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.5306122448979593, |
|
"grad_norm": 0.8350633978843689, |
|
"learning_rate": 6.836734693877551e-06, |
|
"loss": 0.0429, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 1.0908092260360718, |
|
"learning_rate": 6.785714285714287e-06, |
|
"loss": 0.0815, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.612244897959184, |
|
"grad_norm": 1.411789059638977, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.0506, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.6530612244897958, |
|
"grad_norm": 0.7262634038925171, |
|
"learning_rate": 6.683673469387756e-06, |
|
"loss": 0.0486, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.693877551020408, |
|
"grad_norm": 0.6718008518218994, |
|
"learning_rate": 6.63265306122449e-06, |
|
"loss": 0.0478, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7346938775510203, |
|
"grad_norm": 0.6992954015731812, |
|
"learning_rate": 6.581632653061225e-06, |
|
"loss": 0.0876, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7755102040816326, |
|
"grad_norm": 3.032949447631836, |
|
"learning_rate": 6.530612244897959e-06, |
|
"loss": 0.0497, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.816326530612245, |
|
"grad_norm": 0.544232189655304, |
|
"learning_rate": 6.4795918367346946e-06, |
|
"loss": 0.0456, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.489704430103302, |
|
"learning_rate": 6.4285714285714295e-06, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.8979591836734695, |
|
"grad_norm": 0.7447965741157532, |
|
"learning_rate": 6.3775510204081635e-06, |
|
"loss": 0.0557, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.938775510204082, |
|
"grad_norm": 3.607469081878662, |
|
"learning_rate": 6.326530612244899e-06, |
|
"loss": 0.059, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"grad_norm": 0.2003553807735443, |
|
"learning_rate": 6.275510204081633e-06, |
|
"loss": 0.0349, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.020408163265306, |
|
"grad_norm": 1.135377287864685, |
|
"learning_rate": 6.224489795918368e-06, |
|
"loss": 0.0549, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 0.9238697290420532, |
|
"learning_rate": 6.173469387755102e-06, |
|
"loss": 0.0627, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.1020408163265305, |
|
"grad_norm": 0.7442536354064941, |
|
"learning_rate": 6.122448979591837e-06, |
|
"loss": 0.0528, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 0.6410558819770813, |
|
"learning_rate": 6.071428571428571e-06, |
|
"loss": 0.0707, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.183673469387755, |
|
"grad_norm": 0.4915910065174103, |
|
"learning_rate": 6.020408163265307e-06, |
|
"loss": 0.0659, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2244897959183674, |
|
"grad_norm": 0.33948495984077454, |
|
"learning_rate": 5.969387755102042e-06, |
|
"loss": 0.0535, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 0.9314869046211243, |
|
"learning_rate": 5.918367346938776e-06, |
|
"loss": 0.0443, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.306122448979592, |
|
"grad_norm": 0.9704706072807312, |
|
"learning_rate": 5.867346938775511e-06, |
|
"loss": 0.0562, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.3469387755102042, |
|
"grad_norm": 0.8564426898956299, |
|
"learning_rate": 5.816326530612246e-06, |
|
"loss": 0.0466, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.387755102040816, |
|
"grad_norm": 0.31214070320129395, |
|
"learning_rate": 5.7653061224489805e-06, |
|
"loss": 0.0488, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.40054649114608765, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.0536, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4693877551020407, |
|
"grad_norm": 0.476951003074646, |
|
"learning_rate": 5.663265306122449e-06, |
|
"loss": 0.0819, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.510204081632653, |
|
"grad_norm": 1.075916051864624, |
|
"learning_rate": 5.6122448979591834e-06, |
|
"loss": 0.0451, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5510204081632653, |
|
"grad_norm": 0.4422233998775482, |
|
"learning_rate": 5.561224489795919e-06, |
|
"loss": 0.065, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5918367346938775, |
|
"grad_norm": 0.7247931361198425, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.0442, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.63265306122449, |
|
"grad_norm": 0.18422362208366394, |
|
"learning_rate": 5.459183673469388e-06, |
|
"loss": 0.0295, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 0.6566686034202576, |
|
"learning_rate": 5.408163265306123e-06, |
|
"loss": 0.0527, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.7151392698287964, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.0614, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7551020408163263, |
|
"grad_norm": 0.1488690972328186, |
|
"learning_rate": 5.306122448979593e-06, |
|
"loss": 0.0546, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.795918367346939, |
|
"grad_norm": 0.472126841545105, |
|
"learning_rate": 5.255102040816327e-06, |
|
"loss": 0.0514, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.836734693877551, |
|
"grad_norm": 0.8530511260032654, |
|
"learning_rate": 5.204081632653062e-06, |
|
"loss": 0.049, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.877551020408163, |
|
"grad_norm": 1.6832056045532227, |
|
"learning_rate": 5.153061224489796e-06, |
|
"loss": 0.0603, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9183673469387754, |
|
"grad_norm": 0.30192047357559204, |
|
"learning_rate": 5.1020408163265315e-06, |
|
"loss": 0.0512, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9591836734693877, |
|
"grad_norm": 1.3734880685806274, |
|
"learning_rate": 5.0510204081632655e-06, |
|
"loss": 0.0756, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.7525829672813416, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0715, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.040816326530612, |
|
"grad_norm": 0.4748665690422058, |
|
"learning_rate": 4.948979591836735e-06, |
|
"loss": 0.0487, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 1.340325117111206, |
|
"learning_rate": 4.897959183673469e-06, |
|
"loss": 0.0638, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.122448979591836, |
|
"grad_norm": 0.5442948937416077, |
|
"learning_rate": 4.846938775510204e-06, |
|
"loss": 0.0642, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.163265306122449, |
|
"grad_norm": 0.3119046688079834, |
|
"learning_rate": 4.795918367346939e-06, |
|
"loss": 0.0411, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.204081632653061, |
|
"grad_norm": 0.7393902540206909, |
|
"learning_rate": 4.744897959183674e-06, |
|
"loss": 0.0544, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.244897959183674, |
|
"grad_norm": 0.5412510633468628, |
|
"learning_rate": 4.693877551020409e-06, |
|
"loss": 0.0406, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.6453996300697327, |
|
"learning_rate": 4.642857142857144e-06, |
|
"loss": 0.0499, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.326530612244898, |
|
"grad_norm": 0.3400985896587372, |
|
"learning_rate": 4.591836734693878e-06, |
|
"loss": 0.046, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.36734693877551, |
|
"grad_norm": 0.5143836736679077, |
|
"learning_rate": 4.540816326530613e-06, |
|
"loss": 0.0494, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.408163265306122, |
|
"grad_norm": 0.38877835869789124, |
|
"learning_rate": 4.489795918367348e-06, |
|
"loss": 0.0526, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.448979591836735, |
|
"grad_norm": 0.38251811265945435, |
|
"learning_rate": 4.438775510204082e-06, |
|
"loss": 0.051, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 0.3022618889808655, |
|
"learning_rate": 4.3877551020408165e-06, |
|
"loss": 0.0368, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.530612244897959, |
|
"grad_norm": 0.12300197780132294, |
|
"learning_rate": 4.336734693877551e-06, |
|
"loss": 0.0474, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.7275770902633667, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0409, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.612244897959184, |
|
"grad_norm": 0.46302053332328796, |
|
"learning_rate": 4.234693877551021e-06, |
|
"loss": 0.0545, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.653061224489796, |
|
"grad_norm": 1.1193764209747314, |
|
"learning_rate": 4.183673469387755e-06, |
|
"loss": 0.0736, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.6938775510204085, |
|
"grad_norm": 0.936698317527771, |
|
"learning_rate": 4.13265306122449e-06, |
|
"loss": 0.0532, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.73469387755102, |
|
"grad_norm": 1.091784119606018, |
|
"learning_rate": 4.081632653061225e-06, |
|
"loss": 0.064, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.775510204081632, |
|
"grad_norm": 0.3371049165725708, |
|
"learning_rate": 4.03061224489796e-06, |
|
"loss": 0.0557, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.816326530612245, |
|
"grad_norm": 0.5533121824264526, |
|
"learning_rate": 3.979591836734694e-06, |
|
"loss": 0.0449, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 1.3483092784881592, |
|
"learning_rate": 3.928571428571429e-06, |
|
"loss": 0.0551, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 2.4415154457092285, |
|
"learning_rate": 3.877551020408164e-06, |
|
"loss": 0.0738, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.938775510204081, |
|
"grad_norm": 0.4990352690219879, |
|
"learning_rate": 3.826530612244898e-06, |
|
"loss": 0.0663, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"grad_norm": 1.045630693435669, |
|
"learning_rate": 3.7755102040816327e-06, |
|
"loss": 0.0422, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.020408163265306, |
|
"grad_norm": 3.719482660293579, |
|
"learning_rate": 3.724489795918368e-06, |
|
"loss": 0.0531, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.061224489795919, |
|
"grad_norm": 0.6931941509246826, |
|
"learning_rate": 3.6734693877551024e-06, |
|
"loss": 0.0434, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.1020408163265305, |
|
"grad_norm": 0.945284903049469, |
|
"learning_rate": 3.6224489795918373e-06, |
|
"loss": 0.0377, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.142857142857143, |
|
"grad_norm": 0.49527707695961, |
|
"learning_rate": 3.5714285714285718e-06, |
|
"loss": 0.0406, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.183673469387755, |
|
"grad_norm": 1.0614029169082642, |
|
"learning_rate": 3.5204081632653062e-06, |
|
"loss": 0.0614, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.224489795918367, |
|
"grad_norm": 1.208749771118164, |
|
"learning_rate": 3.469387755102041e-06, |
|
"loss": 0.0449, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.26530612244898, |
|
"grad_norm": 3.612487554550171, |
|
"learning_rate": 3.4183673469387756e-06, |
|
"loss": 0.0672, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.3061224489795915, |
|
"grad_norm": 0.6228938102722168, |
|
"learning_rate": 3.3673469387755105e-06, |
|
"loss": 0.0516, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.346938775510204, |
|
"grad_norm": 0.586557924747467, |
|
"learning_rate": 3.316326530612245e-06, |
|
"loss": 0.0674, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.387755102040816, |
|
"grad_norm": 0.963624119758606, |
|
"learning_rate": 3.2653061224489794e-06, |
|
"loss": 0.0621, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.428571428571429, |
|
"grad_norm": 1.1783013343811035, |
|
"learning_rate": 3.2142857142857147e-06, |
|
"loss": 0.0366, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.469387755102041, |
|
"grad_norm": 4.429933547973633, |
|
"learning_rate": 3.1632653061224496e-06, |
|
"loss": 0.0511, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.510204081632653, |
|
"grad_norm": 4.795422077178955, |
|
"learning_rate": 3.112244897959184e-06, |
|
"loss": 0.0601, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.551020408163265, |
|
"grad_norm": 0.19068406522274017, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.0479, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.591836734693878, |
|
"grad_norm": 3.7448017597198486, |
|
"learning_rate": 3.0102040816326534e-06, |
|
"loss": 0.0404, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.63265306122449, |
|
"grad_norm": 0.3368137776851654, |
|
"learning_rate": 2.959183673469388e-06, |
|
"loss": 0.0488, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.673469387755102, |
|
"grad_norm": 0.14420035481452942, |
|
"learning_rate": 2.908163265306123e-06, |
|
"loss": 0.0582, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 0.372368723154068, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.0391, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.755102040816326, |
|
"grad_norm": 3.4565131664276123, |
|
"learning_rate": 2.8061224489795917e-06, |
|
"loss": 0.0616, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.795918367346939, |
|
"grad_norm": 3.389681339263916, |
|
"learning_rate": 2.7551020408163266e-06, |
|
"loss": 0.0675, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.836734693877551, |
|
"grad_norm": 0.7100503444671631, |
|
"learning_rate": 2.7040816326530615e-06, |
|
"loss": 0.036, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.877551020408164, |
|
"grad_norm": 0.40696802735328674, |
|
"learning_rate": 2.6530612244897964e-06, |
|
"loss": 0.0632, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.918367346938775, |
|
"grad_norm": 0.7590793967247009, |
|
"learning_rate": 2.602040816326531e-06, |
|
"loss": 0.0549, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.959183673469388, |
|
"grad_norm": 0.48597386479377747, |
|
"learning_rate": 2.5510204081632657e-06, |
|
"loss": 0.0393, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.6455276012420654, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0566, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.040816326530612, |
|
"grad_norm": 2.350471258163452, |
|
"learning_rate": 2.4489795918367347e-06, |
|
"loss": 0.0509, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.081632653061225, |
|
"grad_norm": 3.315977096557617, |
|
"learning_rate": 2.3979591836734696e-06, |
|
"loss": 0.0523, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.122448979591836, |
|
"grad_norm": 1.6327887773513794, |
|
"learning_rate": 2.3469387755102044e-06, |
|
"loss": 0.0432, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.163265306122449, |
|
"grad_norm": 5.029656410217285, |
|
"learning_rate": 2.295918367346939e-06, |
|
"loss": 0.0468, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.204081632653061, |
|
"grad_norm": 3.1543941497802734, |
|
"learning_rate": 2.244897959183674e-06, |
|
"loss": 0.0471, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.244897959183674, |
|
"grad_norm": 1.1178909540176392, |
|
"learning_rate": 2.1938775510204083e-06, |
|
"loss": 0.0538, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.285714285714286, |
|
"grad_norm": 0.7284368276596069, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 0.0474, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.326530612244898, |
|
"grad_norm": 0.15386615693569183, |
|
"learning_rate": 2.0918367346938776e-06, |
|
"loss": 0.0327, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.36734693877551, |
|
"grad_norm": 12.00415325164795, |
|
"learning_rate": 2.0408163265306125e-06, |
|
"loss": 0.0568, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.408163265306122, |
|
"grad_norm": 0.14763076603412628, |
|
"learning_rate": 1.989795918367347e-06, |
|
"loss": 0.0389, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.448979591836735, |
|
"grad_norm": 0.10665205121040344, |
|
"learning_rate": 1.938775510204082e-06, |
|
"loss": 0.0526, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.489795918367347, |
|
"grad_norm": 0.6945566534996033, |
|
"learning_rate": 1.8877551020408163e-06, |
|
"loss": 0.0276, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.530612244897959, |
|
"grad_norm": 0.6304193735122681, |
|
"learning_rate": 1.8367346938775512e-06, |
|
"loss": 0.0595, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.571428571428571, |
|
"grad_norm": 0.738591194152832, |
|
"learning_rate": 1.7857142857142859e-06, |
|
"loss": 0.0591, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.612244897959184, |
|
"grad_norm": 1.1249669790267944, |
|
"learning_rate": 1.7346938775510206e-06, |
|
"loss": 0.0444, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.653061224489796, |
|
"grad_norm": 0.3204442858695984, |
|
"learning_rate": 1.6836734693877552e-06, |
|
"loss": 0.041, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.6938775510204085, |
|
"grad_norm": 0.6603041887283325, |
|
"learning_rate": 1.6326530612244897e-06, |
|
"loss": 0.0485, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.73469387755102, |
|
"grad_norm": 0.9167451858520508, |
|
"learning_rate": 1.5816326530612248e-06, |
|
"loss": 0.051, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.775510204081632, |
|
"grad_norm": 1.1892409324645996, |
|
"learning_rate": 1.5306122448979593e-06, |
|
"loss": 0.0577, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.816326530612245, |
|
"grad_norm": 1.1679530143737793, |
|
"learning_rate": 1.479591836734694e-06, |
|
"loss": 0.0581, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"grad_norm": 4.730435848236084, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.058, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.8979591836734695, |
|
"grad_norm": 2.7492659091949463, |
|
"learning_rate": 1.3775510204081633e-06, |
|
"loss": 0.0593, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.938775510204081, |
|
"grad_norm": 0.29811447858810425, |
|
"learning_rate": 1.3265306122448982e-06, |
|
"loss": 0.0425, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.979591836734694, |
|
"grad_norm": 0.15881459414958954, |
|
"learning_rate": 1.2755102040816329e-06, |
|
"loss": 0.0625, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.020408163265306, |
|
"grad_norm": 0.20461368560791016, |
|
"learning_rate": 1.2244897959183673e-06, |
|
"loss": 0.0581, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.061224489795919, |
|
"grad_norm": 1.1351808309555054, |
|
"learning_rate": 1.1734693877551022e-06, |
|
"loss": 0.0646, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.1020408163265305, |
|
"grad_norm": 0.1654195487499237, |
|
"learning_rate": 1.122448979591837e-06, |
|
"loss": 0.0472, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 0.4366483986377716, |
|
"learning_rate": 1.0714285714285714e-06, |
|
"loss": 0.0461, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.183673469387755, |
|
"grad_norm": 0.5380903482437134, |
|
"learning_rate": 1.0204081632653063e-06, |
|
"loss": 0.0506, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.224489795918367, |
|
"grad_norm": 1.661912441253662, |
|
"learning_rate": 9.69387755102041e-07, |
|
"loss": 0.0664, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.26530612244898, |
|
"grad_norm": 0.4192713499069214, |
|
"learning_rate": 9.183673469387756e-07, |
|
"loss": 0.0394, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.3061224489795915, |
|
"grad_norm": 0.6668973565101624, |
|
"learning_rate": 8.673469387755103e-07, |
|
"loss": 0.0401, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.346938775510204, |
|
"grad_norm": 0.5573325753211975, |
|
"learning_rate": 8.163265306122449e-07, |
|
"loss": 0.0526, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.387755102040816, |
|
"grad_norm": 0.39288291335105896, |
|
"learning_rate": 7.653061224489796e-07, |
|
"loss": 0.0445, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.428571428571429, |
|
"grad_norm": 0.7398673892021179, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.054, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.469387755102041, |
|
"grad_norm": 2.143411636352539, |
|
"learning_rate": 6.632653061224491e-07, |
|
"loss": 0.0458, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.510204081632653, |
|
"grad_norm": 0.3958425223827362, |
|
"learning_rate": 6.122448979591837e-07, |
|
"loss": 0.0641, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.551020408163265, |
|
"grad_norm": 2.797384023666382, |
|
"learning_rate": 5.612244897959184e-07, |
|
"loss": 0.0447, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.591836734693878, |
|
"grad_norm": 1.5026339292526245, |
|
"learning_rate": 5.102040816326531e-07, |
|
"loss": 0.0274, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.63265306122449, |
|
"grad_norm": 0.993212103843689, |
|
"learning_rate": 4.591836734693878e-07, |
|
"loss": 0.0393, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.673469387755102, |
|
"grad_norm": 0.16298241913318634, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"loss": 0.055, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.714285714285714, |
|
"grad_norm": 4.067746639251709, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"loss": 0.0661, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.755102040816326, |
|
"grad_norm": 1.387778878211975, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"loss": 0.0586, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.795918367346939, |
|
"grad_norm": 0.6988309621810913, |
|
"learning_rate": 2.5510204081632656e-07, |
|
"loss": 0.0664, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.836734693877551, |
|
"grad_norm": 0.7139838933944702, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"loss": 0.053, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.877551020408164, |
|
"grad_norm": 0.5550429224967957, |
|
"learning_rate": 1.5306122448979592e-07, |
|
"loss": 0.0458, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.918367346938775, |
|
"grad_norm": 1.2351597547531128, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"loss": 0.0471, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.959183673469388, |
|
"grad_norm": 0.6292315125465393, |
|
"learning_rate": 5.1020408163265303e-08, |
|
"loss": 0.0532, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.6500349044799805, |
|
"learning_rate": 0.0, |
|
"loss": 0.0453, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1960, |
|
"total_flos": 4.89583144415232e+16, |
|
"total_memory_available (GB)": 94.62, |
|
"train_loss": 0.057532464606421335, |
|
"train_runtime": 1666.2328, |
|
"train_samples_per_second": 52.538, |
|
"train_steps_per_second": 1.315 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1960, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.89583144415232e+16, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|