bridgetower / trainer_state.json
edlee123's picture
End of training
a9bd24f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 500,
"global_step": 1960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04081632653061224,
"grad_norm": 9.978917121887207,
"learning_rate": 9.948979591836737e-06,
"loss": 0.2617,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 10,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.08163265306122448,
"grad_norm": 7.48874044418335,
"learning_rate": 9.89795918367347e-06,
"loss": 0.1563,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 20,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.12244897959183673,
"grad_norm": 5.081777572631836,
"learning_rate": 9.846938775510205e-06,
"loss": 0.1254,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 30,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.16326530612244897,
"grad_norm": 4.443576812744141,
"learning_rate": 9.795918367346939e-06,
"loss": 0.1113,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 40,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.20408163265306123,
"grad_norm": 4.356841087341309,
"learning_rate": 9.744897959183674e-06,
"loss": 0.1257,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 50,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.24489795918367346,
"grad_norm": 1.98320472240448,
"learning_rate": 9.693877551020408e-06,
"loss": 0.0819,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 60,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.2857142857142857,
"grad_norm": 3.809190273284912,
"learning_rate": 9.642857142857144e-06,
"loss": 0.1032,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 70,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.32653061224489793,
"grad_norm": 1.645442247390747,
"learning_rate": 9.591836734693878e-06,
"loss": 0.1124,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 80,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.3673469387755102,
"grad_norm": 3.7085306644439697,
"learning_rate": 9.540816326530612e-06,
"loss": 0.0847,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 90,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.40816326530612246,
"grad_norm": 3.9240212440490723,
"learning_rate": 9.489795918367348e-06,
"loss": 0.0753,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4489795918367347,
"grad_norm": 3.737152338027954,
"learning_rate": 9.438775510204082e-06,
"loss": 0.0723,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4897959183673469,
"grad_norm": 2.308751344680786,
"learning_rate": 9.387755102040818e-06,
"loss": 0.0699,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5306122448979592,
"grad_norm": 2.3706369400024414,
"learning_rate": 9.336734693877552e-06,
"loss": 0.0589,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.968673050403595,
"learning_rate": 9.285714285714288e-06,
"loss": 0.0503,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6122448979591837,
"grad_norm": 1.9251790046691895,
"learning_rate": 9.234693877551022e-06,
"loss": 0.0628,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6530612244897959,
"grad_norm": 1.7473604679107666,
"learning_rate": 9.183673469387756e-06,
"loss": 0.0708,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6938775510204082,
"grad_norm": 1.9279741048812866,
"learning_rate": 9.13265306122449e-06,
"loss": 0.075,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7346938775510204,
"grad_norm": 1.4570097923278809,
"learning_rate": 9.081632653061225e-06,
"loss": 0.0614,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7755102040816326,
"grad_norm": 2.014692544937134,
"learning_rate": 9.03061224489796e-06,
"loss": 0.058,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8163265306122449,
"grad_norm": 1.7634117603302002,
"learning_rate": 8.979591836734695e-06,
"loss": 0.0629,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.7091555595397949,
"learning_rate": 8.92857142857143e-06,
"loss": 0.0637,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8979591836734694,
"grad_norm": 1.073096752166748,
"learning_rate": 8.877551020408163e-06,
"loss": 0.0603,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.7938856482505798,
"learning_rate": 8.826530612244899e-06,
"loss": 0.0538,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9795918367346939,
"grad_norm": 1.192353367805481,
"learning_rate": 8.775510204081633e-06,
"loss": 0.0493,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0204081632653061,
"grad_norm": 0.9369480013847351,
"learning_rate": 8.724489795918369e-06,
"loss": 0.0595,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0612244897959184,
"grad_norm": 1.2866365909576416,
"learning_rate": 8.673469387755103e-06,
"loss": 0.0532,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1020408163265305,
"grad_norm": 16.09465980529785,
"learning_rate": 8.622448979591837e-06,
"loss": 0.0663,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1428571428571428,
"grad_norm": 2.3071987628936768,
"learning_rate": 8.571428571428571e-06,
"loss": 0.0633,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.183673469387755,
"grad_norm": 1.2905592918395996,
"learning_rate": 8.520408163265307e-06,
"loss": 0.0498,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.4856091737747192,
"learning_rate": 8.469387755102042e-06,
"loss": 0.0639,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2653061224489797,
"grad_norm": 0.5537325739860535,
"learning_rate": 8.418367346938776e-06,
"loss": 0.0673,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.306122448979592,
"grad_norm": 1.2954118251800537,
"learning_rate": 8.36734693877551e-06,
"loss": 0.0505,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.346938775510204,
"grad_norm": 0.6809917092323303,
"learning_rate": 8.316326530612246e-06,
"loss": 0.0623,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.3877551020408163,
"grad_norm": 2.596815586090088,
"learning_rate": 8.26530612244898e-06,
"loss": 0.0552,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.4285714285714286,
"grad_norm": 1.0378236770629883,
"learning_rate": 8.214285714285714e-06,
"loss": 0.049,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.469387755102041,
"grad_norm": 1.7493040561676025,
"learning_rate": 8.16326530612245e-06,
"loss": 0.0465,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.510204081632653,
"grad_norm": 1.149561882019043,
"learning_rate": 8.112244897959184e-06,
"loss": 0.0584,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5510204081632653,
"grad_norm": 0.8010720014572144,
"learning_rate": 8.06122448979592e-06,
"loss": 0.047,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5918367346938775,
"grad_norm": 0.7010307908058167,
"learning_rate": 8.010204081632654e-06,
"loss": 0.0649,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6326530612244898,
"grad_norm": 2.022503137588501,
"learning_rate": 7.959183673469388e-06,
"loss": 0.0612,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6734693877551021,
"grad_norm": 1.3006742000579834,
"learning_rate": 7.908163265306124e-06,
"loss": 0.0605,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7142857142857144,
"grad_norm": 1.513334035873413,
"learning_rate": 7.857142857142858e-06,
"loss": 0.054,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7551020408163265,
"grad_norm": 0.28943702578544617,
"learning_rate": 7.806122448979593e-06,
"loss": 0.0673,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7959183673469388,
"grad_norm": 1.2818681001663208,
"learning_rate": 7.755102040816327e-06,
"loss": 0.0614,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.5026584267616272,
"learning_rate": 7.704081632653061e-06,
"loss": 0.0443,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.8775510204081631,
"grad_norm": 0.400056391954422,
"learning_rate": 7.653061224489796e-06,
"loss": 0.054,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9183673469387754,
"grad_norm": 0.7661588191986084,
"learning_rate": 7.602040816326531e-06,
"loss": 0.0439,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.3066469728946686,
"learning_rate": 7.551020408163265e-06,
"loss": 0.0511,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0,
"grad_norm": 1.1751477718353271,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0644,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.6497346758842468,
"learning_rate": 7.448979591836736e-06,
"loss": 0.0596,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.585145890712738,
"learning_rate": 7.39795918367347e-06,
"loss": 0.0502,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.122448979591837,
"grad_norm": 1.0224946737289429,
"learning_rate": 7.346938775510205e-06,
"loss": 0.0462,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.163265306122449,
"grad_norm": 0.9922281503677368,
"learning_rate": 7.295918367346939e-06,
"loss": 0.063,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.7550894618034363,
"learning_rate": 7.244897959183675e-06,
"loss": 0.0595,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2448979591836733,
"grad_norm": 1.000552773475647,
"learning_rate": 7.193877551020409e-06,
"loss": 0.0645,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.7375513315200806,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.0597,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.326530612244898,
"grad_norm": 0.7129970192909241,
"learning_rate": 7.091836734693878e-06,
"loss": 0.0603,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.36734693877551,
"grad_norm": 0.8948765993118286,
"learning_rate": 7.0408163265306125e-06,
"loss": 0.0673,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.4436047375202179,
"learning_rate": 6.989795918367348e-06,
"loss": 0.0547,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4489795918367347,
"grad_norm": 2.562260627746582,
"learning_rate": 6.938775510204082e-06,
"loss": 0.044,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.489795918367347,
"grad_norm": 2.5978403091430664,
"learning_rate": 6.887755102040817e-06,
"loss": 0.042,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.5306122448979593,
"grad_norm": 0.8350633978843689,
"learning_rate": 6.836734693877551e-06,
"loss": 0.0429,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.571428571428571,
"grad_norm": 1.0908092260360718,
"learning_rate": 6.785714285714287e-06,
"loss": 0.0815,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.612244897959184,
"grad_norm": 1.411789059638977,
"learning_rate": 6.734693877551021e-06,
"loss": 0.0506,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.7262634038925171,
"learning_rate": 6.683673469387756e-06,
"loss": 0.0486,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.6718008518218994,
"learning_rate": 6.63265306122449e-06,
"loss": 0.0478,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.6992954015731812,
"learning_rate": 6.581632653061225e-06,
"loss": 0.0876,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7755102040816326,
"grad_norm": 3.032949447631836,
"learning_rate": 6.530612244897959e-06,
"loss": 0.0497,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.544232189655304,
"learning_rate": 6.4795918367346946e-06,
"loss": 0.0456,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.489704430103302,
"learning_rate": 6.4285714285714295e-06,
"loss": 0.0438,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.8979591836734695,
"grad_norm": 0.7447965741157532,
"learning_rate": 6.3775510204081635e-06,
"loss": 0.0557,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.938775510204082,
"grad_norm": 3.607469081878662,
"learning_rate": 6.326530612244899e-06,
"loss": 0.059,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.2003553807735443,
"learning_rate": 6.275510204081633e-06,
"loss": 0.0349,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.020408163265306,
"grad_norm": 1.135377287864685,
"learning_rate": 6.224489795918368e-06,
"loss": 0.0549,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.061224489795918,
"grad_norm": 0.9238697290420532,
"learning_rate": 6.173469387755102e-06,
"loss": 0.0627,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.1020408163265305,
"grad_norm": 0.7442536354064941,
"learning_rate": 6.122448979591837e-06,
"loss": 0.0528,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.6410558819770813,
"learning_rate": 6.071428571428571e-06,
"loss": 0.0707,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.183673469387755,
"grad_norm": 0.4915910065174103,
"learning_rate": 6.020408163265307e-06,
"loss": 0.0659,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2244897959183674,
"grad_norm": 0.33948495984077454,
"learning_rate": 5.969387755102042e-06,
"loss": 0.0535,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2653061224489797,
"grad_norm": 0.9314869046211243,
"learning_rate": 5.918367346938776e-06,
"loss": 0.0443,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.306122448979592,
"grad_norm": 0.9704706072807312,
"learning_rate": 5.867346938775511e-06,
"loss": 0.0562,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.3469387755102042,
"grad_norm": 0.8564426898956299,
"learning_rate": 5.816326530612246e-06,
"loss": 0.0466,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.387755102040816,
"grad_norm": 0.31214070320129395,
"learning_rate": 5.7653061224489805e-06,
"loss": 0.0488,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.40054649114608765,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.0536,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4693877551020407,
"grad_norm": 0.476951003074646,
"learning_rate": 5.663265306122449e-06,
"loss": 0.0819,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.510204081632653,
"grad_norm": 1.075916051864624,
"learning_rate": 5.6122448979591834e-06,
"loss": 0.0451,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5510204081632653,
"grad_norm": 0.4422233998775482,
"learning_rate": 5.561224489795919e-06,
"loss": 0.065,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5918367346938775,
"grad_norm": 0.7247931361198425,
"learning_rate": 5.510204081632653e-06,
"loss": 0.0442,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.63265306122449,
"grad_norm": 0.18422362208366394,
"learning_rate": 5.459183673469388e-06,
"loss": 0.0295,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.6566686034202576,
"learning_rate": 5.408163265306123e-06,
"loss": 0.0527,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7142857142857144,
"grad_norm": 0.7151392698287964,
"learning_rate": 5.357142857142857e-06,
"loss": 0.0614,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7551020408163263,
"grad_norm": 0.1488690972328186,
"learning_rate": 5.306122448979593e-06,
"loss": 0.0546,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.795918367346939,
"grad_norm": 0.472126841545105,
"learning_rate": 5.255102040816327e-06,
"loss": 0.0514,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.836734693877551,
"grad_norm": 0.8530511260032654,
"learning_rate": 5.204081632653062e-06,
"loss": 0.049,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.877551020408163,
"grad_norm": 1.6832056045532227,
"learning_rate": 5.153061224489796e-06,
"loss": 0.0603,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9183673469387754,
"grad_norm": 0.30192047357559204,
"learning_rate": 5.1020408163265315e-06,
"loss": 0.0512,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9591836734693877,
"grad_norm": 1.3734880685806274,
"learning_rate": 5.0510204081632655e-06,
"loss": 0.0756,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 970,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.0,
"grad_norm": 0.7525829672813416,
"learning_rate": 5e-06,
"loss": 0.0715,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 980,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.4748665690422058,
"learning_rate": 4.948979591836735e-06,
"loss": 0.0487,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 990,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.081632653061225,
"grad_norm": 1.340325117111206,
"learning_rate": 4.897959183673469e-06,
"loss": 0.0638,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.5442948937416077,
"learning_rate": 4.846938775510204e-06,
"loss": 0.0642,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1010,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.3119046688079834,
"learning_rate": 4.795918367346939e-06,
"loss": 0.0411,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1020,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.204081632653061,
"grad_norm": 0.7393902540206909,
"learning_rate": 4.744897959183674e-06,
"loss": 0.0544,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1030,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.244897959183674,
"grad_norm": 0.5412510633468628,
"learning_rate": 4.693877551020409e-06,
"loss": 0.0406,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1040,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.6453996300697327,
"learning_rate": 4.642857142857144e-06,
"loss": 0.0499,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1050,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.3400985896587372,
"learning_rate": 4.591836734693878e-06,
"loss": 0.046,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1060,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.5143836736679077,
"learning_rate": 4.540816326530613e-06,
"loss": 0.0494,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1070,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.38877835869789124,
"learning_rate": 4.489795918367348e-06,
"loss": 0.0526,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1080,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.448979591836735,
"grad_norm": 0.38251811265945435,
"learning_rate": 4.438775510204082e-06,
"loss": 0.051,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1090,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.3022618889808655,
"learning_rate": 4.3877551020408165e-06,
"loss": 0.0368,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.530612244897959,
"grad_norm": 0.12300197780132294,
"learning_rate": 4.336734693877551e-06,
"loss": 0.0474,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.7275770902633667,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.0409,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.46302053332328796,
"learning_rate": 4.234693877551021e-06,
"loss": 0.0545,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.653061224489796,
"grad_norm": 1.1193764209747314,
"learning_rate": 4.183673469387755e-06,
"loss": 0.0736,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.6938775510204085,
"grad_norm": 0.936698317527771,
"learning_rate": 4.13265306122449e-06,
"loss": 0.0532,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.73469387755102,
"grad_norm": 1.091784119606018,
"learning_rate": 4.081632653061225e-06,
"loss": 0.064,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.775510204081632,
"grad_norm": 0.3371049165725708,
"learning_rate": 4.03061224489796e-06,
"loss": 0.0557,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.5533121824264526,
"learning_rate": 3.979591836734694e-06,
"loss": 0.0449,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.857142857142857,
"grad_norm": 1.3483092784881592,
"learning_rate": 3.928571428571429e-06,
"loss": 0.0551,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.8979591836734695,
"grad_norm": 2.4415154457092285,
"learning_rate": 3.877551020408164e-06,
"loss": 0.0738,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.938775510204081,
"grad_norm": 0.4990352690219879,
"learning_rate": 3.826530612244898e-06,
"loss": 0.0663,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.979591836734694,
"grad_norm": 1.045630693435669,
"learning_rate": 3.7755102040816327e-06,
"loss": 0.0422,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.020408163265306,
"grad_norm": 3.719482660293579,
"learning_rate": 3.724489795918368e-06,
"loss": 0.0531,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.061224489795919,
"grad_norm": 0.6931941509246826,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.0434,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.1020408163265305,
"grad_norm": 0.945284903049469,
"learning_rate": 3.6224489795918373e-06,
"loss": 0.0377,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.142857142857143,
"grad_norm": 0.49527707695961,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.0406,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.183673469387755,
"grad_norm": 1.0614029169082642,
"learning_rate": 3.5204081632653062e-06,
"loss": 0.0614,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.224489795918367,
"grad_norm": 1.208749771118164,
"learning_rate": 3.469387755102041e-06,
"loss": 0.0449,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.26530612244898,
"grad_norm": 3.612487554550171,
"learning_rate": 3.4183673469387756e-06,
"loss": 0.0672,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.3061224489795915,
"grad_norm": 0.6228938102722168,
"learning_rate": 3.3673469387755105e-06,
"loss": 0.0516,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.346938775510204,
"grad_norm": 0.586557924747467,
"learning_rate": 3.316326530612245e-06,
"loss": 0.0674,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.387755102040816,
"grad_norm": 0.963624119758606,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.0621,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.428571428571429,
"grad_norm": 1.1783013343811035,
"learning_rate": 3.2142857142857147e-06,
"loss": 0.0366,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.469387755102041,
"grad_norm": 4.429933547973633,
"learning_rate": 3.1632653061224496e-06,
"loss": 0.0511,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.510204081632653,
"grad_norm": 4.795422077178955,
"learning_rate": 3.112244897959184e-06,
"loss": 0.0601,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.551020408163265,
"grad_norm": 0.19068406522274017,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.0479,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.591836734693878,
"grad_norm": 3.7448017597198486,
"learning_rate": 3.0102040816326534e-06,
"loss": 0.0404,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.63265306122449,
"grad_norm": 0.3368137776851654,
"learning_rate": 2.959183673469388e-06,
"loss": 0.0488,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.673469387755102,
"grad_norm": 0.14420035481452942,
"learning_rate": 2.908163265306123e-06,
"loss": 0.0582,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.372368723154068,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.0391,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.755102040816326,
"grad_norm": 3.4565131664276123,
"learning_rate": 2.8061224489795917e-06,
"loss": 0.0616,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.795918367346939,
"grad_norm": 3.389681339263916,
"learning_rate": 2.7551020408163266e-06,
"loss": 0.0675,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.836734693877551,
"grad_norm": 0.7100503444671631,
"learning_rate": 2.7040816326530615e-06,
"loss": 0.036,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.877551020408164,
"grad_norm": 0.40696802735328674,
"learning_rate": 2.6530612244897964e-06,
"loss": 0.0632,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.918367346938775,
"grad_norm": 0.7590793967247009,
"learning_rate": 2.602040816326531e-06,
"loss": 0.0549,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.959183673469388,
"grad_norm": 0.48597386479377747,
"learning_rate": 2.5510204081632657e-06,
"loss": 0.0393,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.0,
"grad_norm": 2.6455276012420654,
"learning_rate": 2.5e-06,
"loss": 0.0566,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.040816326530612,
"grad_norm": 2.350471258163452,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.0509,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.081632653061225,
"grad_norm": 3.315977096557617,
"learning_rate": 2.3979591836734696e-06,
"loss": 0.0523,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.122448979591836,
"grad_norm": 1.6327887773513794,
"learning_rate": 2.3469387755102044e-06,
"loss": 0.0432,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.163265306122449,
"grad_norm": 5.029656410217285,
"learning_rate": 2.295918367346939e-06,
"loss": 0.0468,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.204081632653061,
"grad_norm": 3.1543941497802734,
"learning_rate": 2.244897959183674e-06,
"loss": 0.0471,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.244897959183674,
"grad_norm": 1.1178909540176392,
"learning_rate": 2.1938775510204083e-06,
"loss": 0.0538,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.285714285714286,
"grad_norm": 0.7284368276596069,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.0474,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.326530612244898,
"grad_norm": 0.15386615693569183,
"learning_rate": 2.0918367346938776e-06,
"loss": 0.0327,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.36734693877551,
"grad_norm": 12.00415325164795,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.0568,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.408163265306122,
"grad_norm": 0.14763076603412628,
"learning_rate": 1.989795918367347e-06,
"loss": 0.0389,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.448979591836735,
"grad_norm": 0.10665205121040344,
"learning_rate": 1.938775510204082e-06,
"loss": 0.0526,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.489795918367347,
"grad_norm": 0.6945566534996033,
"learning_rate": 1.8877551020408163e-06,
"loss": 0.0276,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.530612244897959,
"grad_norm": 0.6304193735122681,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.0595,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.571428571428571,
"grad_norm": 0.738591194152832,
"learning_rate": 1.7857142857142859e-06,
"loss": 0.0591,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.612244897959184,
"grad_norm": 1.1249669790267944,
"learning_rate": 1.7346938775510206e-06,
"loss": 0.0444,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.653061224489796,
"grad_norm": 0.3204442858695984,
"learning_rate": 1.6836734693877552e-06,
"loss": 0.041,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.6938775510204085,
"grad_norm": 0.6603041887283325,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.0485,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.73469387755102,
"grad_norm": 0.9167451858520508,
"learning_rate": 1.5816326530612248e-06,
"loss": 0.051,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.775510204081632,
"grad_norm": 1.1892409324645996,
"learning_rate": 1.5306122448979593e-06,
"loss": 0.0577,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.816326530612245,
"grad_norm": 1.1679530143737793,
"learning_rate": 1.479591836734694e-06,
"loss": 0.0581,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.857142857142857,
"grad_norm": 4.730435848236084,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.058,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.8979591836734695,
"grad_norm": 2.7492659091949463,
"learning_rate": 1.3775510204081633e-06,
"loss": 0.0593,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.938775510204081,
"grad_norm": 0.29811447858810425,
"learning_rate": 1.3265306122448982e-06,
"loss": 0.0425,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 6.979591836734694,
"grad_norm": 0.15881459414958954,
"learning_rate": 1.2755102040816329e-06,
"loss": 0.0625,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.020408163265306,
"grad_norm": 0.20461368560791016,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.0581,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.061224489795919,
"grad_norm": 1.1351808309555054,
"learning_rate": 1.1734693877551022e-06,
"loss": 0.0646,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.1020408163265305,
"grad_norm": 0.1654195487499237,
"learning_rate": 1.122448979591837e-06,
"loss": 0.0472,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.4366483986377716,
"learning_rate": 1.0714285714285714e-06,
"loss": 0.0461,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.183673469387755,
"grad_norm": 0.5380903482437134,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.0506,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.224489795918367,
"grad_norm": 1.661912441253662,
"learning_rate": 9.69387755102041e-07,
"loss": 0.0664,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.26530612244898,
"grad_norm": 0.4192713499069214,
"learning_rate": 9.183673469387756e-07,
"loss": 0.0394,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.3061224489795915,
"grad_norm": 0.6668973565101624,
"learning_rate": 8.673469387755103e-07,
"loss": 0.0401,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.346938775510204,
"grad_norm": 0.5573325753211975,
"learning_rate": 8.163265306122449e-07,
"loss": 0.0526,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.387755102040816,
"grad_norm": 0.39288291335105896,
"learning_rate": 7.653061224489796e-07,
"loss": 0.0445,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.428571428571429,
"grad_norm": 0.7398673892021179,
"learning_rate": 7.142857142857143e-07,
"loss": 0.054,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.469387755102041,
"grad_norm": 2.143411636352539,
"learning_rate": 6.632653061224491e-07,
"loss": 0.0458,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.510204081632653,
"grad_norm": 0.3958425223827362,
"learning_rate": 6.122448979591837e-07,
"loss": 0.0641,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.551020408163265,
"grad_norm": 2.797384023666382,
"learning_rate": 5.612244897959184e-07,
"loss": 0.0447,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.591836734693878,
"grad_norm": 1.5026339292526245,
"learning_rate": 5.102040816326531e-07,
"loss": 0.0274,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.63265306122449,
"grad_norm": 0.993212103843689,
"learning_rate": 4.591836734693878e-07,
"loss": 0.0393,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.673469387755102,
"grad_norm": 0.16298241913318634,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.055,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.714285714285714,
"grad_norm": 4.067746639251709,
"learning_rate": 3.5714285714285716e-07,
"loss": 0.0661,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.755102040816326,
"grad_norm": 1.387778878211975,
"learning_rate": 3.0612244897959183e-07,
"loss": 0.0586,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.795918367346939,
"grad_norm": 0.6988309621810913,
"learning_rate": 2.5510204081632656e-07,
"loss": 0.0664,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.836734693877551,
"grad_norm": 0.7139838933944702,
"learning_rate": 2.0408163265306121e-07,
"loss": 0.053,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.877551020408164,
"grad_norm": 0.5550429224967957,
"learning_rate": 1.5306122448979592e-07,
"loss": 0.0458,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.918367346938775,
"grad_norm": 1.2351597547531128,
"learning_rate": 1.0204081632653061e-07,
"loss": 0.0471,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 7.959183673469388,
"grad_norm": 0.6292315125465393,
"learning_rate": 5.1020408163265303e-08,
"loss": 0.0532,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 8.0,
"grad_norm": 1.6500349044799805,
"learning_rate": 0.0,
"loss": 0.0453,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 8.0,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1960,
"total_flos": 4.89583144415232e+16,
"total_memory_available (GB)": 94.62,
"train_loss": 0.057532464606421335,
"train_runtime": 1666.2328,
"train_samples_per_second": 52.538,
"train_steps_per_second": 1.315
}
],
"logging_steps": 10,
"max_steps": 1960,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.89583144415232e+16,
"train_batch_size": 40,
"trial_name": null,
"trial_params": null
}