|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 1225, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 9.589848518371582, |
|
"learning_rate": 9.918367346938776e-06, |
|
"loss": 0.2612, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 6.701302528381348, |
|
"learning_rate": 9.836734693877552e-06, |
|
"loss": 0.154, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 5.337311267852783, |
|
"learning_rate": 9.755102040816327e-06, |
|
"loss": 0.1235, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 4.5042338371276855, |
|
"learning_rate": 9.673469387755103e-06, |
|
"loss": 0.1096, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 4.461822032928467, |
|
"learning_rate": 9.591836734693878e-06, |
|
"loss": 0.1196, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 2.2825701236724854, |
|
"learning_rate": 9.510204081632653e-06, |
|
"loss": 0.0805, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 3.725268602371216, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.1026, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 1.707739233970642, |
|
"learning_rate": 9.346938775510204e-06, |
|
"loss": 0.1111, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 4.5863938331604, |
|
"learning_rate": 9.26530612244898e-06, |
|
"loss": 0.0856, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 11.972647666931152, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.0759, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 4.550654888153076, |
|
"learning_rate": 9.102040816326532e-06, |
|
"loss": 0.0717, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 4.418276786804199, |
|
"learning_rate": 9.020408163265307e-06, |
|
"loss": 0.0717, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 1.651443600654602, |
|
"learning_rate": 8.938775510204082e-06, |
|
"loss": 0.0581, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.5251814126968384, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.0481, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 1.7455183267593384, |
|
"learning_rate": 8.775510204081633e-06, |
|
"loss": 0.0625, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 1.7588891983032227, |
|
"learning_rate": 8.69387755102041e-06, |
|
"loss": 0.0711, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 2.7675328254699707, |
|
"learning_rate": 8.612244897959184e-06, |
|
"loss": 0.0747, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 1.781469464302063, |
|
"learning_rate": 8.530612244897961e-06, |
|
"loss": 0.061, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 2.3728435039520264, |
|
"learning_rate": 8.448979591836736e-06, |
|
"loss": 0.0588, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.8711996674537659, |
|
"learning_rate": 8.36734693877551e-06, |
|
"loss": 0.062, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.1986733675003052, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.0627, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 2.8968520164489746, |
|
"learning_rate": 8.204081632653062e-06, |
|
"loss": 0.0604, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 0.8414793610572815, |
|
"learning_rate": 8.122448979591837e-06, |
|
"loss": 0.0559, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.7434167861938477, |
|
"learning_rate": 8.040816326530613e-06, |
|
"loss": 0.0498, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 0.8703041076660156, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.0618, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0612244897959184, |
|
"grad_norm": 1.0856379270553589, |
|
"learning_rate": 7.877551020408164e-06, |
|
"loss": 0.056, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1020408163265305, |
|
"grad_norm": 0.8847401142120361, |
|
"learning_rate": 7.79591836734694e-06, |
|
"loss": 0.0625, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 1.5929882526397705, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0571, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.183673469387755, |
|
"grad_norm": 0.8007532954216003, |
|
"learning_rate": 7.63265306122449e-06, |
|
"loss": 0.0511, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 1.2002859115600586, |
|
"learning_rate": 7.551020408163265e-06, |
|
"loss": 0.065, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2653061224489797, |
|
"grad_norm": 12.871713638305664, |
|
"learning_rate": 7.469387755102041e-06, |
|
"loss": 0.0664, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.306122448979592, |
|
"grad_norm": 2.46173357963562, |
|
"learning_rate": 7.387755102040817e-06, |
|
"loss": 0.0495, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.346938775510204, |
|
"grad_norm": 0.860598087310791, |
|
"learning_rate": 7.306122448979592e-06, |
|
"loss": 0.0603, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3877551020408163, |
|
"grad_norm": 2.5583598613739014, |
|
"learning_rate": 7.224489795918368e-06, |
|
"loss": 0.0547, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.37155964970588684, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.048, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 1.808316707611084, |
|
"learning_rate": 7.061224489795919e-06, |
|
"loss": 0.0462, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.510204081632653, |
|
"grad_norm": 1.0183931589126587, |
|
"learning_rate": 6.979591836734695e-06, |
|
"loss": 0.0594, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5510204081632653, |
|
"grad_norm": 0.5249583721160889, |
|
"learning_rate": 6.8979591836734705e-06, |
|
"loss": 0.0479, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5918367346938775, |
|
"grad_norm": 1.1005572080612183, |
|
"learning_rate": 6.816326530612245e-06, |
|
"loss": 0.0649, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.6047573089599609, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.0607, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6734693877551021, |
|
"grad_norm": 0.7261654734611511, |
|
"learning_rate": 6.653061224489797e-06, |
|
"loss": 0.0606, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.848527193069458, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.0532, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7551020408163265, |
|
"grad_norm": 0.23483288288116455, |
|
"learning_rate": 6.489795918367348e-06, |
|
"loss": 0.068, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 2.0767459869384766, |
|
"learning_rate": 6.408163265306124e-06, |
|
"loss": 0.0617, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.836734693877551, |
|
"grad_norm": 0.5654011368751526, |
|
"learning_rate": 6.326530612244899e-06, |
|
"loss": 0.044, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8775510204081631, |
|
"grad_norm": 0.7382919788360596, |
|
"learning_rate": 6.244897959183675e-06, |
|
"loss": 0.0537, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9183673469387754, |
|
"grad_norm": 1.3547204732894897, |
|
"learning_rate": 6.163265306122449e-06, |
|
"loss": 0.0432, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.19681082665920258, |
|
"learning_rate": 6.0816326530612245e-06, |
|
"loss": 0.0498, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.109737515449524, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0639, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 0.5894625782966614, |
|
"learning_rate": 5.918367346938776e-06, |
|
"loss": 0.0593, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0816326530612246, |
|
"grad_norm": 0.7122555375099182, |
|
"learning_rate": 5.8367346938775515e-06, |
|
"loss": 0.0498, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.122448979591837, |
|
"grad_norm": 0.8958902955055237, |
|
"learning_rate": 5.755102040816327e-06, |
|
"loss": 0.0457, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.163265306122449, |
|
"grad_norm": 11.620415687561035, |
|
"learning_rate": 5.673469387755103e-06, |
|
"loss": 0.0626, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.204081632653061, |
|
"grad_norm": 0.3538230061531067, |
|
"learning_rate": 5.591836734693878e-06, |
|
"loss": 0.0584, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2448979591836733, |
|
"grad_norm": 1.5313146114349365, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.0627, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.3519809246063232, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.0572, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.326530612244898, |
|
"grad_norm": 1.0263270139694214, |
|
"learning_rate": 5.3469387755102045e-06, |
|
"loss": 0.0585, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.36734693877551, |
|
"grad_norm": 0.8926671147346497, |
|
"learning_rate": 5.26530612244898e-06, |
|
"loss": 0.0673, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4081632653061225, |
|
"grad_norm": 0.3185974955558777, |
|
"learning_rate": 5.183673469387756e-06, |
|
"loss": 0.0537, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 0.944624662399292, |
|
"learning_rate": 5.1020408163265315e-06, |
|
"loss": 0.0442, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.489795918367347, |
|
"grad_norm": 0.32796111702919006, |
|
"learning_rate": 5.020408163265307e-06, |
|
"loss": 0.0413, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.5306122448979593, |
|
"grad_norm": 0.7929801940917969, |
|
"learning_rate": 4.938775510204082e-06, |
|
"loss": 0.0428, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.910254955291748, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.0813, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.612244897959184, |
|
"grad_norm": 1.101942539215088, |
|
"learning_rate": 4.775510204081633e-06, |
|
"loss": 0.0495, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.6530612244897958, |
|
"grad_norm": 0.7182526588439941, |
|
"learning_rate": 4.693877551020409e-06, |
|
"loss": 0.0471, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.693877551020408, |
|
"grad_norm": 0.8068158626556396, |
|
"learning_rate": 4.612244897959184e-06, |
|
"loss": 0.0469, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7346938775510203, |
|
"grad_norm": 1.2375913858413696, |
|
"learning_rate": 4.530612244897959e-06, |
|
"loss": 0.0857, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7755102040816326, |
|
"grad_norm": 1.1524357795715332, |
|
"learning_rate": 4.448979591836735e-06, |
|
"loss": 0.0488, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.816326530612245, |
|
"grad_norm": 0.3913586437702179, |
|
"learning_rate": 4.367346938775511e-06, |
|
"loss": 0.0451, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.47935113310813904, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0433, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.8979591836734695, |
|
"grad_norm": 0.8084143996238708, |
|
"learning_rate": 4.204081632653061e-06, |
|
"loss": 0.0548, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.938775510204082, |
|
"grad_norm": 1.7315497398376465, |
|
"learning_rate": 4.122448979591837e-06, |
|
"loss": 0.0587, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"grad_norm": 0.20743349194526672, |
|
"learning_rate": 4.040816326530612e-06, |
|
"loss": 0.0342, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.020408163265306, |
|
"grad_norm": 0.8024761080741882, |
|
"learning_rate": 3.959183673469388e-06, |
|
"loss": 0.053, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 0.45326006412506104, |
|
"learning_rate": 3.877551020408164e-06, |
|
"loss": 0.0619, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.1020408163265305, |
|
"grad_norm": 0.6953087449073792, |
|
"learning_rate": 3.795918367346939e-06, |
|
"loss": 0.0527, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 1.2290390729904175, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.0689, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.183673469387755, |
|
"grad_norm": 0.6281890869140625, |
|
"learning_rate": 3.6326530612244903e-06, |
|
"loss": 0.0647, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2244897959183674, |
|
"grad_norm": 0.3096281588077545, |
|
"learning_rate": 3.5510204081632655e-06, |
|
"loss": 0.0522, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 0.9390127062797546, |
|
"learning_rate": 3.469387755102041e-06, |
|
"loss": 0.0432, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.306122448979592, |
|
"grad_norm": 0.87565016746521, |
|
"learning_rate": 3.3877551020408168e-06, |
|
"loss": 0.0555, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.3469387755102042, |
|
"grad_norm": 1.0797837972640991, |
|
"learning_rate": 3.3061224489795924e-06, |
|
"loss": 0.0455, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.387755102040816, |
|
"grad_norm": 0.3658354878425598, |
|
"learning_rate": 3.2244897959183672e-06, |
|
"loss": 0.0487, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.4766336977481842, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.053, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4693877551020407, |
|
"grad_norm": 0.49318933486938477, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.0812, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.510204081632653, |
|
"grad_norm": 1.3475311994552612, |
|
"learning_rate": 2.979591836734694e-06, |
|
"loss": 0.0451, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5510204081632653, |
|
"grad_norm": 0.36763882637023926, |
|
"learning_rate": 2.8979591836734694e-06, |
|
"loss": 0.0646, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5918367346938775, |
|
"grad_norm": 3.085198402404785, |
|
"learning_rate": 2.816326530612245e-06, |
|
"loss": 0.0439, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.63265306122449, |
|
"grad_norm": 0.17229312658309937, |
|
"learning_rate": 2.7346938775510207e-06, |
|
"loss": 0.0288, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 1.0760900974273682, |
|
"learning_rate": 2.6530612244897964e-06, |
|
"loss": 0.0514, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.45855164527893066, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.0602, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7551020408163263, |
|
"grad_norm": 0.15575875341892242, |
|
"learning_rate": 2.489795918367347e-06, |
|
"loss": 0.0543, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.795918367346939, |
|
"grad_norm": 0.779755175113678, |
|
"learning_rate": 2.4081632653061225e-06, |
|
"loss": 0.0497, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.836734693877551, |
|
"grad_norm": 0.7307060956954956, |
|
"learning_rate": 2.326530612244898e-06, |
|
"loss": 0.0486, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.877551020408163, |
|
"grad_norm": 1.062565803527832, |
|
"learning_rate": 2.244897959183674e-06, |
|
"loss": 0.0594, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9183673469387754, |
|
"grad_norm": 0.3031039535999298, |
|
"learning_rate": 2.1632653061224495e-06, |
|
"loss": 0.0497, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9591836734693877, |
|
"grad_norm": 2.310593843460083, |
|
"learning_rate": 2.0816326530612247e-06, |
|
"loss": 0.0746, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.6998704075813293, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0703, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.040816326530612, |
|
"grad_norm": 0.7492395639419556, |
|
"learning_rate": 1.9183673469387756e-06, |
|
"loss": 0.0486, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 0.7633445858955383, |
|
"learning_rate": 1.8367346938775512e-06, |
|
"loss": 0.0625, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.122448979591836, |
|
"grad_norm": 0.6911561489105225, |
|
"learning_rate": 1.7551020408163267e-06, |
|
"loss": 0.0632, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.163265306122449, |
|
"grad_norm": 0.33521902561187744, |
|
"learning_rate": 1.6734693877551023e-06, |
|
"loss": 0.0406, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.204081632653061, |
|
"grad_norm": 0.7509037852287292, |
|
"learning_rate": 1.5918367346938775e-06, |
|
"loss": 0.0531, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.244897959183674, |
|
"grad_norm": 0.5234070420265198, |
|
"learning_rate": 1.5102040816326532e-06, |
|
"loss": 0.0396, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.7997304797172546, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.05, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.326530612244898, |
|
"grad_norm": 0.2255077213048935, |
|
"learning_rate": 1.3469387755102043e-06, |
|
"loss": 0.0457, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.36734693877551, |
|
"grad_norm": 0.5182124376296997, |
|
"learning_rate": 1.2653061224489795e-06, |
|
"loss": 0.0485, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.408163265306122, |
|
"grad_norm": 0.35046374797821045, |
|
"learning_rate": 1.1836734693877552e-06, |
|
"loss": 0.0519, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.448979591836735, |
|
"grad_norm": 0.3923434615135193, |
|
"learning_rate": 1.1020408163265308e-06, |
|
"loss": 0.0507, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 0.23866137862205505, |
|
"learning_rate": 1.0204081632653063e-06, |
|
"loss": 0.0362, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.530612244897959, |
|
"grad_norm": 0.15117916464805603, |
|
"learning_rate": 9.387755102040817e-07, |
|
"loss": 0.0464, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.5993088483810425, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.0404, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.612244897959184, |
|
"grad_norm": 0.30265432596206665, |
|
"learning_rate": 7.755102040816327e-07, |
|
"loss": 0.0545, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.653061224489796, |
|
"grad_norm": 0.6385183334350586, |
|
"learning_rate": 6.938775510204082e-07, |
|
"loss": 0.0731, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.6938775510204085, |
|
"grad_norm": 1.128566026687622, |
|
"learning_rate": 6.122448979591837e-07, |
|
"loss": 0.0516, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.73469387755102, |
|
"grad_norm": 1.1660116910934448, |
|
"learning_rate": 5.306122448979592e-07, |
|
"loss": 0.0611, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.775510204081632, |
|
"grad_norm": 0.5327439904212952, |
|
"learning_rate": 4.489795918367347e-07, |
|
"loss": 0.0549, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.816326530612245, |
|
"grad_norm": 0.8764423131942749, |
|
"learning_rate": 3.6734693877551025e-07, |
|
"loss": 0.0441, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 0.47835007309913635, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.0541, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 1.048047661781311, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"loss": 0.0731, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.938775510204081, |
|
"grad_norm": 0.3101171851158142, |
|
"learning_rate": 1.2244897959183673e-07, |
|
"loss": 0.0648, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"grad_norm": 0.76802659034729, |
|
"learning_rate": 4.0816326530612253e-08, |
|
"loss": 0.0418, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"max_memory_allocated (GB)": 60.52, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1225, |
|
"total_flos": 3.0598946525952e+16, |
|
"total_memory_available (GB)": 94.62, |
|
"train_loss": 0.06085505417415074, |
|
"train_runtime": 1020.8061, |
|
"train_samples_per_second": 55.51, |
|
"train_steps_per_second": 1.389 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1225, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0598946525952e+16, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|