|
{ |
|
"best_metric": 0.030865401029586792, |
|
"best_model_checkpoint": "./vit-brain-tumour/checkpoint-900", |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 2.4241483211517334, |
|
"learning_rate": 0.00019787234042553193, |
|
"loss": 0.9058, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 1.0025970935821533, |
|
"learning_rate": 0.00019574468085106384, |
|
"loss": 0.4081, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 1.7178714275360107, |
|
"learning_rate": 0.00019361702127659576, |
|
"loss": 0.221, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 1.0210686922073364, |
|
"learning_rate": 0.00019148936170212768, |
|
"loss": 0.4859, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 1.8148099184036255, |
|
"learning_rate": 0.00018936170212765957, |
|
"loss": 0.1614, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 2.7210752964019775, |
|
"learning_rate": 0.0001872340425531915, |
|
"loss": 0.1979, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2978723404255319, |
|
"grad_norm": 1.6892952919006348, |
|
"learning_rate": 0.0001851063829787234, |
|
"loss": 0.1603, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 0.6372403502464294, |
|
"learning_rate": 0.00018297872340425532, |
|
"loss": 0.2073, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"grad_norm": 0.22926707565784454, |
|
"learning_rate": 0.00018085106382978726, |
|
"loss": 0.1834, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 3.5747313499450684, |
|
"learning_rate": 0.00017872340425531915, |
|
"loss": 0.227, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"eval_accuracy": 0.8909599254426841, |
|
"eval_loss": 0.3066631555557251, |
|
"eval_runtime": 4.984, |
|
"eval_samples_per_second": 215.29, |
|
"eval_steps_per_second": 27.087, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.46808510638297873, |
|
"grad_norm": 2.685378074645996, |
|
"learning_rate": 0.00017659574468085107, |
|
"loss": 0.1576, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 2.99109148979187, |
|
"learning_rate": 0.00017446808510638298, |
|
"loss": 0.1967, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5531914893617021, |
|
"grad_norm": 0.5496222376823425, |
|
"learning_rate": 0.0001723404255319149, |
|
"loss": 0.2398, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5957446808510638, |
|
"grad_norm": 2.8761274814605713, |
|
"learning_rate": 0.00017021276595744682, |
|
"loss": 0.1012, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 1.6604396104812622, |
|
"learning_rate": 0.00016808510638297873, |
|
"loss": 0.0942, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 0.680061399936676, |
|
"learning_rate": 0.00016595744680851065, |
|
"loss": 0.1741, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.723404255319149, |
|
"grad_norm": 0.24274976551532745, |
|
"learning_rate": 0.00016382978723404257, |
|
"loss": 0.1793, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 2.806928873062134, |
|
"learning_rate": 0.00016170212765957446, |
|
"loss": 0.1423, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8085106382978723, |
|
"grad_norm": 1.0465861558914185, |
|
"learning_rate": 0.00015957446808510637, |
|
"loss": 0.1028, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 0.5258079171180725, |
|
"learning_rate": 0.00015744680851063832, |
|
"loss": 0.0659, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"eval_accuracy": 0.9627213420316869, |
|
"eval_loss": 0.11090704798698425, |
|
"eval_runtime": 5.4644, |
|
"eval_samples_per_second": 196.362, |
|
"eval_steps_per_second": 24.705, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"grad_norm": 6.45848274230957, |
|
"learning_rate": 0.0001553191489361702, |
|
"loss": 0.0879, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9361702127659575, |
|
"grad_norm": 1.5740026235580444, |
|
"learning_rate": 0.00015319148936170213, |
|
"loss": 0.081, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9787234042553191, |
|
"grad_norm": 1.8585551977157593, |
|
"learning_rate": 0.00015106382978723407, |
|
"loss": 0.095, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0212765957446808, |
|
"grad_norm": 0.10410909354686737, |
|
"learning_rate": 0.00014893617021276596, |
|
"loss": 0.1254, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 0.05694637447595596, |
|
"learning_rate": 0.00014680851063829788, |
|
"loss": 0.025, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1063829787234043, |
|
"grad_norm": 0.053941983729600906, |
|
"learning_rate": 0.0001446808510638298, |
|
"loss": 0.0336, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.148936170212766, |
|
"grad_norm": 5.261789798736572, |
|
"learning_rate": 0.0001425531914893617, |
|
"loss": 0.0965, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1914893617021276, |
|
"grad_norm": 1.4031267166137695, |
|
"learning_rate": 0.00014042553191489363, |
|
"loss": 0.0207, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2340425531914894, |
|
"grad_norm": 0.06696026772260666, |
|
"learning_rate": 0.00013829787234042554, |
|
"loss": 0.0992, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 0.055372320115566254, |
|
"learning_rate": 0.00013617021276595746, |
|
"loss": 0.0404, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"eval_accuracy": 0.9776328052190121, |
|
"eval_loss": 0.09002945572137833, |
|
"eval_runtime": 5.5899, |
|
"eval_samples_per_second": 191.953, |
|
"eval_steps_per_second": 24.151, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3191489361702127, |
|
"grad_norm": 0.5205884575843811, |
|
"learning_rate": 0.00013404255319148938, |
|
"loss": 0.1412, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.3617021276595744, |
|
"grad_norm": 0.23773987591266632, |
|
"learning_rate": 0.00013191489361702127, |
|
"loss": 0.0442, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.4042553191489362, |
|
"grad_norm": 4.623290061950684, |
|
"learning_rate": 0.00012978723404255318, |
|
"loss": 0.0839, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4468085106382977, |
|
"grad_norm": 0.059595488011837006, |
|
"learning_rate": 0.00012765957446808513, |
|
"loss": 0.0131, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4893617021276595, |
|
"grad_norm": 5.406118392944336, |
|
"learning_rate": 0.00012553191489361702, |
|
"loss": 0.0347, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5319148936170213, |
|
"grad_norm": 0.03830511495471001, |
|
"learning_rate": 0.00012340425531914893, |
|
"loss": 0.0145, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.574468085106383, |
|
"grad_norm": 0.043490536510944366, |
|
"learning_rate": 0.00012127659574468086, |
|
"loss": 0.0299, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.6170212765957448, |
|
"grad_norm": 0.046317484229803085, |
|
"learning_rate": 0.00011914893617021277, |
|
"loss": 0.0225, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6595744680851063, |
|
"grad_norm": 0.2598080337047577, |
|
"learning_rate": 0.00011702127659574468, |
|
"loss": 0.0475, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"grad_norm": 0.43866652250289917, |
|
"learning_rate": 0.00011489361702127661, |
|
"loss": 0.05, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"eval_accuracy": 0.9748369058713886, |
|
"eval_loss": 0.10819696635007858, |
|
"eval_runtime": 5.1592, |
|
"eval_samples_per_second": 207.977, |
|
"eval_steps_per_second": 26.167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7446808510638299, |
|
"grad_norm": 4.7892985343933105, |
|
"learning_rate": 0.00011276595744680852, |
|
"loss": 0.059, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.7872340425531914, |
|
"grad_norm": 0.0325588583946228, |
|
"learning_rate": 0.00011063829787234043, |
|
"loss": 0.0592, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.8297872340425532, |
|
"grad_norm": 14.177051544189453, |
|
"learning_rate": 0.00010851063829787234, |
|
"loss": 0.1221, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.872340425531915, |
|
"grad_norm": 0.03597332164645195, |
|
"learning_rate": 0.00010638297872340425, |
|
"loss": 0.071, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"grad_norm": 0.058201372623443604, |
|
"learning_rate": 0.00010425531914893618, |
|
"loss": 0.0227, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9574468085106385, |
|
"grad_norm": 1.1982567310333252, |
|
"learning_rate": 0.00010212765957446809, |
|
"loss": 0.0354, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.06208239495754242, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0258, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.0425531914893615, |
|
"grad_norm": 0.04629204794764519, |
|
"learning_rate": 9.787234042553192e-05, |
|
"loss": 0.0322, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0851063829787235, |
|
"grad_norm": 0.029703835025429726, |
|
"learning_rate": 9.574468085106384e-05, |
|
"loss": 0.0064, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 0.036140162497758865, |
|
"learning_rate": 9.361702127659576e-05, |
|
"loss": 0.006, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_accuracy": 0.9888164026095061, |
|
"eval_loss": 0.037363313138484955, |
|
"eval_runtime": 5.6218, |
|
"eval_samples_per_second": 190.863, |
|
"eval_steps_per_second": 24.013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.1702127659574466, |
|
"grad_norm": 0.023606618866324425, |
|
"learning_rate": 9.148936170212766e-05, |
|
"loss": 0.02, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.2127659574468086, |
|
"grad_norm": 0.04733359441161156, |
|
"learning_rate": 8.936170212765958e-05, |
|
"loss": 0.0371, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.25531914893617, |
|
"grad_norm": 0.03030354529619217, |
|
"learning_rate": 8.723404255319149e-05, |
|
"loss": 0.0056, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.297872340425532, |
|
"grad_norm": 0.21325312554836273, |
|
"learning_rate": 8.510638297872341e-05, |
|
"loss": 0.0146, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.3404255319148937, |
|
"grad_norm": 0.018344180658459663, |
|
"learning_rate": 8.297872340425533e-05, |
|
"loss": 0.0103, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.382978723404255, |
|
"grad_norm": 0.022901207208633423, |
|
"learning_rate": 8.085106382978723e-05, |
|
"loss": 0.0052, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.425531914893617, |
|
"grad_norm": 0.023875948041677475, |
|
"learning_rate": 7.872340425531916e-05, |
|
"loss": 0.0048, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.4680851063829787, |
|
"grad_norm": 0.01968464069068432, |
|
"learning_rate": 7.659574468085106e-05, |
|
"loss": 0.0047, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.5106382978723403, |
|
"grad_norm": 0.11908008903265, |
|
"learning_rate": 7.446808510638298e-05, |
|
"loss": 0.0047, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 0.020715612918138504, |
|
"learning_rate": 7.23404255319149e-05, |
|
"loss": 0.0147, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"eval_accuracy": 0.9888164026095061, |
|
"eval_loss": 0.0540616437792778, |
|
"eval_runtime": 5.1006, |
|
"eval_samples_per_second": 210.367, |
|
"eval_steps_per_second": 26.467, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.595744680851064, |
|
"grad_norm": 0.019655458629131317, |
|
"learning_rate": 7.021276595744681e-05, |
|
"loss": 0.0041, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.6382978723404253, |
|
"grad_norm": 0.04054981470108032, |
|
"learning_rate": 6.808510638297873e-05, |
|
"loss": 0.0625, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.6808510638297873, |
|
"grad_norm": 0.18255342543125153, |
|
"learning_rate": 6.595744680851063e-05, |
|
"loss": 0.0171, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.723404255319149, |
|
"grad_norm": 0.02170875295996666, |
|
"learning_rate": 6.382978723404256e-05, |
|
"loss": 0.0044, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.7659574468085104, |
|
"grad_norm": 0.01993757300078869, |
|
"learning_rate": 6.170212765957447e-05, |
|
"loss": 0.0042, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.8085106382978724, |
|
"grad_norm": 0.017473889514803886, |
|
"learning_rate": 5.9574468085106384e-05, |
|
"loss": 0.0302, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.851063829787234, |
|
"grad_norm": 0.018037663772702217, |
|
"learning_rate": 5.744680851063831e-05, |
|
"loss": 0.0044, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.8936170212765955, |
|
"grad_norm": 0.016940144822001457, |
|
"learning_rate": 5.531914893617022e-05, |
|
"loss": 0.0314, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.9361702127659575, |
|
"grad_norm": 0.018456028774380684, |
|
"learning_rate": 5.319148936170213e-05, |
|
"loss": 0.0039, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.978723404255319, |
|
"grad_norm": 0.02064073272049427, |
|
"learning_rate": 5.1063829787234044e-05, |
|
"loss": 0.0105, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.978723404255319, |
|
"eval_accuracy": 0.9906803355079217, |
|
"eval_loss": 0.03594699874520302, |
|
"eval_runtime": 5.0939, |
|
"eval_samples_per_second": 210.644, |
|
"eval_steps_per_second": 26.502, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.021276595744681, |
|
"grad_norm": 0.018245166167616844, |
|
"learning_rate": 4.893617021276596e-05, |
|
"loss": 0.0041, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.0638297872340425, |
|
"grad_norm": 0.01676538586616516, |
|
"learning_rate": 4.680851063829788e-05, |
|
"loss": 0.0047, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.106382978723404, |
|
"grad_norm": 0.018977968022227287, |
|
"learning_rate": 4.468085106382979e-05, |
|
"loss": 0.0035, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.148936170212766, |
|
"grad_norm": 0.01823911815881729, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 0.0048, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 0.017579803243279457, |
|
"learning_rate": 4.0425531914893614e-05, |
|
"loss": 0.0035, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2340425531914896, |
|
"grad_norm": 0.013467966578900814, |
|
"learning_rate": 3.829787234042553e-05, |
|
"loss": 0.004, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.276595744680851, |
|
"grad_norm": 0.03895196318626404, |
|
"learning_rate": 3.617021276595745e-05, |
|
"loss": 0.0036, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.3191489361702127, |
|
"grad_norm": 0.019369514659047127, |
|
"learning_rate": 3.4042553191489365e-05, |
|
"loss": 0.0034, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.3617021276595747, |
|
"grad_norm": 0.014404061250388622, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 0.0042, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.404255319148936, |
|
"grad_norm": 0.016735393553972244, |
|
"learning_rate": 2.9787234042553192e-05, |
|
"loss": 0.0032, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.404255319148936, |
|
"eval_accuracy": 0.9906803355079217, |
|
"eval_loss": 0.039232004433870316, |
|
"eval_runtime": 5.1085, |
|
"eval_samples_per_second": 210.041, |
|
"eval_steps_per_second": 26.426, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.4468085106382977, |
|
"grad_norm": 0.01882547326385975, |
|
"learning_rate": 2.765957446808511e-05, |
|
"loss": 0.0033, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.4893617021276597, |
|
"grad_norm": 0.018505167216062546, |
|
"learning_rate": 2.5531914893617022e-05, |
|
"loss": 0.0039, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.5319148936170213, |
|
"grad_norm": 0.013403504155576229, |
|
"learning_rate": 2.340425531914894e-05, |
|
"loss": 0.008, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.574468085106383, |
|
"grad_norm": 0.013858008198440075, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 0.0033, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.617021276595745, |
|
"grad_norm": 0.01901717483997345, |
|
"learning_rate": 1.9148936170212766e-05, |
|
"loss": 0.0031, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.6595744680851063, |
|
"grad_norm": 0.015291731804609299, |
|
"learning_rate": 1.7021276595744682e-05, |
|
"loss": 0.0031, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.702127659574468, |
|
"grad_norm": 0.015405308455228806, |
|
"learning_rate": 1.4893617021276596e-05, |
|
"loss": 0.0037, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.74468085106383, |
|
"grad_norm": 0.01782970316708088, |
|
"learning_rate": 1.2765957446808511e-05, |
|
"loss": 0.0032, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.7872340425531914, |
|
"grad_norm": 0.013772828504443169, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 0.0032, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.829787234042553, |
|
"grad_norm": 3.2476401329040527, |
|
"learning_rate": 8.510638297872341e-06, |
|
"loss": 0.0055, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.829787234042553, |
|
"eval_accuracy": 0.9925442684063374, |
|
"eval_loss": 0.030865401029586792, |
|
"eval_runtime": 5.6777, |
|
"eval_samples_per_second": 188.986, |
|
"eval_steps_per_second": 23.777, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.872340425531915, |
|
"grad_norm": 0.01216566190123558, |
|
"learning_rate": 6.3829787234042555e-06, |
|
"loss": 0.0029, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.9148936170212765, |
|
"grad_norm": 0.02088315039873123, |
|
"learning_rate": 4.255319148936171e-06, |
|
"loss": 0.0032, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.9574468085106385, |
|
"grad_norm": 0.014219781383872032, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"loss": 0.0031, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.048915717750787735, |
|
"learning_rate": 0.0, |
|
"loss": 0.0031, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 940, |
|
"total_flos": 1.1651904419373711e+18, |
|
"train_loss": 0.07046630619926021, |
|
"train_runtime": 463.0785, |
|
"train_samples_per_second": 32.47, |
|
"train_steps_per_second": 2.03 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 940, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1651904419373711e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|