|
{ |
|
"best_metric": 10.287935256958008, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.8565310492505354, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004282655246252677, |
|
"grad_norm": 1.0140916109085083, |
|
"learning_rate": 1e-05, |
|
"loss": 41.5163, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004282655246252677, |
|
"eval_loss": 10.378774642944336, |
|
"eval_runtime": 0.833, |
|
"eval_samples_per_second": 472.969, |
|
"eval_steps_per_second": 118.842, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008565310492505354, |
|
"grad_norm": 0.9732224345207214, |
|
"learning_rate": 2e-05, |
|
"loss": 41.5317, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01284796573875803, |
|
"grad_norm": 0.9495458006858826, |
|
"learning_rate": 3e-05, |
|
"loss": 41.529, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.017130620985010708, |
|
"grad_norm": 1.0050394535064697, |
|
"learning_rate": 4e-05, |
|
"loss": 41.527, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.021413276231263382, |
|
"grad_norm": 0.8522498607635498, |
|
"learning_rate": 5e-05, |
|
"loss": 41.5381, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02569593147751606, |
|
"grad_norm": 0.7451391220092773, |
|
"learning_rate": 6e-05, |
|
"loss": 41.5266, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.029978586723768737, |
|
"grad_norm": 0.9513540267944336, |
|
"learning_rate": 7e-05, |
|
"loss": 41.5287, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.034261241970021415, |
|
"grad_norm": 0.809018611907959, |
|
"learning_rate": 8e-05, |
|
"loss": 41.5078, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03854389721627409, |
|
"grad_norm": 0.9694377183914185, |
|
"learning_rate": 9e-05, |
|
"loss": 41.5079, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.042826552462526764, |
|
"grad_norm": 0.9219012260437012, |
|
"learning_rate": 0.0001, |
|
"loss": 41.5011, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.047109207708779445, |
|
"grad_norm": 0.9172611832618713, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 41.4916, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05139186295503212, |
|
"grad_norm": 0.9734646677970886, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 41.4979, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.055674518201284794, |
|
"grad_norm": 1.1014163494110107, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 41.4956, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.059957173447537475, |
|
"grad_norm": 1.0327798128128052, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 41.4725, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06423982869379015, |
|
"grad_norm": 1.2098652124404907, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 41.4752, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06852248394004283, |
|
"grad_norm": 1.0268454551696777, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 41.4617, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0728051391862955, |
|
"grad_norm": 1.2437031269073486, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 41.4602, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07708779443254818, |
|
"grad_norm": 1.2205047607421875, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 41.4568, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08137044967880086, |
|
"grad_norm": 1.4317915439605713, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 41.4491, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08565310492505353, |
|
"grad_norm": 1.5389832258224487, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 41.4467, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08993576017130621, |
|
"grad_norm": 1.5515321493148804, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 41.4331, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09421841541755889, |
|
"grad_norm": 1.739243507385254, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 41.4354, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09850107066381156, |
|
"grad_norm": 1.4590630531311035, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 41.4432, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.10278372591006424, |
|
"grad_norm": 2.056377649307251, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 41.3832, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.10706638115631692, |
|
"grad_norm": 1.8177523612976074, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 41.393, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11134903640256959, |
|
"grad_norm": 1.6252126693725586, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 41.3884, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11563169164882227, |
|
"grad_norm": 1.8270189762115479, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 41.3723, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.11991434689507495, |
|
"grad_norm": 1.8442226648330688, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 41.3694, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12419700214132762, |
|
"grad_norm": 1.9399042129516602, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 41.358, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.1284796573875803, |
|
"grad_norm": 1.9133156538009644, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 41.3635, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13276231263383298, |
|
"grad_norm": 1.9715404510498047, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 41.314, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13704496788008566, |
|
"grad_norm": 2.019277572631836, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 41.3179, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14132762312633834, |
|
"grad_norm": 1.889991044998169, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 41.3112, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.145610278372591, |
|
"grad_norm": 1.585730791091919, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 41.3056, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.14989293361884368, |
|
"grad_norm": 1.5292245149612427, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 41.2858, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15417558886509636, |
|
"grad_norm": 1.7086025476455688, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 41.2667, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15845824411134904, |
|
"grad_norm": 1.6152453422546387, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 41.2465, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16274089935760172, |
|
"grad_norm": 1.4000519514083862, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 41.2462, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1670235546038544, |
|
"grad_norm": 1.553257703781128, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 41.2412, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17130620985010706, |
|
"grad_norm": 1.296061396598816, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 41.2366, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17558886509635974, |
|
"grad_norm": 1.3130500316619873, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 41.239, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.17987152034261242, |
|
"grad_norm": 1.3968032598495483, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 41.202, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1841541755888651, |
|
"grad_norm": 1.1028584241867065, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 41.2066, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18843683083511778, |
|
"grad_norm": 1.223429560661316, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 41.2302, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19271948608137046, |
|
"grad_norm": 1.0133888721466064, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 41.2274, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19700214132762311, |
|
"grad_norm": 1.038590908050537, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 41.1885, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2012847965738758, |
|
"grad_norm": 1.2082979679107666, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 41.1656, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.20556745182012848, |
|
"grad_norm": 1.0701316595077515, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 41.1831, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.20985010706638116, |
|
"grad_norm": 0.8915174007415771, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 41.1721, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.21413276231263384, |
|
"grad_norm": 0.9999569058418274, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 41.1658, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21413276231263384, |
|
"eval_loss": 10.300396919250488, |
|
"eval_runtime": 0.8194, |
|
"eval_samples_per_second": 480.82, |
|
"eval_steps_per_second": 120.815, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21841541755888652, |
|
"grad_norm": 1.213214635848999, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 41.2533, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22269807280513917, |
|
"grad_norm": 0.897641122341156, |
|
"learning_rate": 8.842005554284296e-05, |
|
"loss": 41.2424, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22698072805139186, |
|
"grad_norm": 0.9352962970733643, |
|
"learning_rate": 8.788574348801675e-05, |
|
"loss": 41.2243, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23126338329764454, |
|
"grad_norm": 0.7871758341789246, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 41.2395, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23554603854389722, |
|
"grad_norm": 0.8345593810081482, |
|
"learning_rate": 8.678619553365659e-05, |
|
"loss": 41.2352, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.2398286937901499, |
|
"grad_norm": 0.7352098822593689, |
|
"learning_rate": 8.622126023955446e-05, |
|
"loss": 41.2216, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.24411134903640258, |
|
"grad_norm": 0.7078768610954285, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 41.2238, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24839400428265523, |
|
"grad_norm": 0.6202782988548279, |
|
"learning_rate": 8.506183921362443e-05, |
|
"loss": 41.2145, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.25267665952890794, |
|
"grad_norm": 0.6747980713844299, |
|
"learning_rate": 8.44676704559283e-05, |
|
"loss": 41.2165, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2569593147751606, |
|
"grad_norm": 0.6640037298202515, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 41.2129, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26124197002141325, |
|
"grad_norm": 0.5419409871101379, |
|
"learning_rate": 8.32512286056924e-05, |
|
"loss": 41.1924, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.26552462526766596, |
|
"grad_norm": 0.5575965046882629, |
|
"learning_rate": 8.262928807620843e-05, |
|
"loss": 41.1859, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2698072805139186, |
|
"grad_norm": 0.3973611891269684, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 41.2037, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2740899357601713, |
|
"grad_norm": 0.49591419100761414, |
|
"learning_rate": 8.135881792367686e-05, |
|
"loss": 41.1752, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.278372591006424, |
|
"grad_norm": 0.5320420861244202, |
|
"learning_rate": 8.07106356344834e-05, |
|
"loss": 41.1887, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2826552462526767, |
|
"grad_norm": 0.40315917134284973, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 41.1731, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.28693790149892934, |
|
"grad_norm": 0.4538324475288391, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 41.1811, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.291220556745182, |
|
"grad_norm": 0.3954373598098755, |
|
"learning_rate": 7.871643313414718e-05, |
|
"loss": 41.1942, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2955032119914347, |
|
"grad_norm": 0.5449280738830566, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 41.1376, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.29978586723768735, |
|
"grad_norm": 0.37435728311538696, |
|
"learning_rate": 7.734740790612136e-05, |
|
"loss": 41.2013, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.30406852248394006, |
|
"grad_norm": 0.415919691324234, |
|
"learning_rate": 7.66515864363997e-05, |
|
"loss": 41.1889, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3083511777301927, |
|
"grad_norm": 0.3450535237789154, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 41.1531, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.31263383297644537, |
|
"grad_norm": 0.44574135541915894, |
|
"learning_rate": 7.52382768867422e-05, |
|
"loss": 41.192, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.3169164882226981, |
|
"grad_norm": 0.3757171034812927, |
|
"learning_rate": 7.452117519152542e-05, |
|
"loss": 41.1906, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.32119914346895073, |
|
"grad_norm": 0.4182075262069702, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 41.1905, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32548179871520344, |
|
"grad_norm": 0.42849573493003845, |
|
"learning_rate": 7.30670581489344e-05, |
|
"loss": 41.1425, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3297644539614561, |
|
"grad_norm": 0.4211777150630951, |
|
"learning_rate": 7.233044034264034e-05, |
|
"loss": 41.1518, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3340471092077088, |
|
"grad_norm": 0.3975631594657898, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 41.1944, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.33832976445396146, |
|
"grad_norm": 0.3846378028392792, |
|
"learning_rate": 7.083909302476453e-05, |
|
"loss": 41.1686, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3426124197002141, |
|
"grad_norm": 0.37172695994377136, |
|
"learning_rate": 7.008477123264848e-05, |
|
"loss": 41.1602, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3468950749464668, |
|
"grad_norm": 0.43355971574783325, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 41.1777, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.3511777301927195, |
|
"grad_norm": 0.4843752086162567, |
|
"learning_rate": 6.855986244591104e-05, |
|
"loss": 41.1754, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3554603854389722, |
|
"grad_norm": 0.44365033507347107, |
|
"learning_rate": 6.778969234612584e-05, |
|
"loss": 41.1627, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.35974304068522484, |
|
"grad_norm": 0.4922822415828705, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 41.1995, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.3640256959314775, |
|
"grad_norm": 0.5284846425056458, |
|
"learning_rate": 6.623497346023418e-05, |
|
"loss": 41.1786, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3683083511777302, |
|
"grad_norm": 0.6352524161338806, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 41.1377, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.37259100642398285, |
|
"grad_norm": 0.4986525774002075, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 41.1724, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.37687366167023556, |
|
"grad_norm": 0.5726274847984314, |
|
"learning_rate": 6.387014543809223e-05, |
|
"loss": 41.145, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3811563169164882, |
|
"grad_norm": 0.4305708110332489, |
|
"learning_rate": 6.307399704769099e-05, |
|
"loss": 41.1494, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.3854389721627409, |
|
"grad_norm": 0.47623881697654724, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 41.1446, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3897216274089936, |
|
"grad_norm": 0.517105221748352, |
|
"learning_rate": 6.147119600233758e-05, |
|
"loss": 41.1379, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.39400428265524623, |
|
"grad_norm": 0.7964802384376526, |
|
"learning_rate": 6.066498153718735e-05, |
|
"loss": 41.1429, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.39828693790149894, |
|
"grad_norm": 0.45318299531936646, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 41.1773, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.4025695931477516, |
|
"grad_norm": 0.48306235671043396, |
|
"learning_rate": 5.90440267166055e-05, |
|
"loss": 41.1437, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4068522483940043, |
|
"grad_norm": 0.6395437717437744, |
|
"learning_rate": 5.8229729514036705e-05, |
|
"loss": 41.1413, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.41113490364025695, |
|
"grad_norm": 0.5044965744018555, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 41.1423, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41541755888650966, |
|
"grad_norm": 0.5140703916549683, |
|
"learning_rate": 5.6594608567103456e-05, |
|
"loss": 41.1529, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.4197002141327623, |
|
"grad_norm": 0.7176982164382935, |
|
"learning_rate": 5.577423184847932e-05, |
|
"loss": 41.1448, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.42398286937901497, |
|
"grad_norm": 0.6121039986610413, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 41.154, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4282655246252677, |
|
"grad_norm": 1.0461997985839844, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 41.1598, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4282655246252677, |
|
"eval_loss": 10.292905807495117, |
|
"eval_runtime": 0.8308, |
|
"eval_samples_per_second": 474.222, |
|
"eval_steps_per_second": 119.157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.43254817987152033, |
|
"grad_norm": 1.1328588724136353, |
|
"learning_rate": 5.330452921628497e-05, |
|
"loss": 41.2356, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.43683083511777304, |
|
"grad_norm": 0.8626485466957092, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 41.2255, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4411134903640257, |
|
"grad_norm": 0.5959879755973816, |
|
"learning_rate": 5.165316846586541e-05, |
|
"loss": 41.1977, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44539614561027835, |
|
"grad_norm": 0.6696794033050537, |
|
"learning_rate": 5.0826697238317935e-05, |
|
"loss": 41.2135, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.44967880085653106, |
|
"grad_norm": 0.7611036896705627, |
|
"learning_rate": 5e-05, |
|
"loss": 41.1939, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4539614561027837, |
|
"grad_norm": 0.8138741254806519, |
|
"learning_rate": 4.917330276168208e-05, |
|
"loss": 41.2055, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4582441113490364, |
|
"grad_norm": 0.5165987610816956, |
|
"learning_rate": 4.834683153413459e-05, |
|
"loss": 41.1803, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4625267665952891, |
|
"grad_norm": 0.6895624995231628, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 41.2141, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.4668094218415418, |
|
"grad_norm": 0.5425256490707397, |
|
"learning_rate": 4.669547078371504e-05, |
|
"loss": 41.1917, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.47109207708779444, |
|
"grad_norm": 0.4485233426094055, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 41.1746, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4753747323340471, |
|
"grad_norm": 0.5313943028450012, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 41.1849, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.4796573875802998, |
|
"grad_norm": 0.6793656945228577, |
|
"learning_rate": 4.4225768151520694e-05, |
|
"loss": 41.1816, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.48394004282655245, |
|
"grad_norm": 0.5230658054351807, |
|
"learning_rate": 4.3405391432896555e-05, |
|
"loss": 41.1465, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.48822269807280516, |
|
"grad_norm": 0.39127117395401, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 41.1744, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4925053533190578, |
|
"grad_norm": 0.45096760988235474, |
|
"learning_rate": 4.17702704859633e-05, |
|
"loss": 41.1803, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49678800856531047, |
|
"grad_norm": 0.3252319097518921, |
|
"learning_rate": 4.095597328339452e-05, |
|
"loss": 41.1872, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5010706638115632, |
|
"grad_norm": 0.43201541900634766, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 41.1486, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5053533190578159, |
|
"grad_norm": 0.42343848943710327, |
|
"learning_rate": 3.933501846281267e-05, |
|
"loss": 41.1491, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5096359743040685, |
|
"grad_norm": 0.48265865445137024, |
|
"learning_rate": 3.852880399766243e-05, |
|
"loss": 41.1537, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5139186295503212, |
|
"grad_norm": 0.3337669372558594, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 41.1761, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5182012847965739, |
|
"grad_norm": 0.5385298728942871, |
|
"learning_rate": 3.6926002952309016e-05, |
|
"loss": 41.1872, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5224839400428265, |
|
"grad_norm": 0.43514499068260193, |
|
"learning_rate": 3.612985456190778e-05, |
|
"loss": 41.1601, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5267665952890792, |
|
"grad_norm": 0.47093290090560913, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 41.1639, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5310492505353319, |
|
"grad_norm": 0.4684036076068878, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 41.1672, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5353319057815846, |
|
"grad_norm": 0.3665253221988678, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 41.1986, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5396145610278372, |
|
"grad_norm": 0.47622647881507874, |
|
"learning_rate": 3.298534127791785e-05, |
|
"loss": 41.1763, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5438972162740899, |
|
"grad_norm": 0.44692617654800415, |
|
"learning_rate": 3.221030765387417e-05, |
|
"loss": 41.1306, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5481798715203426, |
|
"grad_norm": 0.40188875794410706, |
|
"learning_rate": 3.144013755408895e-05, |
|
"loss": 41.1763, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5524625267665952, |
|
"grad_norm": 0.4522937536239624, |
|
"learning_rate": 3.0675041535377405e-05, |
|
"loss": 41.1487, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.556745182012848, |
|
"grad_norm": 0.455946147441864, |
|
"learning_rate": 2.991522876735154e-05, |
|
"loss": 41.1591, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5610278372591007, |
|
"grad_norm": 0.38201573491096497, |
|
"learning_rate": 2.916090697523549e-05, |
|
"loss": 41.1784, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5653104925053534, |
|
"grad_norm": 0.5712035298347473, |
|
"learning_rate": 2.8412282383075363e-05, |
|
"loss": 41.1288, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.569593147751606, |
|
"grad_norm": 0.4802549183368683, |
|
"learning_rate": 2.766955965735968e-05, |
|
"loss": 41.1453, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5738758029978587, |
|
"grad_norm": 0.5156493782997131, |
|
"learning_rate": 2.693294185106562e-05, |
|
"loss": 41.1488, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5781584582441114, |
|
"grad_norm": 0.44495895504951477, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 41.1394, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.582441113490364, |
|
"grad_norm": 0.46417051553726196, |
|
"learning_rate": 2.547882480847461e-05, |
|
"loss": 41.126, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.5867237687366167, |
|
"grad_norm": 0.3869642913341522, |
|
"learning_rate": 2.476172311325783e-05, |
|
"loss": 41.1446, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.5910064239828694, |
|
"grad_norm": 0.46840083599090576, |
|
"learning_rate": 2.405152131093926e-05, |
|
"loss": 41.1703, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5952890792291221, |
|
"grad_norm": 0.43437740206718445, |
|
"learning_rate": 2.3348413563600325e-05, |
|
"loss": 41.1154, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5995717344753747, |
|
"grad_norm": 0.43356087803840637, |
|
"learning_rate": 2.2652592093878666e-05, |
|
"loss": 41.1531, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6038543897216274, |
|
"grad_norm": 0.49236592650413513, |
|
"learning_rate": 2.196424713241637e-05, |
|
"loss": 41.1226, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6081370449678801, |
|
"grad_norm": 0.515826940536499, |
|
"learning_rate": 2.128356686585282e-05, |
|
"loss": 41.1367, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6124197002141327, |
|
"grad_norm": 0.5549665093421936, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 41.1751, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6167023554603854, |
|
"grad_norm": 0.5821396708488464, |
|
"learning_rate": 1.9945942635848748e-05, |
|
"loss": 41.1535, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6209850107066381, |
|
"grad_norm": 0.9271972179412842, |
|
"learning_rate": 1.928936436551661e-05, |
|
"loss": 41.1525, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6252676659528907, |
|
"grad_norm": 0.603287398815155, |
|
"learning_rate": 1.8641182076323148e-05, |
|
"loss": 41.167, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6295503211991434, |
|
"grad_norm": 0.7227380871772766, |
|
"learning_rate": 1.800157297483417e-05, |
|
"loss": 41.1586, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6338329764453962, |
|
"grad_norm": 0.7638959884643555, |
|
"learning_rate": 1.7370711923791567e-05, |
|
"loss": 41.115, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6381156316916489, |
|
"grad_norm": 0.9707202315330505, |
|
"learning_rate": 1.6748771394307585e-05, |
|
"loss": 41.1299, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6423982869379015, |
|
"grad_norm": 1.1904702186584473, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 41.1096, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6423982869379015, |
|
"eval_loss": 10.288755416870117, |
|
"eval_runtime": 0.8147, |
|
"eval_samples_per_second": 483.6, |
|
"eval_steps_per_second": 121.514, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6466809421841542, |
|
"grad_norm": 0.8743388056755066, |
|
"learning_rate": 1.553232954407171e-05, |
|
"loss": 41.2343, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6509635974304069, |
|
"grad_norm": 0.8372085690498352, |
|
"learning_rate": 1.4938160786375572e-05, |
|
"loss": 41.2028, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6552462526766595, |
|
"grad_norm": 0.6851708889007568, |
|
"learning_rate": 1.435357758543015e-05, |
|
"loss": 41.2184, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6595289079229122, |
|
"grad_norm": 0.6139897704124451, |
|
"learning_rate": 1.3778739760445552e-05, |
|
"loss": 41.2032, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6638115631691649, |
|
"grad_norm": 0.5757542252540588, |
|
"learning_rate": 1.3213804466343421e-05, |
|
"loss": 41.1716, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6680942184154176, |
|
"grad_norm": 0.5816591382026672, |
|
"learning_rate": 1.2658926150792322e-05, |
|
"loss": 41.1707, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6723768736616702, |
|
"grad_norm": 0.6130126118659973, |
|
"learning_rate": 1.2114256511983274e-05, |
|
"loss": 41.1754, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6766595289079229, |
|
"grad_norm": 0.6503409743309021, |
|
"learning_rate": 1.157994445715706e-05, |
|
"loss": 41.1595, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6809421841541756, |
|
"grad_norm": 0.5550835728645325, |
|
"learning_rate": 1.1056136061894384e-05, |
|
"loss": 41.1744, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6852248394004282, |
|
"grad_norm": 0.5488256812095642, |
|
"learning_rate": 1.0542974530180327e-05, |
|
"loss": 41.1501, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6895074946466809, |
|
"grad_norm": 0.42393171787261963, |
|
"learning_rate": 1.0040600155253765e-05, |
|
"loss": 41.1699, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.6937901498929336, |
|
"grad_norm": 0.4661500155925751, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 41.1465, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.6980728051391863, |
|
"grad_norm": 0.5130885243415833, |
|
"learning_rate": 9.068759265665384e-06, |
|
"loss": 41.1654, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.702355460385439, |
|
"grad_norm": 0.4037836790084839, |
|
"learning_rate": 8.599558442598998e-06, |
|
"loss": 41.1324, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7066381156316917, |
|
"grad_norm": 0.501358687877655, |
|
"learning_rate": 8.141676086873572e-06, |
|
"loss": 41.1913, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7109207708779444, |
|
"grad_norm": 0.6229903101921082, |
|
"learning_rate": 7.695237378953223e-06, |
|
"loss": 41.1398, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.715203426124197, |
|
"grad_norm": 0.5036523938179016, |
|
"learning_rate": 7.260364370723044e-06, |
|
"loss": 41.1777, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7194860813704497, |
|
"grad_norm": 0.4165601134300232, |
|
"learning_rate": 6.837175952121306e-06, |
|
"loss": 41.1167, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.7237687366167024, |
|
"grad_norm": 0.5183447003364563, |
|
"learning_rate": 6.425787818636131e-06, |
|
"loss": 41.1806, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.728051391862955, |
|
"grad_norm": 0.37226128578186035, |
|
"learning_rate": 6.026312439675552e-06, |
|
"loss": 41.1631, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7323340471092077, |
|
"grad_norm": 0.43647095561027527, |
|
"learning_rate": 5.6388590278194096e-06, |
|
"loss": 41.1716, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7366167023554604, |
|
"grad_norm": 0.37477391958236694, |
|
"learning_rate": 5.263533508961827e-06, |
|
"loss": 41.1419, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7408993576017131, |
|
"grad_norm": 0.38343745470046997, |
|
"learning_rate": 4.900438493352055e-06, |
|
"loss": 41.1524, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7451820128479657, |
|
"grad_norm": 0.457410603761673, |
|
"learning_rate": 4.549673247541875e-06, |
|
"loss": 41.1392, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7494646680942184, |
|
"grad_norm": 0.4119358956813812, |
|
"learning_rate": 4.2113336672471245e-06, |
|
"loss": 41.159, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7537473233404711, |
|
"grad_norm": 0.4237579107284546, |
|
"learning_rate": 3.885512251130763e-06, |
|
"loss": 41.1824, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7580299785867237, |
|
"grad_norm": 0.5307998061180115, |
|
"learning_rate": 3.5722980755146517e-06, |
|
"loss": 41.1349, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7623126338329764, |
|
"grad_norm": 0.39958441257476807, |
|
"learning_rate": 3.271776770026963e-06, |
|
"loss": 41.1412, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7665952890792291, |
|
"grad_norm": 0.36851799488067627, |
|
"learning_rate": 2.9840304941919415e-06, |
|
"loss": 41.1155, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7708779443254818, |
|
"grad_norm": 0.4594135880470276, |
|
"learning_rate": 2.7091379149682685e-06, |
|
"loss": 41.1634, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7751605995717344, |
|
"grad_norm": 0.4165166914463043, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 41.1465, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.7794432548179872, |
|
"grad_norm": 0.47898855805397034, |
|
"learning_rate": 2.1982109232821178e-06, |
|
"loss": 41.177, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7837259100642399, |
|
"grad_norm": 0.3837435245513916, |
|
"learning_rate": 1.962316193157593e-06, |
|
"loss": 41.1068, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7880085653104925, |
|
"grad_norm": 0.5167264342308044, |
|
"learning_rate": 1.7395544861325718e-06, |
|
"loss": 41.1565, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.7922912205567452, |
|
"grad_norm": 0.413090318441391, |
|
"learning_rate": 1.5299867030334814e-06, |
|
"loss": 41.1915, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7965738758029979, |
|
"grad_norm": 0.5370827913284302, |
|
"learning_rate": 1.333670137599713e-06, |
|
"loss": 41.1455, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8008565310492506, |
|
"grad_norm": 0.39179834723472595, |
|
"learning_rate": 1.1506584608200367e-06, |
|
"loss": 41.1149, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8051391862955032, |
|
"grad_norm": 0.6464431881904602, |
|
"learning_rate": 9.810017062595322e-07, |
|
"loss": 41.1741, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8094218415417559, |
|
"grad_norm": 0.40582799911499023, |
|
"learning_rate": 8.247462563808817e-07, |
|
"loss": 41.1555, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.8137044967880086, |
|
"grad_norm": 0.5137993693351746, |
|
"learning_rate": 6.819348298638839e-07, |
|
"loss": 41.1078, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8179871520342612, |
|
"grad_norm": 0.531497597694397, |
|
"learning_rate": 5.526064699265753e-07, |
|
"loss": 41.1521, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8222698072805139, |
|
"grad_norm": 0.5072067379951477, |
|
"learning_rate": 4.367965336512403e-07, |
|
"loss": 41.084, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.8265524625267666, |
|
"grad_norm": 0.7040443420410156, |
|
"learning_rate": 3.3453668231809286e-07, |
|
"loss": 41.0847, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8308351177730193, |
|
"grad_norm": 0.5859010815620422, |
|
"learning_rate": 2.458548727494292e-07, |
|
"loss": 41.1341, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8351177730192719, |
|
"grad_norm": 0.6137392520904541, |
|
"learning_rate": 1.7077534966650766e-07, |
|
"loss": 41.1505, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8394004282655246, |
|
"grad_norm": 0.5699211359024048, |
|
"learning_rate": 1.0931863906127327e-07, |
|
"loss": 41.1112, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8436830835117773, |
|
"grad_norm": 0.768570065498352, |
|
"learning_rate": 6.150154258476315e-08, |
|
"loss": 41.1204, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8479657387580299, |
|
"grad_norm": 0.7753411531448364, |
|
"learning_rate": 2.7337132953697554e-08, |
|
"loss": 41.1279, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8522483940042827, |
|
"grad_norm": 0.8194027543067932, |
|
"learning_rate": 6.834750376549792e-09, |
|
"loss": 41.0847, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.8565310492505354, |
|
"grad_norm": 0.9929121732711792, |
|
"learning_rate": 0.0, |
|
"loss": 41.1802, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8565310492505354, |
|
"eval_loss": 10.287935256958008, |
|
"eval_runtime": 0.8236, |
|
"eval_samples_per_second": 478.394, |
|
"eval_steps_per_second": 120.206, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 43874775465984.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|