|
{ |
|
"best_metric": 1.38231360912323, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.02160818950382195, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00010804094751910974, |
|
"eval_loss": 1.7889565229415894, |
|
"eval_runtime": 537.8419, |
|
"eval_samples_per_second": 28.984, |
|
"eval_steps_per_second": 7.247, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00032412284255732923, |
|
"grad_norm": 0.8091393709182739, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5699, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006482456851146585, |
|
"grad_norm": 0.8223854303359985, |
|
"learning_rate": 6e-05, |
|
"loss": 1.7854, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0009723685276719876, |
|
"grad_norm": 0.8447192311286926, |
|
"learning_rate": 9e-05, |
|
"loss": 1.8246, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.001296491370229317, |
|
"grad_norm": 0.8499614000320435, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 1.8264, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0016206142127866462, |
|
"grad_norm": 0.8196111917495728, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 1.7786, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0019447370553439753, |
|
"grad_norm": 0.7101804614067078, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 1.7327, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0022688598979013048, |
|
"grad_norm": 0.7729286551475525, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 1.7386, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002592982740458634, |
|
"grad_norm": 0.6755093336105347, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 1.4494, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.002917105583015963, |
|
"grad_norm": 0.7253813743591309, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 1.4436, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0032412284255732924, |
|
"grad_norm": 0.5313703417778015, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 1.3522, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0035653512681306215, |
|
"grad_norm": 0.5184443593025208, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 1.2604, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0038894741106879505, |
|
"grad_norm": 0.6221433281898499, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 1.354, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00421359695324528, |
|
"grad_norm": 0.5940028429031372, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 1.3683, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0045377197958026095, |
|
"grad_norm": 0.5949783325195312, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 1.3312, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.004861842638359939, |
|
"grad_norm": 0.5560756325721741, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 1.3331, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.005185965480917268, |
|
"grad_norm": 0.5510199666023254, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.2569, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.005402047375955487, |
|
"eval_loss": 1.4472901821136475, |
|
"eval_runtime": 542.7445, |
|
"eval_samples_per_second": 28.723, |
|
"eval_steps_per_second": 7.182, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005510088323474597, |
|
"grad_norm": 0.560280978679657, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 1.3376, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.005834211166031926, |
|
"grad_norm": 0.4924287497997284, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 1.4859, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.006158334008589256, |
|
"grad_norm": 0.5823892951011658, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 1.6142, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.006482456851146585, |
|
"grad_norm": 0.4990837275981903, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 1.6329, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.006806579693703914, |
|
"grad_norm": 0.5565041899681091, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 1.6867, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.007130702536261243, |
|
"grad_norm": 0.5534407496452332, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 1.6361, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.007454825378818572, |
|
"grad_norm": 0.6305408477783203, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 1.5459, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.007778948221375901, |
|
"grad_norm": 0.6603249311447144, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 1.6251, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.00810307106393323, |
|
"grad_norm": 0.6320300698280334, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 1.3576, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.00842719390649056, |
|
"grad_norm": 0.5601158142089844, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 1.3429, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.00875131674904789, |
|
"grad_norm": 0.5731490850448608, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 1.3739, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.009075439591605219, |
|
"grad_norm": 0.5410912036895752, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 1.2813, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.009399562434162548, |
|
"grad_norm": 0.5254283547401428, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 1.2508, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.009723685276719877, |
|
"grad_norm": 0.5095140933990479, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 1.204, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.010047808119277206, |
|
"grad_norm": 0.49006080627441406, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 1.2471, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.010371930961834535, |
|
"grad_norm": 0.5014355778694153, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 1.2864, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.010696053804391864, |
|
"grad_norm": 0.53614342212677, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 1.2349, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.010804094751910975, |
|
"eval_loss": 1.4096521139144897, |
|
"eval_runtime": 542.4184, |
|
"eval_samples_per_second": 28.74, |
|
"eval_steps_per_second": 7.186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011020176646949193, |
|
"grad_norm": 0.49735087156295776, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 1.3988, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.011344299489506523, |
|
"grad_norm": 0.4606347382068634, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5228, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.011668422332063852, |
|
"grad_norm": 0.5091155171394348, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 1.4814, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.01199254517462118, |
|
"grad_norm": 0.5642185211181641, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 1.6132, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.012316668017178511, |
|
"grad_norm": 0.5649376511573792, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 1.5986, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.01264079085973584, |
|
"grad_norm": 0.5556029081344604, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 1.606, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.01296491370229317, |
|
"grad_norm": 0.6111022233963013, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 1.5919, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.013289036544850499, |
|
"grad_norm": 0.7231109142303467, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 1.45, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.013613159387407828, |
|
"grad_norm": 0.570808470249176, |
|
"learning_rate": 3.298534127791785e-05, |
|
"loss": 1.3601, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.013937282229965157, |
|
"grad_norm": 0.5434361696243286, |
|
"learning_rate": 3.0675041535377405e-05, |
|
"loss": 1.3497, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.014261405072522486, |
|
"grad_norm": 0.4750564992427826, |
|
"learning_rate": 2.8412282383075363e-05, |
|
"loss": 1.2799, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.014585527915079815, |
|
"grad_norm": 0.5212507843971252, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 1.2695, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.014909650757637144, |
|
"grad_norm": 0.5255863070487976, |
|
"learning_rate": 2.405152131093926e-05, |
|
"loss": 1.2256, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.015233773600194473, |
|
"grad_norm": 0.5165453553199768, |
|
"learning_rate": 2.196424713241637e-05, |
|
"loss": 1.2671, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.015557896442751802, |
|
"grad_norm": 0.5220006704330444, |
|
"learning_rate": 1.9945942635848748e-05, |
|
"loss": 1.2104, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.01588201928530913, |
|
"grad_norm": 0.48809775710105896, |
|
"learning_rate": 1.800157297483417e-05, |
|
"loss": 1.206, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.01620614212786646, |
|
"grad_norm": 0.6223570108413696, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 1.1717, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01620614212786646, |
|
"eval_loss": 1.3886877298355103, |
|
"eval_runtime": 541.2859, |
|
"eval_samples_per_second": 28.8, |
|
"eval_steps_per_second": 7.201, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01653026497042379, |
|
"grad_norm": 0.5191346406936646, |
|
"learning_rate": 1.435357758543015e-05, |
|
"loss": 1.5168, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.01685438781298112, |
|
"grad_norm": 0.5353146195411682, |
|
"learning_rate": 1.2658926150792322e-05, |
|
"loss": 1.4967, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.017178510655538447, |
|
"grad_norm": 0.5311741828918457, |
|
"learning_rate": 1.1056136061894384e-05, |
|
"loss": 1.5718, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.01750263349809578, |
|
"grad_norm": 0.5845088362693787, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.5607, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.01782675634065311, |
|
"grad_norm": 0.5743259191513062, |
|
"learning_rate": 8.141676086873572e-06, |
|
"loss": 1.581, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.018150879183210438, |
|
"grad_norm": 0.6573442220687866, |
|
"learning_rate": 6.837175952121306e-06, |
|
"loss": 1.6465, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.018475002025767767, |
|
"grad_norm": 0.6954129934310913, |
|
"learning_rate": 5.6388590278194096e-06, |
|
"loss": 1.5841, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.018799124868325096, |
|
"grad_norm": 0.5963082909584045, |
|
"learning_rate": 4.549673247541875e-06, |
|
"loss": 1.4765, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.019123247710882425, |
|
"grad_norm": 0.44104716181755066, |
|
"learning_rate": 3.5722980755146517e-06, |
|
"loss": 1.2437, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.019447370553439754, |
|
"grad_norm": 0.5068080425262451, |
|
"learning_rate": 2.7091379149682685e-06, |
|
"loss": 1.2188, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.019771493395997083, |
|
"grad_norm": 0.4765148460865021, |
|
"learning_rate": 1.962316193157593e-06, |
|
"loss": 1.2455, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.020095616238554413, |
|
"grad_norm": 0.5346724390983582, |
|
"learning_rate": 1.333670137599713e-06, |
|
"loss": 1.3474, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.02041973908111174, |
|
"grad_norm": 0.5100587606430054, |
|
"learning_rate": 8.247462563808817e-07, |
|
"loss": 1.262, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.02074386192366907, |
|
"grad_norm": 0.48295655846595764, |
|
"learning_rate": 4.367965336512403e-07, |
|
"loss": 1.3058, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.0210679847662264, |
|
"grad_norm": 0.5347855687141418, |
|
"learning_rate": 1.7077534966650766e-07, |
|
"loss": 1.2393, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.02139210760878373, |
|
"grad_norm": 0.5421356558799744, |
|
"learning_rate": 2.7337132953697554e-08, |
|
"loss": 1.2671, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.02160818950382195, |
|
"eval_loss": 1.38231360912323, |
|
"eval_runtime": 542.45, |
|
"eval_samples_per_second": 28.738, |
|
"eval_steps_per_second": 7.186, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.8039887118336e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|