|
{ |
|
"best_metric": 2.82340669631958, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.0030028677386904493, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.005735477380899e-05, |
|
"grad_norm": 0.7277234792709351, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3234, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 6.005735477380899e-05, |
|
"eval_loss": 3.1906468868255615, |
|
"eval_runtime": 2134.556, |
|
"eval_samples_per_second": 13.138, |
|
"eval_steps_per_second": 3.285, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00012011470954761798, |
|
"grad_norm": 0.8249174952507019, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4146, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00018017206432142696, |
|
"grad_norm": 0.8658198118209839, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5297, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00024022941909523595, |
|
"grad_norm": 0.9398988485336304, |
|
"learning_rate": 4e-05, |
|
"loss": 1.6596, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00030028677386904494, |
|
"grad_norm": 0.823463499546051, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6762, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00036034412864285393, |
|
"grad_norm": 0.7399727702140808, |
|
"learning_rate": 6e-05, |
|
"loss": 1.6846, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0004204014834166629, |
|
"grad_norm": 0.9010226726531982, |
|
"learning_rate": 7e-05, |
|
"loss": 1.644, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0004804588381904719, |
|
"grad_norm": 0.9326359629631042, |
|
"learning_rate": 8e-05, |
|
"loss": 1.5877, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0005405161929642808, |
|
"grad_norm": 0.977419912815094, |
|
"learning_rate": 9e-05, |
|
"loss": 1.7177, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0006005735477380899, |
|
"grad_norm": 0.6996713876724243, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4415, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0006606309025118988, |
|
"grad_norm": 0.7411949634552002, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 1.5325, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0007206882572857079, |
|
"grad_norm": 0.8528963327407837, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 1.4692, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0007807456120595168, |
|
"grad_norm": 0.8318995237350464, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 1.6418, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0008408029668333258, |
|
"grad_norm": 0.7479487061500549, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 1.7038, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0009008603216071348, |
|
"grad_norm": 0.7285621166229248, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 1.4805, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0009609176763809438, |
|
"grad_norm": 0.7259240746498108, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 1.6445, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0010209750311547527, |
|
"grad_norm": 0.7934219837188721, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 1.367, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0010810323859285617, |
|
"grad_norm": 0.8407491445541382, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 1.4958, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0011410897407023708, |
|
"grad_norm": 0.9245749115943909, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 1.6083, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0012011470954761798, |
|
"grad_norm": 0.8109903931617737, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 1.5333, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0012612044502499887, |
|
"grad_norm": 1.2378709316253662, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 1.6405, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0013212618050237976, |
|
"grad_norm": 1.558487057685852, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 1.5916, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0013813191597976068, |
|
"grad_norm": 0.9527750015258789, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 1.593, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0014413765145714157, |
|
"grad_norm": 1.060105562210083, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 1.7384, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0015014338693452246, |
|
"grad_norm": 1.1204156875610352, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 1.9215, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0015614912241190336, |
|
"grad_norm": 1.1645809412002563, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 1.8122, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0016215485788928427, |
|
"grad_norm": 1.0866835117340088, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 1.8427, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0016816059336666517, |
|
"grad_norm": 1.4639962911605835, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 1.7834, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0017416632884404606, |
|
"grad_norm": 1.2034151554107666, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.7954, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0018017206432142695, |
|
"grad_norm": 1.3376408815383911, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 1.8065, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0018617779979880787, |
|
"grad_norm": 1.7506548166275024, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 2.0368, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0019218353527618876, |
|
"grad_norm": 1.909898042678833, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 1.766, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0019818927075356966, |
|
"grad_norm": 1.9195573329925537, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 2.0777, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0020419500623095055, |
|
"grad_norm": 1.7940828800201416, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 2.2604, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0021020074170833144, |
|
"grad_norm": 2.1280953884124756, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 2.3557, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0021620647718571234, |
|
"grad_norm": 2.3466970920562744, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 2.6686, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0022221221266309327, |
|
"grad_norm": 2.250682830810547, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 2.7733, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0022821794814047417, |
|
"grad_norm": 2.6434786319732666, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 3.0848, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0023422368361785506, |
|
"grad_norm": 4.73193359375, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 3.0783, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0024022941909523595, |
|
"grad_norm": 3.889577865600586, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 3.4497, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0024623515457261685, |
|
"grad_norm": 4.585144996643066, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 3.6834, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0025224089004999774, |
|
"grad_norm": 5.334516525268555, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 3.5614, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0025824662552737863, |
|
"grad_norm": 6.08420467376709, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 4.0365, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0026425236100475953, |
|
"grad_norm": 6.18115234375, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 3.5424, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0027025809648214046, |
|
"grad_norm": 9.245948791503906, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 4.1178, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0027626383195952136, |
|
"grad_norm": 5.46101713180542, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 3.632, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0028226956743690225, |
|
"grad_norm": 10.615842819213867, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 4.2407, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0028827530291428314, |
|
"grad_norm": 22.699947357177734, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 4.6777, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0029428103839166404, |
|
"grad_norm": 25.06355094909668, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 4.5389, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0030028677386904493, |
|
"grad_norm": 17.44358253479004, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 4.6876, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0030028677386904493, |
|
"eval_loss": 2.82340669631958, |
|
"eval_runtime": 2140.029, |
|
"eval_samples_per_second": 13.104, |
|
"eval_steps_per_second": 3.276, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.580280733197926e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|