|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.881844380403458, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 4.8593597412109375, |
|
"learning_rate": 0.000125, |
|
"loss": 1.0043, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 0.14300437271595, |
|
"learning_rate": 0.00019841427155599603, |
|
"loss": 0.5844, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 0.11132702976465225, |
|
"learning_rate": 0.00019444995044598612, |
|
"loss": 0.4951, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 0.15473473072052002, |
|
"learning_rate": 0.0001904856293359762, |
|
"loss": 0.4809, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 0.12199735641479492, |
|
"learning_rate": 0.00018652130822596633, |
|
"loss": 0.4703, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 0.11592856794595718, |
|
"learning_rate": 0.0001825569871159564, |
|
"loss": 0.4622, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 0.11163052916526794, |
|
"learning_rate": 0.00017859266600594648, |
|
"loss": 0.4527, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 0.11111810058355331, |
|
"learning_rate": 0.00017462834489593657, |
|
"loss": 0.451, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 0.11076587438583374, |
|
"learning_rate": 0.00017066402378592668, |
|
"loss": 0.4447, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 0.09930027276277542, |
|
"learning_rate": 0.00016669970267591675, |
|
"loss": 0.4486, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 0.09756297618150711, |
|
"learning_rate": 0.00016273538156590683, |
|
"loss": 0.4369, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 0.11096686124801636, |
|
"learning_rate": 0.00015877106045589692, |
|
"loss": 0.4412, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 0.09731573611497879, |
|
"learning_rate": 0.00015480673934588704, |
|
"loss": 0.4407, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 0.10571198165416718, |
|
"learning_rate": 0.0001508424182358771, |
|
"loss": 0.4376, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 0.09965088218450546, |
|
"learning_rate": 0.0001468780971258672, |
|
"loss": 0.4368, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 0.09912007302045822, |
|
"learning_rate": 0.0001429137760158573, |
|
"loss": 0.4294, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 0.09754997491836548, |
|
"learning_rate": 0.0001389494549058474, |
|
"loss": 0.4309, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 0.10138026624917984, |
|
"learning_rate": 0.00013498513379583746, |
|
"loss": 0.4188, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 0.09764310717582703, |
|
"learning_rate": 0.00013102081268582754, |
|
"loss": 0.4167, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 0.10624668747186661, |
|
"learning_rate": 0.00012705649157581766, |
|
"loss": 0.4167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 0.1042829230427742, |
|
"learning_rate": 0.00012309217046580775, |
|
"loss": 0.42, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 0.09762328863143921, |
|
"learning_rate": 0.00011912784935579781, |
|
"loss": 0.4147, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 0.1053580567240715, |
|
"learning_rate": 0.00011516352824578791, |
|
"loss": 0.4173, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 0.09830950945615768, |
|
"learning_rate": 0.000111199207135778, |
|
"loss": 0.4107, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 0.1017039492726326, |
|
"learning_rate": 0.0001072348860257681, |
|
"loss": 0.4129, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 0.10052554309368134, |
|
"learning_rate": 0.00010327056491575817, |
|
"loss": 0.4115, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 0.09901268035173416, |
|
"learning_rate": 9.930624380574827e-05, |
|
"loss": 0.4113, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 0.0998842865228653, |
|
"learning_rate": 9.534192269573836e-05, |
|
"loss": 0.4075, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 0.10228322446346283, |
|
"learning_rate": 9.137760158572845e-05, |
|
"loss": 0.4103, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 0.10224767029285431, |
|
"learning_rate": 8.741328047571854e-05, |
|
"loss": 0.4097, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 0.10533642023801804, |
|
"learning_rate": 8.344895936570862e-05, |
|
"loss": 0.4069, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 0.11008591949939728, |
|
"learning_rate": 7.948463825569871e-05, |
|
"loss": 0.4043, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 0.1080278530716896, |
|
"learning_rate": 7.55203171456888e-05, |
|
"loss": 0.4058, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 0.10025861114263535, |
|
"learning_rate": 7.155599603567889e-05, |
|
"loss": 0.4035, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 0.1050715371966362, |
|
"learning_rate": 6.759167492566898e-05, |
|
"loss": 0.3953, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 0.10905614495277405, |
|
"learning_rate": 6.362735381565907e-05, |
|
"loss": 0.3898, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 0.11501201242208481, |
|
"learning_rate": 5.966303270564916e-05, |
|
"loss": 0.3916, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 0.10602011531591415, |
|
"learning_rate": 5.569871159563925e-05, |
|
"loss": 0.3902, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 0.11458944529294968, |
|
"learning_rate": 5.1734390485629335e-05, |
|
"loss": 0.391, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 0.10967529565095901, |
|
"learning_rate": 4.7770069375619424e-05, |
|
"loss": 0.3882, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 0.11452831327915192, |
|
"learning_rate": 4.380574826560951e-05, |
|
"loss": 0.3924, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 0.11521276086568832, |
|
"learning_rate": 3.98414271555996e-05, |
|
"loss": 0.3884, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 0.11083399504423141, |
|
"learning_rate": 3.587710604558969e-05, |
|
"loss": 0.391, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 0.1139487475156784, |
|
"learning_rate": 3.191278493557978e-05, |
|
"loss": 0.3903, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 0.11477449536323547, |
|
"learning_rate": 2.794846382556987e-05, |
|
"loss": 0.3897, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 0.11389576643705368, |
|
"learning_rate": 2.3984142715559964e-05, |
|
"loss": 0.3882, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 0.11239298433065414, |
|
"learning_rate": 2.0019821605550053e-05, |
|
"loss": 0.388, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 0.10633145272731781, |
|
"learning_rate": 1.605550049554014e-05, |
|
"loss": 0.3823, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 0.12195642292499542, |
|
"learning_rate": 1.2091179385530229e-05, |
|
"loss": 0.3878, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 0.1192099004983902, |
|
"learning_rate": 8.126858275520318e-06, |
|
"loss": 0.3877, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1041, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.450003270942065e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|