|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.18726591760299627, |
|
"eval_steps": 13, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003745318352059925, |
|
"grad_norm": 1.4232933521270752, |
|
"learning_rate": 1e-05, |
|
"loss": 2.6144, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003745318352059925, |
|
"eval_loss": 2.7996814250946045, |
|
"eval_runtime": 168.8282, |
|
"eval_samples_per_second": 2.665, |
|
"eval_steps_per_second": 1.333, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00749063670411985, |
|
"grad_norm": 1.3964163064956665, |
|
"learning_rate": 2e-05, |
|
"loss": 2.6416, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.011235955056179775, |
|
"grad_norm": 1.4900565147399902, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6927, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0149812734082397, |
|
"grad_norm": 1.2893002033233643, |
|
"learning_rate": 4e-05, |
|
"loss": 2.2962, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.018726591760299626, |
|
"grad_norm": 1.5299968719482422, |
|
"learning_rate": 5e-05, |
|
"loss": 2.997, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02247191011235955, |
|
"grad_norm": 1.5312691926956177, |
|
"learning_rate": 6e-05, |
|
"loss": 2.8766, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.026217228464419477, |
|
"grad_norm": 1.6678813695907593, |
|
"learning_rate": 7e-05, |
|
"loss": 2.7168, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0299625468164794, |
|
"grad_norm": 1.3376212120056152, |
|
"learning_rate": 8e-05, |
|
"loss": 2.2845, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.033707865168539325, |
|
"grad_norm": 1.4087949991226196, |
|
"learning_rate": 9e-05, |
|
"loss": 2.2124, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03745318352059925, |
|
"grad_norm": 1.4170143604278564, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3934, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04119850187265917, |
|
"grad_norm": 1.392784833908081, |
|
"learning_rate": 9.98458666866564e-05, |
|
"loss": 2.1857, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0449438202247191, |
|
"grad_norm": 1.4366754293441772, |
|
"learning_rate": 9.938441702975689e-05, |
|
"loss": 1.9748, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04868913857677903, |
|
"grad_norm": 1.5409985780715942, |
|
"learning_rate": 9.861849601988383e-05, |
|
"loss": 1.9675, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04868913857677903, |
|
"eval_loss": 1.975732684135437, |
|
"eval_runtime": 14.12, |
|
"eval_samples_per_second": 31.87, |
|
"eval_steps_per_second": 15.935, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.052434456928838954, |
|
"grad_norm": 1.596232533454895, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 1.8742, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.056179775280898875, |
|
"grad_norm": 1.6847773790359497, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.9345, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0599250936329588, |
|
"grad_norm": 1.5066741704940796, |
|
"learning_rate": 9.45503262094184e-05, |
|
"loss": 1.7222, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06367041198501873, |
|
"grad_norm": 1.4937152862548828, |
|
"learning_rate": 9.263200821770461e-05, |
|
"loss": 1.8033, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06741573033707865, |
|
"grad_norm": 1.9379730224609375, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 1.8392, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07116104868913857, |
|
"grad_norm": 1.7714548110961914, |
|
"learning_rate": 8.802029828000156e-05, |
|
"loss": 1.7254, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0749063670411985, |
|
"grad_norm": 1.6995341777801514, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.5896, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07865168539325842, |
|
"grad_norm": 1.6350839138031006, |
|
"learning_rate": 8.247240241650918e-05, |
|
"loss": 1.8164, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.08239700374531835, |
|
"grad_norm": 1.4452717304229736, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.7086, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08614232209737828, |
|
"grad_norm": 1.4310646057128906, |
|
"learning_rate": 7.612492823579745e-05, |
|
"loss": 1.7038, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0898876404494382, |
|
"grad_norm": 1.5116603374481201, |
|
"learning_rate": 7.269952498697734e-05, |
|
"loss": 1.3802, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09363295880149813, |
|
"grad_norm": 1.7284044027328491, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.5872, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09737827715355805, |
|
"grad_norm": 1.4467360973358154, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.464, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.09737827715355805, |
|
"eval_loss": 1.513379454612732, |
|
"eval_runtime": 14.1358, |
|
"eval_samples_per_second": 31.834, |
|
"eval_steps_per_second": 15.917, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"grad_norm": 1.6498875617980957, |
|
"learning_rate": 6.167226819279528e-05, |
|
"loss": 1.6355, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.10486891385767791, |
|
"grad_norm": 1.5848513841629028, |
|
"learning_rate": 5.782172325201155e-05, |
|
"loss": 1.4611, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.10861423220973783, |
|
"grad_norm": 1.6103304624557495, |
|
"learning_rate": 5.392295478639225e-05, |
|
"loss": 1.463, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.11235955056179775, |
|
"grad_norm": 1.4713010787963867, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4531, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11610486891385768, |
|
"grad_norm": 1.4418309926986694, |
|
"learning_rate": 4.607704521360776e-05, |
|
"loss": 1.4607, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1198501872659176, |
|
"grad_norm": 1.3354123830795288, |
|
"learning_rate": 4.2178276747988446e-05, |
|
"loss": 1.4086, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.12359550561797752, |
|
"grad_norm": 1.5024315118789673, |
|
"learning_rate": 3.832773180720475e-05, |
|
"loss": 1.4571, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.12734082397003746, |
|
"grad_norm": 1.4268651008605957, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 1.3551, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.13108614232209737, |
|
"grad_norm": 1.4591658115386963, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 1.3186, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1348314606741573, |
|
"grad_norm": 1.5342679023742676, |
|
"learning_rate": 2.7300475013022663e-05, |
|
"loss": 1.1651, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.13857677902621723, |
|
"grad_norm": 1.763529896736145, |
|
"learning_rate": 2.3875071764202563e-05, |
|
"loss": 1.1908, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.14232209737827714, |
|
"grad_norm": 1.6287809610366821, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 1.4347, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.14606741573033707, |
|
"grad_norm": 1.562390923500061, |
|
"learning_rate": 1.7527597583490822e-05, |
|
"loss": 1.4056, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.14606741573033707, |
|
"eval_loss": 1.3499964475631714, |
|
"eval_runtime": 14.1251, |
|
"eval_samples_per_second": 31.858, |
|
"eval_steps_per_second": 15.929, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.149812734082397, |
|
"grad_norm": 1.6496071815490723, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.1467, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15355805243445692, |
|
"grad_norm": 1.5362358093261719, |
|
"learning_rate": 1.1979701719998453e-05, |
|
"loss": 1.3055, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.15730337078651685, |
|
"grad_norm": 1.656622052192688, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.2599, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.16104868913857678, |
|
"grad_norm": 1.7673784494400024, |
|
"learning_rate": 7.367991782295391e-06, |
|
"loss": 1.6338, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1647940074906367, |
|
"grad_norm": 1.6738536357879639, |
|
"learning_rate": 5.449673790581611e-06, |
|
"loss": 1.3426, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.16853932584269662, |
|
"grad_norm": 1.5348960161209106, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 1.3081, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17228464419475656, |
|
"grad_norm": 1.4670664072036743, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 1.2674, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1760299625468165, |
|
"grad_norm": 1.5279347896575928, |
|
"learning_rate": 1.3815039801161721e-06, |
|
"loss": 1.3096, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1797752808988764, |
|
"grad_norm": 1.749185562133789, |
|
"learning_rate": 6.15582970243117e-07, |
|
"loss": 1.3092, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.18352059925093633, |
|
"grad_norm": 1.499311089515686, |
|
"learning_rate": 1.5413331334360182e-07, |
|
"loss": 1.1741, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.18726591760299627, |
|
"grad_norm": 1.6856210231781006, |
|
"learning_rate": 0.0, |
|
"loss": 1.6819, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.284371676168192e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|