|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9986738753442823, |
|
"eval_steps": 200, |
|
"global_step": 445, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02244210955829848, |
|
"grad_norm": 19.15482521057129, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 3.8476, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04488421911659696, |
|
"grad_norm": 5.446422576904297, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 3.2218, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06732632867489544, |
|
"grad_norm": 1.8523049354553223, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.9756, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08976843823319391, |
|
"grad_norm": 2.139192581176758, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 2.88, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11221054779149241, |
|
"grad_norm": 1.853474497795105, |
|
"learning_rate": 9.996145181203616e-06, |
|
"loss": 2.8198, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13465265734979087, |
|
"grad_norm": 1.501637578010559, |
|
"learning_rate": 9.965342284774633e-06, |
|
"loss": 2.8122, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15709476690808935, |
|
"grad_norm": 2.0072269439697266, |
|
"learning_rate": 9.903926402016153e-06, |
|
"loss": 2.7806, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17953687646638783, |
|
"grad_norm": 1.7332258224487305, |
|
"learning_rate": 9.812276182268236e-06, |
|
"loss": 2.7422, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2019789860246863, |
|
"grad_norm": 1.3256088495254517, |
|
"learning_rate": 9.690956679612422e-06, |
|
"loss": 2.736, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22442109558298481, |
|
"grad_norm": 1.6238477230072021, |
|
"learning_rate": 9.540715869125407e-06, |
|
"loss": 2.7361, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2468632051412833, |
|
"grad_norm": 1.326378583908081, |
|
"learning_rate": 9.362480035363987e-06, |
|
"loss": 2.7135, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26930531469958174, |
|
"grad_norm": 1.3376497030258179, |
|
"learning_rate": 9.157348061512728e-06, |
|
"loss": 2.7064, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29174742425788025, |
|
"grad_norm": 1.2815560102462769, |
|
"learning_rate": 8.926584654403725e-06, |
|
"loss": 2.7018, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3141895338161787, |
|
"grad_norm": 1.5868873596191406, |
|
"learning_rate": 8.671612547178428e-06, |
|
"loss": 2.6961, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3366316433744772, |
|
"grad_norm": 1.366570234298706, |
|
"learning_rate": 8.39400372766471e-06, |
|
"loss": 2.6968, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35907375293277566, |
|
"grad_norm": 1.6603009700775146, |
|
"learning_rate": 8.095469746549172e-06, |
|
"loss": 2.6879, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38151586249107416, |
|
"grad_norm": 1.4688373804092407, |
|
"learning_rate": 7.777851165098012e-06, |
|
"loss": 2.6686, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4039579720493726, |
|
"grad_norm": 1.2386434078216553, |
|
"learning_rate": 7.443106207484776e-06, |
|
"loss": 2.6497, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4264000816076711, |
|
"grad_norm": 1.3002716302871704, |
|
"learning_rate": 7.093298687687141e-06, |
|
"loss": 2.6413, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44884219116596963, |
|
"grad_norm": 1.2603603601455688, |
|
"learning_rate": 6.730585285387465e-06, |
|
"loss": 2.6472, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44884219116596963, |
|
"eval_loss": 2.642993450164795, |
|
"eval_runtime": 1294.6794, |
|
"eval_samples_per_second": 148.064, |
|
"eval_steps_per_second": 0.842, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4712843007242681, |
|
"grad_norm": 1.2680917978286743, |
|
"learning_rate": 6.3572022493253715e-06, |
|
"loss": 2.6369, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4937264102825666, |
|
"grad_norm": 1.3160443305969238, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 2.63, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5161685198408651, |
|
"grad_norm": 1.2467771768569946, |
|
"learning_rate": 5.587686987289189e-06, |
|
"loss": 2.6209, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5386106293991635, |
|
"grad_norm": 1.208018183708191, |
|
"learning_rate": 5.1962990787953436e-06, |
|
"loss": 2.6318, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.561052738957462, |
|
"grad_norm": 1.2416397333145142, |
|
"learning_rate": 4.803700921204659e-06, |
|
"loss": 2.621, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5834948485157605, |
|
"grad_norm": 1.1826361417770386, |
|
"learning_rate": 4.4123130127108125e-06, |
|
"loss": 2.6161, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.605936958074059, |
|
"grad_norm": 1.2550407648086548, |
|
"learning_rate": 4.02454838991936e-06, |
|
"loss": 2.6164, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6283790676323574, |
|
"grad_norm": 1.2681384086608887, |
|
"learning_rate": 3.6427977506746293e-06, |
|
"loss": 2.6091, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6508211771906559, |
|
"grad_norm": 1.2637056112289429, |
|
"learning_rate": 3.269414714612534e-06, |
|
"loss": 2.5967, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6732632867489544, |
|
"grad_norm": 1.211774468421936, |
|
"learning_rate": 2.906701312312861e-06, |
|
"loss": 2.6031, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6957053963072529, |
|
"grad_norm": 1.1411036252975464, |
|
"learning_rate": 2.5568937925152272e-06, |
|
"loss": 2.6014, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7181475058655513, |
|
"grad_norm": 1.1422080993652344, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"loss": 2.5978, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7405896154238498, |
|
"grad_norm": 1.172059416770935, |
|
"learning_rate": 1.9045302534508298e-06, |
|
"loss": 2.5911, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7630317249821483, |
|
"grad_norm": 1.1655080318450928, |
|
"learning_rate": 1.6059962723352912e-06, |
|
"loss": 2.5913, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7854738345404468, |
|
"grad_norm": 1.1286932229995728, |
|
"learning_rate": 1.3283874528215735e-06, |
|
"loss": 2.5819, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8079159440987452, |
|
"grad_norm": 1.1322216987609863, |
|
"learning_rate": 1.0734153455962765e-06, |
|
"loss": 2.5833, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8303580536570437, |
|
"grad_norm": 1.1392606496810913, |
|
"learning_rate": 8.426519384872733e-07, |
|
"loss": 2.5851, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8528001632153422, |
|
"grad_norm": 1.1811796426773071, |
|
"learning_rate": 6.375199646360142e-07, |
|
"loss": 2.5853, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8752422727736408, |
|
"grad_norm": 1.1267277002334595, |
|
"learning_rate": 4.5928413087459325e-07, |
|
"loss": 2.5832, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8976843823319393, |
|
"grad_norm": 1.116821527481079, |
|
"learning_rate": 3.0904332038757977e-07, |
|
"loss": 2.5779, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8976843823319393, |
|
"eval_loss": 2.582942485809326, |
|
"eval_runtime": 1274.0632, |
|
"eval_samples_per_second": 150.46, |
|
"eval_steps_per_second": 0.856, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9201264918902377, |
|
"grad_norm": 1.1507278680801392, |
|
"learning_rate": 1.8772381773176417e-07, |
|
"loss": 2.5833, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9425686014485362, |
|
"grad_norm": 1.0935174226760864, |
|
"learning_rate": 9.607359798384785e-08, |
|
"loss": 2.597, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9650107110068347, |
|
"grad_norm": 1.1115341186523438, |
|
"learning_rate": 3.465771522536854e-08, |
|
"loss": 2.5725, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9874528205651332, |
|
"grad_norm": 1.131402611732483, |
|
"learning_rate": 3.854818796385495e-09, |
|
"loss": 2.575, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9986738753442823, |
|
"step": 445, |
|
"total_flos": 7.860958022007259e+18, |
|
"train_loss": 2.698132219207421, |
|
"train_runtime": 39941.2631, |
|
"train_samples_per_second": 43.195, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 445, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.860958022007259e+18, |
|
"train_batch_size": 22, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|