|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.42316314450517617, |
|
"eval_steps": 500, |
|
"global_step": 10500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.020150625928817913, |
|
"grad_norm": 1.1627434492111206, |
|
"learning_rate": 1.0072522159548751e-05, |
|
"loss": 0.6529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04030125185763583, |
|
"grad_norm": 1.1110010147094727, |
|
"learning_rate": 2.0145044319097503e-05, |
|
"loss": 0.6192, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.060451877786453743, |
|
"grad_norm": 0.8589074611663818, |
|
"learning_rate": 3.0217566478646254e-05, |
|
"loss": 0.5975, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08060250371527165, |
|
"grad_norm": 0.9587840437889099, |
|
"learning_rate": 4.0290088638195005e-05, |
|
"loss": 0.5769, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.10075312964408957, |
|
"grad_norm": 0.8058000206947327, |
|
"learning_rate": 4.9959697281805565e-05, |
|
"loss": 0.5703, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12090375557290749, |
|
"grad_norm": 0.7425491809844971, |
|
"learning_rate": 4.8840177331960056e-05, |
|
"loss": 0.5645, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1410543815017254, |
|
"grad_norm": 0.7902853488922119, |
|
"learning_rate": 4.7720657382114555e-05, |
|
"loss": 0.5477, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1612050074305433, |
|
"grad_norm": 0.7411909699440002, |
|
"learning_rate": 4.6601137432269046e-05, |
|
"loss": 0.5451, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18135563335936122, |
|
"grad_norm": 0.7251479625701904, |
|
"learning_rate": 4.548161748242354e-05, |
|
"loss": 0.5372, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.20150625928817914, |
|
"grad_norm": 0.7090137600898743, |
|
"learning_rate": 4.4362097532578036e-05, |
|
"loss": 0.5366, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22165688521699706, |
|
"grad_norm": 0.5888206362724304, |
|
"learning_rate": 4.324257758273253e-05, |
|
"loss": 0.5289, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24180751114581497, |
|
"grad_norm": 0.5961526036262512, |
|
"learning_rate": 4.212305763288702e-05, |
|
"loss": 0.5286, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2619581370746329, |
|
"grad_norm": 0.676559567451477, |
|
"learning_rate": 4.1003537683041517e-05, |
|
"loss": 0.5263, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2821087630034508, |
|
"grad_norm": 0.5270068645477295, |
|
"learning_rate": 3.988401773319601e-05, |
|
"loss": 0.5221, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3022593889322687, |
|
"grad_norm": 0.6866596937179565, |
|
"learning_rate": 3.87644977833505e-05, |
|
"loss": 0.517, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3224100148610866, |
|
"grad_norm": 0.6050545573234558, |
|
"learning_rate": 3.7644977833505e-05, |
|
"loss": 0.5183, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.34256064078990456, |
|
"grad_norm": 0.5003291368484497, |
|
"learning_rate": 3.652545788365949e-05, |
|
"loss": 0.5097, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.36271126671872245, |
|
"grad_norm": 0.5456134676933289, |
|
"learning_rate": 3.540593793381398e-05, |
|
"loss": 0.5072, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3828618926475404, |
|
"grad_norm": 0.6824153661727905, |
|
"learning_rate": 3.428641798396848e-05, |
|
"loss": 0.5048, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4030125185763583, |
|
"grad_norm": 0.5959055423736572, |
|
"learning_rate": 3.316689803412297e-05, |
|
"loss": 0.5021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.42316314450517617, |
|
"grad_norm": 0.5629556775093079, |
|
"learning_rate": 3.204737808427746e-05, |
|
"loss": 0.5001, |
|
"step": 10500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 24813, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.677250108543338e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|