|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.0,
  "eval_steps": 500,
  "global_step": 6120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7352941176470589,
      "grad_norm": 1.4399138689041138,
      "learning_rate": 1.8366013071895427e-05,
      "loss": 0.1305,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9775613095979087,
      "eval_f1": 0.740566037735849,
      "eval_loss": 0.06862938404083252,
      "eval_precision": 0.6908690869086909,
      "eval_recall": 0.7979669631512071,
      "eval_runtime": 2.5207,
      "eval_samples_per_second": 366.567,
      "eval_steps_per_second": 46.019,
      "step": 680
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 3.059645652770996,
      "learning_rate": 1.6732026143790852e-05,
      "loss": 0.0475,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9815448773808042,
      "eval_f1": 0.8041237113402061,
      "eval_loss": 0.067368283867836,
      "eval_precision": 0.7691415313225058,
      "eval_recall": 0.8424396442185514,
      "eval_runtime": 3.126,
      "eval_samples_per_second": 295.582,
      "eval_steps_per_second": 37.108,
      "step": 1360
    },
    {
      "epoch": 2.2058823529411766,
      "grad_norm": 2.6966614723205566,
      "learning_rate": 1.5098039215686276e-05,
      "loss": 0.0354,
      "step": 1500
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 3.640780210494995,
      "learning_rate": 1.3464052287581701e-05,
      "loss": 0.0222,
      "step": 2000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.982322917963401,
      "eval_f1": 0.8089053803339517,
      "eval_loss": 0.07790510356426239,
      "eval_precision": 0.7879518072289157,
      "eval_recall": 0.8310038119440915,
      "eval_runtime": 4.1074,
      "eval_samples_per_second": 224.961,
      "eval_steps_per_second": 28.242,
      "step": 2040
    },
    {
      "epoch": 3.6764705882352944,
      "grad_norm": 0.008734635077416897,
      "learning_rate": 1.1830065359477125e-05,
      "loss": 0.012,
      "step": 2500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.982789742312959,
      "eval_f1": 0.8168498168498168,
      "eval_loss": 0.0823572650551796,
      "eval_precision": 0.7861339600470035,
      "eval_recall": 0.8500635324015248,
      "eval_runtime": 2.5823,
      "eval_samples_per_second": 357.827,
      "eval_steps_per_second": 44.922,
      "step": 2720
    },
    {
      "epoch": 4.411764705882353,
      "grad_norm": 0.3800855576992035,
      "learning_rate": 1.0196078431372549e-05,
      "loss": 0.0081,
      "step": 3000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9815137557575003,
      "eval_f1": 0.8112745098039216,
      "eval_loss": 0.10165887326002121,
      "eval_precision": 0.7834319526627219,
      "eval_recall": 0.841168996188056,
      "eval_runtime": 2.596,
      "eval_samples_per_second": 355.931,
      "eval_steps_per_second": 44.684,
      "step": 3400
    },
    {
      "epoch": 5.147058823529412,
      "grad_norm": 1.0274019241333008,
      "learning_rate": 8.562091503267974e-06,
      "loss": 0.0054,
      "step": 3500
    },
    {
      "epoch": 5.882352941176471,
      "grad_norm": 0.9368045926094055,
      "learning_rate": 6.928104575163399e-06,
      "loss": 0.0028,
      "step": 4000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.981607120627412,
      "eval_f1": 0.8175092478421702,
      "eval_loss": 0.11204753071069717,
      "eval_precision": 0.7940119760479042,
      "eval_recall": 0.8424396442185514,
      "eval_runtime": 2.6214,
      "eval_samples_per_second": 352.478,
      "eval_steps_per_second": 44.25,
      "step": 4080
    },
    {
      "epoch": 6.617647058823529,
      "grad_norm": 0.0018368919845670462,
      "learning_rate": 5.294117647058824e-06,
      "loss": 0.0021,
      "step": 4500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9808290800448152,
      "eval_f1": 0.8186215235792019,
      "eval_loss": 0.12193118035793304,
      "eval_precision": 0.7808535178777394,
      "eval_recall": 0.8602287166454892,
      "eval_runtime": 2.6902,
      "eval_samples_per_second": 343.468,
      "eval_steps_per_second": 43.119,
      "step": 4760
    },
    {
      "epoch": 7.352941176470588,
      "grad_norm": 0.004133788403123617,
      "learning_rate": 3.6601307189542484e-06,
      "loss": 0.0011,
      "step": 5000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9814826341341965,
      "eval_f1": 0.8152173913043478,
      "eval_loss": 0.12062280625104904,
      "eval_precision": 0.7767548906789413,
      "eval_recall": 0.8576874205844981,
      "eval_runtime": 2.6543,
      "eval_samples_per_second": 348.116,
      "eval_steps_per_second": 43.703,
      "step": 5440
    },
    {
      "epoch": 8.088235294117647,
      "grad_norm": 0.00526324100792408,
      "learning_rate": 2.0261437908496734e-06,
      "loss": 0.0018,
      "step": 5500
    },
    {
      "epoch": 8.823529411764707,
      "grad_norm": 0.01854279637336731,
      "learning_rate": 3.921568627450981e-07,
      "loss": 0.0005,
      "step": 6000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9821050666002739,
      "eval_f1": 0.8220858895705522,
      "eval_loss": 0.1177670955657959,
      "eval_precision": 0.7947805456702254,
      "eval_recall": 0.8513341804320204,
      "eval_runtime": 2.7389,
      "eval_samples_per_second": 337.365,
      "eval_steps_per_second": 42.353,
      "step": 6120
    }
  ],
  "logging_steps": 500,
  "max_steps": 6120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 791914496183100.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|