|
{ |
|
"best_metric": 0.8727797865867615, |
|
"best_model_checkpoint": "autotrain-rqsu1-nelrs/checkpoint-6000", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.222222222222223e-06, |
|
"loss": 2.2551, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.65e-05, |
|
"loss": 1.533, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.4833333333333335e-05, |
|
"loss": 1.3785, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.316666666666667e-05, |
|
"loss": 1.2836, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.15e-05, |
|
"loss": 1.2161, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9833333333333336e-05, |
|
"loss": 1.1853, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9092592592592595e-05, |
|
"loss": 1.1863, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8166666666666674e-05, |
|
"loss": 1.1578, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.724074074074074e-05, |
|
"loss": 1.1281, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.631481481481481e-05, |
|
"loss": 1.0885, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.538888888888889e-05, |
|
"loss": 1.0728, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4462962962962966e-05, |
|
"loss": 1.0805, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.353703703703704e-05, |
|
"loss": 1.0485, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.261111111111111e-05, |
|
"loss": 1.0415, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.168518518518519e-05, |
|
"loss": 0.9634, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0759259259259264e-05, |
|
"loss": 0.9934, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9839506172839506e-05, |
|
"loss": 0.9743, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8913580246913585e-05, |
|
"loss": 1.0402, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798765432098765e-05, |
|
"loss": 0.9291, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.706172839506173e-05, |
|
"loss": 0.999, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6781666666666667, |
|
"eval_f1_macro": 0.417421299699202, |
|
"eval_f1_micro": 0.6781666666666667, |
|
"eval_f1_weighted": 0.6526203782124326, |
|
"eval_loss": 0.9398497939109802, |
|
"eval_precision_macro": 0.53233579791898, |
|
"eval_precision_micro": 0.6781666666666667, |
|
"eval_precision_weighted": 0.6686669761134056, |
|
"eval_recall_macro": 0.37103282997495, |
|
"eval_recall_micro": 0.6781666666666667, |
|
"eval_recall_weighted": 0.6781666666666667, |
|
"eval_runtime": 13.8601, |
|
"eval_samples_per_second": 432.897, |
|
"eval_steps_per_second": 27.056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6135802469135804e-05, |
|
"loss": 0.8418, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.520987654320988e-05, |
|
"loss": 0.7844, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.428395061728395e-05, |
|
"loss": 0.853, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.335802469135802e-05, |
|
"loss": 0.8219, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.24320987654321e-05, |
|
"loss": 0.8721, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1506172839506175e-05, |
|
"loss": 0.7844, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.058024691358025e-05, |
|
"loss": 0.8645, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.965432098765432e-05, |
|
"loss": 0.7823, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8728395061728397e-05, |
|
"loss": 0.7494, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7802469135802474e-05, |
|
"loss": 0.8125, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6876543209876543e-05, |
|
"loss": 0.7879, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.595061728395062e-05, |
|
"loss": 0.8092, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5024691358024692e-05, |
|
"loss": 0.7535, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4098765432098765e-05, |
|
"loss": 0.7787, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.317283950617284e-05, |
|
"loss": 0.8109, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2246913580246914e-05, |
|
"loss": 0.7853, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.132098765432099e-05, |
|
"loss": 0.7999, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0395061728395064e-05, |
|
"loss": 0.7545, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9469135802469137e-05, |
|
"loss": 0.7805, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.854320987654321e-05, |
|
"loss": 0.741, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7148333333333333, |
|
"eval_f1_macro": 0.4743843561639563, |
|
"eval_f1_micro": 0.7148333333333333, |
|
"eval_f1_weighted": 0.6973841930196186, |
|
"eval_loss": 0.8727797865867615, |
|
"eval_precision_macro": 0.689196184834718, |
|
"eval_precision_micro": 0.7148333333333333, |
|
"eval_precision_weighted": 0.7078896728033317, |
|
"eval_recall_macro": 0.41641019062275997, |
|
"eval_recall_micro": 0.7148333333333333, |
|
"eval_recall_weighted": 0.7148333333333333, |
|
"eval_runtime": 13.8166, |
|
"eval_samples_per_second": 434.26, |
|
"eval_steps_per_second": 27.141, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 150, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 3157587800064000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|