|
{ |
|
"best_metric": 0.326568067073822, |
|
"best_model_checkpoint": "sep-20/checkpoint-184", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 184, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04891304347826087, |
|
"grad_norm": 6.508527755737305, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 0.9057, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.09782608695652174, |
|
"grad_norm": 6.560257434844971, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 0.4782, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.14673913043478262, |
|
"grad_norm": 5.121021270751953, |
|
"learning_rate": 4.7575757575757576e-05, |
|
"loss": 0.3964, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1956521739130435, |
|
"grad_norm": 5.416026592254639, |
|
"learning_rate": 4.484848484848485e-05, |
|
"loss": 0.461, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24456521739130435, |
|
"grad_norm": 3.697016716003418, |
|
"learning_rate": 4.212121212121212e-05, |
|
"loss": 0.6009, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.29347826086956524, |
|
"grad_norm": 3.856910467147827, |
|
"learning_rate": 3.939393939393939e-05, |
|
"loss": 0.2506, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3423913043478261, |
|
"grad_norm": 14.140103340148926, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.396, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.391304347826087, |
|
"grad_norm": 4.570571422576904, |
|
"learning_rate": 3.3939393939393945e-05, |
|
"loss": 0.3124, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.44021739130434784, |
|
"grad_norm": 11.288423538208008, |
|
"learning_rate": 3.121212121212122e-05, |
|
"loss": 0.5017, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4891304347826087, |
|
"grad_norm": 10.446467399597168, |
|
"learning_rate": 2.8484848484848486e-05, |
|
"loss": 0.2368, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5380434782608695, |
|
"grad_norm": 1.5973994731903076, |
|
"learning_rate": 2.575757575757576e-05, |
|
"loss": 0.2517, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5869565217391305, |
|
"grad_norm": 0.7517912983894348, |
|
"learning_rate": 2.3030303030303034e-05, |
|
"loss": 0.571, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6358695652173914, |
|
"grad_norm": 7.41497278213501, |
|
"learning_rate": 2.0303030303030303e-05, |
|
"loss": 0.2846, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6847826086956522, |
|
"grad_norm": 4.097425937652588, |
|
"learning_rate": 1.7575757575757576e-05, |
|
"loss": 0.4801, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7336956521739131, |
|
"grad_norm": 4.8678669929504395, |
|
"learning_rate": 1.484848484848485e-05, |
|
"loss": 0.3254, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.782608695652174, |
|
"grad_norm": 0.7251734733581543, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.1278, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8315217391304348, |
|
"grad_norm": 10.660299301147461, |
|
"learning_rate": 9.393939393939394e-06, |
|
"loss": 0.5605, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8804347826086957, |
|
"grad_norm": 0.7391616702079773, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3178, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9293478260869565, |
|
"grad_norm": 0.3952378034591675, |
|
"learning_rate": 3.939393939393939e-06, |
|
"loss": 0.3842, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9782608695652174, |
|
"grad_norm": 5.111515522003174, |
|
"learning_rate": 1.2121212121212122e-06, |
|
"loss": 0.2226, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8994565217391305, |
|
"eval_f1_macro": 0.574992771283407, |
|
"eval_f1_micro": 0.8994565217391305, |
|
"eval_f1_weighted": 0.8962362631935303, |
|
"eval_loss": 0.326568067073822, |
|
"eval_precision_macro": 0.5852941598199051, |
|
"eval_precision_micro": 0.8994565217391305, |
|
"eval_precision_weighted": 0.8948775064010129, |
|
"eval_recall_macro": 0.566717099325795, |
|
"eval_recall_micro": 0.8994565217391305, |
|
"eval_recall_weighted": 0.8994565217391305, |
|
"eval_runtime": 108.1675, |
|
"eval_samples_per_second": 3.402, |
|
"eval_steps_per_second": 0.213, |
|
"step": 184 |
|
} |
|
], |
|
"logging_steps": 9, |
|
"max_steps": 184, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.138367530175017e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|