|
{ |
|
"best_metric": 5.173658792045899e-05, |
|
"best_model_checkpoint": "res/checkpoint-400", |
|
"epoch": 0.5633802816901409, |
|
"eval_steps": 200, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.3899288177490234, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6908, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.9440199136734009, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6992, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": Infinity, |
|
"learning_rate": 1.45e-05, |
|
"loss": 0.6318, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.202402591705322, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.4382, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 21.559049606323242, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0718, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.11590419709682465, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.0055, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.02931913733482361, |
|
"learning_rate": 3.35e-05, |
|
"loss": 0.0175, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 31.5627498626709, |
|
"learning_rate": 3.85e-05, |
|
"loss": 0.159, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.10093926638364792, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.0993, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.02363261766731739, |
|
"learning_rate": 4.85e-05, |
|
"loss": 0.0013, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.018094830214977264, |
|
"learning_rate": 4.9426229508196726e-05, |
|
"loss": 0.0008, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.010088359005749226, |
|
"learning_rate": 4.860655737704918e-05, |
|
"loss": 0.0005, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.00800824724137783, |
|
"learning_rate": 4.778688524590164e-05, |
|
"loss": 0.0004, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.03682546317577362, |
|
"learning_rate": 4.704918032786885e-05, |
|
"loss": 0.0664, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.006090231705456972, |
|
"learning_rate": 4.622950819672132e-05, |
|
"loss": 0.0002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.00360495550557971, |
|
"learning_rate": 4.540983606557377e-05, |
|
"loss": 0.0002, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.004050845745950937, |
|
"learning_rate": 4.459016393442623e-05, |
|
"loss": 0.0002, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0043782140128314495, |
|
"learning_rate": 4.377049180327869e-05, |
|
"loss": 0.0002, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.004006043076515198, |
|
"learning_rate": 4.295081967213115e-05, |
|
"loss": 0.0001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.003426899667829275, |
|
"learning_rate": 4.213114754098361e-05, |
|
"loss": 0.1068, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9987674609695973, |
|
"eval_f1": 0.9987956643918106, |
|
"eval_loss": 0.002453433582559228, |
|
"eval_precision": 0.9983948635634029, |
|
"eval_recall": 0.9991967871485944, |
|
"eval_runtime": 335.4821, |
|
"eval_samples_per_second": 7.255, |
|
"eval_steps_per_second": 0.909, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.015531109645962715, |
|
"learning_rate": 4.131147540983607e-05, |
|
"loss": 0.062, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.02330106310546398, |
|
"learning_rate": 4.049180327868853e-05, |
|
"loss": 0.0089, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.007218244019895792, |
|
"learning_rate": 3.9672131147540983e-05, |
|
"loss": 0.0004, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.008732643909752369, |
|
"learning_rate": 3.8852459016393444e-05, |
|
"loss": 0.0003, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.007611180655658245, |
|
"learning_rate": 3.8032786885245905e-05, |
|
"loss": 0.0003, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.004713522270321846, |
|
"learning_rate": 3.721311475409836e-05, |
|
"loss": 0.0002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.005065000616014004, |
|
"learning_rate": 3.6393442622950826e-05, |
|
"loss": 0.0002, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0033236700110137463, |
|
"learning_rate": 3.557377049180328e-05, |
|
"loss": 0.0002, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.003254691371694207, |
|
"learning_rate": 3.475409836065574e-05, |
|
"loss": 0.0001, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0026583385188132524, |
|
"learning_rate": 3.39344262295082e-05, |
|
"loss": 0.0001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0026992058847099543, |
|
"learning_rate": 3.3114754098360655e-05, |
|
"loss": 0.0001, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.002891330514103174, |
|
"learning_rate": 3.2295081967213116e-05, |
|
"loss": 0.0001, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0022049755789339542, |
|
"learning_rate": 3.1475409836065576e-05, |
|
"loss": 0.0001, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.002381928265094757, |
|
"learning_rate": 3.065573770491804e-05, |
|
"loss": 0.0001, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.0018912949599325657, |
|
"learning_rate": 2.9836065573770494e-05, |
|
"loss": 0.0001, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.00272244680672884, |
|
"learning_rate": 2.901639344262295e-05, |
|
"loss": 0.0001, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.0019356077536940575, |
|
"learning_rate": 2.819672131147541e-05, |
|
"loss": 0.0001, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.002229230711236596, |
|
"learning_rate": 2.737704918032787e-05, |
|
"loss": 0.0001, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.002221511909738183, |
|
"learning_rate": 2.6557377049180327e-05, |
|
"loss": 0.0001, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0020540908444672823, |
|
"learning_rate": 2.573770491803279e-05, |
|
"loss": 0.0001, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 1.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 5.173658792045899e-05, |
|
"eval_precision": 1.0, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 333.7573, |
|
"eval_samples_per_second": 7.293, |
|
"eval_steps_per_second": 0.914, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 400, |
|
"total_flos": 4203850314547200.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|