|
{ |
|
"best_metric": 0.9596994535519126, |
|
"best_model_checkpoint": "train_authorship/train_outputs/05-25-2024_21:30:31/checkpoint-560", |
|
"epoch": 8.115942028985508, |
|
"eval_steps": 35, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.12179946899414, |
|
"learning_rate": 2.5362318840579714e-05, |
|
"loss": 0.9599, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_acc_product": 0.522295927646477, |
|
"eval_fitzgerald_acc": 0.6823770491803278, |
|
"eval_hemingway_acc": 0.9651639344262295, |
|
"eval_loss": 0.4738686978816986, |
|
"eval_overall_acc": 0.8135245901639344, |
|
"eval_runtime": 13.1203, |
|
"eval_samples_per_second": 111.583, |
|
"eval_steps_per_second": 1.753, |
|
"eval_woolf_acc": 0.7930327868852459, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 18.4027099609375, |
|
"learning_rate": 4.99194847020934e-05, |
|
"loss": 0.3839, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_acc_product": 0.7210372836134964, |
|
"eval_fitzgerald_acc": 0.764344262295082, |
|
"eval_hemingway_acc": 0.9815573770491803, |
|
"eval_loss": 0.24899350106716156, |
|
"eval_overall_acc": 0.9023224043715847, |
|
"eval_runtime": 13.1386, |
|
"eval_samples_per_second": 111.428, |
|
"eval_steps_per_second": 1.751, |
|
"eval_woolf_acc": 0.9610655737704918, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 7.836585998535156, |
|
"learning_rate": 4.710144927536232e-05, |
|
"loss": 0.2387, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_acc_product": 0.8025821475696203, |
|
"eval_fitzgerald_acc": 0.9631147540983607, |
|
"eval_hemingway_acc": 0.9221311475409836, |
|
"eval_loss": 0.20742054283618927, |
|
"eval_overall_acc": 0.9296448087431693, |
|
"eval_runtime": 13.1443, |
|
"eval_samples_per_second": 111.379, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9036885245901639, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 10.438405990600586, |
|
"learning_rate": 4.428341384863124e-05, |
|
"loss": 0.1938, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_acc_product": 0.8229529244050163, |
|
"eval_fitzgerald_acc": 0.9528688524590164, |
|
"eval_hemingway_acc": 0.9733606557377049, |
|
"eval_loss": 0.2009282410144806, |
|
"eval_overall_acc": 0.9378415300546448, |
|
"eval_runtime": 13.1419, |
|
"eval_samples_per_second": 111.399, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.8872950819672131, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 11.882472038269043, |
|
"learning_rate": 4.146537842190016e-05, |
|
"loss": 0.1373, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_acc_product": 0.8087266940845269, |
|
"eval_fitzgerald_acc": 0.9467213114754098, |
|
"eval_hemingway_acc": 0.8831967213114754, |
|
"eval_loss": 0.20678555965423584, |
|
"eval_overall_acc": 0.9323770491803278, |
|
"eval_runtime": 13.1478, |
|
"eval_samples_per_second": 111.35, |
|
"eval_steps_per_second": 1.749, |
|
"eval_woolf_acc": 0.9672131147540983, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 3.76464581489563, |
|
"learning_rate": 3.864734299516908e-05, |
|
"loss": 0.0751, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_acc_product": 0.8355915183980156, |
|
"eval_fitzgerald_acc": 0.8913934426229508, |
|
"eval_hemingway_acc": 0.9774590163934426, |
|
"eval_loss": 0.21373361349105835, |
|
"eval_overall_acc": 0.9426229508196722, |
|
"eval_runtime": 13.1464, |
|
"eval_samples_per_second": 111.361, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9590163934426229, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 7.145068168640137, |
|
"learning_rate": 3.5829307568438e-05, |
|
"loss": 0.0799, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_acc_product": 0.8595802071539027, |
|
"eval_fitzgerald_acc": 0.9467213114754098, |
|
"eval_hemingway_acc": 0.9508196721311475, |
|
"eval_loss": 0.19885893166065216, |
|
"eval_overall_acc": 0.9508196721311475, |
|
"eval_runtime": 13.143, |
|
"eval_samples_per_second": 111.39, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9549180327868853, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 19.213457107543945, |
|
"learning_rate": 3.301127214170693e-05, |
|
"loss": 0.0689, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_acc_product": 0.720976809113428, |
|
"eval_fitzgerald_acc": 0.985655737704918, |
|
"eval_hemingway_acc": 0.9036885245901639, |
|
"eval_loss": 0.44272100925445557, |
|
"eval_overall_acc": 0.8995901639344263, |
|
"eval_runtime": 13.1467, |
|
"eval_samples_per_second": 111.359, |
|
"eval_steps_per_second": 1.749, |
|
"eval_woolf_acc": 0.8094262295081968, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 18.969886779785156, |
|
"learning_rate": 3.0193236714975848e-05, |
|
"loss": 0.0514, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_acc_product": 0.8326357712760099, |
|
"eval_fitzgerald_acc": 0.9139344262295082, |
|
"eval_hemingway_acc": 0.9836065573770492, |
|
"eval_loss": 0.3214350938796997, |
|
"eval_overall_acc": 0.9412568306010929, |
|
"eval_runtime": 13.1454, |
|
"eval_samples_per_second": 111.37, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9262295081967213, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 3.789048433303833, |
|
"learning_rate": 2.7375201288244768e-05, |
|
"loss": 0.0247, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_acc_product": 0.8625536629442553, |
|
"eval_fitzgerald_acc": 0.9200819672131147, |
|
"eval_hemingway_acc": 0.9651639344262295, |
|
"eval_loss": 0.282697856426239, |
|
"eval_overall_acc": 0.9521857923497268, |
|
"eval_runtime": 13.1482, |
|
"eval_samples_per_second": 111.346, |
|
"eval_steps_per_second": 1.749, |
|
"eval_woolf_acc": 0.9713114754098361, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"grad_norm": 0.13263003528118134, |
|
"learning_rate": 2.455716586151369e-05, |
|
"loss": 0.0061, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_acc_product": 0.870603844594922, |
|
"eval_fitzgerald_acc": 0.9508196721311475, |
|
"eval_hemingway_acc": 0.9692622950819673, |
|
"eval_loss": 0.28144514560699463, |
|
"eval_overall_acc": 0.9549180327868853, |
|
"eval_runtime": 13.1424, |
|
"eval_samples_per_second": 111.395, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.944672131147541, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.01614902913570404, |
|
"learning_rate": 2.173913043478261e-05, |
|
"loss": 0.017, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_acc_product": 0.8501492828694912, |
|
"eval_fitzgerald_acc": 0.9528688524590164, |
|
"eval_hemingway_acc": 0.930327868852459, |
|
"eval_loss": 0.34684956073760986, |
|
"eval_overall_acc": 0.9474043715846995, |
|
"eval_runtime": 13.1477, |
|
"eval_samples_per_second": 111.35, |
|
"eval_steps_per_second": 1.749, |
|
"eval_woolf_acc": 0.9590163934426229, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 0.007705519441515207, |
|
"learning_rate": 1.892109500805153e-05, |
|
"loss": 0.0108, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_acc_product": 0.8705621113386143, |
|
"eval_fitzgerald_acc": 0.9385245901639344, |
|
"eval_hemingway_acc": 0.9651639344262295, |
|
"eval_loss": 0.26387327909469604, |
|
"eval_overall_acc": 0.9549180327868853, |
|
"eval_runtime": 13.1463, |
|
"eval_samples_per_second": 111.362, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9610655737704918, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.04329540580511093, |
|
"learning_rate": 1.610305958132045e-05, |
|
"loss": 0.0237, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_acc_product": 0.8607751722611143, |
|
"eval_fitzgerald_acc": 0.9426229508196722, |
|
"eval_hemingway_acc": 0.9815573770491803, |
|
"eval_loss": 0.32457903027534485, |
|
"eval_overall_acc": 0.9515027322404371, |
|
"eval_runtime": 13.1416, |
|
"eval_samples_per_second": 111.402, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.930327868852459, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"grad_norm": 18.448688507080078, |
|
"learning_rate": 1.3285024154589374e-05, |
|
"loss": 0.0109, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_acc_product": 0.8574545818262321, |
|
"eval_fitzgerald_acc": 0.9569672131147541, |
|
"eval_hemingway_acc": 0.9631147540983607, |
|
"eval_loss": 0.32199960947036743, |
|
"eval_overall_acc": 0.950136612021858, |
|
"eval_runtime": 13.141, |
|
"eval_samples_per_second": 111.407, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.930327868852459, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"grad_norm": 0.07037464529275894, |
|
"learning_rate": 1.0466988727858294e-05, |
|
"loss": 0.0108, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_acc_product": 0.8835792216639279, |
|
"eval_fitzgerald_acc": 0.9385245901639344, |
|
"eval_hemingway_acc": 0.9692622950819673, |
|
"eval_loss": 0.27515432238578796, |
|
"eval_overall_acc": 0.9596994535519126, |
|
"eval_runtime": 13.1404, |
|
"eval_samples_per_second": 111.412, |
|
"eval_steps_per_second": 1.75, |
|
"eval_woolf_acc": 0.9713114754098361, |
|
"step": 560 |
|
} |
|
], |
|
"logging_steps": 35, |
|
"max_steps": 690, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 35, |
|
"total_flos": 1.4725988675974908e+16, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|