{ "best_metric": 0.436233788728714, "best_model_checkpoint": "flan_large_ft_adam_filtd/checkpoint-20000", "epoch": 8.32639467110741, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42, "learning_rate": 3.8e-05, "loss": 1.6461, "step": 1000 }, { "epoch": 0.42, "eval_f1": 0.7336931843203959, "eval_loss": 0.543613076210022, "eval_precision": 0.7462491253306569, "eval_recall": 0.7237358282616747, "eval_runtime": 339.9295, "eval_sacrebleu": 5.478519586104642, "eval_samples_per_second": 28.262, "eval_steps_per_second": 0.885, "step": 1000 }, { "epoch": 0.83, "learning_rate": 3.6e-05, "loss": 0.6266, "step": 2000 }, { "epoch": 0.83, "eval_f1": 0.7411911502965993, "eval_loss": 0.5126737356185913, "eval_precision": 0.7532233481006616, "eval_recall": 0.7315065660194513, "eval_runtime": 330.4901, "eval_sacrebleu": 5.581437847090348, "eval_samples_per_second": 29.069, "eval_steps_per_second": 0.911, "step": 2000 }, { "epoch": 1.25, "learning_rate": 3.4e-05, "loss": 0.591, "step": 3000 }, { "epoch": 1.25, "eval_f1": 0.744462796992661, "eval_loss": 0.49647408723831177, "eval_precision": 0.7574931879507412, "eval_recall": 0.7338014463765621, "eval_runtime": 328.9086, "eval_sacrebleu": 5.712014723912081, "eval_samples_per_second": 29.209, "eval_steps_per_second": 0.915, "step": 3000 }, { "epoch": 1.67, "learning_rate": 3.2000000000000005e-05, "loss": 0.5723, "step": 4000 }, { "epoch": 1.67, "eval_f1": 0.7482227764808884, "eval_loss": 0.4846822917461395, "eval_precision": 0.7609444108747153, "eval_recall": 0.7378004755756169, "eval_runtime": 328.5657, "eval_sacrebleu": 5.759016321774072, "eval_samples_per_second": 29.239, "eval_steps_per_second": 0.916, "step": 4000 }, { "epoch": 2.08, "learning_rate": 3.0000000000000004e-05, "loss": 0.5576, "step": 5000 }, { "epoch": 2.08, "eval_f1": 0.7495765637889841, "eval_loss": 0.4766782820224762, "eval_precision": 0.7626191373513915, "eval_recall": 0.7388611246473127, "eval_runtime": 326.106, "eval_sacrebleu": 5.811597759442428, "eval_samples_per_second": 29.46, "eval_steps_per_second": 0.923, "step": 5000 }, { "epoch": 2.5, "learning_rate": 2.8e-05, "loss": 0.5393, "step": 6000 }, { "epoch": 2.5, "eval_f1": 0.7505085720869012, "eval_loss": 0.47017449140548706, "eval_precision": 0.7641689959470273, "eval_recall": 0.7391936523188186, "eval_runtime": 328.1093, "eval_sacrebleu": 5.885476099157212, "eval_samples_per_second": 29.28, "eval_steps_per_second": 0.917, "step": 6000 }, { "epoch": 2.91, "learning_rate": 2.6000000000000002e-05, "loss": 0.5318, "step": 7000 }, { "epoch": 2.91, "eval_f1": 0.7533312253634614, "eval_loss": 0.46316930651664734, "eval_precision": 0.7659040383844901, "eval_recall": 0.7430121202963726, "eval_runtime": 330.001, "eval_sacrebleu": 5.928167844047436, "eval_samples_per_second": 29.112, "eval_steps_per_second": 0.912, "step": 7000 }, { "epoch": 3.33, "learning_rate": 2.4e-05, "loss": 0.514, "step": 8000 }, { "epoch": 3.33, "eval_f1": 0.7534677800234317, "eval_loss": 0.45805472135543823, "eval_precision": 0.7670932310055877, "eval_recall": 0.7421523856331236, "eval_runtime": 326.6901, "eval_sacrebleu": 5.977964366842188, "eval_samples_per_second": 29.407, "eval_steps_per_second": 0.921, "step": 8000 }, { "epoch": 3.75, "learning_rate": 2.2000000000000003e-05, "loss": 0.5084, "step": 9000 }, { "epoch": 3.75, "eval_f1": 0.7547050486360937, "eval_loss": 0.4548051953315735, "eval_precision": 0.7675773970595853, "eval_recall": 0.7440757734278951, "eval_runtime": 329.4886, "eval_sacrebleu": 5.986154752583561, "eval_samples_per_second": 29.157, "eval_steps_per_second": 0.914, "step": 9000 }, { "epoch": 4.16, "learning_rate": 2e-05, "loss": 0.4987, "step": 10000 }, { "epoch": 4.16, "eval_f1": 0.7559209833584902, "eval_loss": 0.45204678177833557, "eval_precision": 0.7687782156203135, "eval_recall": 0.7452970269140299, "eval_runtime": 328.8363, "eval_sacrebleu": 6.112626970123219, "eval_samples_per_second": 29.215, "eval_steps_per_second": 0.915, "step": 10000 }, { "epoch": 4.58, "learning_rate": 1.8e-05, "loss": 0.4916, "step": 11000 }, { "epoch": 4.58, "eval_f1": 0.7562574939163441, "eval_loss": 0.44847676157951355, "eval_precision": 0.7693036325555866, "eval_recall": 0.7454933088714478, "eval_runtime": 328.9876, "eval_sacrebleu": 6.110669366899824, "eval_samples_per_second": 29.202, "eval_steps_per_second": 0.915, "step": 11000 }, { "epoch": 5.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.4855, "step": 12000 }, { "epoch": 5.0, "eval_f1": 0.7573605920082946, "eval_loss": 0.44527965784072876, "eval_precision": 0.7699207649359262, "eval_recall": 0.7470148278161243, "eval_runtime": 326.1238, "eval_sacrebleu": 6.199619598523688, "eval_samples_per_second": 29.458, "eval_steps_per_second": 0.923, "step": 12000 }, { "epoch": 5.41, "learning_rate": 1.4e-05, "loss": 0.4735, "step": 13000 }, { "epoch": 5.41, "eval_f1": 0.757606573654931, "eval_loss": 0.44323351979255676, "eval_precision": 0.7707262306620629, "eval_recall": 0.7467577414308141, "eval_runtime": 325.2009, "eval_sacrebleu": 6.170302966384815, "eval_samples_per_second": 29.542, "eval_steps_per_second": 0.926, "step": 13000 }, { "epoch": 5.83, "learning_rate": 1.2e-05, "loss": 0.4714, "step": 14000 }, { "epoch": 5.83, "eval_f1": 0.7582219671704239, "eval_loss": 0.44084230065345764, "eval_precision": 0.7707039175730187, "eval_recall": 0.7479663680666155, "eval_runtime": 328.6225, "eval_sacrebleu": 6.217397957778798, "eval_samples_per_second": 29.234, "eval_steps_per_second": 0.916, "step": 14000 }, { "epoch": 6.24, "learning_rate": 1e-05, "loss": 0.4619, "step": 15000 }, { "epoch": 6.24, "eval_f1": 0.7582127043374982, "eval_loss": 0.44005897641181946, "eval_precision": 0.7708874883637588, "eval_recall": 0.7477565708537125, "eval_runtime": 329.0722, "eval_sacrebleu": 6.251480154755987, "eval_samples_per_second": 29.194, "eval_steps_per_second": 0.915, "step": 15000 }, { "epoch": 6.66, "learning_rate": 8.000000000000001e-06, "loss": 0.4594, "step": 16000 }, { "epoch": 6.66, "eval_f1": 0.7590658383431736, "eval_loss": 0.4385643005371094, "eval_precision": 0.7722058984135644, "eval_recall": 0.7481865650026411, "eval_runtime": 329.0887, "eval_sacrebleu": 6.274907412338864, "eval_samples_per_second": 29.193, "eval_steps_per_second": 0.915, "step": 16000 }, { "epoch": 7.08, "learning_rate": 6e-06, "loss": 0.4548, "step": 17000 }, { "epoch": 7.08, "eval_f1": 0.7591303865470561, "eval_loss": 0.4375361502170563, "eval_precision": 0.7716383277265539, "eval_recall": 0.7488526040952809, "eval_runtime": 326.0628, "eval_sacrebleu": 6.2691323707280056, "eval_samples_per_second": 29.464, "eval_steps_per_second": 0.923, "step": 17000 }, { "epoch": 7.49, "learning_rate": 4.000000000000001e-06, "loss": 0.4496, "step": 18000 }, { "epoch": 7.49, "eval_f1": 0.7595453337276964, "eval_loss": 0.4368315041065216, "eval_precision": 0.7720739444758674, "eval_recall": 0.7492328676017723, "eval_runtime": 326.1933, "eval_sacrebleu": 6.332761567803722, "eval_samples_per_second": 29.452, "eval_steps_per_second": 0.923, "step": 18000 }, { "epoch": 7.91, "learning_rate": 2.0000000000000003e-06, "loss": 0.4484, "step": 19000 }, { "epoch": 7.91, "eval_f1": 0.7595180715794393, "eval_loss": 0.4362909495830536, "eval_precision": 0.7722789199881863, "eval_recall": 0.7489893756232178, "eval_runtime": 326.3639, "eval_sacrebleu": 6.311690383163712, "eval_samples_per_second": 29.436, "eval_steps_per_second": 0.922, "step": 19000 }, { "epoch": 8.33, "learning_rate": 0.0, "loss": 0.4446, "step": 20000 }, { "epoch": 8.33, "eval_f1": 0.7592586470783173, "eval_loss": 0.436233788728714, "eval_precision": 0.7721431963591666, "eval_recall": 0.7486093006745032, "eval_runtime": 326.7518, "eval_sacrebleu": 6.312650510263652, "eval_samples_per_second": 29.402, "eval_steps_per_second": 0.921, "step": 20000 } ], "max_steps": 20000, "num_train_epochs": 9, "total_flos": 2.033175326799954e+17, "trial_name": null, "trial_params": null }