{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.349775784753364, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "learning_rate": 0.0002, "loss": 0.6202, "step": 50 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.4487, "step": 100 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.419, "step": 150 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.4151, "step": 200 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 0.3933, "step": 250 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.3797, "step": 300 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 0.3688, "step": 350 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 0.3749, "step": 400 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 0.3624, "step": 450 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 0.3331, "step": 500 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 0.3347, "step": 550 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 0.3533, "step": 600 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 0.3413, "step": 650 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 0.3242, "step": 700 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 0.3091, "step": 750 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 0.3166, "step": 800 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 0.3121, "step": 850 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 0.304, "step": 900 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 0.2776, "step": 950 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 0.2783, "step": 1000 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 0.2836, "step": 1050 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 0.2931, "step": 1100 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 0.2589, "step": 1150 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 0.2525, "step": 1200 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 0.255, "step": 1250 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 0.2564, "step": 1300 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 0.242, "step": 1350 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 0.2092, "step": 1400 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 0.2172, "step": 1450 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 0.2233, "step": 1500 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 0.2261, "step": 1550 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 0.1944, "step": 1600 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 0.1803, "step": 1650 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 0.1852, "step": 1700 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 0.1931, "step": 1750 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 0.1755, "step": 1800 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 0.1465, "step": 1850 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 0.1535, "step": 1900 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 0.1542, "step": 1950 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 0.161, "step": 2000 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 0.1205, "step": 2050 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 0.1197, "step": 2100 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 0.1242, "step": 2150 }, { "epoch": 9.87, "learning_rate": 0.0002, "loss": 0.1277, "step": 2200 }, { "epoch": 10.09, "learning_rate": 0.0002, "loss": 0.1151, "step": 2250 }, { "epoch": 10.31, "learning_rate": 0.0002, "loss": 0.0925, "step": 2300 }, { "epoch": 10.54, "learning_rate": 0.0002, "loss": 0.0969, "step": 2350 }, { "epoch": 10.76, "learning_rate": 0.0002, "loss": 0.1023, "step": 2400 }, { "epoch": 10.99, "learning_rate": 0.0002, "loss": 0.1016, "step": 2450 }, { "epoch": 11.21, "learning_rate": 0.0002, "loss": 0.0708, "step": 2500 }, { "epoch": 11.43, "learning_rate": 0.0002, "loss": 0.0735, "step": 2550 }, { "epoch": 11.66, "learning_rate": 0.0002, "loss": 0.0804, "step": 2600 }, { "epoch": 11.88, "learning_rate": 0.0002, "loss": 0.0833, "step": 2650 }, { "epoch": 12.11, "learning_rate": 0.0002, "loss": 0.072, "step": 2700 }, { "epoch": 12.33, "learning_rate": 0.0002, "loss": 0.0592, "step": 2750 }, { "epoch": 12.56, "learning_rate": 0.0002, "loss": 0.0585, "step": 2800 }, { "epoch": 12.78, "learning_rate": 0.0002, "loss": 0.0647, "step": 2850 }, { "epoch": 13.0, "learning_rate": 0.0002, "loss": 0.0656, "step": 2900 }, { "epoch": 13.23, "learning_rate": 0.0002, "loss": 0.0473, "step": 2950 }, { "epoch": 13.45, "learning_rate": 0.0002, "loss": 0.0509, "step": 3000 }, { "epoch": 13.68, "learning_rate": 0.0002, "loss": 0.0523, "step": 3050 }, { "epoch": 13.9, "learning_rate": 0.0002, "loss": 0.0531, "step": 3100 }, { "epoch": 14.13, "learning_rate": 0.0002, "loss": 0.0455, "step": 3150 }, { "epoch": 14.35, "learning_rate": 0.0002, "loss": 0.0407, "step": 3200 } ], "max_steps": 3345, "num_train_epochs": 15, "total_flos": 5.0781774924831744e+17, "trial_name": null, "trial_params": null }