{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.136986301369863, "global_step": 390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4e-05, "loss": 1.3902, "step": 10 }, { "epoch": 0.05, "eval_loss": 1.3901418447494507, "eval_runtime": 108.6518, "eval_samples_per_second": 1.555, "eval_steps_per_second": 0.396, "step": 10 }, { "epoch": 0.11, "learning_rate": 8e-05, "loss": 1.3304, "step": 20 }, { "epoch": 0.11, "eval_loss": 1.3154278993606567, "eval_runtime": 108.7121, "eval_samples_per_second": 1.555, "eval_steps_per_second": 0.396, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.00012, "loss": 1.2138, "step": 30 }, { "epoch": 0.16, "eval_loss": 1.2570539712905884, "eval_runtime": 108.5686, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 30 }, { "epoch": 0.22, "learning_rate": 0.00016, "loss": 1.2281, "step": 40 }, { "epoch": 0.22, "eval_loss": 1.237758994102478, "eval_runtime": 108.8506, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.395, "step": 40 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 1.2155, "step": 50 }, { "epoch": 0.27, "eval_loss": 1.2247029542922974, "eval_runtime": 108.5608, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 50 }, { "epoch": 0.33, "learning_rate": 0.00019994532573409262, "loss": 1.1823, "step": 60 }, { "epoch": 0.33, "eval_loss": 1.2180429697036743, "eval_runtime": 108.7983, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.395, "step": 60 }, { "epoch": 0.38, "learning_rate": 0.00019978136272187747, "loss": 1.2289, "step": 70 }, { "epoch": 0.38, "eval_loss": 1.2121814489364624, "eval_runtime": 108.4495, "eval_samples_per_second": 1.558, "eval_steps_per_second": 0.396, "step": 70 }, { "epoch": 0.44, "learning_rate": 0.00019950829025450114, "loss": 1.1623, "step": 80 }, { "epoch": 0.44, "eval_loss": 1.2073330879211426, "eval_runtime": 108.7996, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.395, "step": 80 }, { "epoch": 0.49, "learning_rate": 0.00019912640693269752, "loss": 1.2067, "step": 90 }, { "epoch": 0.49, "eval_loss": 1.204315423965454, "eval_runtime": 108.8942, "eval_samples_per_second": 1.552, "eval_steps_per_second": 0.395, "step": 90 }, { "epoch": 0.55, "learning_rate": 0.00019863613034027224, "loss": 1.1499, "step": 100 }, { "epoch": 0.55, "eval_loss": 1.2012346982955933, "eval_runtime": 108.7573, "eval_samples_per_second": 1.554, "eval_steps_per_second": 0.395, "step": 100 }, { "epoch": 0.6, "learning_rate": 0.00019803799658748094, "loss": 1.2184, "step": 110 }, { "epoch": 0.6, "eval_loss": 1.200089454650879, "eval_runtime": 108.5135, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 110 }, { "epoch": 0.66, "learning_rate": 0.0001973326597248006, "loss": 1.1758, "step": 120 }, { "epoch": 0.66, "eval_loss": 1.1969162225723267, "eval_runtime": 108.562, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 120 }, { "epoch": 0.71, "learning_rate": 0.00019652089102773488, "loss": 1.1931, "step": 130 }, { "epoch": 0.71, "eval_loss": 1.1960800886154175, "eval_runtime": 108.6308, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 130 }, { "epoch": 0.77, "learning_rate": 0.00019560357815343577, "loss": 1.1334, "step": 140 }, { "epoch": 0.77, "eval_loss": 1.1931264400482178, "eval_runtime": 108.6207, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 140 }, { "epoch": 0.82, "learning_rate": 0.00019458172417006347, "loss": 1.1321, "step": 150 }, { "epoch": 0.82, "eval_loss": 1.1918461322784424, "eval_runtime": 108.4723, "eval_samples_per_second": 1.558, "eval_steps_per_second": 0.396, "step": 150 }, { "epoch": 0.88, "learning_rate": 0.0001934564464599461, "loss": 1.181, "step": 160 }, { "epoch": 0.88, "eval_loss": 1.1898659467697144, "eval_runtime": 108.5621, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 160 }, { "epoch": 0.93, "learning_rate": 0.00019222897549773848, "loss": 1.2347, "step": 170 }, { "epoch": 0.93, "eval_loss": 1.1882331371307373, "eval_runtime": 108.6542, "eval_samples_per_second": 1.555, "eval_steps_per_second": 0.396, "step": 170 }, { "epoch": 0.99, "learning_rate": 0.00019090065350491626, "loss": 1.0926, "step": 180 }, { "epoch": 0.99, "eval_loss": 1.1871733665466309, "eval_runtime": 108.5395, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 180 }, { "epoch": 1.04, "learning_rate": 0.00018947293298207635, "loss": 1.1425, "step": 190 }, { "epoch": 1.04, "eval_loss": 1.1867098808288574, "eval_runtime": 108.4877, "eval_samples_per_second": 1.558, "eval_steps_per_second": 0.396, "step": 190 }, { "epoch": 1.1, "learning_rate": 0.0001879473751206489, "loss": 1.128, "step": 200 }, { "epoch": 1.1, "eval_loss": 1.1867098808288574, "eval_runtime": 108.7001, "eval_samples_per_second": 1.555, "eval_steps_per_second": 0.396, "step": 200 }, { "epoch": 1.15, "learning_rate": 0.00018632564809575742, "loss": 1.1237, "step": 210 }, { "epoch": 1.15, "eval_loss": 1.1867098808288574, "eval_runtime": 108.5929, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 210 }, { "epoch": 1.21, "learning_rate": 0.00018460952524209355, "loss": 1.0939, "step": 220 }, { "epoch": 1.21, "eval_loss": 1.1867098808288574, "eval_runtime": 108.8988, "eval_samples_per_second": 1.552, "eval_steps_per_second": 0.395, "step": 220 }, { "epoch": 1.26, "learning_rate": 0.00018280088311480201, "loss": 1.093, "step": 230 }, { "epoch": 1.26, "eval_loss": 1.1867098808288574, "eval_runtime": 108.817, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.395, "step": 230 }, { "epoch": 1.32, "learning_rate": 0.00018090169943749476, "loss": 1.0988, "step": 240 }, { "epoch": 1.32, "eval_loss": 1.1867098808288574, "eval_runtime": 108.6088, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 240 }, { "epoch": 1.37, "learning_rate": 0.00017891405093963938, "loss": 1.1054, "step": 250 }, { "epoch": 1.37, "eval_loss": 1.1867098808288574, "eval_runtime": 108.4026, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.397, "step": 250 }, { "epoch": 1.42, "learning_rate": 0.00017684011108568592, "loss": 1.1051, "step": 260 }, { "epoch": 1.42, "eval_loss": 1.1867098808288574, "eval_runtime": 108.6345, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 260 }, { "epoch": 1.48, "learning_rate": 0.0001746821476984154, "loss": 1.1373, "step": 270 }, { "epoch": 1.48, "eval_loss": 1.1867098808288574, "eval_runtime": 108.873, "eval_samples_per_second": 1.552, "eval_steps_per_second": 0.395, "step": 270 }, { "epoch": 1.53, "learning_rate": 0.00017244252047910892, "loss": 1.119, "step": 280 }, { "epoch": 1.53, "eval_loss": 1.1867098808288574, "eval_runtime": 108.5863, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 280 }, { "epoch": 1.59, "learning_rate": 0.00017012367842724887, "loss": 1.1399, "step": 290 }, { "epoch": 1.59, "eval_loss": 1.1867098808288574, "eval_runtime": 108.7422, "eval_samples_per_second": 1.554, "eval_steps_per_second": 0.395, "step": 290 }, { "epoch": 1.64, "learning_rate": 0.00016772815716257412, "loss": 1.0744, "step": 300 }, { "epoch": 1.64, "eval_loss": 1.1867098808288574, "eval_runtime": 108.7961, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.395, "step": 300 }, { "epoch": 1.7, "learning_rate": 0.00016525857615241687, "loss": 1.0797, "step": 310 }, { "epoch": 1.7, "eval_loss": 1.1867098808288574, "eval_runtime": 108.4529, "eval_samples_per_second": 1.558, "eval_steps_per_second": 0.396, "step": 310 }, { "epoch": 1.75, "learning_rate": 0.0001627176358473537, "loss": 1.1281, "step": 320 }, { "epoch": 1.75, "eval_loss": 1.1867098808288574, "eval_runtime": 108.354, "eval_samples_per_second": 1.56, "eval_steps_per_second": 0.397, "step": 320 }, { "epoch": 1.81, "learning_rate": 0.00016010811472830252, "loss": 1.073, "step": 330 }, { "epoch": 1.81, "eval_loss": 1.1867098808288574, "eval_runtime": 108.3772, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.397, "step": 330 }, { "epoch": 1.86, "learning_rate": 0.00015743286626829437, "loss": 1.1053, "step": 340 }, { "epoch": 1.86, "eval_loss": 1.1867098808288574, "eval_runtime": 108.3166, "eval_samples_per_second": 1.56, "eval_steps_per_second": 0.397, "step": 340 }, { "epoch": 1.92, "learning_rate": 0.00015469481581224272, "loss": 1.1402, "step": 350 }, { "epoch": 1.92, "eval_loss": 1.1867098808288574, "eval_runtime": 108.4029, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.397, "step": 350 }, { "epoch": 1.97, "learning_rate": 0.00015189695737812152, "loss": 1.1627, "step": 360 }, { "epoch": 1.97, "eval_loss": 1.1867098808288574, "eval_runtime": 108.4859, "eval_samples_per_second": 1.558, "eval_steps_per_second": 0.396, "step": 360 }, { "epoch": 2.03, "learning_rate": 0.00014904235038305083, "loss": 1.1064, "step": 370 }, { "epoch": 2.03, "eval_loss": 1.1901580095291138, "eval_runtime": 108.5309, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.396, "step": 370 }, { "epoch": 2.08, "learning_rate": 0.0001461341162978688, "loss": 1.0264, "step": 380 }, { "epoch": 2.08, "eval_loss": 1.187896966934204, "eval_runtime": 108.3764, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.397, "step": 380 }, { "epoch": 2.14, "learning_rate": 0.00014317543523384928, "loss": 1.1384, "step": 390 }, { "epoch": 2.14, "eval_loss": 1.1920827627182007, "eval_runtime": 108.625, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.396, "step": 390 } ], "max_steps": 1000, "num_train_epochs": 6, "total_flos": 6.601561671204864e+16, "trial_name": null, "trial_params": null }