{ "best_metric": 0.021361960098147392, "best_model_checkpoint": "gpt3_finetuned_model/checkpoint-15024", "epoch": 2.0, "eval_steps": 500, "global_step": 30048, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.966719914802982e-05, "loss": 0.0219, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.933439829605964e-05, "loss": 0.012, "step": 1000 }, { "epoch": 0.1, "learning_rate": 1.900159744408946e-05, "loss": 0.0141, "step": 1500 }, { "epoch": 0.13, "learning_rate": 1.866879659211928e-05, "loss": 0.0096, "step": 2000 }, { "epoch": 0.17, "learning_rate": 1.8335995740149097e-05, "loss": 0.0135, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.8003194888178915e-05, "loss": 0.021, "step": 3000 }, { "epoch": 0.23, "learning_rate": 1.7670394036208734e-05, "loss": 0.0125, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.7337593184238552e-05, "loss": 0.0084, "step": 4000 }, { "epoch": 0.3, "learning_rate": 1.700479233226837e-05, "loss": 0.0128, "step": 4500 }, { "epoch": 0.33, "learning_rate": 1.6671991480298192e-05, "loss": 0.0142, "step": 5000 }, { "epoch": 0.37, "learning_rate": 1.633919062832801e-05, "loss": 0.0143, "step": 5500 }, { "epoch": 0.4, "learning_rate": 1.600638977635783e-05, "loss": 0.0087, "step": 6000 }, { "epoch": 0.43, "learning_rate": 1.5673588924387647e-05, "loss": 0.0085, "step": 6500 }, { "epoch": 0.47, "learning_rate": 1.5340788072417466e-05, "loss": 0.0122, "step": 7000 }, { "epoch": 0.5, "learning_rate": 1.5007987220447286e-05, "loss": 0.0126, "step": 7500 }, { "epoch": 0.53, "learning_rate": 1.4675186368477104e-05, "loss": 0.014, "step": 8000 }, { "epoch": 0.57, "learning_rate": 1.4342385516506923e-05, "loss": 0.0086, "step": 8500 }, { "epoch": 0.6, "learning_rate": 1.4009584664536741e-05, "loss": 0.0094, "step": 9000 }, { "epoch": 0.63, "learning_rate": 1.3676783812566561e-05, "loss": 0.0136, "step": 9500 }, { "epoch": 0.67, "learning_rate": 1.334398296059638e-05, "loss": 0.0057, "step": 10000 }, { "epoch": 0.7, "learning_rate": 1.30111821086262e-05, "loss": 0.0126, "step": 10500 }, { "epoch": 0.73, "learning_rate": 1.2678381256656018e-05, "loss": 0.013, "step": 11000 }, { "epoch": 0.77, "learning_rate": 1.2345580404685838e-05, "loss": 0.0096, "step": 11500 }, { "epoch": 0.8, "learning_rate": 1.2012779552715656e-05, "loss": 0.0068, "step": 12000 }, { "epoch": 0.83, "learning_rate": 1.1679978700745476e-05, "loss": 0.0057, "step": 12500 }, { "epoch": 0.87, "learning_rate": 1.1347177848775295e-05, "loss": 0.0062, "step": 13000 }, { "epoch": 0.9, "learning_rate": 1.1014376996805112e-05, "loss": 0.0138, "step": 13500 }, { "epoch": 0.93, "learning_rate": 1.0681576144834932e-05, "loss": 0.007, "step": 14000 }, { "epoch": 0.97, "learning_rate": 1.034877529286475e-05, "loss": 0.0054, "step": 14500 }, { "epoch": 1.0, "learning_rate": 1.0015974440894568e-05, "loss": 0.0061, "step": 15000 }, { "epoch": 1.0, "eval_accuracy": 0.9955404685835996, "eval_loss": 0.021361960098147392, "eval_runtime": 198.8868, "eval_samples_per_second": 302.162, "eval_steps_per_second": 18.885, "step": 15024 }, { "epoch": 1.03, "learning_rate": 9.683173588924388e-06, "loss": 0.0015, "step": 15500 }, { "epoch": 1.06, "learning_rate": 9.350372736954207e-06, "loss": 0.0026, "step": 16000 }, { "epoch": 1.1, "learning_rate": 9.017571884984027e-06, "loss": 0.002, "step": 16500 }, { "epoch": 1.13, "learning_rate": 8.684771033013845e-06, "loss": 0.0046, "step": 17000 }, { "epoch": 1.16, "learning_rate": 8.351970181043664e-06, "loss": 0.0023, "step": 17500 }, { "epoch": 1.2, "learning_rate": 8.019169329073482e-06, "loss": 0.0045, "step": 18000 }, { "epoch": 1.23, "learning_rate": 7.686368477103302e-06, "loss": 0.0023, "step": 18500 }, { "epoch": 1.26, "learning_rate": 7.353567625133121e-06, "loss": 0.0041, "step": 19000 }, { "epoch": 1.3, "learning_rate": 7.020766773162941e-06, "loss": 0.0016, "step": 19500 }, { "epoch": 1.33, "learning_rate": 6.687965921192758e-06, "loss": 0.0016, "step": 20000 }, { "epoch": 1.36, "learning_rate": 6.355165069222577e-06, "loss": 0.0033, "step": 20500 }, { "epoch": 1.4, "learning_rate": 6.022364217252397e-06, "loss": 0.0051, "step": 21000 }, { "epoch": 1.43, "learning_rate": 5.689563365282216e-06, "loss": 0.0002, "step": 21500 }, { "epoch": 1.46, "learning_rate": 5.356762513312035e-06, "loss": 0.0025, "step": 22000 }, { "epoch": 1.5, "learning_rate": 5.023961661341853e-06, "loss": 0.0039, "step": 22500 }, { "epoch": 1.53, "learning_rate": 4.691160809371673e-06, "loss": 0.0029, "step": 23000 }, { "epoch": 1.56, "learning_rate": 4.358359957401491e-06, "loss": 0.0041, "step": 23500 }, { "epoch": 1.6, "learning_rate": 4.02555910543131e-06, "loss": 0.0017, "step": 24000 }, { "epoch": 1.63, "learning_rate": 3.692758253461129e-06, "loss": 0.0, "step": 24500 }, { "epoch": 1.66, "learning_rate": 3.359957401490948e-06, "loss": 0.0021, "step": 25000 }, { "epoch": 1.7, "learning_rate": 3.027156549520767e-06, "loss": 0.0016, "step": 25500 }, { "epoch": 1.73, "learning_rate": 2.694355697550586e-06, "loss": 0.0, "step": 26000 }, { "epoch": 1.76, "learning_rate": 2.3615548455804047e-06, "loss": 0.0001, "step": 26500 }, { "epoch": 1.8, "learning_rate": 2.028753993610224e-06, "loss": 0.0001, "step": 27000 }, { "epoch": 1.83, "learning_rate": 1.6959531416400426e-06, "loss": 0.0012, "step": 27500 }, { "epoch": 1.86, "learning_rate": 1.3631522896698618e-06, "loss": 0.001, "step": 28000 }, { "epoch": 1.9, "learning_rate": 1.0303514376996806e-06, "loss": 0.0008, "step": 28500 }, { "epoch": 1.93, "learning_rate": 6.975505857294995e-07, "loss": 0.004, "step": 29000 }, { "epoch": 1.96, "learning_rate": 3.6474973375931847e-07, "loss": 0.0009, "step": 29500 }, { "epoch": 2.0, "learning_rate": 3.194888178913738e-08, "loss": 0.001, "step": 30000 }, { "epoch": 2.0, "eval_accuracy": 0.9962227103301384, "eval_loss": 0.02437487617135048, "eval_runtime": 197.6909, "eval_samples_per_second": 303.99, "eval_steps_per_second": 18.999, "step": 30048 } ], "logging_steps": 500, "max_steps": 30048, "num_train_epochs": 2, "save_steps": 500, "total_flos": 4.576643477567117e+16, "trial_name": null, "trial_params": null }