{ "best_metric": 0.9055714011192322, "best_model_checkpoint": "./lora-alpaca/checkpoint-200", "epoch": 0.5144694533762058, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.9999999999999997e-05, "loss": 1.8867, "step": 10 }, { "epoch": 0.05, "learning_rate": 5.6999999999999996e-05, "loss": 1.8339, "step": 20 }, { "epoch": 0.08, "learning_rate": 8.699999999999999e-05, "loss": 1.6664, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.000117, "loss": 1.3046, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.000147, "loss": 1.115, "step": 50 }, { "epoch": 0.15, "learning_rate": 0.00017399999999999997, "loss": 1.0706, "step": 60 }, { "epoch": 0.18, "learning_rate": 0.000204, "loss": 1.0269, "step": 70 }, { "epoch": 0.21, "learning_rate": 0.000234, "loss": 1.0012, "step": 80 }, { "epoch": 0.23, "learning_rate": 0.00026399999999999997, "loss": 0.9608, "step": 90 }, { "epoch": 0.26, "learning_rate": 0.000294, "loss": 0.9563, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.00029166666666666664, "loss": 0.9512, "step": 110 }, { "epoch": 0.31, "learning_rate": 0.00028125, "loss": 0.9505, "step": 120 }, { "epoch": 0.33, "learning_rate": 0.0002708333333333333, "loss": 0.9326, "step": 130 }, { "epoch": 0.36, "learning_rate": 0.00026041666666666666, "loss": 0.9229, "step": 140 }, { "epoch": 0.39, "learning_rate": 0.00025, "loss": 0.918, "step": 150 }, { "epoch": 0.41, "learning_rate": 0.00023958333333333332, "loss": 0.9128, "step": 160 }, { "epoch": 0.44, "learning_rate": 0.00022916666666666664, "loss": 0.9021, "step": 170 }, { "epoch": 0.46, "learning_rate": 0.00021874999999999998, "loss": 0.9115, "step": 180 }, { "epoch": 0.49, "learning_rate": 0.00020833333333333332, "loss": 0.8915, "step": 190 }, { "epoch": 0.51, "learning_rate": 0.00019791666666666663, "loss": 0.8993, "step": 200 }, { "epoch": 0.51, "eval_loss": 0.9055714011192322, "eval_runtime": 179.4765, "eval_samples_per_second": 11.144, "eval_steps_per_second": 0.696, "step": 200 } ], "max_steps": 388, "num_train_epochs": 1, "total_flos": 2.591511325704192e+17, "trial_name": null, "trial_params": null }