{ "best_metric": 0.3695702850818634, "best_model_checkpoint": "/tmp/model/checkpoint-114", "epoch": 3.0, "eval_steps": 500, "global_step": 114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.166666666666667e-06, "loss": 1.0986, "step": 1 }, { "epoch": 0.05, "learning_rate": 8.333333333333334e-06, "loss": 1.0986, "step": 2 }, { "epoch": 0.08, "learning_rate": 1.25e-05, "loss": 1.0984, "step": 3 }, { "epoch": 0.11, "learning_rate": 1.6666666666666667e-05, "loss": 1.0967, "step": 4 }, { "epoch": 0.13, "learning_rate": 2.0833333333333336e-05, "loss": 1.0979, "step": 5 }, { "epoch": 0.16, "learning_rate": 2.5e-05, "loss": 1.0964, "step": 6 }, { "epoch": 0.18, "learning_rate": 2.916666666666667e-05, "loss": 1.0935, "step": 7 }, { "epoch": 0.21, "learning_rate": 3.3333333333333335e-05, "loss": 1.0933, "step": 8 }, { "epoch": 0.24, "learning_rate": 3.7500000000000003e-05, "loss": 1.0858, "step": 9 }, { "epoch": 0.26, "learning_rate": 4.166666666666667e-05, "loss": 1.0887, "step": 10 }, { "epoch": 0.29, "learning_rate": 4.5833333333333334e-05, "loss": 1.0779, "step": 11 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 1.0667, "step": 12 }, { "epoch": 0.34, "learning_rate": 4.9509803921568634e-05, "loss": 1.0756, "step": 13 }, { "epoch": 0.37, "learning_rate": 4.901960784313725e-05, "loss": 1.061, "step": 14 }, { "epoch": 0.39, "learning_rate": 4.8529411764705885e-05, "loss": 1.068, "step": 15 }, { "epoch": 0.42, "learning_rate": 4.803921568627452e-05, "loss": 1.0567, "step": 16 }, { "epoch": 0.45, "learning_rate": 4.7549019607843135e-05, "loss": 1.0472, "step": 17 }, { "epoch": 0.47, "learning_rate": 4.705882352941177e-05, "loss": 1.04, "step": 18 }, { "epoch": 0.5, "learning_rate": 4.656862745098039e-05, "loss": 0.962, "step": 19 }, { "epoch": 0.53, "learning_rate": 4.607843137254902e-05, "loss": 1.0195, "step": 20 }, { "epoch": 0.55, "learning_rate": 4.558823529411765e-05, "loss": 0.9738, "step": 21 }, { "epoch": 0.58, "learning_rate": 4.5098039215686275e-05, "loss": 0.9398, "step": 22 }, { "epoch": 0.61, "learning_rate": 4.5098039215686275e-05, "loss": 0.8889, "step": 23 }, { "epoch": 0.63, "learning_rate": 4.460784313725491e-05, "loss": 0.936, "step": 24 }, { "epoch": 0.66, "learning_rate": 4.411764705882353e-05, "loss": 0.9963, "step": 25 }, { "epoch": 0.68, "learning_rate": 4.362745098039216e-05, "loss": 0.9159, "step": 26 }, { "epoch": 0.71, "learning_rate": 4.313725490196079e-05, "loss": 1.035, "step": 27 }, { "epoch": 0.74, "learning_rate": 4.2647058823529415e-05, "loss": 0.8612, "step": 28 }, { "epoch": 0.76, "learning_rate": 4.215686274509804e-05, "loss": 0.7712, "step": 29 }, { "epoch": 0.79, "learning_rate": 4.166666666666667e-05, "loss": 0.8527, "step": 30 }, { "epoch": 0.82, "learning_rate": 4.11764705882353e-05, "loss": 0.7735, "step": 31 }, { "epoch": 0.84, "learning_rate": 4.068627450980392e-05, "loss": 0.6982, "step": 32 }, { "epoch": 0.87, "learning_rate": 4.0196078431372555e-05, "loss": 0.9675, "step": 33 }, { "epoch": 0.89, "learning_rate": 3.970588235294117e-05, "loss": 0.9925, "step": 34 }, { "epoch": 0.92, "learning_rate": 3.9215686274509805e-05, "loss": 0.7302, "step": 35 }, { "epoch": 0.95, "learning_rate": 3.872549019607844e-05, "loss": 0.8112, "step": 36 }, { "epoch": 0.97, "learning_rate": 3.8235294117647055e-05, "loss": 0.8665, "step": 37 }, { "epoch": 1.0, "learning_rate": 3.774509803921569e-05, "loss": 0.9163, "step": 38 }, { "epoch": 1.0, "eval_accuracy": 0.6933333333333334, "eval_f1_macro": 0.5261437908496732, "eval_f1_micro": 0.6933333333333334, "eval_f1_weighted": 0.6364705882352942, "eval_loss": 0.7905013561248779, "eval_precision_macro": 0.5, "eval_precision_micro": 0.6933333333333334, "eval_precision_weighted": 0.6333333333333333, "eval_recall_macro": 0.6126126126126126, "eval_recall_micro": 0.6933333333333334, "eval_recall_weighted": 0.6933333333333334, "eval_runtime": 3.0663, "eval_samples_per_second": 24.459, "eval_steps_per_second": 1.631, "step": 38 }, { "epoch": 1.03, "learning_rate": 3.725490196078432e-05, "loss": 0.7296, "step": 39 }, { "epoch": 1.05, "learning_rate": 3.6764705882352945e-05, "loss": 0.7124, "step": 40 }, { "epoch": 1.08, "learning_rate": 3.627450980392157e-05, "loss": 0.6638, "step": 41 }, { "epoch": 1.11, "learning_rate": 3.5784313725490195e-05, "loss": 0.7569, "step": 42 }, { "epoch": 1.13, "learning_rate": 3.529411764705883e-05, "loss": 0.6873, "step": 43 }, { "epoch": 1.16, "learning_rate": 3.480392156862745e-05, "loss": 0.4846, "step": 44 }, { "epoch": 1.18, "learning_rate": 3.431372549019608e-05, "loss": 0.7043, "step": 45 }, { "epoch": 1.21, "learning_rate": 3.382352941176471e-05, "loss": 0.7506, "step": 46 }, { "epoch": 1.24, "learning_rate": 3.3333333333333335e-05, "loss": 0.5512, "step": 47 }, { "epoch": 1.26, "learning_rate": 3.284313725490196e-05, "loss": 0.5588, "step": 48 }, { "epoch": 1.29, "learning_rate": 3.235294117647059e-05, "loss": 0.6611, "step": 49 }, { "epoch": 1.32, "learning_rate": 3.186274509803922e-05, "loss": 0.5627, "step": 50 }, { "epoch": 1.34, "learning_rate": 3.137254901960784e-05, "loss": 0.744, "step": 51 }, { "epoch": 1.37, "learning_rate": 3.0882352941176475e-05, "loss": 0.4871, "step": 52 }, { "epoch": 1.39, "learning_rate": 3.0392156862745097e-05, "loss": 0.6867, "step": 53 }, { "epoch": 1.42, "learning_rate": 2.9901960784313725e-05, "loss": 0.7154, "step": 54 }, { "epoch": 1.45, "learning_rate": 2.9411764705882354e-05, "loss": 0.5798, "step": 55 }, { "epoch": 1.47, "learning_rate": 2.8921568627450986e-05, "loss": 0.513, "step": 56 }, { "epoch": 1.5, "learning_rate": 2.8431372549019608e-05, "loss": 0.5023, "step": 57 }, { "epoch": 1.53, "learning_rate": 2.7941176470588236e-05, "loss": 0.3863, "step": 58 }, { "epoch": 1.55, "learning_rate": 2.7450980392156865e-05, "loss": 0.6195, "step": 59 }, { "epoch": 1.58, "learning_rate": 2.696078431372549e-05, "loss": 0.664, "step": 60 }, { "epoch": 1.61, "learning_rate": 2.647058823529412e-05, "loss": 0.3312, "step": 61 }, { "epoch": 1.63, "learning_rate": 2.5980392156862747e-05, "loss": 0.6563, "step": 62 }, { "epoch": 1.66, "learning_rate": 2.5490196078431373e-05, "loss": 0.8081, "step": 63 }, { "epoch": 1.68, "learning_rate": 2.5e-05, "loss": 0.6441, "step": 64 }, { "epoch": 1.71, "learning_rate": 2.4509803921568626e-05, "loss": 0.7858, "step": 65 }, { "epoch": 1.74, "learning_rate": 2.401960784313726e-05, "loss": 0.6327, "step": 66 }, { "epoch": 1.76, "learning_rate": 2.3529411764705884e-05, "loss": 0.2841, "step": 67 }, { "epoch": 1.79, "learning_rate": 2.303921568627451e-05, "loss": 0.4691, "step": 68 }, { "epoch": 1.82, "learning_rate": 2.2549019607843138e-05, "loss": 0.3667, "step": 69 }, { "epoch": 1.84, "learning_rate": 2.2058823529411766e-05, "loss": 0.6714, "step": 70 }, { "epoch": 1.87, "learning_rate": 2.1568627450980395e-05, "loss": 0.3061, "step": 71 }, { "epoch": 1.89, "learning_rate": 2.107843137254902e-05, "loss": 0.6383, "step": 72 }, { "epoch": 1.92, "learning_rate": 2.058823529411765e-05, "loss": 0.3669, "step": 73 }, { "epoch": 1.95, "learning_rate": 2.0098039215686277e-05, "loss": 0.4398, "step": 74 }, { "epoch": 1.97, "learning_rate": 1.9607843137254903e-05, "loss": 0.4971, "step": 75 }, { "epoch": 2.0, "learning_rate": 1.9117647058823528e-05, "loss": 1.4849, "step": 76 }, { "epoch": 2.0, "eval_accuracy": 0.8933333333333333, "eval_f1_macro": 0.8683603977721625, "eval_f1_micro": 0.8933333333333333, "eval_f1_weighted": 0.8905228105228105, "eval_loss": 0.4645865857601166, "eval_precision_macro": 0.8791394335511983, "eval_precision_micro": 0.8933333333333333, "eval_precision_weighted": 0.8918888888888888, "eval_recall_macro": 0.8618113912231559, "eval_recall_micro": 0.8933333333333333, "eval_recall_weighted": 0.8933333333333333, "eval_runtime": 3.0873, "eval_samples_per_second": 24.293, "eval_steps_per_second": 1.62, "step": 76 }, { "epoch": 2.03, "learning_rate": 1.862745098039216e-05, "loss": 0.4711, "step": 77 }, { "epoch": 2.05, "learning_rate": 1.8137254901960785e-05, "loss": 0.3112, "step": 78 }, { "epoch": 2.08, "learning_rate": 1.7647058823529414e-05, "loss": 0.5531, "step": 79 }, { "epoch": 2.11, "learning_rate": 1.715686274509804e-05, "loss": 0.3208, "step": 80 }, { "epoch": 2.13, "learning_rate": 1.6666666666666667e-05, "loss": 0.5656, "step": 81 }, { "epoch": 2.16, "learning_rate": 1.6176470588235296e-05, "loss": 0.4154, "step": 82 }, { "epoch": 2.18, "learning_rate": 1.568627450980392e-05, "loss": 0.4136, "step": 83 }, { "epoch": 2.21, "learning_rate": 1.5196078431372548e-05, "loss": 0.4718, "step": 84 }, { "epoch": 2.24, "learning_rate": 1.4705882352941177e-05, "loss": 0.3163, "step": 85 }, { "epoch": 2.26, "learning_rate": 1.4215686274509804e-05, "loss": 0.6051, "step": 86 }, { "epoch": 2.29, "learning_rate": 1.3725490196078432e-05, "loss": 0.4157, "step": 87 }, { "epoch": 2.32, "learning_rate": 1.323529411764706e-05, "loss": 0.5979, "step": 88 }, { "epoch": 2.34, "learning_rate": 1.2745098039215686e-05, "loss": 0.4237, "step": 89 }, { "epoch": 2.37, "learning_rate": 1.2254901960784313e-05, "loss": 0.3573, "step": 90 }, { "epoch": 2.39, "learning_rate": 1.1764705882352942e-05, "loss": 0.3732, "step": 91 }, { "epoch": 2.42, "learning_rate": 1.1274509803921569e-05, "loss": 0.539, "step": 92 }, { "epoch": 2.45, "learning_rate": 1.0784313725490197e-05, "loss": 0.3959, "step": 93 }, { "epoch": 2.47, "learning_rate": 1.0294117647058824e-05, "loss": 0.29, "step": 94 }, { "epoch": 2.5, "learning_rate": 9.803921568627451e-06, "loss": 0.3109, "step": 95 }, { "epoch": 2.53, "learning_rate": 9.31372549019608e-06, "loss": 0.3063, "step": 96 }, { "epoch": 2.55, "learning_rate": 8.823529411764707e-06, "loss": 0.4449, "step": 97 }, { "epoch": 2.58, "learning_rate": 8.333333333333334e-06, "loss": 0.4337, "step": 98 }, { "epoch": 2.61, "learning_rate": 7.84313725490196e-06, "loss": 0.3803, "step": 99 }, { "epoch": 2.63, "learning_rate": 7.3529411764705884e-06, "loss": 0.4617, "step": 100 }, { "epoch": 2.66, "learning_rate": 6.862745098039216e-06, "loss": 0.3859, "step": 101 }, { "epoch": 2.68, "learning_rate": 6.372549019607843e-06, "loss": 0.4455, "step": 102 }, { "epoch": 2.71, "learning_rate": 5.882352941176471e-06, "loss": 0.2667, "step": 103 }, { "epoch": 2.74, "learning_rate": 5.392156862745099e-06, "loss": 0.6251, "step": 104 }, { "epoch": 2.76, "learning_rate": 4.901960784313726e-06, "loss": 0.3378, "step": 105 }, { "epoch": 2.79, "learning_rate": 4.411764705882353e-06, "loss": 0.4611, "step": 106 }, { "epoch": 2.82, "learning_rate": 3.92156862745098e-06, "loss": 0.5596, "step": 107 }, { "epoch": 2.84, "learning_rate": 3.431372549019608e-06, "loss": 0.237, "step": 108 }, { "epoch": 2.87, "learning_rate": 2.9411764705882355e-06, "loss": 0.1953, "step": 109 }, { "epoch": 2.89, "learning_rate": 2.450980392156863e-06, "loss": 0.3592, "step": 110 }, { "epoch": 2.92, "learning_rate": 1.96078431372549e-06, "loss": 0.539, "step": 111 }, { "epoch": 2.95, "learning_rate": 1.4705882352941177e-06, "loss": 0.4407, "step": 112 }, { "epoch": 2.97, "learning_rate": 9.80392156862745e-07, "loss": 0.2672, "step": 113 }, { "epoch": 3.0, "learning_rate": 4.901960784313725e-07, "loss": 0.7935, "step": 114 }, { "epoch": 3.0, "eval_accuracy": 0.9333333333333333, "eval_f1_macro": 0.9200292729704493, "eval_f1_micro": 0.9333333333333333, "eval_f1_weighted": 0.9333333333333333, "eval_loss": 0.3695702850818634, "eval_precision_macro": 0.9200292729704493, "eval_precision_micro": 0.9333333333333333, "eval_precision_weighted": 0.9333333333333333, "eval_recall_macro": 0.9200292729704493, "eval_recall_micro": 0.9333333333333333, "eval_recall_weighted": 0.9333333333333333, "eval_runtime": 3.049, "eval_samples_per_second": 24.598, "eval_steps_per_second": 1.64, "step": 114 } ], "logging_steps": 1, "max_steps": 114, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.4489798441692774e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }