{ "best_metric": 0.1436834264468159, "best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-5e-5-15000-42/checkpoint-1600", "epoch": 78.77611940298507, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.03, "learning_rate": 2.5e-05, "loss": 0.6426, "step": 100 }, { "epoch": 3.03, "eval_loss": 0.6255418658256531, "eval_matthews_correlation": 0.0, "eval_runtime": 8.2752, "eval_samples_per_second": 126.039, "eval_steps_per_second": 15.83, "step": 100 }, { "epoch": 6.06, "learning_rate": 5e-05, "loss": 0.6176, "step": 200 }, { "epoch": 6.06, "eval_loss": 0.630817711353302, "eval_matthews_correlation": 0.0, "eval_runtime": 8.1785, "eval_samples_per_second": 127.53, "eval_steps_per_second": 16.018, "step": 200 }, { "epoch": 9.09, "learning_rate": 4.9662162162162164e-05, "loss": 0.6183, "step": 300 }, { "epoch": 9.09, "eval_loss": 0.6186701059341431, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8187, "eval_samples_per_second": 133.398, "eval_steps_per_second": 16.755, "step": 300 }, { "epoch": 12.12, "learning_rate": 4.9324324324324325e-05, "loss": 0.6162, "step": 400 }, { "epoch": 12.12, "eval_loss": 0.6158396005630493, "eval_matthews_correlation": 0.0, "eval_runtime": 7.9658, "eval_samples_per_second": 130.935, "eval_steps_per_second": 16.445, "step": 400 }, { "epoch": 15.15, "learning_rate": 4.8986486486486486e-05, "loss": 0.614, "step": 500 }, { "epoch": 15.15, "eval_loss": 0.6250273585319519, "eval_matthews_correlation": -0.02929206145132745, "eval_runtime": 7.9393, "eval_samples_per_second": 131.373, "eval_steps_per_second": 16.5, "step": 500 }, { "epoch": 18.18, "learning_rate": 4.8648648648648654e-05, "loss": 0.6096, "step": 600 }, { "epoch": 18.18, "eval_loss": 0.618541419506073, "eval_matthews_correlation": 0.0, "eval_runtime": 7.9167, "eval_samples_per_second": 131.746, "eval_steps_per_second": 16.547, "step": 600 }, { "epoch": 21.21, "learning_rate": 4.8310810810810816e-05, "loss": 0.6055, "step": 700 }, { "epoch": 21.21, "eval_loss": 0.6223562359809875, "eval_matthews_correlation": 0.017470726784935973, "eval_runtime": 7.909, "eval_samples_per_second": 131.875, "eval_steps_per_second": 16.563, "step": 700 }, { "epoch": 24.24, "learning_rate": 4.7979729729729736e-05, "loss": 0.6001, "step": 800 }, { "epoch": 24.24, "eval_loss": 0.6550844311714172, "eval_matthews_correlation": 0.13008617806157513, "eval_runtime": 8.0719, "eval_samples_per_second": 129.214, "eval_steps_per_second": 16.229, "step": 800 }, { "epoch": 27.27, "learning_rate": 4.764527027027027e-05, "loss": 0.5909, "step": 900 }, { "epoch": 27.27, "eval_loss": 0.6533612012863159, "eval_matthews_correlation": 0.056561954805504064, "eval_runtime": 8.0431, "eval_samples_per_second": 129.676, "eval_steps_per_second": 16.287, "step": 900 }, { "epoch": 30.3, "learning_rate": 4.730743243243244e-05, "loss": 0.5726, "step": 1000 }, { "epoch": 30.3, "eval_loss": 0.6678630709648132, "eval_matthews_correlation": 0.10285544103286985, "eval_runtime": 7.9909, "eval_samples_per_second": 130.524, "eval_steps_per_second": 16.394, "step": 1000 }, { "epoch": 33.33, "learning_rate": 4.697297297297297e-05, "loss": 0.5524, "step": 1100 }, { "epoch": 33.33, "eval_loss": 0.6901304721832275, "eval_matthews_correlation": 0.06313184843830015, "eval_runtime": 8.3103, "eval_samples_per_second": 125.507, "eval_steps_per_second": 15.764, "step": 1100 }, { "epoch": 36.36, "learning_rate": 4.663513513513514e-05, "loss": 0.5167, "step": 1200 }, { "epoch": 36.36, "eval_loss": 0.7026833891868591, "eval_matthews_correlation": 0.09479027897712053, "eval_runtime": 8.0473, "eval_samples_per_second": 129.608, "eval_steps_per_second": 16.279, "step": 1200 }, { "epoch": 39.39, "learning_rate": 4.6297297297297295e-05, "loss": 0.4779, "step": 1300 }, { "epoch": 39.39, "eval_loss": 0.7578131556510925, "eval_matthews_correlation": 0.10121883963858187, "eval_runtime": 8.2334, "eval_samples_per_second": 126.679, "eval_steps_per_second": 15.911, "step": 1300 }, { "epoch": 42.42, "learning_rate": 4.5959459459459463e-05, "loss": 0.4271, "step": 1400 }, { "epoch": 42.42, "eval_loss": 0.8021395802497864, "eval_matthews_correlation": 0.11075235134282446, "eval_runtime": 8.0267, "eval_samples_per_second": 129.941, "eval_steps_per_second": 16.32, "step": 1400 }, { "epoch": 45.45, "learning_rate": 4.5621621621621625e-05, "loss": 0.3888, "step": 1500 }, { "epoch": 45.45, "eval_loss": 0.8813392519950867, "eval_matthews_correlation": 0.10250522978751038, "eval_runtime": 7.9207, "eval_samples_per_second": 131.68, "eval_steps_per_second": 16.539, "step": 1500 }, { "epoch": 48.48, "learning_rate": 4.5283783783783786e-05, "loss": 0.3428, "step": 1600 }, { "epoch": 48.48, "eval_loss": 0.9361783862113953, "eval_matthews_correlation": 0.1436834264468159, "eval_runtime": 7.8976, "eval_samples_per_second": 132.066, "eval_steps_per_second": 16.587, "step": 1600 }, { "epoch": 51.51, "learning_rate": 4.494594594594595e-05, "loss": 0.2977, "step": 1700 }, { "epoch": 51.51, "eval_loss": 1.078627347946167, "eval_matthews_correlation": 0.11181658143300324, "eval_runtime": 7.9966, "eval_samples_per_second": 130.43, "eval_steps_per_second": 16.382, "step": 1700 }, { "epoch": 54.54, "learning_rate": 4.460810810810811e-05, "loss": 0.2642, "step": 1800 }, { "epoch": 54.54, "eval_loss": 1.0609544515609741, "eval_matthews_correlation": 0.09013603883941315, "eval_runtime": 7.8862, "eval_samples_per_second": 132.257, "eval_steps_per_second": 16.611, "step": 1800 }, { "epoch": 57.57, "learning_rate": 4.427027027027027e-05, "loss": 0.2272, "step": 1900 }, { "epoch": 57.57, "eval_loss": 1.183494210243225, "eval_matthews_correlation": 0.11545854045964393, "eval_runtime": 7.9453, "eval_samples_per_second": 131.273, "eval_steps_per_second": 16.488, "step": 1900 }, { "epoch": 60.6, "learning_rate": 4.393243243243244e-05, "loss": 0.1915, "step": 2000 }, { "epoch": 60.6, "eval_loss": 1.2531063556671143, "eval_matthews_correlation": 0.12244528836896967, "eval_runtime": 8.0199, "eval_samples_per_second": 130.051, "eval_steps_per_second": 16.334, "step": 2000 }, { "epoch": 63.63, "learning_rate": 4.359459459459459e-05, "loss": 0.1691, "step": 2100 }, { "epoch": 63.63, "eval_loss": 1.3903430700302124, "eval_matthews_correlation": 0.07541181195571064, "eval_runtime": 7.9376, "eval_samples_per_second": 131.4, "eval_steps_per_second": 16.504, "step": 2100 }, { "epoch": 66.66, "learning_rate": 4.325675675675676e-05, "loss": 0.1491, "step": 2200 }, { "epoch": 66.66, "eval_loss": 1.4947072267532349, "eval_matthews_correlation": 0.06742580984707468, "eval_runtime": 7.9502, "eval_samples_per_second": 131.191, "eval_steps_per_second": 16.477, "step": 2200 }, { "epoch": 69.69, "learning_rate": 4.291891891891892e-05, "loss": 0.1339, "step": 2300 }, { "epoch": 69.69, "eval_loss": 1.5433533191680908, "eval_matthews_correlation": 0.0736455413240434, "eval_runtime": 8.2493, "eval_samples_per_second": 126.434, "eval_steps_per_second": 15.88, "step": 2300 }, { "epoch": 72.72, "learning_rate": 4.258108108108108e-05, "loss": 0.1164, "step": 2400 }, { "epoch": 72.72, "eval_loss": 1.5793086290359497, "eval_matthews_correlation": 0.11645872984022461, "eval_runtime": 8.2848, "eval_samples_per_second": 125.893, "eval_steps_per_second": 15.812, "step": 2400 }, { "epoch": 75.75, "learning_rate": 4.2243243243243244e-05, "loss": 0.1078, "step": 2500 }, { "epoch": 75.75, "eval_loss": 1.5938163995742798, "eval_matthews_correlation": 0.09946100084630931, "eval_runtime": 8.3263, "eval_samples_per_second": 125.266, "eval_steps_per_second": 15.733, "step": 2500 }, { "epoch": 78.78, "learning_rate": 4.1905405405405406e-05, "loss": 0.0974, "step": 2600 }, { "epoch": 78.78, "eval_loss": 1.700919508934021, "eval_matthews_correlation": 0.07568068132313144, "eval_runtime": 8.4149, "eval_samples_per_second": 123.947, "eval_steps_per_second": 15.568, "step": 2600 }, { "epoch": 78.78, "step": 2600, "total_flos": 1.7353653717551284e+19, "train_loss": 0.40566940747774566, "train_runtime": 7554.577, "train_samples_per_second": 508.301, "train_steps_per_second": 1.986 } ], "max_steps": 15000, "num_train_epochs": 455, "total_flos": 1.7353653717551284e+19, "trial_name": null, "trial_params": null }