{ "best_metric": 1.299881935119629, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.9876138433515482, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.315, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.160651922225952, "eval_runtime": 144.1197, "eval_samples_per_second": 43.533, "eval_steps_per_second": 5.447, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.1091, "step": 200 }, { "epoch": 0.15, "eval_loss": 2.0636215209960938, "eval_runtime": 143.9553, "eval_samples_per_second": 43.583, "eval_steps_per_second": 5.453, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.0344, "step": 300 }, { "epoch": 0.22, "eval_loss": 2.0027897357940674, "eval_runtime": 144.1134, "eval_samples_per_second": 43.535, "eval_steps_per_second": 5.447, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.9806, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.9537049531936646, "eval_runtime": 144.1171, "eval_samples_per_second": 43.534, "eval_steps_per_second": 5.447, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.9262, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.9101046323776245, "eval_runtime": 143.9719, "eval_samples_per_second": 43.578, "eval_steps_per_second": 5.452, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.8971, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.8801190853118896, "eval_runtime": 143.9079, "eval_samples_per_second": 43.597, "eval_steps_per_second": 5.455, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.8565, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.8463259935379028, "eval_runtime": 143.8032, "eval_samples_per_second": 43.629, "eval_steps_per_second": 5.459, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.8327, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.8173062801361084, "eval_runtime": 143.9045, "eval_samples_per_second": 43.598, "eval_steps_per_second": 5.455, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.7952, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.7927803993225098, "eval_runtime": 143.8647, "eval_samples_per_second": 43.61, "eval_steps_per_second": 5.457, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.7792, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.7664344310760498, "eval_runtime": 143.816, "eval_samples_per_second": 43.625, "eval_steps_per_second": 5.458, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.7588, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.7422090768814087, "eval_runtime": 143.8688, "eval_samples_per_second": 43.609, "eval_steps_per_second": 5.456, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.7204, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.72101891040802, "eval_runtime": 143.8632, "eval_samples_per_second": 43.611, "eval_steps_per_second": 5.457, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.7282, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.6993989944458008, "eval_runtime": 143.9159, "eval_samples_per_second": 43.595, "eval_steps_per_second": 5.455, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.7077, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.6939107179641724, "eval_runtime": 132.967, "eval_samples_per_second": 47.185, "eval_steps_per_second": 5.904, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.6649, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.6719050407409668, "eval_runtime": 132.9346, "eval_samples_per_second": 47.196, "eval_steps_per_second": 5.905, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.6428, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.649336576461792, "eval_runtime": 132.9323, "eval_samples_per_second": 47.197, "eval_steps_per_second": 5.905, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.6362, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.629374623298645, "eval_runtime": 132.9442, "eval_samples_per_second": 47.193, "eval_steps_per_second": 5.905, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.6043, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.6119710206985474, "eval_runtime": 132.947, "eval_samples_per_second": 47.192, "eval_steps_per_second": 5.905, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.57, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.5946060419082642, "eval_runtime": 133.2291, "eval_samples_per_second": 47.092, "eval_steps_per_second": 5.892, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.5637, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.577666163444519, "eval_runtime": 133.1755, "eval_samples_per_second": 47.111, "eval_steps_per_second": 5.894, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.5403, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.563066005706787, "eval_runtime": 133.3045, "eval_samples_per_second": 47.065, "eval_steps_per_second": 5.889, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.5421, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.5478307008743286, "eval_runtime": 133.145, "eval_samples_per_second": 47.122, "eval_steps_per_second": 5.896, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.5198, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.531746506690979, "eval_runtime": 133.271, "eval_samples_per_second": 47.077, "eval_steps_per_second": 5.89, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.5204, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.5162949562072754, "eval_runtime": 133.1561, "eval_samples_per_second": 47.118, "eval_steps_per_second": 5.895, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.4905, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.5029550790786743, "eval_runtime": 133.1207, "eval_samples_per_second": 47.13, "eval_steps_per_second": 5.897, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.4783, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.4883637428283691, "eval_runtime": 133.2381, "eval_samples_per_second": 47.089, "eval_steps_per_second": 5.892, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.4709, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.4737261533737183, "eval_runtime": 133.3108, "eval_samples_per_second": 47.063, "eval_steps_per_second": 5.888, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.4331, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.462561011314392, "eval_runtime": 133.3104, "eval_samples_per_second": 47.063, "eval_steps_per_second": 5.889, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.4116, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.4483799934387207, "eval_runtime": 133.2589, "eval_samples_per_second": 47.081, "eval_steps_per_second": 5.891, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.4104, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.4345015287399292, "eval_runtime": 133.1546, "eval_samples_per_second": 47.118, "eval_steps_per_second": 5.895, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.3987, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.4243552684783936, "eval_runtime": 133.2552, "eval_samples_per_second": 47.083, "eval_steps_per_second": 5.891, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.3984, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.4116299152374268, "eval_runtime": 133.2306, "eval_samples_per_second": 47.091, "eval_steps_per_second": 5.892, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.3786, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.3974164724349976, "eval_runtime": 133.3674, "eval_samples_per_second": 47.043, "eval_steps_per_second": 5.886, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.3594, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.3871479034423828, "eval_runtime": 133.2189, "eval_samples_per_second": 47.095, "eval_steps_per_second": 5.893, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.3587, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.3761059045791626, "eval_runtime": 133.2457, "eval_samples_per_second": 47.086, "eval_steps_per_second": 5.891, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.3466, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.3624399900436401, "eval_runtime": 133.3002, "eval_samples_per_second": 47.067, "eval_steps_per_second": 5.889, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.3282, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.3485013246536255, "eval_runtime": 133.2962, "eval_samples_per_second": 47.068, "eval_steps_per_second": 5.889, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.3053, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.3402942419052124, "eval_runtime": 133.2508, "eval_samples_per_second": 47.084, "eval_steps_per_second": 5.891, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.312, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.3246259689331055, "eval_runtime": 133.2626, "eval_samples_per_second": 47.08, "eval_steps_per_second": 5.891, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.2977, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.314718246459961, "eval_runtime": 133.1685, "eval_samples_per_second": 47.113, "eval_steps_per_second": 5.895, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.2922, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.299881935119629, "eval_runtime": 133.2671, "eval_samples_per_second": 47.078, "eval_steps_per_second": 5.89, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.4410408101719245e+17, "trial_name": null, "trial_params": null }