{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 5630,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 6.937275985663082e-06,
      "loss": 0.6362,
      "step": 100
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.7197197079658508,
      "eval_loss": 0.5481122136116028,
      "eval_runtime": 6.2072,
      "eval_samples_per_second": 160.941,
      "eval_steps_per_second": 10.149,
      "step": 100
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.811827956989247e-06,
      "loss": 0.4264,
      "step": 200
    },
    {
      "epoch": 0.36,
      "eval_accuracy": 0.8008008003234863,
      "eval_loss": 0.4550396203994751,
      "eval_runtime": 6.2195,
      "eval_samples_per_second": 160.623,
      "eval_steps_per_second": 10.129,
      "step": 200
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.6863799283154114e-06,
      "loss": 0.4174,
      "step": 300
    },
    {
      "epoch": 0.53,
      "eval_accuracy": 0.7867867946624756,
      "eval_loss": 0.452409952878952,
      "eval_runtime": 6.2183,
      "eval_samples_per_second": 160.655,
      "eval_steps_per_second": 10.131,
      "step": 300
    },
    {
      "epoch": 0.71,
      "learning_rate": 6.560931899641577e-06,
      "loss": 0.4197,
      "step": 400
    },
    {
      "epoch": 0.71,
      "eval_accuracy": 0.7917917966842651,
      "eval_loss": 0.4586125910282135,
      "eval_runtime": 6.2441,
      "eval_samples_per_second": 159.991,
      "eval_steps_per_second": 10.09,
      "step": 400
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.435483870967742e-06,
      "loss": 0.3819,
      "step": 500
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.8078078031539917,
      "eval_loss": 0.4367608428001404,
      "eval_runtime": 6.2213,
      "eval_samples_per_second": 160.577,
      "eval_steps_per_second": 10.126,
      "step": 500
    },
    {
      "epoch": 1.07,
      "learning_rate": 6.310035842293907e-06,
      "loss": 0.3558,
      "step": 600
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.8068068027496338,
      "eval_loss": 0.4524727463722229,
      "eval_runtime": 6.2342,
      "eval_samples_per_second": 160.246,
      "eval_steps_per_second": 10.106,
      "step": 600
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.184587813620071e-06,
      "loss": 0.2982,
      "step": 700
    },
    {
      "epoch": 1.24,
      "eval_accuracy": 0.792792797088623,
      "eval_loss": 0.49992287158966064,
      "eval_runtime": 6.206,
      "eval_samples_per_second": 160.973,
      "eval_steps_per_second": 10.151,
      "step": 700
    },
    {
      "epoch": 1.42,
      "learning_rate": 6.059139784946236e-06,
      "loss": 0.2885,
      "step": 800
    },
    {
      "epoch": 1.42,
      "eval_accuracy": 0.8108108043670654,
      "eval_loss": 0.5129059553146362,
      "eval_runtime": 6.2199,
      "eval_samples_per_second": 160.613,
      "eval_steps_per_second": 10.129,
      "step": 800
    },
    {
      "epoch": 1.6,
      "learning_rate": 5.933691756272401e-06,
      "loss": 0.253,
      "step": 900
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.8208208084106445,
      "eval_loss": 0.5872611403465271,
      "eval_runtime": 6.2332,
      "eval_samples_per_second": 160.27,
      "eval_steps_per_second": 10.107,
      "step": 900
    },
    {
      "epoch": 1.78,
      "learning_rate": 5.8082437275985665e-06,
      "loss": 0.3354,
      "step": 1000
    },
    {
      "epoch": 1.78,
      "eval_accuracy": 0.8178178071975708,
      "eval_loss": 0.4244420826435089,
      "eval_runtime": 6.2275,
      "eval_samples_per_second": 160.417,
      "eval_steps_per_second": 10.116,
      "step": 1000
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.682795698924731e-06,
      "loss": 0.3083,
      "step": 1100
    },
    {
      "epoch": 1.95,
      "eval_accuracy": 0.8058058023452759,
      "eval_loss": 0.4852960705757141,
      "eval_runtime": 6.2193,
      "eval_samples_per_second": 160.63,
      "eval_steps_per_second": 10.13,
      "step": 1100
    },
    {
      "epoch": 2.13,
      "learning_rate": 5.557347670250896e-06,
      "loss": 0.2301,
      "step": 1200
    },
    {
      "epoch": 2.13,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 0.7208853960037231,
      "eval_runtime": 6.2021,
      "eval_samples_per_second": 161.075,
      "eval_steps_per_second": 10.158,
      "step": 1200
    },
    {
      "epoch": 2.31,
      "learning_rate": 5.431899641577061e-06,
      "loss": 0.2167,
      "step": 1300
    },
    {
      "epoch": 2.31,
      "eval_accuracy": 0.7777777910232544,
      "eval_loss": 0.8089737892150879,
      "eval_runtime": 6.2037,
      "eval_samples_per_second": 161.034,
      "eval_steps_per_second": 10.155,
      "step": 1300
    },
    {
      "epoch": 2.49,
      "learning_rate": 5.306451612903225e-06,
      "loss": 0.1863,
      "step": 1400
    },
    {
      "epoch": 2.49,
      "eval_accuracy": 0.8038038015365601,
      "eval_loss": 0.6812323927879333,
      "eval_runtime": 6.2398,
      "eval_samples_per_second": 160.102,
      "eval_steps_per_second": 10.097,
      "step": 1400
    },
    {
      "epoch": 2.66,
      "learning_rate": 5.181003584229391e-06,
      "loss": 0.2181,
      "step": 1500
    },
    {
      "epoch": 2.66,
      "eval_accuracy": 0.8138138055801392,
      "eval_loss": 0.6958026885986328,
      "eval_runtime": 6.2122,
      "eval_samples_per_second": 160.812,
      "eval_steps_per_second": 10.141,
      "step": 1500
    },
    {
      "epoch": 2.84,
      "learning_rate": 5.0555555555555555e-06,
      "loss": 0.2159,
      "step": 1600
    },
    {
      "epoch": 2.84,
      "eval_accuracy": 0.8118118047714233,
      "eval_loss": 0.6314735412597656,
      "eval_runtime": 6.2306,
      "eval_samples_per_second": 160.337,
      "eval_steps_per_second": 10.111,
      "step": 1600
    },
    {
      "epoch": 3.02,
      "learning_rate": 4.930107526881721e-06,
      "loss": 0.1828,
      "step": 1700
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.8138138055801392,
      "eval_loss": 0.7173236608505249,
      "eval_runtime": 6.2107,
      "eval_samples_per_second": 160.851,
      "eval_steps_per_second": 10.144,
      "step": 1700
    },
    {
      "epoch": 3.2,
      "learning_rate": 4.804659498207885e-06,
      "loss": 0.1287,
      "step": 1800
    },
    {
      "epoch": 3.2,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 0.9080932140350342,
      "eval_runtime": 6.2027,
      "eval_samples_per_second": 161.06,
      "eval_steps_per_second": 10.157,
      "step": 1800
    },
    {
      "epoch": 3.37,
      "learning_rate": 4.67921146953405e-06,
      "loss": 0.1711,
      "step": 1900
    },
    {
      "epoch": 3.37,
      "eval_accuracy": 0.8068068027496338,
      "eval_loss": 0.8858422040939331,
      "eval_runtime": 6.2188,
      "eval_samples_per_second": 160.641,
      "eval_steps_per_second": 10.131,
      "step": 1900
    },
    {
      "epoch": 3.55,
      "learning_rate": 4.553763440860215e-06,
      "loss": 0.1598,
      "step": 2000
    },
    {
      "epoch": 3.55,
      "eval_accuracy": 0.8028028011322021,
      "eval_loss": 0.7877860069274902,
      "eval_runtime": 6.2062,
      "eval_samples_per_second": 160.967,
      "eval_steps_per_second": 10.151,
      "step": 2000
    },
    {
      "epoch": 3.73,
      "learning_rate": 4.42831541218638e-06,
      "loss": 0.1467,
      "step": 2100
    },
    {
      "epoch": 3.73,
      "eval_accuracy": 0.7947947978973389,
      "eval_loss": 0.900332510471344,
      "eval_runtime": 6.2358,
      "eval_samples_per_second": 160.203,
      "eval_steps_per_second": 10.103,
      "step": 2100
    },
    {
      "epoch": 3.91,
      "learning_rate": 4.302867383512545e-06,
      "loss": 0.127,
      "step": 2200
    },
    {
      "epoch": 3.91,
      "eval_accuracy": 0.804804801940918,
      "eval_loss": 0.9066368341445923,
      "eval_runtime": 6.2129,
      "eval_samples_per_second": 160.795,
      "eval_steps_per_second": 10.14,
      "step": 2200
    },
    {
      "epoch": 4.09,
      "learning_rate": 4.17741935483871e-06,
      "loss": 0.1134,
      "step": 2300
    },
    {
      "epoch": 4.09,
      "eval_accuracy": 0.8118118047714233,
      "eval_loss": 0.9645766615867615,
      "eval_runtime": 6.2157,
      "eval_samples_per_second": 160.721,
      "eval_steps_per_second": 10.136,
      "step": 2300
    },
    {
      "epoch": 4.26,
      "learning_rate": 4.051971326164874e-06,
      "loss": 0.1017,
      "step": 2400
    },
    {
      "epoch": 4.26,
      "eval_accuracy": 0.804804801940918,
      "eval_loss": 0.9778422713279724,
      "eval_runtime": 6.2303,
      "eval_samples_per_second": 160.346,
      "eval_steps_per_second": 10.112,
      "step": 2400
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.926523297491039e-06,
      "loss": 0.085,
      "step": 2500
    },
    {
      "epoch": 4.44,
      "eval_accuracy": 0.8088088035583496,
      "eval_loss": 1.0528582334518433,
      "eval_runtime": 6.238,
      "eval_samples_per_second": 160.149,
      "eval_steps_per_second": 10.099,
      "step": 2500
    },
    {
      "epoch": 4.62,
      "learning_rate": 3.801075268817204e-06,
      "loss": 0.0996,
      "step": 2600
    },
    {
      "epoch": 4.62,
      "eval_accuracy": 0.8058058023452759,
      "eval_loss": 1.0082268714904785,
      "eval_runtime": 6.2065,
      "eval_samples_per_second": 160.961,
      "eval_steps_per_second": 10.151,
      "step": 2600
    },
    {
      "epoch": 4.8,
      "learning_rate": 3.6756272401433694e-06,
      "loss": 0.1054,
      "step": 2700
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 0.8108108043670654,
      "eval_loss": 0.9697705507278442,
      "eval_runtime": 6.2348,
      "eval_samples_per_second": 160.231,
      "eval_steps_per_second": 10.105,
      "step": 2700
    },
    {
      "epoch": 4.97,
      "learning_rate": 3.5501792114695336e-06,
      "loss": 0.1375,
      "step": 2800
    },
    {
      "epoch": 4.97,
      "eval_accuracy": 0.804804801940918,
      "eval_loss": 0.9333746433258057,
      "eval_runtime": 6.2109,
      "eval_samples_per_second": 160.846,
      "eval_steps_per_second": 10.143,
      "step": 2800
    },
    {
      "epoch": 5.15,
      "learning_rate": 3.4247311827956988e-06,
      "loss": 0.0487,
      "step": 2900
    },
    {
      "epoch": 5.15,
      "eval_accuracy": 0.8108108043670654,
      "eval_loss": 1.1273365020751953,
      "eval_runtime": 6.2065,
      "eval_samples_per_second": 160.961,
      "eval_steps_per_second": 10.151,
      "step": 2900
    },
    {
      "epoch": 5.33,
      "learning_rate": 3.299283154121864e-06,
      "loss": 0.0611,
      "step": 3000
    },
    {
      "epoch": 5.33,
      "eval_accuracy": 0.8058058023452759,
      "eval_loss": 1.1528337001800537,
      "eval_runtime": 6.2119,
      "eval_samples_per_second": 160.821,
      "eval_steps_per_second": 10.142,
      "step": 3000
    },
    {
      "epoch": 5.51,
      "learning_rate": 3.1738351254480286e-06,
      "loss": 0.0668,
      "step": 3100
    },
    {
      "epoch": 5.51,
      "eval_accuracy": 0.8118118047714233,
      "eval_loss": 1.0147671699523926,
      "eval_runtime": 6.2218,
      "eval_samples_per_second": 160.564,
      "eval_steps_per_second": 10.126,
      "step": 3100
    },
    {
      "epoch": 5.68,
      "learning_rate": 3.0483870967741937e-06,
      "loss": 0.0582,
      "step": 3200
    },
    {
      "epoch": 5.68,
      "eval_accuracy": 0.8108108043670654,
      "eval_loss": 1.1332666873931885,
      "eval_runtime": 6.2186,
      "eval_samples_per_second": 160.648,
      "eval_steps_per_second": 10.131,
      "step": 3200
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.9229390681003584e-06,
      "loss": 0.0869,
      "step": 3300
    },
    {
      "epoch": 5.86,
      "eval_accuracy": 0.8088088035583496,
      "eval_loss": 1.060727596282959,
      "eval_runtime": 6.1932,
      "eval_samples_per_second": 161.305,
      "eval_steps_per_second": 10.172,
      "step": 3300
    },
    {
      "epoch": 6.04,
      "learning_rate": 2.797491039426523e-06,
      "loss": 0.0623,
      "step": 3400
    },
    {
      "epoch": 6.04,
      "eval_accuracy": 0.8068068027496338,
      "eval_loss": 1.1880476474761963,
      "eval_runtime": 6.2192,
      "eval_samples_per_second": 160.631,
      "eval_steps_per_second": 10.13,
      "step": 3400
    },
    {
      "epoch": 6.22,
      "learning_rate": 2.6720430107526883e-06,
      "loss": 0.0317,
      "step": 3500
    },
    {
      "epoch": 6.22,
      "eval_accuracy": 0.8008008003234863,
      "eval_loss": 1.2836244106292725,
      "eval_runtime": 6.2079,
      "eval_samples_per_second": 160.925,
      "eval_steps_per_second": 10.148,
      "step": 3500
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.546594982078853e-06,
      "loss": 0.0546,
      "step": 3600
    },
    {
      "epoch": 6.39,
      "eval_accuracy": 0.8058058023452759,
      "eval_loss": 1.2147704362869263,
      "eval_runtime": 6.2243,
      "eval_samples_per_second": 160.501,
      "eval_steps_per_second": 10.122,
      "step": 3600
    },
    {
      "epoch": 6.57,
      "learning_rate": 2.4211469534050177e-06,
      "loss": 0.0486,
      "step": 3700
    },
    {
      "epoch": 6.57,
      "eval_accuracy": 0.8008008003234863,
      "eval_loss": 1.334807276725769,
      "eval_runtime": 6.1963,
      "eval_samples_per_second": 161.225,
      "eval_steps_per_second": 10.167,
      "step": 3700
    },
    {
      "epoch": 6.75,
      "learning_rate": 2.2956989247311828e-06,
      "loss": 0.0332,
      "step": 3800
    },
    {
      "epoch": 6.75,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 1.3734461069107056,
      "eval_runtime": 6.3321,
      "eval_samples_per_second": 157.768,
      "eval_steps_per_second": 9.949,
      "step": 3800
    },
    {
      "epoch": 6.93,
      "learning_rate": 2.1702508960573475e-06,
      "loss": 0.051,
      "step": 3900
    },
    {
      "epoch": 6.93,
      "eval_accuracy": 0.7977977991104126,
      "eval_loss": 1.2966439723968506,
      "eval_runtime": 6.2073,
      "eval_samples_per_second": 160.94,
      "eval_steps_per_second": 10.149,
      "step": 3900
    },
    {
      "epoch": 7.1,
      "learning_rate": 2.044802867383512e-06,
      "loss": 0.0217,
      "step": 4000
    },
    {
      "epoch": 7.1,
      "eval_accuracy": 0.804804801940918,
      "eval_loss": 1.385273814201355,
      "eval_runtime": 6.2117,
      "eval_samples_per_second": 160.826,
      "eval_steps_per_second": 10.142,
      "step": 4000
    },
    {
      "epoch": 7.28,
      "learning_rate": 1.9193548387096773e-06,
      "loss": 0.0109,
      "step": 4100
    },
    {
      "epoch": 7.28,
      "eval_accuracy": 0.8068068027496338,
      "eval_loss": 1.480326533317566,
      "eval_runtime": 6.2106,
      "eval_samples_per_second": 160.854,
      "eval_steps_per_second": 10.144,
      "step": 4100
    },
    {
      "epoch": 7.46,
      "learning_rate": 1.793906810035842e-06,
      "loss": 0.0345,
      "step": 4200
    },
    {
      "epoch": 7.46,
      "eval_accuracy": 0.7997997999191284,
      "eval_loss": 1.4906260967254639,
      "eval_runtime": 6.2002,
      "eval_samples_per_second": 161.124,
      "eval_steps_per_second": 10.161,
      "step": 4200
    },
    {
      "epoch": 7.64,
      "learning_rate": 1.6684587813620071e-06,
      "loss": 0.0365,
      "step": 4300
    },
    {
      "epoch": 7.64,
      "eval_accuracy": 0.8028028011322021,
      "eval_loss": 1.4347106218338013,
      "eval_runtime": 6.2133,
      "eval_samples_per_second": 160.783,
      "eval_steps_per_second": 10.139,
      "step": 4300
    },
    {
      "epoch": 7.82,
      "learning_rate": 1.543010752688172e-06,
      "loss": 0.0265,
      "step": 4400
    },
    {
      "epoch": 7.82,
      "eval_accuracy": 0.8128128051757812,
      "eval_loss": 1.3976863622665405,
      "eval_runtime": 6.224,
      "eval_samples_per_second": 160.508,
      "eval_steps_per_second": 10.122,
      "step": 4400
    },
    {
      "epoch": 7.99,
      "learning_rate": 1.417562724014337e-06,
      "loss": 0.0257,
      "step": 4500
    },
    {
      "epoch": 7.99,
      "eval_accuracy": 0.8108108043670654,
      "eval_loss": 1.370467185974121,
      "eval_runtime": 6.2313,
      "eval_samples_per_second": 160.321,
      "eval_steps_per_second": 10.11,
      "step": 4500
    },
    {
      "epoch": 8.17,
      "learning_rate": 1.2921146953405017e-06,
      "loss": 0.0036,
      "step": 4600
    },
    {
      "epoch": 8.17,
      "eval_accuracy": 0.8168168067932129,
      "eval_loss": 1.4352822303771973,
      "eval_runtime": 6.2072,
      "eval_samples_per_second": 160.943,
      "eval_steps_per_second": 10.15,
      "step": 4600
    },
    {
      "epoch": 8.35,
      "learning_rate": 1.1666666666666666e-06,
      "loss": 0.0269,
      "step": 4700
    },
    {
      "epoch": 8.35,
      "eval_accuracy": 0.8068068027496338,
      "eval_loss": 1.4826140403747559,
      "eval_runtime": 6.2178,
      "eval_samples_per_second": 160.669,
      "eval_steps_per_second": 10.132,
      "step": 4700
    },
    {
      "epoch": 8.53,
      "learning_rate": 1.0412186379928315e-06,
      "loss": 0.0231,
      "step": 4800
    },
    {
      "epoch": 8.53,
      "eval_accuracy": 0.8118118047714233,
      "eval_loss": 1.4810999631881714,
      "eval_runtime": 6.3061,
      "eval_samples_per_second": 158.417,
      "eval_steps_per_second": 9.99,
      "step": 4800
    },
    {
      "epoch": 8.7,
      "learning_rate": 9.157706093189965e-07,
      "loss": 0.0204,
      "step": 4900
    },
    {
      "epoch": 8.7,
      "eval_accuracy": 0.8028028011322021,
      "eval_loss": 1.5245323181152344,
      "eval_runtime": 6.2057,
      "eval_samples_per_second": 160.982,
      "eval_steps_per_second": 10.152,
      "step": 4900
    },
    {
      "epoch": 8.88,
      "learning_rate": 7.903225806451612e-07,
      "loss": 0.0263,
      "step": 5000
    },
    {
      "epoch": 8.88,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 1.5123308897018433,
      "eval_runtime": 6.2053,
      "eval_samples_per_second": 160.991,
      "eval_steps_per_second": 10.153,
      "step": 5000
    },
    {
      "epoch": 9.06,
      "learning_rate": 6.648745519713261e-07,
      "loss": 0.0138,
      "step": 5100
    },
    {
      "epoch": 9.06,
      "eval_accuracy": 0.8028028011322021,
      "eval_loss": 1.51128089427948,
      "eval_runtime": 6.2898,
      "eval_samples_per_second": 158.83,
      "eval_steps_per_second": 10.016,
      "step": 5100
    },
    {
      "epoch": 9.24,
      "learning_rate": 5.39426523297491e-07,
      "loss": 0.0089,
      "step": 5200
    },
    {
      "epoch": 9.24,
      "eval_accuracy": 0.7977977991104126,
      "eval_loss": 1.5846397876739502,
      "eval_runtime": 6.2124,
      "eval_samples_per_second": 160.808,
      "eval_steps_per_second": 10.141,
      "step": 5200
    },
    {
      "epoch": 9.41,
      "learning_rate": 4.1397849462365595e-07,
      "loss": 0.029,
      "step": 5300
    },
    {
      "epoch": 9.41,
      "eval_accuracy": 0.8008008003234863,
      "eval_loss": 1.5361814498901367,
      "eval_runtime": 6.2541,
      "eval_samples_per_second": 159.736,
      "eval_steps_per_second": 10.073,
      "step": 5300
    },
    {
      "epoch": 9.59,
      "learning_rate": 2.8853046594982076e-07,
      "loss": 0.0058,
      "step": 5400
    },
    {
      "epoch": 9.59,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 1.5759379863739014,
      "eval_runtime": 6.221,
      "eval_samples_per_second": 160.585,
      "eval_steps_per_second": 10.127,
      "step": 5400
    },
    {
      "epoch": 9.77,
      "learning_rate": 1.6308243727598568e-07,
      "loss": 0.0084,
      "step": 5500
    },
    {
      "epoch": 9.77,
      "eval_accuracy": 0.8018018007278442,
      "eval_loss": 1.5678976774215698,
      "eval_runtime": 6.2009,
      "eval_samples_per_second": 161.105,
      "eval_steps_per_second": 10.16,
      "step": 5500
    },
    {
      "epoch": 9.95,
      "learning_rate": 3.763440860215054e-08,
      "loss": 0.0065,
      "step": 5600
    },
    {
      "epoch": 9.95,
      "eval_accuracy": 0.8028028011322021,
      "eval_loss": 1.568334937095642,
      "eval_runtime": 6.2439,
      "eval_samples_per_second": 159.996,
      "eval_steps_per_second": 10.09,
      "step": 5600
    },
    {
      "epoch": 10.0,
      "step": 5630,
      "total_flos": 4.193719446528e+16,
      "train_loss": 0.13640729715885533,
      "train_runtime": 2182.3127,
      "train_samples_per_second": 41.241,
      "train_steps_per_second": 2.58
    }
  ],
  "max_steps": 5630,
  "num_train_epochs": 10,
  "total_flos": 4.193719446528e+16,
  "trial_name": null,
  "trial_params": null
}