{
  "best_metric": 0.4429960548877716,
  "best_model_checkpoint": "speecht5_finetuned_voxpopuli_pl/checkpoint-4000",
  "epoch": 20.100502512562816,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12562814070351758,
      "grad_norm": 34.14754104614258,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 6.2754,
      "step": 25
    },
    {
      "epoch": 0.25125628140703515,
      "grad_norm": 25.906333923339844,
      "learning_rate": 9.400000000000001e-07,
      "loss": 6.1497,
      "step": 50
    },
    {
      "epoch": 0.3768844221105528,
      "grad_norm": 29.097463607788086,
      "learning_rate": 1.44e-06,
      "loss": 6.0335,
      "step": 75
    },
    {
      "epoch": 0.5025125628140703,
      "grad_norm": 18.543378829956055,
      "learning_rate": 1.94e-06,
      "loss": 5.8815,
      "step": 100
    },
    {
      "epoch": 0.628140703517588,
      "grad_norm": 24.147188186645508,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 5.5659,
      "step": 125
    },
    {
      "epoch": 0.7537688442211056,
      "grad_norm": 15.634539604187012,
      "learning_rate": 2.9400000000000002e-06,
      "loss": 5.3573,
      "step": 150
    },
    {
      "epoch": 0.8793969849246231,
      "grad_norm": 14.31460189819336,
      "learning_rate": 3.44e-06,
      "loss": 5.2616,
      "step": 175
    },
    {
      "epoch": 1.0050251256281406,
      "grad_norm": 19.75978660583496,
      "learning_rate": 3.94e-06,
      "loss": 5.1787,
      "step": 200
    },
    {
      "epoch": 1.1306532663316582,
      "grad_norm": 12.639854431152344,
      "learning_rate": 4.440000000000001e-06,
      "loss": 5.1214,
      "step": 225
    },
    {
      "epoch": 1.2562814070351758,
      "grad_norm": 17.792396545410156,
      "learning_rate": 4.94e-06,
      "loss": 5.0703,
      "step": 250
    },
    {
      "epoch": 1.3819095477386933,
      "grad_norm": 22.859201431274414,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 4.9714,
      "step": 275
    },
    {
      "epoch": 1.507537688442211,
      "grad_norm": 16.62458610534668,
      "learning_rate": 5.94e-06,
      "loss": 4.8681,
      "step": 300
    },
    {
      "epoch": 1.6331658291457285,
      "grad_norm": 17.881174087524414,
      "learning_rate": 6.440000000000001e-06,
      "loss": 4.8252,
      "step": 325
    },
    {
      "epoch": 1.758793969849246,
      "grad_norm": 14.765595436096191,
      "learning_rate": 6.9400000000000005e-06,
      "loss": 4.5588,
      "step": 350
    },
    {
      "epoch": 1.8844221105527639,
      "grad_norm": 13.808978080749512,
      "learning_rate": 7.440000000000001e-06,
      "loss": 4.5211,
      "step": 375
    },
    {
      "epoch": 2.0100502512562812,
      "grad_norm": 10.674385070800781,
      "learning_rate": 7.94e-06,
      "loss": 4.4888,
      "step": 400
    },
    {
      "epoch": 2.135678391959799,
      "grad_norm": 15.555588722229004,
      "learning_rate": 8.44e-06,
      "loss": 4.4513,
      "step": 425
    },
    {
      "epoch": 2.2613065326633164,
      "grad_norm": 13.092530250549316,
      "learning_rate": 8.94e-06,
      "loss": 4.3646,
      "step": 450
    },
    {
      "epoch": 2.3869346733668344,
      "grad_norm": 12.971352577209473,
      "learning_rate": 9.440000000000001e-06,
      "loss": 4.3459,
      "step": 475
    },
    {
      "epoch": 2.5125628140703515,
      "grad_norm": 10.348517417907715,
      "learning_rate": 9.940000000000001e-06,
      "loss": 4.2915,
      "step": 500
    },
    {
      "epoch": 2.6381909547738696,
      "grad_norm": 13.625853538513184,
      "learning_rate": 9.937142857142858e-06,
      "loss": 4.3095,
      "step": 525
    },
    {
      "epoch": 2.7638190954773867,
      "grad_norm": 11.426923751831055,
      "learning_rate": 9.865714285714285e-06,
      "loss": 4.1899,
      "step": 550
    },
    {
      "epoch": 2.8894472361809047,
      "grad_norm": 11.516312599182129,
      "learning_rate": 9.794285714285714e-06,
      "loss": 4.2233,
      "step": 575
    },
    {
      "epoch": 3.0150753768844223,
      "grad_norm": 16.141698837280273,
      "learning_rate": 9.722857142857143e-06,
      "loss": 4.1831,
      "step": 600
    },
    {
      "epoch": 3.14070351758794,
      "grad_norm": 14.935202598571777,
      "learning_rate": 9.651428571428572e-06,
      "loss": 4.1739,
      "step": 625
    },
    {
      "epoch": 3.2663316582914574,
      "grad_norm": 10.119762420654297,
      "learning_rate": 9.58e-06,
      "loss": 4.1649,
      "step": 650
    },
    {
      "epoch": 3.391959798994975,
      "grad_norm": 16.50948143005371,
      "learning_rate": 9.508571428571429e-06,
      "loss": 4.1627,
      "step": 675
    },
    {
      "epoch": 3.5175879396984926,
      "grad_norm": 10.08300495147705,
      "learning_rate": 9.437142857142858e-06,
      "loss": 4.1915,
      "step": 700
    },
    {
      "epoch": 3.64321608040201,
      "grad_norm": 9.375288963317871,
      "learning_rate": 9.365714285714287e-06,
      "loss": 4.1279,
      "step": 725
    },
    {
      "epoch": 3.7688442211055277,
      "grad_norm": 13.562234878540039,
      "learning_rate": 9.294285714285714e-06,
      "loss": 4.1083,
      "step": 750
    },
    {
      "epoch": 3.8944723618090453,
      "grad_norm": 15.115653991699219,
      "learning_rate": 9.222857142857143e-06,
      "loss": 4.1236,
      "step": 775
    },
    {
      "epoch": 4.0201005025125625,
      "grad_norm": 9.049237251281738,
      "learning_rate": 9.151428571428572e-06,
      "loss": 4.0964,
      "step": 800
    },
    {
      "epoch": 4.1457286432160805,
      "grad_norm": 9.250771522521973,
      "learning_rate": 9.080000000000001e-06,
      "loss": 4.0669,
      "step": 825
    },
    {
      "epoch": 4.271356783919598,
      "grad_norm": 12.07302474975586,
      "learning_rate": 9.00857142857143e-06,
      "loss": 4.0587,
      "step": 850
    },
    {
      "epoch": 4.396984924623116,
      "grad_norm": 9.538933753967285,
      "learning_rate": 8.937142857142857e-06,
      "loss": 4.0837,
      "step": 875
    },
    {
      "epoch": 4.522613065326633,
      "grad_norm": 14.347626686096191,
      "learning_rate": 8.865714285714287e-06,
      "loss": 4.0086,
      "step": 900
    },
    {
      "epoch": 4.648241206030151,
      "grad_norm": 10.831369400024414,
      "learning_rate": 8.794285714285716e-06,
      "loss": 4.0563,
      "step": 925
    },
    {
      "epoch": 4.773869346733669,
      "grad_norm": 9.891343116760254,
      "learning_rate": 8.722857142857145e-06,
      "loss": 4.0057,
      "step": 950
    },
    {
      "epoch": 4.899497487437186,
      "grad_norm": 9.572471618652344,
      "learning_rate": 8.651428571428572e-06,
      "loss": 4.0557,
      "step": 975
    },
    {
      "epoch": 5.025125628140704,
      "grad_norm": 9.745298385620117,
      "learning_rate": 8.580000000000001e-06,
      "loss": 3.9902,
      "step": 1000
    },
    {
      "epoch": 5.025125628140704,
      "eval_loss": 0.46117448806762695,
      "eval_runtime": 75.4681,
      "eval_samples_per_second": 9.381,
      "eval_steps_per_second": 4.691,
      "step": 1000
    },
    {
      "epoch": 5.150753768844221,
      "grad_norm": 10.219923973083496,
      "learning_rate": 8.50857142857143e-06,
      "loss": 3.9991,
      "step": 1025
    },
    {
      "epoch": 5.276381909547739,
      "grad_norm": 9.258833885192871,
      "learning_rate": 8.437142857142859e-06,
      "loss": 4.0032,
      "step": 1050
    },
    {
      "epoch": 5.402010050251256,
      "grad_norm": 12.21104907989502,
      "learning_rate": 8.365714285714286e-06,
      "loss": 4.0337,
      "step": 1075
    },
    {
      "epoch": 5.527638190954773,
      "grad_norm": 10.777599334716797,
      "learning_rate": 8.294285714285715e-06,
      "loss": 4.0036,
      "step": 1100
    },
    {
      "epoch": 5.653266331658291,
      "grad_norm": 10.322349548339844,
      "learning_rate": 8.222857142857144e-06,
      "loss": 3.9811,
      "step": 1125
    },
    {
      "epoch": 5.778894472361809,
      "grad_norm": 9.940053939819336,
      "learning_rate": 8.151428571428572e-06,
      "loss": 4.0264,
      "step": 1150
    },
    {
      "epoch": 5.9045226130653266,
      "grad_norm": 12.996850967407227,
      "learning_rate": 8.08e-06,
      "loss": 3.9783,
      "step": 1175
    },
    {
      "epoch": 6.030150753768845,
      "grad_norm": 10.784270286560059,
      "learning_rate": 8.00857142857143e-06,
      "loss": 3.936,
      "step": 1200
    },
    {
      "epoch": 6.155778894472362,
      "grad_norm": 10.86549186706543,
      "learning_rate": 7.937142857142857e-06,
      "loss": 3.9803,
      "step": 1225
    },
    {
      "epoch": 6.28140703517588,
      "grad_norm": 10.926647186279297,
      "learning_rate": 7.865714285714286e-06,
      "loss": 3.9587,
      "step": 1250
    },
    {
      "epoch": 6.407035175879397,
      "grad_norm": 12.103693008422852,
      "learning_rate": 7.794285714285715e-06,
      "loss": 3.9441,
      "step": 1275
    },
    {
      "epoch": 6.532663316582915,
      "grad_norm": 14.731566429138184,
      "learning_rate": 7.722857142857142e-06,
      "loss": 3.9541,
      "step": 1300
    },
    {
      "epoch": 6.658291457286432,
      "grad_norm": 13.810087203979492,
      "learning_rate": 7.651428571428571e-06,
      "loss": 3.962,
      "step": 1325
    },
    {
      "epoch": 6.78391959798995,
      "grad_norm": 10.733597755432129,
      "learning_rate": 7.58e-06,
      "loss": 3.9696,
      "step": 1350
    },
    {
      "epoch": 6.909547738693467,
      "grad_norm": 12.271906852722168,
      "learning_rate": 7.508571428571429e-06,
      "loss": 3.9319,
      "step": 1375
    },
    {
      "epoch": 7.035175879396985,
      "grad_norm": 17.454214096069336,
      "learning_rate": 7.4371428571428575e-06,
      "loss": 3.9096,
      "step": 1400
    },
    {
      "epoch": 7.160804020100502,
      "grad_norm": 9.891731262207031,
      "learning_rate": 7.365714285714286e-06,
      "loss": 3.963,
      "step": 1425
    },
    {
      "epoch": 7.28643216080402,
      "grad_norm": 10.400883674621582,
      "learning_rate": 7.294285714285715e-06,
      "loss": 3.9529,
      "step": 1450
    },
    {
      "epoch": 7.4120603015075375,
      "grad_norm": 8.886434555053711,
      "learning_rate": 7.222857142857144e-06,
      "loss": 3.9048,
      "step": 1475
    },
    {
      "epoch": 7.5376884422110555,
      "grad_norm": 13.72711181640625,
      "learning_rate": 7.151428571428573e-06,
      "loss": 3.9097,
      "step": 1500
    },
    {
      "epoch": 7.663316582914573,
      "grad_norm": 10.161803245544434,
      "learning_rate": 7.08e-06,
      "loss": 3.9029,
      "step": 1525
    },
    {
      "epoch": 7.788944723618091,
      "grad_norm": 9.717086791992188,
      "learning_rate": 7.008571428571429e-06,
      "loss": 3.9028,
      "step": 1550
    },
    {
      "epoch": 7.914572864321608,
      "grad_norm": 9.483698844909668,
      "learning_rate": 6.937142857142858e-06,
      "loss": 3.9157,
      "step": 1575
    },
    {
      "epoch": 8.040201005025125,
      "grad_norm": 8.951908111572266,
      "learning_rate": 6.865714285714287e-06,
      "loss": 3.9106,
      "step": 1600
    },
    {
      "epoch": 8.165829145728644,
      "grad_norm": 8.099654197692871,
      "learning_rate": 6.794285714285714e-06,
      "loss": 3.9062,
      "step": 1625
    },
    {
      "epoch": 8.291457286432161,
      "grad_norm": 10.005452156066895,
      "learning_rate": 6.722857142857143e-06,
      "loss": 3.9139,
      "step": 1650
    },
    {
      "epoch": 8.417085427135678,
      "grad_norm": 7.832030296325684,
      "learning_rate": 6.651428571428572e-06,
      "loss": 3.8731,
      "step": 1675
    },
    {
      "epoch": 8.542713567839195,
      "grad_norm": 9.519272804260254,
      "learning_rate": 6.5800000000000005e-06,
      "loss": 3.9234,
      "step": 1700
    },
    {
      "epoch": 8.668341708542714,
      "grad_norm": 7.6004252433776855,
      "learning_rate": 6.5085714285714295e-06,
      "loss": 3.8889,
      "step": 1725
    },
    {
      "epoch": 8.793969849246231,
      "grad_norm": 8.43916130065918,
      "learning_rate": 6.437142857142858e-06,
      "loss": 3.8813,
      "step": 1750
    },
    {
      "epoch": 8.919597989949748,
      "grad_norm": 9.573629379272461,
      "learning_rate": 6.365714285714286e-06,
      "loss": 3.9234,
      "step": 1775
    },
    {
      "epoch": 9.045226130653266,
      "grad_norm": 11.034466743469238,
      "learning_rate": 6.294285714285715e-06,
      "loss": 3.9227,
      "step": 1800
    },
    {
      "epoch": 9.170854271356784,
      "grad_norm": 9.65125560760498,
      "learning_rate": 6.222857142857144e-06,
      "loss": 3.8806,
      "step": 1825
    },
    {
      "epoch": 9.296482412060302,
      "grad_norm": 8.907113075256348,
      "learning_rate": 6.151428571428571e-06,
      "loss": 3.8878,
      "step": 1850
    },
    {
      "epoch": 9.422110552763819,
      "grad_norm": 9.000490188598633,
      "learning_rate": 6.08e-06,
      "loss": 3.858,
      "step": 1875
    },
    {
      "epoch": 9.547738693467338,
      "grad_norm": 10.825636863708496,
      "learning_rate": 6.008571428571429e-06,
      "loss": 3.8546,
      "step": 1900
    },
    {
      "epoch": 9.673366834170855,
      "grad_norm": 8.722935676574707,
      "learning_rate": 5.937142857142858e-06,
      "loss": 3.871,
      "step": 1925
    },
    {
      "epoch": 9.798994974874372,
      "grad_norm": 9.243980407714844,
      "learning_rate": 5.865714285714286e-06,
      "loss": 3.8486,
      "step": 1950
    },
    {
      "epoch": 9.924623115577889,
      "grad_norm": 10.595390319824219,
      "learning_rate": 5.794285714285715e-06,
      "loss": 3.8973,
      "step": 1975
    },
    {
      "epoch": 10.050251256281408,
      "grad_norm": 11.8334321975708,
      "learning_rate": 5.722857142857144e-06,
      "loss": 3.8124,
      "step": 2000
    },
    {
      "epoch": 10.050251256281408,
      "eval_loss": 0.44917792081832886,
      "eval_runtime": 76.9989,
      "eval_samples_per_second": 9.195,
      "eval_steps_per_second": 4.597,
      "step": 2000
    },
    {
      "epoch": 10.175879396984925,
      "grad_norm": 12.580653190612793,
      "learning_rate": 5.651428571428572e-06,
      "loss": 3.8754,
      "step": 2025
    },
    {
      "epoch": 10.301507537688442,
      "grad_norm": 9.411772727966309,
      "learning_rate": 5.582857142857143e-06,
      "loss": 3.839,
      "step": 2050
    },
    {
      "epoch": 10.42713567839196,
      "grad_norm": 8.65829849243164,
      "learning_rate": 5.511428571428572e-06,
      "loss": 3.8123,
      "step": 2075
    },
    {
      "epoch": 10.552763819095478,
      "grad_norm": 9.79350757598877,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 3.8609,
      "step": 2100
    },
    {
      "epoch": 10.678391959798995,
      "grad_norm": 9.445945739746094,
      "learning_rate": 5.368571428571429e-06,
      "loss": 3.8359,
      "step": 2125
    },
    {
      "epoch": 10.804020100502512,
      "grad_norm": 11.811120986938477,
      "learning_rate": 5.297142857142858e-06,
      "loss": 3.8632,
      "step": 2150
    },
    {
      "epoch": 10.92964824120603,
      "grad_norm": 10.508607864379883,
      "learning_rate": 5.225714285714286e-06,
      "loss": 3.8324,
      "step": 2175
    },
    {
      "epoch": 11.055276381909549,
      "grad_norm": 8.808614730834961,
      "learning_rate": 5.154285714285715e-06,
      "loss": 3.8314,
      "step": 2200
    },
    {
      "epoch": 11.180904522613066,
      "grad_norm": 11.709993362426758,
      "learning_rate": 5.082857142857144e-06,
      "loss": 3.8904,
      "step": 2225
    },
    {
      "epoch": 11.306532663316583,
      "grad_norm": 8.451616287231445,
      "learning_rate": 5.011428571428571e-06,
      "loss": 3.8471,
      "step": 2250
    },
    {
      "epoch": 11.4321608040201,
      "grad_norm": 10.084294319152832,
      "learning_rate": 4.94e-06,
      "loss": 3.8881,
      "step": 2275
    },
    {
      "epoch": 11.557788944723619,
      "grad_norm": 17.51856803894043,
      "learning_rate": 4.868571428571429e-06,
      "loss": 3.8931,
      "step": 2300
    },
    {
      "epoch": 11.683417085427136,
      "grad_norm": 11.671639442443848,
      "learning_rate": 4.797142857142857e-06,
      "loss": 3.8578,
      "step": 2325
    },
    {
      "epoch": 11.809045226130653,
      "grad_norm": 8.713754653930664,
      "learning_rate": 4.725714285714286e-06,
      "loss": 3.832,
      "step": 2350
    },
    {
      "epoch": 11.93467336683417,
      "grad_norm": 11.429880142211914,
      "learning_rate": 4.6542857142857145e-06,
      "loss": 3.8556,
      "step": 2375
    },
    {
      "epoch": 12.06030150753769,
      "grad_norm": 9.178875923156738,
      "learning_rate": 4.5828571428571435e-06,
      "loss": 3.8361,
      "step": 2400
    },
    {
      "epoch": 12.185929648241206,
      "grad_norm": 7.655440330505371,
      "learning_rate": 4.511428571428572e-06,
      "loss": 3.8337,
      "step": 2425
    },
    {
      "epoch": 12.311557788944723,
      "grad_norm": 10.44965648651123,
      "learning_rate": 4.440000000000001e-06,
      "loss": 3.877,
      "step": 2450
    },
    {
      "epoch": 12.43718592964824,
      "grad_norm": 9.04977035522461,
      "learning_rate": 4.368571428571429e-06,
      "loss": 3.8504,
      "step": 2475
    },
    {
      "epoch": 12.56281407035176,
      "grad_norm": 8.15368366241455,
      "learning_rate": 4.297142857142858e-06,
      "loss": 3.8297,
      "step": 2500
    },
    {
      "epoch": 12.688442211055277,
      "grad_norm": 9.811213493347168,
      "learning_rate": 4.225714285714286e-06,
      "loss": 3.8289,
      "step": 2525
    },
    {
      "epoch": 12.814070351758794,
      "grad_norm": 8.397767066955566,
      "learning_rate": 4.154285714285714e-06,
      "loss": 3.8352,
      "step": 2550
    },
    {
      "epoch": 12.93969849246231,
      "grad_norm": 10.471597671508789,
      "learning_rate": 4.082857142857143e-06,
      "loss": 3.8438,
      "step": 2575
    },
    {
      "epoch": 13.06532663316583,
      "grad_norm": 8.042658805847168,
      "learning_rate": 4.011428571428571e-06,
      "loss": 3.8308,
      "step": 2600
    },
    {
      "epoch": 13.190954773869347,
      "grad_norm": 9.240007400512695,
      "learning_rate": 3.94e-06,
      "loss": 3.811,
      "step": 2625
    },
    {
      "epoch": 13.316582914572864,
      "grad_norm": 8.514374732971191,
      "learning_rate": 3.8685714285714286e-06,
      "loss": 3.8253,
      "step": 2650
    },
    {
      "epoch": 13.442211055276381,
      "grad_norm": 8.689332008361816,
      "learning_rate": 3.7971428571428576e-06,
      "loss": 3.8387,
      "step": 2675
    },
    {
      "epoch": 13.5678391959799,
      "grad_norm": 11.250374794006348,
      "learning_rate": 3.7257142857142857e-06,
      "loss": 3.8216,
      "step": 2700
    },
    {
      "epoch": 13.693467336683417,
      "grad_norm": 8.129416465759277,
      "learning_rate": 3.6542857142857148e-06,
      "loss": 3.8273,
      "step": 2725
    },
    {
      "epoch": 13.819095477386934,
      "grad_norm": 9.398397445678711,
      "learning_rate": 3.582857142857143e-06,
      "loss": 3.8116,
      "step": 2750
    },
    {
      "epoch": 13.944723618090451,
      "grad_norm": 9.966887474060059,
      "learning_rate": 3.511428571428572e-06,
      "loss": 3.8401,
      "step": 2775
    },
    {
      "epoch": 14.07035175879397,
      "grad_norm": 7.829891204833984,
      "learning_rate": 3.44e-06,
      "loss": 3.7607,
      "step": 2800
    },
    {
      "epoch": 14.195979899497488,
      "grad_norm": 10.33727741241455,
      "learning_rate": 3.3685714285714287e-06,
      "loss": 3.8406,
      "step": 2825
    },
    {
      "epoch": 14.321608040201005,
      "grad_norm": 8.579668998718262,
      "learning_rate": 3.2971428571428577e-06,
      "loss": 3.7893,
      "step": 2850
    },
    {
      "epoch": 14.447236180904522,
      "grad_norm": 9.909623146057129,
      "learning_rate": 3.225714285714286e-06,
      "loss": 3.8661,
      "step": 2875
    },
    {
      "epoch": 14.57286432160804,
      "grad_norm": 9.630623817443848,
      "learning_rate": 3.154285714285715e-06,
      "loss": 3.8248,
      "step": 2900
    },
    {
      "epoch": 14.698492462311558,
      "grad_norm": 13.711705207824707,
      "learning_rate": 3.082857142857143e-06,
      "loss": 3.8268,
      "step": 2925
    },
    {
      "epoch": 14.824120603015075,
      "grad_norm": 8.66063117980957,
      "learning_rate": 3.0114285714285716e-06,
      "loss": 3.8154,
      "step": 2950
    },
    {
      "epoch": 14.949748743718594,
      "grad_norm": 10.465475082397461,
      "learning_rate": 2.9400000000000002e-06,
      "loss": 3.826,
      "step": 2975
    },
    {
      "epoch": 15.075376884422111,
      "grad_norm": 9.025050163269043,
      "learning_rate": 2.868571428571429e-06,
      "loss": 3.8132,
      "step": 3000
    },
    {
      "epoch": 15.075376884422111,
      "eval_loss": 0.44469836354255676,
      "eval_runtime": 78.3312,
      "eval_samples_per_second": 9.039,
      "eval_steps_per_second": 4.519,
      "step": 3000
    },
    {
      "epoch": 15.201005025125628,
      "grad_norm": 9.333688735961914,
      "learning_rate": 2.797142857142857e-06,
      "loss": 3.8107,
      "step": 3025
    },
    {
      "epoch": 15.326633165829145,
      "grad_norm": 11.503759384155273,
      "learning_rate": 2.725714285714286e-06,
      "loss": 3.8315,
      "step": 3050
    },
    {
      "epoch": 15.452261306532664,
      "grad_norm": 10.728011131286621,
      "learning_rate": 2.654285714285714e-06,
      "loss": 3.8135,
      "step": 3075
    },
    {
      "epoch": 15.577889447236181,
      "grad_norm": 8.355088233947754,
      "learning_rate": 2.582857142857143e-06,
      "loss": 3.8079,
      "step": 3100
    },
    {
      "epoch": 15.703517587939698,
      "grad_norm": 9.261649131774902,
      "learning_rate": 2.5114285714285718e-06,
      "loss": 3.832,
      "step": 3125
    },
    {
      "epoch": 15.829145728643216,
      "grad_norm": 7.470139026641846,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 3.8065,
      "step": 3150
    },
    {
      "epoch": 15.954773869346734,
      "grad_norm": 10.546520233154297,
      "learning_rate": 2.3685714285714285e-06,
      "loss": 3.7759,
      "step": 3175
    },
    {
      "epoch": 16.08040201005025,
      "grad_norm": 8.22718334197998,
      "learning_rate": 2.297142857142857e-06,
      "loss": 3.7758,
      "step": 3200
    },
    {
      "epoch": 16.20603015075377,
      "grad_norm": 7.318994045257568,
      "learning_rate": 2.2257142857142857e-06,
      "loss": 3.7738,
      "step": 3225
    },
    {
      "epoch": 16.331658291457288,
      "grad_norm": 8.533567428588867,
      "learning_rate": 2.1542857142857147e-06,
      "loss": 3.7611,
      "step": 3250
    },
    {
      "epoch": 16.457286432160803,
      "grad_norm": 12.127696990966797,
      "learning_rate": 2.0828571428571433e-06,
      "loss": 3.8306,
      "step": 3275
    },
    {
      "epoch": 16.582914572864322,
      "grad_norm": 8.16425609588623,
      "learning_rate": 2.0114285714285715e-06,
      "loss": 3.8017,
      "step": 3300
    },
    {
      "epoch": 16.70854271356784,
      "grad_norm": 9.96759033203125,
      "learning_rate": 1.94e-06,
      "loss": 3.7988,
      "step": 3325
    },
    {
      "epoch": 16.834170854271356,
      "grad_norm": 7.703837871551514,
      "learning_rate": 1.8685714285714289e-06,
      "loss": 3.8025,
      "step": 3350
    },
    {
      "epoch": 16.959798994974875,
      "grad_norm": 9.635499000549316,
      "learning_rate": 1.7971428571428572e-06,
      "loss": 3.7641,
      "step": 3375
    },
    {
      "epoch": 17.08542713567839,
      "grad_norm": 8.293730735778809,
      "learning_rate": 1.7257142857142858e-06,
      "loss": 3.7946,
      "step": 3400
    },
    {
      "epoch": 17.21105527638191,
      "grad_norm": 9.113801002502441,
      "learning_rate": 1.6542857142857144e-06,
      "loss": 3.7743,
      "step": 3425
    },
    {
      "epoch": 17.33668341708543,
      "grad_norm": 12.916595458984375,
      "learning_rate": 1.582857142857143e-06,
      "loss": 3.8164,
      "step": 3450
    },
    {
      "epoch": 17.462311557788944,
      "grad_norm": 8.181452751159668,
      "learning_rate": 1.5114285714285714e-06,
      "loss": 3.7756,
      "step": 3475
    },
    {
      "epoch": 17.587939698492463,
      "grad_norm": 9.33465576171875,
      "learning_rate": 1.44e-06,
      "loss": 3.7851,
      "step": 3500
    },
    {
      "epoch": 17.71356783919598,
      "grad_norm": 9.609382629394531,
      "learning_rate": 1.3685714285714286e-06,
      "loss": 3.8087,
      "step": 3525
    },
    {
      "epoch": 17.839195979899497,
      "grad_norm": 10.107198715209961,
      "learning_rate": 1.2971428571428574e-06,
      "loss": 3.7887,
      "step": 3550
    },
    {
      "epoch": 17.964824120603016,
      "grad_norm": 10.769806861877441,
      "learning_rate": 1.2257142857142857e-06,
      "loss": 3.8042,
      "step": 3575
    },
    {
      "epoch": 18.09045226130653,
      "grad_norm": 9.459657669067383,
      "learning_rate": 1.1542857142857143e-06,
      "loss": 3.8038,
      "step": 3600
    },
    {
      "epoch": 18.21608040201005,
      "grad_norm": 8.211793899536133,
      "learning_rate": 1.082857142857143e-06,
      "loss": 3.7708,
      "step": 3625
    },
    {
      "epoch": 18.34170854271357,
      "grad_norm": 6.89067268371582,
      "learning_rate": 1.0114285714285715e-06,
      "loss": 3.832,
      "step": 3650
    },
    {
      "epoch": 18.467336683417084,
      "grad_norm": 11.42387866973877,
      "learning_rate": 9.400000000000001e-07,
      "loss": 3.7837,
      "step": 3675
    },
    {
      "epoch": 18.592964824120603,
      "grad_norm": 9.029644012451172,
      "learning_rate": 8.685714285714286e-07,
      "loss": 3.7696,
      "step": 3700
    },
    {
      "epoch": 18.718592964824122,
      "grad_norm": 7.634586334228516,
      "learning_rate": 7.971428571428572e-07,
      "loss": 3.7854,
      "step": 3725
    },
    {
      "epoch": 18.844221105527637,
      "grad_norm": 9.12209701538086,
      "learning_rate": 7.257142857142857e-07,
      "loss": 3.788,
      "step": 3750
    },
    {
      "epoch": 18.969849246231156,
      "grad_norm": 9.065604209899902,
      "learning_rate": 6.542857142857144e-07,
      "loss": 3.7881,
      "step": 3775
    },
    {
      "epoch": 19.09547738693467,
      "grad_norm": 8.798233032226562,
      "learning_rate": 5.82857142857143e-07,
      "loss": 3.8061,
      "step": 3800
    },
    {
      "epoch": 19.22110552763819,
      "grad_norm": 7.6765971183776855,
      "learning_rate": 5.114285714285714e-07,
      "loss": 3.7917,
      "step": 3825
    },
    {
      "epoch": 19.34673366834171,
      "grad_norm": 7.686089992523193,
      "learning_rate": 4.4e-07,
      "loss": 3.807,
      "step": 3850
    },
    {
      "epoch": 19.472361809045225,
      "grad_norm": 7.45636510848999,
      "learning_rate": 3.685714285714286e-07,
      "loss": 3.7984,
      "step": 3875
    },
    {
      "epoch": 19.597989949748744,
      "grad_norm": 7.467589378356934,
      "learning_rate": 2.9714285714285715e-07,
      "loss": 3.778,
      "step": 3900
    },
    {
      "epoch": 19.723618090452263,
      "grad_norm": 14.702427864074707,
      "learning_rate": 2.2571428571428574e-07,
      "loss": 3.8033,
      "step": 3925
    },
    {
      "epoch": 19.849246231155778,
      "grad_norm": 8.807873725891113,
      "learning_rate": 1.542857142857143e-07,
      "loss": 3.7807,
      "step": 3950
    },
    {
      "epoch": 19.974874371859297,
      "grad_norm": 8.14664363861084,
      "learning_rate": 8.285714285714285e-08,
      "loss": 3.7466,
      "step": 3975
    },
    {
      "epoch": 20.100502512562816,
      "grad_norm": 9.232961654663086,
      "learning_rate": 1.142857142857143e-08,
      "loss": 3.8105,
      "step": 4000
    },
    {
      "epoch": 20.100502512562816,
      "eval_loss": 0.4429960548877716,
      "eval_runtime": 78.2115,
      "eval_samples_per_second": 9.052,
      "eval_steps_per_second": 4.526,
      "step": 4000
    }
  ],
  "logging_steps": 25,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 21,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.800433559026872e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}