{ "best_metric": 0.4429960548877716, "best_model_checkpoint": "speecht5_finetuned_voxpopuli_pl/checkpoint-4000", "epoch": 20.100502512562816, "eval_steps": 1000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12562814070351758, "grad_norm": 34.14754104614258, "learning_rate": 4.6000000000000004e-07, "loss": 6.2754, "step": 25 }, { "epoch": 0.25125628140703515, "grad_norm": 25.906333923339844, "learning_rate": 9.400000000000001e-07, "loss": 6.1497, "step": 50 }, { "epoch": 0.3768844221105528, "grad_norm": 29.097463607788086, "learning_rate": 1.44e-06, "loss": 6.0335, "step": 75 }, { "epoch": 0.5025125628140703, "grad_norm": 18.543378829956055, "learning_rate": 1.94e-06, "loss": 5.8815, "step": 100 }, { "epoch": 0.628140703517588, "grad_norm": 24.147188186645508, "learning_rate": 2.4400000000000004e-06, "loss": 5.5659, "step": 125 }, { "epoch": 0.7537688442211056, "grad_norm": 15.634539604187012, "learning_rate": 2.9400000000000002e-06, "loss": 5.3573, "step": 150 }, { "epoch": 0.8793969849246231, "grad_norm": 14.31460189819336, "learning_rate": 3.44e-06, "loss": 5.2616, "step": 175 }, { "epoch": 1.0050251256281406, "grad_norm": 19.75978660583496, "learning_rate": 3.94e-06, "loss": 5.1787, "step": 200 }, { "epoch": 1.1306532663316582, "grad_norm": 12.639854431152344, "learning_rate": 4.440000000000001e-06, "loss": 5.1214, "step": 225 }, { "epoch": 1.2562814070351758, "grad_norm": 17.792396545410156, "learning_rate": 4.94e-06, "loss": 5.0703, "step": 250 }, { "epoch": 1.3819095477386933, "grad_norm": 22.859201431274414, "learning_rate": 5.4400000000000004e-06, "loss": 4.9714, "step": 275 }, { "epoch": 1.507537688442211, "grad_norm": 16.62458610534668, "learning_rate": 5.94e-06, "loss": 4.8681, "step": 300 }, { "epoch": 1.6331658291457285, "grad_norm": 17.881174087524414, "learning_rate": 6.440000000000001e-06, "loss": 4.8252, "step": 325 }, { "epoch": 1.758793969849246, "grad_norm": 14.765595436096191, "learning_rate": 6.9400000000000005e-06, "loss": 4.5588, "step": 350 }, { "epoch": 1.8844221105527639, "grad_norm": 13.808978080749512, "learning_rate": 7.440000000000001e-06, "loss": 4.5211, "step": 375 }, { "epoch": 2.0100502512562812, "grad_norm": 10.674385070800781, "learning_rate": 7.94e-06, "loss": 4.4888, "step": 400 }, { "epoch": 2.135678391959799, "grad_norm": 15.555588722229004, "learning_rate": 8.44e-06, "loss": 4.4513, "step": 425 }, { "epoch": 2.2613065326633164, "grad_norm": 13.092530250549316, "learning_rate": 8.94e-06, "loss": 4.3646, "step": 450 }, { "epoch": 2.3869346733668344, "grad_norm": 12.971352577209473, "learning_rate": 9.440000000000001e-06, "loss": 4.3459, "step": 475 }, { "epoch": 2.5125628140703515, "grad_norm": 10.348517417907715, "learning_rate": 9.940000000000001e-06, "loss": 4.2915, "step": 500 }, { "epoch": 2.6381909547738696, "grad_norm": 13.625853538513184, "learning_rate": 9.937142857142858e-06, "loss": 4.3095, "step": 525 }, { "epoch": 2.7638190954773867, "grad_norm": 11.426923751831055, "learning_rate": 9.865714285714285e-06, "loss": 4.1899, "step": 550 }, { "epoch": 2.8894472361809047, "grad_norm": 11.516312599182129, "learning_rate": 9.794285714285714e-06, "loss": 4.2233, "step": 575 }, { "epoch": 3.0150753768844223, "grad_norm": 16.141698837280273, "learning_rate": 9.722857142857143e-06, "loss": 4.1831, "step": 600 }, { "epoch": 3.14070351758794, "grad_norm": 14.935202598571777, "learning_rate": 9.651428571428572e-06, "loss": 4.1739, "step": 625 }, { "epoch": 3.2663316582914574, "grad_norm": 10.119762420654297, "learning_rate": 9.58e-06, "loss": 4.1649, "step": 650 }, { "epoch": 3.391959798994975, "grad_norm": 16.50948143005371, "learning_rate": 9.508571428571429e-06, "loss": 4.1627, "step": 675 }, { "epoch": 3.5175879396984926, "grad_norm": 10.08300495147705, "learning_rate": 9.437142857142858e-06, "loss": 4.1915, "step": 700 }, { "epoch": 3.64321608040201, "grad_norm": 9.375288963317871, "learning_rate": 9.365714285714287e-06, "loss": 4.1279, "step": 725 }, { "epoch": 3.7688442211055277, "grad_norm": 13.562234878540039, "learning_rate": 9.294285714285714e-06, "loss": 4.1083, "step": 750 }, { "epoch": 3.8944723618090453, "grad_norm": 15.115653991699219, "learning_rate": 9.222857142857143e-06, "loss": 4.1236, "step": 775 }, { "epoch": 4.0201005025125625, "grad_norm": 9.049237251281738, "learning_rate": 9.151428571428572e-06, "loss": 4.0964, "step": 800 }, { "epoch": 4.1457286432160805, "grad_norm": 9.250771522521973, "learning_rate": 9.080000000000001e-06, "loss": 4.0669, "step": 825 }, { "epoch": 4.271356783919598, "grad_norm": 12.07302474975586, "learning_rate": 9.00857142857143e-06, "loss": 4.0587, "step": 850 }, { "epoch": 4.396984924623116, "grad_norm": 9.538933753967285, "learning_rate": 8.937142857142857e-06, "loss": 4.0837, "step": 875 }, { "epoch": 4.522613065326633, "grad_norm": 14.347626686096191, "learning_rate": 8.865714285714287e-06, "loss": 4.0086, "step": 900 }, { "epoch": 4.648241206030151, "grad_norm": 10.831369400024414, "learning_rate": 8.794285714285716e-06, "loss": 4.0563, "step": 925 }, { "epoch": 4.773869346733669, "grad_norm": 9.891343116760254, "learning_rate": 8.722857142857145e-06, "loss": 4.0057, "step": 950 }, { "epoch": 4.899497487437186, "grad_norm": 9.572471618652344, "learning_rate": 8.651428571428572e-06, "loss": 4.0557, "step": 975 }, { "epoch": 5.025125628140704, "grad_norm": 9.745298385620117, "learning_rate": 8.580000000000001e-06, "loss": 3.9902, "step": 1000 }, { "epoch": 5.025125628140704, "eval_loss": 0.46117448806762695, "eval_runtime": 75.4681, "eval_samples_per_second": 9.381, "eval_steps_per_second": 4.691, "step": 1000 }, { "epoch": 5.150753768844221, "grad_norm": 10.219923973083496, "learning_rate": 8.50857142857143e-06, "loss": 3.9991, "step": 1025 }, { "epoch": 5.276381909547739, "grad_norm": 9.258833885192871, "learning_rate": 8.437142857142859e-06, "loss": 4.0032, "step": 1050 }, { "epoch": 5.402010050251256, "grad_norm": 12.21104907989502, "learning_rate": 8.365714285714286e-06, "loss": 4.0337, "step": 1075 }, { "epoch": 5.527638190954773, "grad_norm": 10.777599334716797, "learning_rate": 8.294285714285715e-06, "loss": 4.0036, "step": 1100 }, { "epoch": 5.653266331658291, "grad_norm": 10.322349548339844, "learning_rate": 8.222857142857144e-06, "loss": 3.9811, "step": 1125 }, { "epoch": 5.778894472361809, "grad_norm": 9.940053939819336, "learning_rate": 8.151428571428572e-06, "loss": 4.0264, "step": 1150 }, { "epoch": 5.9045226130653266, "grad_norm": 12.996850967407227, "learning_rate": 8.08e-06, "loss": 3.9783, "step": 1175 }, { "epoch": 6.030150753768845, "grad_norm": 10.784270286560059, "learning_rate": 8.00857142857143e-06, "loss": 3.936, "step": 1200 }, { "epoch": 6.155778894472362, "grad_norm": 10.86549186706543, "learning_rate": 7.937142857142857e-06, "loss": 3.9803, "step": 1225 }, { "epoch": 6.28140703517588, "grad_norm": 10.926647186279297, "learning_rate": 7.865714285714286e-06, "loss": 3.9587, "step": 1250 }, { "epoch": 6.407035175879397, "grad_norm": 12.103693008422852, "learning_rate": 7.794285714285715e-06, "loss": 3.9441, "step": 1275 }, { "epoch": 6.532663316582915, "grad_norm": 14.731566429138184, "learning_rate": 7.722857142857142e-06, "loss": 3.9541, "step": 1300 }, { "epoch": 6.658291457286432, "grad_norm": 13.810087203979492, "learning_rate": 7.651428571428571e-06, "loss": 3.962, "step": 1325 }, { "epoch": 6.78391959798995, "grad_norm": 10.733597755432129, "learning_rate": 7.58e-06, "loss": 3.9696, "step": 1350 }, { "epoch": 6.909547738693467, "grad_norm": 12.271906852722168, "learning_rate": 7.508571428571429e-06, "loss": 3.9319, "step": 1375 }, { "epoch": 7.035175879396985, "grad_norm": 17.454214096069336, "learning_rate": 7.4371428571428575e-06, "loss": 3.9096, "step": 1400 }, { "epoch": 7.160804020100502, "grad_norm": 9.891731262207031, "learning_rate": 7.365714285714286e-06, "loss": 3.963, "step": 1425 }, { "epoch": 7.28643216080402, "grad_norm": 10.400883674621582, "learning_rate": 7.294285714285715e-06, "loss": 3.9529, "step": 1450 }, { "epoch": 7.4120603015075375, "grad_norm": 8.886434555053711, "learning_rate": 7.222857142857144e-06, "loss": 3.9048, "step": 1475 }, { "epoch": 7.5376884422110555, "grad_norm": 13.72711181640625, "learning_rate": 7.151428571428573e-06, "loss": 3.9097, "step": 1500 }, { "epoch": 7.663316582914573, "grad_norm": 10.161803245544434, "learning_rate": 7.08e-06, "loss": 3.9029, "step": 1525 }, { "epoch": 7.788944723618091, "grad_norm": 9.717086791992188, "learning_rate": 7.008571428571429e-06, "loss": 3.9028, "step": 1550 }, { "epoch": 7.914572864321608, "grad_norm": 9.483698844909668, "learning_rate": 6.937142857142858e-06, "loss": 3.9157, "step": 1575 }, { "epoch": 8.040201005025125, "grad_norm": 8.951908111572266, "learning_rate": 6.865714285714287e-06, "loss": 3.9106, "step": 1600 }, { "epoch": 8.165829145728644, "grad_norm": 8.099654197692871, "learning_rate": 6.794285714285714e-06, "loss": 3.9062, "step": 1625 }, { "epoch": 8.291457286432161, "grad_norm": 10.005452156066895, "learning_rate": 6.722857142857143e-06, "loss": 3.9139, "step": 1650 }, { "epoch": 8.417085427135678, "grad_norm": 7.832030296325684, "learning_rate": 6.651428571428572e-06, "loss": 3.8731, "step": 1675 }, { "epoch": 8.542713567839195, "grad_norm": 9.519272804260254, "learning_rate": 6.5800000000000005e-06, "loss": 3.9234, "step": 1700 }, { "epoch": 8.668341708542714, "grad_norm": 7.6004252433776855, "learning_rate": 6.5085714285714295e-06, "loss": 3.8889, "step": 1725 }, { "epoch": 8.793969849246231, "grad_norm": 8.43916130065918, "learning_rate": 6.437142857142858e-06, "loss": 3.8813, "step": 1750 }, { "epoch": 8.919597989949748, "grad_norm": 9.573629379272461, "learning_rate": 6.365714285714286e-06, "loss": 3.9234, "step": 1775 }, { "epoch": 9.045226130653266, "grad_norm": 11.034466743469238, "learning_rate": 6.294285714285715e-06, "loss": 3.9227, "step": 1800 }, { "epoch": 9.170854271356784, "grad_norm": 9.65125560760498, "learning_rate": 6.222857142857144e-06, "loss": 3.8806, "step": 1825 }, { "epoch": 9.296482412060302, "grad_norm": 8.907113075256348, "learning_rate": 6.151428571428571e-06, "loss": 3.8878, "step": 1850 }, { "epoch": 9.422110552763819, "grad_norm": 9.000490188598633, "learning_rate": 6.08e-06, "loss": 3.858, "step": 1875 }, { "epoch": 9.547738693467338, "grad_norm": 10.825636863708496, "learning_rate": 6.008571428571429e-06, "loss": 3.8546, "step": 1900 }, { "epoch": 9.673366834170855, "grad_norm": 8.722935676574707, "learning_rate": 5.937142857142858e-06, "loss": 3.871, "step": 1925 }, { "epoch": 9.798994974874372, "grad_norm": 9.243980407714844, "learning_rate": 5.865714285714286e-06, "loss": 3.8486, "step": 1950 }, { "epoch": 9.924623115577889, "grad_norm": 10.595390319824219, "learning_rate": 5.794285714285715e-06, "loss": 3.8973, "step": 1975 }, { "epoch": 10.050251256281408, "grad_norm": 11.8334321975708, "learning_rate": 5.722857142857144e-06, "loss": 3.8124, "step": 2000 }, { "epoch": 10.050251256281408, "eval_loss": 0.44917792081832886, "eval_runtime": 76.9989, "eval_samples_per_second": 9.195, "eval_steps_per_second": 4.597, "step": 2000 }, { "epoch": 10.175879396984925, "grad_norm": 12.580653190612793, "learning_rate": 5.651428571428572e-06, "loss": 3.8754, "step": 2025 }, { "epoch": 10.301507537688442, "grad_norm": 9.411772727966309, "learning_rate": 5.582857142857143e-06, "loss": 3.839, "step": 2050 }, { "epoch": 10.42713567839196, "grad_norm": 8.65829849243164, "learning_rate": 5.511428571428572e-06, "loss": 3.8123, "step": 2075 }, { "epoch": 10.552763819095478, "grad_norm": 9.79350757598877, "learning_rate": 5.4400000000000004e-06, "loss": 3.8609, "step": 2100 }, { "epoch": 10.678391959798995, "grad_norm": 9.445945739746094, "learning_rate": 5.368571428571429e-06, "loss": 3.8359, "step": 2125 }, { "epoch": 10.804020100502512, "grad_norm": 11.811120986938477, "learning_rate": 5.297142857142858e-06, "loss": 3.8632, "step": 2150 }, { "epoch": 10.92964824120603, "grad_norm": 10.508607864379883, "learning_rate": 5.225714285714286e-06, "loss": 3.8324, "step": 2175 }, { "epoch": 11.055276381909549, "grad_norm": 8.808614730834961, "learning_rate": 5.154285714285715e-06, "loss": 3.8314, "step": 2200 }, { "epoch": 11.180904522613066, "grad_norm": 11.709993362426758, "learning_rate": 5.082857142857144e-06, "loss": 3.8904, "step": 2225 }, { "epoch": 11.306532663316583, "grad_norm": 8.451616287231445, "learning_rate": 5.011428571428571e-06, "loss": 3.8471, "step": 2250 }, { "epoch": 11.4321608040201, "grad_norm": 10.084294319152832, "learning_rate": 4.94e-06, "loss": 3.8881, "step": 2275 }, { "epoch": 11.557788944723619, "grad_norm": 17.51856803894043, "learning_rate": 4.868571428571429e-06, "loss": 3.8931, "step": 2300 }, { "epoch": 11.683417085427136, "grad_norm": 11.671639442443848, "learning_rate": 4.797142857142857e-06, "loss": 3.8578, "step": 2325 }, { "epoch": 11.809045226130653, "grad_norm": 8.713754653930664, "learning_rate": 4.725714285714286e-06, "loss": 3.832, "step": 2350 }, { "epoch": 11.93467336683417, "grad_norm": 11.429880142211914, "learning_rate": 4.6542857142857145e-06, "loss": 3.8556, "step": 2375 }, { "epoch": 12.06030150753769, "grad_norm": 9.178875923156738, "learning_rate": 4.5828571428571435e-06, "loss": 3.8361, "step": 2400 }, { "epoch": 12.185929648241206, "grad_norm": 7.655440330505371, "learning_rate": 4.511428571428572e-06, "loss": 3.8337, "step": 2425 }, { "epoch": 12.311557788944723, "grad_norm": 10.44965648651123, "learning_rate": 4.440000000000001e-06, "loss": 3.877, "step": 2450 }, { "epoch": 12.43718592964824, "grad_norm": 9.04977035522461, "learning_rate": 4.368571428571429e-06, "loss": 3.8504, "step": 2475 }, { "epoch": 12.56281407035176, "grad_norm": 8.15368366241455, "learning_rate": 4.297142857142858e-06, "loss": 3.8297, "step": 2500 }, { "epoch": 12.688442211055277, "grad_norm": 9.811213493347168, "learning_rate": 4.225714285714286e-06, "loss": 3.8289, "step": 2525 }, { "epoch": 12.814070351758794, "grad_norm": 8.397767066955566, "learning_rate": 4.154285714285714e-06, "loss": 3.8352, "step": 2550 }, { "epoch": 12.93969849246231, "grad_norm": 10.471597671508789, "learning_rate": 4.082857142857143e-06, "loss": 3.8438, "step": 2575 }, { "epoch": 13.06532663316583, "grad_norm": 8.042658805847168, "learning_rate": 4.011428571428571e-06, "loss": 3.8308, "step": 2600 }, { "epoch": 13.190954773869347, "grad_norm": 9.240007400512695, "learning_rate": 3.94e-06, "loss": 3.811, "step": 2625 }, { "epoch": 13.316582914572864, "grad_norm": 8.514374732971191, "learning_rate": 3.8685714285714286e-06, "loss": 3.8253, "step": 2650 }, { "epoch": 13.442211055276381, "grad_norm": 8.689332008361816, "learning_rate": 3.7971428571428576e-06, "loss": 3.8387, "step": 2675 }, { "epoch": 13.5678391959799, "grad_norm": 11.250374794006348, "learning_rate": 3.7257142857142857e-06, "loss": 3.8216, "step": 2700 }, { "epoch": 13.693467336683417, "grad_norm": 8.129416465759277, "learning_rate": 3.6542857142857148e-06, "loss": 3.8273, "step": 2725 }, { "epoch": 13.819095477386934, "grad_norm": 9.398397445678711, "learning_rate": 3.582857142857143e-06, "loss": 3.8116, "step": 2750 }, { "epoch": 13.944723618090451, "grad_norm": 9.966887474060059, "learning_rate": 3.511428571428572e-06, "loss": 3.8401, "step": 2775 }, { "epoch": 14.07035175879397, "grad_norm": 7.829891204833984, "learning_rate": 3.44e-06, "loss": 3.7607, "step": 2800 }, { "epoch": 14.195979899497488, "grad_norm": 10.33727741241455, "learning_rate": 3.3685714285714287e-06, "loss": 3.8406, "step": 2825 }, { "epoch": 14.321608040201005, "grad_norm": 8.579668998718262, "learning_rate": 3.2971428571428577e-06, "loss": 3.7893, "step": 2850 }, { "epoch": 14.447236180904522, "grad_norm": 9.909623146057129, "learning_rate": 3.225714285714286e-06, "loss": 3.8661, "step": 2875 }, { "epoch": 14.57286432160804, "grad_norm": 9.630623817443848, "learning_rate": 3.154285714285715e-06, "loss": 3.8248, "step": 2900 }, { "epoch": 14.698492462311558, "grad_norm": 13.711705207824707, "learning_rate": 3.082857142857143e-06, "loss": 3.8268, "step": 2925 }, { "epoch": 14.824120603015075, "grad_norm": 8.66063117980957, "learning_rate": 3.0114285714285716e-06, "loss": 3.8154, "step": 2950 }, { "epoch": 14.949748743718594, "grad_norm": 10.465475082397461, "learning_rate": 2.9400000000000002e-06, "loss": 3.826, "step": 2975 }, { "epoch": 15.075376884422111, "grad_norm": 9.025050163269043, "learning_rate": 2.868571428571429e-06, "loss": 3.8132, "step": 3000 }, { "epoch": 15.075376884422111, "eval_loss": 0.44469836354255676, "eval_runtime": 78.3312, "eval_samples_per_second": 9.039, "eval_steps_per_second": 4.519, "step": 3000 }, { "epoch": 15.201005025125628, "grad_norm": 9.333688735961914, "learning_rate": 2.797142857142857e-06, "loss": 3.8107, "step": 3025 }, { "epoch": 15.326633165829145, "grad_norm": 11.503759384155273, "learning_rate": 2.725714285714286e-06, "loss": 3.8315, "step": 3050 }, { "epoch": 15.452261306532664, "grad_norm": 10.728011131286621, "learning_rate": 2.654285714285714e-06, "loss": 3.8135, "step": 3075 }, { "epoch": 15.577889447236181, "grad_norm": 8.355088233947754, "learning_rate": 2.582857142857143e-06, "loss": 3.8079, "step": 3100 }, { "epoch": 15.703517587939698, "grad_norm": 9.261649131774902, "learning_rate": 2.5114285714285718e-06, "loss": 3.832, "step": 3125 }, { "epoch": 15.829145728643216, "grad_norm": 7.470139026641846, "learning_rate": 2.4400000000000004e-06, "loss": 3.8065, "step": 3150 }, { "epoch": 15.954773869346734, "grad_norm": 10.546520233154297, "learning_rate": 2.3685714285714285e-06, "loss": 3.7759, "step": 3175 }, { "epoch": 16.08040201005025, "grad_norm": 8.22718334197998, "learning_rate": 2.297142857142857e-06, "loss": 3.7758, "step": 3200 }, { "epoch": 16.20603015075377, "grad_norm": 7.318994045257568, "learning_rate": 2.2257142857142857e-06, "loss": 3.7738, "step": 3225 }, { "epoch": 16.331658291457288, "grad_norm": 8.533567428588867, "learning_rate": 2.1542857142857147e-06, "loss": 3.7611, "step": 3250 }, { "epoch": 16.457286432160803, "grad_norm": 12.127696990966797, "learning_rate": 2.0828571428571433e-06, "loss": 3.8306, "step": 3275 }, { "epoch": 16.582914572864322, "grad_norm": 8.16425609588623, "learning_rate": 2.0114285714285715e-06, "loss": 3.8017, "step": 3300 }, { "epoch": 16.70854271356784, "grad_norm": 9.96759033203125, "learning_rate": 1.94e-06, "loss": 3.7988, "step": 3325 }, { "epoch": 16.834170854271356, "grad_norm": 7.703837871551514, "learning_rate": 1.8685714285714289e-06, "loss": 3.8025, "step": 3350 }, { "epoch": 16.959798994974875, "grad_norm": 9.635499000549316, "learning_rate": 1.7971428571428572e-06, "loss": 3.7641, "step": 3375 }, { "epoch": 17.08542713567839, "grad_norm": 8.293730735778809, "learning_rate": 1.7257142857142858e-06, "loss": 3.7946, "step": 3400 }, { "epoch": 17.21105527638191, "grad_norm": 9.113801002502441, "learning_rate": 1.6542857142857144e-06, "loss": 3.7743, "step": 3425 }, { "epoch": 17.33668341708543, "grad_norm": 12.916595458984375, "learning_rate": 1.582857142857143e-06, "loss": 3.8164, "step": 3450 }, { "epoch": 17.462311557788944, "grad_norm": 8.181452751159668, "learning_rate": 1.5114285714285714e-06, "loss": 3.7756, "step": 3475 }, { "epoch": 17.587939698492463, "grad_norm": 9.33465576171875, "learning_rate": 1.44e-06, "loss": 3.7851, "step": 3500 }, { "epoch": 17.71356783919598, "grad_norm": 9.609382629394531, "learning_rate": 1.3685714285714286e-06, "loss": 3.8087, "step": 3525 }, { "epoch": 17.839195979899497, "grad_norm": 10.107198715209961, "learning_rate": 1.2971428571428574e-06, "loss": 3.7887, "step": 3550 }, { "epoch": 17.964824120603016, "grad_norm": 10.769806861877441, "learning_rate": 1.2257142857142857e-06, "loss": 3.8042, "step": 3575 }, { "epoch": 18.09045226130653, "grad_norm": 9.459657669067383, "learning_rate": 1.1542857142857143e-06, "loss": 3.8038, "step": 3600 }, { "epoch": 18.21608040201005, "grad_norm": 8.211793899536133, "learning_rate": 1.082857142857143e-06, "loss": 3.7708, "step": 3625 }, { "epoch": 18.34170854271357, "grad_norm": 6.89067268371582, "learning_rate": 1.0114285714285715e-06, "loss": 3.832, "step": 3650 }, { "epoch": 18.467336683417084, "grad_norm": 11.42387866973877, "learning_rate": 9.400000000000001e-07, "loss": 3.7837, "step": 3675 }, { "epoch": 18.592964824120603, "grad_norm": 9.029644012451172, "learning_rate": 8.685714285714286e-07, "loss": 3.7696, "step": 3700 }, { "epoch": 18.718592964824122, "grad_norm": 7.634586334228516, "learning_rate": 7.971428571428572e-07, "loss": 3.7854, "step": 3725 }, { "epoch": 18.844221105527637, "grad_norm": 9.12209701538086, "learning_rate": 7.257142857142857e-07, "loss": 3.788, "step": 3750 }, { "epoch": 18.969849246231156, "grad_norm": 9.065604209899902, "learning_rate": 6.542857142857144e-07, "loss": 3.7881, "step": 3775 }, { "epoch": 19.09547738693467, "grad_norm": 8.798233032226562, "learning_rate": 5.82857142857143e-07, "loss": 3.8061, "step": 3800 }, { "epoch": 19.22110552763819, "grad_norm": 7.6765971183776855, "learning_rate": 5.114285714285714e-07, "loss": 3.7917, "step": 3825 }, { "epoch": 19.34673366834171, "grad_norm": 7.686089992523193, "learning_rate": 4.4e-07, "loss": 3.807, "step": 3850 }, { "epoch": 19.472361809045225, "grad_norm": 7.45636510848999, "learning_rate": 3.685714285714286e-07, "loss": 3.7984, "step": 3875 }, { "epoch": 19.597989949748744, "grad_norm": 7.467589378356934, "learning_rate": 2.9714285714285715e-07, "loss": 3.778, "step": 3900 }, { "epoch": 19.723618090452263, "grad_norm": 14.702427864074707, "learning_rate": 2.2571428571428574e-07, "loss": 3.8033, "step": 3925 }, { "epoch": 19.849246231155778, "grad_norm": 8.807873725891113, "learning_rate": 1.542857142857143e-07, "loss": 3.7807, "step": 3950 }, { "epoch": 19.974874371859297, "grad_norm": 8.14664363861084, "learning_rate": 8.285714285714285e-08, "loss": 3.7466, "step": 3975 }, { "epoch": 20.100502512562816, "grad_norm": 9.232961654663086, "learning_rate": 1.142857142857143e-08, "loss": 3.8105, "step": 4000 }, { "epoch": 20.100502512562816, "eval_loss": 0.4429960548877716, "eval_runtime": 78.2115, "eval_samples_per_second": 9.052, "eval_steps_per_second": 4.526, "step": 4000 } ], "logging_steps": 25, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 21, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.800433559026872e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }