clip-wiki-pretrain-2560-22800 / trainer_state.json
gowitheflow1998
upload checkpoint
a337d56
raw
history blame contribute delete
No virus
23.3 kB
{
"best_metric": 0.7275755258839849,
"best_model_checkpoint": "./checkpoints/clip-2560-wikispan-all/checkpoint-240",
"epoch": 0.9472765798329802,
"global_step": 22800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.8e-06,
"loss": 1.8723,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 3.6e-06,
"loss": 1.0969,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 5.4e-06,
"loss": 0.8982,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 7.2e-06,
"loss": 0.802,
"step": 480
},
{
"epoch": 0.02,
"learning_rate": 9e-06,
"loss": 0.7304,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 1.08e-05,
"loss": 0.6815,
"step": 720
},
{
"epoch": 0.03,
"learning_rate": 1.26e-05,
"loss": 0.6435,
"step": 840
},
{
"epoch": 0.04,
"learning_rate": 1.44e-05,
"loss": 0.6089,
"step": 960
},
{
"epoch": 0.04,
"learning_rate": 1.62e-05,
"loss": 0.581,
"step": 1080
},
{
"epoch": 0.05,
"learning_rate": 1.8e-05,
"loss": 0.5637,
"step": 1200
},
{
"epoch": 0.05,
"learning_rate": 1.98e-05,
"loss": 0.538,
"step": 1320
},
{
"epoch": 0.06,
"learning_rate": 2.16e-05,
"loss": 0.5223,
"step": 1440
},
{
"epoch": 0.06,
"learning_rate": 2.3400000000000003e-05,
"loss": 0.5123,
"step": 1560
},
{
"epoch": 0.07,
"learning_rate": 2.52e-05,
"loss": 0.4984,
"step": 1680
},
{
"epoch": 0.07,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.4848,
"step": 1800
},
{
"epoch": 0.08,
"learning_rate": 2.88e-05,
"loss": 0.4763,
"step": 1920
},
{
"epoch": 0.08,
"learning_rate": 2.9945750452079567e-05,
"loss": 0.468,
"step": 2040
},
{
"epoch": 0.09,
"learning_rate": 2.978300180831826e-05,
"loss": 0.4532,
"step": 2160
},
{
"epoch": 0.09,
"learning_rate": 2.9620253164556963e-05,
"loss": 0.445,
"step": 2280
},
{
"epoch": 0.1,
"learning_rate": 2.945750452079566e-05,
"loss": 0.4382,
"step": 2400
},
{
"epoch": 0.1,
"learning_rate": 2.929475587703436e-05,
"loss": 0.4289,
"step": 2520
},
{
"epoch": 0.11,
"learning_rate": 2.9132007233273057e-05,
"loss": 0.4216,
"step": 2640
},
{
"epoch": 0.11,
"learning_rate": 2.8969258589511756e-05,
"loss": 0.4134,
"step": 2760
},
{
"epoch": 0.12,
"learning_rate": 2.8806509945750454e-05,
"loss": 0.4084,
"step": 2880
},
{
"epoch": 0.12,
"learning_rate": 2.864376130198915e-05,
"loss": 0.3988,
"step": 3000
},
{
"epoch": 0.13,
"learning_rate": 2.8481012658227846e-05,
"loss": 0.3938,
"step": 3120
},
{
"epoch": 0.13,
"learning_rate": 2.8318264014466548e-05,
"loss": 0.3886,
"step": 3240
},
{
"epoch": 0.14,
"learning_rate": 2.8155515370705246e-05,
"loss": 0.3845,
"step": 3360
},
{
"epoch": 0.14,
"learning_rate": 2.7992766726943944e-05,
"loss": 0.378,
"step": 3480
},
{
"epoch": 0.15,
"learning_rate": 2.7830018083182642e-05,
"loss": 0.375,
"step": 3600
},
{
"epoch": 0.15,
"learning_rate": 2.766726943942134e-05,
"loss": 0.3665,
"step": 3720
},
{
"epoch": 0.16,
"learning_rate": 2.7504520795660035e-05,
"loss": 0.3644,
"step": 3840
},
{
"epoch": 0.16,
"learning_rate": 2.7341772151898733e-05,
"loss": 0.36,
"step": 3960
},
{
"epoch": 0.17,
"learning_rate": 2.717902350813743e-05,
"loss": 0.3597,
"step": 4080
},
{
"epoch": 0.17,
"learning_rate": 2.701627486437613e-05,
"loss": 0.3549,
"step": 4200
},
{
"epoch": 0.18,
"learning_rate": 2.685352622061483e-05,
"loss": 0.3545,
"step": 4320
},
{
"epoch": 0.18,
"learning_rate": 2.669077757685353e-05,
"loss": 0.3496,
"step": 4440
},
{
"epoch": 0.19,
"learning_rate": 2.6528028933092224e-05,
"loss": 0.3442,
"step": 4560
},
{
"epoch": 0.19,
"learning_rate": 2.6365280289330922e-05,
"loss": 0.3424,
"step": 4680
},
{
"epoch": 0.2,
"learning_rate": 2.620253164556962e-05,
"loss": 0.3414,
"step": 4800
},
{
"epoch": 0.2,
"learning_rate": 2.6039783001808318e-05,
"loss": 0.3361,
"step": 4920
},
{
"epoch": 0.21,
"learning_rate": 2.5877034358047016e-05,
"loss": 0.3378,
"step": 5040
},
{
"epoch": 0.21,
"learning_rate": 2.5714285714285714e-05,
"loss": 0.3316,
"step": 5160
},
{
"epoch": 0.22,
"learning_rate": 2.5551537070524416e-05,
"loss": 0.3302,
"step": 5280
},
{
"epoch": 0.22,
"learning_rate": 2.538878842676311e-05,
"loss": 0.3245,
"step": 5400
},
{
"epoch": 0.23,
"learning_rate": 2.522603978300181e-05,
"loss": 0.3254,
"step": 5520
},
{
"epoch": 0.23,
"learning_rate": 2.5063291139240507e-05,
"loss": 0.3206,
"step": 5640
},
{
"epoch": 0.24,
"learning_rate": 2.4900542495479205e-05,
"loss": 0.3212,
"step": 5760
},
{
"epoch": 0.24,
"learning_rate": 2.4737793851717903e-05,
"loss": 0.3184,
"step": 5880
},
{
"epoch": 0.25,
"learning_rate": 2.45750452079566e-05,
"loss": 0.3141,
"step": 6000
},
{
"epoch": 0.25,
"learning_rate": 2.44122965641953e-05,
"loss": 0.3185,
"step": 6120
},
{
"epoch": 0.26,
"learning_rate": 2.4249547920433994e-05,
"loss": 0.3115,
"step": 6240
},
{
"epoch": 0.26,
"learning_rate": 2.4086799276672696e-05,
"loss": 0.3093,
"step": 6360
},
{
"epoch": 0.27,
"learning_rate": 2.3924050632911394e-05,
"loss": 0.3116,
"step": 6480
},
{
"epoch": 0.27,
"learning_rate": 2.3761301989150092e-05,
"loss": 0.3084,
"step": 6600
},
{
"epoch": 0.28,
"learning_rate": 2.359855334538879e-05,
"loss": 0.3071,
"step": 6720
},
{
"epoch": 0.28,
"learning_rate": 2.3435804701627488e-05,
"loss": 0.3047,
"step": 6840
},
{
"epoch": 0.29,
"learning_rate": 2.3273056057866183e-05,
"loss": 0.302,
"step": 6960
},
{
"epoch": 0.29,
"learning_rate": 2.311030741410488e-05,
"loss": 0.3027,
"step": 7080
},
{
"epoch": 0.3,
"learning_rate": 2.294755877034358e-05,
"loss": 0.3019,
"step": 7200
},
{
"epoch": 0.3,
"learning_rate": 2.278481012658228e-05,
"loss": 0.3,
"step": 7320
},
{
"epoch": 0.31,
"learning_rate": 2.262206148282098e-05,
"loss": 0.2961,
"step": 7440
},
{
"epoch": 0.31,
"learning_rate": 2.2459312839059677e-05,
"loss": 0.2967,
"step": 7560
},
{
"epoch": 0.32,
"learning_rate": 2.2296564195298375e-05,
"loss": 0.2944,
"step": 7680
},
{
"epoch": 0.32,
"learning_rate": 2.213381555153707e-05,
"loss": 0.2937,
"step": 7800
},
{
"epoch": 0.33,
"learning_rate": 2.1971066907775768e-05,
"loss": 0.2914,
"step": 7920
},
{
"epoch": 0.33,
"learning_rate": 2.1808318264014466e-05,
"loss": 0.2902,
"step": 8040
},
{
"epoch": 0.34,
"learning_rate": 2.1645569620253164e-05,
"loss": 0.2889,
"step": 8160
},
{
"epoch": 0.34,
"learning_rate": 2.1482820976491862e-05,
"loss": 0.2894,
"step": 8280
},
{
"epoch": 0.35,
"learning_rate": 2.1320072332730564e-05,
"loss": 0.286,
"step": 8400
},
{
"epoch": 0.35,
"learning_rate": 2.1157323688969262e-05,
"loss": 0.2847,
"step": 8520
},
{
"epoch": 0.36,
"learning_rate": 2.0994575045207956e-05,
"loss": 0.2838,
"step": 8640
},
{
"epoch": 0.36,
"learning_rate": 2.0831826401446655e-05,
"loss": 0.2822,
"step": 8760
},
{
"epoch": 0.37,
"learning_rate": 2.0669077757685353e-05,
"loss": 0.2814,
"step": 8880
},
{
"epoch": 0.37,
"learning_rate": 2.050632911392405e-05,
"loss": 0.2818,
"step": 9000
},
{
"epoch": 0.38,
"learning_rate": 2.034358047016275e-05,
"loss": 0.2779,
"step": 9120
},
{
"epoch": 0.38,
"learning_rate": 2.0180831826401447e-05,
"loss": 0.2794,
"step": 9240
},
{
"epoch": 0.39,
"learning_rate": 2.0018083182640145e-05,
"loss": 0.2789,
"step": 9360
},
{
"epoch": 0.39,
"learning_rate": 1.9855334538878843e-05,
"loss": 0.2757,
"step": 9480
},
{
"epoch": 0.4,
"learning_rate": 1.969258589511754e-05,
"loss": 0.2758,
"step": 9600
},
{
"epoch": 0.4,
"learning_rate": 1.952983725135624e-05,
"loss": 0.277,
"step": 9720
},
{
"epoch": 0.41,
"learning_rate": 1.9367088607594938e-05,
"loss": 0.2731,
"step": 9840
},
{
"epoch": 0.41,
"learning_rate": 1.9204339963833636e-05,
"loss": 0.268,
"step": 9960
},
{
"epoch": 0.42,
"learning_rate": 1.9041591320072334e-05,
"loss": 0.2702,
"step": 10080
},
{
"epoch": 0.42,
"learning_rate": 1.887884267631103e-05,
"loss": 0.2699,
"step": 10200
},
{
"epoch": 0.43,
"learning_rate": 1.8716094032549727e-05,
"loss": 0.2707,
"step": 10320
},
{
"epoch": 0.43,
"learning_rate": 1.8553345388788428e-05,
"loss": 0.2661,
"step": 10440
},
{
"epoch": 0.44,
"learning_rate": 1.8390596745027126e-05,
"loss": 0.2668,
"step": 10560
},
{
"epoch": 0.44,
"learning_rate": 1.8227848101265824e-05,
"loss": 0.2689,
"step": 10680
},
{
"epoch": 0.45,
"learning_rate": 1.8065099457504523e-05,
"loss": 0.2695,
"step": 10800
},
{
"epoch": 0.45,
"learning_rate": 1.790235081374322e-05,
"loss": 0.2637,
"step": 10920
},
{
"epoch": 0.46,
"learning_rate": 1.7739602169981915e-05,
"loss": 0.2645,
"step": 11040
},
{
"epoch": 0.46,
"learning_rate": 1.7576853526220614e-05,
"loss": 0.2618,
"step": 11160
},
{
"epoch": 0.47,
"learning_rate": 1.741410488245931e-05,
"loss": 0.2626,
"step": 11280
},
{
"epoch": 0.47,
"learning_rate": 1.7251356238698013e-05,
"loss": 0.2581,
"step": 11400
},
{
"epoch": 0.48,
"learning_rate": 1.708860759493671e-05,
"loss": 0.2597,
"step": 11520
},
{
"epoch": 0.48,
"learning_rate": 1.692585895117541e-05,
"loss": 0.2594,
"step": 11640
},
{
"epoch": 0.49,
"learning_rate": 1.6763110307414104e-05,
"loss": 0.2577,
"step": 11760
},
{
"epoch": 0.49,
"learning_rate": 1.6600361663652802e-05,
"loss": 0.2585,
"step": 11880
},
{
"epoch": 0.5,
"learning_rate": 1.64376130198915e-05,
"loss": 0.2565,
"step": 12000
},
{
"epoch": 0.5,
"learning_rate": 1.62748643761302e-05,
"loss": 0.2562,
"step": 12120
},
{
"epoch": 0.51,
"learning_rate": 1.6112115732368897e-05,
"loss": 0.2559,
"step": 12240
},
{
"epoch": 0.51,
"learning_rate": 1.5949367088607595e-05,
"loss": 0.2551,
"step": 12360
},
{
"epoch": 0.52,
"learning_rate": 1.5786618444846296e-05,
"loss": 0.2528,
"step": 12480
},
{
"epoch": 0.52,
"learning_rate": 1.562386980108499e-05,
"loss": 0.2539,
"step": 12600
},
{
"epoch": 0.53,
"learning_rate": 1.546112115732369e-05,
"loss": 0.2525,
"step": 12720
},
{
"epoch": 0.53,
"learning_rate": 1.5298372513562387e-05,
"loss": 0.2534,
"step": 12840
},
{
"epoch": 0.54,
"learning_rate": 1.5135623869801085e-05,
"loss": 0.2518,
"step": 12960
},
{
"epoch": 0.54,
"learning_rate": 1.4972875226039783e-05,
"loss": 0.2506,
"step": 13080
},
{
"epoch": 0.55,
"learning_rate": 1.4810126582278482e-05,
"loss": 0.2517,
"step": 13200
},
{
"epoch": 0.55,
"learning_rate": 1.464737793851718e-05,
"loss": 0.2483,
"step": 13320
},
{
"epoch": 0.56,
"learning_rate": 1.4484629294755878e-05,
"loss": 0.2471,
"step": 13440
},
{
"epoch": 0.56,
"learning_rate": 1.4321880650994574e-05,
"loss": 0.2484,
"step": 13560
},
{
"epoch": 0.57,
"learning_rate": 1.4159132007233274e-05,
"loss": 0.2474,
"step": 13680
},
{
"epoch": 0.57,
"learning_rate": 1.3996383363471972e-05,
"loss": 0.25,
"step": 13800
},
{
"epoch": 0.58,
"learning_rate": 1.383363471971067e-05,
"loss": 0.2458,
"step": 13920
},
{
"epoch": 0.58,
"learning_rate": 1.3670886075949367e-05,
"loss": 0.2466,
"step": 14040
},
{
"epoch": 0.59,
"learning_rate": 1.3508137432188065e-05,
"loss": 0.2454,
"step": 14160
},
{
"epoch": 0.59,
"learning_rate": 1.3345388788426765e-05,
"loss": 0.2461,
"step": 14280
},
{
"epoch": 0.6,
"learning_rate": 1.3182640144665461e-05,
"loss": 0.2427,
"step": 14400
},
{
"epoch": 0.6,
"learning_rate": 1.3019891500904159e-05,
"loss": 0.2446,
"step": 14520
},
{
"epoch": 0.61,
"learning_rate": 1.2857142857142857e-05,
"loss": 0.2433,
"step": 14640
},
{
"epoch": 0.61,
"learning_rate": 1.2694394213381555e-05,
"loss": 0.241,
"step": 14760
},
{
"epoch": 0.62,
"learning_rate": 1.2531645569620253e-05,
"loss": 0.2424,
"step": 14880
},
{
"epoch": 0.62,
"learning_rate": 1.2368896925858952e-05,
"loss": 0.2427,
"step": 15000
},
{
"epoch": 0.63,
"learning_rate": 1.220614828209765e-05,
"loss": 0.2395,
"step": 15120
},
{
"epoch": 0.63,
"learning_rate": 1.2043399638336348e-05,
"loss": 0.2396,
"step": 15240
},
{
"epoch": 0.64,
"learning_rate": 1.1880650994575046e-05,
"loss": 0.2416,
"step": 15360
},
{
"epoch": 0.64,
"learning_rate": 1.1717902350813744e-05,
"loss": 0.2402,
"step": 15480
},
{
"epoch": 0.65,
"learning_rate": 1.155515370705244e-05,
"loss": 0.2382,
"step": 15600
},
{
"epoch": 0.65,
"learning_rate": 1.139240506329114e-05,
"loss": 0.2361,
"step": 15720
},
{
"epoch": 0.66,
"learning_rate": 1.1229656419529838e-05,
"loss": 0.2383,
"step": 15840
},
{
"epoch": 0.66,
"learning_rate": 1.1066907775768535e-05,
"loss": 0.2368,
"step": 15960
},
{
"epoch": 0.67,
"learning_rate": 1.0904159132007233e-05,
"loss": 0.2374,
"step": 16080
},
{
"epoch": 0.67,
"learning_rate": 1.0741410488245931e-05,
"loss": 0.2363,
"step": 16200
},
{
"epoch": 0.68,
"learning_rate": 1.0578661844484631e-05,
"loss": 0.2364,
"step": 16320
},
{
"epoch": 0.68,
"learning_rate": 1.0415913200723327e-05,
"loss": 0.2326,
"step": 16440
},
{
"epoch": 0.69,
"learning_rate": 1.0253164556962025e-05,
"loss": 0.232,
"step": 16560
},
{
"epoch": 0.69,
"learning_rate": 1.0090415913200724e-05,
"loss": 0.2317,
"step": 16680
},
{
"epoch": 0.7,
"learning_rate": 9.927667269439422e-06,
"loss": 0.2316,
"step": 16800
},
{
"epoch": 0.7,
"learning_rate": 9.76491862567812e-06,
"loss": 0.2343,
"step": 16920
},
{
"epoch": 0.71,
"learning_rate": 9.602169981916818e-06,
"loss": 0.2304,
"step": 17040
},
{
"epoch": 0.71,
"learning_rate": 9.439421338155514e-06,
"loss": 0.2297,
"step": 17160
},
{
"epoch": 0.72,
"learning_rate": 9.276672694394214e-06,
"loss": 0.231,
"step": 17280
},
{
"epoch": 0.72,
"learning_rate": 9.113924050632912e-06,
"loss": 0.2316,
"step": 17400
},
{
"epoch": 0.73,
"learning_rate": 8.95117540687161e-06,
"loss": 0.2314,
"step": 17520
},
{
"epoch": 0.73,
"learning_rate": 8.788426763110307e-06,
"loss": 0.2289,
"step": 17640
},
{
"epoch": 0.74,
"learning_rate": 8.625678119349007e-06,
"loss": 0.2275,
"step": 17760
},
{
"epoch": 0.74,
"learning_rate": 8.462929475587705e-06,
"loss": 0.2282,
"step": 17880
},
{
"epoch": 0.75,
"learning_rate": 8.300180831826401e-06,
"loss": 0.2285,
"step": 18000
},
{
"epoch": 0.75,
"learning_rate": 8.1374321880651e-06,
"loss": 0.2284,
"step": 18120
},
{
"epoch": 0.76,
"learning_rate": 7.974683544303797e-06,
"loss": 0.2274,
"step": 18240
},
{
"epoch": 0.76,
"learning_rate": 7.811934900542495e-06,
"loss": 0.2234,
"step": 18360
},
{
"epoch": 0.77,
"learning_rate": 7.649186256781194e-06,
"loss": 0.229,
"step": 18480
},
{
"epoch": 0.77,
"learning_rate": 7.486437613019892e-06,
"loss": 0.2248,
"step": 18600
},
{
"epoch": 0.78,
"learning_rate": 7.32368896925859e-06,
"loss": 0.2232,
"step": 18720
},
{
"epoch": 0.78,
"learning_rate": 7.160940325497287e-06,
"loss": 0.2234,
"step": 18840
},
{
"epoch": 0.79,
"learning_rate": 6.998191681735986e-06,
"loss": 0.2258,
"step": 18960
},
{
"epoch": 0.79,
"learning_rate": 6.835443037974683e-06,
"loss": 0.2238,
"step": 19080
},
{
"epoch": 0.8,
"learning_rate": 6.672694394213382e-06,
"loss": 0.2217,
"step": 19200
},
{
"epoch": 0.8,
"learning_rate": 6.5099457504520796e-06,
"loss": 0.2222,
"step": 19320
},
{
"epoch": 0.81,
"learning_rate": 6.347197106690778e-06,
"loss": 0.2256,
"step": 19440
},
{
"epoch": 0.81,
"learning_rate": 6.184448462929476e-06,
"loss": 0.2221,
"step": 19560
},
{
"epoch": 0.82,
"learning_rate": 6.021699819168174e-06,
"loss": 0.2243,
"step": 19680
},
{
"epoch": 0.82,
"learning_rate": 5.858951175406872e-06,
"loss": 0.2227,
"step": 19800
},
{
"epoch": 0.83,
"learning_rate": 5.69620253164557e-06,
"loss": 0.222,
"step": 19920
},
{
"epoch": 0.83,
"learning_rate": 5.533453887884267e-06,
"loss": 0.2205,
"step": 20040
},
{
"epoch": 0.84,
"learning_rate": 5.3707052441229655e-06,
"loss": 0.22,
"step": 20160
},
{
"epoch": 0.84,
"learning_rate": 5.207956600361664e-06,
"loss": 0.2223,
"step": 20280
},
{
"epoch": 0.85,
"learning_rate": 5.045207956600362e-06,
"loss": 0.2209,
"step": 20400
},
{
"epoch": 0.85,
"learning_rate": 4.88245931283906e-06,
"loss": 0.2207,
"step": 20520
},
{
"epoch": 0.86,
"learning_rate": 4.719710669077757e-06,
"loss": 0.2206,
"step": 20640
},
{
"epoch": 0.86,
"learning_rate": 4.556962025316456e-06,
"loss": 0.2204,
"step": 20760
},
{
"epoch": 0.87,
"learning_rate": 4.394213381555153e-06,
"loss": 0.2221,
"step": 20880
},
{
"epoch": 0.87,
"learning_rate": 4.231464737793852e-06,
"loss": 0.2197,
"step": 21000
},
{
"epoch": 0.88,
"learning_rate": 4.06871609403255e-06,
"loss": 0.2188,
"step": 21120
},
{
"epoch": 0.88,
"learning_rate": 3.905967450271248e-06,
"loss": 0.2202,
"step": 21240
},
{
"epoch": 0.89,
"learning_rate": 3.743218806509946e-06,
"loss": 0.2176,
"step": 21360
},
{
"epoch": 0.89,
"learning_rate": 3.5804701627486435e-06,
"loss": 0.2185,
"step": 21480
},
{
"epoch": 0.9,
"learning_rate": 3.4177215189873417e-06,
"loss": 0.2169,
"step": 21600
},
{
"epoch": 0.9,
"learning_rate": 3.2549728752260398e-06,
"loss": 0.2175,
"step": 21720
},
{
"epoch": 0.91,
"learning_rate": 3.092224231464738e-06,
"loss": 0.2153,
"step": 21840
},
{
"epoch": 0.91,
"learning_rate": 2.929475587703436e-06,
"loss": 0.218,
"step": 21960
},
{
"epoch": 0.92,
"learning_rate": 2.7667269439421337e-06,
"loss": 0.2161,
"step": 22080
},
{
"epoch": 0.92,
"learning_rate": 2.603978300180832e-06,
"loss": 0.2167,
"step": 22200
},
{
"epoch": 0.93,
"learning_rate": 2.44122965641953e-06,
"loss": 0.2166,
"step": 22320
},
{
"epoch": 0.93,
"learning_rate": 2.278481012658228e-06,
"loss": 0.215,
"step": 22440
},
{
"epoch": 0.94,
"learning_rate": 2.115732368896926e-06,
"loss": 0.2145,
"step": 22560
},
{
"epoch": 0.94,
"learning_rate": 1.952983725135624e-06,
"loss": 0.2165,
"step": 22680
},
{
"epoch": 0.95,
"learning_rate": 1.7902350813743218e-06,
"loss": 0.2155,
"step": 22800
}
],
"max_steps": 24120,
"num_train_epochs": 2,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}