paper-cuttingv1 / trainer_state.json
hidonbush's picture
Upload 7 files
2805dcd verified
{
"best_metric": 0.03488548472523689,
"best_model_checkpoint": "paper-cutting-outputs4/checkpoint-640",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003125,
"grad_norm": 7.335386276245117,
"learning_rate": 4.9968750000000005e-05,
"loss": 0.6822,
"step": 1
},
{
"epoch": 0.00625,
"grad_norm": 3.8524413108825684,
"learning_rate": 4.99375e-05,
"loss": 0.4184,
"step": 2
},
{
"epoch": 0.009375,
"grad_norm": 2.6370558738708496,
"learning_rate": 4.9906250000000004e-05,
"loss": 0.3522,
"step": 3
},
{
"epoch": 0.0125,
"grad_norm": 2.292393207550049,
"learning_rate": 4.9875000000000006e-05,
"loss": 0.3575,
"step": 4
},
{
"epoch": 0.015625,
"grad_norm": 2.067209482192993,
"learning_rate": 4.984375e-05,
"loss": 0.2668,
"step": 5
},
{
"epoch": 0.01875,
"grad_norm": 1.895901083946228,
"learning_rate": 4.98125e-05,
"loss": 0.1906,
"step": 6
},
{
"epoch": 0.021875,
"grad_norm": 2.7861101627349854,
"learning_rate": 4.978125e-05,
"loss": 0.278,
"step": 7
},
{
"epoch": 0.025,
"grad_norm": 2.8299612998962402,
"learning_rate": 4.975e-05,
"loss": 0.1534,
"step": 8
},
{
"epoch": 0.028125,
"grad_norm": 2.44992995262146,
"learning_rate": 4.9718750000000006e-05,
"loss": 0.1361,
"step": 9
},
{
"epoch": 0.03125,
"grad_norm": 5.079458236694336,
"learning_rate": 4.96875e-05,
"loss": 0.2674,
"step": 10
},
{
"epoch": 0.034375,
"grad_norm": 1.0134013891220093,
"learning_rate": 4.9656250000000004e-05,
"loss": 0.1353,
"step": 11
},
{
"epoch": 0.0375,
"grad_norm": 1.1559659242630005,
"learning_rate": 4.962500000000001e-05,
"loss": 0.1347,
"step": 12
},
{
"epoch": 0.040625,
"grad_norm": 1.4666818380355835,
"learning_rate": 4.959375e-05,
"loss": 0.1011,
"step": 13
},
{
"epoch": 0.04375,
"grad_norm": 2.2786307334899902,
"learning_rate": 4.95625e-05,
"loss": 0.2227,
"step": 14
},
{
"epoch": 0.046875,
"grad_norm": 3.400648593902588,
"learning_rate": 4.953125e-05,
"loss": 0.1775,
"step": 15
},
{
"epoch": 0.05,
"grad_norm": 1.4692819118499756,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.1277,
"step": 16
},
{
"epoch": 0.053125,
"grad_norm": 1.0176641941070557,
"learning_rate": 4.946875e-05,
"loss": 0.114,
"step": 17
},
{
"epoch": 0.05625,
"grad_norm": 1.5734282732009888,
"learning_rate": 4.94375e-05,
"loss": 0.1533,
"step": 18
},
{
"epoch": 0.059375,
"grad_norm": 1.4404799938201904,
"learning_rate": 4.9406250000000005e-05,
"loss": 0.1014,
"step": 19
},
{
"epoch": 0.0625,
"grad_norm": 1.989609718322754,
"learning_rate": 4.937500000000001e-05,
"loss": 0.1868,
"step": 20
},
{
"epoch": 0.065625,
"grad_norm": 1.4631582498550415,
"learning_rate": 4.9343749999999997e-05,
"loss": 0.1154,
"step": 21
},
{
"epoch": 0.06875,
"grad_norm": 2.48142671585083,
"learning_rate": 4.93125e-05,
"loss": 0.2596,
"step": 22
},
{
"epoch": 0.071875,
"grad_norm": 3.020716905593872,
"learning_rate": 4.928125e-05,
"loss": 0.1456,
"step": 23
},
{
"epoch": 0.075,
"grad_norm": 2.902977466583252,
"learning_rate": 4.9250000000000004e-05,
"loss": 0.204,
"step": 24
},
{
"epoch": 0.078125,
"grad_norm": 1.2099602222442627,
"learning_rate": 4.921875e-05,
"loss": 0.0828,
"step": 25
},
{
"epoch": 0.08125,
"grad_norm": 0.564108669757843,
"learning_rate": 4.91875e-05,
"loss": 0.0664,
"step": 26
},
{
"epoch": 0.084375,
"grad_norm": 1.3213393688201904,
"learning_rate": 4.9156250000000006e-05,
"loss": 0.103,
"step": 27
},
{
"epoch": 0.0875,
"grad_norm": 1.1386405229568481,
"learning_rate": 4.9125e-05,
"loss": 0.1028,
"step": 28
},
{
"epoch": 0.090625,
"grad_norm": 0.9390538930892944,
"learning_rate": 4.9093750000000004e-05,
"loss": 0.1038,
"step": 29
},
{
"epoch": 0.09375,
"grad_norm": 2.117478847503662,
"learning_rate": 4.90625e-05,
"loss": 0.2128,
"step": 30
},
{
"epoch": 0.096875,
"grad_norm": 2.953338861465454,
"learning_rate": 4.903125e-05,
"loss": 0.1803,
"step": 31
},
{
"epoch": 0.1,
"grad_norm": 4.287204265594482,
"learning_rate": 4.9e-05,
"loss": 0.1473,
"step": 32
},
{
"epoch": 0.103125,
"grad_norm": 0.6180092692375183,
"learning_rate": 4.896875e-05,
"loss": 0.0485,
"step": 33
},
{
"epoch": 0.10625,
"grad_norm": 1.5996137857437134,
"learning_rate": 4.8937500000000004e-05,
"loss": 0.0749,
"step": 34
},
{
"epoch": 0.109375,
"grad_norm": 1.6394084692001343,
"learning_rate": 4.8906250000000006e-05,
"loss": 0.1464,
"step": 35
},
{
"epoch": 0.1125,
"grad_norm": 1.3696956634521484,
"learning_rate": 4.8875e-05,
"loss": 0.0705,
"step": 36
},
{
"epoch": 0.115625,
"grad_norm": 1.3914557695388794,
"learning_rate": 4.8843750000000005e-05,
"loss": 0.0801,
"step": 37
},
{
"epoch": 0.11875,
"grad_norm": 1.3529096841812134,
"learning_rate": 4.88125e-05,
"loss": 0.0734,
"step": 38
},
{
"epoch": 0.121875,
"grad_norm": 1.6977622509002686,
"learning_rate": 4.878125e-05,
"loss": 0.1075,
"step": 39
},
{
"epoch": 0.125,
"grad_norm": 0.7406987547874451,
"learning_rate": 4.875e-05,
"loss": 0.0527,
"step": 40
},
{
"epoch": 0.128125,
"grad_norm": 0.7165554761886597,
"learning_rate": 4.871875e-05,
"loss": 0.0638,
"step": 41
},
{
"epoch": 0.13125,
"grad_norm": 0.4635627865791321,
"learning_rate": 4.8687500000000004e-05,
"loss": 0.0548,
"step": 42
},
{
"epoch": 0.134375,
"grad_norm": 1.0817699432373047,
"learning_rate": 4.865625e-05,
"loss": 0.0524,
"step": 43
},
{
"epoch": 0.1375,
"grad_norm": 7.260894775390625,
"learning_rate": 4.8625e-05,
"loss": 0.1702,
"step": 44
},
{
"epoch": 0.140625,
"grad_norm": 6.2862396240234375,
"learning_rate": 4.8593750000000005e-05,
"loss": 0.1442,
"step": 45
},
{
"epoch": 0.14375,
"grad_norm": 2.473857879638672,
"learning_rate": 4.85625e-05,
"loss": 0.0904,
"step": 46
},
{
"epoch": 0.146875,
"grad_norm": 3.885066509246826,
"learning_rate": 4.853125e-05,
"loss": 0.0898,
"step": 47
},
{
"epoch": 0.15,
"grad_norm": 1.2689714431762695,
"learning_rate": 4.85e-05,
"loss": 0.0561,
"step": 48
},
{
"epoch": 0.153125,
"grad_norm": 2.687859058380127,
"learning_rate": 4.846875e-05,
"loss": 0.1582,
"step": 49
},
{
"epoch": 0.15625,
"grad_norm": 0.5876914858818054,
"learning_rate": 4.8437500000000005e-05,
"loss": 0.0458,
"step": 50
},
{
"epoch": 0.159375,
"grad_norm": 8.143379211425781,
"learning_rate": 4.840625e-05,
"loss": 0.2132,
"step": 51
},
{
"epoch": 0.1625,
"grad_norm": 2.991544246673584,
"learning_rate": 4.8375000000000004e-05,
"loss": 0.1885,
"step": 52
},
{
"epoch": 0.165625,
"grad_norm": 1.1332241296768188,
"learning_rate": 4.8343750000000006e-05,
"loss": 0.0871,
"step": 53
},
{
"epoch": 0.16875,
"grad_norm": 0.9255159497261047,
"learning_rate": 4.83125e-05,
"loss": 0.0769,
"step": 54
},
{
"epoch": 0.171875,
"grad_norm": 1.800412654876709,
"learning_rate": 4.828125e-05,
"loss": 0.0633,
"step": 55
},
{
"epoch": 0.175,
"grad_norm": 0.9372511506080627,
"learning_rate": 4.825e-05,
"loss": 0.0504,
"step": 56
},
{
"epoch": 0.178125,
"grad_norm": 0.34521690011024475,
"learning_rate": 4.821875e-05,
"loss": 0.0356,
"step": 57
},
{
"epoch": 0.18125,
"grad_norm": 0.763290524482727,
"learning_rate": 4.81875e-05,
"loss": 0.0599,
"step": 58
},
{
"epoch": 0.184375,
"grad_norm": 0.39605632424354553,
"learning_rate": 4.815625e-05,
"loss": 0.0484,
"step": 59
},
{
"epoch": 0.1875,
"grad_norm": 0.8304187655448914,
"learning_rate": 4.8125000000000004e-05,
"loss": 0.0594,
"step": 60
},
{
"epoch": 0.190625,
"grad_norm": 0.6326848864555359,
"learning_rate": 4.809375000000001e-05,
"loss": 0.0678,
"step": 61
},
{
"epoch": 0.19375,
"grad_norm": 2.4955437183380127,
"learning_rate": 4.80625e-05,
"loss": 0.0682,
"step": 62
},
{
"epoch": 0.196875,
"grad_norm": 4.784257888793945,
"learning_rate": 4.803125e-05,
"loss": 0.1404,
"step": 63
},
{
"epoch": 0.2,
"grad_norm": 0.6176247596740723,
"learning_rate": 4.8e-05,
"loss": 0.0513,
"step": 64
},
{
"epoch": 0.203125,
"grad_norm": 0.35477644205093384,
"learning_rate": 4.7968750000000004e-05,
"loss": 0.0482,
"step": 65
},
{
"epoch": 0.20625,
"grad_norm": 0.8327121734619141,
"learning_rate": 4.79375e-05,
"loss": 0.046,
"step": 66
},
{
"epoch": 0.209375,
"grad_norm": 5.646777153015137,
"learning_rate": 4.790625e-05,
"loss": 0.1429,
"step": 67
},
{
"epoch": 0.2125,
"grad_norm": 0.5917462706565857,
"learning_rate": 4.7875000000000005e-05,
"loss": 0.0498,
"step": 68
},
{
"epoch": 0.215625,
"grad_norm": 3.3251428604125977,
"learning_rate": 4.784375e-05,
"loss": 0.2996,
"step": 69
},
{
"epoch": 0.21875,
"grad_norm": 2.83424711227417,
"learning_rate": 4.7812500000000003e-05,
"loss": 0.072,
"step": 70
},
{
"epoch": 0.221875,
"grad_norm": 2.925737142562866,
"learning_rate": 4.778125e-05,
"loss": 0.0679,
"step": 71
},
{
"epoch": 0.225,
"grad_norm": 1.0089963674545288,
"learning_rate": 4.775e-05,
"loss": 0.0555,
"step": 72
},
{
"epoch": 0.228125,
"grad_norm": 0.6455210447311401,
"learning_rate": 4.771875e-05,
"loss": 0.048,
"step": 73
},
{
"epoch": 0.23125,
"grad_norm": 6.585851669311523,
"learning_rate": 4.76875e-05,
"loss": 0.1616,
"step": 74
},
{
"epoch": 0.234375,
"grad_norm": 1.0643088817596436,
"learning_rate": 4.765625e-05,
"loss": 0.0904,
"step": 75
},
{
"epoch": 0.2375,
"grad_norm": 5.661847114562988,
"learning_rate": 4.7625000000000006e-05,
"loss": 0.1051,
"step": 76
},
{
"epoch": 0.240625,
"grad_norm": 0.9035895466804504,
"learning_rate": 4.759375e-05,
"loss": 0.0482,
"step": 77
},
{
"epoch": 0.24375,
"grad_norm": 8.772421836853027,
"learning_rate": 4.7562500000000004e-05,
"loss": 0.2399,
"step": 78
},
{
"epoch": 0.246875,
"grad_norm": 1.299116611480713,
"learning_rate": 4.753125000000001e-05,
"loss": 0.0796,
"step": 79
},
{
"epoch": 0.25,
"grad_norm": 1.2476378679275513,
"learning_rate": 4.75e-05,
"loss": 0.0712,
"step": 80
},
{
"epoch": 0.253125,
"grad_norm": 0.3314186632633209,
"learning_rate": 4.746875e-05,
"loss": 0.039,
"step": 81
},
{
"epoch": 0.25625,
"grad_norm": 5.228423595428467,
"learning_rate": 4.74375e-05,
"loss": 0.1188,
"step": 82
},
{
"epoch": 0.259375,
"grad_norm": 1.0700784921646118,
"learning_rate": 4.7406250000000004e-05,
"loss": 0.0519,
"step": 83
},
{
"epoch": 0.2625,
"grad_norm": 1.3984243869781494,
"learning_rate": 4.7375e-05,
"loss": 0.1092,
"step": 84
},
{
"epoch": 0.265625,
"grad_norm": 0.9596419334411621,
"learning_rate": 4.734375e-05,
"loss": 0.1136,
"step": 85
},
{
"epoch": 0.26875,
"grad_norm": 0.6729472875595093,
"learning_rate": 4.7312500000000005e-05,
"loss": 0.0539,
"step": 86
},
{
"epoch": 0.271875,
"grad_norm": 1.7557507753372192,
"learning_rate": 4.728125000000001e-05,
"loss": 0.0742,
"step": 87
},
{
"epoch": 0.275,
"grad_norm": 0.6950563788414001,
"learning_rate": 4.7249999999999997e-05,
"loss": 0.0435,
"step": 88
},
{
"epoch": 0.278125,
"grad_norm": 2.5702097415924072,
"learning_rate": 4.721875e-05,
"loss": 0.1343,
"step": 89
},
{
"epoch": 0.28125,
"grad_norm": 0.4034491181373596,
"learning_rate": 4.71875e-05,
"loss": 0.0464,
"step": 90
},
{
"epoch": 0.284375,
"grad_norm": 1.8170459270477295,
"learning_rate": 4.7156250000000004e-05,
"loss": 0.1692,
"step": 91
},
{
"epoch": 0.2875,
"grad_norm": 2.3292648792266846,
"learning_rate": 4.7125e-05,
"loss": 0.1435,
"step": 92
},
{
"epoch": 0.290625,
"grad_norm": 2.5120768547058105,
"learning_rate": 4.709375e-05,
"loss": 0.0533,
"step": 93
},
{
"epoch": 0.29375,
"grad_norm": 0.4364502727985382,
"learning_rate": 4.7062500000000006e-05,
"loss": 0.047,
"step": 94
},
{
"epoch": 0.296875,
"grad_norm": 1.0511380434036255,
"learning_rate": 4.703125e-05,
"loss": 0.0645,
"step": 95
},
{
"epoch": 0.3,
"grad_norm": 1.8538405895233154,
"learning_rate": 4.7e-05,
"loss": 0.0819,
"step": 96
},
{
"epoch": 0.303125,
"grad_norm": 0.2478204071521759,
"learning_rate": 4.696875e-05,
"loss": 0.0324,
"step": 97
},
{
"epoch": 0.30625,
"grad_norm": 0.31610116362571716,
"learning_rate": 4.69375e-05,
"loss": 0.0387,
"step": 98
},
{
"epoch": 0.309375,
"grad_norm": 0.24444423615932465,
"learning_rate": 4.690625e-05,
"loss": 0.0345,
"step": 99
},
{
"epoch": 0.3125,
"grad_norm": 0.391217976808548,
"learning_rate": 4.6875e-05,
"loss": 0.0387,
"step": 100
},
{
"epoch": 0.315625,
"grad_norm": 0.32689204812049866,
"learning_rate": 4.6843750000000004e-05,
"loss": 0.0392,
"step": 101
},
{
"epoch": 0.31875,
"grad_norm": 0.930630087852478,
"learning_rate": 4.6812500000000006e-05,
"loss": 0.0608,
"step": 102
},
{
"epoch": 0.321875,
"grad_norm": 0.7391718029975891,
"learning_rate": 4.678125e-05,
"loss": 0.046,
"step": 103
},
{
"epoch": 0.325,
"grad_norm": 0.5312808752059937,
"learning_rate": 4.6750000000000005e-05,
"loss": 0.0517,
"step": 104
},
{
"epoch": 0.328125,
"grad_norm": 3.3850011825561523,
"learning_rate": 4.671875e-05,
"loss": 0.2326,
"step": 105
},
{
"epoch": 0.33125,
"grad_norm": 2.874311685562134,
"learning_rate": 4.66875e-05,
"loss": 0.0675,
"step": 106
},
{
"epoch": 0.334375,
"grad_norm": 1.2302155494689941,
"learning_rate": 4.665625e-05,
"loss": 0.0487,
"step": 107
},
{
"epoch": 0.3375,
"grad_norm": 2.3928442001342773,
"learning_rate": 4.6625e-05,
"loss": 0.1066,
"step": 108
},
{
"epoch": 0.340625,
"grad_norm": 7.547820568084717,
"learning_rate": 4.6593750000000004e-05,
"loss": 0.1176,
"step": 109
},
{
"epoch": 0.34375,
"grad_norm": 5.985020160675049,
"learning_rate": 4.65625e-05,
"loss": 0.1135,
"step": 110
},
{
"epoch": 0.346875,
"grad_norm": 0.6734495759010315,
"learning_rate": 4.653125e-05,
"loss": 0.0512,
"step": 111
},
{
"epoch": 0.35,
"grad_norm": 1.863390564918518,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.0816,
"step": 112
},
{
"epoch": 0.353125,
"grad_norm": 0.24223440885543823,
"learning_rate": 4.646875e-05,
"loss": 0.0373,
"step": 113
},
{
"epoch": 0.35625,
"grad_norm": 1.8812682628631592,
"learning_rate": 4.64375e-05,
"loss": 0.1516,
"step": 114
},
{
"epoch": 0.359375,
"grad_norm": 0.5781030654907227,
"learning_rate": 4.640625e-05,
"loss": 0.0757,
"step": 115
},
{
"epoch": 0.3625,
"grad_norm": 3.6789309978485107,
"learning_rate": 4.6375e-05,
"loss": 0.114,
"step": 116
},
{
"epoch": 0.365625,
"grad_norm": 0.8261707425117493,
"learning_rate": 4.6343750000000005e-05,
"loss": 0.0697,
"step": 117
},
{
"epoch": 0.36875,
"grad_norm": 1.4307565689086914,
"learning_rate": 4.63125e-05,
"loss": 0.0682,
"step": 118
},
{
"epoch": 0.371875,
"grad_norm": 0.6145803928375244,
"learning_rate": 4.6281250000000003e-05,
"loss": 0.057,
"step": 119
},
{
"epoch": 0.375,
"grad_norm": 3.645620822906494,
"learning_rate": 4.6250000000000006e-05,
"loss": 0.1571,
"step": 120
},
{
"epoch": 0.378125,
"grad_norm": 1.2431564331054688,
"learning_rate": 4.621875e-05,
"loss": 0.0914,
"step": 121
},
{
"epoch": 0.38125,
"grad_norm": 0.5691125988960266,
"learning_rate": 4.61875e-05,
"loss": 0.0584,
"step": 122
},
{
"epoch": 0.384375,
"grad_norm": 0.9178672432899475,
"learning_rate": 4.615625e-05,
"loss": 0.0735,
"step": 123
},
{
"epoch": 0.3875,
"grad_norm": 0.9264055490493774,
"learning_rate": 4.6125e-05,
"loss": 0.0562,
"step": 124
},
{
"epoch": 0.390625,
"grad_norm": 0.6835108399391174,
"learning_rate": 4.609375e-05,
"loss": 0.0522,
"step": 125
},
{
"epoch": 0.39375,
"grad_norm": 0.6347059011459351,
"learning_rate": 4.60625e-05,
"loss": 0.071,
"step": 126
},
{
"epoch": 0.396875,
"grad_norm": 0.8624080419540405,
"learning_rate": 4.6031250000000004e-05,
"loss": 0.0632,
"step": 127
},
{
"epoch": 0.4,
"grad_norm": 3.383160352706909,
"learning_rate": 4.600000000000001e-05,
"loss": 0.1591,
"step": 128
},
{
"epoch": 0.403125,
"grad_norm": 0.9181774258613586,
"learning_rate": 4.596875e-05,
"loss": 0.0627,
"step": 129
},
{
"epoch": 0.40625,
"grad_norm": 0.7968156933784485,
"learning_rate": 4.59375e-05,
"loss": 0.0472,
"step": 130
},
{
"epoch": 0.409375,
"grad_norm": 0.8290426135063171,
"learning_rate": 4.590625e-05,
"loss": 0.0508,
"step": 131
},
{
"epoch": 0.4125,
"grad_norm": 0.950431764125824,
"learning_rate": 4.5875000000000004e-05,
"loss": 0.0615,
"step": 132
},
{
"epoch": 0.415625,
"grad_norm": 1.1434526443481445,
"learning_rate": 4.584375e-05,
"loss": 0.0977,
"step": 133
},
{
"epoch": 0.41875,
"grad_norm": 0.858519971370697,
"learning_rate": 4.58125e-05,
"loss": 0.0626,
"step": 134
},
{
"epoch": 0.421875,
"grad_norm": 0.4805968701839447,
"learning_rate": 4.5781250000000005e-05,
"loss": 0.0475,
"step": 135
},
{
"epoch": 0.425,
"grad_norm": 1.4369895458221436,
"learning_rate": 4.575e-05,
"loss": 0.0479,
"step": 136
},
{
"epoch": 0.428125,
"grad_norm": 0.2389107346534729,
"learning_rate": 4.571875e-05,
"loss": 0.0303,
"step": 137
},
{
"epoch": 0.43125,
"grad_norm": 0.3905261754989624,
"learning_rate": 4.56875e-05,
"loss": 0.0413,
"step": 138
},
{
"epoch": 0.434375,
"grad_norm": 3.2840700149536133,
"learning_rate": 4.565625e-05,
"loss": 0.2217,
"step": 139
},
{
"epoch": 0.4375,
"grad_norm": 2.7919232845306396,
"learning_rate": 4.5625e-05,
"loss": 0.1966,
"step": 140
},
{
"epoch": 0.440625,
"grad_norm": 2.9008607864379883,
"learning_rate": 4.559375e-05,
"loss": 0.2469,
"step": 141
},
{
"epoch": 0.44375,
"grad_norm": 1.680885672569275,
"learning_rate": 4.55625e-05,
"loss": 0.0687,
"step": 142
},
{
"epoch": 0.446875,
"grad_norm": 0.41792237758636475,
"learning_rate": 4.5531250000000006e-05,
"loss": 0.0405,
"step": 143
},
{
"epoch": 0.45,
"grad_norm": 0.24879863858222961,
"learning_rate": 4.55e-05,
"loss": 0.0321,
"step": 144
},
{
"epoch": 0.453125,
"grad_norm": 1.766568899154663,
"learning_rate": 4.5468750000000004e-05,
"loss": 0.0652,
"step": 145
},
{
"epoch": 0.45625,
"grad_norm": 2.5179293155670166,
"learning_rate": 4.54375e-05,
"loss": 0.0711,
"step": 146
},
{
"epoch": 0.459375,
"grad_norm": 1.7946605682373047,
"learning_rate": 4.540625e-05,
"loss": 0.0509,
"step": 147
},
{
"epoch": 0.4625,
"grad_norm": 0.46179938316345215,
"learning_rate": 4.5375e-05,
"loss": 0.0388,
"step": 148
},
{
"epoch": 0.465625,
"grad_norm": 0.3265767991542816,
"learning_rate": 4.534375e-05,
"loss": 0.0403,
"step": 149
},
{
"epoch": 0.46875,
"grad_norm": 0.6787862181663513,
"learning_rate": 4.5312500000000004e-05,
"loss": 0.0452,
"step": 150
},
{
"epoch": 0.471875,
"grad_norm": 0.46895185112953186,
"learning_rate": 4.528125e-05,
"loss": 0.0501,
"step": 151
},
{
"epoch": 0.475,
"grad_norm": 3.4975266456604004,
"learning_rate": 4.525e-05,
"loss": 0.1403,
"step": 152
},
{
"epoch": 0.478125,
"grad_norm": 2.780132532119751,
"learning_rate": 4.5218750000000005e-05,
"loss": 0.0864,
"step": 153
},
{
"epoch": 0.48125,
"grad_norm": 0.5605061054229736,
"learning_rate": 4.518750000000001e-05,
"loss": 0.0437,
"step": 154
},
{
"epoch": 0.484375,
"grad_norm": 1.14775550365448,
"learning_rate": 4.515625e-05,
"loss": 0.073,
"step": 155
},
{
"epoch": 0.4875,
"grad_norm": 0.5990891456604004,
"learning_rate": 4.5125e-05,
"loss": 0.0531,
"step": 156
},
{
"epoch": 0.490625,
"grad_norm": 0.4071764349937439,
"learning_rate": 4.509375e-05,
"loss": 0.0394,
"step": 157
},
{
"epoch": 0.49375,
"grad_norm": 1.256040334701538,
"learning_rate": 4.5062500000000004e-05,
"loss": 0.074,
"step": 158
},
{
"epoch": 0.496875,
"grad_norm": 0.6708590388298035,
"learning_rate": 4.503125e-05,
"loss": 0.0469,
"step": 159
},
{
"epoch": 0.5,
"grad_norm": 1.4366428852081299,
"learning_rate": 4.5e-05,
"loss": 0.0794,
"step": 160
},
{
"epoch": 0.503125,
"grad_norm": 2.625788927078247,
"learning_rate": 4.4968750000000005e-05,
"loss": 0.0456,
"step": 161
},
{
"epoch": 0.50625,
"grad_norm": 0.7705633640289307,
"learning_rate": 4.49375e-05,
"loss": 0.0367,
"step": 162
},
{
"epoch": 0.509375,
"grad_norm": 0.39144399762153625,
"learning_rate": 4.490625e-05,
"loss": 0.0342,
"step": 163
},
{
"epoch": 0.5125,
"grad_norm": 0.43612250685691833,
"learning_rate": 4.4875e-05,
"loss": 0.0377,
"step": 164
},
{
"epoch": 0.515625,
"grad_norm": 0.9478850364685059,
"learning_rate": 4.484375e-05,
"loss": 0.0558,
"step": 165
},
{
"epoch": 0.51875,
"grad_norm": 0.5833193063735962,
"learning_rate": 4.4812500000000005e-05,
"loss": 0.0534,
"step": 166
},
{
"epoch": 0.521875,
"grad_norm": 0.512205183506012,
"learning_rate": 4.478125e-05,
"loss": 0.0344,
"step": 167
},
{
"epoch": 0.525,
"grad_norm": 1.5145163536071777,
"learning_rate": 4.4750000000000004e-05,
"loss": 0.2032,
"step": 168
},
{
"epoch": 0.528125,
"grad_norm": 2.2172932624816895,
"learning_rate": 4.4718750000000006e-05,
"loss": 0.1559,
"step": 169
},
{
"epoch": 0.53125,
"grad_norm": 1.8957103490829468,
"learning_rate": 4.46875e-05,
"loss": 0.052,
"step": 170
},
{
"epoch": 0.534375,
"grad_norm": 0.2700859010219574,
"learning_rate": 4.465625e-05,
"loss": 0.0337,
"step": 171
},
{
"epoch": 0.5375,
"grad_norm": 0.6030570268630981,
"learning_rate": 4.4625e-05,
"loss": 0.0546,
"step": 172
},
{
"epoch": 0.540625,
"grad_norm": 0.25488942861557007,
"learning_rate": 4.459375e-05,
"loss": 0.0344,
"step": 173
},
{
"epoch": 0.54375,
"grad_norm": 0.6342024207115173,
"learning_rate": 4.45625e-05,
"loss": 0.0481,
"step": 174
},
{
"epoch": 0.546875,
"grad_norm": 0.34221383929252625,
"learning_rate": 4.453125e-05,
"loss": 0.0446,
"step": 175
},
{
"epoch": 0.55,
"grad_norm": 2.870440721511841,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.0879,
"step": 176
},
{
"epoch": 0.553125,
"grad_norm": 0.8905161023139954,
"learning_rate": 4.446875e-05,
"loss": 0.0634,
"step": 177
},
{
"epoch": 0.55625,
"grad_norm": 0.35176849365234375,
"learning_rate": 4.44375e-05,
"loss": 0.0528,
"step": 178
},
{
"epoch": 0.559375,
"grad_norm": 0.9682378172874451,
"learning_rate": 4.4406250000000005e-05,
"loss": 0.0496,
"step": 179
},
{
"epoch": 0.5625,
"grad_norm": 1.700124740600586,
"learning_rate": 4.4375e-05,
"loss": 0.0824,
"step": 180
},
{
"epoch": 0.565625,
"grad_norm": 0.5797895193099976,
"learning_rate": 4.4343750000000004e-05,
"loss": 0.0424,
"step": 181
},
{
"epoch": 0.56875,
"grad_norm": 0.8441882729530334,
"learning_rate": 4.43125e-05,
"loss": 0.0501,
"step": 182
},
{
"epoch": 0.571875,
"grad_norm": 0.999452531337738,
"learning_rate": 4.428125e-05,
"loss": 0.0505,
"step": 183
},
{
"epoch": 0.575,
"grad_norm": 0.48567822575569153,
"learning_rate": 4.4250000000000005e-05,
"loss": 0.0384,
"step": 184
},
{
"epoch": 0.578125,
"grad_norm": 2.9717018604278564,
"learning_rate": 4.421875e-05,
"loss": 0.0806,
"step": 185
},
{
"epoch": 0.58125,
"grad_norm": 0.2634088695049286,
"learning_rate": 4.4187500000000003e-05,
"loss": 0.0416,
"step": 186
},
{
"epoch": 0.584375,
"grad_norm": 0.593612015247345,
"learning_rate": 4.4156250000000006e-05,
"loss": 0.0397,
"step": 187
},
{
"epoch": 0.5875,
"grad_norm": 0.531893789768219,
"learning_rate": 4.4125e-05,
"loss": 0.0444,
"step": 188
},
{
"epoch": 0.590625,
"grad_norm": 0.29221251606941223,
"learning_rate": 4.409375e-05,
"loss": 0.0288,
"step": 189
},
{
"epoch": 0.59375,
"grad_norm": 0.5673308372497559,
"learning_rate": 4.40625e-05,
"loss": 0.032,
"step": 190
},
{
"epoch": 0.596875,
"grad_norm": 0.9111027121543884,
"learning_rate": 4.403125e-05,
"loss": 0.0574,
"step": 191
},
{
"epoch": 0.6,
"grad_norm": 0.6866423487663269,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.0484,
"step": 192
},
{
"epoch": 0.603125,
"grad_norm": 0.30543097853660583,
"learning_rate": 4.396875e-05,
"loss": 0.0382,
"step": 193
},
{
"epoch": 0.60625,
"grad_norm": 0.3667004704475403,
"learning_rate": 4.3937500000000004e-05,
"loss": 0.039,
"step": 194
},
{
"epoch": 0.609375,
"grad_norm": 0.351158082485199,
"learning_rate": 4.390625000000001e-05,
"loss": 0.0286,
"step": 195
},
{
"epoch": 0.6125,
"grad_norm": 0.22909612953662872,
"learning_rate": 4.3875e-05,
"loss": 0.0378,
"step": 196
},
{
"epoch": 0.615625,
"grad_norm": 0.6320449709892273,
"learning_rate": 4.384375e-05,
"loss": 0.0676,
"step": 197
},
{
"epoch": 0.61875,
"grad_norm": 0.6454492211341858,
"learning_rate": 4.38125e-05,
"loss": 0.0461,
"step": 198
},
{
"epoch": 0.621875,
"grad_norm": 0.3367100954055786,
"learning_rate": 4.3781250000000004e-05,
"loss": 0.0352,
"step": 199
},
{
"epoch": 0.625,
"grad_norm": 0.46938446164131165,
"learning_rate": 4.375e-05,
"loss": 0.0406,
"step": 200
},
{
"epoch": 0.628125,
"grad_norm": 1.5403215885162354,
"learning_rate": 4.371875e-05,
"loss": 0.0819,
"step": 201
},
{
"epoch": 0.63125,
"grad_norm": 0.8773464560508728,
"learning_rate": 4.3687500000000005e-05,
"loss": 0.0625,
"step": 202
},
{
"epoch": 0.634375,
"grad_norm": 2.457127571105957,
"learning_rate": 4.365625000000001e-05,
"loss": 0.1925,
"step": 203
},
{
"epoch": 0.6375,
"grad_norm": 0.7906845808029175,
"learning_rate": 4.3625e-05,
"loss": 0.0595,
"step": 204
},
{
"epoch": 0.640625,
"grad_norm": 0.7821226716041565,
"learning_rate": 4.359375e-05,
"loss": 0.0494,
"step": 205
},
{
"epoch": 0.64375,
"grad_norm": 0.373221218585968,
"learning_rate": 4.35625e-05,
"loss": 0.0345,
"step": 206
},
{
"epoch": 0.646875,
"grad_norm": 0.2497500479221344,
"learning_rate": 4.3531250000000004e-05,
"loss": 0.0285,
"step": 207
},
{
"epoch": 0.65,
"grad_norm": 0.20948593318462372,
"learning_rate": 4.35e-05,
"loss": 0.0326,
"step": 208
},
{
"epoch": 0.653125,
"grad_norm": 0.6792771220207214,
"learning_rate": 4.346875e-05,
"loss": 0.0404,
"step": 209
},
{
"epoch": 0.65625,
"grad_norm": 0.9426431655883789,
"learning_rate": 4.3437500000000006e-05,
"loss": 0.0337,
"step": 210
},
{
"epoch": 0.659375,
"grad_norm": 0.46820443868637085,
"learning_rate": 4.340625e-05,
"loss": 0.0437,
"step": 211
},
{
"epoch": 0.6625,
"grad_norm": 0.2894236147403717,
"learning_rate": 4.3375000000000004e-05,
"loss": 0.0356,
"step": 212
},
{
"epoch": 0.665625,
"grad_norm": 1.8175113201141357,
"learning_rate": 4.334375e-05,
"loss": 0.0478,
"step": 213
},
{
"epoch": 0.66875,
"grad_norm": 0.35592272877693176,
"learning_rate": 4.33125e-05,
"loss": 0.0461,
"step": 214
},
{
"epoch": 0.671875,
"grad_norm": 1.7846156358718872,
"learning_rate": 4.328125e-05,
"loss": 0.1487,
"step": 215
},
{
"epoch": 0.675,
"grad_norm": 2.822322368621826,
"learning_rate": 4.325e-05,
"loss": 0.1751,
"step": 216
},
{
"epoch": 0.678125,
"grad_norm": 1.4585069417953491,
"learning_rate": 4.3218750000000004e-05,
"loss": 0.0771,
"step": 217
},
{
"epoch": 0.68125,
"grad_norm": 3.51526141166687,
"learning_rate": 4.3187500000000006e-05,
"loss": 0.0757,
"step": 218
},
{
"epoch": 0.684375,
"grad_norm": 0.7315062880516052,
"learning_rate": 4.315625e-05,
"loss": 0.0552,
"step": 219
},
{
"epoch": 0.6875,
"grad_norm": 6.735369682312012,
"learning_rate": 4.3125000000000005e-05,
"loss": 0.2329,
"step": 220
},
{
"epoch": 0.690625,
"grad_norm": 0.3840850293636322,
"learning_rate": 4.309375e-05,
"loss": 0.0395,
"step": 221
},
{
"epoch": 0.69375,
"grad_norm": 0.7250546813011169,
"learning_rate": 4.30625e-05,
"loss": 0.0395,
"step": 222
},
{
"epoch": 0.696875,
"grad_norm": 0.7680365443229675,
"learning_rate": 4.303125e-05,
"loss": 0.0488,
"step": 223
},
{
"epoch": 0.7,
"grad_norm": 0.4709233343601227,
"learning_rate": 4.3e-05,
"loss": 0.0554,
"step": 224
},
{
"epoch": 0.703125,
"grad_norm": 0.3094955086708069,
"learning_rate": 4.2968750000000004e-05,
"loss": 0.0297,
"step": 225
},
{
"epoch": 0.70625,
"grad_norm": 0.7511866092681885,
"learning_rate": 4.29375e-05,
"loss": 0.0572,
"step": 226
},
{
"epoch": 0.709375,
"grad_norm": 0.7133306264877319,
"learning_rate": 4.290625e-05,
"loss": 0.034,
"step": 227
},
{
"epoch": 0.7125,
"grad_norm": 1.1605385541915894,
"learning_rate": 4.2875000000000005e-05,
"loss": 0.0722,
"step": 228
},
{
"epoch": 0.715625,
"grad_norm": 2.668914794921875,
"learning_rate": 4.284375000000001e-05,
"loss": 0.1256,
"step": 229
},
{
"epoch": 0.71875,
"grad_norm": 0.4482596814632416,
"learning_rate": 4.28125e-05,
"loss": 0.0447,
"step": 230
},
{
"epoch": 0.721875,
"grad_norm": 0.3817666172981262,
"learning_rate": 4.278125e-05,
"loss": 0.0403,
"step": 231
},
{
"epoch": 0.725,
"grad_norm": 1.5475538969039917,
"learning_rate": 4.275e-05,
"loss": 0.0511,
"step": 232
},
{
"epoch": 0.728125,
"grad_norm": 2.1910324096679688,
"learning_rate": 4.2718750000000005e-05,
"loss": 0.1158,
"step": 233
},
{
"epoch": 0.73125,
"grad_norm": 1.3402481079101562,
"learning_rate": 4.26875e-05,
"loss": 0.0772,
"step": 234
},
{
"epoch": 0.734375,
"grad_norm": 0.43478524684906006,
"learning_rate": 4.2656250000000003e-05,
"loss": 0.0342,
"step": 235
},
{
"epoch": 0.7375,
"grad_norm": 0.15397940576076508,
"learning_rate": 4.2625000000000006e-05,
"loss": 0.0251,
"step": 236
},
{
"epoch": 0.740625,
"grad_norm": 0.2772510051727295,
"learning_rate": 4.259375e-05,
"loss": 0.0446,
"step": 237
},
{
"epoch": 0.74375,
"grad_norm": 1.5829936265945435,
"learning_rate": 4.25625e-05,
"loss": 0.0696,
"step": 238
},
{
"epoch": 0.746875,
"grad_norm": 1.7104264497756958,
"learning_rate": 4.253125e-05,
"loss": 0.1076,
"step": 239
},
{
"epoch": 0.75,
"grad_norm": 1.011113166809082,
"learning_rate": 4.25e-05,
"loss": 0.0698,
"step": 240
},
{
"epoch": 0.753125,
"grad_norm": 0.2590930461883545,
"learning_rate": 4.246875e-05,
"loss": 0.0292,
"step": 241
},
{
"epoch": 0.75625,
"grad_norm": 0.6169472932815552,
"learning_rate": 4.24375e-05,
"loss": 0.0486,
"step": 242
},
{
"epoch": 0.759375,
"grad_norm": 0.5677545070648193,
"learning_rate": 4.2406250000000004e-05,
"loss": 0.0487,
"step": 243
},
{
"epoch": 0.7625,
"grad_norm": 0.1974128633737564,
"learning_rate": 4.237500000000001e-05,
"loss": 0.0346,
"step": 244
},
{
"epoch": 0.765625,
"grad_norm": 0.2740221321582794,
"learning_rate": 4.234375e-05,
"loss": 0.0337,
"step": 245
},
{
"epoch": 0.76875,
"grad_norm": 3.3777387142181396,
"learning_rate": 4.23125e-05,
"loss": 0.0799,
"step": 246
},
{
"epoch": 0.771875,
"grad_norm": 0.6743062138557434,
"learning_rate": 4.228125e-05,
"loss": 0.0388,
"step": 247
},
{
"epoch": 0.775,
"grad_norm": 0.25554272532463074,
"learning_rate": 4.2250000000000004e-05,
"loss": 0.0432,
"step": 248
},
{
"epoch": 0.778125,
"grad_norm": 0.2516278326511383,
"learning_rate": 4.221875e-05,
"loss": 0.0322,
"step": 249
},
{
"epoch": 0.78125,
"grad_norm": 0.34015893936157227,
"learning_rate": 4.21875e-05,
"loss": 0.0379,
"step": 250
},
{
"epoch": 0.784375,
"grad_norm": 2.134775161743164,
"learning_rate": 4.2156250000000005e-05,
"loss": 0.2122,
"step": 251
},
{
"epoch": 0.7875,
"grad_norm": 0.3281037211418152,
"learning_rate": 4.2125e-05,
"loss": 0.0358,
"step": 252
},
{
"epoch": 0.790625,
"grad_norm": 1.2461293935775757,
"learning_rate": 4.209375e-05,
"loss": 0.0687,
"step": 253
},
{
"epoch": 0.79375,
"grad_norm": 3.500396251678467,
"learning_rate": 4.2062500000000006e-05,
"loss": 0.1092,
"step": 254
},
{
"epoch": 0.796875,
"grad_norm": 0.7699318528175354,
"learning_rate": 4.203125e-05,
"loss": 0.0929,
"step": 255
},
{
"epoch": 0.8,
"grad_norm": 1.3767575025558472,
"learning_rate": 4.2e-05,
"loss": 0.0533,
"step": 256
},
{
"epoch": 0.803125,
"grad_norm": 0.3553447425365448,
"learning_rate": 4.196875e-05,
"loss": 0.0423,
"step": 257
},
{
"epoch": 0.80625,
"grad_norm": 0.6199429035186768,
"learning_rate": 4.19375e-05,
"loss": 0.0455,
"step": 258
},
{
"epoch": 0.809375,
"grad_norm": 0.23632241785526276,
"learning_rate": 4.1906250000000006e-05,
"loss": 0.03,
"step": 259
},
{
"epoch": 0.8125,
"grad_norm": 4.547003746032715,
"learning_rate": 4.1875e-05,
"loss": 0.0938,
"step": 260
},
{
"epoch": 0.815625,
"grad_norm": 0.2540350556373596,
"learning_rate": 4.1843750000000004e-05,
"loss": 0.0367,
"step": 261
},
{
"epoch": 0.81875,
"grad_norm": 0.418260395526886,
"learning_rate": 4.181250000000001e-05,
"loss": 0.0446,
"step": 262
},
{
"epoch": 0.821875,
"grad_norm": 0.23045071959495544,
"learning_rate": 4.178125e-05,
"loss": 0.0308,
"step": 263
},
{
"epoch": 0.825,
"grad_norm": 0.8318238854408264,
"learning_rate": 4.175e-05,
"loss": 0.0698,
"step": 264
},
{
"epoch": 0.828125,
"grad_norm": 0.45676013827323914,
"learning_rate": 4.171875e-05,
"loss": 0.0388,
"step": 265
},
{
"epoch": 0.83125,
"grad_norm": 3.701650619506836,
"learning_rate": 4.1687500000000004e-05,
"loss": 0.1129,
"step": 266
},
{
"epoch": 0.834375,
"grad_norm": 1.591991662979126,
"learning_rate": 4.165625e-05,
"loss": 0.0957,
"step": 267
},
{
"epoch": 0.8375,
"grad_norm": 0.4643809497356415,
"learning_rate": 4.1625e-05,
"loss": 0.0346,
"step": 268
},
{
"epoch": 0.840625,
"grad_norm": 0.34625670313835144,
"learning_rate": 4.1593750000000005e-05,
"loss": 0.0388,
"step": 269
},
{
"epoch": 0.84375,
"grad_norm": 0.3321349024772644,
"learning_rate": 4.156250000000001e-05,
"loss": 0.0353,
"step": 270
},
{
"epoch": 0.846875,
"grad_norm": 0.23249466717243195,
"learning_rate": 4.1531249999999996e-05,
"loss": 0.0322,
"step": 271
},
{
"epoch": 0.85,
"grad_norm": 0.27158722281455994,
"learning_rate": 4.15e-05,
"loss": 0.0384,
"step": 272
},
{
"epoch": 0.853125,
"grad_norm": 0.2619187533855438,
"learning_rate": 4.146875e-05,
"loss": 0.034,
"step": 273
},
{
"epoch": 0.85625,
"grad_norm": 0.25599196553230286,
"learning_rate": 4.1437500000000004e-05,
"loss": 0.0366,
"step": 274
},
{
"epoch": 0.859375,
"grad_norm": 0.16068707406520844,
"learning_rate": 4.140625e-05,
"loss": 0.0259,
"step": 275
},
{
"epoch": 0.8625,
"grad_norm": 0.2689169645309448,
"learning_rate": 4.1375e-05,
"loss": 0.0301,
"step": 276
},
{
"epoch": 0.865625,
"grad_norm": 0.12709015607833862,
"learning_rate": 4.1343750000000005e-05,
"loss": 0.0287,
"step": 277
},
{
"epoch": 0.86875,
"grad_norm": 1.140897512435913,
"learning_rate": 4.13125e-05,
"loss": 0.0614,
"step": 278
},
{
"epoch": 0.871875,
"grad_norm": 2.3372366428375244,
"learning_rate": 4.1281250000000004e-05,
"loss": 0.0852,
"step": 279
},
{
"epoch": 0.875,
"grad_norm": 1.0569149255752563,
"learning_rate": 4.125e-05,
"loss": 0.0439,
"step": 280
},
{
"epoch": 0.878125,
"grad_norm": 0.7298460006713867,
"learning_rate": 4.121875e-05,
"loss": 0.0479,
"step": 281
},
{
"epoch": 0.88125,
"grad_norm": 1.4520108699798584,
"learning_rate": 4.11875e-05,
"loss": 0.051,
"step": 282
},
{
"epoch": 0.884375,
"grad_norm": 1.1062657833099365,
"learning_rate": 4.115625e-05,
"loss": 0.0527,
"step": 283
},
{
"epoch": 0.8875,
"grad_norm": 0.32543355226516724,
"learning_rate": 4.1125000000000004e-05,
"loss": 0.0361,
"step": 284
},
{
"epoch": 0.890625,
"grad_norm": 1.7986284494400024,
"learning_rate": 4.1093750000000006e-05,
"loss": 0.1552,
"step": 285
},
{
"epoch": 0.89375,
"grad_norm": 0.2922920286655426,
"learning_rate": 4.10625e-05,
"loss": 0.0335,
"step": 286
},
{
"epoch": 0.896875,
"grad_norm": 0.2638067305088043,
"learning_rate": 4.1031250000000005e-05,
"loss": 0.0301,
"step": 287
},
{
"epoch": 0.9,
"grad_norm": 0.16769519448280334,
"learning_rate": 4.1e-05,
"loss": 0.0267,
"step": 288
},
{
"epoch": 0.903125,
"grad_norm": 0.38744208216667175,
"learning_rate": 4.096875e-05,
"loss": 0.0326,
"step": 289
},
{
"epoch": 0.90625,
"grad_norm": 1.4372221231460571,
"learning_rate": 4.09375e-05,
"loss": 0.0699,
"step": 290
},
{
"epoch": 0.909375,
"grad_norm": 1.0212301015853882,
"learning_rate": 4.090625e-05,
"loss": 0.1233,
"step": 291
},
{
"epoch": 0.9125,
"grad_norm": 0.18620982766151428,
"learning_rate": 4.0875000000000004e-05,
"loss": 0.0254,
"step": 292
},
{
"epoch": 0.915625,
"grad_norm": 0.6351355910301208,
"learning_rate": 4.084375e-05,
"loss": 0.0442,
"step": 293
},
{
"epoch": 0.91875,
"grad_norm": 0.2035447657108307,
"learning_rate": 4.08125e-05,
"loss": 0.0342,
"step": 294
},
{
"epoch": 0.921875,
"grad_norm": 0.8789761066436768,
"learning_rate": 4.0781250000000005e-05,
"loss": 0.0601,
"step": 295
},
{
"epoch": 0.925,
"grad_norm": 0.4256390631198883,
"learning_rate": 4.075e-05,
"loss": 0.0335,
"step": 296
},
{
"epoch": 0.928125,
"grad_norm": 0.5044748187065125,
"learning_rate": 4.071875e-05,
"loss": 0.0403,
"step": 297
},
{
"epoch": 0.93125,
"grad_norm": 0.6527408361434937,
"learning_rate": 4.06875e-05,
"loss": 0.0525,
"step": 298
},
{
"epoch": 0.934375,
"grad_norm": 0.5137639045715332,
"learning_rate": 4.065625e-05,
"loss": 0.0328,
"step": 299
},
{
"epoch": 0.9375,
"grad_norm": 0.3386867940425873,
"learning_rate": 4.0625000000000005e-05,
"loss": 0.0299,
"step": 300
},
{
"epoch": 0.940625,
"grad_norm": 0.3191845417022705,
"learning_rate": 4.059375e-05,
"loss": 0.0293,
"step": 301
},
{
"epoch": 0.94375,
"grad_norm": 0.6586815118789673,
"learning_rate": 4.0562500000000003e-05,
"loss": 0.0334,
"step": 302
},
{
"epoch": 0.946875,
"grad_norm": 0.33495861291885376,
"learning_rate": 4.0531250000000006e-05,
"loss": 0.0378,
"step": 303
},
{
"epoch": 0.95,
"grad_norm": 0.2046739012002945,
"learning_rate": 4.05e-05,
"loss": 0.0276,
"step": 304
},
{
"epoch": 0.953125,
"grad_norm": 1.4748613834381104,
"learning_rate": 4.046875e-05,
"loss": 0.0903,
"step": 305
},
{
"epoch": 0.95625,
"grad_norm": 1.4325345754623413,
"learning_rate": 4.04375e-05,
"loss": 0.0447,
"step": 306
},
{
"epoch": 0.959375,
"grad_norm": 1.7935466766357422,
"learning_rate": 4.040625e-05,
"loss": 0.1974,
"step": 307
},
{
"epoch": 0.9625,
"grad_norm": 0.872593104839325,
"learning_rate": 4.0375e-05,
"loss": 0.0544,
"step": 308
},
{
"epoch": 0.965625,
"grad_norm": 0.17810046672821045,
"learning_rate": 4.034375e-05,
"loss": 0.0253,
"step": 309
},
{
"epoch": 0.96875,
"grad_norm": 0.3628585636615753,
"learning_rate": 4.0312500000000004e-05,
"loss": 0.0293,
"step": 310
},
{
"epoch": 0.971875,
"grad_norm": 0.3988589346408844,
"learning_rate": 4.028125000000001e-05,
"loss": 0.0294,
"step": 311
},
{
"epoch": 0.975,
"grad_norm": 0.5909652709960938,
"learning_rate": 4.025e-05,
"loss": 0.0454,
"step": 312
},
{
"epoch": 0.978125,
"grad_norm": 0.44711074233055115,
"learning_rate": 4.021875e-05,
"loss": 0.0326,
"step": 313
},
{
"epoch": 0.98125,
"grad_norm": 0.5259913802146912,
"learning_rate": 4.01875e-05,
"loss": 0.0349,
"step": 314
},
{
"epoch": 0.984375,
"grad_norm": 0.4235933721065521,
"learning_rate": 4.0156250000000004e-05,
"loss": 0.0339,
"step": 315
},
{
"epoch": 0.9875,
"grad_norm": 0.26886850595474243,
"learning_rate": 4.0125e-05,
"loss": 0.0276,
"step": 316
},
{
"epoch": 0.990625,
"grad_norm": 0.11596307903528214,
"learning_rate": 4.009375e-05,
"loss": 0.0209,
"step": 317
},
{
"epoch": 0.99375,
"grad_norm": 2.2217676639556885,
"learning_rate": 4.0062500000000005e-05,
"loss": 0.1677,
"step": 318
},
{
"epoch": 0.996875,
"grad_norm": 0.20450957119464874,
"learning_rate": 4.003125e-05,
"loss": 0.0269,
"step": 319
},
{
"epoch": 1.0,
"grad_norm": 0.8894203305244446,
"learning_rate": 4e-05,
"loss": 0.0656,
"step": 320
},
{
"epoch": 1.0,
"eval_accuracy_N/A": NaN,
"eval_accuracy_content": 0.9856742228684513,
"eval_iou_N/A": 0.0,
"eval_iou_content": 0.9856742228684513,
"eval_loss": 0.04362819343805313,
"eval_mean_accuracy": 0.9856742228684513,
"eval_mean_iou": 0.49283711143422565,
"eval_overall_accuracy": 0.9856742228684513,
"eval_runtime": 999.4276,
"eval_samples_per_second": 1.281,
"eval_steps_per_second": 0.08,
"step": 320
},
{
"epoch": 1.003125,
"grad_norm": 1.1715222597122192,
"learning_rate": 3.996875e-05,
"loss": 0.0396,
"step": 321
},
{
"epoch": 1.00625,
"grad_norm": 0.3776068389415741,
"learning_rate": 3.99375e-05,
"loss": 0.0436,
"step": 322
},
{
"epoch": 1.009375,
"grad_norm": 2.7889890670776367,
"learning_rate": 3.990625e-05,
"loss": 0.0938,
"step": 323
},
{
"epoch": 1.0125,
"grad_norm": 0.6198174953460693,
"learning_rate": 3.9875e-05,
"loss": 0.0379,
"step": 324
},
{
"epoch": 1.015625,
"grad_norm": 0.18462005257606506,
"learning_rate": 3.984375e-05,
"loss": 0.0262,
"step": 325
},
{
"epoch": 1.01875,
"grad_norm": 0.5885348320007324,
"learning_rate": 3.9812500000000005e-05,
"loss": 0.0412,
"step": 326
},
{
"epoch": 1.021875,
"grad_norm": 2.135707139968872,
"learning_rate": 3.978125e-05,
"loss": 0.0545,
"step": 327
},
{
"epoch": 1.025,
"grad_norm": 1.119269609451294,
"learning_rate": 3.9750000000000004e-05,
"loss": 0.0482,
"step": 328
},
{
"epoch": 1.028125,
"grad_norm": 0.5240402817726135,
"learning_rate": 3.9718750000000007e-05,
"loss": 0.0335,
"step": 329
},
{
"epoch": 1.03125,
"grad_norm": 0.14166490733623505,
"learning_rate": 3.96875e-05,
"loss": 0.0207,
"step": 330
},
{
"epoch": 1.034375,
"grad_norm": 0.8404735922813416,
"learning_rate": 3.965625e-05,
"loss": 0.0424,
"step": 331
},
{
"epoch": 1.0375,
"grad_norm": 0.206669881939888,
"learning_rate": 3.9625e-05,
"loss": 0.0273,
"step": 332
},
{
"epoch": 1.040625,
"grad_norm": 0.2163994461297989,
"learning_rate": 3.9593750000000004e-05,
"loss": 0.0343,
"step": 333
},
{
"epoch": 1.04375,
"grad_norm": 0.4119163155555725,
"learning_rate": 3.95625e-05,
"loss": 0.0364,
"step": 334
},
{
"epoch": 1.046875,
"grad_norm": 0.37234818935394287,
"learning_rate": 3.953125e-05,
"loss": 0.0246,
"step": 335
},
{
"epoch": 1.05,
"grad_norm": 0.3997434377670288,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.0276,
"step": 336
},
{
"epoch": 1.053125,
"grad_norm": 0.13193507492542267,
"learning_rate": 3.946875000000001e-05,
"loss": 0.0277,
"step": 337
},
{
"epoch": 1.05625,
"grad_norm": 0.1905323565006256,
"learning_rate": 3.9437499999999996e-05,
"loss": 0.0229,
"step": 338
},
{
"epoch": 1.059375,
"grad_norm": 0.23189271986484528,
"learning_rate": 3.940625e-05,
"loss": 0.0305,
"step": 339
},
{
"epoch": 1.0625,
"grad_norm": 0.4632808268070221,
"learning_rate": 3.9375e-05,
"loss": 0.0273,
"step": 340
},
{
"epoch": 1.065625,
"grad_norm": 0.15538431704044342,
"learning_rate": 3.9343750000000004e-05,
"loss": 0.0259,
"step": 341
},
{
"epoch": 1.06875,
"grad_norm": 0.53954017162323,
"learning_rate": 3.93125e-05,
"loss": 0.0324,
"step": 342
},
{
"epoch": 1.071875,
"grad_norm": 0.48742935061454773,
"learning_rate": 3.928125e-05,
"loss": 0.0329,
"step": 343
},
{
"epoch": 1.075,
"grad_norm": 0.22778959572315216,
"learning_rate": 3.9250000000000005e-05,
"loss": 0.0288,
"step": 344
},
{
"epoch": 1.078125,
"grad_norm": 0.2957710027694702,
"learning_rate": 3.921875e-05,
"loss": 0.0274,
"step": 345
},
{
"epoch": 1.08125,
"grad_norm": 0.5753316283226013,
"learning_rate": 3.91875e-05,
"loss": 0.0355,
"step": 346
},
{
"epoch": 1.084375,
"grad_norm": 1.4917867183685303,
"learning_rate": 3.915625e-05,
"loss": 0.0715,
"step": 347
},
{
"epoch": 1.0875,
"grad_norm": 2.4194159507751465,
"learning_rate": 3.9125e-05,
"loss": 0.1007,
"step": 348
},
{
"epoch": 1.090625,
"grad_norm": 0.2074752151966095,
"learning_rate": 3.909375e-05,
"loss": 0.0281,
"step": 349
},
{
"epoch": 1.09375,
"grad_norm": 0.315179705619812,
"learning_rate": 3.90625e-05,
"loss": 0.0387,
"step": 350
},
{
"epoch": 1.096875,
"grad_norm": 0.8541421890258789,
"learning_rate": 3.9031250000000003e-05,
"loss": 0.059,
"step": 351
},
{
"epoch": 1.1,
"grad_norm": 0.3108535706996918,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0294,
"step": 352
},
{
"epoch": 1.103125,
"grad_norm": 0.6527621150016785,
"learning_rate": 3.896875e-05,
"loss": 0.0463,
"step": 353
},
{
"epoch": 1.10625,
"grad_norm": 0.20114494860172272,
"learning_rate": 3.8937500000000005e-05,
"loss": 0.0304,
"step": 354
},
{
"epoch": 1.109375,
"grad_norm": 0.4402085244655609,
"learning_rate": 3.890625e-05,
"loss": 0.0413,
"step": 355
},
{
"epoch": 1.1125,
"grad_norm": 0.38828253746032715,
"learning_rate": 3.8875e-05,
"loss": 0.0338,
"step": 356
},
{
"epoch": 1.115625,
"grad_norm": 0.7028439044952393,
"learning_rate": 3.884375e-05,
"loss": 0.0461,
"step": 357
},
{
"epoch": 1.11875,
"grad_norm": 0.9470064640045166,
"learning_rate": 3.88125e-05,
"loss": 0.0419,
"step": 358
},
{
"epoch": 1.121875,
"grad_norm": 0.5290505290031433,
"learning_rate": 3.8781250000000004e-05,
"loss": 0.0357,
"step": 359
},
{
"epoch": 1.125,
"grad_norm": 1.1553906202316284,
"learning_rate": 3.875e-05,
"loss": 0.0722,
"step": 360
},
{
"epoch": 1.128125,
"grad_norm": 0.40173131227493286,
"learning_rate": 3.871875e-05,
"loss": 0.0286,
"step": 361
},
{
"epoch": 1.13125,
"grad_norm": 0.895039975643158,
"learning_rate": 3.8687500000000005e-05,
"loss": 0.0491,
"step": 362
},
{
"epoch": 1.134375,
"grad_norm": 0.607846200466156,
"learning_rate": 3.865625e-05,
"loss": 0.0445,
"step": 363
},
{
"epoch": 1.1375,
"grad_norm": 1.4331371784210205,
"learning_rate": 3.8625e-05,
"loss": 0.0725,
"step": 364
},
{
"epoch": 1.140625,
"grad_norm": 0.7139254212379456,
"learning_rate": 3.859375e-05,
"loss": 0.0387,
"step": 365
},
{
"epoch": 1.14375,
"grad_norm": 0.21128199994564056,
"learning_rate": 3.85625e-05,
"loss": 0.0253,
"step": 366
},
{
"epoch": 1.146875,
"grad_norm": 0.28902533650398254,
"learning_rate": 3.8531250000000005e-05,
"loss": 0.0247,
"step": 367
},
{
"epoch": 1.15,
"grad_norm": 0.33391180634498596,
"learning_rate": 3.85e-05,
"loss": 0.0437,
"step": 368
},
{
"epoch": 1.153125,
"grad_norm": 0.2734394371509552,
"learning_rate": 3.846875e-05,
"loss": 0.0273,
"step": 369
},
{
"epoch": 1.15625,
"grad_norm": 0.9343364238739014,
"learning_rate": 3.8437500000000006e-05,
"loss": 0.0339,
"step": 370
},
{
"epoch": 1.159375,
"grad_norm": 1.0542594194412231,
"learning_rate": 3.840625e-05,
"loss": 0.0485,
"step": 371
},
{
"epoch": 1.1625,
"grad_norm": 2.3781888484954834,
"learning_rate": 3.8375e-05,
"loss": 0.0381,
"step": 372
},
{
"epoch": 1.165625,
"grad_norm": 0.3513585329055786,
"learning_rate": 3.834375e-05,
"loss": 0.0474,
"step": 373
},
{
"epoch": 1.16875,
"grad_norm": 0.2519855797290802,
"learning_rate": 3.83125e-05,
"loss": 0.0267,
"step": 374
},
{
"epoch": 1.171875,
"grad_norm": 1.4920951128005981,
"learning_rate": 3.828125e-05,
"loss": 0.0412,
"step": 375
},
{
"epoch": 1.175,
"grad_norm": 0.26298317313194275,
"learning_rate": 3.825e-05,
"loss": 0.0263,
"step": 376
},
{
"epoch": 1.178125,
"grad_norm": 1.309157133102417,
"learning_rate": 3.8218750000000004e-05,
"loss": 0.05,
"step": 377
},
{
"epoch": 1.18125,
"grad_norm": 2.483985424041748,
"learning_rate": 3.818750000000001e-05,
"loss": 0.056,
"step": 378
},
{
"epoch": 1.184375,
"grad_norm": 0.3043748140335083,
"learning_rate": 3.815625e-05,
"loss": 0.0251,
"step": 379
},
{
"epoch": 1.1875,
"grad_norm": 0.45542070269584656,
"learning_rate": 3.8125e-05,
"loss": 0.0322,
"step": 380
},
{
"epoch": 1.190625,
"grad_norm": 0.1830095499753952,
"learning_rate": 3.809375e-05,
"loss": 0.0285,
"step": 381
},
{
"epoch": 1.19375,
"grad_norm": 0.7842034697532654,
"learning_rate": 3.8062500000000004e-05,
"loss": 0.0324,
"step": 382
},
{
"epoch": 1.196875,
"grad_norm": 0.8280196785926819,
"learning_rate": 3.803125e-05,
"loss": 0.036,
"step": 383
},
{
"epoch": 1.2,
"grad_norm": 0.1664057970046997,
"learning_rate": 3.8e-05,
"loss": 0.022,
"step": 384
},
{
"epoch": 1.203125,
"grad_norm": 0.2700011730194092,
"learning_rate": 3.7968750000000005e-05,
"loss": 0.0226,
"step": 385
},
{
"epoch": 1.20625,
"grad_norm": 0.2379760593175888,
"learning_rate": 3.79375e-05,
"loss": 0.0382,
"step": 386
},
{
"epoch": 1.209375,
"grad_norm": 0.1285543590784073,
"learning_rate": 3.790625e-05,
"loss": 0.0207,
"step": 387
},
{
"epoch": 1.2125,
"grad_norm": 0.2303180694580078,
"learning_rate": 3.7875e-05,
"loss": 0.0313,
"step": 388
},
{
"epoch": 1.215625,
"grad_norm": 0.5406865477561951,
"learning_rate": 3.784375e-05,
"loss": 0.0346,
"step": 389
},
{
"epoch": 1.21875,
"grad_norm": 0.2952568829059601,
"learning_rate": 3.78125e-05,
"loss": 0.0297,
"step": 390
},
{
"epoch": 1.221875,
"grad_norm": 3.3401618003845215,
"learning_rate": 3.778125e-05,
"loss": 0.0787,
"step": 391
},
{
"epoch": 1.225,
"grad_norm": 0.2306816279888153,
"learning_rate": 3.775e-05,
"loss": 0.0305,
"step": 392
},
{
"epoch": 1.228125,
"grad_norm": 2.631598472595215,
"learning_rate": 3.7718750000000005e-05,
"loss": 0.102,
"step": 393
},
{
"epoch": 1.23125,
"grad_norm": 5.553420066833496,
"learning_rate": 3.76875e-05,
"loss": 0.0751,
"step": 394
},
{
"epoch": 1.234375,
"grad_norm": 0.2419847697019577,
"learning_rate": 3.7656250000000004e-05,
"loss": 0.0305,
"step": 395
},
{
"epoch": 1.2375,
"grad_norm": 0.9081357717514038,
"learning_rate": 3.7625e-05,
"loss": 0.0431,
"step": 396
},
{
"epoch": 1.240625,
"grad_norm": 1.1338061094284058,
"learning_rate": 3.759375e-05,
"loss": 0.0527,
"step": 397
},
{
"epoch": 1.24375,
"grad_norm": 1.058129906654358,
"learning_rate": 3.75625e-05,
"loss": 0.064,
"step": 398
},
{
"epoch": 1.246875,
"grad_norm": 0.22226227819919586,
"learning_rate": 3.753125e-05,
"loss": 0.0264,
"step": 399
},
{
"epoch": 1.25,
"grad_norm": 0.23206289112567902,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.023,
"step": 400
},
{
"epoch": 1.253125,
"grad_norm": 0.3117161691188812,
"learning_rate": 3.746875e-05,
"loss": 0.027,
"step": 401
},
{
"epoch": 1.25625,
"grad_norm": 0.1130838617682457,
"learning_rate": 3.74375e-05,
"loss": 0.0212,
"step": 402
},
{
"epoch": 1.259375,
"grad_norm": 0.29144272208213806,
"learning_rate": 3.7406250000000005e-05,
"loss": 0.0476,
"step": 403
},
{
"epoch": 1.2625,
"grad_norm": 0.31826767325401306,
"learning_rate": 3.737500000000001e-05,
"loss": 0.027,
"step": 404
},
{
"epoch": 1.265625,
"grad_norm": 0.3600537180900574,
"learning_rate": 3.7343749999999996e-05,
"loss": 0.033,
"step": 405
},
{
"epoch": 1.26875,
"grad_norm": 0.4717816114425659,
"learning_rate": 3.73125e-05,
"loss": 0.0333,
"step": 406
},
{
"epoch": 1.271875,
"grad_norm": 0.2320399135351181,
"learning_rate": 3.728125e-05,
"loss": 0.0236,
"step": 407
},
{
"epoch": 1.275,
"grad_norm": 0.29288992285728455,
"learning_rate": 3.7250000000000004e-05,
"loss": 0.0272,
"step": 408
},
{
"epoch": 1.278125,
"grad_norm": 0.4767632782459259,
"learning_rate": 3.721875e-05,
"loss": 0.0378,
"step": 409
},
{
"epoch": 1.28125,
"grad_norm": 5.730660915374756,
"learning_rate": 3.71875e-05,
"loss": 0.0775,
"step": 410
},
{
"epoch": 1.284375,
"grad_norm": 0.10694890469312668,
"learning_rate": 3.7156250000000005e-05,
"loss": 0.019,
"step": 411
},
{
"epoch": 1.2875,
"grad_norm": 0.34038084745407104,
"learning_rate": 3.7125e-05,
"loss": 0.0229,
"step": 412
},
{
"epoch": 1.290625,
"grad_norm": 0.17101223766803741,
"learning_rate": 3.709375e-05,
"loss": 0.0308,
"step": 413
},
{
"epoch": 1.29375,
"grad_norm": 0.1484222412109375,
"learning_rate": 3.70625e-05,
"loss": 0.0244,
"step": 414
},
{
"epoch": 1.296875,
"grad_norm": 0.3410337567329407,
"learning_rate": 3.703125e-05,
"loss": 0.0298,
"step": 415
},
{
"epoch": 1.3,
"grad_norm": 0.641090452671051,
"learning_rate": 3.7e-05,
"loss": 0.0345,
"step": 416
},
{
"epoch": 1.303125,
"grad_norm": 0.8170056343078613,
"learning_rate": 3.696875e-05,
"loss": 0.0448,
"step": 417
},
{
"epoch": 1.30625,
"grad_norm": 3.6934096813201904,
"learning_rate": 3.69375e-05,
"loss": 0.0552,
"step": 418
},
{
"epoch": 1.309375,
"grad_norm": 0.1905418187379837,
"learning_rate": 3.6906250000000006e-05,
"loss": 0.0305,
"step": 419
},
{
"epoch": 1.3125,
"grad_norm": 0.6752256155014038,
"learning_rate": 3.6875e-05,
"loss": 0.0373,
"step": 420
},
{
"epoch": 1.315625,
"grad_norm": 0.8538122177124023,
"learning_rate": 3.684375e-05,
"loss": 0.0355,
"step": 421
},
{
"epoch": 1.31875,
"grad_norm": 0.13017673790454865,
"learning_rate": 3.68125e-05,
"loss": 0.0287,
"step": 422
},
{
"epoch": 1.321875,
"grad_norm": 0.3785141110420227,
"learning_rate": 3.678125e-05,
"loss": 0.0329,
"step": 423
},
{
"epoch": 1.325,
"grad_norm": 0.21819180250167847,
"learning_rate": 3.675e-05,
"loss": 0.0263,
"step": 424
},
{
"epoch": 1.328125,
"grad_norm": 9.360857963562012,
"learning_rate": 3.671875e-05,
"loss": 0.0819,
"step": 425
},
{
"epoch": 1.33125,
"grad_norm": 0.1742083877325058,
"learning_rate": 3.6687500000000004e-05,
"loss": 0.0262,
"step": 426
},
{
"epoch": 1.334375,
"grad_norm": 0.4147292971611023,
"learning_rate": 3.665625e-05,
"loss": 0.0311,
"step": 427
},
{
"epoch": 1.3375,
"grad_norm": 4.418566703796387,
"learning_rate": 3.6625e-05,
"loss": 0.0971,
"step": 428
},
{
"epoch": 1.340625,
"grad_norm": 1.636364459991455,
"learning_rate": 3.6593750000000005e-05,
"loss": 0.0379,
"step": 429
},
{
"epoch": 1.34375,
"grad_norm": 0.22759225964546204,
"learning_rate": 3.65625e-05,
"loss": 0.0266,
"step": 430
},
{
"epoch": 1.346875,
"grad_norm": 0.17647528648376465,
"learning_rate": 3.653125e-05,
"loss": 0.0179,
"step": 431
},
{
"epoch": 1.35,
"grad_norm": 2.2657251358032227,
"learning_rate": 3.65e-05,
"loss": 0.0363,
"step": 432
},
{
"epoch": 1.353125,
"grad_norm": 0.17360058426856995,
"learning_rate": 3.646875e-05,
"loss": 0.0272,
"step": 433
},
{
"epoch": 1.35625,
"grad_norm": 0.1535394936800003,
"learning_rate": 3.6437500000000005e-05,
"loss": 0.0204,
"step": 434
},
{
"epoch": 1.359375,
"grad_norm": 0.23168876767158508,
"learning_rate": 3.640625e-05,
"loss": 0.0324,
"step": 435
},
{
"epoch": 1.3625,
"grad_norm": 0.623339831829071,
"learning_rate": 3.6375e-05,
"loss": 0.0372,
"step": 436
},
{
"epoch": 1.365625,
"grad_norm": 0.1481848806142807,
"learning_rate": 3.6343750000000006e-05,
"loss": 0.0231,
"step": 437
},
{
"epoch": 1.36875,
"grad_norm": 0.2897062599658966,
"learning_rate": 3.63125e-05,
"loss": 0.0394,
"step": 438
},
{
"epoch": 1.371875,
"grad_norm": 0.12459568679332733,
"learning_rate": 3.628125e-05,
"loss": 0.0261,
"step": 439
},
{
"epoch": 1.375,
"grad_norm": 2.7815325260162354,
"learning_rate": 3.625e-05,
"loss": 0.0757,
"step": 440
},
{
"epoch": 1.378125,
"grad_norm": 10.49199104309082,
"learning_rate": 3.621875e-05,
"loss": 0.3111,
"step": 441
},
{
"epoch": 1.38125,
"grad_norm": 8.13404369354248,
"learning_rate": 3.61875e-05,
"loss": 0.1123,
"step": 442
},
{
"epoch": 1.384375,
"grad_norm": 0.24218522012233734,
"learning_rate": 3.615625e-05,
"loss": 0.0271,
"step": 443
},
{
"epoch": 1.3875,
"grad_norm": 0.4631577730178833,
"learning_rate": 3.6125000000000004e-05,
"loss": 0.0364,
"step": 444
},
{
"epoch": 1.390625,
"grad_norm": 0.2414701133966446,
"learning_rate": 3.6093750000000007e-05,
"loss": 0.034,
"step": 445
},
{
"epoch": 1.39375,
"grad_norm": 0.10716137290000916,
"learning_rate": 3.60625e-05,
"loss": 0.024,
"step": 446
},
{
"epoch": 1.396875,
"grad_norm": 0.1930936723947525,
"learning_rate": 3.603125e-05,
"loss": 0.0297,
"step": 447
},
{
"epoch": 1.4,
"grad_norm": 0.1756851226091385,
"learning_rate": 3.6e-05,
"loss": 0.0202,
"step": 448
},
{
"epoch": 1.403125,
"grad_norm": 0.4439844489097595,
"learning_rate": 3.5968750000000004e-05,
"loss": 0.0577,
"step": 449
},
{
"epoch": 1.40625,
"grad_norm": 3.5776124000549316,
"learning_rate": 3.59375e-05,
"loss": 0.0463,
"step": 450
},
{
"epoch": 1.409375,
"grad_norm": 1.1139960289001465,
"learning_rate": 3.590625e-05,
"loss": 0.0429,
"step": 451
},
{
"epoch": 1.4125,
"grad_norm": 0.21480640769004822,
"learning_rate": 3.5875000000000005e-05,
"loss": 0.0331,
"step": 452
},
{
"epoch": 1.415625,
"grad_norm": 0.36357372999191284,
"learning_rate": 3.584375e-05,
"loss": 0.034,
"step": 453
},
{
"epoch": 1.41875,
"grad_norm": 0.25747624039649963,
"learning_rate": 3.58125e-05,
"loss": 0.03,
"step": 454
},
{
"epoch": 1.421875,
"grad_norm": 0.5258784890174866,
"learning_rate": 3.578125e-05,
"loss": 0.0392,
"step": 455
},
{
"epoch": 1.425,
"grad_norm": 0.0880817100405693,
"learning_rate": 3.575e-05,
"loss": 0.0212,
"step": 456
},
{
"epoch": 1.428125,
"grad_norm": 0.55647212266922,
"learning_rate": 3.571875e-05,
"loss": 0.0369,
"step": 457
},
{
"epoch": 1.43125,
"grad_norm": 0.4609752297401428,
"learning_rate": 3.56875e-05,
"loss": 0.0281,
"step": 458
},
{
"epoch": 1.434375,
"grad_norm": 3.478707790374756,
"learning_rate": 3.565625e-05,
"loss": 0.1029,
"step": 459
},
{
"epoch": 1.4375,
"grad_norm": 0.14912380278110504,
"learning_rate": 3.5625000000000005e-05,
"loss": 0.0229,
"step": 460
},
{
"epoch": 1.440625,
"grad_norm": 0.19087623059749603,
"learning_rate": 3.559375e-05,
"loss": 0.0308,
"step": 461
},
{
"epoch": 1.44375,
"grad_norm": 0.138632670044899,
"learning_rate": 3.5562500000000004e-05,
"loss": 0.0229,
"step": 462
},
{
"epoch": 1.446875,
"grad_norm": 0.18853573501110077,
"learning_rate": 3.553125e-05,
"loss": 0.0295,
"step": 463
},
{
"epoch": 1.45,
"grad_norm": 0.3829871714115143,
"learning_rate": 3.55e-05,
"loss": 0.0312,
"step": 464
},
{
"epoch": 1.453125,
"grad_norm": 0.9517104029655457,
"learning_rate": 3.546875e-05,
"loss": 0.0384,
"step": 465
},
{
"epoch": 1.45625,
"grad_norm": 0.16418811678886414,
"learning_rate": 3.54375e-05,
"loss": 0.0279,
"step": 466
},
{
"epoch": 1.459375,
"grad_norm": 0.6732227206230164,
"learning_rate": 3.5406250000000003e-05,
"loss": 0.041,
"step": 467
},
{
"epoch": 1.4625,
"grad_norm": 0.26701417565345764,
"learning_rate": 3.5375e-05,
"loss": 0.0311,
"step": 468
},
{
"epoch": 1.465625,
"grad_norm": 0.5570465326309204,
"learning_rate": 3.534375e-05,
"loss": 0.0431,
"step": 469
},
{
"epoch": 1.46875,
"grad_norm": 0.18663549423217773,
"learning_rate": 3.5312500000000005e-05,
"loss": 0.0327,
"step": 470
},
{
"epoch": 1.471875,
"grad_norm": 6.698427200317383,
"learning_rate": 3.528125e-05,
"loss": 0.0793,
"step": 471
},
{
"epoch": 1.475,
"grad_norm": 0.1952434629201889,
"learning_rate": 3.525e-05,
"loss": 0.03,
"step": 472
},
{
"epoch": 1.478125,
"grad_norm": 0.13568946719169617,
"learning_rate": 3.521875e-05,
"loss": 0.0228,
"step": 473
},
{
"epoch": 1.48125,
"grad_norm": 0.3262503147125244,
"learning_rate": 3.51875e-05,
"loss": 0.036,
"step": 474
},
{
"epoch": 1.484375,
"grad_norm": 0.2775207757949829,
"learning_rate": 3.5156250000000004e-05,
"loss": 0.0244,
"step": 475
},
{
"epoch": 1.4875,
"grad_norm": 0.17881157994270325,
"learning_rate": 3.5125e-05,
"loss": 0.0239,
"step": 476
},
{
"epoch": 1.490625,
"grad_norm": 0.09230555593967438,
"learning_rate": 3.509375e-05,
"loss": 0.0235,
"step": 477
},
{
"epoch": 1.49375,
"grad_norm": 0.11491113901138306,
"learning_rate": 3.5062500000000005e-05,
"loss": 0.0189,
"step": 478
},
{
"epoch": 1.496875,
"grad_norm": 0.1791650950908661,
"learning_rate": 3.503125e-05,
"loss": 0.0302,
"step": 479
},
{
"epoch": 1.5,
"grad_norm": 0.632184624671936,
"learning_rate": 3.5e-05,
"loss": 0.0554,
"step": 480
},
{
"epoch": 1.503125,
"grad_norm": 0.47974345088005066,
"learning_rate": 3.496875e-05,
"loss": 0.031,
"step": 481
},
{
"epoch": 1.50625,
"grad_norm": 0.25306665897369385,
"learning_rate": 3.49375e-05,
"loss": 0.0348,
"step": 482
},
{
"epoch": 1.509375,
"grad_norm": 0.23441192507743835,
"learning_rate": 3.4906250000000005e-05,
"loss": 0.0267,
"step": 483
},
{
"epoch": 1.5125,
"grad_norm": 0.47122475504875183,
"learning_rate": 3.4875e-05,
"loss": 0.0531,
"step": 484
},
{
"epoch": 1.515625,
"grad_norm": 0.42970484495162964,
"learning_rate": 3.484375e-05,
"loss": 0.0407,
"step": 485
},
{
"epoch": 1.51875,
"grad_norm": 0.09633981436491013,
"learning_rate": 3.4812500000000006e-05,
"loss": 0.0237,
"step": 486
},
{
"epoch": 1.521875,
"grad_norm": 0.36600834131240845,
"learning_rate": 3.478125e-05,
"loss": 0.0284,
"step": 487
},
{
"epoch": 1.525,
"grad_norm": 0.20985989272594452,
"learning_rate": 3.475e-05,
"loss": 0.0291,
"step": 488
},
{
"epoch": 1.528125,
"grad_norm": 0.19182531535625458,
"learning_rate": 3.471875e-05,
"loss": 0.0215,
"step": 489
},
{
"epoch": 1.53125,
"grad_norm": 0.1707691103219986,
"learning_rate": 3.46875e-05,
"loss": 0.0243,
"step": 490
},
{
"epoch": 1.534375,
"grad_norm": 1.414674162864685,
"learning_rate": 3.465625e-05,
"loss": 0.0386,
"step": 491
},
{
"epoch": 1.5375,
"grad_norm": 0.8973498344421387,
"learning_rate": 3.4625e-05,
"loss": 0.0541,
"step": 492
},
{
"epoch": 1.540625,
"grad_norm": 0.0919414833188057,
"learning_rate": 3.4593750000000004e-05,
"loss": 0.0211,
"step": 493
},
{
"epoch": 1.54375,
"grad_norm": 0.4503600299358368,
"learning_rate": 3.45625e-05,
"loss": 0.0345,
"step": 494
},
{
"epoch": 1.546875,
"grad_norm": 0.16320058703422546,
"learning_rate": 3.453125e-05,
"loss": 0.0258,
"step": 495
},
{
"epoch": 1.55,
"grad_norm": 0.14714613556861877,
"learning_rate": 3.45e-05,
"loss": 0.0267,
"step": 496
},
{
"epoch": 1.553125,
"grad_norm": 0.45230165123939514,
"learning_rate": 3.446875e-05,
"loss": 0.0452,
"step": 497
},
{
"epoch": 1.55625,
"grad_norm": 0.10125567764043808,
"learning_rate": 3.4437500000000004e-05,
"loss": 0.0193,
"step": 498
},
{
"epoch": 1.559375,
"grad_norm": 0.27993062138557434,
"learning_rate": 3.440625e-05,
"loss": 0.0292,
"step": 499
},
{
"epoch": 1.5625,
"grad_norm": 0.15163570642471313,
"learning_rate": 3.4375e-05,
"loss": 0.021,
"step": 500
},
{
"epoch": 1.565625,
"grad_norm": 0.3911040723323822,
"learning_rate": 3.4343750000000005e-05,
"loss": 0.0267,
"step": 501
},
{
"epoch": 1.56875,
"grad_norm": 0.9082874059677124,
"learning_rate": 3.43125e-05,
"loss": 0.0529,
"step": 502
},
{
"epoch": 1.571875,
"grad_norm": 9.674172401428223,
"learning_rate": 3.428125e-05,
"loss": 0.1488,
"step": 503
},
{
"epoch": 1.575,
"grad_norm": 0.43405047059059143,
"learning_rate": 3.4250000000000006e-05,
"loss": 0.0264,
"step": 504
},
{
"epoch": 1.578125,
"grad_norm": 0.28418371081352234,
"learning_rate": 3.421875e-05,
"loss": 0.0208,
"step": 505
},
{
"epoch": 1.58125,
"grad_norm": 0.07861240953207016,
"learning_rate": 3.41875e-05,
"loss": 0.0234,
"step": 506
},
{
"epoch": 1.584375,
"grad_norm": 1.7110495567321777,
"learning_rate": 3.415625e-05,
"loss": 0.0382,
"step": 507
},
{
"epoch": 1.5875,
"grad_norm": 0.24076040089130402,
"learning_rate": 3.4125e-05,
"loss": 0.0272,
"step": 508
},
{
"epoch": 1.590625,
"grad_norm": 0.6059406995773315,
"learning_rate": 3.4093750000000005e-05,
"loss": 0.0475,
"step": 509
},
{
"epoch": 1.59375,
"grad_norm": 0.24730996787548065,
"learning_rate": 3.40625e-05,
"loss": 0.0215,
"step": 510
},
{
"epoch": 1.596875,
"grad_norm": 0.6072419285774231,
"learning_rate": 3.4031250000000004e-05,
"loss": 0.0306,
"step": 511
},
{
"epoch": 1.6,
"grad_norm": 0.3612338900566101,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0437,
"step": 512
},
{
"epoch": 1.603125,
"grad_norm": 0.18105106055736542,
"learning_rate": 3.396875e-05,
"loss": 0.042,
"step": 513
},
{
"epoch": 1.60625,
"grad_norm": 0.21311897039413452,
"learning_rate": 3.39375e-05,
"loss": 0.0228,
"step": 514
},
{
"epoch": 1.609375,
"grad_norm": 0.20102500915527344,
"learning_rate": 3.390625e-05,
"loss": 0.0225,
"step": 515
},
{
"epoch": 1.6125,
"grad_norm": 0.08760484308004379,
"learning_rate": 3.3875000000000003e-05,
"loss": 0.0206,
"step": 516
},
{
"epoch": 1.615625,
"grad_norm": 1.122139573097229,
"learning_rate": 3.384375e-05,
"loss": 0.0279,
"step": 517
},
{
"epoch": 1.61875,
"grad_norm": 1.3522179126739502,
"learning_rate": 3.38125e-05,
"loss": 0.0468,
"step": 518
},
{
"epoch": 1.621875,
"grad_norm": 0.24191869795322418,
"learning_rate": 3.3781250000000005e-05,
"loss": 0.0283,
"step": 519
},
{
"epoch": 1.625,
"grad_norm": 0.3224029839038849,
"learning_rate": 3.375000000000001e-05,
"loss": 0.0329,
"step": 520
},
{
"epoch": 1.628125,
"grad_norm": 0.2289983034133911,
"learning_rate": 3.3718749999999996e-05,
"loss": 0.0238,
"step": 521
},
{
"epoch": 1.63125,
"grad_norm": 1.206566572189331,
"learning_rate": 3.36875e-05,
"loss": 0.0418,
"step": 522
},
{
"epoch": 1.634375,
"grad_norm": 2.037022113800049,
"learning_rate": 3.365625e-05,
"loss": 0.0515,
"step": 523
},
{
"epoch": 1.6375,
"grad_norm": 0.4265249967575073,
"learning_rate": 3.3625000000000004e-05,
"loss": 0.0372,
"step": 524
},
{
"epoch": 1.640625,
"grad_norm": 0.12385348975658417,
"learning_rate": 3.359375e-05,
"loss": 0.0285,
"step": 525
},
{
"epoch": 1.64375,
"grad_norm": 0.7889988422393799,
"learning_rate": 3.35625e-05,
"loss": 0.0419,
"step": 526
},
{
"epoch": 1.646875,
"grad_norm": 1.755566120147705,
"learning_rate": 3.3531250000000005e-05,
"loss": 0.0399,
"step": 527
},
{
"epoch": 1.65,
"grad_norm": 0.5016866326332092,
"learning_rate": 3.35e-05,
"loss": 0.0654,
"step": 528
},
{
"epoch": 1.653125,
"grad_norm": 0.09634485840797424,
"learning_rate": 3.3468750000000004e-05,
"loss": 0.0195,
"step": 529
},
{
"epoch": 1.65625,
"grad_norm": 0.13417887687683105,
"learning_rate": 3.34375e-05,
"loss": 0.0198,
"step": 530
},
{
"epoch": 1.659375,
"grad_norm": 0.33312031626701355,
"learning_rate": 3.340625e-05,
"loss": 0.0313,
"step": 531
},
{
"epoch": 1.6625,
"grad_norm": 0.17174910008907318,
"learning_rate": 3.3375e-05,
"loss": 0.0258,
"step": 532
},
{
"epoch": 1.665625,
"grad_norm": 0.09567829966545105,
"learning_rate": 3.334375e-05,
"loss": 0.0207,
"step": 533
},
{
"epoch": 1.66875,
"grad_norm": 0.12403300404548645,
"learning_rate": 3.33125e-05,
"loss": 0.0229,
"step": 534
},
{
"epoch": 1.671875,
"grad_norm": 0.2110747992992401,
"learning_rate": 3.3281250000000006e-05,
"loss": 0.0246,
"step": 535
},
{
"epoch": 1.675,
"grad_norm": 0.4586084485054016,
"learning_rate": 3.325e-05,
"loss": 0.0386,
"step": 536
},
{
"epoch": 1.678125,
"grad_norm": 0.37612682580947876,
"learning_rate": 3.3218750000000004e-05,
"loss": 0.0273,
"step": 537
},
{
"epoch": 1.68125,
"grad_norm": 0.12443527579307556,
"learning_rate": 3.31875e-05,
"loss": 0.0251,
"step": 538
},
{
"epoch": 1.684375,
"grad_norm": 1.3459047079086304,
"learning_rate": 3.315625e-05,
"loss": 0.055,
"step": 539
},
{
"epoch": 1.6875,
"grad_norm": 1.1800158023834229,
"learning_rate": 3.3125e-05,
"loss": 0.1293,
"step": 540
},
{
"epoch": 1.690625,
"grad_norm": 0.2551305890083313,
"learning_rate": 3.309375e-05,
"loss": 0.0244,
"step": 541
},
{
"epoch": 1.69375,
"grad_norm": 0.3867127001285553,
"learning_rate": 3.3062500000000004e-05,
"loss": 0.034,
"step": 542
},
{
"epoch": 1.696875,
"grad_norm": 0.21304133534431458,
"learning_rate": 3.303125e-05,
"loss": 0.0222,
"step": 543
},
{
"epoch": 1.7,
"grad_norm": 0.41967159509658813,
"learning_rate": 3.3e-05,
"loss": 0.0552,
"step": 544
},
{
"epoch": 1.703125,
"grad_norm": 0.12126383185386658,
"learning_rate": 3.2968750000000005e-05,
"loss": 0.0203,
"step": 545
},
{
"epoch": 1.70625,
"grad_norm": 0.5720413327217102,
"learning_rate": 3.29375e-05,
"loss": 0.0281,
"step": 546
},
{
"epoch": 1.709375,
"grad_norm": 0.14413952827453613,
"learning_rate": 3.290625e-05,
"loss": 0.0234,
"step": 547
},
{
"epoch": 1.7125,
"grad_norm": 0.30371683835983276,
"learning_rate": 3.2875e-05,
"loss": 0.031,
"step": 548
},
{
"epoch": 1.715625,
"grad_norm": 0.24292483925819397,
"learning_rate": 3.284375e-05,
"loss": 0.0356,
"step": 549
},
{
"epoch": 1.71875,
"grad_norm": 0.22597646713256836,
"learning_rate": 3.2812500000000005e-05,
"loss": 0.0272,
"step": 550
},
{
"epoch": 1.721875,
"grad_norm": 0.12884864211082458,
"learning_rate": 3.278125e-05,
"loss": 0.0173,
"step": 551
},
{
"epoch": 1.725,
"grad_norm": 0.21374590694904327,
"learning_rate": 3.275e-05,
"loss": 0.0233,
"step": 552
},
{
"epoch": 1.728125,
"grad_norm": 0.1160837858915329,
"learning_rate": 3.2718750000000006e-05,
"loss": 0.0204,
"step": 553
},
{
"epoch": 1.73125,
"grad_norm": 0.09611855447292328,
"learning_rate": 3.26875e-05,
"loss": 0.0217,
"step": 554
},
{
"epoch": 1.734375,
"grad_norm": 0.39267030358314514,
"learning_rate": 3.265625e-05,
"loss": 0.0293,
"step": 555
},
{
"epoch": 1.7375,
"grad_norm": 0.587985098361969,
"learning_rate": 3.2625e-05,
"loss": 0.0357,
"step": 556
},
{
"epoch": 1.740625,
"grad_norm": 0.12259811908006668,
"learning_rate": 3.259375e-05,
"loss": 0.0241,
"step": 557
},
{
"epoch": 1.74375,
"grad_norm": 0.10824635624885559,
"learning_rate": 3.25625e-05,
"loss": 0.0236,
"step": 558
},
{
"epoch": 1.746875,
"grad_norm": 0.0685066282749176,
"learning_rate": 3.253125e-05,
"loss": 0.0157,
"step": 559
},
{
"epoch": 1.75,
"grad_norm": 0.15844418108463287,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.0279,
"step": 560
},
{
"epoch": 1.753125,
"grad_norm": 0.5560875535011292,
"learning_rate": 3.2468750000000007e-05,
"loss": 0.0504,
"step": 561
},
{
"epoch": 1.75625,
"grad_norm": 0.15164297819137573,
"learning_rate": 3.24375e-05,
"loss": 0.0285,
"step": 562
},
{
"epoch": 1.759375,
"grad_norm": 0.09835915267467499,
"learning_rate": 3.240625e-05,
"loss": 0.0231,
"step": 563
},
{
"epoch": 1.7625,
"grad_norm": 0.33482205867767334,
"learning_rate": 3.2375e-05,
"loss": 0.0276,
"step": 564
},
{
"epoch": 1.765625,
"grad_norm": 0.16952961683273315,
"learning_rate": 3.2343750000000004e-05,
"loss": 0.028,
"step": 565
},
{
"epoch": 1.76875,
"grad_norm": 0.5915741324424744,
"learning_rate": 3.23125e-05,
"loss": 0.0241,
"step": 566
},
{
"epoch": 1.771875,
"grad_norm": 0.0879567340016365,
"learning_rate": 3.228125e-05,
"loss": 0.0182,
"step": 567
},
{
"epoch": 1.775,
"grad_norm": 0.13319243490695953,
"learning_rate": 3.2250000000000005e-05,
"loss": 0.0214,
"step": 568
},
{
"epoch": 1.778125,
"grad_norm": 1.3187066316604614,
"learning_rate": 3.221875e-05,
"loss": 0.0286,
"step": 569
},
{
"epoch": 1.78125,
"grad_norm": 0.13088369369506836,
"learning_rate": 3.21875e-05,
"loss": 0.0219,
"step": 570
},
{
"epoch": 1.784375,
"grad_norm": 0.11556047946214676,
"learning_rate": 3.215625e-05,
"loss": 0.0233,
"step": 571
},
{
"epoch": 1.7875,
"grad_norm": 0.6260407567024231,
"learning_rate": 3.2125e-05,
"loss": 0.0385,
"step": 572
},
{
"epoch": 1.790625,
"grad_norm": 0.12837764620780945,
"learning_rate": 3.209375e-05,
"loss": 0.0193,
"step": 573
},
{
"epoch": 1.79375,
"grad_norm": 0.4870481789112091,
"learning_rate": 3.20625e-05,
"loss": 0.032,
"step": 574
},
{
"epoch": 1.796875,
"grad_norm": 0.21629931032657623,
"learning_rate": 3.203125e-05,
"loss": 0.021,
"step": 575
},
{
"epoch": 1.8,
"grad_norm": 0.34100577235221863,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0249,
"step": 576
},
{
"epoch": 1.803125,
"grad_norm": 0.5092364549636841,
"learning_rate": 3.196875e-05,
"loss": 0.034,
"step": 577
},
{
"epoch": 1.80625,
"grad_norm": 0.3579314351081848,
"learning_rate": 3.1937500000000004e-05,
"loss": 0.0231,
"step": 578
},
{
"epoch": 1.809375,
"grad_norm": 0.3164561688899994,
"learning_rate": 3.1906250000000006e-05,
"loss": 0.0336,
"step": 579
},
{
"epoch": 1.8125,
"grad_norm": 0.41202688217163086,
"learning_rate": 3.1875e-05,
"loss": 0.0279,
"step": 580
},
{
"epoch": 1.815625,
"grad_norm": 0.34404581785202026,
"learning_rate": 3.184375e-05,
"loss": 0.0266,
"step": 581
},
{
"epoch": 1.81875,
"grad_norm": 0.3374820053577423,
"learning_rate": 3.18125e-05,
"loss": 0.0413,
"step": 582
},
{
"epoch": 1.821875,
"grad_norm": 1.9358925819396973,
"learning_rate": 3.1781250000000003e-05,
"loss": 0.0887,
"step": 583
},
{
"epoch": 1.825,
"grad_norm": 1.2504078149795532,
"learning_rate": 3.175e-05,
"loss": 0.0873,
"step": 584
},
{
"epoch": 1.828125,
"grad_norm": 0.500586211681366,
"learning_rate": 3.171875e-05,
"loss": 0.1037,
"step": 585
},
{
"epoch": 1.83125,
"grad_norm": 0.1820017248392105,
"learning_rate": 3.1687500000000005e-05,
"loss": 0.0266,
"step": 586
},
{
"epoch": 1.834375,
"grad_norm": 0.22211194038391113,
"learning_rate": 3.165625000000001e-05,
"loss": 0.0225,
"step": 587
},
{
"epoch": 1.8375,
"grad_norm": 0.28489556908607483,
"learning_rate": 3.1624999999999996e-05,
"loss": 0.0268,
"step": 588
},
{
"epoch": 1.840625,
"grad_norm": 0.19432474672794342,
"learning_rate": 3.159375e-05,
"loss": 0.0265,
"step": 589
},
{
"epoch": 1.84375,
"grad_norm": 0.15175053477287292,
"learning_rate": 3.15625e-05,
"loss": 0.0202,
"step": 590
},
{
"epoch": 1.846875,
"grad_norm": 0.17748066782951355,
"learning_rate": 3.1531250000000004e-05,
"loss": 0.024,
"step": 591
},
{
"epoch": 1.85,
"grad_norm": 0.21079498529434204,
"learning_rate": 3.15e-05,
"loss": 0.0199,
"step": 592
},
{
"epoch": 1.853125,
"grad_norm": 2.379624128341675,
"learning_rate": 3.146875e-05,
"loss": 0.2896,
"step": 593
},
{
"epoch": 1.85625,
"grad_norm": 0.2815399467945099,
"learning_rate": 3.1437500000000005e-05,
"loss": 0.0353,
"step": 594
},
{
"epoch": 1.859375,
"grad_norm": 0.16545546054840088,
"learning_rate": 3.140625e-05,
"loss": 0.0294,
"step": 595
},
{
"epoch": 1.8625,
"grad_norm": 0.11324725300073624,
"learning_rate": 3.1375e-05,
"loss": 0.0173,
"step": 596
},
{
"epoch": 1.865625,
"grad_norm": 0.324992299079895,
"learning_rate": 3.134375e-05,
"loss": 0.029,
"step": 597
},
{
"epoch": 1.86875,
"grad_norm": 0.1070389673113823,
"learning_rate": 3.13125e-05,
"loss": 0.027,
"step": 598
},
{
"epoch": 1.871875,
"grad_norm": 1.371116042137146,
"learning_rate": 3.128125e-05,
"loss": 0.0534,
"step": 599
},
{
"epoch": 1.875,
"grad_norm": 3.377976894378662,
"learning_rate": 3.125e-05,
"loss": 0.0479,
"step": 600
},
{
"epoch": 1.878125,
"grad_norm": 0.20367911458015442,
"learning_rate": 3.121875e-05,
"loss": 0.0282,
"step": 601
},
{
"epoch": 1.88125,
"grad_norm": 0.20612278580665588,
"learning_rate": 3.1187500000000006e-05,
"loss": 0.0249,
"step": 602
},
{
"epoch": 1.884375,
"grad_norm": 0.23075002431869507,
"learning_rate": 3.115625e-05,
"loss": 0.0233,
"step": 603
},
{
"epoch": 1.8875,
"grad_norm": 9.430220603942871,
"learning_rate": 3.1125000000000004e-05,
"loss": 0.1505,
"step": 604
},
{
"epoch": 1.890625,
"grad_norm": 0.27332308888435364,
"learning_rate": 3.109375e-05,
"loss": 0.0259,
"step": 605
},
{
"epoch": 1.89375,
"grad_norm": 2.547020673751831,
"learning_rate": 3.10625e-05,
"loss": 0.0565,
"step": 606
},
{
"epoch": 1.896875,
"grad_norm": 0.22681358456611633,
"learning_rate": 3.103125e-05,
"loss": 0.029,
"step": 607
},
{
"epoch": 1.9,
"grad_norm": 0.47494059801101685,
"learning_rate": 3.1e-05,
"loss": 0.0345,
"step": 608
},
{
"epoch": 1.903125,
"grad_norm": 0.16444207727909088,
"learning_rate": 3.0968750000000004e-05,
"loss": 0.0243,
"step": 609
},
{
"epoch": 1.90625,
"grad_norm": 0.27043211460113525,
"learning_rate": 3.09375e-05,
"loss": 0.0289,
"step": 610
},
{
"epoch": 1.909375,
"grad_norm": 0.18597914278507233,
"learning_rate": 3.090625e-05,
"loss": 0.0207,
"step": 611
},
{
"epoch": 1.9125,
"grad_norm": 0.4550987780094147,
"learning_rate": 3.0875000000000005e-05,
"loss": 0.0259,
"step": 612
},
{
"epoch": 1.915625,
"grad_norm": 0.9754087924957275,
"learning_rate": 3.084375e-05,
"loss": 0.0615,
"step": 613
},
{
"epoch": 1.91875,
"grad_norm": 0.07866744697093964,
"learning_rate": 3.08125e-05,
"loss": 0.0206,
"step": 614
},
{
"epoch": 1.921875,
"grad_norm": 0.3353641927242279,
"learning_rate": 3.078125e-05,
"loss": 0.0284,
"step": 615
},
{
"epoch": 1.925,
"grad_norm": 0.6235747337341309,
"learning_rate": 3.075e-05,
"loss": 0.0387,
"step": 616
},
{
"epoch": 1.928125,
"grad_norm": 0.14865784347057343,
"learning_rate": 3.0718750000000005e-05,
"loss": 0.0253,
"step": 617
},
{
"epoch": 1.93125,
"grad_norm": 0.4304657280445099,
"learning_rate": 3.06875e-05,
"loss": 0.0314,
"step": 618
},
{
"epoch": 1.934375,
"grad_norm": 0.10021252185106277,
"learning_rate": 3.065625e-05,
"loss": 0.0181,
"step": 619
},
{
"epoch": 1.9375,
"grad_norm": 0.16892488300800323,
"learning_rate": 3.0625000000000006e-05,
"loss": 0.0276,
"step": 620
},
{
"epoch": 1.940625,
"grad_norm": 1.247506022453308,
"learning_rate": 3.059375e-05,
"loss": 0.0346,
"step": 621
},
{
"epoch": 1.94375,
"grad_norm": 0.6756719350814819,
"learning_rate": 3.05625e-05,
"loss": 0.03,
"step": 622
},
{
"epoch": 1.946875,
"grad_norm": 0.14591379463672638,
"learning_rate": 3.053125e-05,
"loss": 0.0216,
"step": 623
},
{
"epoch": 1.95,
"grad_norm": 0.4332157373428345,
"learning_rate": 3.05e-05,
"loss": 0.0357,
"step": 624
},
{
"epoch": 1.953125,
"grad_norm": 0.8189756274223328,
"learning_rate": 3.0468750000000002e-05,
"loss": 0.021,
"step": 625
},
{
"epoch": 1.95625,
"grad_norm": 0.2692781388759613,
"learning_rate": 3.04375e-05,
"loss": 0.0207,
"step": 626
},
{
"epoch": 1.959375,
"grad_norm": 0.10346498340368271,
"learning_rate": 3.0406250000000004e-05,
"loss": 0.0184,
"step": 627
},
{
"epoch": 1.9625,
"grad_norm": 0.22864669561386108,
"learning_rate": 3.0375000000000003e-05,
"loss": 0.0337,
"step": 628
},
{
"epoch": 1.965625,
"grad_norm": 0.15825912356376648,
"learning_rate": 3.0343750000000006e-05,
"loss": 0.0258,
"step": 629
},
{
"epoch": 1.96875,
"grad_norm": 0.29511621594429016,
"learning_rate": 3.0312499999999998e-05,
"loss": 0.0291,
"step": 630
},
{
"epoch": 1.971875,
"grad_norm": 0.12500648200511932,
"learning_rate": 3.028125e-05,
"loss": 0.0231,
"step": 631
},
{
"epoch": 1.975,
"grad_norm": 0.09671773761510849,
"learning_rate": 3.025e-05,
"loss": 0.0176,
"step": 632
},
{
"epoch": 1.978125,
"grad_norm": 0.11605774611234665,
"learning_rate": 3.0218750000000003e-05,
"loss": 0.0184,
"step": 633
},
{
"epoch": 1.98125,
"grad_norm": 0.7053767442703247,
"learning_rate": 3.0187500000000002e-05,
"loss": 0.0419,
"step": 634
},
{
"epoch": 1.984375,
"grad_norm": 0.5477724671363831,
"learning_rate": 3.015625e-05,
"loss": 0.0283,
"step": 635
},
{
"epoch": 1.9875,
"grad_norm": 0.17797940969467163,
"learning_rate": 3.0125000000000004e-05,
"loss": 0.024,
"step": 636
},
{
"epoch": 1.990625,
"grad_norm": 2.187450408935547,
"learning_rate": 3.0093750000000003e-05,
"loss": 0.0466,
"step": 637
},
{
"epoch": 1.99375,
"grad_norm": 0.17812545597553253,
"learning_rate": 3.00625e-05,
"loss": 0.0245,
"step": 638
},
{
"epoch": 1.996875,
"grad_norm": 0.29999840259552,
"learning_rate": 3.0031249999999998e-05,
"loss": 0.0335,
"step": 639
},
{
"epoch": 2.0,
"grad_norm": 0.09049447625875473,
"learning_rate": 3e-05,
"loss": 0.0186,
"step": 640
},
{
"epoch": 2.0,
"eval_accuracy_N/A": NaN,
"eval_accuracy_content": 0.9907844483799361,
"eval_iou_N/A": 0.0,
"eval_iou_content": 0.9907844483799361,
"eval_loss": 0.03488548472523689,
"eval_mean_accuracy": 0.9907844483799361,
"eval_mean_iou": 0.49539222418996803,
"eval_overall_accuracy": 0.9907844483799361,
"eval_runtime": 1027.8886,
"eval_samples_per_second": 1.245,
"eval_steps_per_second": 0.078,
"step": 640
}
],
"logging_steps": 1,
"max_steps": 1600,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.5441172247101343e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}