|
{ |
|
"best_metric": 0.03488548472523689, |
|
"best_model_checkpoint": "paper-cutting-outputs4/checkpoint-640", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003125, |
|
"grad_norm": 7.335386276245117, |
|
"learning_rate": 4.9968750000000005e-05, |
|
"loss": 0.6822, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00625, |
|
"grad_norm": 3.8524413108825684, |
|
"learning_rate": 4.99375e-05, |
|
"loss": 0.4184, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009375, |
|
"grad_norm": 2.6370558738708496, |
|
"learning_rate": 4.9906250000000004e-05, |
|
"loss": 0.3522, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0125, |
|
"grad_norm": 2.292393207550049, |
|
"learning_rate": 4.9875000000000006e-05, |
|
"loss": 0.3575, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015625, |
|
"grad_norm": 2.067209482192993, |
|
"learning_rate": 4.984375e-05, |
|
"loss": 0.2668, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01875, |
|
"grad_norm": 1.895901083946228, |
|
"learning_rate": 4.98125e-05, |
|
"loss": 0.1906, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.021875, |
|
"grad_norm": 2.7861101627349854, |
|
"learning_rate": 4.978125e-05, |
|
"loss": 0.278, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 2.8299612998962402, |
|
"learning_rate": 4.975e-05, |
|
"loss": 0.1534, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.028125, |
|
"grad_norm": 2.44992995262146, |
|
"learning_rate": 4.9718750000000006e-05, |
|
"loss": 0.1361, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 5.079458236694336, |
|
"learning_rate": 4.96875e-05, |
|
"loss": 0.2674, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.034375, |
|
"grad_norm": 1.0134013891220093, |
|
"learning_rate": 4.9656250000000004e-05, |
|
"loss": 0.1353, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0375, |
|
"grad_norm": 1.1559659242630005, |
|
"learning_rate": 4.962500000000001e-05, |
|
"loss": 0.1347, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.040625, |
|
"grad_norm": 1.4666818380355835, |
|
"learning_rate": 4.959375e-05, |
|
"loss": 0.1011, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04375, |
|
"grad_norm": 2.2786307334899902, |
|
"learning_rate": 4.95625e-05, |
|
"loss": 0.2227, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.046875, |
|
"grad_norm": 3.400648593902588, |
|
"learning_rate": 4.953125e-05, |
|
"loss": 0.1775, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.4692819118499756, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 0.1277, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.053125, |
|
"grad_norm": 1.0176641941070557, |
|
"learning_rate": 4.946875e-05, |
|
"loss": 0.114, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05625, |
|
"grad_norm": 1.5734282732009888, |
|
"learning_rate": 4.94375e-05, |
|
"loss": 0.1533, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.059375, |
|
"grad_norm": 1.4404799938201904, |
|
"learning_rate": 4.9406250000000005e-05, |
|
"loss": 0.1014, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 1.989609718322754, |
|
"learning_rate": 4.937500000000001e-05, |
|
"loss": 0.1868, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.065625, |
|
"grad_norm": 1.4631582498550415, |
|
"learning_rate": 4.9343749999999997e-05, |
|
"loss": 0.1154, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06875, |
|
"grad_norm": 2.48142671585083, |
|
"learning_rate": 4.93125e-05, |
|
"loss": 0.2596, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.071875, |
|
"grad_norm": 3.020716905593872, |
|
"learning_rate": 4.928125e-05, |
|
"loss": 0.1456, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 2.902977466583252, |
|
"learning_rate": 4.9250000000000004e-05, |
|
"loss": 0.204, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.078125, |
|
"grad_norm": 1.2099602222442627, |
|
"learning_rate": 4.921875e-05, |
|
"loss": 0.0828, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08125, |
|
"grad_norm": 0.564108669757843, |
|
"learning_rate": 4.91875e-05, |
|
"loss": 0.0664, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.084375, |
|
"grad_norm": 1.3213393688201904, |
|
"learning_rate": 4.9156250000000006e-05, |
|
"loss": 0.103, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0875, |
|
"grad_norm": 1.1386405229568481, |
|
"learning_rate": 4.9125e-05, |
|
"loss": 0.1028, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.090625, |
|
"grad_norm": 0.9390538930892944, |
|
"learning_rate": 4.9093750000000004e-05, |
|
"loss": 0.1038, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 2.117478847503662, |
|
"learning_rate": 4.90625e-05, |
|
"loss": 0.2128, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.096875, |
|
"grad_norm": 2.953338861465454, |
|
"learning_rate": 4.903125e-05, |
|
"loss": 0.1803, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.287204265594482, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.1473, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.103125, |
|
"grad_norm": 0.6180092692375183, |
|
"learning_rate": 4.896875e-05, |
|
"loss": 0.0485, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10625, |
|
"grad_norm": 1.5996137857437134, |
|
"learning_rate": 4.8937500000000004e-05, |
|
"loss": 0.0749, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.109375, |
|
"grad_norm": 1.6394084692001343, |
|
"learning_rate": 4.8906250000000006e-05, |
|
"loss": 0.1464, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1125, |
|
"grad_norm": 1.3696956634521484, |
|
"learning_rate": 4.8875e-05, |
|
"loss": 0.0705, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.115625, |
|
"grad_norm": 1.3914557695388794, |
|
"learning_rate": 4.8843750000000005e-05, |
|
"loss": 0.0801, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11875, |
|
"grad_norm": 1.3529096841812134, |
|
"learning_rate": 4.88125e-05, |
|
"loss": 0.0734, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.121875, |
|
"grad_norm": 1.6977622509002686, |
|
"learning_rate": 4.878125e-05, |
|
"loss": 0.1075, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.7406987547874451, |
|
"learning_rate": 4.875e-05, |
|
"loss": 0.0527, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.128125, |
|
"grad_norm": 0.7165554761886597, |
|
"learning_rate": 4.871875e-05, |
|
"loss": 0.0638, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.13125, |
|
"grad_norm": 0.4635627865791321, |
|
"learning_rate": 4.8687500000000004e-05, |
|
"loss": 0.0548, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.134375, |
|
"grad_norm": 1.0817699432373047, |
|
"learning_rate": 4.865625e-05, |
|
"loss": 0.0524, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1375, |
|
"grad_norm": 7.260894775390625, |
|
"learning_rate": 4.8625e-05, |
|
"loss": 0.1702, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.140625, |
|
"grad_norm": 6.2862396240234375, |
|
"learning_rate": 4.8593750000000005e-05, |
|
"loss": 0.1442, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.14375, |
|
"grad_norm": 2.473857879638672, |
|
"learning_rate": 4.85625e-05, |
|
"loss": 0.0904, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.146875, |
|
"grad_norm": 3.885066509246826, |
|
"learning_rate": 4.853125e-05, |
|
"loss": 0.0898, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.2689714431762695, |
|
"learning_rate": 4.85e-05, |
|
"loss": 0.0561, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.153125, |
|
"grad_norm": 2.687859058380127, |
|
"learning_rate": 4.846875e-05, |
|
"loss": 0.1582, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.5876914858818054, |
|
"learning_rate": 4.8437500000000005e-05, |
|
"loss": 0.0458, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.159375, |
|
"grad_norm": 8.143379211425781, |
|
"learning_rate": 4.840625e-05, |
|
"loss": 0.2132, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1625, |
|
"grad_norm": 2.991544246673584, |
|
"learning_rate": 4.8375000000000004e-05, |
|
"loss": 0.1885, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.165625, |
|
"grad_norm": 1.1332241296768188, |
|
"learning_rate": 4.8343750000000006e-05, |
|
"loss": 0.0871, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.16875, |
|
"grad_norm": 0.9255159497261047, |
|
"learning_rate": 4.83125e-05, |
|
"loss": 0.0769, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.171875, |
|
"grad_norm": 1.800412654876709, |
|
"learning_rate": 4.828125e-05, |
|
"loss": 0.0633, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 0.9372511506080627, |
|
"learning_rate": 4.825e-05, |
|
"loss": 0.0504, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.178125, |
|
"grad_norm": 0.34521690011024475, |
|
"learning_rate": 4.821875e-05, |
|
"loss": 0.0356, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.18125, |
|
"grad_norm": 0.763290524482727, |
|
"learning_rate": 4.81875e-05, |
|
"loss": 0.0599, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.184375, |
|
"grad_norm": 0.39605632424354553, |
|
"learning_rate": 4.815625e-05, |
|
"loss": 0.0484, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 0.8304187655448914, |
|
"learning_rate": 4.8125000000000004e-05, |
|
"loss": 0.0594, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.190625, |
|
"grad_norm": 0.6326848864555359, |
|
"learning_rate": 4.809375000000001e-05, |
|
"loss": 0.0678, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.19375, |
|
"grad_norm": 2.4955437183380127, |
|
"learning_rate": 4.80625e-05, |
|
"loss": 0.0682, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.196875, |
|
"grad_norm": 4.784257888793945, |
|
"learning_rate": 4.803125e-05, |
|
"loss": 0.1404, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.6176247596740723, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.0513, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.203125, |
|
"grad_norm": 0.35477644205093384, |
|
"learning_rate": 4.7968750000000004e-05, |
|
"loss": 0.0482, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.20625, |
|
"grad_norm": 0.8327121734619141, |
|
"learning_rate": 4.79375e-05, |
|
"loss": 0.046, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.209375, |
|
"grad_norm": 5.646777153015137, |
|
"learning_rate": 4.790625e-05, |
|
"loss": 0.1429, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.2125, |
|
"grad_norm": 0.5917462706565857, |
|
"learning_rate": 4.7875000000000005e-05, |
|
"loss": 0.0498, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.215625, |
|
"grad_norm": 3.3251428604125977, |
|
"learning_rate": 4.784375e-05, |
|
"loss": 0.2996, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 2.83424711227417, |
|
"learning_rate": 4.7812500000000003e-05, |
|
"loss": 0.072, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.221875, |
|
"grad_norm": 2.925737142562866, |
|
"learning_rate": 4.778125e-05, |
|
"loss": 0.0679, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 1.0089963674545288, |
|
"learning_rate": 4.775e-05, |
|
"loss": 0.0555, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.228125, |
|
"grad_norm": 0.6455210447311401, |
|
"learning_rate": 4.771875e-05, |
|
"loss": 0.048, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.23125, |
|
"grad_norm": 6.585851669311523, |
|
"learning_rate": 4.76875e-05, |
|
"loss": 0.1616, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.234375, |
|
"grad_norm": 1.0643088817596436, |
|
"learning_rate": 4.765625e-05, |
|
"loss": 0.0904, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2375, |
|
"grad_norm": 5.661847114562988, |
|
"learning_rate": 4.7625000000000006e-05, |
|
"loss": 0.1051, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.240625, |
|
"grad_norm": 0.9035895466804504, |
|
"learning_rate": 4.759375e-05, |
|
"loss": 0.0482, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.24375, |
|
"grad_norm": 8.772421836853027, |
|
"learning_rate": 4.7562500000000004e-05, |
|
"loss": 0.2399, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.246875, |
|
"grad_norm": 1.299116611480713, |
|
"learning_rate": 4.753125000000001e-05, |
|
"loss": 0.0796, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2476378679275513, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.0712, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.253125, |
|
"grad_norm": 0.3314186632633209, |
|
"learning_rate": 4.746875e-05, |
|
"loss": 0.039, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.25625, |
|
"grad_norm": 5.228423595428467, |
|
"learning_rate": 4.74375e-05, |
|
"loss": 0.1188, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.259375, |
|
"grad_norm": 1.0700784921646118, |
|
"learning_rate": 4.7406250000000004e-05, |
|
"loss": 0.0519, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2625, |
|
"grad_norm": 1.3984243869781494, |
|
"learning_rate": 4.7375e-05, |
|
"loss": 0.1092, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.265625, |
|
"grad_norm": 0.9596419334411621, |
|
"learning_rate": 4.734375e-05, |
|
"loss": 0.1136, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.26875, |
|
"grad_norm": 0.6729472875595093, |
|
"learning_rate": 4.7312500000000005e-05, |
|
"loss": 0.0539, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.271875, |
|
"grad_norm": 1.7557507753372192, |
|
"learning_rate": 4.728125000000001e-05, |
|
"loss": 0.0742, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 0.6950563788414001, |
|
"learning_rate": 4.7249999999999997e-05, |
|
"loss": 0.0435, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.278125, |
|
"grad_norm": 2.5702097415924072, |
|
"learning_rate": 4.721875e-05, |
|
"loss": 0.1343, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 0.4034491181373596, |
|
"learning_rate": 4.71875e-05, |
|
"loss": 0.0464, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.284375, |
|
"grad_norm": 1.8170459270477295, |
|
"learning_rate": 4.7156250000000004e-05, |
|
"loss": 0.1692, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2875, |
|
"grad_norm": 2.3292648792266846, |
|
"learning_rate": 4.7125e-05, |
|
"loss": 0.1435, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.290625, |
|
"grad_norm": 2.5120768547058105, |
|
"learning_rate": 4.709375e-05, |
|
"loss": 0.0533, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.29375, |
|
"grad_norm": 0.4364502727985382, |
|
"learning_rate": 4.7062500000000006e-05, |
|
"loss": 0.047, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.296875, |
|
"grad_norm": 1.0511380434036255, |
|
"learning_rate": 4.703125e-05, |
|
"loss": 0.0645, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.8538405895233154, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0819, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.303125, |
|
"grad_norm": 0.2478204071521759, |
|
"learning_rate": 4.696875e-05, |
|
"loss": 0.0324, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.30625, |
|
"grad_norm": 0.31610116362571716, |
|
"learning_rate": 4.69375e-05, |
|
"loss": 0.0387, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.309375, |
|
"grad_norm": 0.24444423615932465, |
|
"learning_rate": 4.690625e-05, |
|
"loss": 0.0345, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.391217976808548, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.0387, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.315625, |
|
"grad_norm": 0.32689204812049866, |
|
"learning_rate": 4.6843750000000004e-05, |
|
"loss": 0.0392, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.31875, |
|
"grad_norm": 0.930630087852478, |
|
"learning_rate": 4.6812500000000006e-05, |
|
"loss": 0.0608, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.321875, |
|
"grad_norm": 0.7391718029975891, |
|
"learning_rate": 4.678125e-05, |
|
"loss": 0.046, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 0.5312808752059937, |
|
"learning_rate": 4.6750000000000005e-05, |
|
"loss": 0.0517, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.328125, |
|
"grad_norm": 3.3850011825561523, |
|
"learning_rate": 4.671875e-05, |
|
"loss": 0.2326, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.33125, |
|
"grad_norm": 2.874311685562134, |
|
"learning_rate": 4.66875e-05, |
|
"loss": 0.0675, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.334375, |
|
"grad_norm": 1.2302155494689941, |
|
"learning_rate": 4.665625e-05, |
|
"loss": 0.0487, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3375, |
|
"grad_norm": 2.3928442001342773, |
|
"learning_rate": 4.6625e-05, |
|
"loss": 0.1066, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.340625, |
|
"grad_norm": 7.547820568084717, |
|
"learning_rate": 4.6593750000000004e-05, |
|
"loss": 0.1176, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 5.985020160675049, |
|
"learning_rate": 4.65625e-05, |
|
"loss": 0.1135, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.346875, |
|
"grad_norm": 0.6734495759010315, |
|
"learning_rate": 4.653125e-05, |
|
"loss": 0.0512, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.863390564918518, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 0.0816, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.353125, |
|
"grad_norm": 0.24223440885543823, |
|
"learning_rate": 4.646875e-05, |
|
"loss": 0.0373, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.35625, |
|
"grad_norm": 1.8812682628631592, |
|
"learning_rate": 4.64375e-05, |
|
"loss": 0.1516, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.359375, |
|
"grad_norm": 0.5781030654907227, |
|
"learning_rate": 4.640625e-05, |
|
"loss": 0.0757, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3625, |
|
"grad_norm": 3.6789309978485107, |
|
"learning_rate": 4.6375e-05, |
|
"loss": 0.114, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.365625, |
|
"grad_norm": 0.8261707425117493, |
|
"learning_rate": 4.6343750000000005e-05, |
|
"loss": 0.0697, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.36875, |
|
"grad_norm": 1.4307565689086914, |
|
"learning_rate": 4.63125e-05, |
|
"loss": 0.0682, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.371875, |
|
"grad_norm": 0.6145803928375244, |
|
"learning_rate": 4.6281250000000003e-05, |
|
"loss": 0.057, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 3.645620822906494, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 0.1571, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.378125, |
|
"grad_norm": 1.2431564331054688, |
|
"learning_rate": 4.621875e-05, |
|
"loss": 0.0914, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.38125, |
|
"grad_norm": 0.5691125988960266, |
|
"learning_rate": 4.61875e-05, |
|
"loss": 0.0584, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.384375, |
|
"grad_norm": 0.9178672432899475, |
|
"learning_rate": 4.615625e-05, |
|
"loss": 0.0735, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3875, |
|
"grad_norm": 0.9264055490493774, |
|
"learning_rate": 4.6125e-05, |
|
"loss": 0.0562, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 0.6835108399391174, |
|
"learning_rate": 4.609375e-05, |
|
"loss": 0.0522, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.39375, |
|
"grad_norm": 0.6347059011459351, |
|
"learning_rate": 4.60625e-05, |
|
"loss": 0.071, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.396875, |
|
"grad_norm": 0.8624080419540405, |
|
"learning_rate": 4.6031250000000004e-05, |
|
"loss": 0.0632, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.383160352706909, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.1591, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.403125, |
|
"grad_norm": 0.9181774258613586, |
|
"learning_rate": 4.596875e-05, |
|
"loss": 0.0627, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 0.7968156933784485, |
|
"learning_rate": 4.59375e-05, |
|
"loss": 0.0472, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.409375, |
|
"grad_norm": 0.8290426135063171, |
|
"learning_rate": 4.590625e-05, |
|
"loss": 0.0508, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.4125, |
|
"grad_norm": 0.950431764125824, |
|
"learning_rate": 4.5875000000000004e-05, |
|
"loss": 0.0615, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.415625, |
|
"grad_norm": 1.1434526443481445, |
|
"learning_rate": 4.584375e-05, |
|
"loss": 0.0977, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.41875, |
|
"grad_norm": 0.858519971370697, |
|
"learning_rate": 4.58125e-05, |
|
"loss": 0.0626, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.421875, |
|
"grad_norm": 0.4805968701839447, |
|
"learning_rate": 4.5781250000000005e-05, |
|
"loss": 0.0475, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 1.4369895458221436, |
|
"learning_rate": 4.575e-05, |
|
"loss": 0.0479, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.428125, |
|
"grad_norm": 0.2389107346534729, |
|
"learning_rate": 4.571875e-05, |
|
"loss": 0.0303, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.43125, |
|
"grad_norm": 0.3905261754989624, |
|
"learning_rate": 4.56875e-05, |
|
"loss": 0.0413, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.434375, |
|
"grad_norm": 3.2840700149536133, |
|
"learning_rate": 4.565625e-05, |
|
"loss": 0.2217, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 2.7919232845306396, |
|
"learning_rate": 4.5625e-05, |
|
"loss": 0.1966, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.440625, |
|
"grad_norm": 2.9008607864379883, |
|
"learning_rate": 4.559375e-05, |
|
"loss": 0.2469, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.44375, |
|
"grad_norm": 1.680885672569275, |
|
"learning_rate": 4.55625e-05, |
|
"loss": 0.0687, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.446875, |
|
"grad_norm": 0.41792237758636475, |
|
"learning_rate": 4.5531250000000006e-05, |
|
"loss": 0.0405, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.24879863858222961, |
|
"learning_rate": 4.55e-05, |
|
"loss": 0.0321, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.453125, |
|
"grad_norm": 1.766568899154663, |
|
"learning_rate": 4.5468750000000004e-05, |
|
"loss": 0.0652, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.45625, |
|
"grad_norm": 2.5179293155670166, |
|
"learning_rate": 4.54375e-05, |
|
"loss": 0.0711, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.459375, |
|
"grad_norm": 1.7946605682373047, |
|
"learning_rate": 4.540625e-05, |
|
"loss": 0.0509, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4625, |
|
"grad_norm": 0.46179938316345215, |
|
"learning_rate": 4.5375e-05, |
|
"loss": 0.0388, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.465625, |
|
"grad_norm": 0.3265767991542816, |
|
"learning_rate": 4.534375e-05, |
|
"loss": 0.0403, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 0.6787862181663513, |
|
"learning_rate": 4.5312500000000004e-05, |
|
"loss": 0.0452, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.471875, |
|
"grad_norm": 0.46895185112953186, |
|
"learning_rate": 4.528125e-05, |
|
"loss": 0.0501, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 3.4975266456604004, |
|
"learning_rate": 4.525e-05, |
|
"loss": 0.1403, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.478125, |
|
"grad_norm": 2.780132532119751, |
|
"learning_rate": 4.5218750000000005e-05, |
|
"loss": 0.0864, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.48125, |
|
"grad_norm": 0.5605061054229736, |
|
"learning_rate": 4.518750000000001e-05, |
|
"loss": 0.0437, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.484375, |
|
"grad_norm": 1.14775550365448, |
|
"learning_rate": 4.515625e-05, |
|
"loss": 0.073, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4875, |
|
"grad_norm": 0.5990891456604004, |
|
"learning_rate": 4.5125e-05, |
|
"loss": 0.0531, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.490625, |
|
"grad_norm": 0.4071764349937439, |
|
"learning_rate": 4.509375e-05, |
|
"loss": 0.0394, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.49375, |
|
"grad_norm": 1.256040334701538, |
|
"learning_rate": 4.5062500000000004e-05, |
|
"loss": 0.074, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.496875, |
|
"grad_norm": 0.6708590388298035, |
|
"learning_rate": 4.503125e-05, |
|
"loss": 0.0469, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.4366428852081299, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0794, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.503125, |
|
"grad_norm": 2.625788927078247, |
|
"learning_rate": 4.4968750000000005e-05, |
|
"loss": 0.0456, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.50625, |
|
"grad_norm": 0.7705633640289307, |
|
"learning_rate": 4.49375e-05, |
|
"loss": 0.0367, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.509375, |
|
"grad_norm": 0.39144399762153625, |
|
"learning_rate": 4.490625e-05, |
|
"loss": 0.0342, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.5125, |
|
"grad_norm": 0.43612250685691833, |
|
"learning_rate": 4.4875e-05, |
|
"loss": 0.0377, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.515625, |
|
"grad_norm": 0.9478850364685059, |
|
"learning_rate": 4.484375e-05, |
|
"loss": 0.0558, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.51875, |
|
"grad_norm": 0.5833193063735962, |
|
"learning_rate": 4.4812500000000005e-05, |
|
"loss": 0.0534, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.521875, |
|
"grad_norm": 0.512205183506012, |
|
"learning_rate": 4.478125e-05, |
|
"loss": 0.0344, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 1.5145163536071777, |
|
"learning_rate": 4.4750000000000004e-05, |
|
"loss": 0.2032, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.528125, |
|
"grad_norm": 2.2172932624816895, |
|
"learning_rate": 4.4718750000000006e-05, |
|
"loss": 0.1559, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 1.8957103490829468, |
|
"learning_rate": 4.46875e-05, |
|
"loss": 0.052, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.534375, |
|
"grad_norm": 0.2700859010219574, |
|
"learning_rate": 4.465625e-05, |
|
"loss": 0.0337, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5375, |
|
"grad_norm": 0.6030570268630981, |
|
"learning_rate": 4.4625e-05, |
|
"loss": 0.0546, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.540625, |
|
"grad_norm": 0.25488942861557007, |
|
"learning_rate": 4.459375e-05, |
|
"loss": 0.0344, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.54375, |
|
"grad_norm": 0.6342024207115173, |
|
"learning_rate": 4.45625e-05, |
|
"loss": 0.0481, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.546875, |
|
"grad_norm": 0.34221383929252625, |
|
"learning_rate": 4.453125e-05, |
|
"loss": 0.0446, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.870440721511841, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 0.0879, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.553125, |
|
"grad_norm": 0.8905161023139954, |
|
"learning_rate": 4.446875e-05, |
|
"loss": 0.0634, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.55625, |
|
"grad_norm": 0.35176849365234375, |
|
"learning_rate": 4.44375e-05, |
|
"loss": 0.0528, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.559375, |
|
"grad_norm": 0.9682378172874451, |
|
"learning_rate": 4.4406250000000005e-05, |
|
"loss": 0.0496, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 1.700124740600586, |
|
"learning_rate": 4.4375e-05, |
|
"loss": 0.0824, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.565625, |
|
"grad_norm": 0.5797895193099976, |
|
"learning_rate": 4.4343750000000004e-05, |
|
"loss": 0.0424, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.56875, |
|
"grad_norm": 0.8441882729530334, |
|
"learning_rate": 4.43125e-05, |
|
"loss": 0.0501, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.571875, |
|
"grad_norm": 0.999452531337738, |
|
"learning_rate": 4.428125e-05, |
|
"loss": 0.0505, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 0.48567822575569153, |
|
"learning_rate": 4.4250000000000005e-05, |
|
"loss": 0.0384, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.578125, |
|
"grad_norm": 2.9717018604278564, |
|
"learning_rate": 4.421875e-05, |
|
"loss": 0.0806, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.58125, |
|
"grad_norm": 0.2634088695049286, |
|
"learning_rate": 4.4187500000000003e-05, |
|
"loss": 0.0416, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.584375, |
|
"grad_norm": 0.593612015247345, |
|
"learning_rate": 4.4156250000000006e-05, |
|
"loss": 0.0397, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5875, |
|
"grad_norm": 0.531893789768219, |
|
"learning_rate": 4.4125e-05, |
|
"loss": 0.0444, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.590625, |
|
"grad_norm": 0.29221251606941223, |
|
"learning_rate": 4.409375e-05, |
|
"loss": 0.0288, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 0.5673308372497559, |
|
"learning_rate": 4.40625e-05, |
|
"loss": 0.032, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.596875, |
|
"grad_norm": 0.9111027121543884, |
|
"learning_rate": 4.403125e-05, |
|
"loss": 0.0574, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.6866423487663269, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.0484, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.603125, |
|
"grad_norm": 0.30543097853660583, |
|
"learning_rate": 4.396875e-05, |
|
"loss": 0.0382, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.60625, |
|
"grad_norm": 0.3667004704475403, |
|
"learning_rate": 4.3937500000000004e-05, |
|
"loss": 0.039, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.609375, |
|
"grad_norm": 0.351158082485199, |
|
"learning_rate": 4.390625000000001e-05, |
|
"loss": 0.0286, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6125, |
|
"grad_norm": 0.22909612953662872, |
|
"learning_rate": 4.3875e-05, |
|
"loss": 0.0378, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.615625, |
|
"grad_norm": 0.6320449709892273, |
|
"learning_rate": 4.384375e-05, |
|
"loss": 0.0676, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.61875, |
|
"grad_norm": 0.6454492211341858, |
|
"learning_rate": 4.38125e-05, |
|
"loss": 0.0461, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.621875, |
|
"grad_norm": 0.3367100954055786, |
|
"learning_rate": 4.3781250000000004e-05, |
|
"loss": 0.0352, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.46938446164131165, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.0406, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.628125, |
|
"grad_norm": 1.5403215885162354, |
|
"learning_rate": 4.371875e-05, |
|
"loss": 0.0819, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.63125, |
|
"grad_norm": 0.8773464560508728, |
|
"learning_rate": 4.3687500000000005e-05, |
|
"loss": 0.0625, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.634375, |
|
"grad_norm": 2.457127571105957, |
|
"learning_rate": 4.365625000000001e-05, |
|
"loss": 0.1925, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6375, |
|
"grad_norm": 0.7906845808029175, |
|
"learning_rate": 4.3625e-05, |
|
"loss": 0.0595, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.640625, |
|
"grad_norm": 0.7821226716041565, |
|
"learning_rate": 4.359375e-05, |
|
"loss": 0.0494, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.64375, |
|
"grad_norm": 0.373221218585968, |
|
"learning_rate": 4.35625e-05, |
|
"loss": 0.0345, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.646875, |
|
"grad_norm": 0.2497500479221344, |
|
"learning_rate": 4.3531250000000004e-05, |
|
"loss": 0.0285, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.20948593318462372, |
|
"learning_rate": 4.35e-05, |
|
"loss": 0.0326, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.653125, |
|
"grad_norm": 0.6792771220207214, |
|
"learning_rate": 4.346875e-05, |
|
"loss": 0.0404, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 0.9426431655883789, |
|
"learning_rate": 4.3437500000000006e-05, |
|
"loss": 0.0337, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.659375, |
|
"grad_norm": 0.46820443868637085, |
|
"learning_rate": 4.340625e-05, |
|
"loss": 0.0437, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6625, |
|
"grad_norm": 0.2894236147403717, |
|
"learning_rate": 4.3375000000000004e-05, |
|
"loss": 0.0356, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.665625, |
|
"grad_norm": 1.8175113201141357, |
|
"learning_rate": 4.334375e-05, |
|
"loss": 0.0478, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.66875, |
|
"grad_norm": 0.35592272877693176, |
|
"learning_rate": 4.33125e-05, |
|
"loss": 0.0461, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.671875, |
|
"grad_norm": 1.7846156358718872, |
|
"learning_rate": 4.328125e-05, |
|
"loss": 0.1487, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 2.822322368621826, |
|
"learning_rate": 4.325e-05, |
|
"loss": 0.1751, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.678125, |
|
"grad_norm": 1.4585069417953491, |
|
"learning_rate": 4.3218750000000004e-05, |
|
"loss": 0.0771, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.68125, |
|
"grad_norm": 3.51526141166687, |
|
"learning_rate": 4.3187500000000006e-05, |
|
"loss": 0.0757, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.684375, |
|
"grad_norm": 0.7315062880516052, |
|
"learning_rate": 4.315625e-05, |
|
"loss": 0.0552, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 6.735369682312012, |
|
"learning_rate": 4.3125000000000005e-05, |
|
"loss": 0.2329, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.690625, |
|
"grad_norm": 0.3840850293636322, |
|
"learning_rate": 4.309375e-05, |
|
"loss": 0.0395, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.69375, |
|
"grad_norm": 0.7250546813011169, |
|
"learning_rate": 4.30625e-05, |
|
"loss": 0.0395, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.696875, |
|
"grad_norm": 0.7680365443229675, |
|
"learning_rate": 4.303125e-05, |
|
"loss": 0.0488, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4709233343601227, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0554, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"grad_norm": 0.3094955086708069, |
|
"learning_rate": 4.2968750000000004e-05, |
|
"loss": 0.0297, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.70625, |
|
"grad_norm": 0.7511866092681885, |
|
"learning_rate": 4.29375e-05, |
|
"loss": 0.0572, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.709375, |
|
"grad_norm": 0.7133306264877319, |
|
"learning_rate": 4.290625e-05, |
|
"loss": 0.034, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.7125, |
|
"grad_norm": 1.1605385541915894, |
|
"learning_rate": 4.2875000000000005e-05, |
|
"loss": 0.0722, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.715625, |
|
"grad_norm": 2.668914794921875, |
|
"learning_rate": 4.284375000000001e-05, |
|
"loss": 0.1256, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 0.4482596814632416, |
|
"learning_rate": 4.28125e-05, |
|
"loss": 0.0447, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.721875, |
|
"grad_norm": 0.3817666172981262, |
|
"learning_rate": 4.278125e-05, |
|
"loss": 0.0403, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 1.5475538969039917, |
|
"learning_rate": 4.275e-05, |
|
"loss": 0.0511, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.728125, |
|
"grad_norm": 2.1910324096679688, |
|
"learning_rate": 4.2718750000000005e-05, |
|
"loss": 0.1158, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.73125, |
|
"grad_norm": 1.3402481079101562, |
|
"learning_rate": 4.26875e-05, |
|
"loss": 0.0772, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.734375, |
|
"grad_norm": 0.43478524684906006, |
|
"learning_rate": 4.2656250000000003e-05, |
|
"loss": 0.0342, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7375, |
|
"grad_norm": 0.15397940576076508, |
|
"learning_rate": 4.2625000000000006e-05, |
|
"loss": 0.0251, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.740625, |
|
"grad_norm": 0.2772510051727295, |
|
"learning_rate": 4.259375e-05, |
|
"loss": 0.0446, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.74375, |
|
"grad_norm": 1.5829936265945435, |
|
"learning_rate": 4.25625e-05, |
|
"loss": 0.0696, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.746875, |
|
"grad_norm": 1.7104264497756958, |
|
"learning_rate": 4.253125e-05, |
|
"loss": 0.1076, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.011113166809082, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0698, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.753125, |
|
"grad_norm": 0.2590930461883545, |
|
"learning_rate": 4.246875e-05, |
|
"loss": 0.0292, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.75625, |
|
"grad_norm": 0.6169472932815552, |
|
"learning_rate": 4.24375e-05, |
|
"loss": 0.0486, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.759375, |
|
"grad_norm": 0.5677545070648193, |
|
"learning_rate": 4.2406250000000004e-05, |
|
"loss": 0.0487, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7625, |
|
"grad_norm": 0.1974128633737564, |
|
"learning_rate": 4.237500000000001e-05, |
|
"loss": 0.0346, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.765625, |
|
"grad_norm": 0.2740221321582794, |
|
"learning_rate": 4.234375e-05, |
|
"loss": 0.0337, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.76875, |
|
"grad_norm": 3.3777387142181396, |
|
"learning_rate": 4.23125e-05, |
|
"loss": 0.0799, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.771875, |
|
"grad_norm": 0.6743062138557434, |
|
"learning_rate": 4.228125e-05, |
|
"loss": 0.0388, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 0.25554272532463074, |
|
"learning_rate": 4.2250000000000004e-05, |
|
"loss": 0.0432, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.778125, |
|
"grad_norm": 0.2516278326511383, |
|
"learning_rate": 4.221875e-05, |
|
"loss": 0.0322, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.34015893936157227, |
|
"learning_rate": 4.21875e-05, |
|
"loss": 0.0379, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.784375, |
|
"grad_norm": 2.134775161743164, |
|
"learning_rate": 4.2156250000000005e-05, |
|
"loss": 0.2122, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7875, |
|
"grad_norm": 0.3281037211418152, |
|
"learning_rate": 4.2125e-05, |
|
"loss": 0.0358, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.790625, |
|
"grad_norm": 1.2461293935775757, |
|
"learning_rate": 4.209375e-05, |
|
"loss": 0.0687, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.79375, |
|
"grad_norm": 3.500396251678467, |
|
"learning_rate": 4.2062500000000006e-05, |
|
"loss": 0.1092, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.796875, |
|
"grad_norm": 0.7699318528175354, |
|
"learning_rate": 4.203125e-05, |
|
"loss": 0.0929, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.3767575025558472, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0533, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.803125, |
|
"grad_norm": 0.3553447425365448, |
|
"learning_rate": 4.196875e-05, |
|
"loss": 0.0423, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.80625, |
|
"grad_norm": 0.6199429035186768, |
|
"learning_rate": 4.19375e-05, |
|
"loss": 0.0455, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.809375, |
|
"grad_norm": 0.23632241785526276, |
|
"learning_rate": 4.1906250000000006e-05, |
|
"loss": 0.03, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 4.547003746032715, |
|
"learning_rate": 4.1875e-05, |
|
"loss": 0.0938, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.815625, |
|
"grad_norm": 0.2540350556373596, |
|
"learning_rate": 4.1843750000000004e-05, |
|
"loss": 0.0367, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.81875, |
|
"grad_norm": 0.418260395526886, |
|
"learning_rate": 4.181250000000001e-05, |
|
"loss": 0.0446, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.821875, |
|
"grad_norm": 0.23045071959495544, |
|
"learning_rate": 4.178125e-05, |
|
"loss": 0.0308, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 0.8318238854408264, |
|
"learning_rate": 4.175e-05, |
|
"loss": 0.0698, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.828125, |
|
"grad_norm": 0.45676013827323914, |
|
"learning_rate": 4.171875e-05, |
|
"loss": 0.0388, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.83125, |
|
"grad_norm": 3.701650619506836, |
|
"learning_rate": 4.1687500000000004e-05, |
|
"loss": 0.1129, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.834375, |
|
"grad_norm": 1.591991662979126, |
|
"learning_rate": 4.165625e-05, |
|
"loss": 0.0957, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.8375, |
|
"grad_norm": 0.4643809497356415, |
|
"learning_rate": 4.1625e-05, |
|
"loss": 0.0346, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.840625, |
|
"grad_norm": 0.34625670313835144, |
|
"learning_rate": 4.1593750000000005e-05, |
|
"loss": 0.0388, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 0.3321349024772644, |
|
"learning_rate": 4.156250000000001e-05, |
|
"loss": 0.0353, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.846875, |
|
"grad_norm": 0.23249466717243195, |
|
"learning_rate": 4.1531249999999996e-05, |
|
"loss": 0.0322, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.27158722281455994, |
|
"learning_rate": 4.15e-05, |
|
"loss": 0.0384, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.853125, |
|
"grad_norm": 0.2619187533855438, |
|
"learning_rate": 4.146875e-05, |
|
"loss": 0.034, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.85625, |
|
"grad_norm": 0.25599196553230286, |
|
"learning_rate": 4.1437500000000004e-05, |
|
"loss": 0.0366, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"grad_norm": 0.16068707406520844, |
|
"learning_rate": 4.140625e-05, |
|
"loss": 0.0259, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8625, |
|
"grad_norm": 0.2689169645309448, |
|
"learning_rate": 4.1375e-05, |
|
"loss": 0.0301, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.865625, |
|
"grad_norm": 0.12709015607833862, |
|
"learning_rate": 4.1343750000000005e-05, |
|
"loss": 0.0287, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.86875, |
|
"grad_norm": 1.140897512435913, |
|
"learning_rate": 4.13125e-05, |
|
"loss": 0.0614, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.871875, |
|
"grad_norm": 2.3372366428375244, |
|
"learning_rate": 4.1281250000000004e-05, |
|
"loss": 0.0852, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 1.0569149255752563, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.0439, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.878125, |
|
"grad_norm": 0.7298460006713867, |
|
"learning_rate": 4.121875e-05, |
|
"loss": 0.0479, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.88125, |
|
"grad_norm": 1.4520108699798584, |
|
"learning_rate": 4.11875e-05, |
|
"loss": 0.051, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.884375, |
|
"grad_norm": 1.1062657833099365, |
|
"learning_rate": 4.115625e-05, |
|
"loss": 0.0527, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8875, |
|
"grad_norm": 0.32543355226516724, |
|
"learning_rate": 4.1125000000000004e-05, |
|
"loss": 0.0361, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.890625, |
|
"grad_norm": 1.7986284494400024, |
|
"learning_rate": 4.1093750000000006e-05, |
|
"loss": 0.1552, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.89375, |
|
"grad_norm": 0.2922920286655426, |
|
"learning_rate": 4.10625e-05, |
|
"loss": 0.0335, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.896875, |
|
"grad_norm": 0.2638067305088043, |
|
"learning_rate": 4.1031250000000005e-05, |
|
"loss": 0.0301, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.16769519448280334, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0267, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.903125, |
|
"grad_norm": 0.38744208216667175, |
|
"learning_rate": 4.096875e-05, |
|
"loss": 0.0326, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 1.4372221231460571, |
|
"learning_rate": 4.09375e-05, |
|
"loss": 0.0699, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.909375, |
|
"grad_norm": 1.0212301015853882, |
|
"learning_rate": 4.090625e-05, |
|
"loss": 0.1233, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.9125, |
|
"grad_norm": 0.18620982766151428, |
|
"learning_rate": 4.0875000000000004e-05, |
|
"loss": 0.0254, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.915625, |
|
"grad_norm": 0.6351355910301208, |
|
"learning_rate": 4.084375e-05, |
|
"loss": 0.0442, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.91875, |
|
"grad_norm": 0.2035447657108307, |
|
"learning_rate": 4.08125e-05, |
|
"loss": 0.0342, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.921875, |
|
"grad_norm": 0.8789761066436768, |
|
"learning_rate": 4.0781250000000005e-05, |
|
"loss": 0.0601, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 0.4256390631198883, |
|
"learning_rate": 4.075e-05, |
|
"loss": 0.0335, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.928125, |
|
"grad_norm": 0.5044748187065125, |
|
"learning_rate": 4.071875e-05, |
|
"loss": 0.0403, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.93125, |
|
"grad_norm": 0.6527408361434937, |
|
"learning_rate": 4.06875e-05, |
|
"loss": 0.0525, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.934375, |
|
"grad_norm": 0.5137639045715332, |
|
"learning_rate": 4.065625e-05, |
|
"loss": 0.0328, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.3386867940425873, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 0.0299, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.940625, |
|
"grad_norm": 0.3191845417022705, |
|
"learning_rate": 4.059375e-05, |
|
"loss": 0.0293, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.94375, |
|
"grad_norm": 0.6586815118789673, |
|
"learning_rate": 4.0562500000000003e-05, |
|
"loss": 0.0334, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.946875, |
|
"grad_norm": 0.33495861291885376, |
|
"learning_rate": 4.0531250000000006e-05, |
|
"loss": 0.0378, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.2046739012002945, |
|
"learning_rate": 4.05e-05, |
|
"loss": 0.0276, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.953125, |
|
"grad_norm": 1.4748613834381104, |
|
"learning_rate": 4.046875e-05, |
|
"loss": 0.0903, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.95625, |
|
"grad_norm": 1.4325345754623413, |
|
"learning_rate": 4.04375e-05, |
|
"loss": 0.0447, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.959375, |
|
"grad_norm": 1.7935466766357422, |
|
"learning_rate": 4.040625e-05, |
|
"loss": 0.1974, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9625, |
|
"grad_norm": 0.872593104839325, |
|
"learning_rate": 4.0375e-05, |
|
"loss": 0.0544, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.965625, |
|
"grad_norm": 0.17810046672821045, |
|
"learning_rate": 4.034375e-05, |
|
"loss": 0.0253, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 0.3628585636615753, |
|
"learning_rate": 4.0312500000000004e-05, |
|
"loss": 0.0293, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.971875, |
|
"grad_norm": 0.3988589346408844, |
|
"learning_rate": 4.028125000000001e-05, |
|
"loss": 0.0294, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 0.5909652709960938, |
|
"learning_rate": 4.025e-05, |
|
"loss": 0.0454, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.978125, |
|
"grad_norm": 0.44711074233055115, |
|
"learning_rate": 4.021875e-05, |
|
"loss": 0.0326, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.98125, |
|
"grad_norm": 0.5259913802146912, |
|
"learning_rate": 4.01875e-05, |
|
"loss": 0.0349, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.984375, |
|
"grad_norm": 0.4235933721065521, |
|
"learning_rate": 4.0156250000000004e-05, |
|
"loss": 0.0339, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9875, |
|
"grad_norm": 0.26886850595474243, |
|
"learning_rate": 4.0125e-05, |
|
"loss": 0.0276, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.990625, |
|
"grad_norm": 0.11596307903528214, |
|
"learning_rate": 4.009375e-05, |
|
"loss": 0.0209, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.99375, |
|
"grad_norm": 2.2217676639556885, |
|
"learning_rate": 4.0062500000000005e-05, |
|
"loss": 0.1677, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.996875, |
|
"grad_norm": 0.20450957119464874, |
|
"learning_rate": 4.003125e-05, |
|
"loss": 0.0269, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.8894203305244446, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0656, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy_N/A": NaN, |
|
"eval_accuracy_content": 0.9856742228684513, |
|
"eval_iou_N/A": 0.0, |
|
"eval_iou_content": 0.9856742228684513, |
|
"eval_loss": 0.04362819343805313, |
|
"eval_mean_accuracy": 0.9856742228684513, |
|
"eval_mean_iou": 0.49283711143422565, |
|
"eval_overall_accuracy": 0.9856742228684513, |
|
"eval_runtime": 999.4276, |
|
"eval_samples_per_second": 1.281, |
|
"eval_steps_per_second": 0.08, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.003125, |
|
"grad_norm": 1.1715222597122192, |
|
"learning_rate": 3.996875e-05, |
|
"loss": 0.0396, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.00625, |
|
"grad_norm": 0.3776068389415741, |
|
"learning_rate": 3.99375e-05, |
|
"loss": 0.0436, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.009375, |
|
"grad_norm": 2.7889890670776367, |
|
"learning_rate": 3.990625e-05, |
|
"loss": 0.0938, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.0125, |
|
"grad_norm": 0.6198174953460693, |
|
"learning_rate": 3.9875e-05, |
|
"loss": 0.0379, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"grad_norm": 0.18462005257606506, |
|
"learning_rate": 3.984375e-05, |
|
"loss": 0.0262, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.01875, |
|
"grad_norm": 0.5885348320007324, |
|
"learning_rate": 3.9812500000000005e-05, |
|
"loss": 0.0412, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.021875, |
|
"grad_norm": 2.135707139968872, |
|
"learning_rate": 3.978125e-05, |
|
"loss": 0.0545, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.025, |
|
"grad_norm": 1.119269609451294, |
|
"learning_rate": 3.9750000000000004e-05, |
|
"loss": 0.0482, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.028125, |
|
"grad_norm": 0.5240402817726135, |
|
"learning_rate": 3.9718750000000007e-05, |
|
"loss": 0.0335, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.03125, |
|
"grad_norm": 0.14166490733623505, |
|
"learning_rate": 3.96875e-05, |
|
"loss": 0.0207, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.034375, |
|
"grad_norm": 0.8404735922813416, |
|
"learning_rate": 3.965625e-05, |
|
"loss": 0.0424, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.0375, |
|
"grad_norm": 0.206669881939888, |
|
"learning_rate": 3.9625e-05, |
|
"loss": 0.0273, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.040625, |
|
"grad_norm": 0.2163994461297989, |
|
"learning_rate": 3.9593750000000004e-05, |
|
"loss": 0.0343, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.04375, |
|
"grad_norm": 0.4119163155555725, |
|
"learning_rate": 3.95625e-05, |
|
"loss": 0.0364, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.046875, |
|
"grad_norm": 0.37234818935394287, |
|
"learning_rate": 3.953125e-05, |
|
"loss": 0.0246, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.3997434377670288, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 0.0276, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.053125, |
|
"grad_norm": 0.13193507492542267, |
|
"learning_rate": 3.946875000000001e-05, |
|
"loss": 0.0277, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.05625, |
|
"grad_norm": 0.1905323565006256, |
|
"learning_rate": 3.9437499999999996e-05, |
|
"loss": 0.0229, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.059375, |
|
"grad_norm": 0.23189271986484528, |
|
"learning_rate": 3.940625e-05, |
|
"loss": 0.0305, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 0.4632808268070221, |
|
"learning_rate": 3.9375e-05, |
|
"loss": 0.0273, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.065625, |
|
"grad_norm": 0.15538431704044342, |
|
"learning_rate": 3.9343750000000004e-05, |
|
"loss": 0.0259, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.06875, |
|
"grad_norm": 0.53954017162323, |
|
"learning_rate": 3.93125e-05, |
|
"loss": 0.0324, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.071875, |
|
"grad_norm": 0.48742935061454773, |
|
"learning_rate": 3.928125e-05, |
|
"loss": 0.0329, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.075, |
|
"grad_norm": 0.22778959572315216, |
|
"learning_rate": 3.9250000000000005e-05, |
|
"loss": 0.0288, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.078125, |
|
"grad_norm": 0.2957710027694702, |
|
"learning_rate": 3.921875e-05, |
|
"loss": 0.0274, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.08125, |
|
"grad_norm": 0.5753316283226013, |
|
"learning_rate": 3.91875e-05, |
|
"loss": 0.0355, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.084375, |
|
"grad_norm": 1.4917867183685303, |
|
"learning_rate": 3.915625e-05, |
|
"loss": 0.0715, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.0875, |
|
"grad_norm": 2.4194159507751465, |
|
"learning_rate": 3.9125e-05, |
|
"loss": 0.1007, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.090625, |
|
"grad_norm": 0.2074752151966095, |
|
"learning_rate": 3.909375e-05, |
|
"loss": 0.0281, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 0.315179705619812, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 0.0387, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.096875, |
|
"grad_norm": 0.8541421890258789, |
|
"learning_rate": 3.9031250000000003e-05, |
|
"loss": 0.059, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.3108535706996918, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.0294, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.103125, |
|
"grad_norm": 0.6527621150016785, |
|
"learning_rate": 3.896875e-05, |
|
"loss": 0.0463, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.10625, |
|
"grad_norm": 0.20114494860172272, |
|
"learning_rate": 3.8937500000000005e-05, |
|
"loss": 0.0304, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.109375, |
|
"grad_norm": 0.4402085244655609, |
|
"learning_rate": 3.890625e-05, |
|
"loss": 0.0413, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.1125, |
|
"grad_norm": 0.38828253746032715, |
|
"learning_rate": 3.8875e-05, |
|
"loss": 0.0338, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.115625, |
|
"grad_norm": 0.7028439044952393, |
|
"learning_rate": 3.884375e-05, |
|
"loss": 0.0461, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.11875, |
|
"grad_norm": 0.9470064640045166, |
|
"learning_rate": 3.88125e-05, |
|
"loss": 0.0419, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.121875, |
|
"grad_norm": 0.5290505290031433, |
|
"learning_rate": 3.8781250000000004e-05, |
|
"loss": 0.0357, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 1.1553906202316284, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.0722, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.128125, |
|
"grad_norm": 0.40173131227493286, |
|
"learning_rate": 3.871875e-05, |
|
"loss": 0.0286, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.13125, |
|
"grad_norm": 0.895039975643158, |
|
"learning_rate": 3.8687500000000005e-05, |
|
"loss": 0.0491, |
      "step": 362
    },
    {
      "epoch": 1.134375,
      "grad_norm": 0.607846200466156,
      "learning_rate": 3.865625e-05,
      "loss": 0.0445,
      "step": 363
    },
    {
      "epoch": 1.1375,
      "grad_norm": 1.4331371784210205,
      "learning_rate": 3.8625e-05,
      "loss": 0.0725,
      "step": 364
    },
    {
      "epoch": 1.140625,
      "grad_norm": 0.7139254212379456,
      "learning_rate": 3.859375e-05,
      "loss": 0.0387,
      "step": 365
    },
    {
      "epoch": 1.14375,
      "grad_norm": 0.21128199994564056,
      "learning_rate": 3.85625e-05,
      "loss": 0.0253,
      "step": 366
    },
    {
      "epoch": 1.146875,
      "grad_norm": 0.28902533650398254,
      "learning_rate": 3.8531250000000005e-05,
      "loss": 0.0247,
      "step": 367
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.33391180634498596,
      "learning_rate": 3.85e-05,
      "loss": 0.0437,
      "step": 368
    },
    {
      "epoch": 1.153125,
      "grad_norm": 0.2734394371509552,
      "learning_rate": 3.846875e-05,
      "loss": 0.0273,
      "step": 369
    },
    {
      "epoch": 1.15625,
      "grad_norm": 0.9343364238739014,
      "learning_rate": 3.8437500000000006e-05,
      "loss": 0.0339,
      "step": 370
    },
    {
      "epoch": 1.159375,
      "grad_norm": 1.0542594194412231,
      "learning_rate": 3.840625e-05,
      "loss": 0.0485,
      "step": 371
    },
    {
      "epoch": 1.1625,
      "grad_norm": 2.3781888484954834,
      "learning_rate": 3.8375e-05,
      "loss": 0.0381,
      "step": 372
    },
    {
      "epoch": 1.165625,
      "grad_norm": 0.3513585329055786,
      "learning_rate": 3.834375e-05,
      "loss": 0.0474,
      "step": 373
    },
    {
      "epoch": 1.16875,
      "grad_norm": 0.2519855797290802,
      "learning_rate": 3.83125e-05,
      "loss": 0.0267,
      "step": 374
    },
    {
      "epoch": 1.171875,
      "grad_norm": 1.4920951128005981,
      "learning_rate": 3.828125e-05,
      "loss": 0.0412,
      "step": 375
    },
    {
      "epoch": 1.175,
      "grad_norm": 0.26298317313194275,
      "learning_rate": 3.825e-05,
      "loss": 0.0263,
      "step": 376
    },
    {
      "epoch": 1.178125,
      "grad_norm": 1.309157133102417,
      "learning_rate": 3.8218750000000004e-05,
      "loss": 0.05,
      "step": 377
    },
    {
      "epoch": 1.18125,
      "grad_norm": 2.483985424041748,
      "learning_rate": 3.818750000000001e-05,
      "loss": 0.056,
      "step": 378
    },
    {
      "epoch": 1.184375,
      "grad_norm": 0.3043748140335083,
      "learning_rate": 3.815625e-05,
      "loss": 0.0251,
      "step": 379
    },
    {
      "epoch": 1.1875,
      "grad_norm": 0.45542070269584656,
      "learning_rate": 3.8125e-05,
      "loss": 0.0322,
      "step": 380
    },
    {
      "epoch": 1.190625,
      "grad_norm": 0.1830095499753952,
      "learning_rate": 3.809375e-05,
      "loss": 0.0285,
      "step": 381
    },
    {
      "epoch": 1.19375,
      "grad_norm": 0.7842034697532654,
      "learning_rate": 3.8062500000000004e-05,
      "loss": 0.0324,
      "step": 382
    },
    {
      "epoch": 1.196875,
      "grad_norm": 0.8280196785926819,
      "learning_rate": 3.803125e-05,
      "loss": 0.036,
      "step": 383
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1664057970046997,
      "learning_rate": 3.8e-05,
      "loss": 0.022,
      "step": 384
    },
    {
      "epoch": 1.203125,
      "grad_norm": 0.2700011730194092,
      "learning_rate": 3.7968750000000005e-05,
      "loss": 0.0226,
      "step": 385
    },
    {
      "epoch": 1.20625,
      "grad_norm": 0.2379760593175888,
      "learning_rate": 3.79375e-05,
      "loss": 0.0382,
      "step": 386
    },
    {
      "epoch": 1.209375,
      "grad_norm": 0.1285543590784073,
      "learning_rate": 3.790625e-05,
      "loss": 0.0207,
      "step": 387
    },
    {
      "epoch": 1.2125,
      "grad_norm": 0.2303180694580078,
      "learning_rate": 3.7875e-05,
      "loss": 0.0313,
      "step": 388
    },
    {
      "epoch": 1.215625,
      "grad_norm": 0.5406865477561951,
      "learning_rate": 3.784375e-05,
      "loss": 0.0346,
      "step": 389
    },
    {
      "epoch": 1.21875,
      "grad_norm": 0.2952568829059601,
      "learning_rate": 3.78125e-05,
      "loss": 0.0297,
      "step": 390
    },
    {
      "epoch": 1.221875,
      "grad_norm": 3.3401618003845215,
      "learning_rate": 3.778125e-05,
      "loss": 0.0787,
      "step": 391
    },
    {
      "epoch": 1.225,
      "grad_norm": 0.2306816279888153,
      "learning_rate": 3.775e-05,
      "loss": 0.0305,
      "step": 392
    },
    {
      "epoch": 1.228125,
      "grad_norm": 2.631598472595215,
      "learning_rate": 3.7718750000000005e-05,
      "loss": 0.102,
      "step": 393
    },
    {
      "epoch": 1.23125,
      "grad_norm": 5.553420066833496,
      "learning_rate": 3.76875e-05,
      "loss": 0.0751,
      "step": 394
    },
    {
      "epoch": 1.234375,
      "grad_norm": 0.2419847697019577,
      "learning_rate": 3.7656250000000004e-05,
      "loss": 0.0305,
      "step": 395
    },
    {
      "epoch": 1.2375,
      "grad_norm": 0.9081357717514038,
      "learning_rate": 3.7625e-05,
      "loss": 0.0431,
      "step": 396
    },
    {
      "epoch": 1.240625,
      "grad_norm": 1.1338061094284058,
      "learning_rate": 3.759375e-05,
      "loss": 0.0527,
      "step": 397
    },
    {
      "epoch": 1.24375,
      "grad_norm": 1.058129906654358,
      "learning_rate": 3.75625e-05,
      "loss": 0.064,
      "step": 398
    },
    {
      "epoch": 1.246875,
      "grad_norm": 0.22226227819919586,
      "learning_rate": 3.753125e-05,
      "loss": 0.0264,
      "step": 399
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.23206289112567902,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.023,
      "step": 400
    },
    {
      "epoch": 1.253125,
      "grad_norm": 0.3117161691188812,
      "learning_rate": 3.746875e-05,
      "loss": 0.027,
      "step": 401
    },
    {
      "epoch": 1.25625,
      "grad_norm": 0.1130838617682457,
      "learning_rate": 3.74375e-05,
      "loss": 0.0212,
      "step": 402
    },
    {
      "epoch": 1.259375,
      "grad_norm": 0.29144272208213806,
      "learning_rate": 3.7406250000000005e-05,
      "loss": 0.0476,
      "step": 403
    },
    {
      "epoch": 1.2625,
      "grad_norm": 0.31826767325401306,
      "learning_rate": 3.737500000000001e-05,
      "loss": 0.027,
      "step": 404
    },
    {
      "epoch": 1.265625,
      "grad_norm": 0.3600537180900574,
      "learning_rate": 3.7343749999999996e-05,
      "loss": 0.033,
      "step": 405
    },
    {
      "epoch": 1.26875,
      "grad_norm": 0.4717816114425659,
      "learning_rate": 3.73125e-05,
      "loss": 0.0333,
      "step": 406
    },
    {
      "epoch": 1.271875,
      "grad_norm": 0.2320399135351181,
      "learning_rate": 3.728125e-05,
      "loss": 0.0236,
      "step": 407
    },
    {
      "epoch": 1.275,
      "grad_norm": 0.29288992285728455,
      "learning_rate": 3.7250000000000004e-05,
      "loss": 0.0272,
      "step": 408
    },
    {
      "epoch": 1.278125,
      "grad_norm": 0.4767632782459259,
      "learning_rate": 3.721875e-05,
      "loss": 0.0378,
      "step": 409
    },
    {
      "epoch": 1.28125,
      "grad_norm": 5.730660915374756,
      "learning_rate": 3.71875e-05,
      "loss": 0.0775,
      "step": 410
    },
    {
      "epoch": 1.284375,
      "grad_norm": 0.10694890469312668,
      "learning_rate": 3.7156250000000005e-05,
      "loss": 0.019,
      "step": 411
    },
    {
      "epoch": 1.2875,
      "grad_norm": 0.34038084745407104,
      "learning_rate": 3.7125e-05,
      "loss": 0.0229,
      "step": 412
    },
    {
      "epoch": 1.290625,
      "grad_norm": 0.17101223766803741,
      "learning_rate": 3.709375e-05,
      "loss": 0.0308,
      "step": 413
    },
    {
      "epoch": 1.29375,
      "grad_norm": 0.1484222412109375,
      "learning_rate": 3.70625e-05,
      "loss": 0.0244,
      "step": 414
    },
    {
      "epoch": 1.296875,
      "grad_norm": 0.3410337567329407,
      "learning_rate": 3.703125e-05,
      "loss": 0.0298,
      "step": 415
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.641090452671051,
      "learning_rate": 3.7e-05,
      "loss": 0.0345,
      "step": 416
    },
    {
      "epoch": 1.303125,
      "grad_norm": 0.8170056343078613,
      "learning_rate": 3.696875e-05,
      "loss": 0.0448,
      "step": 417
    },
    {
      "epoch": 1.30625,
      "grad_norm": 3.6934096813201904,
      "learning_rate": 3.69375e-05,
      "loss": 0.0552,
      "step": 418
    },
    {
      "epoch": 1.309375,
      "grad_norm": 0.1905418187379837,
      "learning_rate": 3.6906250000000006e-05,
      "loss": 0.0305,
      "step": 419
    },
    {
      "epoch": 1.3125,
      "grad_norm": 0.6752256155014038,
      "learning_rate": 3.6875e-05,
      "loss": 0.0373,
      "step": 420
    },
    {
      "epoch": 1.315625,
      "grad_norm": 0.8538122177124023,
      "learning_rate": 3.684375e-05,
      "loss": 0.0355,
      "step": 421
    },
    {
      "epoch": 1.31875,
      "grad_norm": 0.13017673790454865,
      "learning_rate": 3.68125e-05,
      "loss": 0.0287,
      "step": 422
    },
    {
      "epoch": 1.321875,
      "grad_norm": 0.3785141110420227,
      "learning_rate": 3.678125e-05,
      "loss": 0.0329,
      "step": 423
    },
    {
      "epoch": 1.325,
      "grad_norm": 0.21819180250167847,
      "learning_rate": 3.675e-05,
      "loss": 0.0263,
      "step": 424
    },
    {
      "epoch": 1.328125,
      "grad_norm": 9.360857963562012,
      "learning_rate": 3.671875e-05,
      "loss": 0.0819,
      "step": 425
    },
    {
      "epoch": 1.33125,
      "grad_norm": 0.1742083877325058,
      "learning_rate": 3.6687500000000004e-05,
      "loss": 0.0262,
      "step": 426
    },
    {
      "epoch": 1.334375,
      "grad_norm": 0.4147292971611023,
      "learning_rate": 3.665625e-05,
      "loss": 0.0311,
      "step": 427
    },
    {
      "epoch": 1.3375,
      "grad_norm": 4.418566703796387,
      "learning_rate": 3.6625e-05,
      "loss": 0.0971,
      "step": 428
    },
    {
      "epoch": 1.340625,
      "grad_norm": 1.636364459991455,
      "learning_rate": 3.6593750000000005e-05,
      "loss": 0.0379,
      "step": 429
    },
    {
      "epoch": 1.34375,
      "grad_norm": 0.22759225964546204,
      "learning_rate": 3.65625e-05,
      "loss": 0.0266,
      "step": 430
    },
    {
      "epoch": 1.346875,
      "grad_norm": 0.17647528648376465,
      "learning_rate": 3.653125e-05,
      "loss": 0.0179,
      "step": 431
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2657251358032227,
      "learning_rate": 3.65e-05,
      "loss": 0.0363,
      "step": 432
    },
    {
      "epoch": 1.353125,
      "grad_norm": 0.17360058426856995,
      "learning_rate": 3.646875e-05,
      "loss": 0.0272,
      "step": 433
    },
    {
      "epoch": 1.35625,
      "grad_norm": 0.1535394936800003,
      "learning_rate": 3.6437500000000005e-05,
      "loss": 0.0204,
      "step": 434
    },
    {
      "epoch": 1.359375,
      "grad_norm": 0.23168876767158508,
      "learning_rate": 3.640625e-05,
      "loss": 0.0324,
      "step": 435
    },
    {
      "epoch": 1.3625,
      "grad_norm": 0.623339831829071,
      "learning_rate": 3.6375e-05,
      "loss": 0.0372,
      "step": 436
    },
    {
      "epoch": 1.365625,
      "grad_norm": 0.1481848806142807,
      "learning_rate": 3.6343750000000006e-05,
      "loss": 0.0231,
      "step": 437
    },
    {
      "epoch": 1.36875,
      "grad_norm": 0.2897062599658966,
      "learning_rate": 3.63125e-05,
      "loss": 0.0394,
      "step": 438
    },
    {
      "epoch": 1.371875,
      "grad_norm": 0.12459568679332733,
      "learning_rate": 3.628125e-05,
      "loss": 0.0261,
      "step": 439
    },
    {
      "epoch": 1.375,
      "grad_norm": 2.7815325260162354,
      "learning_rate": 3.625e-05,
      "loss": 0.0757,
      "step": 440
    },
    {
      "epoch": 1.378125,
      "grad_norm": 10.49199104309082,
      "learning_rate": 3.621875e-05,
      "loss": 0.3111,
      "step": 441
    },
    {
      "epoch": 1.38125,
      "grad_norm": 8.13404369354248,
      "learning_rate": 3.61875e-05,
      "loss": 0.1123,
      "step": 442
    },
    {
      "epoch": 1.384375,
      "grad_norm": 0.24218522012233734,
      "learning_rate": 3.615625e-05,
      "loss": 0.0271,
      "step": 443
    },
    {
      "epoch": 1.3875,
      "grad_norm": 0.4631577730178833,
      "learning_rate": 3.6125000000000004e-05,
      "loss": 0.0364,
      "step": 444
    },
    {
      "epoch": 1.390625,
      "grad_norm": 0.2414701133966446,
      "learning_rate": 3.6093750000000007e-05,
      "loss": 0.034,
      "step": 445
    },
    {
      "epoch": 1.39375,
      "grad_norm": 0.10716137290000916,
      "learning_rate": 3.60625e-05,
      "loss": 0.024,
      "step": 446
    },
    {
      "epoch": 1.396875,
      "grad_norm": 0.1930936723947525,
      "learning_rate": 3.603125e-05,
      "loss": 0.0297,
      "step": 447
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.1756851226091385,
      "learning_rate": 3.6e-05,
      "loss": 0.0202,
      "step": 448
    },
    {
      "epoch": 1.403125,
      "grad_norm": 0.4439844489097595,
      "learning_rate": 3.5968750000000004e-05,
      "loss": 0.0577,
      "step": 449
    },
    {
      "epoch": 1.40625,
      "grad_norm": 3.5776124000549316,
      "learning_rate": 3.59375e-05,
      "loss": 0.0463,
      "step": 450
    },
    {
      "epoch": 1.409375,
      "grad_norm": 1.1139960289001465,
      "learning_rate": 3.590625e-05,
      "loss": 0.0429,
      "step": 451
    },
    {
      "epoch": 1.4125,
      "grad_norm": 0.21480640769004822,
      "learning_rate": 3.5875000000000005e-05,
      "loss": 0.0331,
      "step": 452
    },
    {
      "epoch": 1.415625,
      "grad_norm": 0.36357372999191284,
      "learning_rate": 3.584375e-05,
      "loss": 0.034,
      "step": 453
    },
    {
      "epoch": 1.41875,
      "grad_norm": 0.25747624039649963,
      "learning_rate": 3.58125e-05,
      "loss": 0.03,
      "step": 454
    },
    {
      "epoch": 1.421875,
      "grad_norm": 0.5258784890174866,
      "learning_rate": 3.578125e-05,
      "loss": 0.0392,
      "step": 455
    },
    {
      "epoch": 1.425,
      "grad_norm": 0.0880817100405693,
      "learning_rate": 3.575e-05,
      "loss": 0.0212,
      "step": 456
    },
    {
      "epoch": 1.428125,
      "grad_norm": 0.55647212266922,
      "learning_rate": 3.571875e-05,
      "loss": 0.0369,
      "step": 457
    },
    {
      "epoch": 1.43125,
      "grad_norm": 0.4609752297401428,
      "learning_rate": 3.56875e-05,
      "loss": 0.0281,
      "step": 458
    },
    {
      "epoch": 1.434375,
      "grad_norm": 3.478707790374756,
      "learning_rate": 3.565625e-05,
      "loss": 0.1029,
      "step": 459
    },
    {
      "epoch": 1.4375,
      "grad_norm": 0.14912380278110504,
      "learning_rate": 3.5625000000000005e-05,
      "loss": 0.0229,
      "step": 460
    },
    {
      "epoch": 1.440625,
      "grad_norm": 0.19087623059749603,
      "learning_rate": 3.559375e-05,
      "loss": 0.0308,
      "step": 461
    },
    {
      "epoch": 1.44375,
      "grad_norm": 0.138632670044899,
      "learning_rate": 3.5562500000000004e-05,
      "loss": 0.0229,
      "step": 462
    },
    {
      "epoch": 1.446875,
      "grad_norm": 0.18853573501110077,
      "learning_rate": 3.553125e-05,
      "loss": 0.0295,
      "step": 463
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.3829871714115143,
      "learning_rate": 3.55e-05,
      "loss": 0.0312,
      "step": 464
    },
    {
      "epoch": 1.453125,
      "grad_norm": 0.9517104029655457,
      "learning_rate": 3.546875e-05,
      "loss": 0.0384,
      "step": 465
    },
    {
      "epoch": 1.45625,
      "grad_norm": 0.16418811678886414,
      "learning_rate": 3.54375e-05,
      "loss": 0.0279,
      "step": 466
    },
    {
      "epoch": 1.459375,
      "grad_norm": 0.6732227206230164,
      "learning_rate": 3.5406250000000003e-05,
      "loss": 0.041,
      "step": 467
    },
    {
      "epoch": 1.4625,
      "grad_norm": 0.26701417565345764,
      "learning_rate": 3.5375e-05,
      "loss": 0.0311,
      "step": 468
    },
    {
      "epoch": 1.465625,
      "grad_norm": 0.5570465326309204,
      "learning_rate": 3.534375e-05,
      "loss": 0.0431,
      "step": 469
    },
    {
      "epoch": 1.46875,
      "grad_norm": 0.18663549423217773,
      "learning_rate": 3.5312500000000005e-05,
      "loss": 0.0327,
      "step": 470
    },
    {
      "epoch": 1.471875,
      "grad_norm": 6.698427200317383,
      "learning_rate": 3.528125e-05,
      "loss": 0.0793,
      "step": 471
    },
    {
      "epoch": 1.475,
      "grad_norm": 0.1952434629201889,
      "learning_rate": 3.525e-05,
      "loss": 0.03,
      "step": 472
    },
    {
      "epoch": 1.478125,
      "grad_norm": 0.13568946719169617,
      "learning_rate": 3.521875e-05,
      "loss": 0.0228,
      "step": 473
    },
    {
      "epoch": 1.48125,
      "grad_norm": 0.3262503147125244,
      "learning_rate": 3.51875e-05,
      "loss": 0.036,
      "step": 474
    },
    {
      "epoch": 1.484375,
      "grad_norm": 0.2775207757949829,
      "learning_rate": 3.5156250000000004e-05,
      "loss": 0.0244,
      "step": 475
    },
    {
      "epoch": 1.4875,
      "grad_norm": 0.17881157994270325,
      "learning_rate": 3.5125e-05,
      "loss": 0.0239,
      "step": 476
    },
    {
      "epoch": 1.490625,
      "grad_norm": 0.09230555593967438,
      "learning_rate": 3.509375e-05,
      "loss": 0.0235,
      "step": 477
    },
    {
      "epoch": 1.49375,
      "grad_norm": 0.11491113901138306,
      "learning_rate": 3.5062500000000005e-05,
      "loss": 0.0189,
      "step": 478
    },
    {
      "epoch": 1.496875,
      "grad_norm": 0.1791650950908661,
      "learning_rate": 3.503125e-05,
      "loss": 0.0302,
      "step": 479
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.632184624671936,
      "learning_rate": 3.5e-05,
      "loss": 0.0554,
      "step": 480
    },
    {
      "epoch": 1.503125,
      "grad_norm": 0.47974345088005066,
      "learning_rate": 3.496875e-05,
      "loss": 0.031,
      "step": 481
    },
    {
      "epoch": 1.50625,
      "grad_norm": 0.25306665897369385,
      "learning_rate": 3.49375e-05,
      "loss": 0.0348,
      "step": 482
    },
    {
      "epoch": 1.509375,
      "grad_norm": 0.23441192507743835,
      "learning_rate": 3.4906250000000005e-05,
      "loss": 0.0267,
      "step": 483
    },
    {
      "epoch": 1.5125,
      "grad_norm": 0.47122475504875183,
      "learning_rate": 3.4875e-05,
      "loss": 0.0531,
      "step": 484
    },
    {
      "epoch": 1.515625,
      "grad_norm": 0.42970484495162964,
      "learning_rate": 3.484375e-05,
      "loss": 0.0407,
      "step": 485
    },
    {
      "epoch": 1.51875,
      "grad_norm": 0.09633981436491013,
      "learning_rate": 3.4812500000000006e-05,
      "loss": 0.0237,
      "step": 486
    },
    {
      "epoch": 1.521875,
      "grad_norm": 0.36600834131240845,
      "learning_rate": 3.478125e-05,
      "loss": 0.0284,
      "step": 487
    },
    {
      "epoch": 1.525,
      "grad_norm": 0.20985989272594452,
      "learning_rate": 3.475e-05,
      "loss": 0.0291,
      "step": 488
    },
    {
      "epoch": 1.528125,
      "grad_norm": 0.19182531535625458,
      "learning_rate": 3.471875e-05,
      "loss": 0.0215,
      "step": 489
    },
    {
      "epoch": 1.53125,
      "grad_norm": 0.1707691103219986,
      "learning_rate": 3.46875e-05,
      "loss": 0.0243,
      "step": 490
    },
    {
      "epoch": 1.534375,
      "grad_norm": 1.414674162864685,
      "learning_rate": 3.465625e-05,
      "loss": 0.0386,
      "step": 491
    },
    {
      "epoch": 1.5375,
      "grad_norm": 0.8973498344421387,
      "learning_rate": 3.4625e-05,
      "loss": 0.0541,
      "step": 492
    },
    {
      "epoch": 1.540625,
      "grad_norm": 0.0919414833188057,
      "learning_rate": 3.4593750000000004e-05,
      "loss": 0.0211,
      "step": 493
    },
    {
      "epoch": 1.54375,
      "grad_norm": 0.4503600299358368,
      "learning_rate": 3.45625e-05,
      "loss": 0.0345,
      "step": 494
    },
    {
      "epoch": 1.546875,
      "grad_norm": 0.16320058703422546,
      "learning_rate": 3.453125e-05,
      "loss": 0.0258,
      "step": 495
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.14714613556861877,
      "learning_rate": 3.45e-05,
      "loss": 0.0267,
      "step": 496
    },
    {
      "epoch": 1.553125,
      "grad_norm": 0.45230165123939514,
      "learning_rate": 3.446875e-05,
      "loss": 0.0452,
      "step": 497
    },
    {
      "epoch": 1.55625,
      "grad_norm": 0.10125567764043808,
      "learning_rate": 3.4437500000000004e-05,
      "loss": 0.0193,
      "step": 498
    },
    {
      "epoch": 1.559375,
      "grad_norm": 0.27993062138557434,
      "learning_rate": 3.440625e-05,
      "loss": 0.0292,
      "step": 499
    },
    {
      "epoch": 1.5625,
      "grad_norm": 0.15163570642471313,
      "learning_rate": 3.4375e-05,
      "loss": 0.021,
      "step": 500
    },
    {
      "epoch": 1.565625,
      "grad_norm": 0.3911040723323822,
      "learning_rate": 3.4343750000000005e-05,
      "loss": 0.0267,
      "step": 501
    },
    {
      "epoch": 1.56875,
      "grad_norm": 0.9082874059677124,
      "learning_rate": 3.43125e-05,
      "loss": 0.0529,
      "step": 502
    },
    {
      "epoch": 1.571875,
      "grad_norm": 9.674172401428223,
      "learning_rate": 3.428125e-05,
      "loss": 0.1488,
      "step": 503
    },
    {
      "epoch": 1.575,
      "grad_norm": 0.43405047059059143,
      "learning_rate": 3.4250000000000006e-05,
      "loss": 0.0264,
      "step": 504
    },
    {
      "epoch": 1.578125,
      "grad_norm": 0.28418371081352234,
      "learning_rate": 3.421875e-05,
      "loss": 0.0208,
      "step": 505
    },
    {
      "epoch": 1.58125,
      "grad_norm": 0.07861240953207016,
      "learning_rate": 3.41875e-05,
      "loss": 0.0234,
      "step": 506
    },
    {
      "epoch": 1.584375,
      "grad_norm": 1.7110495567321777,
      "learning_rate": 3.415625e-05,
      "loss": 0.0382,
      "step": 507
    },
    {
      "epoch": 1.5875,
      "grad_norm": 0.24076040089130402,
      "learning_rate": 3.4125e-05,
      "loss": 0.0272,
      "step": 508
    },
    {
      "epoch": 1.590625,
      "grad_norm": 0.6059406995773315,
      "learning_rate": 3.4093750000000005e-05,
      "loss": 0.0475,
      "step": 509
    },
    {
      "epoch": 1.59375,
      "grad_norm": 0.24730996787548065,
      "learning_rate": 3.40625e-05,
      "loss": 0.0215,
      "step": 510
    },
    {
      "epoch": 1.596875,
      "grad_norm": 0.6072419285774231,
      "learning_rate": 3.4031250000000004e-05,
      "loss": 0.0306,
      "step": 511
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.3612338900566101,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.0437,
      "step": 512
    },
    {
      "epoch": 1.603125,
      "grad_norm": 0.18105106055736542,
      "learning_rate": 3.396875e-05,
      "loss": 0.042,
      "step": 513
    },
    {
      "epoch": 1.60625,
      "grad_norm": 0.21311897039413452,
      "learning_rate": 3.39375e-05,
      "loss": 0.0228,
      "step": 514
    },
    {
      "epoch": 1.609375,
      "grad_norm": 0.20102500915527344,
      "learning_rate": 3.390625e-05,
      "loss": 0.0225,
      "step": 515
    },
    {
      "epoch": 1.6125,
      "grad_norm": 0.08760484308004379,
      "learning_rate": 3.3875000000000003e-05,
      "loss": 0.0206,
      "step": 516
    },
    {
      "epoch": 1.615625,
      "grad_norm": 1.122139573097229,
      "learning_rate": 3.384375e-05,
      "loss": 0.0279,
      "step": 517
    },
    {
      "epoch": 1.61875,
      "grad_norm": 1.3522179126739502,
      "learning_rate": 3.38125e-05,
      "loss": 0.0468,
      "step": 518
    },
    {
      "epoch": 1.621875,
      "grad_norm": 0.24191869795322418,
      "learning_rate": 3.3781250000000005e-05,
      "loss": 0.0283,
      "step": 519
    },
    {
      "epoch": 1.625,
      "grad_norm": 0.3224029839038849,
      "learning_rate": 3.375000000000001e-05,
      "loss": 0.0329,
      "step": 520
    },
    {
      "epoch": 1.628125,
      "grad_norm": 0.2289983034133911,
      "learning_rate": 3.3718749999999996e-05,
      "loss": 0.0238,
      "step": 521
    },
    {
      "epoch": 1.63125,
      "grad_norm": 1.206566572189331,
      "learning_rate": 3.36875e-05,
      "loss": 0.0418,
      "step": 522
    },
    {
      "epoch": 1.634375,
      "grad_norm": 2.037022113800049,
      "learning_rate": 3.365625e-05,
      "loss": 0.0515,
      "step": 523
    },
    {
      "epoch": 1.6375,
      "grad_norm": 0.4265249967575073,
      "learning_rate": 3.3625000000000004e-05,
      "loss": 0.0372,
      "step": 524
    },
    {
      "epoch": 1.640625,
      "grad_norm": 0.12385348975658417,
      "learning_rate": 3.359375e-05,
      "loss": 0.0285,
      "step": 525
    },
    {
      "epoch": 1.64375,
      "grad_norm": 0.7889988422393799,
      "learning_rate": 3.35625e-05,
      "loss": 0.0419,
      "step": 526
    },
    {
      "epoch": 1.646875,
      "grad_norm": 1.755566120147705,
      "learning_rate": 3.3531250000000005e-05,
      "loss": 0.0399,
      "step": 527
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.5016866326332092,
      "learning_rate": 3.35e-05,
      "loss": 0.0654,
      "step": 528
    },
    {
      "epoch": 1.653125,
      "grad_norm": 0.09634485840797424,
      "learning_rate": 3.3468750000000004e-05,
      "loss": 0.0195,
      "step": 529
    },
    {
      "epoch": 1.65625,
      "grad_norm": 0.13417887687683105,
      "learning_rate": 3.34375e-05,
      "loss": 0.0198,
      "step": 530
    },
    {
      "epoch": 1.659375,
      "grad_norm": 0.33312031626701355,
      "learning_rate": 3.340625e-05,
      "loss": 0.0313,
      "step": 531
    },
    {
      "epoch": 1.6625,
      "grad_norm": 0.17174910008907318,
      "learning_rate": 3.3375e-05,
      "loss": 0.0258,
      "step": 532
    },
    {
      "epoch": 1.665625,
      "grad_norm": 0.09567829966545105,
      "learning_rate": 3.334375e-05,
      "loss": 0.0207,
      "step": 533
    },
    {
      "epoch": 1.66875,
      "grad_norm": 0.12403300404548645,
      "learning_rate": 3.33125e-05,
      "loss": 0.0229,
      "step": 534
    },
    {
      "epoch": 1.671875,
      "grad_norm": 0.2110747992992401,
      "learning_rate": 3.3281250000000006e-05,
      "loss": 0.0246,
      "step": 535
    },
    {
      "epoch": 1.675,
      "grad_norm": 0.4586084485054016,
      "learning_rate": 3.325e-05,
      "loss": 0.0386,
      "step": 536
    },
    {
      "epoch": 1.678125,
      "grad_norm": 0.37612682580947876,
      "learning_rate": 3.3218750000000004e-05,
      "loss": 0.0273,
      "step": 537
    },
    {
      "epoch": 1.68125,
      "grad_norm": 0.12443527579307556,
      "learning_rate": 3.31875e-05,
      "loss": 0.0251,
      "step": 538
    },
    {
      "epoch": 1.684375,
      "grad_norm": 1.3459047079086304,
      "learning_rate": 3.315625e-05,
      "loss": 0.055,
      "step": 539
    },
    {
      "epoch": 1.6875,
      "grad_norm": 1.1800158023834229,
      "learning_rate": 3.3125e-05,
      "loss": 0.1293,
      "step": 540
    },
    {
      "epoch": 1.690625,
      "grad_norm": 0.2551305890083313,
      "learning_rate": 3.309375e-05,
      "loss": 0.0244,
      "step": 541
    },
    {
      "epoch": 1.69375,
      "grad_norm": 0.3867127001285553,
      "learning_rate": 3.3062500000000004e-05,
      "loss": 0.034,
      "step": 542
    },
    {
      "epoch": 1.696875,
      "grad_norm": 0.21304133534431458,
      "learning_rate": 3.303125e-05,
      "loss": 0.0222,
      "step": 543
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.41967159509658813,
      "learning_rate": 3.3e-05,
      "loss": 0.0552,
      "step": 544
    },
    {
      "epoch": 1.703125,
      "grad_norm": 0.12126383185386658,
      "learning_rate": 3.2968750000000005e-05,
      "loss": 0.0203,
      "step": 545
    },
    {
      "epoch": 1.70625,
      "grad_norm": 0.5720413327217102,
      "learning_rate": 3.29375e-05,
      "loss": 0.0281,
      "step": 546
    },
    {
      "epoch": 1.709375,
      "grad_norm": 0.14413952827453613,
      "learning_rate": 3.290625e-05,
      "loss": 0.0234,
      "step": 547
    },
    {
      "epoch": 1.7125,
      "grad_norm": 0.30371683835983276,
      "learning_rate": 3.2875e-05,
      "loss": 0.031,
      "step": 548
    },
    {
      "epoch": 1.715625,
      "grad_norm": 0.24292483925819397,
      "learning_rate": 3.284375e-05,
      "loss": 0.0356,
      "step": 549
    },
    {
      "epoch": 1.71875,
      "grad_norm": 0.22597646713256836,
      "learning_rate": 3.2812500000000005e-05,
      "loss": 0.0272,
      "step": 550
    },
    {
      "epoch": 1.721875,
      "grad_norm": 0.12884864211082458,
      "learning_rate": 3.278125e-05,
      "loss": 0.0173,
      "step": 551
    },
    {
      "epoch": 1.725,
      "grad_norm": 0.21374590694904327,
      "learning_rate": 3.275e-05,
      "loss": 0.0233,
      "step": 552
    },
    {
      "epoch": 1.728125,
      "grad_norm": 0.1160837858915329,
      "learning_rate": 3.2718750000000006e-05,
      "loss": 0.0204,
      "step": 553
    },
    {
      "epoch": 1.73125,
      "grad_norm": 0.09611855447292328,
      "learning_rate": 3.26875e-05,
      "loss": 0.0217,
      "step": 554
    },
    {
      "epoch": 1.734375,
      "grad_norm": 0.39267030358314514,
      "learning_rate": 3.265625e-05,
      "loss": 0.0293,
      "step": 555
    },
    {
      "epoch": 1.7375,
      "grad_norm": 0.587985098361969,
      "learning_rate": 3.2625e-05,
      "loss": 0.0357,
      "step": 556
    },
    {
      "epoch": 1.740625,
      "grad_norm": 0.12259811908006668,
      "learning_rate": 3.259375e-05,
      "loss": 0.0241,
      "step": 557
    },
    {
      "epoch": 1.74375,
      "grad_norm": 0.10824635624885559,
      "learning_rate": 3.25625e-05,
      "loss": 0.0236,
      "step": 558
    },
    {
      "epoch": 1.746875,
      "grad_norm": 0.0685066282749176,
      "learning_rate": 3.253125e-05,
      "loss": 0.0157,
      "step": 559
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.15844418108463287,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.0279,
      "step": 560
    },
    {
      "epoch": 1.753125,
      "grad_norm": 0.5560875535011292,
      "learning_rate": 3.2468750000000007e-05,
      "loss": 0.0504,
      "step": 561
    },
    {
      "epoch": 1.75625,
      "grad_norm": 0.15164297819137573,
      "learning_rate": 3.24375e-05,
      "loss": 0.0285,
      "step": 562
    },
    {
      "epoch": 1.759375,
      "grad_norm": 0.09835915267467499,
      "learning_rate": 3.240625e-05,
      "loss": 0.0231,
      "step": 563
    },
    {
      "epoch": 1.7625,
      "grad_norm": 0.33482205867767334,
      "learning_rate": 3.2375e-05,
      "loss": 0.0276,
      "step": 564
    },
    {
      "epoch": 1.765625,
      "grad_norm": 0.16952961683273315,
      "learning_rate": 3.2343750000000004e-05,
      "loss": 0.028,
      "step": 565
    },
    {
      "epoch": 1.76875,
      "grad_norm": 0.5915741324424744,
      "learning_rate": 3.23125e-05,
      "loss": 0.0241,
      "step": 566
    },
    {
      "epoch": 1.771875,
      "grad_norm": 0.0879567340016365,
      "learning_rate": 3.228125e-05,
      "loss": 0.0182,
      "step": 567
    },
    {
      "epoch": 1.775,
      "grad_norm": 0.13319243490695953,
      "learning_rate": 3.2250000000000005e-05,
      "loss": 0.0214,
      "step": 568
    },
    {
      "epoch": 1.778125,
      "grad_norm": 1.3187066316604614,
      "learning_rate": 3.221875e-05,
      "loss": 0.0286,
      "step": 569
    },
    {
      "epoch": 1.78125,
      "grad_norm": 0.13088369369506836,
      "learning_rate": 3.21875e-05,
      "loss": 0.0219,
      "step": 570
    },
    {
      "epoch": 1.784375,
      "grad_norm": 0.11556047946214676,
      "learning_rate": 3.215625e-05,
      "loss": 0.0233,
      "step": 571
    },
    {
      "epoch": 1.7875,
      "grad_norm": 0.6260407567024231,
      "learning_rate": 3.2125e-05,
      "loss": 0.0385,
      "step": 572
    },
    {
      "epoch": 1.790625,
      "grad_norm": 0.12837764620780945,
      "learning_rate": 3.209375e-05,
      "loss": 0.0193,
      "step": 573
    },
    {
      "epoch": 1.79375,
      "grad_norm": 0.4870481789112091,
      "learning_rate": 3.20625e-05,
      "loss": 0.032,
      "step": 574
    },
    {
      "epoch": 1.796875,
      "grad_norm": 0.21629931032657623,
      "learning_rate": 3.203125e-05,
      "loss": 0.021,
      "step": 575
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.34100577235221863,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.0249,
      "step": 576
    },
    {
      "epoch": 1.803125,
      "grad_norm": 0.5092364549636841,
      "learning_rate": 3.196875e-05,
      "loss": 0.034,
      "step": 577
    },
    {
      "epoch": 1.80625,
      "grad_norm": 0.3579314351081848,
      "learning_rate": 3.1937500000000004e-05,
      "loss": 0.0231,
      "step": 578
    },
    {
      "epoch": 1.809375,
      "grad_norm": 0.3164561688899994,
      "learning_rate": 3.1906250000000006e-05,
      "loss": 0.0336,
      "step": 579
    },
    {
      "epoch": 1.8125,
      "grad_norm": 0.41202688217163086,
      "learning_rate": 3.1875e-05,
      "loss": 0.0279,
      "step": 580
    },
    {
      "epoch": 1.815625,
      "grad_norm": 0.34404581785202026,
      "learning_rate": 3.184375e-05,
      "loss": 0.0266,
      "step": 581
    },
    {
      "epoch": 1.81875,
      "grad_norm": 0.3374820053577423,
      "learning_rate": 3.18125e-05,
      "loss": 0.0413,
      "step": 582
    },
    {
      "epoch": 1.821875,
      "grad_norm": 1.9358925819396973,
      "learning_rate": 3.1781250000000003e-05,
      "loss": 0.0887,
      "step": 583
    },
    {
      "epoch": 1.825,
      "grad_norm": 1.2504078149795532,
      "learning_rate": 3.175e-05,
      "loss": 0.0873,
      "step": 584
    },
    {
      "epoch": 1.828125,
      "grad_norm": 0.500586211681366,
      "learning_rate": 3.171875e-05,
      "loss": 0.1037,
      "step": 585
    },
    {
      "epoch": 1.83125,
      "grad_norm": 0.1820017248392105,
      "learning_rate": 3.1687500000000005e-05,
      "loss": 0.0266,
      "step": 586
    },
    {
      "epoch": 1.834375,
      "grad_norm": 0.22211194038391113,
      "learning_rate": 3.165625000000001e-05,
      "loss": 0.0225,
      "step": 587
    },
    {
      "epoch": 1.8375,
      "grad_norm": 0.28489556908607483,
      "learning_rate": 3.1624999999999996e-05,
      "loss": 0.0268,
      "step": 588
    },
    {
      "epoch": 1.840625,
      "grad_norm": 0.19432474672794342,
      "learning_rate": 3.159375e-05,
      "loss": 0.0265,
      "step": 589
    },
    {
      "epoch": 1.84375,
      "grad_norm": 0.15175053477287292,
      "learning_rate": 3.15625e-05,
      "loss": 0.0202,
      "step": 590
    },
    {
      "epoch": 1.846875,
      "grad_norm": 0.17748066782951355,
      "learning_rate": 3.1531250000000004e-05,
      "loss": 0.024,
      "step": 591
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.21079498529434204,
      "learning_rate": 3.15e-05,
      "loss": 0.0199,
      "step": 592
    },
    {
      "epoch": 1.853125,
      "grad_norm": 2.379624128341675,
      "learning_rate": 3.146875e-05,
      "loss": 0.2896,
      "step": 593
    },
    {
      "epoch": 1.85625,
      "grad_norm": 0.2815399467945099,
      "learning_rate": 3.1437500000000005e-05,
      "loss": 0.0353,
      "step": 594
    },
    {
      "epoch": 1.859375,
      "grad_norm": 0.16545546054840088,
      "learning_rate": 3.140625e-05,
      "loss": 0.0294,
      "step": 595
    },
    {
      "epoch": 1.8625,
      "grad_norm": 0.11324725300073624,
      "learning_rate": 3.1375e-05,
      "loss": 0.0173,
      "step": 596
    },
    {
      "epoch": 1.865625,
      "grad_norm": 0.324992299079895,
      "learning_rate": 3.134375e-05,
      "loss": 0.029,
      "step": 597
    },
    {
      "epoch": 1.86875,
      "grad_norm": 0.1070389673113823,
      "learning_rate": 3.13125e-05,
      "loss": 0.027,
      "step": 598
    },
    {
      "epoch": 1.871875,
      "grad_norm": 1.371116042137146,
      "learning_rate": 3.128125e-05,
      "loss": 0.0534,
      "step": 599
    },
    {
      "epoch": 1.875,
      "grad_norm": 3.377976894378662,
      "learning_rate": 3.125e-05,
      "loss": 0.0479,
      "step": 600
    },
    {
      "epoch": 1.878125,
      "grad_norm": 0.20367911458015442,
      "learning_rate": 3.121875e-05,
      "loss": 0.0282,
      "step": 601
    },
    {
      "epoch": 1.88125,
      "grad_norm": 0.20612278580665588,
      "learning_rate": 3.1187500000000006e-05,
      "loss": 0.0249,
      "step": 602
    },
    {
      "epoch": 1.884375,
      "grad_norm": 0.23075002431869507,
      "learning_rate": 3.115625e-05,
      "loss": 0.0233,
      "step": 603
    },
    {
      "epoch": 1.8875,
      "grad_norm": 9.430220603942871,
      "learning_rate": 3.1125000000000004e-05,
      "loss": 0.1505,
      "step": 604
    },
    {
      "epoch": 1.890625,
      "grad_norm": 0.27332308888435364,
      "learning_rate": 3.109375e-05,
      "loss": 0.0259,
      "step": 605
    },
    {
      "epoch": 1.89375,
      "grad_norm": 2.547020673751831,
      "learning_rate": 3.10625e-05,
      "loss": 0.0565,
      "step": 606
    },
    {
      "epoch": 1.896875,
      "grad_norm": 0.22681358456611633,
      "learning_rate": 3.103125e-05,
      "loss": 0.029,
      "step": 607
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.47494059801101685,
      "learning_rate": 3.1e-05,
      "loss": 0.0345,
      "step": 608
    },
    {
      "epoch": 1.903125,
      "grad_norm": 0.16444207727909088,
      "learning_rate": 3.0968750000000004e-05,
      "loss": 0.0243,
      "step": 609
    },
    {
      "epoch": 1.90625,
      "grad_norm": 0.27043211460113525,
      "learning_rate": 3.09375e-05,
      "loss": 0.0289,
      "step": 610
    },
    {
      "epoch": 1.909375,
      "grad_norm": 0.18597914278507233,
      "learning_rate": 3.090625e-05,
      "loss": 0.0207,
      "step": 611
    },
    {
      "epoch": 1.9125,
      "grad_norm": 0.4550987780094147,
      "learning_rate": 3.0875000000000005e-05,
      "loss": 0.0259,
      "step": 612
    },
    {
      "epoch": 1.915625,
      "grad_norm": 0.9754087924957275,
      "learning_rate": 3.084375e-05,
      "loss": 0.0615,
      "step": 613
    },
    {
      "epoch": 1.91875,
      "grad_norm": 0.07866744697093964,
      "learning_rate": 3.08125e-05,
      "loss": 0.0206,
      "step": 614
    },
    {
      "epoch": 1.921875,
      "grad_norm": 0.3353641927242279,
      "learning_rate": 3.078125e-05,
      "loss": 0.0284,
      "step": 615
    },
    {
      "epoch": 1.925,
      "grad_norm": 0.6235747337341309,
      "learning_rate": 3.075e-05,
      "loss": 0.0387,
      "step": 616
    },
    {
      "epoch": 1.928125,
      "grad_norm": 0.14865784347057343,
      "learning_rate": 3.0718750000000005e-05,
      "loss": 0.0253,
      "step": 617
    },
    {
      "epoch": 1.93125,
      "grad_norm": 0.4304657280445099,
      "learning_rate": 3.06875e-05,
      "loss": 0.0314,
      "step": 618
    },
    {
      "epoch": 1.934375,
      "grad_norm": 0.10021252185106277,
      "learning_rate": 3.065625e-05,
      "loss": 0.0181,
      "step": 619
    },
    {
      "epoch": 1.9375,
      "grad_norm": 0.16892488300800323,
      "learning_rate": 3.0625000000000006e-05,
      "loss": 0.0276,
      "step": 620
    },
    {
      "epoch": 1.940625,
      "grad_norm": 1.247506022453308,
      "learning_rate": 3.059375e-05,
      "loss": 0.0346,
      "step": 621
    },
    {
      "epoch": 1.94375,
      "grad_norm": 0.6756719350814819,
      "learning_rate": 3.05625e-05,
      "loss": 0.03,
      "step": 622
    },
    {
      "epoch": 1.946875,
      "grad_norm": 0.14591379463672638,
      "learning_rate": 3.053125e-05,
      "loss": 0.0216,
      "step": 623
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.4332157373428345,
      "learning_rate": 3.05e-05,
      "loss": 0.0357,
      "step": 624
    },
    {
      "epoch": 1.953125,
      "grad_norm": 0.8189756274223328,
      "learning_rate": 3.0468750000000002e-05,
      "loss": 0.021,
      "step": 625
    },
    {
      "epoch": 1.95625,
      "grad_norm": 0.2692781388759613,
      "learning_rate": 3.04375e-05,
      "loss": 0.0207,
      "step": 626
    },
    {
      "epoch": 1.959375,
      "grad_norm": 0.10346498340368271,
      "learning_rate": 3.0406250000000004e-05,
      "loss": 0.0184,
      "step": 627
    },
    {
      "epoch": 1.9625,
      "grad_norm": 0.22864669561386108,
      "learning_rate": 3.0375000000000003e-05,
      "loss": 0.0337,
      "step": 628
    },
    {
      "epoch": 1.965625,
      "grad_norm": 0.15825912356376648,
      "learning_rate": 3.0343750000000006e-05,
      "loss": 0.0258,
      "step": 629
    },
    {
      "epoch": 1.96875,
      "grad_norm": 0.29511621594429016,
      "learning_rate": 3.0312499999999998e-05,
      "loss": 0.0291,
      "step": 630
    },
    {
      "epoch": 1.971875,
      "grad_norm": 0.12500648200511932,
      "learning_rate": 3.028125e-05,
      "loss": 0.0231,
      "step": 631
    },
    {
      "epoch": 1.975,
      "grad_norm": 0.09671773761510849,
      "learning_rate": 3.025e-05,
      "loss": 0.0176,
      "step": 632
    },
    {
      "epoch": 1.978125,
      "grad_norm": 0.11605774611234665,
      "learning_rate": 3.0218750000000003e-05,
      "loss": 0.0184,
      "step": 633
    },
    {
      "epoch": 1.98125,
      "grad_norm": 0.7053767442703247,
      "learning_rate": 3.0187500000000002e-05,
      "loss": 0.0419,
      "step": 634
    },
    {
      "epoch": 1.984375,
      "grad_norm": 0.5477724671363831,
      "learning_rate": 3.015625e-05,
      "loss": 0.0283,
      "step": 635
    },
    {
      "epoch": 1.9875,
      "grad_norm": 0.17797940969467163,
      "learning_rate": 3.0125000000000004e-05,
      "loss": 0.024,
      "step": 636
    },
    {
      "epoch": 1.990625,
      "grad_norm": 2.187450408935547,
      "learning_rate": 3.0093750000000003e-05,
      "loss": 0.0466,
      "step": 637
    },
    {
      "epoch": 1.99375,
      "grad_norm": 0.17812545597553253,
      "learning_rate": 3.00625e-05,
      "loss": 0.0245,
      "step": 638
    },
    {
      "epoch": 1.996875,
      "grad_norm": 0.29999840259552,
      "learning_rate": 3.0031249999999998e-05,
      "loss": 0.0335,
      "step": 639
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.09049447625875473,
      "learning_rate": 3e-05,
      "loss": 0.0186,
      "step": 640
    },
    {
      "epoch": 2.0,
      "eval_accuracy_N/A": NaN,
      "eval_accuracy_content": 0.9907844483799361,
      "eval_iou_N/A": 0.0,
      "eval_iou_content": 0.9907844483799361,
      "eval_loss": 0.03488548472523689,
      "eval_mean_accuracy": 0.9907844483799361,
      "eval_mean_iou": 0.49539222418996803,
      "eval_overall_accuracy": 0.9907844483799361,
      "eval_runtime": 1027.8886,
      "eval_samples_per_second": 1.245,
      "eval_steps_per_second": 0.078,
      "step": 640
    }
  ],
  "logging_steps": 1,
  "max_steps": 1600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.5441172247101343e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}