{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.601336302895323, "eval_steps": 250, "global_step": 9250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022271714922048997, "grad_norm": 48.25035095214844, "learning_rate": 1e-06, "loss": 4.3999, "num_input_tokens_seen": 55440, "step": 1 }, { "epoch": 0.0022271714922048997, "loss": 4.068600654602051, "loss_ce": 1.3654756546020508, "loss_iou": 1.1171875, "loss_num": 0.09423828125, "loss_xval": 2.703125, "num_input_tokens_seen": 55440, "step": 1 }, { "epoch": 0.004454342984409799, "grad_norm": 130.20863342285156, "learning_rate": 1e-06, "loss": 3.4252, "num_input_tokens_seen": 111516, "step": 2 }, { "epoch": 0.004454342984409799, "loss": 3.1070704460144043, "loss_ce": 0.6607813835144043, "loss_iou": 1.0390625, "loss_num": 0.072265625, "loss_xval": 2.453125, "num_input_tokens_seen": 111516, "step": 2 }, { "epoch": 0.0066815144766146995, "grad_norm": 219.82708740234375, "learning_rate": 1e-06, "loss": 4.3295, "num_input_tokens_seen": 167164, "step": 3 }, { "epoch": 0.0066815144766146995, "loss": 4.420684814453125, "loss_ce": 1.4587706327438354, "loss_iou": 1.2109375, "loss_num": 0.10888671875, "loss_xval": 2.96875, "num_input_tokens_seen": 167164, "step": 3 }, { "epoch": 0.008908685968819599, "grad_norm": 42.13670349121094, "learning_rate": 1e-06, "loss": 4.1998, "num_input_tokens_seen": 221656, "step": 4 }, { "epoch": 0.008908685968819599, "loss": 4.317120552062988, "loss_ce": 1.9479798078536987, "loss_iou": 0.91015625, "loss_num": 0.10986328125, "loss_xval": 2.375, "num_input_tokens_seen": 221656, "step": 4 }, { "epoch": 0.011135857461024499, "grad_norm": 45.899261474609375, "learning_rate": 1e-06, "loss": 3.8544, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "eval_seeclick_web_CIoU": 0.627178281545639, "eval_seeclick_web_GIoU": 0.6259045600891113, "eval_seeclick_web_IoU": 0.6463199555873871, "eval_seeclick_web_MAE_all": 0.015169001650065184, "eval_seeclick_web_MAE_h": 0.008642073255032301, "eval_seeclick_web_MAE_w": 0.0126046072691679, "eval_seeclick_web_MAE_x_boxes": 0.00952292513102293, "eval_seeclick_web_MAE_y_boxes": 0.022719496861100197, "eval_seeclick_web_inside_bbox": 0.8732638955116272, "eval_seeclick_web_loss": 0.8245152831077576, "eval_seeclick_web_loss_ce": 0.00025572007871232927, "eval_seeclick_web_loss_iou": 0.3670654296875, "eval_seeclick_web_loss_num": 0.011660575866699219, "eval_seeclick_web_loss_xval": 0.7923583984375, "eval_seeclick_web_runtime": 17.4436, "eval_seeclick_web_samples_per_second": 2.866, "eval_seeclick_web_steps_per_second": 0.115, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "eval_icons_CIoU": 0.4010816812515259, "eval_icons_GIoU": 0.4324014186859131, "eval_icons_IoU": 0.49571336805820465, "eval_icons_MAE_all": 0.05109180323779583, "eval_icons_MAE_h": 0.020274315029382706, "eval_icons_MAE_w": 0.07078993320465088, "eval_icons_MAE_x_boxes": 0.05979071371257305, "eval_icons_MAE_y_boxes": 0.02338168118149042, "eval_icons_inside_bbox": 0.7361111044883728, "eval_icons_loss": 1.3772772550582886, "eval_icons_loss_ce": 9.112022598856129e-05, "eval_icons_loss_iou": 0.539794921875, "eval_icons_loss_num": 0.04870033264160156, "eval_icons_loss_xval": 1.323486328125, "eval_icons_runtime": 16.5509, "eval_icons_samples_per_second": 3.021, "eval_icons_steps_per_second": 0.121, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "eval_screenspot_CIoU": 0.10782323777675629, "eval_screenspot_GIoU": 0.12048953274885814, "eval_screenspot_IoU": 0.26688433190186817, "eval_screenspot_MAE_all": 0.1258778969446818, "eval_screenspot_MAE_h": 0.07123650113741557, "eval_screenspot_MAE_w": 0.16419320305188498, "eval_screenspot_MAE_x_boxes": 0.1336704045534134, "eval_screenspot_MAE_y_boxes": 0.13144449392954508, "eval_screenspot_inside_bbox": 0.451666663090388, "eval_screenspot_loss": 2.3829233646392822, "eval_screenspot_loss_ce": 0.0014819869732794662, "eval_screenspot_loss_iou": 0.888671875, "eval_screenspot_loss_num": 0.1301116943359375, "eval_screenspot_loss_xval": 2.427734375, "eval_screenspot_runtime": 26.6936, "eval_screenspot_samples_per_second": 3.334, "eval_screenspot_steps_per_second": 0.112, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "eval_compot_CIoU": 0.3970271050930023, "eval_compot_GIoU": 0.4374036639928818, "eval_compot_IoU": 0.45621901750564575, "eval_compot_MAE_all": 0.01914477813988924, "eval_compot_MAE_h": 0.005190690280869603, "eval_compot_MAE_w": 0.027808972634375095, "eval_compot_MAE_x_boxes": 0.03283216618001461, "eval_compot_MAE_y_boxes": 0.0045464420691132545, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.301793098449707, "eval_compot_loss_ce": 0.0011670971289277077, "eval_compot_loss_iou": 0.60400390625, "eval_compot_loss_num": 0.018798828125, "eval_compot_loss_xval": 1.302001953125, "eval_compot_runtime": 16.8489, "eval_compot_samples_per_second": 2.968, "eval_compot_steps_per_second": 0.119, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "eval_custom_ui_val_CIoU": -0.2649508896801207, "eval_custom_ui_val_GIoU": -0.28727365616295075, "eval_custom_ui_val_IoU": 0.02925413821099533, "eval_custom_ui_val_MAE_all": 0.09884808709224065, "eval_custom_ui_val_MAE_h": 0.0829097247785992, "eval_custom_ui_val_MAE_w": 0.06122648136483298, "eval_custom_ui_val_MAE_x_boxes": 0.05147905213137468, "eval_custom_ui_val_MAE_y_boxes": 0.1948982576529185, "eval_custom_ui_val_inside_bbox": 0.034722222222222224, "eval_custom_ui_val_loss": 3.0896787643432617, "eval_custom_ui_val_loss_ce": 0.0023468418318467834, "eval_custom_ui_val_loss_iou": 1.2951931423611112, "eval_custom_ui_val_loss_num": 0.09839375813802083, "eval_custom_ui_val_loss_xval": 3.082248263888889, "eval_custom_ui_val_runtime": 54.828, "eval_custom_ui_val_samples_per_second": 4.833, "eval_custom_ui_val_steps_per_second": 0.164, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.011135857461024499, "loss": 2.992961883544922, "loss_ce": 0.004680817015469074, "loss_iou": 1.3046875, "loss_num": 0.07470703125, "loss_xval": 2.984375, "num_input_tokens_seen": 279772, "step": 5 }, { "epoch": 0.013363028953229399, "grad_norm": 51.13984298706055, "learning_rate": 1e-06, "loss": 3.8227, "num_input_tokens_seen": 336424, "step": 6 }, { "epoch": 0.013363028953229399, "loss": 3.419996738433838, "loss_ce": 1.4341567754745483, "loss_iou": 0.7578125, "loss_num": 0.09375, "loss_xval": 1.984375, "num_input_tokens_seen": 336424, "step": 6 }, { "epoch": 0.015590200445434299, "grad_norm": 43.559844970703125, "learning_rate": 1e-06, "loss": 3.5541, "num_input_tokens_seen": 391856, "step": 7 }, { "epoch": 0.015590200445434299, "loss": 3.7645998001098633, "loss_ce": 0.9286624193191528, "loss_iou": 1.140625, "loss_num": 0.1123046875, "loss_xval": 2.84375, "num_input_tokens_seen": 391856, "step": 7 }, { "epoch": 0.017817371937639197, "grad_norm": 58.651023864746094, "learning_rate": 1e-06, "loss": 4.0592, "num_input_tokens_seen": 446076, "step": 8 }, { "epoch": 0.017817371937639197, "loss": 3.763951301574707, "loss_ce": 1.845982551574707, "loss_iou": 0.69140625, "loss_num": 0.107421875, "loss_xval": 1.921875, "num_input_tokens_seen": 446076, "step": 8 }, { "epoch": 0.0200445434298441, "grad_norm": 46.13543701171875, "learning_rate": 1e-06, "loss": 3.4206, "num_input_tokens_seen": 503600, "step": 9 }, { "epoch": 0.0200445434298441, "loss": 3.4812936782836914, "loss_ce": 1.1902780532836914, "loss_iou": 0.90234375, "loss_num": 0.0966796875, "loss_xval": 2.296875, "num_input_tokens_seen": 503600, "step": 9 }, { "epoch": 0.022271714922048998, "grad_norm": 42.953372955322266, "learning_rate": 1e-06, "loss": 4.2745, "num_input_tokens_seen": 557604, "step": 10 }, { "epoch": 0.022271714922048998, "loss": 4.187029838562012, "loss_ce": 1.5722839832305908, "loss_iou": 1.0390625, "loss_num": 0.107421875, "loss_xval": 2.609375, "num_input_tokens_seen": 557604, "step": 10 }, { "epoch": 0.024498886414253896, "grad_norm": 53.018638610839844, "learning_rate": 1e-06, "loss": 4.4355, "num_input_tokens_seen": 614780, "step": 11 }, { "epoch": 0.024498886414253896, "loss": 3.7926254272460938, "loss_ce": 1.1353989839553833, "loss_iou": 1.0703125, "loss_num": 0.103515625, "loss_xval": 2.65625, "num_input_tokens_seen": 614780, "step": 11 }, { "epoch": 0.026726057906458798, "grad_norm": 52.364288330078125, "learning_rate": 1e-06, "loss": 3.4929, "num_input_tokens_seen": 672264, "step": 12 }, { "epoch": 0.026726057906458798, "loss": 3.3297152519226074, "loss_ce": 0.778934121131897, "loss_iou": 1.0859375, "loss_num": 0.0751953125, "loss_xval": 2.546875, "num_input_tokens_seen": 672264, "step": 12 }, { "epoch": 0.028953229398663696, "grad_norm": 160.2591552734375, "learning_rate": 1e-06, "loss": 4.0254, "num_input_tokens_seen": 728504, "step": 13 }, { "epoch": 0.028953229398663696, "loss": 4.575099945068359, "loss_ce": 1.8211936950683594, "loss_iou": 1.125, "loss_num": 0.10107421875, "loss_xval": 2.75, "num_input_tokens_seen": 728504, "step": 13 }, { "epoch": 0.031180400890868598, "grad_norm": 48.962005615234375, "learning_rate": 1e-06, "loss": 3.9505, "num_input_tokens_seen": 783692, "step": 14 }, { "epoch": 0.031180400890868598, "loss": 4.143004417419434, "loss_ce": 1.8163442611694336, "loss_iou": 0.94921875, "loss_num": 0.08642578125, "loss_xval": 2.328125, "num_input_tokens_seen": 783692, "step": 14 }, { "epoch": 0.0334075723830735, "grad_norm": 39.02128982543945, "learning_rate": 1e-06, "loss": 4.0185, "num_input_tokens_seen": 838168, "step": 15 }, { "epoch": 0.0334075723830735, "loss": 4.348506927490234, "loss_ce": 2.2537801265716553, "loss_iou": 0.78125, "loss_num": 0.1064453125, "loss_xval": 2.09375, "num_input_tokens_seen": 838168, "step": 15 }, { "epoch": 0.035634743875278395, "grad_norm": 35.96154022216797, "learning_rate": 1e-06, "loss": 3.6832, "num_input_tokens_seen": 895276, "step": 16 }, { "epoch": 0.035634743875278395, "loss": 3.568532943725586, "loss_ce": 1.519704818725586, "loss_iou": 0.83203125, "loss_num": 0.0771484375, "loss_xval": 2.046875, "num_input_tokens_seen": 895276, "step": 16 }, { "epoch": 0.0378619153674833, "grad_norm": 49.04829788208008, "learning_rate": 1e-06, "loss": 3.519, "num_input_tokens_seen": 951180, "step": 17 }, { "epoch": 0.0378619153674833, "loss": 3.1685400009155273, "loss_ce": 0.9458837509155273, "loss_iou": 0.88671875, "loss_num": 0.0908203125, "loss_xval": 2.21875, "num_input_tokens_seen": 951180, "step": 17 }, { "epoch": 0.0400890868596882, "grad_norm": 105.61235809326172, "learning_rate": 1e-06, "loss": 3.7186, "num_input_tokens_seen": 1008684, "step": 18 }, { "epoch": 0.0400890868596882, "loss": 3.8322296142578125, "loss_ce": 1.353714108467102, "loss_iou": 0.96484375, "loss_num": 0.10986328125, "loss_xval": 2.484375, "num_input_tokens_seen": 1008684, "step": 18 }, { "epoch": 0.042316258351893093, "grad_norm": 114.26968383789062, "learning_rate": 1e-06, "loss": 4.0211, "num_input_tokens_seen": 1062212, "step": 19 }, { "epoch": 0.042316258351893093, "loss": 3.5515711307525635, "loss_ce": 1.0281336307525635, "loss_iou": 1.0234375, "loss_num": 0.09521484375, "loss_xval": 2.53125, "num_input_tokens_seen": 1062212, "step": 19 }, { "epoch": 0.044543429844097995, "grad_norm": 53.97627258300781, "learning_rate": 1e-06, "loss": 4.2972, "num_input_tokens_seen": 1114576, "step": 20 }, { "epoch": 0.044543429844097995, "loss": 4.559757709503174, "loss_ce": 1.2492105960845947, "loss_iou": 1.3046875, "loss_num": 0.138671875, "loss_xval": 3.3125, "num_input_tokens_seen": 1114576, "step": 20 }, { "epoch": 0.0467706013363029, "grad_norm": 38.33933639526367, "learning_rate": 1e-06, "loss": 3.5725, "num_input_tokens_seen": 1169836, "step": 21 }, { "epoch": 0.0467706013363029, "loss": 3.7388784885406494, "loss_ce": 1.3677849769592285, "loss_iou": 0.98828125, "loss_num": 0.0791015625, "loss_xval": 2.375, "num_input_tokens_seen": 1169836, "step": 21 }, { "epoch": 0.04899777282850779, "grad_norm": 59.448753356933594, "learning_rate": 1e-06, "loss": 3.8666, "num_input_tokens_seen": 1228308, "step": 22 }, { "epoch": 0.04899777282850779, "loss": 3.630049705505371, "loss_ce": 1.1505573987960815, "loss_iou": 0.9921875, "loss_num": 0.0986328125, "loss_xval": 2.484375, "num_input_tokens_seen": 1228308, "step": 22 }, { "epoch": 0.051224944320712694, "grad_norm": 52.553958892822266, "learning_rate": 1e-06, "loss": 3.362, "num_input_tokens_seen": 1287564, "step": 23 }, { "epoch": 0.051224944320712694, "loss": 2.8519859313964844, "loss_ce": 1.0302085876464844, "loss_iou": 0.75390625, "loss_num": 0.0625, "loss_xval": 1.8203125, "num_input_tokens_seen": 1287564, "step": 23 }, { "epoch": 0.053452115812917596, "grad_norm": 49.04064178466797, "learning_rate": 1e-06, "loss": 4.1017, "num_input_tokens_seen": 1343120, "step": 24 }, { "epoch": 0.053452115812917596, "loss": 4.231411933898926, "loss_ce": 1.8066072463989258, "loss_iou": 0.96875, "loss_num": 0.09716796875, "loss_xval": 2.421875, "num_input_tokens_seen": 1343120, "step": 24 }, { "epoch": 0.0556792873051225, "grad_norm": 56.048038482666016, "learning_rate": 1e-06, "loss": 4.3803, "num_input_tokens_seen": 1396700, "step": 25 }, { "epoch": 0.0556792873051225, "loss": 4.276466369628906, "loss_ce": 1.6143567562103271, "loss_iou": 1.0234375, "loss_num": 0.1240234375, "loss_xval": 2.65625, "num_input_tokens_seen": 1396700, "step": 25 }, { "epoch": 0.05790645879732739, "grad_norm": 39.70867919921875, "learning_rate": 1e-06, "loss": 3.3416, "num_input_tokens_seen": 1453876, "step": 26 }, { "epoch": 0.05790645879732739, "loss": 3.4961788654327393, "loss_ce": 1.0899291038513184, "loss_iou": 1.0234375, "loss_num": 0.0732421875, "loss_xval": 2.40625, "num_input_tokens_seen": 1453876, "step": 26 }, { "epoch": 0.060133630289532294, "grad_norm": 60.43865966796875, "learning_rate": 1e-06, "loss": 3.5507, "num_input_tokens_seen": 1509556, "step": 27 }, { "epoch": 0.060133630289532294, "loss": 3.679036855697632, "loss_ce": 1.376302719116211, "loss_iou": 0.890625, "loss_num": 0.103515625, "loss_xval": 2.296875, "num_input_tokens_seen": 1509556, "step": 27 }, { "epoch": 0.062360801781737196, "grad_norm": 33.97861099243164, "learning_rate": 1e-06, "loss": 3.5827, "num_input_tokens_seen": 1566848, "step": 28 }, { "epoch": 0.062360801781737196, "loss": 3.9071435928344727, "loss_ce": 1.2176905870437622, "loss_iou": 1.0703125, "loss_num": 0.1103515625, "loss_xval": 2.6875, "num_input_tokens_seen": 1566848, "step": 28 }, { "epoch": 0.0645879732739421, "grad_norm": 71.39791107177734, "learning_rate": 1e-06, "loss": 3.9493, "num_input_tokens_seen": 1622132, "step": 29 }, { "epoch": 0.0645879732739421, "loss": 4.430097579956055, "loss_ce": 1.3754103183746338, "loss_iou": 1.203125, "loss_num": 0.1298828125, "loss_xval": 3.0625, "num_input_tokens_seen": 1622132, "step": 29 }, { "epoch": 0.066815144766147, "grad_norm": 111.8135757446289, "learning_rate": 1e-06, "loss": 3.5729, "num_input_tokens_seen": 1680504, "step": 30 }, { "epoch": 0.066815144766147, "loss": 3.6753127574920654, "loss_ce": 1.620625376701355, "loss_iou": 0.76953125, "loss_num": 0.10302734375, "loss_xval": 2.0625, "num_input_tokens_seen": 1680504, "step": 30 }, { "epoch": 0.06904231625835189, "grad_norm": 48.25176239013672, "learning_rate": 1e-06, "loss": 3.9768, "num_input_tokens_seen": 1735984, "step": 31 }, { "epoch": 0.06904231625835189, "loss": 4.000043869018555, "loss_ce": 1.2251414060592651, "loss_iou": 1.1171875, "loss_num": 0.107421875, "loss_xval": 2.78125, "num_input_tokens_seen": 1735984, "step": 31 }, { "epoch": 0.07126948775055679, "grad_norm": 105.31439208984375, "learning_rate": 1e-06, "loss": 4.4586, "num_input_tokens_seen": 1791036, "step": 32 }, { "epoch": 0.07126948775055679, "loss": 4.315090656280518, "loss_ce": 1.6012234687805176, "loss_iou": 1.078125, "loss_num": 0.1103515625, "loss_xval": 2.71875, "num_input_tokens_seen": 1791036, "step": 32 }, { "epoch": 0.07349665924276169, "grad_norm": 61.652164459228516, "learning_rate": 1e-06, "loss": 4.2882, "num_input_tokens_seen": 1846348, "step": 33 }, { "epoch": 0.07349665924276169, "loss": 3.7140510082244873, "loss_ce": 1.3263558149337769, "loss_iou": 0.9375, "loss_num": 0.1025390625, "loss_xval": 2.390625, "num_input_tokens_seen": 1846348, "step": 33 }, { "epoch": 0.0757238307349666, "grad_norm": 40.629302978515625, "learning_rate": 1e-06, "loss": 3.8681, "num_input_tokens_seen": 1902912, "step": 34 }, { "epoch": 0.0757238307349666, "loss": 3.7667856216430664, "loss_ce": 1.078309178352356, "loss_iou": 1.109375, "loss_num": 0.09375, "loss_xval": 2.6875, "num_input_tokens_seen": 1902912, "step": 34 }, { "epoch": 0.0779510022271715, "grad_norm": 68.22655487060547, "learning_rate": 1e-06, "loss": 4.0071, "num_input_tokens_seen": 1958520, "step": 35 }, { "epoch": 0.0779510022271715, "loss": 3.5187363624572754, "loss_ce": 1.068541169166565, "loss_iou": 0.9296875, "loss_num": 0.11865234375, "loss_xval": 2.453125, "num_input_tokens_seen": 1958520, "step": 35 }, { "epoch": 0.0801781737193764, "grad_norm": 76.44446563720703, "learning_rate": 1e-06, "loss": 4.2565, "num_input_tokens_seen": 2011292, "step": 36 }, { "epoch": 0.0801781737193764, "loss": 3.761260509490967, "loss_ce": 1.2253718376159668, "loss_iou": 1.015625, "loss_num": 0.10205078125, "loss_xval": 2.53125, "num_input_tokens_seen": 2011292, "step": 36 }, { "epoch": 0.08240534521158129, "grad_norm": 40.924400329589844, "learning_rate": 1e-06, "loss": 3.7774, "num_input_tokens_seen": 2068200, "step": 37 }, { "epoch": 0.08240534521158129, "loss": 3.715888500213623, "loss_ce": 1.778876781463623, "loss_iou": 0.75390625, "loss_num": 0.0859375, "loss_xval": 1.9375, "num_input_tokens_seen": 2068200, "step": 37 }, { "epoch": 0.08463251670378619, "grad_norm": 63.25859069824219, "learning_rate": 1e-06, "loss": 3.4187, "num_input_tokens_seen": 2124620, "step": 38 }, { "epoch": 0.08463251670378619, "loss": 3.6475610733032227, "loss_ce": 1.252053141593933, "loss_iou": 0.953125, "loss_num": 0.09765625, "loss_xval": 2.390625, "num_input_tokens_seen": 2124620, "step": 38 }, { "epoch": 0.08685968819599109, "grad_norm": 53.20210266113281, "learning_rate": 1e-06, "loss": 3.69, "num_input_tokens_seen": 2180648, "step": 39 }, { "epoch": 0.08685968819599109, "loss": 3.29498291015625, "loss_ce": 1.070373773574829, "loss_iou": 0.921875, "loss_num": 0.076171875, "loss_xval": 2.21875, "num_input_tokens_seen": 2180648, "step": 39 }, { "epoch": 0.08908685968819599, "grad_norm": 56.693077087402344, "learning_rate": 1e-06, "loss": 3.8869, "num_input_tokens_seen": 2239320, "step": 40 }, { "epoch": 0.08908685968819599, "loss": 4.501180648803711, "loss_ce": 1.5011805295944214, "loss_iou": 1.1796875, "loss_num": 0.1298828125, "loss_xval": 3.0, "num_input_tokens_seen": 2239320, "step": 40 }, { "epoch": 0.09131403118040089, "grad_norm": 40.52269744873047, "learning_rate": 1e-06, "loss": 3.3113, "num_input_tokens_seen": 2294136, "step": 41 }, { "epoch": 0.09131403118040089, "loss": 3.6476950645446777, "loss_ce": 1.1711325645446777, "loss_iou": 0.9609375, "loss_num": 0.1103515625, "loss_xval": 2.46875, "num_input_tokens_seen": 2294136, "step": 41 }, { "epoch": 0.0935412026726058, "grad_norm": 45.07380676269531, "learning_rate": 1e-06, "loss": 4.2714, "num_input_tokens_seen": 2349572, "step": 42 }, { "epoch": 0.0935412026726058, "loss": 4.451573371887207, "loss_ce": 1.497471570968628, "loss_iou": 1.1953125, "loss_num": 0.11279296875, "loss_xval": 2.953125, "num_input_tokens_seen": 2349572, "step": 42 }, { "epoch": 0.0957683741648107, "grad_norm": 43.15333557128906, "learning_rate": 1e-06, "loss": 4.3193, "num_input_tokens_seen": 2405120, "step": 43 }, { "epoch": 0.0957683741648107, "loss": 4.689324378967285, "loss_ce": 1.3651058673858643, "loss_iou": 1.34375, "loss_num": 0.125, "loss_xval": 3.328125, "num_input_tokens_seen": 2405120, "step": 43 }, { "epoch": 0.09799554565701558, "grad_norm": 43.444942474365234, "learning_rate": 1e-06, "loss": 3.2982, "num_input_tokens_seen": 2459840, "step": 44 }, { "epoch": 0.09799554565701558, "loss": 3.244344472885132, "loss_ce": 0.8068445920944214, "loss_iou": 1.03125, "loss_num": 0.07470703125, "loss_xval": 2.4375, "num_input_tokens_seen": 2459840, "step": 44 }, { "epoch": 0.10022271714922049, "grad_norm": 43.407318115234375, "learning_rate": 1e-06, "loss": 3.8361, "num_input_tokens_seen": 2513464, "step": 45 }, { "epoch": 0.10022271714922049, "loss": 3.987287998199463, "loss_ce": 1.3007644414901733, "loss_iou": 1.0625, "loss_num": 0.111328125, "loss_xval": 2.6875, "num_input_tokens_seen": 2513464, "step": 45 }, { "epoch": 0.10244988864142539, "grad_norm": 47.13986587524414, "learning_rate": 1e-06, "loss": 3.9219, "num_input_tokens_seen": 2572040, "step": 46 }, { "epoch": 0.10244988864142539, "loss": 4.833956718444824, "loss_ce": 1.9491908550262451, "loss_iou": 1.0703125, "loss_num": 0.1484375, "loss_xval": 2.890625, "num_input_tokens_seen": 2572040, "step": 46 }, { "epoch": 0.10467706013363029, "grad_norm": 48.692970275878906, "learning_rate": 1e-06, "loss": 3.7242, "num_input_tokens_seen": 2627832, "step": 47 }, { "epoch": 0.10467706013363029, "loss": 3.7583022117614746, "loss_ce": 1.0444347858428955, "loss_iou": 1.078125, "loss_num": 0.11181640625, "loss_xval": 2.71875, "num_input_tokens_seen": 2627832, "step": 47 }, { "epoch": 0.10690423162583519, "grad_norm": 39.18439483642578, "learning_rate": 1e-06, "loss": 4.2013, "num_input_tokens_seen": 2682316, "step": 48 }, { "epoch": 0.10690423162583519, "loss": 4.251529216766357, "loss_ce": 1.5435214042663574, "loss_iou": 1.0625, "loss_num": 0.1162109375, "loss_xval": 2.703125, "num_input_tokens_seen": 2682316, "step": 48 }, { "epoch": 0.1091314031180401, "grad_norm": 40.1721305847168, "learning_rate": 1e-06, "loss": 3.7247, "num_input_tokens_seen": 2738948, "step": 49 }, { "epoch": 0.1091314031180401, "loss": 3.429368734359741, "loss_ce": 1.0699937343597412, "loss_iou": 0.96875, "loss_num": 0.0849609375, "loss_xval": 2.359375, "num_input_tokens_seen": 2738948, "step": 49 }, { "epoch": 0.111358574610245, "grad_norm": 53.2962646484375, "learning_rate": 1e-06, "loss": 3.3685, "num_input_tokens_seen": 2795512, "step": 50 }, { "epoch": 0.111358574610245, "loss": 3.372053861618042, "loss_ce": 1.0507646799087524, "loss_iou": 0.9375, "loss_num": 0.08935546875, "loss_xval": 2.328125, "num_input_tokens_seen": 2795512, "step": 50 }, { "epoch": 0.11358574610244988, "grad_norm": 39.22996139526367, "learning_rate": 1e-06, "loss": 3.5006, "num_input_tokens_seen": 2853264, "step": 51 }, { "epoch": 0.11358574610244988, "loss": 3.352569580078125, "loss_ce": 0.9648742079734802, "loss_iou": 0.9609375, "loss_num": 0.09423828125, "loss_xval": 2.390625, "num_input_tokens_seen": 2853264, "step": 51 }, { "epoch": 0.11581291759465479, "grad_norm": 51.9208869934082, "learning_rate": 1e-06, "loss": 4.002, "num_input_tokens_seen": 2909744, "step": 52 }, { "epoch": 0.11581291759465479, "loss": 3.849562168121338, "loss_ce": 1.003859043121338, "loss_iou": 1.0546875, "loss_num": 0.1455078125, "loss_xval": 2.84375, "num_input_tokens_seen": 2909744, "step": 52 }, { "epoch": 0.11804008908685969, "grad_norm": 36.589988708496094, "learning_rate": 1e-06, "loss": 3.8423, "num_input_tokens_seen": 2963684, "step": 53 }, { "epoch": 0.11804008908685969, "loss": 3.315175771713257, "loss_ce": 1.3342187404632568, "loss_iou": 0.77734375, "loss_num": 0.0859375, "loss_xval": 1.984375, "num_input_tokens_seen": 2963684, "step": 53 }, { "epoch": 0.12026726057906459, "grad_norm": 93.62321472167969, "learning_rate": 1e-06, "loss": 3.6344, "num_input_tokens_seen": 3019120, "step": 54 }, { "epoch": 0.12026726057906459, "loss": 3.678225040435791, "loss_ce": 1.3032249212265015, "loss_iou": 0.95703125, "loss_num": 0.09228515625, "loss_xval": 2.375, "num_input_tokens_seen": 3019120, "step": 54 }, { "epoch": 0.12249443207126949, "grad_norm": 73.76739501953125, "learning_rate": 1e-06, "loss": 4.5231, "num_input_tokens_seen": 3074692, "step": 55 }, { "epoch": 0.12249443207126949, "loss": 4.42036247253418, "loss_ce": 1.8959481716156006, "loss_iou": 0.91796875, "loss_num": 0.1376953125, "loss_xval": 2.53125, "num_input_tokens_seen": 3074692, "step": 55 }, { "epoch": 0.12472160356347439, "grad_norm": 58.37528991699219, "learning_rate": 1e-06, "loss": 4.4363, "num_input_tokens_seen": 3128300, "step": 56 }, { "epoch": 0.12472160356347439, "loss": 4.376946926116943, "loss_ce": 1.451165795326233, "loss_iou": 1.140625, "loss_num": 0.12890625, "loss_xval": 2.921875, "num_input_tokens_seen": 3128300, "step": 56 }, { "epoch": 0.12694877505567928, "grad_norm": 50.1368293762207, "learning_rate": 1e-06, "loss": 3.0486, "num_input_tokens_seen": 3186576, "step": 57 }, { "epoch": 0.12694877505567928, "loss": 3.0530099868774414, "loss_ce": 0.8830881118774414, "loss_iou": 0.828125, "loss_num": 0.1025390625, "loss_xval": 2.171875, "num_input_tokens_seen": 3186576, "step": 57 }, { "epoch": 0.1291759465478842, "grad_norm": 88.61648559570312, "learning_rate": 1e-06, "loss": 3.6031, "num_input_tokens_seen": 3243908, "step": 58 }, { "epoch": 0.1291759465478842, "loss": 3.6260769367218018, "loss_ce": 1.0352567434310913, "loss_iou": 1.0546875, "loss_num": 0.095703125, "loss_xval": 2.59375, "num_input_tokens_seen": 3243908, "step": 58 }, { "epoch": 0.13140311804008908, "grad_norm": 366.4051818847656, "learning_rate": 1e-06, "loss": 2.9703, "num_input_tokens_seen": 3301940, "step": 59 }, { "epoch": 0.13140311804008908, "loss": 2.9112725257873535, "loss_ce": 0.6681084632873535, "loss_iou": 0.890625, "loss_num": 0.09326171875, "loss_xval": 2.25, "num_input_tokens_seen": 3301940, "step": 59 }, { "epoch": 0.133630289532294, "grad_norm": 60.00054931640625, "learning_rate": 1e-06, "loss": 3.4228, "num_input_tokens_seen": 3357440, "step": 60 }, { "epoch": 0.133630289532294, "loss": 4.004701137542725, "loss_ce": 1.8259902000427246, "loss_iou": 0.8125, "loss_num": 0.10986328125, "loss_xval": 2.171875, "num_input_tokens_seen": 3357440, "step": 60 }, { "epoch": 0.1358574610244989, "grad_norm": 75.00879669189453, "learning_rate": 1e-06, "loss": 3.1458, "num_input_tokens_seen": 3416064, "step": 61 }, { "epoch": 0.1358574610244989, "loss": 3.1031904220581055, "loss_ce": 1.0651044845581055, "loss_iou": 0.828125, "loss_num": 0.076171875, "loss_xval": 2.03125, "num_input_tokens_seen": 3416064, "step": 61 }, { "epoch": 0.13808463251670378, "grad_norm": 44.26047134399414, "learning_rate": 1e-06, "loss": 3.4534, "num_input_tokens_seen": 3474300, "step": 62 }, { "epoch": 0.13808463251670378, "loss": 3.3606114387512207, "loss_ce": 0.8108068704605103, "loss_iou": 0.98828125, "loss_num": 0.1142578125, "loss_xval": 2.546875, "num_input_tokens_seen": 3474300, "step": 62 }, { "epoch": 0.1403118040089087, "grad_norm": 43.13284683227539, "learning_rate": 1e-06, "loss": 4.0539, "num_input_tokens_seen": 3526756, "step": 63 }, { "epoch": 0.1403118040089087, "loss": 3.9055140018463135, "loss_ce": 1.0422327518463135, "loss_iou": 1.1015625, "loss_num": 0.1328125, "loss_xval": 2.859375, "num_input_tokens_seen": 3526756, "step": 63 }, { "epoch": 0.14253897550111358, "grad_norm": 48.860225677490234, "learning_rate": 1e-06, "loss": 3.7684, "num_input_tokens_seen": 3582620, "step": 64 }, { "epoch": 0.14253897550111358, "loss": 3.5178382396698, "loss_ce": 0.9514319896697998, "loss_iou": 1.0234375, "loss_num": 0.10400390625, "loss_xval": 2.5625, "num_input_tokens_seen": 3582620, "step": 64 }, { "epoch": 0.1447661469933185, "grad_norm": 51.4886474609375, "learning_rate": 1e-06, "loss": 4.0096, "num_input_tokens_seen": 3636136, "step": 65 }, { "epoch": 0.1447661469933185, "loss": 4.009946823120117, "loss_ce": 1.5128767490386963, "loss_iou": 0.9765625, "loss_num": 0.10888671875, "loss_xval": 2.5, "num_input_tokens_seen": 3636136, "step": 65 }, { "epoch": 0.14699331848552338, "grad_norm": 75.6802978515625, "learning_rate": 1e-06, "loss": 4.0291, "num_input_tokens_seen": 3692172, "step": 66 }, { "epoch": 0.14699331848552338, "loss": 3.9964442253112793, "loss_ce": 1.5921471118927002, "loss_iou": 0.84765625, "loss_num": 0.1416015625, "loss_xval": 2.40625, "num_input_tokens_seen": 3692172, "step": 66 }, { "epoch": 0.1492204899777283, "grad_norm": 44.05568313598633, "learning_rate": 1e-06, "loss": 2.7517, "num_input_tokens_seen": 3750032, "step": 67 }, { "epoch": 0.1492204899777283, "loss": 2.7267603874206543, "loss_ce": 0.6720730662345886, "loss_iou": 0.8671875, "loss_num": 0.06396484375, "loss_xval": 2.0625, "num_input_tokens_seen": 3750032, "step": 67 }, { "epoch": 0.1514476614699332, "grad_norm": 33.830841064453125, "learning_rate": 1e-06, "loss": 3.3022, "num_input_tokens_seen": 3804556, "step": 68 }, { "epoch": 0.1514476614699332, "loss": 3.4905872344970703, "loss_ce": 0.7855091094970703, "loss_iou": 1.046875, "loss_num": 0.12451171875, "loss_xval": 2.703125, "num_input_tokens_seen": 3804556, "step": 68 }, { "epoch": 0.15367483296213807, "grad_norm": 47.166080474853516, "learning_rate": 1e-06, "loss": 3.6395, "num_input_tokens_seen": 3860224, "step": 69 }, { "epoch": 0.15367483296213807, "loss": 3.921863555908203, "loss_ce": 1.5263557434082031, "loss_iou": 0.8828125, "loss_num": 0.1259765625, "loss_xval": 2.390625, "num_input_tokens_seen": 3860224, "step": 69 }, { "epoch": 0.155902004454343, "grad_norm": 60.735130310058594, "learning_rate": 1e-06, "loss": 4.3284, "num_input_tokens_seen": 3911976, "step": 70 }, { "epoch": 0.155902004454343, "loss": 4.273301124572754, "loss_ce": 1.4632422924041748, "loss_iou": 1.1328125, "loss_num": 0.10791015625, "loss_xval": 2.8125, "num_input_tokens_seen": 3911976, "step": 70 }, { "epoch": 0.15812917594654788, "grad_norm": 40.85783004760742, "learning_rate": 1e-06, "loss": 3.3411, "num_input_tokens_seen": 3969416, "step": 71 }, { "epoch": 0.15812917594654788, "loss": 3.9168148040771484, "loss_ce": 1.2000181674957275, "loss_iou": 0.99609375, "loss_num": 0.1455078125, "loss_xval": 2.71875, "num_input_tokens_seen": 3969416, "step": 71 }, { "epoch": 0.1603563474387528, "grad_norm": 37.3762092590332, "learning_rate": 1e-06, "loss": 4.0877, "num_input_tokens_seen": 4024764, "step": 72 }, { "epoch": 0.1603563474387528, "loss": 3.9297492504119873, "loss_ce": 1.3613898754119873, "loss_iou": 1.0390625, "loss_num": 0.09912109375, "loss_xval": 2.5625, "num_input_tokens_seen": 4024764, "step": 72 }, { "epoch": 0.16258351893095768, "grad_norm": 37.51104736328125, "learning_rate": 1e-06, "loss": 3.6427, "num_input_tokens_seen": 4081604, "step": 73 }, { "epoch": 0.16258351893095768, "loss": 3.7234535217285156, "loss_ce": 1.1570473909378052, "loss_iou": 1.0, "loss_num": 0.11328125, "loss_xval": 2.5625, "num_input_tokens_seen": 4081604, "step": 73 }, { "epoch": 0.16481069042316257, "grad_norm": 58.83037567138672, "learning_rate": 1e-06, "loss": 3.6275, "num_input_tokens_seen": 4138792, "step": 74 }, { "epoch": 0.16481069042316257, "loss": 3.8506052494049072, "loss_ce": 1.0537302494049072, "loss_iou": 1.078125, "loss_num": 0.12890625, "loss_xval": 2.796875, "num_input_tokens_seen": 4138792, "step": 74 }, { "epoch": 0.16703786191536749, "grad_norm": 74.164794921875, "learning_rate": 1e-06, "loss": 3.8793, "num_input_tokens_seen": 4195448, "step": 75 }, { "epoch": 0.16703786191536749, "loss": 4.057165145874023, "loss_ce": 0.9848995208740234, "loss_iou": 1.1796875, "loss_num": 0.1416015625, "loss_xval": 3.078125, "num_input_tokens_seen": 4195448, "step": 75 }, { "epoch": 0.16926503340757237, "grad_norm": 103.62609100341797, "learning_rate": 1e-06, "loss": 3.7588, "num_input_tokens_seen": 4247988, "step": 76 }, { "epoch": 0.16926503340757237, "loss": 3.4790072441101074, "loss_ce": 1.3315460681915283, "loss_iou": 0.83203125, "loss_num": 0.0966796875, "loss_xval": 2.140625, "num_input_tokens_seen": 4247988, "step": 76 }, { "epoch": 0.1714922048997773, "grad_norm": 36.90887451171875, "learning_rate": 1e-06, "loss": 3.8448, "num_input_tokens_seen": 4301000, "step": 77 }, { "epoch": 0.1714922048997773, "loss": 4.019002437591553, "loss_ce": 1.0971274375915527, "loss_iou": 1.1328125, "loss_num": 0.1328125, "loss_xval": 2.921875, "num_input_tokens_seen": 4301000, "step": 77 }, { "epoch": 0.17371937639198218, "grad_norm": 60.749755859375, "learning_rate": 1e-06, "loss": 3.6311, "num_input_tokens_seen": 4358284, "step": 78 }, { "epoch": 0.17371937639198218, "loss": 3.9924979209899902, "loss_ce": 1.3831228017807007, "loss_iou": 1.0390625, "loss_num": 0.10693359375, "loss_xval": 2.609375, "num_input_tokens_seen": 4358284, "step": 78 }, { "epoch": 0.1759465478841871, "grad_norm": 43.58686065673828, "learning_rate": 1e-06, "loss": 3.7598, "num_input_tokens_seen": 4412984, "step": 79 }, { "epoch": 0.1759465478841871, "loss": 4.099183559417725, "loss_ce": 1.1939101219177246, "loss_iou": 1.0859375, "loss_num": 0.1474609375, "loss_xval": 2.90625, "num_input_tokens_seen": 4412984, "step": 79 }, { "epoch": 0.17817371937639198, "grad_norm": 46.925628662109375, "learning_rate": 1e-06, "loss": 3.6665, "num_input_tokens_seen": 4469440, "step": 80 }, { "epoch": 0.17817371937639198, "loss": 3.7074530124664307, "loss_ce": 1.1742497682571411, "loss_iou": 0.9609375, "loss_num": 0.12109375, "loss_xval": 2.53125, "num_input_tokens_seen": 4469440, "step": 80 }, { "epoch": 0.18040089086859687, "grad_norm": 37.318233489990234, "learning_rate": 1e-06, "loss": 3.108, "num_input_tokens_seen": 4528044, "step": 81 }, { "epoch": 0.18040089086859687, "loss": 3.302272319793701, "loss_ce": 0.8784441947937012, "loss_iou": 0.91796875, "loss_num": 0.11767578125, "loss_xval": 2.421875, "num_input_tokens_seen": 4528044, "step": 81 }, { "epoch": 0.18262806236080179, "grad_norm": 89.16364288330078, "learning_rate": 1e-06, "loss": 2.8823, "num_input_tokens_seen": 4585856, "step": 82 }, { "epoch": 0.18262806236080179, "loss": 2.5211141109466553, "loss_ce": 0.4742392301559448, "loss_iou": 0.84765625, "loss_num": 0.06982421875, "loss_xval": 2.046875, "num_input_tokens_seen": 4585856, "step": 82 }, { "epoch": 0.18485523385300667, "grad_norm": 75.21393585205078, "learning_rate": 1e-06, "loss": 3.7455, "num_input_tokens_seen": 4640456, "step": 83 }, { "epoch": 0.18485523385300667, "loss": 4.00370979309082, "loss_ce": 1.2458975315093994, "loss_iou": 1.09375, "loss_num": 0.11328125, "loss_xval": 2.75, "num_input_tokens_seen": 4640456, "step": 83 }, { "epoch": 0.1870824053452116, "grad_norm": 47.31288146972656, "learning_rate": 1e-06, "loss": 3.7102, "num_input_tokens_seen": 4695604, "step": 84 }, { "epoch": 0.1870824053452116, "loss": 3.4363784790039062, "loss_ce": 1.1434097290039062, "loss_iou": 0.8671875, "loss_num": 0.1123046875, "loss_xval": 2.296875, "num_input_tokens_seen": 4695604, "step": 84 }, { "epoch": 0.18930957683741648, "grad_norm": 103.16661071777344, "learning_rate": 1e-06, "loss": 3.9615, "num_input_tokens_seen": 4750568, "step": 85 }, { "epoch": 0.18930957683741648, "loss": 3.598466396331787, "loss_ce": 0.9617477655410767, "loss_iou": 0.98046875, "loss_num": 0.134765625, "loss_xval": 2.640625, "num_input_tokens_seen": 4750568, "step": 85 }, { "epoch": 0.1915367483296214, "grad_norm": 30.83519172668457, "learning_rate": 1e-06, "loss": 3.4149, "num_input_tokens_seen": 4802048, "step": 86 }, { "epoch": 0.1915367483296214, "loss": 3.000054359436035, "loss_ce": 0.6123592257499695, "loss_iou": 0.890625, "loss_num": 0.12158203125, "loss_xval": 2.390625, "num_input_tokens_seen": 4802048, "step": 86 }, { "epoch": 0.19376391982182628, "grad_norm": 44.40241241455078, "learning_rate": 1e-06, "loss": 3.8462, "num_input_tokens_seen": 4857292, "step": 87 }, { "epoch": 0.19376391982182628, "loss": 3.8453128337860107, "loss_ce": 1.0943361520767212, "loss_iou": 1.0546875, "loss_num": 0.1279296875, "loss_xval": 2.75, "num_input_tokens_seen": 4857292, "step": 87 }, { "epoch": 0.19599109131403117, "grad_norm": 36.97007751464844, "learning_rate": 1e-06, "loss": 3.4149, "num_input_tokens_seen": 4914020, "step": 88 }, { "epoch": 0.19599109131403117, "loss": 3.3789005279541016, "loss_ce": 1.074212908744812, "loss_iou": 0.84375, "loss_num": 0.1220703125, "loss_xval": 2.3125, "num_input_tokens_seen": 4914020, "step": 88 }, { "epoch": 0.19821826280623608, "grad_norm": 100.41455841064453, "learning_rate": 1e-06, "loss": 3.7294, "num_input_tokens_seen": 4970596, "step": 89 }, { "epoch": 0.19821826280623608, "loss": 3.8161191940307617, "loss_ce": 1.1676816940307617, "loss_iou": 1.046875, "loss_num": 0.1123046875, "loss_xval": 2.65625, "num_input_tokens_seen": 4970596, "step": 89 }, { "epoch": 0.20044543429844097, "grad_norm": 56.474788665771484, "learning_rate": 1e-06, "loss": 3.628, "num_input_tokens_seen": 5022084, "step": 90 }, { "epoch": 0.20044543429844097, "loss": 3.246203899383545, "loss_ce": 0.770129919052124, "loss_iou": 0.89453125, "loss_num": 0.13671875, "loss_xval": 2.46875, "num_input_tokens_seen": 5022084, "step": 90 }, { "epoch": 0.2026726057906459, "grad_norm": 45.0289192199707, "learning_rate": 1e-06, "loss": 2.9406, "num_input_tokens_seen": 5076540, "step": 91 }, { "epoch": 0.2026726057906459, "loss": 2.8975300788879395, "loss_ce": 0.491280198097229, "loss_iou": 0.9296875, "loss_num": 0.109375, "loss_xval": 2.40625, "num_input_tokens_seen": 5076540, "step": 91 }, { "epoch": 0.20489977728285078, "grad_norm": 33.83285140991211, "learning_rate": 1e-06, "loss": 2.8553, "num_input_tokens_seen": 5132312, "step": 92 }, { "epoch": 0.20489977728285078, "loss": 2.73876953125, "loss_ce": 0.7338864803314209, "loss_iou": 0.75390625, "loss_num": 0.09912109375, "loss_xval": 2.0, "num_input_tokens_seen": 5132312, "step": 92 }, { "epoch": 0.2071269487750557, "grad_norm": 55.93543243408203, "learning_rate": 1e-06, "loss": 3.3996, "num_input_tokens_seen": 5188188, "step": 93 }, { "epoch": 0.2071269487750557, "loss": 3.369142770767212, "loss_ce": 0.7919942140579224, "loss_iou": 0.95703125, "loss_num": 0.1318359375, "loss_xval": 2.578125, "num_input_tokens_seen": 5188188, "step": 93 }, { "epoch": 0.20935412026726058, "grad_norm": 42.92362976074219, "learning_rate": 1e-06, "loss": 3.3957, "num_input_tokens_seen": 5245580, "step": 94 }, { "epoch": 0.20935412026726058, "loss": 3.3484046459198, "loss_ce": 0.7302405834197998, "loss_iou": 0.9609375, "loss_num": 0.1396484375, "loss_xval": 2.625, "num_input_tokens_seen": 5245580, "step": 94 }, { "epoch": 0.21158129175946547, "grad_norm": 38.73051452636719, "learning_rate": 1e-06, "loss": 2.8372, "num_input_tokens_seen": 5302720, "step": 95 }, { "epoch": 0.21158129175946547, "loss": 3.1018481254577637, "loss_ce": 0.6829028129577637, "loss_iou": 0.90625, "loss_num": 0.12109375, "loss_xval": 2.421875, "num_input_tokens_seen": 5302720, "step": 95 }, { "epoch": 0.21380846325167038, "grad_norm": 51.393985748291016, "learning_rate": 1e-06, "loss": 3.4311, "num_input_tokens_seen": 5359268, "step": 96 }, { "epoch": 0.21380846325167038, "loss": 3.3091301918029785, "loss_ce": 1.068895697593689, "loss_iou": 0.796875, "loss_num": 0.1298828125, "loss_xval": 2.234375, "num_input_tokens_seen": 5359268, "step": 96 }, { "epoch": 0.21603563474387527, "grad_norm": 34.125511169433594, "learning_rate": 1e-06, "loss": 3.3903, "num_input_tokens_seen": 5415716, "step": 97 }, { "epoch": 0.21603563474387527, "loss": 3.3011648654937744, "loss_ce": 0.8431569933891296, "loss_iou": 0.8828125, "loss_num": 0.138671875, "loss_xval": 2.453125, "num_input_tokens_seen": 5415716, "step": 97 }, { "epoch": 0.2182628062360802, "grad_norm": 47.519710540771484, "learning_rate": 1e-06, "loss": 2.9414, "num_input_tokens_seen": 5473288, "step": 98 }, { "epoch": 0.2182628062360802, "loss": 3.0692644119262695, "loss_ce": 0.71477210521698, "loss_iou": 0.88671875, "loss_num": 0.11669921875, "loss_xval": 2.359375, "num_input_tokens_seen": 5473288, "step": 98 }, { "epoch": 0.22048997772828507, "grad_norm": 26.452730178833008, "learning_rate": 1e-06, "loss": 3.3211, "num_input_tokens_seen": 5528484, "step": 99 }, { "epoch": 0.22048997772828507, "loss": 3.3662614822387695, "loss_ce": 0.9463395476341248, "loss_iou": 0.875, "loss_num": 0.1328125, "loss_xval": 2.421875, "num_input_tokens_seen": 5528484, "step": 99 }, { "epoch": 0.22271714922049, "grad_norm": 60.19447708129883, "learning_rate": 1e-06, "loss": 3.2708, "num_input_tokens_seen": 5584888, "step": 100 }, { "epoch": 0.22271714922049, "loss": 3.1879868507385254, "loss_ce": 0.9902329444885254, "loss_iou": 0.79296875, "loss_num": 0.1220703125, "loss_xval": 2.203125, "num_input_tokens_seen": 5584888, "step": 100 }, { "epoch": 0.22494432071269488, "grad_norm": 70.54967498779297, "learning_rate": 1e-06, "loss": 2.7226, "num_input_tokens_seen": 5642204, "step": 101 }, { "epoch": 0.22494432071269488, "loss": 2.864590644836426, "loss_ce": 0.7434969544410706, "loss_iou": 0.765625, "loss_num": 0.11865234375, "loss_xval": 2.125, "num_input_tokens_seen": 5642204, "step": 101 }, { "epoch": 0.22717149220489977, "grad_norm": 30.121597290039062, "learning_rate": 1e-06, "loss": 3.6091, "num_input_tokens_seen": 5696676, "step": 102 }, { "epoch": 0.22717149220489977, "loss": 3.1128854751586914, "loss_ce": 0.7256782054901123, "loss_iou": 0.8828125, "loss_num": 0.12353515625, "loss_xval": 2.390625, "num_input_tokens_seen": 5696676, "step": 102 }, { "epoch": 0.22939866369710468, "grad_norm": 38.39320373535156, "learning_rate": 1e-06, "loss": 2.9509, "num_input_tokens_seen": 5751512, "step": 103 }, { "epoch": 0.22939866369710468, "loss": 3.0732927322387695, "loss_ce": 0.5625505447387695, "loss_iou": 0.90234375, "loss_num": 0.140625, "loss_xval": 2.515625, "num_input_tokens_seen": 5751512, "step": 103 }, { "epoch": 0.23162583518930957, "grad_norm": 38.619789123535156, "learning_rate": 1e-06, "loss": 3.2933, "num_input_tokens_seen": 5806704, "step": 104 }, { "epoch": 0.23162583518930957, "loss": 3.1526646614074707, "loss_ce": 0.8069615960121155, "loss_iou": 0.89453125, "loss_num": 0.11083984375, "loss_xval": 2.34375, "num_input_tokens_seen": 5806704, "step": 104 }, { "epoch": 0.23385300668151449, "grad_norm": 39.458309173583984, "learning_rate": 1e-06, "loss": 2.705, "num_input_tokens_seen": 5864592, "step": 105 }, { "epoch": 0.23385300668151449, "loss": 2.9264371395111084, "loss_ce": 0.6334683895111084, "loss_iou": 0.859375, "loss_num": 0.11474609375, "loss_xval": 2.296875, "num_input_tokens_seen": 5864592, "step": 105 }, { "epoch": 0.23608017817371937, "grad_norm": 91.0936050415039, "learning_rate": 1e-06, "loss": 2.6971, "num_input_tokens_seen": 5920932, "step": 106 }, { "epoch": 0.23608017817371937, "loss": 2.743122100830078, "loss_ce": 0.6098213195800781, "loss_iou": 0.75, "loss_num": 0.126953125, "loss_xval": 2.140625, "num_input_tokens_seen": 5920932, "step": 106 }, { "epoch": 0.2383073496659243, "grad_norm": 58.84850311279297, "learning_rate": 1e-06, "loss": 3.047, "num_input_tokens_seen": 5978660, "step": 107 }, { "epoch": 0.2383073496659243, "loss": 2.7701728343963623, "loss_ce": 0.4518134593963623, "loss_iou": 0.8125, "loss_num": 0.138671875, "loss_xval": 2.3125, "num_input_tokens_seen": 5978660, "step": 107 }, { "epoch": 0.24053452115812918, "grad_norm": 37.72629928588867, "learning_rate": 1e-06, "loss": 3.1347, "num_input_tokens_seen": 6032308, "step": 108 }, { "epoch": 0.24053452115812918, "loss": 3.364459991455078, "loss_ce": 0.720416784286499, "loss_iou": 0.98828125, "loss_num": 0.1337890625, "loss_xval": 2.640625, "num_input_tokens_seen": 6032308, "step": 108 }, { "epoch": 0.24276169265033407, "grad_norm": 35.40589141845703, "learning_rate": 1e-06, "loss": 2.8689, "num_input_tokens_seen": 6087012, "step": 109 }, { "epoch": 0.24276169265033407, "loss": 2.9507226943969727, "loss_ce": 0.40775376558303833, "loss_iou": 0.9453125, "loss_num": 0.1298828125, "loss_xval": 2.546875, "num_input_tokens_seen": 6087012, "step": 109 }, { "epoch": 0.24498886414253898, "grad_norm": 42.018192291259766, "learning_rate": 1e-06, "loss": 2.9651, "num_input_tokens_seen": 6144992, "step": 110 }, { "epoch": 0.24498886414253898, "loss": 2.959064483642578, "loss_ce": 0.5215646624565125, "loss_iou": 0.88671875, "loss_num": 0.1318359375, "loss_xval": 2.4375, "num_input_tokens_seen": 6144992, "step": 110 }, { "epoch": 0.24721603563474387, "grad_norm": 80.21202850341797, "learning_rate": 1e-06, "loss": 2.9912, "num_input_tokens_seen": 6201808, "step": 111 }, { "epoch": 0.24721603563474387, "loss": 3.193042755126953, "loss_ce": 0.9244881868362427, "loss_iou": 0.875, "loss_num": 0.10400390625, "loss_xval": 2.265625, "num_input_tokens_seen": 6201808, "step": 111 }, { "epoch": 0.24944320712694878, "grad_norm": 54.696163177490234, "learning_rate": 1e-06, "loss": 3.2172, "num_input_tokens_seen": 6253116, "step": 112 }, { "epoch": 0.24944320712694878, "loss": 3.254600763320923, "loss_ce": 0.6891711354255676, "loss_iou": 0.94140625, "loss_num": 0.13671875, "loss_xval": 2.5625, "num_input_tokens_seen": 6253116, "step": 112 }, { "epoch": 0.2516703786191537, "grad_norm": 42.921669006347656, "learning_rate": 1e-06, "loss": 3.186, "num_input_tokens_seen": 6308684, "step": 113 }, { "epoch": 0.2516703786191537, "loss": 3.4225220680236816, "loss_ce": 0.48892807960510254, "loss_iou": 1.046875, "loss_num": 0.16796875, "loss_xval": 2.9375, "num_input_tokens_seen": 6308684, "step": 113 }, { "epoch": 0.25389755011135856, "grad_norm": 78.24114227294922, "learning_rate": 1e-06, "loss": 3.0514, "num_input_tokens_seen": 6364580, "step": 114 }, { "epoch": 0.25389755011135856, "loss": 3.6505331993103027, "loss_ce": 0.7813925743103027, "loss_iou": 1.0546875, "loss_num": 0.1533203125, "loss_xval": 2.875, "num_input_tokens_seen": 6364580, "step": 114 }, { "epoch": 0.2561247216035635, "grad_norm": 43.09880447387695, "learning_rate": 1e-06, "loss": 3.0621, "num_input_tokens_seen": 6418640, "step": 115 }, { "epoch": 0.2561247216035635, "loss": 2.922844648361206, "loss_ce": 0.6381767392158508, "loss_iou": 0.8125, "loss_num": 0.1328125, "loss_xval": 2.28125, "num_input_tokens_seen": 6418640, "step": 115 }, { "epoch": 0.2583518930957684, "grad_norm": 78.85059356689453, "learning_rate": 1e-06, "loss": 2.7225, "num_input_tokens_seen": 6476332, "step": 116 }, { "epoch": 0.2583518930957684, "loss": 2.682021141052246, "loss_ce": 0.4828024208545685, "loss_iou": 0.8046875, "loss_num": 0.119140625, "loss_xval": 2.203125, "num_input_tokens_seen": 6476332, "step": 116 }, { "epoch": 0.26057906458797325, "grad_norm": 39.069583892822266, "learning_rate": 1e-06, "loss": 2.7889, "num_input_tokens_seen": 6531328, "step": 117 }, { "epoch": 0.26057906458797325, "loss": 2.9253664016723633, "loss_ce": 0.6001709699630737, "loss_iou": 0.859375, "loss_num": 0.12060546875, "loss_xval": 2.328125, "num_input_tokens_seen": 6531328, "step": 117 }, { "epoch": 0.26280623608017817, "grad_norm": 40.309391021728516, "learning_rate": 1e-06, "loss": 2.8465, "num_input_tokens_seen": 6589548, "step": 118 }, { "epoch": 0.26280623608017817, "loss": 2.8932766914367676, "loss_ce": 0.43429216742515564, "loss_iou": 0.87890625, "loss_num": 0.1396484375, "loss_xval": 2.453125, "num_input_tokens_seen": 6589548, "step": 118 }, { "epoch": 0.2650334075723831, "grad_norm": 31.454940795898438, "learning_rate": 1e-06, "loss": 2.6428, "num_input_tokens_seen": 6647160, "step": 119 }, { "epoch": 0.2650334075723831, "loss": 2.7675888538360596, "loss_ce": 0.48755955696105957, "loss_iou": 0.78515625, "loss_num": 0.142578125, "loss_xval": 2.28125, "num_input_tokens_seen": 6647160, "step": 119 }, { "epoch": 0.267260579064588, "grad_norm": 108.88129425048828, "learning_rate": 1e-06, "loss": 2.7364, "num_input_tokens_seen": 6705976, "step": 120 }, { "epoch": 0.267260579064588, "loss": 2.662890911102295, "loss_ce": 0.4177738428115845, "loss_iou": 0.796875, "loss_num": 0.1298828125, "loss_xval": 2.25, "num_input_tokens_seen": 6705976, "step": 120 }, { "epoch": 0.26948775055679286, "grad_norm": 63.05554962158203, "learning_rate": 1e-06, "loss": 3.2316, "num_input_tokens_seen": 6760876, "step": 121 }, { "epoch": 0.26948775055679286, "loss": 3.1148786544799805, "loss_ce": 0.5191754102706909, "loss_iou": 0.96875, "loss_num": 0.130859375, "loss_xval": 2.59375, "num_input_tokens_seen": 6760876, "step": 121 }, { "epoch": 0.2717149220489978, "grad_norm": 39.52189254760742, "learning_rate": 1e-06, "loss": 3.0243, "num_input_tokens_seen": 6815624, "step": 122 }, { "epoch": 0.2717149220489978, "loss": 2.8611388206481934, "loss_ce": 0.37090444564819336, "loss_iou": 0.91796875, "loss_num": 0.1298828125, "loss_xval": 2.484375, "num_input_tokens_seen": 6815624, "step": 122 }, { "epoch": 0.2739420935412027, "grad_norm": 44.9962158203125, "learning_rate": 1e-06, "loss": 2.9217, "num_input_tokens_seen": 6870556, "step": 123 }, { "epoch": 0.2739420935412027, "loss": 3.207249164581299, "loss_ce": 0.7170149087905884, "loss_iou": 0.94921875, "loss_num": 0.11865234375, "loss_xval": 2.484375, "num_input_tokens_seen": 6870556, "step": 123 }, { "epoch": 0.27616926503340755, "grad_norm": 25.661319732666016, "learning_rate": 1e-06, "loss": 2.9669, "num_input_tokens_seen": 6925992, "step": 124 }, { "epoch": 0.27616926503340755, "loss": 2.76767897605896, "loss_ce": 0.3028353154659271, "loss_iou": 0.91015625, "loss_num": 0.1298828125, "loss_xval": 2.46875, "num_input_tokens_seen": 6925992, "step": 124 }, { "epoch": 0.27839643652561247, "grad_norm": 41.912784576416016, "learning_rate": 1e-06, "loss": 2.6341, "num_input_tokens_seen": 6983364, "step": 125 }, { "epoch": 0.27839643652561247, "loss": 2.6220178604125977, "loss_ce": 0.4081503748893738, "loss_iou": 0.86328125, "loss_num": 0.09765625, "loss_xval": 2.21875, "num_input_tokens_seen": 6983364, "step": 125 }, { "epoch": 0.2806236080178174, "grad_norm": 60.407127380371094, "learning_rate": 1e-06, "loss": 2.6668, "num_input_tokens_seen": 7037256, "step": 126 }, { "epoch": 0.2806236080178174, "loss": 2.8176770210266113, "loss_ce": 0.3831067681312561, "loss_iou": 0.86328125, "loss_num": 0.140625, "loss_xval": 2.4375, "num_input_tokens_seen": 7037256, "step": 126 }, { "epoch": 0.2828507795100223, "grad_norm": 102.43517303466797, "learning_rate": 1e-06, "loss": 2.8805, "num_input_tokens_seen": 7091296, "step": 127 }, { "epoch": 0.2828507795100223, "loss": 2.8854475021362305, "loss_ce": 0.7106426954269409, "loss_iou": 0.79296875, "loss_num": 0.1171875, "loss_xval": 2.171875, "num_input_tokens_seen": 7091296, "step": 127 }, { "epoch": 0.28507795100222716, "grad_norm": 44.01148223876953, "learning_rate": 1e-06, "loss": 2.8418, "num_input_tokens_seen": 7146940, "step": 128 }, { "epoch": 0.28507795100222716, "loss": 2.973205089569092, "loss_ce": 0.391173779964447, "loss_iou": 0.90625, "loss_num": 0.154296875, "loss_xval": 2.578125, "num_input_tokens_seen": 7146940, "step": 128 }, { "epoch": 0.2873051224944321, "grad_norm": 116.4285659790039, "learning_rate": 1e-06, "loss": 2.5866, "num_input_tokens_seen": 7203656, "step": 129 }, { "epoch": 0.2873051224944321, "loss": 3.1285247802734375, "loss_ce": 0.44102469086647034, "loss_iou": 0.9609375, "loss_num": 0.15234375, "loss_xval": 2.6875, "num_input_tokens_seen": 7203656, "step": 129 }, { "epoch": 0.289532293986637, "grad_norm": 24.682891845703125, "learning_rate": 1e-06, "loss": 2.6812, "num_input_tokens_seen": 7258748, "step": 130 }, { "epoch": 0.289532293986637, "loss": 2.730166435241699, "loss_ce": 0.5348541140556335, "loss_iou": 0.8046875, "loss_num": 0.11767578125, "loss_xval": 2.1875, "num_input_tokens_seen": 7258748, "step": 130 }, { "epoch": 0.29175946547884185, "grad_norm": 34.84796142578125, "learning_rate": 1e-06, "loss": 2.5544, "num_input_tokens_seen": 7315644, "step": 131 }, { "epoch": 0.29175946547884185, "loss": 2.3853559494018555, "loss_ce": 0.3248090147972107, "loss_iou": 0.78515625, "loss_num": 0.09716796875, "loss_xval": 2.0625, "num_input_tokens_seen": 7315644, "step": 131 }, { "epoch": 0.29398663697104677, "grad_norm": 33.59828186035156, "learning_rate": 1e-06, "loss": 2.6018, "num_input_tokens_seen": 7371296, "step": 132 }, { "epoch": 0.29398663697104677, "loss": 2.7992615699768066, "loss_ce": 0.5941836833953857, "loss_iou": 0.78125, "loss_num": 0.12890625, "loss_xval": 2.203125, "num_input_tokens_seen": 7371296, "step": 132 }, { "epoch": 0.2962138084632517, "grad_norm": 26.216712951660156, "learning_rate": 1e-06, "loss": 2.4007, "num_input_tokens_seen": 7427892, "step": 133 }, { "epoch": 0.2962138084632517, "loss": 2.259786605834961, "loss_ce": 0.35256001353263855, "loss_iou": 0.73828125, "loss_num": 0.0859375, "loss_xval": 1.90625, "num_input_tokens_seen": 7427892, "step": 133 }, { "epoch": 0.2984409799554566, "grad_norm": 148.89346313476562, "learning_rate": 1e-06, "loss": 2.645, "num_input_tokens_seen": 7484960, "step": 134 }, { "epoch": 0.2984409799554566, "loss": 2.5535106658935547, "loss_ce": 0.44901835918426514, "loss_iou": 0.80078125, "loss_num": 0.099609375, "loss_xval": 2.109375, "num_input_tokens_seen": 7484960, "step": 134 }, { "epoch": 0.30066815144766146, "grad_norm": 29.48415184020996, "learning_rate": 1e-06, "loss": 2.4559, "num_input_tokens_seen": 7543972, "step": 135 }, { "epoch": 0.30066815144766146, "loss": 2.225175380706787, "loss_ce": 0.2398238629102707, "loss_iou": 0.7421875, "loss_num": 0.10009765625, "loss_xval": 1.984375, "num_input_tokens_seen": 7543972, "step": 135 }, { "epoch": 0.3028953229398664, "grad_norm": 50.64659881591797, "learning_rate": 1e-06, "loss": 2.7228, "num_input_tokens_seen": 7600460, "step": 136 }, { "epoch": 0.3028953229398664, "loss": 2.643749475479126, "loss_ce": 0.2501947581768036, "loss_iou": 0.8515625, "loss_num": 0.1376953125, "loss_xval": 2.390625, "num_input_tokens_seen": 7600460, "step": 136 }, { "epoch": 0.3051224944320713, "grad_norm": 27.465696334838867, "learning_rate": 1e-06, "loss": 2.5444, "num_input_tokens_seen": 7657896, "step": 137 }, { "epoch": 0.3051224944320713, "loss": 2.5869100093841553, "loss_ce": 0.3886679410934448, "loss_iou": 0.796875, "loss_num": 0.12158203125, "loss_xval": 2.203125, "num_input_tokens_seen": 7657896, "step": 137 }, { "epoch": 0.30734966592427615, "grad_norm": 30.95069122314453, "learning_rate": 1e-06, "loss": 2.6368, "num_input_tokens_seen": 7710764, "step": 138 }, { "epoch": 0.30734966592427615, "loss": 2.530655860900879, "loss_ce": 0.41444480419158936, "loss_iou": 0.70703125, "loss_num": 0.140625, "loss_xval": 2.109375, "num_input_tokens_seen": 7710764, "step": 138 }, { "epoch": 0.30957683741648107, "grad_norm": 27.392942428588867, "learning_rate": 1e-06, "loss": 2.3889, "num_input_tokens_seen": 7767452, "step": 139 }, { "epoch": 0.30957683741648107, "loss": 2.26643705368042, "loss_ce": 0.3201477825641632, "loss_iou": 0.7421875, "loss_num": 0.0927734375, "loss_xval": 1.9453125, "num_input_tokens_seen": 7767452, "step": 139 }, { "epoch": 0.311804008908686, "grad_norm": 52.42769241333008, "learning_rate": 1e-06, "loss": 2.5987, "num_input_tokens_seen": 7821504, "step": 140 }, { "epoch": 0.311804008908686, "loss": 2.6357715129852295, "loss_ce": 0.3486621379852295, "loss_iou": 0.8671875, "loss_num": 0.10986328125, "loss_xval": 2.28125, "num_input_tokens_seen": 7821504, "step": 140 }, { "epoch": 0.31403118040089084, "grad_norm": 23.9682674407959, "learning_rate": 1e-06, "loss": 2.6711, "num_input_tokens_seen": 7875900, "step": 141 }, { "epoch": 0.31403118040089084, "loss": 2.9198169708251953, "loss_ce": 0.44423115253448486, "loss_iou": 0.9140625, "loss_num": 0.12890625, "loss_xval": 2.46875, "num_input_tokens_seen": 7875900, "step": 141 }, { "epoch": 0.31625835189309576, "grad_norm": 103.93788146972656, "learning_rate": 1e-06, "loss": 2.5893, "num_input_tokens_seen": 7931732, "step": 142 }, { "epoch": 0.31625835189309576, "loss": 2.553285837173462, "loss_ce": 0.41266071796417236, "loss_iou": 0.82421875, "loss_num": 0.09814453125, "loss_xval": 2.140625, "num_input_tokens_seen": 7931732, "step": 142 }, { "epoch": 0.3184855233853007, "grad_norm": 44.492820739746094, "learning_rate": 1e-06, "loss": 2.4701, "num_input_tokens_seen": 7987608, "step": 143 }, { "epoch": 0.3184855233853007, "loss": 2.3122029304504395, "loss_ce": 0.3502885699272156, "loss_iou": 0.7265625, "loss_num": 0.1025390625, "loss_xval": 1.9609375, "num_input_tokens_seen": 7987608, "step": 143 }, { "epoch": 0.3207126948775056, "grad_norm": 23.186241149902344, "learning_rate": 1e-06, "loss": 2.4726, "num_input_tokens_seen": 8043360, "step": 144 }, { "epoch": 0.3207126948775056, "loss": 2.7117819786071777, "loss_ce": 0.3328755497932434, "loss_iou": 0.88671875, "loss_num": 0.12060546875, "loss_xval": 2.375, "num_input_tokens_seen": 8043360, "step": 144 }, { "epoch": 0.32293986636971045, "grad_norm": 60.27910614013672, "learning_rate": 1e-06, "loss": 2.6172, "num_input_tokens_seen": 8098064, "step": 145 }, { "epoch": 0.32293986636971045, "loss": 2.32407283782959, "loss_ce": 0.28696349263191223, "loss_iou": 0.796875, "loss_num": 0.08837890625, "loss_xval": 2.03125, "num_input_tokens_seen": 8098064, "step": 145 }, { "epoch": 0.32516703786191536, "grad_norm": 28.506025314331055, "learning_rate": 1e-06, "loss": 2.534, "num_input_tokens_seen": 8153264, "step": 146 }, { "epoch": 0.32516703786191536, "loss": 2.474531650543213, "loss_ce": 0.3632035255432129, "loss_iou": 0.7421875, "loss_num": 0.1259765625, "loss_xval": 2.109375, "num_input_tokens_seen": 8153264, "step": 146 }, { "epoch": 0.3273942093541203, "grad_norm": 55.345096588134766, "learning_rate": 1e-06, "loss": 2.5464, "num_input_tokens_seen": 8209784, "step": 147 }, { "epoch": 0.3273942093541203, "loss": 2.5160865783691406, "loss_ce": 0.48581331968307495, "loss_iou": 0.7578125, "loss_num": 0.10205078125, "loss_xval": 2.03125, "num_input_tokens_seen": 8209784, "step": 147 }, { "epoch": 0.32962138084632514, "grad_norm": 44.24598693847656, "learning_rate": 1e-06, "loss": 2.7815, "num_input_tokens_seen": 8262396, "step": 148 }, { "epoch": 0.32962138084632514, "loss": 2.644756317138672, "loss_ce": 0.3185845911502838, "loss_iou": 0.88671875, "loss_num": 0.11083984375, "loss_xval": 2.328125, "num_input_tokens_seen": 8262396, "step": 148 }, { "epoch": 0.33184855233853006, "grad_norm": 39.58821105957031, "learning_rate": 1e-06, "loss": 2.3255, "num_input_tokens_seen": 8316924, "step": 149 }, { "epoch": 0.33184855233853006, "loss": 2.2807397842407227, "loss_ce": 0.3002711534500122, "loss_iou": 0.73046875, "loss_num": 0.103515625, "loss_xval": 1.984375, "num_input_tokens_seen": 8316924, "step": 149 }, { "epoch": 0.33407572383073497, "grad_norm": 30.283483505249023, "learning_rate": 1e-06, "loss": 2.3857, "num_input_tokens_seen": 8372780, "step": 150 }, { "epoch": 0.33407572383073497, "loss": 2.6044678688049316, "loss_ce": 0.25729984045028687, "loss_iou": 0.84375, "loss_num": 0.1318359375, "loss_xval": 2.34375, "num_input_tokens_seen": 8372780, "step": 150 }, { "epoch": 0.3363028953229399, "grad_norm": 30.648853302001953, "learning_rate": 1e-06, "loss": 2.5253, "num_input_tokens_seen": 8426544, "step": 151 }, { "epoch": 0.3363028953229399, "loss": 2.3786497116088867, "loss_ce": 0.2819698452949524, "loss_iou": 0.7734375, "loss_num": 0.10986328125, "loss_xval": 2.09375, "num_input_tokens_seen": 8426544, "step": 151 }, { "epoch": 0.33853006681514475, "grad_norm": 41.66792297363281, "learning_rate": 1e-06, "loss": 2.3784, "num_input_tokens_seen": 8483968, "step": 152 }, { "epoch": 0.33853006681514475, "loss": 2.490370273590088, "loss_ce": 0.3272841274738312, "loss_iou": 0.79296875, "loss_num": 0.115234375, "loss_xval": 2.15625, "num_input_tokens_seen": 8483968, "step": 152 }, { "epoch": 0.34075723830734966, "grad_norm": 39.65934753417969, "learning_rate": 1e-06, "loss": 2.9073, "num_input_tokens_seen": 8537416, "step": 153 }, { "epoch": 0.34075723830734966, "loss": 3.037890911102295, "loss_ce": 0.40898463129997253, "loss_iou": 1.03125, "loss_num": 0.11328125, "loss_xval": 2.625, "num_input_tokens_seen": 8537416, "step": 153 }, { "epoch": 0.3429844097995546, "grad_norm": 43.49128341674805, "learning_rate": 1e-06, "loss": 2.6995, "num_input_tokens_seen": 8592392, "step": 154 }, { "epoch": 0.3429844097995546, "loss": 2.778602123260498, "loss_ce": 0.24051626026630402, "loss_iou": 0.8984375, "loss_num": 0.146484375, "loss_xval": 2.53125, "num_input_tokens_seen": 8592392, "step": 154 }, { "epoch": 0.34521158129175944, "grad_norm": 158.33724975585938, "learning_rate": 1e-06, "loss": 2.5526, "num_input_tokens_seen": 8647228, "step": 155 }, { "epoch": 0.34521158129175944, "loss": 2.642277717590332, "loss_ce": 0.29755133390426636, "loss_iou": 0.89453125, "loss_num": 0.11083984375, "loss_xval": 2.34375, "num_input_tokens_seen": 8647228, "step": 155 }, { "epoch": 0.34743875278396436, "grad_norm": 173.70913696289062, "learning_rate": 1e-06, "loss": 2.8128, "num_input_tokens_seen": 8701864, "step": 156 }, { "epoch": 0.34743875278396436, "loss": 2.8622255325317383, "loss_ce": 0.30753791332244873, "loss_iou": 0.921875, "loss_num": 0.1416015625, "loss_xval": 2.5625, "num_input_tokens_seen": 8701864, "step": 156 }, { "epoch": 0.34966592427616927, "grad_norm": 410.6438903808594, "learning_rate": 1e-06, "loss": 2.4924, "num_input_tokens_seen": 8759524, "step": 157 }, { "epoch": 0.34966592427616927, "loss": 2.3690528869628906, "loss_ce": 0.3465919494628906, "loss_iou": 0.7578125, "loss_num": 0.1025390625, "loss_xval": 2.015625, "num_input_tokens_seen": 8759524, "step": 157 }, { "epoch": 0.3518930957683742, "grad_norm": 40.04865646362305, "learning_rate": 1e-06, "loss": 2.43, "num_input_tokens_seen": 8814804, "step": 158 }, { "epoch": 0.3518930957683742, "loss": 2.7295665740966797, "loss_ce": 0.2725353240966797, "loss_iou": 0.94921875, "loss_num": 0.111328125, "loss_xval": 2.453125, "num_input_tokens_seen": 8814804, "step": 158 }, { "epoch": 0.35412026726057905, "grad_norm": 62.50952911376953, "learning_rate": 1e-06, "loss": 2.4288, "num_input_tokens_seen": 8870392, "step": 159 }, { "epoch": 0.35412026726057905, "loss": 2.16571307182312, "loss_ce": 0.24676772952079773, "loss_iou": 0.7734375, "loss_num": 0.07568359375, "loss_xval": 1.921875, "num_input_tokens_seen": 8870392, "step": 159 }, { "epoch": 0.35634743875278396, "grad_norm": 28.229555130004883, "learning_rate": 1e-06, "loss": 2.3698, "num_input_tokens_seen": 8927972, "step": 160 }, { "epoch": 0.35634743875278396, "loss": 2.608736753463745, "loss_ce": 0.24057263135910034, "loss_iou": 0.8828125, "loss_num": 0.12060546875, "loss_xval": 2.375, "num_input_tokens_seen": 8927972, "step": 160 }, { "epoch": 0.3585746102449889, "grad_norm": 21.421159744262695, "learning_rate": 1e-06, "loss": 2.3587, "num_input_tokens_seen": 8983156, "step": 161 }, { "epoch": 0.3585746102449889, "loss": 1.9863007068634033, "loss_ce": 0.13766781985759735, "loss_iou": 0.75390625, "loss_num": 0.0673828125, "loss_xval": 1.8515625, "num_input_tokens_seen": 8983156, "step": 161 }, { "epoch": 0.36080178173719374, "grad_norm": 23.95926284790039, "learning_rate": 1e-06, "loss": 2.4225, "num_input_tokens_seen": 9038652, "step": 162 }, { "epoch": 0.36080178173719374, "loss": 2.301337480545044, "loss_ce": 0.2510446012020111, "loss_iou": 0.7734375, "loss_num": 0.10009765625, "loss_xval": 2.046875, "num_input_tokens_seen": 9038652, "step": 162 }, { "epoch": 0.36302895322939865, "grad_norm": 47.72632598876953, "learning_rate": 1e-06, "loss": 2.5188, "num_input_tokens_seen": 9096844, "step": 163 }, { "epoch": 0.36302895322939865, "loss": 2.3073041439056396, "loss_ce": 0.20134715735912323, "loss_iou": 0.75, "loss_num": 0.12060546875, "loss_xval": 2.109375, "num_input_tokens_seen": 9096844, "step": 163 }, { "epoch": 0.36525612472160357, "grad_norm": 75.89527893066406, "learning_rate": 1e-06, "loss": 2.5799, "num_input_tokens_seen": 9151260, "step": 164 }, { "epoch": 0.36525612472160357, "loss": 2.6282100677490234, "loss_ce": 0.3332880735397339, "loss_iou": 0.87890625, "loss_num": 0.10693359375, "loss_xval": 2.296875, "num_input_tokens_seen": 9151260, "step": 164 }, { "epoch": 0.3674832962138085, "grad_norm": 44.563873291015625, "learning_rate": 1e-06, "loss": 2.5729, "num_input_tokens_seen": 9206156, "step": 165 }, { "epoch": 0.3674832962138085, "loss": 2.5874228477478027, "loss_ce": 0.30128994584083557, "loss_iou": 0.90625, "loss_num": 0.0947265625, "loss_xval": 2.28125, "num_input_tokens_seen": 9206156, "step": 165 }, { "epoch": 0.36971046770601335, "grad_norm": 57.38001251220703, "learning_rate": 1e-06, "loss": 2.588, "num_input_tokens_seen": 9259864, "step": 166 }, { "epoch": 0.36971046770601335, "loss": 2.448063373565674, "loss_ce": 0.2586103677749634, "loss_iou": 0.875, "loss_num": 0.08740234375, "loss_xval": 2.1875, "num_input_tokens_seen": 9259864, "step": 166 }, { "epoch": 0.37193763919821826, "grad_norm": 53.09568405151367, "learning_rate": 1e-06, "loss": 2.5351, "num_input_tokens_seen": 9317272, "step": 167 }, { "epoch": 0.37193763919821826, "loss": 2.441850185394287, "loss_ce": 0.3842327892780304, "loss_iou": 0.7890625, "loss_num": 0.0966796875, "loss_xval": 2.0625, "num_input_tokens_seen": 9317272, "step": 167 }, { "epoch": 0.3741648106904232, "grad_norm": 36.94442367553711, "learning_rate": 1e-06, "loss": 2.1125, "num_input_tokens_seen": 9372720, "step": 168 }, { "epoch": 0.3741648106904232, "loss": 2.203955888748169, "loss_ce": 0.25278398394584656, "loss_iou": 0.74609375, "loss_num": 0.091796875, "loss_xval": 1.953125, "num_input_tokens_seen": 9372720, "step": 168 }, { "epoch": 0.37639198218262804, "grad_norm": 61.23322296142578, "learning_rate": 1e-06, "loss": 2.5849, "num_input_tokens_seen": 9429244, "step": 169 }, { "epoch": 0.37639198218262804, "loss": 2.3757333755493164, "loss_ce": 0.22045986354351044, "loss_iou": 0.81640625, "loss_num": 0.1044921875, "loss_xval": 2.15625, "num_input_tokens_seen": 9429244, "step": 169 }, { "epoch": 0.37861915367483295, "grad_norm": 44.59577178955078, "learning_rate": 1e-06, "loss": 2.3184, "num_input_tokens_seen": 9485716, "step": 170 }, { "epoch": 0.37861915367483295, "loss": 2.2477307319641113, "loss_ce": 0.21745747327804565, "loss_iou": 0.83203125, "loss_num": 0.0732421875, "loss_xval": 2.03125, "num_input_tokens_seen": 9485716, "step": 170 }, { "epoch": 0.38084632516703787, "grad_norm": 26.56584358215332, "learning_rate": 1e-06, "loss": 2.3085, "num_input_tokens_seen": 9542880, "step": 171 }, { "epoch": 0.38084632516703787, "loss": 2.289217948913574, "loss_ce": 0.17203053832054138, "loss_iou": 0.859375, "loss_num": 0.080078125, "loss_xval": 2.125, "num_input_tokens_seen": 9542880, "step": 171 }, { "epoch": 0.3830734966592428, "grad_norm": 40.40160369873047, "learning_rate": 1e-06, "loss": 2.4104, "num_input_tokens_seen": 9599024, "step": 172 }, { "epoch": 0.3830734966592428, "loss": 2.4211387634277344, "loss_ce": 0.155513733625412, "loss_iou": 0.84765625, "loss_num": 0.11376953125, "loss_xval": 2.265625, "num_input_tokens_seen": 9599024, "step": 172 }, { "epoch": 0.38530066815144765, "grad_norm": 23.74785614013672, "learning_rate": 1e-06, "loss": 2.5009, "num_input_tokens_seen": 9651488, "step": 173 }, { "epoch": 0.38530066815144765, "loss": 2.5731005668640137, "loss_ce": 0.2664598822593689, "loss_iou": 0.890625, "loss_num": 0.1044921875, "loss_xval": 2.3125, "num_input_tokens_seen": 9651488, "step": 173 }, { "epoch": 0.38752783964365256, "grad_norm": 51.20659255981445, "learning_rate": 1e-06, "loss": 2.7359, "num_input_tokens_seen": 9708124, "step": 174 }, { "epoch": 0.38752783964365256, "loss": 2.863722324371338, "loss_ce": 0.31489402055740356, "loss_iou": 0.95703125, "loss_num": 0.126953125, "loss_xval": 2.546875, "num_input_tokens_seen": 9708124, "step": 174 }, { "epoch": 0.3897550111358575, "grad_norm": 18.966318130493164, "learning_rate": 1e-06, "loss": 2.3377, "num_input_tokens_seen": 9766496, "step": 175 }, { "epoch": 0.3897550111358575, "loss": 2.530661106109619, "loss_ce": 0.2992156147956848, "loss_iou": 0.8515625, "loss_num": 0.10546875, "loss_xval": 2.234375, "num_input_tokens_seen": 9766496, "step": 175 }, { "epoch": 0.39198218262806234, "grad_norm": 43.686466217041016, "learning_rate": 1e-06, "loss": 2.5387, "num_input_tokens_seen": 9823256, "step": 176 }, { "epoch": 0.39198218262806234, "loss": 2.393760919570923, "loss_ce": 0.18672963976860046, "loss_iou": 0.8203125, "loss_num": 0.11279296875, "loss_xval": 2.203125, "num_input_tokens_seen": 9823256, "step": 176 }, { "epoch": 0.39420935412026725, "grad_norm": 117.024658203125, "learning_rate": 1e-06, "loss": 2.5409, "num_input_tokens_seen": 9881900, "step": 177 }, { "epoch": 0.39420935412026725, "loss": 2.7967357635498047, "loss_ce": 0.2508372664451599, "loss_iou": 0.90234375, "loss_num": 0.1474609375, "loss_xval": 2.546875, "num_input_tokens_seen": 9881900, "step": 177 }, { "epoch": 0.39643652561247217, "grad_norm": 77.61784362792969, "learning_rate": 1e-06, "loss": 2.5997, "num_input_tokens_seen": 9937420, "step": 178 }, { "epoch": 0.39643652561247217, "loss": 2.7027268409729004, "loss_ce": 0.18905505537986755, "loss_iou": 0.9453125, "loss_num": 0.125, "loss_xval": 2.515625, "num_input_tokens_seen": 9937420, "step": 178 }, { "epoch": 0.3986636971046771, "grad_norm": 22.932493209838867, "learning_rate": 1e-06, "loss": 2.0248, "num_input_tokens_seen": 9994316, "step": 179 }, { "epoch": 0.3986636971046771, "loss": 2.1930716037750244, "loss_ce": 0.20479029417037964, "loss_iou": 0.76953125, "loss_num": 0.08984375, "loss_xval": 1.984375, "num_input_tokens_seen": 9994316, "step": 179 }, { "epoch": 0.40089086859688194, "grad_norm": 61.33550262451172, "learning_rate": 1e-06, "loss": 2.55, "num_input_tokens_seen": 10046516, "step": 180 }, { "epoch": 0.40089086859688194, "loss": 2.5461134910583496, "loss_ce": 0.25509777665138245, "loss_iou": 0.91796875, "loss_num": 0.0908203125, "loss_xval": 2.296875, "num_input_tokens_seen": 10046516, "step": 180 }, { "epoch": 0.40311804008908686, "grad_norm": 29.507160186767578, "learning_rate": 1e-06, "loss": 2.4409, "num_input_tokens_seen": 10103372, "step": 181 }, { "epoch": 0.40311804008908686, "loss": 2.6029810905456543, "loss_ce": 0.23091065883636475, "loss_iou": 0.8984375, "loss_num": 0.11572265625, "loss_xval": 2.375, "num_input_tokens_seen": 10103372, "step": 181 }, { "epoch": 0.4053452115812918, "grad_norm": 90.82682037353516, "learning_rate": 1e-06, "loss": 2.5212, "num_input_tokens_seen": 10159204, "step": 182 }, { "epoch": 0.4053452115812918, "loss": 2.695434331893921, "loss_ce": 0.3145750164985657, "loss_iou": 0.8828125, "loss_num": 0.123046875, "loss_xval": 2.375, "num_input_tokens_seen": 10159204, "step": 182 }, { "epoch": 0.40757238307349664, "grad_norm": 27.87862205505371, "learning_rate": 1e-06, "loss": 2.2165, "num_input_tokens_seen": 10214768, "step": 183 }, { "epoch": 0.40757238307349664, "loss": 2.1518983840942383, "loss_ce": 0.1807069331407547, "loss_iou": 0.73828125, "loss_num": 0.09912109375, "loss_xval": 1.96875, "num_input_tokens_seen": 10214768, "step": 183 }, { "epoch": 0.40979955456570155, "grad_norm": 72.37561798095703, "learning_rate": 1e-06, "loss": 2.2339, "num_input_tokens_seen": 10270868, "step": 184 }, { "epoch": 0.40979955456570155, "loss": 2.1870460510253906, "loss_ce": 0.20511233806610107, "loss_iou": 0.7734375, "loss_num": 0.08642578125, "loss_xval": 1.984375, "num_input_tokens_seen": 10270868, "step": 184 }, { "epoch": 0.41202672605790647, "grad_norm": 22.659595489501953, "learning_rate": 1e-06, "loss": 2.4928, "num_input_tokens_seen": 10322388, "step": 185 }, { "epoch": 0.41202672605790647, "loss": 2.4523706436157227, "loss_ce": 0.2468043565750122, "loss_iou": 0.83984375, "loss_num": 0.1044921875, "loss_xval": 2.203125, "num_input_tokens_seen": 10322388, "step": 185 }, { "epoch": 0.4142538975501114, "grad_norm": 27.231279373168945, "learning_rate": 1e-06, "loss": 2.2191, "num_input_tokens_seen": 10379116, "step": 186 }, { "epoch": 0.4142538975501114, "loss": 2.0813474655151367, "loss_ce": 0.14189457893371582, "loss_iou": 0.765625, "loss_num": 0.08203125, "loss_xval": 1.9375, "num_input_tokens_seen": 10379116, "step": 186 }, { "epoch": 0.41648106904231624, "grad_norm": 20.43091583251953, "learning_rate": 1e-06, "loss": 2.2453, "num_input_tokens_seen": 10434552, "step": 187 }, { "epoch": 0.41648106904231624, "loss": 2.5419094562530518, "loss_ce": 0.25821810960769653, "loss_iou": 0.796875, "loss_num": 0.138671875, "loss_xval": 2.28125, "num_input_tokens_seen": 10434552, "step": 187 }, { "epoch": 0.41870824053452116, "grad_norm": 25.056270599365234, "learning_rate": 1e-06, "loss": 2.2543, "num_input_tokens_seen": 10493740, "step": 188 }, { "epoch": 0.41870824053452116, "loss": 2.3587117195129395, "loss_ce": 0.16730527579784393, "loss_iou": 0.8203125, "loss_num": 0.109375, "loss_xval": 2.1875, "num_input_tokens_seen": 10493740, "step": 188 }, { "epoch": 0.4209354120267261, "grad_norm": 26.688255310058594, "learning_rate": 1e-06, "loss": 2.2537, "num_input_tokens_seen": 10548228, "step": 189 }, { "epoch": 0.4209354120267261, "loss": 2.1556687355041504, "loss_ce": 0.173246830701828, "loss_iou": 0.7890625, "loss_num": 0.08203125, "loss_xval": 1.984375, "num_input_tokens_seen": 10548228, "step": 189 }, { "epoch": 0.42316258351893093, "grad_norm": 56.197242736816406, "learning_rate": 1e-06, "loss": 2.0274, "num_input_tokens_seen": 10604476, "step": 190 }, { "epoch": 0.42316258351893093, "loss": 2.1501314640045166, "loss_ce": 0.188217431306839, "loss_iou": 0.734375, "loss_num": 0.09716796875, "loss_xval": 1.9609375, "num_input_tokens_seen": 10604476, "step": 190 }, { "epoch": 0.42538975501113585, "grad_norm": 32.006675720214844, "learning_rate": 1e-06, "loss": 2.4196, "num_input_tokens_seen": 10660088, "step": 191 }, { "epoch": 0.42538975501113585, "loss": 2.4907290935516357, "loss_ce": 0.21826806664466858, "loss_iou": 0.859375, "loss_num": 0.1103515625, "loss_xval": 2.265625, "num_input_tokens_seen": 10660088, "step": 191 }, { "epoch": 0.42761692650334077, "grad_norm": 23.622976303100586, "learning_rate": 1e-06, "loss": 1.8407, "num_input_tokens_seen": 10719304, "step": 192 }, { "epoch": 0.42761692650334077, "loss": 1.853776454925537, "loss_ce": 0.0735030248761177, "loss_iou": 0.76171875, "loss_num": 0.05078125, "loss_xval": 1.78125, "num_input_tokens_seen": 10719304, "step": 192 }, { "epoch": 0.4298440979955457, "grad_norm": 132.57778930664062, "learning_rate": 1e-06, "loss": 1.9273, "num_input_tokens_seen": 10776824, "step": 193 }, { "epoch": 0.4298440979955457, "loss": 2.2028584480285645, "loss_ce": 0.19748730957508087, "loss_iou": 0.74609375, "loss_num": 0.10302734375, "loss_xval": 2.0, "num_input_tokens_seen": 10776824, "step": 193 }, { "epoch": 0.43207126948775054, "grad_norm": 78.81751251220703, "learning_rate": 1e-06, "loss": 2.282, "num_input_tokens_seen": 10832888, "step": 194 }, { "epoch": 0.43207126948775054, "loss": 2.0263118743896484, "loss_ce": 0.08978863805532455, "loss_iou": 0.78515625, "loss_num": 0.0732421875, "loss_xval": 1.9375, "num_input_tokens_seen": 10832888, "step": 194 }, { "epoch": 0.43429844097995546, "grad_norm": 33.7293586730957, "learning_rate": 1e-06, "loss": 1.9715, "num_input_tokens_seen": 10889816, "step": 195 }, { "epoch": 0.43429844097995546, "loss": 1.9463962316513062, "loss_ce": 0.15049774944782257, "loss_iou": 0.71484375, "loss_num": 0.07373046875, "loss_xval": 1.796875, "num_input_tokens_seen": 10889816, "step": 195 }, { "epoch": 0.4365256124721604, "grad_norm": 42.51150131225586, "learning_rate": 1e-06, "loss": 2.2224, "num_input_tokens_seen": 10947644, "step": 196 }, { "epoch": 0.4365256124721604, "loss": 2.052577495574951, "loss_ce": 0.1531633585691452, "loss_iou": 0.7734375, "loss_num": 0.0703125, "loss_xval": 1.8984375, "num_input_tokens_seen": 10947644, "step": 196 }, { "epoch": 0.43875278396436523, "grad_norm": 21.027219772338867, "learning_rate": 1e-06, "loss": 2.0492, "num_input_tokens_seen": 11004624, "step": 197 }, { "epoch": 0.43875278396436523, "loss": 2.039621353149414, "loss_ce": 0.1343478262424469, "loss_iou": 0.77734375, "loss_num": 0.0703125, "loss_xval": 1.90625, "num_input_tokens_seen": 11004624, "step": 197 }, { "epoch": 0.44097995545657015, "grad_norm": 30.52896499633789, "learning_rate": 1e-06, "loss": 2.0472, "num_input_tokens_seen": 11060372, "step": 198 }, { "epoch": 0.44097995545657015, "loss": 2.0091304779052734, "loss_ce": 0.12045860290527344, "loss_iou": 0.76953125, "loss_num": 0.0703125, "loss_xval": 1.890625, "num_input_tokens_seen": 11060372, "step": 198 }, { "epoch": 0.44320712694877507, "grad_norm": 40.47418975830078, "learning_rate": 1e-06, "loss": 2.1817, "num_input_tokens_seen": 11117700, "step": 199 }, { "epoch": 0.44320712694877507, "loss": 2.1524715423583984, "loss_ce": 0.1846981942653656, "loss_iou": 0.75, "loss_num": 0.09375, "loss_xval": 1.96875, "num_input_tokens_seen": 11117700, "step": 199 }, { "epoch": 0.44543429844098, "grad_norm": 42.97262191772461, "learning_rate": 1e-06, "loss": 2.1472, "num_input_tokens_seen": 11176308, "step": 200 }, { "epoch": 0.44543429844098, "loss": 2.278196096420288, "loss_ce": 0.12389924377202988, "loss_iou": 0.8359375, "loss_num": 0.09716796875, "loss_xval": 2.15625, "num_input_tokens_seen": 11176308, "step": 200 }, { "epoch": 0.44766146993318484, "grad_norm": 46.24988555908203, "learning_rate": 1e-06, "loss": 2.2836, "num_input_tokens_seen": 11232984, "step": 201 }, { "epoch": 0.44766146993318484, "loss": 2.1692562103271484, "loss_ce": 0.16632673144340515, "loss_iou": 0.76953125, "loss_num": 0.09375, "loss_xval": 2.0, "num_input_tokens_seen": 11232984, "step": 201 }, { "epoch": 0.44988864142538976, "grad_norm": 19.978219985961914, "learning_rate": 1e-06, "loss": 1.9362, "num_input_tokens_seen": 11289736, "step": 202 }, { "epoch": 0.44988864142538976, "loss": 2.002622127532959, "loss_ce": 0.14520032703876495, "loss_iou": 0.75, "loss_num": 0.07080078125, "loss_xval": 1.859375, "num_input_tokens_seen": 11289736, "step": 202 }, { "epoch": 0.4521158129175947, "grad_norm": 40.11283493041992, "learning_rate": 1e-06, "loss": 2.0214, "num_input_tokens_seen": 11347688, "step": 203 }, { "epoch": 0.4521158129175947, "loss": 2.089003801345825, "loss_ce": 0.12367182970046997, "loss_iou": 0.76171875, "loss_num": 0.087890625, "loss_xval": 1.96875, "num_input_tokens_seen": 11347688, "step": 203 }, { "epoch": 0.45434298440979953, "grad_norm": 45.47671890258789, "learning_rate": 1e-06, "loss": 1.7514, "num_input_tokens_seen": 11405500, "step": 204 }, { "epoch": 0.45434298440979953, "loss": 1.679763913154602, "loss_ce": 0.08992011845111847, "loss_iou": 0.6328125, "loss_num": 0.06494140625, "loss_xval": 1.59375, "num_input_tokens_seen": 11405500, "step": 204 }, { "epoch": 0.45657015590200445, "grad_norm": 37.67365646362305, "learning_rate": 1e-06, "loss": 2.1156, "num_input_tokens_seen": 11463076, "step": 205 }, { "epoch": 0.45657015590200445, "loss": 1.912656545639038, "loss_ce": 0.1499611884355545, "loss_iou": 0.63671875, "loss_num": 0.09814453125, "loss_xval": 1.765625, "num_input_tokens_seen": 11463076, "step": 205 }, { "epoch": 0.45879732739420936, "grad_norm": 22.140178680419922, "learning_rate": 1e-06, "loss": 2.2171, "num_input_tokens_seen": 11517904, "step": 206 }, { "epoch": 0.45879732739420936, "loss": 2.3471362590789795, "loss_ce": 0.13815191388130188, "loss_iou": 0.875, "loss_num": 0.0908203125, "loss_xval": 2.203125, "num_input_tokens_seen": 11517904, "step": 206 }, { "epoch": 0.4610244988864143, "grad_norm": 42.47764205932617, "learning_rate": 1e-06, "loss": 1.9804, "num_input_tokens_seen": 11572968, "step": 207 }, { "epoch": 0.4610244988864143, "loss": 2.3084888458251953, "loss_ce": 0.2538011372089386, "loss_iou": 0.796875, "loss_num": 0.09228515625, "loss_xval": 2.0625, "num_input_tokens_seen": 11572968, "step": 207 }, { "epoch": 0.46325167037861914, "grad_norm": 32.766109466552734, "learning_rate": 1e-06, "loss": 1.8334, "num_input_tokens_seen": 11629228, "step": 208 }, { "epoch": 0.46325167037861914, "loss": 1.8067898750305176, "loss_ce": 0.10049097239971161, "loss_iou": 0.67578125, "loss_num": 0.07177734375, "loss_xval": 1.703125, "num_input_tokens_seen": 11629228, "step": 208 }, { "epoch": 0.46547884187082406, "grad_norm": 21.969463348388672, "learning_rate": 1e-06, "loss": 1.7864, "num_input_tokens_seen": 11687208, "step": 209 }, { "epoch": 0.46547884187082406, "loss": 1.507708191871643, "loss_ce": 0.09462223947048187, "loss_iou": 0.56640625, "loss_num": 0.055419921875, "loss_xval": 1.4140625, "num_input_tokens_seen": 11687208, "step": 209 }, { "epoch": 0.46770601336302897, "grad_norm": 66.28128814697266, "learning_rate": 1e-06, "loss": 2.1212, "num_input_tokens_seen": 11744372, "step": 210 }, { "epoch": 0.46770601336302897, "loss": 1.94266939163208, "loss_ce": 0.18949554860591888, "loss_iou": 0.66015625, "loss_num": 0.0869140625, "loss_xval": 1.75, "num_input_tokens_seen": 11744372, "step": 210 }, { "epoch": 0.46993318485523383, "grad_norm": 27.227609634399414, "learning_rate": 1e-06, "loss": 2.1373, "num_input_tokens_seen": 11800196, "step": 211 }, { "epoch": 0.46993318485523383, "loss": 2.1795947551727295, "loss_ce": 0.12490727007389069, "loss_iou": 0.79296875, "loss_num": 0.09375, "loss_xval": 2.0625, "num_input_tokens_seen": 11800196, "step": 211 }, { "epoch": 0.47216035634743875, "grad_norm": 27.855268478393555, "learning_rate": 1e-06, "loss": 2.2184, "num_input_tokens_seen": 11857088, "step": 212 }, { "epoch": 0.47216035634743875, "loss": 2.045684337615967, "loss_ce": 0.13650476932525635, "loss_iou": 0.765625, "loss_num": 0.076171875, "loss_xval": 1.90625, "num_input_tokens_seen": 11857088, "step": 212 }, { "epoch": 0.47438752783964366, "grad_norm": 18.745811462402344, "learning_rate": 1e-06, "loss": 1.8398, "num_input_tokens_seen": 11908736, "step": 213 }, { "epoch": 0.47438752783964366, "loss": 1.898270845413208, "loss_ce": 0.1253216713666916, "loss_iou": 0.71875, "loss_num": 0.06689453125, "loss_xval": 1.7734375, "num_input_tokens_seen": 11908736, "step": 213 }, { "epoch": 0.4766146993318486, "grad_norm": 50.83546447753906, "learning_rate": 1e-06, "loss": 1.8401, "num_input_tokens_seen": 11963016, "step": 214 }, { "epoch": 0.4766146993318486, "loss": 1.990050196647644, "loss_ce": 0.1697378158569336, "loss_iou": 0.671875, "loss_num": 0.0947265625, "loss_xval": 1.8203125, "num_input_tokens_seen": 11963016, "step": 214 }, { "epoch": 0.47884187082405344, "grad_norm": 15.607227325439453, "learning_rate": 1e-06, "loss": 2.0483, "num_input_tokens_seen": 12019844, "step": 215 }, { "epoch": 0.47884187082405344, "loss": 1.621078610420227, "loss_ce": 0.10240183770656586, "loss_iou": 0.6328125, "loss_num": 0.05078125, "loss_xval": 1.515625, "num_input_tokens_seen": 12019844, "step": 215 }, { "epoch": 0.48106904231625836, "grad_norm": 561.2518920898438, "learning_rate": 1e-06, "loss": 2.0352, "num_input_tokens_seen": 12079540, "step": 216 }, { "epoch": 0.48106904231625836, "loss": 2.1394166946411133, "loss_ce": 0.18922138214111328, "loss_iou": 0.71484375, "loss_num": 0.1044921875, "loss_xval": 1.953125, "num_input_tokens_seen": 12079540, "step": 216 }, { "epoch": 0.48329621380846327, "grad_norm": 75.96478271484375, "learning_rate": 1e-06, "loss": 1.9004, "num_input_tokens_seen": 12136556, "step": 217 }, { "epoch": 0.48329621380846327, "loss": 2.064810276031494, "loss_ce": 0.13121652603149414, "loss_iou": 0.72265625, "loss_num": 0.09765625, "loss_xval": 1.9375, "num_input_tokens_seen": 12136556, "step": 217 }, { "epoch": 0.48552338530066813, "grad_norm": 21.790206909179688, "learning_rate": 1e-06, "loss": 1.9754, "num_input_tokens_seen": 12190060, "step": 218 }, { "epoch": 0.48552338530066813, "loss": 2.044926404953003, "loss_ce": 0.13574674725532532, "loss_iou": 0.70703125, "loss_num": 0.09912109375, "loss_xval": 1.90625, "num_input_tokens_seen": 12190060, "step": 218 }, { "epoch": 0.48775055679287305, "grad_norm": 127.96965026855469, "learning_rate": 1e-06, "loss": 2.0309, "num_input_tokens_seen": 12247900, "step": 219 }, { "epoch": 0.48775055679287305, "loss": 2.0088348388671875, "loss_ce": 0.12211604416370392, "loss_iou": 0.74609375, "loss_num": 0.0791015625, "loss_xval": 1.890625, "num_input_tokens_seen": 12247900, "step": 219 }, { "epoch": 0.48997772828507796, "grad_norm": 37.08074951171875, "learning_rate": 1e-06, "loss": 1.8653, "num_input_tokens_seen": 12303552, "step": 220 }, { "epoch": 0.48997772828507796, "loss": 1.8836941719055176, "loss_ce": 0.10244403779506683, "loss_iou": 0.72265625, "loss_num": 0.0673828125, "loss_xval": 1.78125, "num_input_tokens_seen": 12303552, "step": 220 }, { "epoch": 0.4922048997772829, "grad_norm": 32.377262115478516, "learning_rate": 1e-06, "loss": 1.8778, "num_input_tokens_seen": 12359432, "step": 221 }, { "epoch": 0.4922048997772829, "loss": 1.9408328533172607, "loss_ce": 0.10782508552074432, "loss_iou": 0.7421875, "loss_num": 0.06884765625, "loss_xval": 1.8359375, "num_input_tokens_seen": 12359432, "step": 221 }, { "epoch": 0.49443207126948774, "grad_norm": 45.29218673706055, "learning_rate": 1e-06, "loss": 2.1275, "num_input_tokens_seen": 12415808, "step": 222 }, { "epoch": 0.49443207126948774, "loss": 2.1390140056610107, "loss_ce": 0.1058109700679779, "loss_iou": 0.77734375, "loss_num": 0.09619140625, "loss_xval": 2.03125, "num_input_tokens_seen": 12415808, "step": 222 }, { "epoch": 0.49665924276169265, "grad_norm": 23.249635696411133, "learning_rate": 1e-06, "loss": 2.2482, "num_input_tokens_seen": 12468328, "step": 223 }, { "epoch": 0.49665924276169265, "loss": 2.2763051986694336, "loss_ce": 0.13763317465782166, "loss_iou": 0.8125, "loss_num": 0.10302734375, "loss_xval": 2.140625, "num_input_tokens_seen": 12468328, "step": 223 }, { "epoch": 0.49888641425389757, "grad_norm": 26.10118293762207, "learning_rate": 1e-06, "loss": 1.745, "num_input_tokens_seen": 12526672, "step": 224 }, { "epoch": 0.49888641425389757, "loss": 1.4877617359161377, "loss_ce": 0.06539853662252426, "loss_iou": 0.57421875, "loss_num": 0.055419921875, "loss_xval": 1.421875, "num_input_tokens_seen": 12526672, "step": 224 }, { "epoch": 0.5011135857461024, "grad_norm": 28.74011993408203, "learning_rate": 1e-06, "loss": 1.8918, "num_input_tokens_seen": 12582496, "step": 225 }, { "epoch": 0.5011135857461024, "loss": 2.0152716636657715, "loss_ce": 0.0636114627122879, "loss_iou": 0.77734375, "loss_num": 0.0791015625, "loss_xval": 1.953125, "num_input_tokens_seen": 12582496, "step": 225 }, { "epoch": 0.5033407572383074, "grad_norm": 39.25506591796875, "learning_rate": 1e-06, "loss": 2.072, "num_input_tokens_seen": 12637180, "step": 226 }, { "epoch": 0.5033407572383074, "loss": 2.0042176246643066, "loss_ce": 0.0706239864230156, "loss_iou": 0.76171875, "loss_num": 0.08154296875, "loss_xval": 1.9375, "num_input_tokens_seen": 12637180, "step": 226 }, { "epoch": 0.5055679287305123, "grad_norm": 58.19468307495117, "learning_rate": 1e-06, "loss": 1.9587, "num_input_tokens_seen": 12696256, "step": 227 }, { "epoch": 0.5055679287305123, "loss": 2.141087532043457, "loss_ce": 0.10886099934577942, "loss_iou": 0.80078125, "loss_num": 0.08642578125, "loss_xval": 2.03125, "num_input_tokens_seen": 12696256, "step": 227 }, { "epoch": 0.5077951002227171, "grad_norm": 26.209997177124023, "learning_rate": 1e-06, "loss": 2.0101, "num_input_tokens_seen": 12751124, "step": 228 }, { "epoch": 0.5077951002227171, "loss": 1.975238561630249, "loss_ce": 0.10316816717386246, "loss_iou": 0.76953125, "loss_num": 0.06689453125, "loss_xval": 1.875, "num_input_tokens_seen": 12751124, "step": 228 }, { "epoch": 0.5100222717149221, "grad_norm": 29.183605194091797, "learning_rate": 1e-06, "loss": 1.8975, "num_input_tokens_seen": 12807368, "step": 229 }, { "epoch": 0.5100222717149221, "loss": 2.0033857822418213, "loss_ce": 0.1225263923406601, "loss_iou": 0.71875, "loss_num": 0.08837890625, "loss_xval": 1.8828125, "num_input_tokens_seen": 12807368, "step": 229 }, { "epoch": 0.512249443207127, "grad_norm": 32.19590377807617, "learning_rate": 1e-06, "loss": 1.9127, "num_input_tokens_seen": 12866044, "step": 230 }, { "epoch": 0.512249443207127, "loss": 1.722662329673767, "loss_ce": 0.10645144432783127, "loss_iou": 0.63671875, "loss_num": 0.068359375, "loss_xval": 1.6171875, "num_input_tokens_seen": 12866044, "step": 230 }, { "epoch": 0.5144766146993318, "grad_norm": 88.9583740234375, "learning_rate": 1e-06, "loss": 1.8966, "num_input_tokens_seen": 12919828, "step": 231 }, { "epoch": 0.5144766146993318, "loss": 1.7427191734313965, "loss_ce": 0.08256293088197708, "loss_iou": 0.66796875, "loss_num": 0.06494140625, "loss_xval": 1.65625, "num_input_tokens_seen": 12919828, "step": 231 }, { "epoch": 0.5167037861915368, "grad_norm": 38.6852912902832, "learning_rate": 1e-06, "loss": 1.8535, "num_input_tokens_seen": 12976452, "step": 232 }, { "epoch": 0.5167037861915368, "loss": 1.5481868982315063, "loss_ce": 0.09603846073150635, "loss_iou": 0.60546875, "loss_num": 0.04833984375, "loss_xval": 1.453125, "num_input_tokens_seen": 12976452, "step": 232 }, { "epoch": 0.5189309576837416, "grad_norm": 40.1108512878418, "learning_rate": 1e-06, "loss": 1.7714, "num_input_tokens_seen": 13032624, "step": 233 }, { "epoch": 0.5189309576837416, "loss": 1.8302245140075684, "loss_ce": 0.043115146458148956, "loss_iou": 0.765625, "loss_num": 0.05126953125, "loss_xval": 1.7890625, "num_input_tokens_seen": 13032624, "step": 233 }, { "epoch": 0.5211581291759465, "grad_norm": 35.25422286987305, "learning_rate": 1e-06, "loss": 1.9139, "num_input_tokens_seen": 13089524, "step": 234 }, { "epoch": 0.5211581291759465, "loss": 2.1705551147460938, "loss_ce": 0.11000814288854599, "loss_iou": 0.8203125, "loss_num": 0.083984375, "loss_xval": 2.0625, "num_input_tokens_seen": 13089524, "step": 234 }, { "epoch": 0.5233853006681515, "grad_norm": 92.00837707519531, "learning_rate": 1e-06, "loss": 2.0319, "num_input_tokens_seen": 13146916, "step": 235 }, { "epoch": 0.5233853006681515, "loss": 1.8411858081817627, "loss_ce": 0.04724044352769852, "loss_iou": 0.7578125, "loss_num": 0.05517578125, "loss_xval": 1.796875, "num_input_tokens_seen": 13146916, "step": 235 }, { "epoch": 0.5256124721603563, "grad_norm": 61.55561065673828, "learning_rate": 1e-06, "loss": 1.938, "num_input_tokens_seen": 13206136, "step": 236 }, { "epoch": 0.5256124721603563, "loss": 1.859898328781128, "loss_ce": 0.0937848836183548, "loss_iou": 0.6875, "loss_num": 0.078125, "loss_xval": 1.765625, "num_input_tokens_seen": 13206136, "step": 236 }, { "epoch": 0.5278396436525612, "grad_norm": 24.729263305664062, "learning_rate": 1e-06, "loss": 1.6522, "num_input_tokens_seen": 13262240, "step": 237 }, { "epoch": 0.5278396436525612, "loss": 1.7375231981277466, "loss_ce": 0.05246463418006897, "loss_iou": 0.671875, "loss_num": 0.06884765625, "loss_xval": 1.6875, "num_input_tokens_seen": 13262240, "step": 237 }, { "epoch": 0.5300668151447662, "grad_norm": 46.568321228027344, "learning_rate": 1e-06, "loss": 1.8631, "num_input_tokens_seen": 13317024, "step": 238 }, { "epoch": 0.5300668151447662, "loss": 1.6511387825012207, "loss_ce": 0.043716952204704285, "loss_iou": 0.66796875, "loss_num": 0.05517578125, "loss_xval": 1.609375, "num_input_tokens_seen": 13317024, "step": 238 }, { "epoch": 0.532293986636971, "grad_norm": 433.0378112792969, "learning_rate": 1e-06, "loss": 1.9798, "num_input_tokens_seen": 13370596, "step": 239 }, { "epoch": 0.532293986636971, "loss": 1.9186971187591553, "loss_ce": 0.052486199885606766, "loss_iou": 0.7421875, "loss_num": 0.076171875, "loss_xval": 1.8671875, "num_input_tokens_seen": 13370596, "step": 239 }, { "epoch": 0.534521158129176, "grad_norm": 25.267301559448242, "learning_rate": 1e-06, "loss": 1.6908, "num_input_tokens_seen": 13427652, "step": 240 }, { "epoch": 0.534521158129176, "loss": 1.6441099643707275, "loss_ce": 0.04157081991434097, "loss_iou": 0.625, "loss_num": 0.06982421875, "loss_xval": 1.6015625, "num_input_tokens_seen": 13427652, "step": 240 }, { "epoch": 0.5367483296213809, "grad_norm": 23.9971981048584, "learning_rate": 1e-06, "loss": 1.944, "num_input_tokens_seen": 13484652, "step": 241 }, { "epoch": 0.5367483296213809, "loss": 1.9427827596664429, "loss_ce": 0.0628998875617981, "loss_iou": 0.78515625, "loss_num": 0.061279296875, "loss_xval": 1.8828125, "num_input_tokens_seen": 13484652, "step": 241 }, { "epoch": 0.5389755011135857, "grad_norm": 28.519479751586914, "learning_rate": 1e-06, "loss": 1.7093, "num_input_tokens_seen": 13539076, "step": 242 }, { "epoch": 0.5389755011135857, "loss": 1.5857552289962769, "loss_ce": 0.030091160908341408, "loss_iou": 0.61328125, "loss_num": 0.06494140625, "loss_xval": 1.5546875, "num_input_tokens_seen": 13539076, "step": 242 }, { "epoch": 0.5412026726057907, "grad_norm": 25.626075744628906, "learning_rate": 1e-06, "loss": 1.8502, "num_input_tokens_seen": 13596724, "step": 243 }, { "epoch": 0.5412026726057907, "loss": 1.6758081912994385, "loss_ce": 0.09328873455524445, "loss_iou": 0.6640625, "loss_num": 0.05029296875, "loss_xval": 1.5859375, "num_input_tokens_seen": 13596724, "step": 243 }, { "epoch": 0.5434298440979956, "grad_norm": 41.99237823486328, "learning_rate": 1e-06, "loss": 1.6974, "num_input_tokens_seen": 13652844, "step": 244 }, { "epoch": 0.5434298440979956, "loss": 1.7878342866897583, "loss_ce": 0.05052957311272621, "loss_iou": 0.7421875, "loss_num": 0.05029296875, "loss_xval": 1.734375, "num_input_tokens_seen": 13652844, "step": 244 }, { "epoch": 0.5456570155902004, "grad_norm": 36.3553352355957, "learning_rate": 1e-06, "loss": 1.9218, "num_input_tokens_seen": 13707612, "step": 245 }, { "epoch": 0.5456570155902004, "loss": 1.8159754276275635, "loss_ce": 0.04546765610575676, "loss_iou": 0.76171875, "loss_num": 0.04931640625, "loss_xval": 1.7734375, "num_input_tokens_seen": 13707612, "step": 245 }, { "epoch": 0.5478841870824054, "grad_norm": 25.488935470581055, "learning_rate": 1e-06, "loss": 1.7327, "num_input_tokens_seen": 13764768, "step": 246 }, { "epoch": 0.5478841870824054, "loss": 1.66471266746521, "loss_ce": 0.040689267218112946, "loss_iou": 0.65625, "loss_num": 0.06298828125, "loss_xval": 1.625, "num_input_tokens_seen": 13764768, "step": 246 }, { "epoch": 0.5501113585746102, "grad_norm": 43.198577880859375, "learning_rate": 1e-06, "loss": 2.2297, "num_input_tokens_seen": 13817324, "step": 247 }, { "epoch": 0.5501113585746102, "loss": 2.3575921058654785, "loss_ce": 0.06559999287128448, "loss_iou": 0.859375, "loss_num": 0.115234375, "loss_xval": 2.296875, "num_input_tokens_seen": 13817324, "step": 247 }, { "epoch": 0.5523385300668151, "grad_norm": 40.075462341308594, "learning_rate": 1e-06, "loss": 1.6199, "num_input_tokens_seen": 13873804, "step": 248 }, { "epoch": 0.5523385300668151, "loss": 1.5571107864379883, "loss_ce": 0.028302079066634178, "loss_iou": 0.61328125, "loss_num": 0.060546875, "loss_xval": 1.53125, "num_input_tokens_seen": 13873804, "step": 248 }, { "epoch": 0.5545657015590201, "grad_norm": 36.94553756713867, "learning_rate": 1e-06, "loss": 1.4597, "num_input_tokens_seen": 13932404, "step": 249 }, { "epoch": 0.5545657015590201, "loss": 1.564407229423523, "loss_ce": 0.05122363194823265, "loss_iou": 0.64453125, "loss_num": 0.044677734375, "loss_xval": 1.515625, "num_input_tokens_seen": 13932404, "step": 249 }, { "epoch": 0.5567928730512249, "grad_norm": 48.372737884521484, "learning_rate": 1e-06, "loss": 1.6582, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "eval_seeclick_web_CIoU": 0.45204322040081024, "eval_seeclick_web_GIoU": 0.446009561419487, "eval_seeclick_web_IoU": 0.48508621752262115, "eval_seeclick_web_MAE_all": 0.015304200816899538, "eval_seeclick_web_MAE_h": 0.01132917869836092, "eval_seeclick_web_MAE_w": 0.01562582701444626, "eval_seeclick_web_MAE_x_boxes": 0.010999062564224005, "eval_seeclick_web_MAE_y_boxes": 0.02148408070206642, "eval_seeclick_web_inside_bbox": 0.8263888955116272, "eval_seeclick_web_loss": 1.2039486169815063, "eval_seeclick_web_loss_ce": 0.0032457184279337525, "eval_seeclick_web_loss_iou": 0.554443359375, "eval_seeclick_web_loss_num": 0.013319015502929688, "eval_seeclick_web_loss_xval": 1.176025390625, "eval_seeclick_web_runtime": 17.9468, "eval_seeclick_web_samples_per_second": 2.786, "eval_seeclick_web_steps_per_second": 0.111, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "eval_icons_CIoU": 0.16195975244045258, "eval_icons_GIoU": 0.18568327277898788, "eval_icons_IoU": 0.2861369401216507, "eval_icons_MAE_all": 0.05986557714641094, "eval_icons_MAE_h": 0.0345042385160923, "eval_icons_MAE_w": 0.07578632980585098, "eval_icons_MAE_x_boxes": 0.06385871395468712, "eval_icons_MAE_y_boxes": 0.02914611343294382, "eval_icons_inside_bbox": 0.4322916716337204, "eval_icons_loss": 1.9311987161636353, "eval_icons_loss_ce": 0.01602194458246231, "eval_icons_loss_iou": 0.7734375, "eval_icons_loss_num": 0.05643463134765625, "eval_icons_loss_xval": 1.828125, "eval_icons_runtime": 16.6678, "eval_icons_samples_per_second": 3.0, "eval_icons_steps_per_second": 0.12, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "eval_screenspot_CIoU": 0.13644553472598395, "eval_screenspot_GIoU": 0.13833926369746527, "eval_screenspot_IoU": 0.27226150035858154, "eval_screenspot_MAE_all": 0.11441413809855779, "eval_screenspot_MAE_h": 0.06734236205617587, "eval_screenspot_MAE_w": 0.15528815736373267, "eval_screenspot_MAE_x_boxes": 0.13489964107672373, "eval_screenspot_MAE_y_boxes": 0.08852330843607585, "eval_screenspot_inside_bbox": 0.4887500007947286, "eval_screenspot_loss": 2.3626174926757812, "eval_screenspot_loss_ce": 0.06506530692179997, "eval_screenspot_loss_iou": 0.8843587239583334, "eval_screenspot_loss_num": 0.11939748128255208, "eval_screenspot_loss_xval": 2.365234375, "eval_screenspot_runtime": 26.8445, "eval_screenspot_samples_per_second": 3.315, "eval_screenspot_steps_per_second": 0.112, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "eval_compot_CIoU": 0.2119811549782753, "eval_compot_GIoU": 0.22996322065591812, "eval_compot_IoU": 0.2962482124567032, "eval_compot_MAE_all": 0.029227093793451786, "eval_compot_MAE_h": 0.021410066168755293, "eval_compot_MAE_w": 0.032643974758684635, "eval_compot_MAE_x_boxes": 0.03914828971028328, "eval_compot_MAE_y_boxes": 0.012671195901930332, "eval_compot_inside_bbox": 0.46875, "eval_compot_loss": 1.7058837413787842, "eval_compot_loss_ce": 0.0029119880637153983, "eval_compot_loss_iou": 0.745849609375, "eval_compot_loss_num": 0.02825927734375, "eval_compot_loss_xval": 1.6337890625, "eval_compot_runtime": 18.0854, "eval_compot_samples_per_second": 2.765, "eval_compot_steps_per_second": 0.111, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "eval_custom_ui_val_CIoU": 0.18997877753443188, "eval_custom_ui_val_GIoU": 0.199455168719093, "eval_custom_ui_val_IoU": 0.26526735723018646, "eval_custom_ui_val_MAE_all": 0.06052247662511137, "eval_custom_ui_val_MAE_h": 0.03889055632882648, "eval_custom_ui_val_MAE_w": 0.07028790439168613, "eval_custom_ui_val_MAE_x_boxes": 0.058271253067586154, "eval_custom_ui_val_MAE_y_boxes": 0.050626704883244306, "eval_custom_ui_val_inside_bbox": 0.5111882719728682, "eval_custom_ui_val_loss": 1.941453218460083, "eval_custom_ui_val_loss_ce": 0.020650964023338422, "eval_custom_ui_val_loss_iou": 0.7940809461805556, "eval_custom_ui_val_loss_num": 0.05839665730794271, "eval_custom_ui_val_loss_xval": 1.8801540798611112, "eval_custom_ui_val_runtime": 55.8224, "eval_custom_ui_val_samples_per_second": 4.747, "eval_custom_ui_val_steps_per_second": 0.161, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5567928730512249, "loss": 1.535244107246399, "loss_ce": 0.023525364696979523, "loss_iou": 0.66015625, "loss_num": 0.038330078125, "loss_xval": 1.515625, "num_input_tokens_seen": 13988996, "step": 250 }, { "epoch": 0.5590200445434298, "grad_norm": 25.04877281188965, "learning_rate": 1e-06, "loss": 1.6859, "num_input_tokens_seen": 14044712, "step": 251 }, { "epoch": 0.5590200445434298, "loss": 1.5795611143112183, "loss_ce": 0.04782275855541229, "loss_iou": 0.609375, "loss_num": 0.06298828125, "loss_xval": 1.53125, "num_input_tokens_seen": 14044712, "step": 251 }, { "epoch": 0.5612472160356348, "grad_norm": 88.63025665283203, "learning_rate": 1e-06, "loss": 1.3939, "num_input_tokens_seen": 14101200, "step": 252 }, { "epoch": 0.5612472160356348, "loss": 1.3761667013168335, "loss_ce": 0.0680612325668335, "loss_iou": 0.54296875, "loss_num": 0.044921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 14101200, "step": 252 }, { "epoch": 0.5634743875278396, "grad_norm": 28.615373611450195, "learning_rate": 1e-06, "loss": 1.5713, "num_input_tokens_seen": 14158672, "step": 253 }, { "epoch": 0.5634743875278396, "loss": 1.540935754776001, "loss_ce": 0.01456859614700079, "loss_iou": 0.64453125, "loss_num": 0.046875, "loss_xval": 1.5234375, "num_input_tokens_seen": 14158672, "step": 253 }, { "epoch": 0.5657015590200446, "grad_norm": 39.433143615722656, "learning_rate": 1e-06, "loss": 1.6708, "num_input_tokens_seen": 14216228, "step": 254 }, { "epoch": 0.5657015590200446, "loss": 1.7911148071289062, "loss_ce": 0.05381014943122864, "loss_iou": 0.71484375, "loss_num": 0.060546875, "loss_xval": 1.734375, "num_input_tokens_seen": 14216228, "step": 254 }, { "epoch": 0.5679287305122495, "grad_norm": 30.27321434020996, "learning_rate": 1e-06, "loss": 1.9151, "num_input_tokens_seen": 14270952, "step": 255 }, { "epoch": 0.5679287305122495, "loss": 2.0560014247894287, "loss_ce": 0.04672405868768692, "loss_iou": 0.79296875, "loss_num": 0.08447265625, "loss_xval": 2.015625, "num_input_tokens_seen": 14270952, "step": 255 }, { "epoch": 0.5701559020044543, "grad_norm": 25.623844146728516, "learning_rate": 1e-06, "loss": 1.5495, "num_input_tokens_seen": 14327860, "step": 256 }, { "epoch": 0.5701559020044543, "loss": 1.5679357051849365, "loss_ce": 0.032779376953840256, "loss_iou": 0.6015625, "loss_num": 0.0654296875, "loss_xval": 1.53125, "num_input_tokens_seen": 14327860, "step": 256 }, { "epoch": 0.5723830734966593, "grad_norm": 34.12678909301758, "learning_rate": 1e-06, "loss": 1.5758, "num_input_tokens_seen": 14383192, "step": 257 }, { "epoch": 0.5723830734966593, "loss": 1.398708701133728, "loss_ce": 0.030056362971663475, "loss_iou": 0.54296875, "loss_num": 0.056396484375, "loss_xval": 1.3671875, "num_input_tokens_seen": 14383192, "step": 257 }, { "epoch": 0.5746102449888641, "grad_norm": 22.465286254882812, "learning_rate": 1e-06, "loss": 1.618, "num_input_tokens_seen": 14438224, "step": 258 }, { "epoch": 0.5746102449888641, "loss": 1.6812279224395752, "loss_ce": 0.019118648022413254, "loss_iou": 0.72265625, "loss_num": 0.04345703125, "loss_xval": 1.6640625, "num_input_tokens_seen": 14438224, "step": 258 }, { "epoch": 0.576837416481069, "grad_norm": 45.041263580322266, "learning_rate": 1e-06, "loss": 1.7445, "num_input_tokens_seen": 14490804, "step": 259 }, { "epoch": 0.576837416481069, "loss": 1.58518648147583, "loss_ce": 0.05833101272583008, "loss_iou": 0.62890625, "loss_num": 0.053955078125, "loss_xval": 1.5234375, "num_input_tokens_seen": 14490804, "step": 259 }, { "epoch": 0.579064587973274, "grad_norm": 32.25990295410156, "learning_rate": 1e-06, "loss": 1.7383, "num_input_tokens_seen": 14548204, "step": 260 }, { "epoch": 0.579064587973274, "loss": 1.8021881580352783, "loss_ce": 0.019961677491664886, "loss_iou": 0.73046875, "loss_num": 0.06396484375, "loss_xval": 1.78125, "num_input_tokens_seen": 14548204, "step": 260 }, { "epoch": 0.5812917594654788, "grad_norm": 24.800201416015625, "learning_rate": 1e-06, "loss": 1.9518, "num_input_tokens_seen": 14602976, "step": 261 }, { "epoch": 0.5812917594654788, "loss": 1.7187559604644775, "loss_ce": 0.05420520156621933, "loss_iou": 0.70703125, "loss_num": 0.04931640625, "loss_xval": 1.6640625, "num_input_tokens_seen": 14602976, "step": 261 }, { "epoch": 0.5835189309576837, "grad_norm": 38.99089431762695, "learning_rate": 1e-06, "loss": 1.3557, "num_input_tokens_seen": 14659812, "step": 262 }, { "epoch": 0.5835189309576837, "loss": 1.3884127140045166, "loss_ce": 0.003158772364258766, "loss_iou": 0.609375, "loss_num": 0.032470703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 14659812, "step": 262 }, { "epoch": 0.5857461024498887, "grad_norm": 36.124290466308594, "learning_rate": 1e-06, "loss": 1.9584, "num_input_tokens_seen": 14716812, "step": 263 }, { "epoch": 0.5857461024498887, "loss": 2.1227617263793945, "loss_ce": 0.07393358647823334, "loss_iou": 0.83203125, "loss_num": 0.07763671875, "loss_xval": 2.046875, "num_input_tokens_seen": 14716812, "step": 263 }, { "epoch": 0.5879732739420935, "grad_norm": 27.93045997619629, "learning_rate": 1e-06, "loss": 1.6202, "num_input_tokens_seen": 14770856, "step": 264 }, { "epoch": 0.5879732739420935, "loss": 1.2804536819458008, "loss_ce": 0.0348481610417366, "loss_iou": 0.51953125, "loss_num": 0.041748046875, "loss_xval": 1.2421875, "num_input_tokens_seen": 14770856, "step": 264 }, { "epoch": 0.5902004454342984, "grad_norm": 25.890186309814453, "learning_rate": 1e-06, "loss": 1.7844, "num_input_tokens_seen": 14828476, "step": 265 }, { "epoch": 0.5902004454342984, "loss": 1.8109703063964844, "loss_ce": 0.03265002369880676, "loss_iou": 0.7421875, "loss_num": 0.059326171875, "loss_xval": 1.78125, "num_input_tokens_seen": 14828476, "step": 265 }, { "epoch": 0.5924276169265034, "grad_norm": 44.33429718017578, "learning_rate": 1e-06, "loss": 1.8674, "num_input_tokens_seen": 14880352, "step": 266 }, { "epoch": 0.5924276169265034, "loss": 1.7880139350891113, "loss_ce": 0.041920170187950134, "loss_iou": 0.70703125, "loss_num": 0.06640625, "loss_xval": 1.75, "num_input_tokens_seen": 14880352, "step": 266 }, { "epoch": 0.5946547884187082, "grad_norm": 46.44974136352539, "learning_rate": 1e-06, "loss": 1.6601, "num_input_tokens_seen": 14934660, "step": 267 }, { "epoch": 0.5946547884187082, "loss": 1.5294612646102905, "loss_ce": 0.031414370983839035, "loss_iou": 0.6171875, "loss_num": 0.052978515625, "loss_xval": 1.5, "num_input_tokens_seen": 14934660, "step": 267 }, { "epoch": 0.5968819599109132, "grad_norm": 50.88404846191406, "learning_rate": 1e-06, "loss": 1.4847, "num_input_tokens_seen": 14990032, "step": 268 }, { "epoch": 0.5968819599109132, "loss": 1.1937447786331177, "loss_ce": 0.016986995935440063, "loss_iou": 0.5, "loss_num": 0.03564453125, "loss_xval": 1.1796875, "num_input_tokens_seen": 14990032, "step": 268 }, { "epoch": 0.5991091314031181, "grad_norm": 27.827131271362305, "learning_rate": 1e-06, "loss": 1.5246, "num_input_tokens_seen": 15048824, "step": 269 }, { "epoch": 0.5991091314031181, "loss": 1.5418243408203125, "loss_ce": 0.0242463368922472, "loss_iou": 0.63671875, "loss_num": 0.049072265625, "loss_xval": 1.515625, "num_input_tokens_seen": 15048824, "step": 269 }, { "epoch": 0.6013363028953229, "grad_norm": 19.647504806518555, "learning_rate": 1e-06, "loss": 1.637, "num_input_tokens_seen": 15107692, "step": 270 }, { "epoch": 0.6013363028953229, "loss": 1.218379259109497, "loss_ce": 0.010371430777013302, "loss_iou": 0.53125, "loss_num": 0.0284423828125, "loss_xval": 1.2109375, "num_input_tokens_seen": 15107692, "step": 270 }, { "epoch": 0.6035634743875279, "grad_norm": 24.345657348632812, "learning_rate": 1e-06, "loss": 1.8571, "num_input_tokens_seen": 15161936, "step": 271 }, { "epoch": 0.6035634743875279, "loss": 1.8928604125976562, "loss_ce": 0.01151271816343069, "loss_iou": 0.80078125, "loss_num": 0.055908203125, "loss_xval": 1.8828125, "num_input_tokens_seen": 15161936, "step": 271 }, { "epoch": 0.6057906458797327, "grad_norm": 28.753704071044922, "learning_rate": 1e-06, "loss": 1.4391, "num_input_tokens_seen": 15218400, "step": 272 }, { "epoch": 0.6057906458797327, "loss": 1.046234130859375, "loss_ce": 0.030609235167503357, "loss_iou": 0.431640625, "loss_num": 0.0303955078125, "loss_xval": 1.015625, "num_input_tokens_seen": 15218400, "step": 272 }, { "epoch": 0.6080178173719376, "grad_norm": 28.32134437561035, "learning_rate": 1e-06, "loss": 1.6383, "num_input_tokens_seen": 15271292, "step": 273 }, { "epoch": 0.6080178173719376, "loss": 1.7744327783584595, "loss_ce": 0.0346866175532341, "loss_iou": 0.7578125, "loss_num": 0.044921875, "loss_xval": 1.7421875, "num_input_tokens_seen": 15271292, "step": 273 }, { "epoch": 0.6102449888641426, "grad_norm": 26.63081932067871, "learning_rate": 1e-06, "loss": 1.6534, "num_input_tokens_seen": 15324252, "step": 274 }, { "epoch": 0.6102449888641426, "loss": 1.8621397018432617, "loss_ce": 0.015460037626326084, "loss_iou": 0.77734375, "loss_num": 0.05859375, "loss_xval": 1.84375, "num_input_tokens_seen": 15324252, "step": 274 }, { "epoch": 0.6124721603563474, "grad_norm": 159.9815673828125, "learning_rate": 1e-06, "loss": 1.6268, "num_input_tokens_seen": 15380232, "step": 275 }, { "epoch": 0.6124721603563474, "loss": 1.5270476341247559, "loss_ce": 0.024606265127658844, "loss_iou": 0.6015625, "loss_num": 0.060546875, "loss_xval": 1.5, "num_input_tokens_seen": 15380232, "step": 275 }, { "epoch": 0.6146993318485523, "grad_norm": 16.36348533630371, "learning_rate": 1e-06, "loss": 1.8404, "num_input_tokens_seen": 15434716, "step": 276 }, { "epoch": 0.6146993318485523, "loss": 2.1927452087402344, "loss_ce": 0.04479604959487915, "loss_iou": 0.81640625, "loss_num": 0.10400390625, "loss_xval": 2.140625, "num_input_tokens_seen": 15434716, "step": 276 }, { "epoch": 0.6169265033407573, "grad_norm": 35.74172592163086, "learning_rate": 1e-06, "loss": 1.4253, "num_input_tokens_seen": 15491324, "step": 277 }, { "epoch": 0.6169265033407573, "loss": 1.4241523742675781, "loss_ce": 0.007160228211432695, "loss_iou": 0.59765625, "loss_num": 0.045166015625, "loss_xval": 1.4140625, "num_input_tokens_seen": 15491324, "step": 277 }, { "epoch": 0.6191536748329621, "grad_norm": 30.04996681213379, "learning_rate": 1e-06, "loss": 1.7018, "num_input_tokens_seen": 15545848, "step": 278 }, { "epoch": 0.6191536748329621, "loss": 1.6422159671783447, "loss_ce": 0.028934601694345474, "loss_iou": 0.67578125, "loss_num": 0.051513671875, "loss_xval": 1.609375, "num_input_tokens_seen": 15545848, "step": 278 }, { "epoch": 0.621380846325167, "grad_norm": 32.703819274902344, "learning_rate": 1e-06, "loss": 1.6324, "num_input_tokens_seen": 15603716, "step": 279 }, { "epoch": 0.621380846325167, "loss": 1.5165987014770508, "loss_ce": 0.0112276840955019, "loss_iou": 0.6484375, "loss_num": 0.041015625, "loss_xval": 1.5078125, "num_input_tokens_seen": 15603716, "step": 279 }, { "epoch": 0.623608017817372, "grad_norm": 35.6143798828125, "learning_rate": 1e-06, "loss": 1.6699, "num_input_tokens_seen": 15658112, "step": 280 }, { "epoch": 0.623608017817372, "loss": 1.777186393737793, "loss_ce": 0.02865123562514782, "loss_iou": 0.6796875, "loss_num": 0.07763671875, "loss_xval": 1.75, "num_input_tokens_seen": 15658112, "step": 280 }, { "epoch": 0.6258351893095768, "grad_norm": 28.780593872070312, "learning_rate": 1e-06, "loss": 1.6865, "num_input_tokens_seen": 15716760, "step": 281 }, { "epoch": 0.6258351893095768, "loss": 1.422225832939148, "loss_ce": 0.011092978529632092, "loss_iou": 0.6328125, "loss_num": 0.0284423828125, "loss_xval": 1.4140625, "num_input_tokens_seen": 15716760, "step": 281 }, { "epoch": 0.6280623608017817, "grad_norm": 39.134151458740234, "learning_rate": 1e-06, "loss": 2.0141, "num_input_tokens_seen": 15773952, "step": 282 }, { "epoch": 0.6280623608017817, "loss": 2.0597357749938965, "loss_ce": 0.03629831597208977, "loss_iou": 0.79296875, "loss_num": 0.087890625, "loss_xval": 2.03125, "num_input_tokens_seen": 15773952, "step": 282 }, { "epoch": 0.6302895322939867, "grad_norm": 21.123931884765625, "learning_rate": 1e-06, "loss": 1.4487, "num_input_tokens_seen": 15832456, "step": 283 }, { "epoch": 0.6302895322939867, "loss": 1.4959959983825684, "loss_ce": 0.018945157527923584, "loss_iou": 0.6328125, "loss_num": 0.0419921875, "loss_xval": 1.4765625, "num_input_tokens_seen": 15832456, "step": 283 }, { "epoch": 0.6325167037861915, "grad_norm": 137.96141052246094, "learning_rate": 1e-06, "loss": 1.5979, "num_input_tokens_seen": 15890888, "step": 284 }, { "epoch": 0.6325167037861915, "loss": 1.8737279176712036, "loss_ce": 0.05341540277004242, "loss_iou": 0.71875, "loss_num": 0.07763671875, "loss_xval": 1.8203125, "num_input_tokens_seen": 15890888, "step": 284 }, { "epoch": 0.6347438752783965, "grad_norm": 26.675518035888672, "learning_rate": 1e-06, "loss": 1.7712, "num_input_tokens_seen": 15945736, "step": 285 }, { "epoch": 0.6347438752783965, "loss": 1.6105928421020508, "loss_ce": 0.03197958692908287, "loss_iou": 0.65234375, "loss_num": 0.054931640625, "loss_xval": 1.578125, "num_input_tokens_seen": 15945736, "step": 285 }, { "epoch": 0.6369710467706013, "grad_norm": 24.4492130279541, "learning_rate": 1e-06, "loss": 1.5191, "num_input_tokens_seen": 16002312, "step": 286 }, { "epoch": 0.6369710467706013, "loss": 1.766524314880371, "loss_ce": 0.0673055648803711, "loss_iou": 0.6796875, "loss_num": 0.0673828125, "loss_xval": 1.703125, "num_input_tokens_seen": 16002312, "step": 286 }, { "epoch": 0.6391982182628062, "grad_norm": 30.305042266845703, "learning_rate": 1e-06, "loss": 1.4741, "num_input_tokens_seen": 16059496, "step": 287 }, { "epoch": 0.6391982182628062, "loss": 1.4600698947906494, "loss_ce": 0.04307776317000389, "loss_iou": 0.53125, "loss_num": 0.06982421875, "loss_xval": 1.4140625, "num_input_tokens_seen": 16059496, "step": 287 }, { "epoch": 0.6414253897550112, "grad_norm": 43.9190788269043, "learning_rate": 1e-06, "loss": 2.0036, "num_input_tokens_seen": 16114212, "step": 288 }, { "epoch": 0.6414253897550112, "loss": 2.116396188735962, "loss_ce": 0.04706018790602684, "loss_iou": 0.7890625, "loss_num": 0.09912109375, "loss_xval": 2.0625, "num_input_tokens_seen": 16114212, "step": 288 }, { "epoch": 0.643652561247216, "grad_norm": 32.69955825805664, "learning_rate": 1e-06, "loss": 1.4369, "num_input_tokens_seen": 16171136, "step": 289 }, { "epoch": 0.643652561247216, "loss": 1.4707211256027222, "loss_ce": 0.019060947000980377, "loss_iou": 0.6015625, "loss_num": 0.049560546875, "loss_xval": 1.453125, "num_input_tokens_seen": 16171136, "step": 289 }, { "epoch": 0.6458797327394209, "grad_norm": 23.189529418945312, "learning_rate": 1e-06, "loss": 1.6011, "num_input_tokens_seen": 16227256, "step": 290 }, { "epoch": 0.6458797327394209, "loss": 1.716025948524475, "loss_ce": 0.02217838540673256, "loss_iou": 0.7109375, "loss_num": 0.0537109375, "loss_xval": 1.6953125, "num_input_tokens_seen": 16227256, "step": 290 }, { "epoch": 0.6481069042316259, "grad_norm": 83.02802276611328, "learning_rate": 1e-06, "loss": 1.5053, "num_input_tokens_seen": 16283164, "step": 291 }, { "epoch": 0.6481069042316259, "loss": 1.8076732158660889, "loss_ce": 0.03325919434428215, "loss_iou": 0.7265625, "loss_num": 0.06494140625, "loss_xval": 1.7734375, "num_input_tokens_seen": 16283164, "step": 291 }, { "epoch": 0.6503340757238307, "grad_norm": 26.41913604736328, "learning_rate": 1e-06, "loss": 1.482, "num_input_tokens_seen": 16341224, "step": 292 }, { "epoch": 0.6503340757238307, "loss": 1.52240788936615, "loss_ce": 0.02973209135234356, "loss_iou": 0.60546875, "loss_num": 0.055908203125, "loss_xval": 1.4921875, "num_input_tokens_seen": 16341224, "step": 292 }, { "epoch": 0.6525612472160356, "grad_norm": 24.008352279663086, "learning_rate": 1e-06, "loss": 1.2855, "num_input_tokens_seen": 16399488, "step": 293 }, { "epoch": 0.6525612472160356, "loss": 1.323553442955017, "loss_ce": 0.02570188418030739, "loss_iou": 0.515625, "loss_num": 0.052490234375, "loss_xval": 1.296875, "num_input_tokens_seen": 16399488, "step": 293 }, { "epoch": 0.6547884187082406, "grad_norm": 40.153358459472656, "learning_rate": 1e-06, "loss": 2.0784, "num_input_tokens_seen": 16452992, "step": 294 }, { "epoch": 0.6547884187082406, "loss": 2.15143084526062, "loss_ce": 0.020571384578943253, "loss_iou": 0.828125, "loss_num": 0.0947265625, "loss_xval": 2.125, "num_input_tokens_seen": 16452992, "step": 294 }, { "epoch": 0.6570155902004454, "grad_norm": 19.569934844970703, "learning_rate": 1e-06, "loss": 1.6381, "num_input_tokens_seen": 16510300, "step": 295 }, { "epoch": 0.6570155902004454, "loss": 1.743638515472412, "loss_ce": 0.018052466213703156, "loss_iou": 0.7265625, "loss_num": 0.053466796875, "loss_xval": 1.7265625, "num_input_tokens_seen": 16510300, "step": 295 }, { "epoch": 0.6592427616926503, "grad_norm": 24.097932815551758, "learning_rate": 1e-06, "loss": 1.4483, "num_input_tokens_seen": 16565272, "step": 296 }, { "epoch": 0.6592427616926503, "loss": 1.252844214439392, "loss_ce": 0.0033324414398521185, "loss_iou": 0.49609375, "loss_num": 0.051513671875, "loss_xval": 1.25, "num_input_tokens_seen": 16565272, "step": 296 }, { "epoch": 0.6614699331848553, "grad_norm": 20.85422706604004, "learning_rate": 1e-06, "loss": 1.4426, "num_input_tokens_seen": 16620292, "step": 297 }, { "epoch": 0.6614699331848553, "loss": 1.5032652616500854, "loss_ce": 0.02230823040008545, "loss_iou": 0.6015625, "loss_num": 0.055419921875, "loss_xval": 1.484375, "num_input_tokens_seen": 16620292, "step": 297 }, { "epoch": 0.6636971046770601, "grad_norm": 31.075130462646484, "learning_rate": 1e-06, "loss": 1.7549, "num_input_tokens_seen": 16677532, "step": 298 }, { "epoch": 0.6636971046770601, "loss": 1.6268385648727417, "loss_ce": 0.030647173523902893, "loss_iou": 0.64453125, "loss_num": 0.0615234375, "loss_xval": 1.59375, "num_input_tokens_seen": 16677532, "step": 298 }, { "epoch": 0.6659242761692651, "grad_norm": 179.63589477539062, "learning_rate": 1e-06, "loss": 1.5358, "num_input_tokens_seen": 16731944, "step": 299 }, { "epoch": 0.6659242761692651, "loss": 1.3143196105957031, "loss_ce": 0.01890948787331581, "loss_iou": 0.52734375, "loss_num": 0.04736328125, "loss_xval": 1.296875, "num_input_tokens_seen": 16731944, "step": 299 }, { "epoch": 0.6681514476614699, "grad_norm": 78.36969757080078, "learning_rate": 1e-06, "loss": 1.6417, "num_input_tokens_seen": 16785832, "step": 300 }, { "epoch": 0.6681514476614699, "loss": 1.405898928642273, "loss_ce": 0.023086415603756905, "loss_iou": 0.5546875, "loss_num": 0.054931640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 16785832, "step": 300 }, { "epoch": 0.6703786191536748, "grad_norm": 20.544160842895508, "learning_rate": 1e-06, "loss": 1.3546, "num_input_tokens_seen": 16843352, "step": 301 }, { "epoch": 0.6703786191536748, "loss": 1.2849873304367065, "loss_ce": 0.00910840556025505, "loss_iou": 0.55859375, "loss_num": 0.03125, "loss_xval": 1.2734375, "num_input_tokens_seen": 16843352, "step": 301 }, { "epoch": 0.6726057906458798, "grad_norm": 33.55524826049805, "learning_rate": 1e-06, "loss": 1.5479, "num_input_tokens_seen": 16899880, "step": 302 }, { "epoch": 0.6726057906458798, "loss": 1.4881852865219116, "loss_ce": 0.01504078321158886, "loss_iou": 0.625, "loss_num": 0.04443359375, "loss_xval": 1.4765625, "num_input_tokens_seen": 16899880, "step": 302 }, { "epoch": 0.6748329621380846, "grad_norm": 25.37139320373535, "learning_rate": 1e-06, "loss": 1.3608, "num_input_tokens_seen": 16955144, "step": 303 }, { "epoch": 0.6748329621380846, "loss": 1.1685500144958496, "loss_ce": 0.01718279719352722, "loss_iou": 0.484375, "loss_num": 0.03662109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 16955144, "step": 303 }, { "epoch": 0.6770601336302895, "grad_norm": 26.81342887878418, "learning_rate": 1e-06, "loss": 1.3982, "num_input_tokens_seen": 17011748, "step": 304 }, { "epoch": 0.6770601336302895, "loss": 1.2066997289657593, "loss_ce": 0.01138727180659771, "loss_iou": 0.53125, "loss_num": 0.02587890625, "loss_xval": 1.1953125, "num_input_tokens_seen": 17011748, "step": 304 }, { "epoch": 0.6792873051224945, "grad_norm": 27.548656463623047, "learning_rate": 1e-06, "loss": 1.5679, "num_input_tokens_seen": 17068112, "step": 305 }, { "epoch": 0.6792873051224945, "loss": 1.7167648077011108, "loss_ce": 0.04781951755285263, "loss_iou": 0.671875, "loss_num": 0.06396484375, "loss_xval": 1.671875, "num_input_tokens_seen": 17068112, "step": 305 }, { "epoch": 0.6815144766146993, "grad_norm": 40.20294952392578, "learning_rate": 1e-06, "loss": 1.4371, "num_input_tokens_seen": 17122104, "step": 306 }, { "epoch": 0.6815144766146993, "loss": 1.477067470550537, "loss_ce": 0.01710660383105278, "loss_iou": 0.58984375, "loss_num": 0.055419921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 17122104, "step": 306 }, { "epoch": 0.6837416481069042, "grad_norm": 31.41501808166504, "learning_rate": 1e-06, "loss": 1.4743, "num_input_tokens_seen": 17179616, "step": 307 }, { "epoch": 0.6837416481069042, "loss": 1.279785394668579, "loss_ce": 0.05468768998980522, "loss_iou": 0.515625, "loss_num": 0.03857421875, "loss_xval": 1.2265625, "num_input_tokens_seen": 17179616, "step": 307 }, { "epoch": 0.6859688195991092, "grad_norm": 31.942686080932617, "learning_rate": 1e-06, "loss": 1.2797, "num_input_tokens_seen": 17236176, "step": 308 }, { "epoch": 0.6859688195991092, "loss": 1.2551771402359009, "loss_ce": 0.027149761095643044, "loss_iou": 0.5390625, "loss_num": 0.029541015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 17236176, "step": 308 }, { "epoch": 0.688195991091314, "grad_norm": 30.1185302734375, "learning_rate": 1e-06, "loss": 1.3682, "num_input_tokens_seen": 17293040, "step": 309 }, { "epoch": 0.688195991091314, "loss": 1.479234218597412, "loss_ce": 0.037827931344509125, "loss_iou": 0.60546875, "loss_num": 0.0458984375, "loss_xval": 1.4375, "num_input_tokens_seen": 17293040, "step": 309 }, { "epoch": 0.6904231625835189, "grad_norm": 17.059524536132812, "learning_rate": 1e-06, "loss": 1.5945, "num_input_tokens_seen": 17349008, "step": 310 }, { "epoch": 0.6904231625835189, "loss": 1.7185901403427124, "loss_ce": 0.01155892200767994, "loss_iou": 0.71484375, "loss_num": 0.054443359375, "loss_xval": 1.703125, "num_input_tokens_seen": 17349008, "step": 310 }, { "epoch": 0.6926503340757239, "grad_norm": 20.258180618286133, "learning_rate": 1e-06, "loss": 1.4631, "num_input_tokens_seen": 17404740, "step": 311 }, { "epoch": 0.6926503340757239, "loss": 1.595271348953247, "loss_ce": 0.05718539282679558, "loss_iou": 0.625, "loss_num": 0.058349609375, "loss_xval": 1.5390625, "num_input_tokens_seen": 17404740, "step": 311 }, { "epoch": 0.6948775055679287, "grad_norm": 51.31781005859375, "learning_rate": 1e-06, "loss": 1.9344, "num_input_tokens_seen": 17460352, "step": 312 }, { "epoch": 0.6948775055679287, "loss": 1.8748557567596436, "loss_ce": 0.0301292035728693, "loss_iou": 0.7734375, "loss_num": 0.0595703125, "loss_xval": 1.84375, "num_input_tokens_seen": 17460352, "step": 312 }, { "epoch": 0.6971046770601337, "grad_norm": 42.53439712524414, "learning_rate": 1e-06, "loss": 1.2032, "num_input_tokens_seen": 17518148, "step": 313 }, { "epoch": 0.6971046770601337, "loss": 1.2251746654510498, "loss_ce": 0.024491025134921074, "loss_iou": 0.51171875, "loss_num": 0.03515625, "loss_xval": 1.203125, "num_input_tokens_seen": 17518148, "step": 313 }, { "epoch": 0.6993318485523385, "grad_norm": 190.15823364257812, "learning_rate": 1e-06, "loss": 1.5881, "num_input_tokens_seen": 17574740, "step": 314 }, { "epoch": 0.6993318485523385, "loss": 1.6861159801483154, "loss_ce": 0.023518286645412445, "loss_iou": 0.6875, "loss_num": 0.056640625, "loss_xval": 1.6640625, "num_input_tokens_seen": 17574740, "step": 314 }, { "epoch": 0.7015590200445434, "grad_norm": 23.019386291503906, "learning_rate": 1e-06, "loss": 1.336, "num_input_tokens_seen": 17630988, "step": 315 }, { "epoch": 0.7015590200445434, "loss": 1.2523000240325928, "loss_ce": 0.0032766717486083508, "loss_iou": 0.5078125, "loss_num": 0.047119140625, "loss_xval": 1.25, "num_input_tokens_seen": 17630988, "step": 315 }, { "epoch": 0.7037861915367484, "grad_norm": 33.44746017456055, "learning_rate": 1e-06, "loss": 1.4686, "num_input_tokens_seen": 17688132, "step": 316 }, { "epoch": 0.7037861915367484, "loss": 1.3316755294799805, "loss_ce": 0.024058308452367783, "loss_iou": 0.53125, "loss_num": 0.049560546875, "loss_xval": 1.3046875, "num_input_tokens_seen": 17688132, "step": 316 }, { "epoch": 0.7060133630289532, "grad_norm": 26.666362762451172, "learning_rate": 1e-06, "loss": 1.4387, "num_input_tokens_seen": 17744136, "step": 317 }, { "epoch": 0.7060133630289532, "loss": 1.4408788681030273, "loss_ce": 0.038046881556510925, "loss_iou": 0.58984375, "loss_num": 0.044677734375, "loss_xval": 1.40625, "num_input_tokens_seen": 17744136, "step": 317 }, { "epoch": 0.7082405345211581, "grad_norm": 29.52345848083496, "learning_rate": 1e-06, "loss": 1.6314, "num_input_tokens_seen": 17802324, "step": 318 }, { "epoch": 0.7082405345211581, "loss": 1.7159836292266846, "loss_ce": 0.018229741603136063, "loss_iou": 0.6875, "loss_num": 0.06396484375, "loss_xval": 1.6953125, "num_input_tokens_seen": 17802324, "step": 318 }, { "epoch": 0.7104677060133631, "grad_norm": 31.622154235839844, "learning_rate": 1e-06, "loss": 1.765, "num_input_tokens_seen": 17860648, "step": 319 }, { "epoch": 0.7104677060133631, "loss": 1.6791552305221558, "loss_ce": 0.010209913365542889, "loss_iou": 0.671875, "loss_num": 0.06494140625, "loss_xval": 1.671875, "num_input_tokens_seen": 17860648, "step": 319 }, { "epoch": 0.7126948775055679, "grad_norm": 38.03776931762695, "learning_rate": 1e-06, "loss": 1.3441, "num_input_tokens_seen": 17915988, "step": 320 }, { "epoch": 0.7126948775055679, "loss": 1.6516163349151611, "loss_ce": 0.017338957637548447, "loss_iou": 0.65234375, "loss_num": 0.06640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 17915988, "step": 320 }, { "epoch": 0.7149220489977728, "grad_norm": 30.399452209472656, "learning_rate": 1e-06, "loss": 1.4676, "num_input_tokens_seen": 17974328, "step": 321 }, { "epoch": 0.7149220489977728, "loss": 1.5550212860107422, "loss_ce": 0.01888836920261383, "loss_iou": 0.62109375, "loss_num": 0.05908203125, "loss_xval": 1.5390625, "num_input_tokens_seen": 17974328, "step": 321 }, { "epoch": 0.7171492204899778, "grad_norm": 34.999053955078125, "learning_rate": 1e-06, "loss": 1.5055, "num_input_tokens_seen": 18027768, "step": 322 }, { "epoch": 0.7171492204899778, "loss": 1.5647876262664795, "loss_ce": 0.014494719915091991, "loss_iou": 0.65625, "loss_num": 0.048095703125, "loss_xval": 1.546875, "num_input_tokens_seen": 18027768, "step": 322 }, { "epoch": 0.7193763919821826, "grad_norm": 48.161949157714844, "learning_rate": 1e-06, "loss": 1.5941, "num_input_tokens_seen": 18086484, "step": 323 }, { "epoch": 0.7193763919821826, "loss": 1.713207483291626, "loss_ce": 0.02326606959104538, "loss_iou": 0.70703125, "loss_num": 0.05517578125, "loss_xval": 1.6875, "num_input_tokens_seen": 18086484, "step": 323 }, { "epoch": 0.7216035634743875, "grad_norm": 68.18085479736328, "learning_rate": 1e-06, "loss": 1.604, "num_input_tokens_seen": 18142296, "step": 324 }, { "epoch": 0.7216035634743875, "loss": 1.4922294616699219, "loss_ce": 0.012249022722244263, "loss_iou": 0.59375, "loss_num": 0.05810546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 18142296, "step": 324 }, { "epoch": 0.7238307349665924, "grad_norm": 23.24517250061035, "learning_rate": 1e-06, "loss": 1.6705, "num_input_tokens_seen": 18201016, "step": 325 }, { "epoch": 0.7238307349665924, "loss": 1.8428983688354492, "loss_ce": 0.0069608502089977264, "loss_iou": 0.77734375, "loss_num": 0.056396484375, "loss_xval": 1.8359375, "num_input_tokens_seen": 18201016, "step": 325 }, { "epoch": 0.7260579064587973, "grad_norm": 95.58949279785156, "learning_rate": 1e-06, "loss": 1.7858, "num_input_tokens_seen": 18256504, "step": 326 }, { "epoch": 0.7260579064587973, "loss": 1.7894692420959473, "loss_ce": 0.013102035038173199, "loss_iou": 0.69921875, "loss_num": 0.07568359375, "loss_xval": 1.7734375, "num_input_tokens_seen": 18256504, "step": 326 }, { "epoch": 0.7282850779510023, "grad_norm": 23.247690200805664, "learning_rate": 1e-06, "loss": 1.5796, "num_input_tokens_seen": 18310452, "step": 327 }, { "epoch": 0.7282850779510023, "loss": 1.59755539894104, "loss_ce": 0.010641279630362988, "loss_iou": 0.65234375, "loss_num": 0.05615234375, "loss_xval": 1.5859375, "num_input_tokens_seen": 18310452, "step": 327 }, { "epoch": 0.7305122494432071, "grad_norm": 67.12216186523438, "learning_rate": 1e-06, "loss": 1.4647, "num_input_tokens_seen": 18367084, "step": 328 }, { "epoch": 0.7305122494432071, "loss": 1.48179030418396, "loss_ce": 0.014016897417604923, "loss_iou": 0.5859375, "loss_num": 0.05908203125, "loss_xval": 1.46875, "num_input_tokens_seen": 18367084, "step": 328 }, { "epoch": 0.732739420935412, "grad_norm": 31.022777557373047, "learning_rate": 1e-06, "loss": 1.4833, "num_input_tokens_seen": 18424920, "step": 329 }, { "epoch": 0.732739420935412, "loss": 1.486365795135498, "loss_ce": 0.014686101116240025, "loss_iou": 0.62109375, "loss_num": 0.0458984375, "loss_xval": 1.46875, "num_input_tokens_seen": 18424920, "step": 329 }, { "epoch": 0.734966592427617, "grad_norm": 29.17088508605957, "learning_rate": 1e-06, "loss": 1.1757, "num_input_tokens_seen": 18478976, "step": 330 }, { "epoch": 0.734966592427617, "loss": 1.1004878282546997, "loss_ce": 0.005761317443102598, "loss_iou": 0.453125, "loss_num": 0.037841796875, "loss_xval": 1.09375, "num_input_tokens_seen": 18478976, "step": 330 }, { "epoch": 0.7371937639198218, "grad_norm": 20.834753036499023, "learning_rate": 1e-06, "loss": 1.1889, "num_input_tokens_seen": 18536144, "step": 331 }, { "epoch": 0.7371937639198218, "loss": 1.3901407718658447, "loss_ce": 0.0014689104864373803, "loss_iou": 0.578125, "loss_num": 0.04638671875, "loss_xval": 1.390625, "num_input_tokens_seen": 18536144, "step": 331 }, { "epoch": 0.7394209354120267, "grad_norm": 68.39385986328125, "learning_rate": 1e-06, "loss": 1.5586, "num_input_tokens_seen": 18593860, "step": 332 }, { "epoch": 0.7394209354120267, "loss": 1.5588513612747192, "loss_ce": 0.005140438210219145, "loss_iou": 0.6484375, "loss_num": 0.052001953125, "loss_xval": 1.5546875, "num_input_tokens_seen": 18593860, "step": 332 }, { "epoch": 0.7416481069042317, "grad_norm": 19.545167922973633, "learning_rate": 1e-06, "loss": 1.5776, "num_input_tokens_seen": 18647892, "step": 333 }, { "epoch": 0.7416481069042317, "loss": 1.6709346771240234, "loss_ce": 0.017126010730862617, "loss_iou": 0.640625, "loss_num": 0.07421875, "loss_xval": 1.65625, "num_input_tokens_seen": 18647892, "step": 333 }, { "epoch": 0.7438752783964365, "grad_norm": 47.12783432006836, "learning_rate": 1e-06, "loss": 1.503, "num_input_tokens_seen": 18704784, "step": 334 }, { "epoch": 0.7438752783964365, "loss": 1.865045428276062, "loss_ce": 0.012994609773159027, "loss_iou": 0.796875, "loss_num": 0.052001953125, "loss_xval": 1.8515625, "num_input_tokens_seen": 18704784, "step": 334 }, { "epoch": 0.7461024498886414, "grad_norm": 25.380210876464844, "learning_rate": 1e-06, "loss": 1.1623, "num_input_tokens_seen": 18761996, "step": 335 }, { "epoch": 0.7461024498886414, "loss": 1.0759522914886475, "loss_ce": 0.015649594366550446, "loss_iou": 0.423828125, "loss_num": 0.042724609375, "loss_xval": 1.0625, "num_input_tokens_seen": 18761996, "step": 335 }, { "epoch": 0.7483296213808464, "grad_norm": 19.682802200317383, "learning_rate": 1e-06, "loss": 1.4553, "num_input_tokens_seen": 18819652, "step": 336 }, { "epoch": 0.7483296213808464, "loss": 1.3878556489944458, "loss_ce": 0.011879058554768562, "loss_iou": 0.5546875, "loss_num": 0.053466796875, "loss_xval": 1.375, "num_input_tokens_seen": 18819652, "step": 336 }, { "epoch": 0.7505567928730512, "grad_norm": 91.77301025390625, "learning_rate": 1e-06, "loss": 1.6139, "num_input_tokens_seen": 18877576, "step": 337 }, { "epoch": 0.7505567928730512, "loss": 1.334415316581726, "loss_ce": 0.008731753565371037, "loss_iou": 0.546875, "loss_num": 0.0458984375, "loss_xval": 1.328125, "num_input_tokens_seen": 18877576, "step": 337 }, { "epoch": 0.7527839643652561, "grad_norm": 28.926156997680664, "learning_rate": 1e-06, "loss": 1.5567, "num_input_tokens_seen": 18935300, "step": 338 }, { "epoch": 0.7527839643652561, "loss": 1.7415505647659302, "loss_ce": 0.01205837819725275, "loss_iou": 0.671875, "loss_num": 0.076171875, "loss_xval": 1.7265625, "num_input_tokens_seen": 18935300, "step": 338 }, { "epoch": 0.755011135857461, "grad_norm": 36.80242919921875, "learning_rate": 1e-06, "loss": 1.783, "num_input_tokens_seen": 18989472, "step": 339 }, { "epoch": 0.755011135857461, "loss": 1.8453316688537598, "loss_ce": 0.012812146916985512, "loss_iou": 0.77734375, "loss_num": 0.0546875, "loss_xval": 1.8359375, "num_input_tokens_seen": 18989472, "step": 339 }, { "epoch": 0.7572383073496659, "grad_norm": 19.47812271118164, "learning_rate": 1e-06, "loss": 1.5968, "num_input_tokens_seen": 19041164, "step": 340 }, { "epoch": 0.7572383073496659, "loss": 1.3463078737258911, "loss_ce": 0.017938785254955292, "loss_iou": 0.56640625, "loss_num": 0.039306640625, "loss_xval": 1.328125, "num_input_tokens_seen": 19041164, "step": 340 }, { "epoch": 0.7594654788418709, "grad_norm": 21.5255126953125, "learning_rate": 1e-06, "loss": 1.2247, "num_input_tokens_seen": 19098404, "step": 341 }, { "epoch": 0.7594654788418709, "loss": 1.1793166399002075, "loss_ce": 0.015254099853336811, "loss_iou": 0.470703125, "loss_num": 0.04443359375, "loss_xval": 1.1640625, "num_input_tokens_seen": 19098404, "step": 341 }, { "epoch": 0.7616926503340757, "grad_norm": 21.701622009277344, "learning_rate": 1e-06, "loss": 1.4873, "num_input_tokens_seen": 19152172, "step": 342 }, { "epoch": 0.7616926503340757, "loss": 1.5115392208099365, "loss_ce": 0.016422055661678314, "loss_iou": 0.5859375, "loss_num": 0.0634765625, "loss_xval": 1.4921875, "num_input_tokens_seen": 19152172, "step": 342 }, { "epoch": 0.7639198218262806, "grad_norm": 24.85000228881836, "learning_rate": 1e-06, "loss": 1.3042, "num_input_tokens_seen": 19208312, "step": 343 }, { "epoch": 0.7639198218262806, "loss": 1.3061310052871704, "loss_ce": 0.011697422713041306, "loss_iou": 0.54296875, "loss_num": 0.041259765625, "loss_xval": 1.296875, "num_input_tokens_seen": 19208312, "step": 343 }, { "epoch": 0.7661469933184856, "grad_norm": 28.263137817382812, "learning_rate": 1e-06, "loss": 1.4686, "num_input_tokens_seen": 19263288, "step": 344 }, { "epoch": 0.7661469933184856, "loss": 1.6164791584014893, "loss_ce": 0.008080787025392056, "loss_iou": 0.64453125, "loss_num": 0.064453125, "loss_xval": 1.609375, "num_input_tokens_seen": 19263288, "step": 344 }, { "epoch": 0.7683741648106904, "grad_norm": 26.359323501586914, "learning_rate": 1e-06, "loss": 1.5675, "num_input_tokens_seen": 19322276, "step": 345 }, { "epoch": 0.7683741648106904, "loss": 1.6035141944885254, "loss_ce": 0.007078767288476229, "loss_iou": 0.6484375, "loss_num": 0.0595703125, "loss_xval": 1.59375, "num_input_tokens_seen": 19322276, "step": 345 }, { "epoch": 0.7706013363028953, "grad_norm": 28.58954429626465, "learning_rate": 1e-06, "loss": 1.4959, "num_input_tokens_seen": 19377088, "step": 346 }, { "epoch": 0.7706013363028953, "loss": 1.7983763217926025, "loss_ce": 0.02982161194086075, "loss_iou": 0.65234375, "loss_num": 0.09326171875, "loss_xval": 1.765625, "num_input_tokens_seen": 19377088, "step": 346 }, { "epoch": 0.7728285077951003, "grad_norm": 32.53110885620117, "learning_rate": 1e-06, "loss": 1.468, "num_input_tokens_seen": 19432868, "step": 347 }, { "epoch": 0.7728285077951003, "loss": 1.644303560256958, "loss_ce": 0.006120047532021999, "loss_iou": 0.703125, "loss_num": 0.04638671875, "loss_xval": 1.640625, "num_input_tokens_seen": 19432868, "step": 347 }, { "epoch": 0.7750556792873051, "grad_norm": 19.344207763671875, "learning_rate": 1e-06, "loss": 1.4323, "num_input_tokens_seen": 19490488, "step": 348 }, { "epoch": 0.7750556792873051, "loss": 1.4876625537872314, "loss_ce": 0.005729038268327713, "loss_iou": 0.5546875, "loss_num": 0.07373046875, "loss_xval": 1.484375, "num_input_tokens_seen": 19490488, "step": 348 }, { "epoch": 0.77728285077951, "grad_norm": 142.5430145263672, "learning_rate": 1e-06, "loss": 1.4212, "num_input_tokens_seen": 19546108, "step": 349 }, { "epoch": 0.77728285077951, "loss": 1.081338882446289, "loss_ce": 0.0024814759381115437, "loss_iou": 0.4453125, "loss_num": 0.037841796875, "loss_xval": 1.078125, "num_input_tokens_seen": 19546108, "step": 349 }, { "epoch": 0.779510022271715, "grad_norm": 27.834339141845703, "learning_rate": 1e-06, "loss": 1.4574, "num_input_tokens_seen": 19602608, "step": 350 }, { "epoch": 0.779510022271715, "loss": 1.220694899559021, "loss_ce": 0.0029214350506663322, "loss_iou": 0.494140625, "loss_num": 0.0458984375, "loss_xval": 1.21875, "num_input_tokens_seen": 19602608, "step": 350 }, { "epoch": 0.7817371937639198, "grad_norm": 51.78019332885742, "learning_rate": 1e-06, "loss": 1.3015, "num_input_tokens_seen": 19659844, "step": 351 }, { "epoch": 0.7817371937639198, "loss": 0.9219905138015747, "loss_ce": 0.007439759094268084, "loss_iou": 0.388671875, "loss_num": 0.0277099609375, "loss_xval": 0.9140625, "num_input_tokens_seen": 19659844, "step": 351 }, { "epoch": 0.7839643652561247, "grad_norm": 19.256717681884766, "learning_rate": 1e-06, "loss": 1.552, "num_input_tokens_seen": 19716640, "step": 352 }, { "epoch": 0.7839643652561247, "loss": 1.5087199211120605, "loss_ce": 0.00481368275359273, "loss_iou": 0.6171875, "loss_num": 0.0537109375, "loss_xval": 1.5, "num_input_tokens_seen": 19716640, "step": 352 }, { "epoch": 0.7861915367483296, "grad_norm": 25.72173500061035, "learning_rate": 1e-06, "loss": 1.4851, "num_input_tokens_seen": 19771512, "step": 353 }, { "epoch": 0.7861915367483296, "loss": 1.3254516124725342, "loss_ce": 0.028576675802469254, "loss_iou": 0.51953125, "loss_num": 0.051025390625, "loss_xval": 1.296875, "num_input_tokens_seen": 19771512, "step": 353 }, { "epoch": 0.7884187082405345, "grad_norm": 27.796306610107422, "learning_rate": 1e-06, "loss": 1.1745, "num_input_tokens_seen": 19827200, "step": 354 }, { "epoch": 0.7884187082405345, "loss": 1.0663065910339355, "loss_ce": 0.0013651238987222314, "loss_iou": 0.458984375, "loss_num": 0.029296875, "loss_xval": 1.0625, "num_input_tokens_seen": 19827200, "step": 354 }, { "epoch": 0.7906458797327395, "grad_norm": 29.534481048583984, "learning_rate": 1e-06, "loss": 1.2979, "num_input_tokens_seen": 19883360, "step": 355 }, { "epoch": 0.7906458797327395, "loss": 1.285064697265625, "loss_ce": 0.014556895941495895, "loss_iou": 0.484375, "loss_num": 0.060791015625, "loss_xval": 1.2734375, "num_input_tokens_seen": 19883360, "step": 355 }, { "epoch": 0.7928730512249443, "grad_norm": 20.099090576171875, "learning_rate": 1e-06, "loss": 1.4715, "num_input_tokens_seen": 19940216, "step": 356 }, { "epoch": 0.7928730512249443, "loss": 1.6332509517669678, "loss_ce": 0.00483296625316143, "loss_iou": 0.6640625, "loss_num": 0.060546875, "loss_xval": 1.625, "num_input_tokens_seen": 19940216, "step": 356 }, { "epoch": 0.7951002227171492, "grad_norm": 23.28925323486328, "learning_rate": 1e-06, "loss": 1.4499, "num_input_tokens_seen": 19996716, "step": 357 }, { "epoch": 0.7951002227171492, "loss": 1.103065848350525, "loss_ce": 0.007850958965718746, "loss_iou": 0.453125, "loss_num": 0.038330078125, "loss_xval": 1.09375, "num_input_tokens_seen": 19996716, "step": 357 }, { "epoch": 0.7973273942093542, "grad_norm": 26.855409622192383, "learning_rate": 1e-06, "loss": 1.1852, "num_input_tokens_seen": 20052192, "step": 358 }, { "epoch": 0.7973273942093542, "loss": 1.3929839134216309, "loss_ce": 0.003823714330792427, "loss_iou": 0.5703125, "loss_num": 0.050537109375, "loss_xval": 1.390625, "num_input_tokens_seen": 20052192, "step": 358 }, { "epoch": 0.799554565701559, "grad_norm": 32.07172393798828, "learning_rate": 1e-06, "loss": 1.5933, "num_input_tokens_seen": 20110068, "step": 359 }, { "epoch": 0.799554565701559, "loss": 1.588189721107483, "loss_ce": 0.005181873217225075, "loss_iou": 0.65234375, "loss_num": 0.05517578125, "loss_xval": 1.5859375, "num_input_tokens_seen": 20110068, "step": 359 }, { "epoch": 0.8017817371937639, "grad_norm": 32.26838684082031, "learning_rate": 1e-06, "loss": 1.451, "num_input_tokens_seen": 20166760, "step": 360 }, { "epoch": 0.8017817371937639, "loss": 1.4686858654022217, "loss_ce": 0.017513982951641083, "loss_iou": 0.609375, "loss_num": 0.04638671875, "loss_xval": 1.453125, "num_input_tokens_seen": 20166760, "step": 360 }, { "epoch": 0.8040089086859689, "grad_norm": 31.04205894470215, "learning_rate": 1e-06, "loss": 1.3697, "num_input_tokens_seen": 20223556, "step": 361 }, { "epoch": 0.8040089086859689, "loss": 1.2945581674575806, "loss_ce": 0.006228114478290081, "loss_iou": 0.515625, "loss_num": 0.05078125, "loss_xval": 1.2890625, "num_input_tokens_seen": 20223556, "step": 361 }, { "epoch": 0.8062360801781737, "grad_norm": 28.388748168945312, "learning_rate": 1e-06, "loss": 1.4442, "num_input_tokens_seen": 20283264, "step": 362 }, { "epoch": 0.8062360801781737, "loss": 1.3582143783569336, "loss_ce": 0.002745626959949732, "loss_iou": 0.5625, "loss_num": 0.04541015625, "loss_xval": 1.359375, "num_input_tokens_seen": 20283264, "step": 362 }, { "epoch": 0.8084632516703786, "grad_norm": 49.78532028198242, "learning_rate": 1e-06, "loss": 1.3255, "num_input_tokens_seen": 20337404, "step": 363 }, { "epoch": 0.8084632516703786, "loss": 1.1450395584106445, "loss_ce": 0.015156792476773262, "loss_iou": 0.412109375, "loss_num": 0.061767578125, "loss_xval": 1.1328125, "num_input_tokens_seen": 20337404, "step": 363 }, { "epoch": 0.8106904231625836, "grad_norm": 48.754234313964844, "learning_rate": 1e-06, "loss": 1.2584, "num_input_tokens_seen": 20393236, "step": 364 }, { "epoch": 0.8106904231625836, "loss": 1.1171329021453857, "loss_ce": 0.007513846270740032, "loss_iou": 0.4453125, "loss_num": 0.04345703125, "loss_xval": 1.109375, "num_input_tokens_seen": 20393236, "step": 364 }, { "epoch": 0.8129175946547884, "grad_norm": 49.22020721435547, "learning_rate": 1e-06, "loss": 1.4507, "num_input_tokens_seen": 20451716, "step": 365 }, { "epoch": 0.8129175946547884, "loss": 1.6032345294952393, "loss_ce": 0.046593837440013885, "loss_iou": 0.6171875, "loss_num": 0.064453125, "loss_xval": 1.5546875, "num_input_tokens_seen": 20451716, "step": 365 }, { "epoch": 0.8151447661469933, "grad_norm": 81.98351287841797, "learning_rate": 1e-06, "loss": 1.5286, "num_input_tokens_seen": 20507752, "step": 366 }, { "epoch": 0.8151447661469933, "loss": 1.5305746793746948, "loss_ce": 0.0032309277448803186, "loss_iou": 0.62890625, "loss_num": 0.054443359375, "loss_xval": 1.53125, "num_input_tokens_seen": 20507752, "step": 366 }, { "epoch": 0.8173719376391982, "grad_norm": 20.340085983276367, "learning_rate": 1e-06, "loss": 1.245, "num_input_tokens_seen": 20561536, "step": 367 }, { "epoch": 0.8173719376391982, "loss": 1.0740153789520264, "loss_ce": 0.01566578447818756, "loss_iou": 0.400390625, "loss_num": 0.05126953125, "loss_xval": 1.0546875, "num_input_tokens_seen": 20561536, "step": 367 }, { "epoch": 0.8195991091314031, "grad_norm": 28.017810821533203, "learning_rate": 1e-06, "loss": 1.4366, "num_input_tokens_seen": 20614508, "step": 368 }, { "epoch": 0.8195991091314031, "loss": 1.5188279151916504, "loss_ce": 0.033476315438747406, "loss_iou": 0.6015625, "loss_num": 0.05615234375, "loss_xval": 1.484375, "num_input_tokens_seen": 20614508, "step": 368 }, { "epoch": 0.821826280623608, "grad_norm": 26.68250846862793, "learning_rate": 1e-06, "loss": 1.1561, "num_input_tokens_seen": 20669580, "step": 369 }, { "epoch": 0.821826280623608, "loss": 1.2030680179595947, "loss_ce": 0.0014078648528084159, "loss_iou": 0.5, "loss_num": 0.040283203125, "loss_xval": 1.203125, "num_input_tokens_seen": 20669580, "step": 369 }, { "epoch": 0.8240534521158129, "grad_norm": 19.57592010498047, "learning_rate": 1e-06, "loss": 1.2601, "num_input_tokens_seen": 20723132, "step": 370 }, { "epoch": 0.8240534521158129, "loss": 1.2386668920516968, "loss_ce": 0.013080945238471031, "loss_iou": 0.5390625, "loss_num": 0.030029296875, "loss_xval": 1.2265625, "num_input_tokens_seen": 20723132, "step": 370 }, { "epoch": 0.8262806236080178, "grad_norm": 38.58708190917969, "learning_rate": 1e-06, "loss": 1.5873, "num_input_tokens_seen": 20779144, "step": 371 }, { "epoch": 0.8262806236080178, "loss": 1.2857810258865356, "loss_ce": 0.013320127502083778, "loss_iou": 0.49609375, "loss_num": 0.055908203125, "loss_xval": 1.2734375, "num_input_tokens_seen": 20779144, "step": 371 }, { "epoch": 0.8285077951002228, "grad_norm": 27.13821792602539, "learning_rate": 1e-06, "loss": 1.7652, "num_input_tokens_seen": 20835076, "step": 372 }, { "epoch": 0.8285077951002228, "loss": 1.7804100513458252, "loss_ce": 0.006972476840019226, "loss_iou": 0.69921875, "loss_num": 0.0751953125, "loss_xval": 1.7734375, "num_input_tokens_seen": 20835076, "step": 372 }, { "epoch": 0.8307349665924276, "grad_norm": 18.151918411254883, "learning_rate": 1e-06, "loss": 1.4218, "num_input_tokens_seen": 20891112, "step": 373 }, { "epoch": 0.8307349665924276, "loss": 1.4063541889190674, "loss_ce": 0.005475334823131561, "loss_iou": 0.56640625, "loss_num": 0.052978515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 20891112, "step": 373 }, { "epoch": 0.8329621380846325, "grad_norm": 20.646316528320312, "learning_rate": 1e-06, "loss": 1.2937, "num_input_tokens_seen": 20947924, "step": 374 }, { "epoch": 0.8329621380846325, "loss": 1.1752688884735107, "loss_ce": 0.02146025560796261, "loss_iou": 0.466796875, "loss_num": 0.043701171875, "loss_xval": 1.15625, "num_input_tokens_seen": 20947924, "step": 374 }, { "epoch": 0.8351893095768375, "grad_norm": 65.90545654296875, "learning_rate": 1e-06, "loss": 1.1793, "num_input_tokens_seen": 21003736, "step": 375 }, { "epoch": 0.8351893095768375, "loss": 1.0895462036132812, "loss_ce": 0.006538336630910635, "loss_iou": 0.4609375, "loss_num": 0.0322265625, "loss_xval": 1.0859375, "num_input_tokens_seen": 21003736, "step": 375 }, { "epoch": 0.8374164810690423, "grad_norm": 30.747459411621094, "learning_rate": 1e-06, "loss": 1.4332, "num_input_tokens_seen": 21058432, "step": 376 }, { "epoch": 0.8374164810690423, "loss": 1.4749113321304321, "loss_ce": 0.005673029460012913, "loss_iou": 0.54296875, "loss_num": 0.076171875, "loss_xval": 1.46875, "num_input_tokens_seen": 21058432, "step": 376 }, { "epoch": 0.8396436525612472, "grad_norm": 48.859676361083984, "learning_rate": 1e-06, "loss": 1.288, "num_input_tokens_seen": 21114724, "step": 377 }, { "epoch": 0.8396436525612472, "loss": 1.3059370517730713, "loss_ce": 0.02029254473745823, "loss_iou": 0.53515625, "loss_num": 0.0439453125, "loss_xval": 1.2890625, "num_input_tokens_seen": 21114724, "step": 377 }, { "epoch": 0.8418708240534521, "grad_norm": 34.50962448120117, "learning_rate": 1e-06, "loss": 1.237, "num_input_tokens_seen": 21169136, "step": 378 }, { "epoch": 0.8418708240534521, "loss": 1.1840803623199463, "loss_ce": 0.0024397093802690506, "loss_iou": 0.44921875, "loss_num": 0.05712890625, "loss_xval": 1.1796875, "num_input_tokens_seen": 21169136, "step": 378 }, { "epoch": 0.844097995545657, "grad_norm": 50.90530776977539, "learning_rate": 1e-06, "loss": 1.3962, "num_input_tokens_seen": 21225176, "step": 379 }, { "epoch": 0.844097995545657, "loss": 1.6650288105010986, "loss_ce": 0.02660096064209938, "loss_iou": 0.65625, "loss_num": 0.0654296875, "loss_xval": 1.640625, "num_input_tokens_seen": 21225176, "step": 379 }, { "epoch": 0.8463251670378619, "grad_norm": 30.996505737304688, "learning_rate": 1e-06, "loss": 1.2789, "num_input_tokens_seen": 21284272, "step": 380 }, { "epoch": 0.8463251670378619, "loss": 1.3432170152664185, "loss_ce": 0.0019084562081843615, "loss_iou": 0.52734375, "loss_num": 0.056884765625, "loss_xval": 1.34375, "num_input_tokens_seen": 21284272, "step": 380 }, { "epoch": 0.8485523385300668, "grad_norm": 20.33319854736328, "learning_rate": 1e-06, "loss": 1.1925, "num_input_tokens_seen": 21339980, "step": 381 }, { "epoch": 0.8485523385300668, "loss": 1.164018154144287, "loss_ce": 0.0019087546970695257, "loss_iou": 0.47265625, "loss_num": 0.043212890625, "loss_xval": 1.1640625, "num_input_tokens_seen": 21339980, "step": 381 }, { "epoch": 0.8507795100222717, "grad_norm": 47.0328369140625, "learning_rate": 1e-06, "loss": 1.4929, "num_input_tokens_seen": 21397864, "step": 382 }, { "epoch": 0.8507795100222717, "loss": 1.355049729347229, "loss_ce": 0.005440343637019396, "loss_iou": 0.55859375, "loss_num": 0.046142578125, "loss_xval": 1.3515625, "num_input_tokens_seen": 21397864, "step": 382 }, { "epoch": 0.8530066815144766, "grad_norm": 84.89624786376953, "learning_rate": 1e-06, "loss": 1.4523, "num_input_tokens_seen": 21453536, "step": 383 }, { "epoch": 0.8530066815144766, "loss": 1.6871811151504517, "loss_ce": 0.00944666936993599, "loss_iou": 0.65625, "loss_num": 0.0732421875, "loss_xval": 1.6796875, "num_input_tokens_seen": 21453536, "step": 383 }, { "epoch": 0.8552338530066815, "grad_norm": 17.764833450317383, "learning_rate": 1e-06, "loss": 1.4082, "num_input_tokens_seen": 21508716, "step": 384 }, { "epoch": 0.8552338530066815, "loss": 1.6076984405517578, "loss_ce": 0.015901662409305573, "loss_iou": 0.66015625, "loss_num": 0.0546875, "loss_xval": 1.59375, "num_input_tokens_seen": 21508716, "step": 384 }, { "epoch": 0.8574610244988864, "grad_norm": 21.195423126220703, "learning_rate": 1e-06, "loss": 1.1628, "num_input_tokens_seen": 21563776, "step": 385 }, { "epoch": 0.8574610244988864, "loss": 1.30448317527771, "loss_ce": 0.010049499571323395, "loss_iou": 0.546875, "loss_num": 0.039794921875, "loss_xval": 1.296875, "num_input_tokens_seen": 21563776, "step": 385 }, { "epoch": 0.8596881959910914, "grad_norm": 31.222137451171875, "learning_rate": 1e-06, "loss": 1.2988, "num_input_tokens_seen": 21617956, "step": 386 }, { "epoch": 0.8596881959910914, "loss": 1.2785615921020508, "loss_ce": 0.0092744380235672, "loss_iou": 0.49609375, "loss_num": 0.05517578125, "loss_xval": 1.265625, "num_input_tokens_seen": 21617956, "step": 386 }, { "epoch": 0.8619153674832962, "grad_norm": 43.02681350708008, "learning_rate": 1e-06, "loss": 1.5262, "num_input_tokens_seen": 21673748, "step": 387 }, { "epoch": 0.8619153674832962, "loss": 1.531335711479187, "loss_ce": 0.003991919104009867, "loss_iou": 0.58203125, "loss_num": 0.07177734375, "loss_xval": 1.53125, "num_input_tokens_seen": 21673748, "step": 387 }, { "epoch": 0.8641425389755011, "grad_norm": 41.39469909667969, "learning_rate": 1e-06, "loss": 1.2424, "num_input_tokens_seen": 21729848, "step": 388 }, { "epoch": 0.8641425389755011, "loss": 1.398338794708252, "loss_ce": 0.0013661817647516727, "loss_iou": 0.55859375, "loss_num": 0.055419921875, "loss_xval": 1.3984375, "num_input_tokens_seen": 21729848, "step": 388 }, { "epoch": 0.8663697104677061, "grad_norm": 24.085895538330078, "learning_rate": 1e-06, "loss": 1.3652, "num_input_tokens_seen": 21789184, "step": 389 }, { "epoch": 0.8663697104677061, "loss": 1.4368267059326172, "loss_ce": 0.012022039853036404, "loss_iou": 0.57421875, "loss_num": 0.0546875, "loss_xval": 1.421875, "num_input_tokens_seen": 21789184, "step": 389 }, { "epoch": 0.8685968819599109, "grad_norm": 21.240936279296875, "learning_rate": 1e-06, "loss": 1.1302, "num_input_tokens_seen": 21845372, "step": 390 }, { "epoch": 0.8685968819599109, "loss": 1.243898868560791, "loss_ce": 0.01001210231333971, "loss_iou": 0.515625, "loss_num": 0.040283203125, "loss_xval": 1.234375, "num_input_tokens_seen": 21845372, "step": 390 }, { "epoch": 0.8708240534521158, "grad_norm": 26.22933578491211, "learning_rate": 1e-06, "loss": 1.2685, "num_input_tokens_seen": 21898080, "step": 391 }, { "epoch": 0.8708240534521158, "loss": 1.6092109680175781, "loss_ce": 0.0179024338722229, "loss_iou": 0.65234375, "loss_num": 0.05810546875, "loss_xval": 1.59375, "num_input_tokens_seen": 21898080, "step": 391 }, { "epoch": 0.8730512249443207, "grad_norm": 68.2789077758789, "learning_rate": 1e-06, "loss": 1.4469, "num_input_tokens_seen": 21954884, "step": 392 }, { "epoch": 0.8730512249443207, "loss": 1.568161964416504, "loss_ce": 0.002732297871261835, "loss_iou": 0.64453125, "loss_num": 0.0556640625, "loss_xval": 1.5625, "num_input_tokens_seen": 21954884, "step": 392 }, { "epoch": 0.8752783964365256, "grad_norm": 27.22370719909668, "learning_rate": 1e-06, "loss": 1.2137, "num_input_tokens_seen": 22008924, "step": 393 }, { "epoch": 0.8752783964365256, "loss": 1.0929490327835083, "loss_ce": 0.004081860650330782, "loss_iou": 0.44921875, "loss_num": 0.038330078125, "loss_xval": 1.0859375, "num_input_tokens_seen": 22008924, "step": 393 }, { "epoch": 0.8775055679287305, "grad_norm": 82.91708374023438, "learning_rate": 1e-06, "loss": 1.6931, "num_input_tokens_seen": 22064736, "step": 394 }, { "epoch": 0.8775055679287305, "loss": 1.915067434310913, "loss_ce": 0.007840840145945549, "loss_iou": 0.765625, "loss_num": 0.07568359375, "loss_xval": 1.90625, "num_input_tokens_seen": 22064736, "step": 394 }, { "epoch": 0.8797327394209354, "grad_norm": 25.599044799804688, "learning_rate": 1e-06, "loss": 1.3419, "num_input_tokens_seen": 22119196, "step": 395 }, { "epoch": 0.8797327394209354, "loss": 1.3939309120178223, "loss_ce": 0.003794201649725437, "loss_iou": 0.55078125, "loss_num": 0.05712890625, "loss_xval": 1.390625, "num_input_tokens_seen": 22119196, "step": 395 }, { "epoch": 0.8819599109131403, "grad_norm": 38.43741226196289, "learning_rate": 1e-06, "loss": 1.3195, "num_input_tokens_seen": 22175900, "step": 396 }, { "epoch": 0.8819599109131403, "loss": 1.2486144304275513, "loss_ce": 0.013262815773487091, "loss_iou": 0.4921875, "loss_num": 0.050048828125, "loss_xval": 1.234375, "num_input_tokens_seen": 22175900, "step": 396 }, { "epoch": 0.8841870824053452, "grad_norm": 51.51115417480469, "learning_rate": 1e-06, "loss": 1.3812, "num_input_tokens_seen": 22232828, "step": 397 }, { "epoch": 0.8841870824053452, "loss": 1.6601974964141846, "loss_ce": 0.029338089749217033, "loss_iou": 0.6171875, "loss_num": 0.080078125, "loss_xval": 1.6328125, "num_input_tokens_seen": 22232828, "step": 397 }, { "epoch": 0.8864142538975501, "grad_norm": 29.84437370300293, "learning_rate": 1e-06, "loss": 1.145, "num_input_tokens_seen": 22290996, "step": 398 }, { "epoch": 0.8864142538975501, "loss": 1.0916481018066406, "loss_ce": 0.0008277894230559468, "loss_iou": 0.412109375, "loss_num": 0.053466796875, "loss_xval": 1.09375, "num_input_tokens_seen": 22290996, "step": 398 }, { "epoch": 0.888641425389755, "grad_norm": 34.59022903442383, "learning_rate": 1e-06, "loss": 1.1759, "num_input_tokens_seen": 22345820, "step": 399 }, { "epoch": 0.888641425389755, "loss": 1.1103270053863525, "loss_ce": 0.0009520421735942364, "loss_iou": 0.46875, "loss_num": 0.0341796875, "loss_xval": 1.109375, "num_input_tokens_seen": 22345820, "step": 399 }, { "epoch": 0.89086859688196, "grad_norm": 36.79861068725586, "learning_rate": 1e-06, "loss": 1.3186, "num_input_tokens_seen": 22403684, "step": 400 }, { "epoch": 0.89086859688196, "loss": 1.1463134288787842, "loss_ce": 0.005688320379704237, "loss_iou": 0.47265625, "loss_num": 0.0390625, "loss_xval": 1.140625, "num_input_tokens_seen": 22403684, "step": 400 }, { "epoch": 0.8930957683741648, "grad_norm": 21.335620880126953, "learning_rate": 1e-06, "loss": 1.3037, "num_input_tokens_seen": 22461644, "step": 401 }, { "epoch": 0.8930957683741648, "loss": 1.3235667943954468, "loss_ce": 0.007160472683608532, "loss_iou": 0.5390625, "loss_num": 0.048095703125, "loss_xval": 1.3125, "num_input_tokens_seen": 22461644, "step": 401 }, { "epoch": 0.8953229398663697, "grad_norm": 50.855709075927734, "learning_rate": 1e-06, "loss": 1.6207, "num_input_tokens_seen": 22518540, "step": 402 }, { "epoch": 0.8953229398663697, "loss": 1.8505743741989136, "loss_ce": 0.002918071812018752, "loss_iou": 0.71875, "loss_num": 0.08203125, "loss_xval": 1.84375, "num_input_tokens_seen": 22518540, "step": 402 }, { "epoch": 0.8975501113585747, "grad_norm": 25.960264205932617, "learning_rate": 1e-06, "loss": 1.412, "num_input_tokens_seen": 22575984, "step": 403 }, { "epoch": 0.8975501113585747, "loss": 1.4437758922576904, "loss_ce": 0.0028578725177794695, "loss_iou": 0.59375, "loss_num": 0.05126953125, "loss_xval": 1.4375, "num_input_tokens_seen": 22575984, "step": 403 }, { "epoch": 0.8997772828507795, "grad_norm": 61.44157791137695, "learning_rate": 1e-06, "loss": 1.1071, "num_input_tokens_seen": 22632724, "step": 404 }, { "epoch": 0.8997772828507795, "loss": 1.1888924837112427, "loss_ce": 0.002613153774291277, "loss_iou": 0.498046875, "loss_num": 0.0380859375, "loss_xval": 1.1875, "num_input_tokens_seen": 22632724, "step": 404 }, { "epoch": 0.9020044543429844, "grad_norm": 32.13711929321289, "learning_rate": 1e-06, "loss": 1.3325, "num_input_tokens_seen": 22689348, "step": 405 }, { "epoch": 0.9020044543429844, "loss": 1.1436662673950195, "loss_ce": 0.002064717700704932, "loss_iou": 0.494140625, "loss_num": 0.030517578125, "loss_xval": 1.140625, "num_input_tokens_seen": 22689348, "step": 405 }, { "epoch": 0.9042316258351893, "grad_norm": 22.586368560791016, "learning_rate": 1e-06, "loss": 1.3851, "num_input_tokens_seen": 22744560, "step": 406 }, { "epoch": 0.9042316258351893, "loss": 1.374314546585083, "loss_ce": 0.010056735947728157, "loss_iou": 0.5390625, "loss_num": 0.056884765625, "loss_xval": 1.3671875, "num_input_tokens_seen": 22744560, "step": 406 }, { "epoch": 0.9064587973273942, "grad_norm": 19.449066162109375, "learning_rate": 1e-06, "loss": 1.4187, "num_input_tokens_seen": 22796036, "step": 407 }, { "epoch": 0.9064587973273942, "loss": 1.272933006286621, "loss_ce": 0.0038900894578546286, "loss_iou": 0.5390625, "loss_num": 0.03857421875, "loss_xval": 1.265625, "num_input_tokens_seen": 22796036, "step": 407 }, { "epoch": 0.9086859688195991, "grad_norm": 17.967327117919922, "learning_rate": 1e-06, "loss": 1.101, "num_input_tokens_seen": 22851848, "step": 408 }, { "epoch": 0.9086859688195991, "loss": 0.8791401386260986, "loss_ce": 0.009511251002550125, "loss_iou": 0.35546875, "loss_num": 0.0311279296875, "loss_xval": 0.87109375, "num_input_tokens_seen": 22851848, "step": 408 }, { "epoch": 0.910913140311804, "grad_norm": 34.27125549316406, "learning_rate": 1e-06, "loss": 1.279, "num_input_tokens_seen": 22906068, "step": 409 }, { "epoch": 0.910913140311804, "loss": 1.3037530183792114, "loss_ce": 0.008098645135760307, "loss_iou": 0.546875, "loss_num": 0.041015625, "loss_xval": 1.296875, "num_input_tokens_seen": 22906068, "step": 409 }, { "epoch": 0.9131403118040089, "grad_norm": 21.108455657958984, "learning_rate": 1e-06, "loss": 1.3891, "num_input_tokens_seen": 22963800, "step": 410 }, { "epoch": 0.9131403118040089, "loss": 1.512423038482666, "loss_ce": 0.009981658309698105, "loss_iou": 0.6640625, "loss_num": 0.034912109375, "loss_xval": 1.5, "num_input_tokens_seen": 22963800, "step": 410 }, { "epoch": 0.9153674832962138, "grad_norm": 26.992612838745117, "learning_rate": 1e-06, "loss": 1.39, "num_input_tokens_seen": 23019888, "step": 411 }, { "epoch": 0.9153674832962138, "loss": 1.4112193584442139, "loss_ce": 0.0020396006293594837, "loss_iou": 0.5703125, "loss_num": 0.052490234375, "loss_xval": 1.40625, "num_input_tokens_seen": 23019888, "step": 411 }, { "epoch": 0.9175946547884187, "grad_norm": 25.17546272277832, "learning_rate": 1e-06, "loss": 1.2354, "num_input_tokens_seen": 23077784, "step": 412 }, { "epoch": 0.9175946547884187, "loss": 1.2591149806976318, "loss_ce": 0.02327517233788967, "loss_iou": 0.515625, "loss_num": 0.04052734375, "loss_xval": 1.234375, "num_input_tokens_seen": 23077784, "step": 412 }, { "epoch": 0.9198218262806236, "grad_norm": 50.736148834228516, "learning_rate": 1e-06, "loss": 1.1223, "num_input_tokens_seen": 23133656, "step": 413 }, { "epoch": 0.9198218262806236, "loss": 1.0429635047912598, "loss_ce": 0.0029244269244372845, "loss_iou": 0.404296875, "loss_num": 0.046630859375, "loss_xval": 1.0390625, "num_input_tokens_seen": 23133656, "step": 413 }, { "epoch": 0.9220489977728286, "grad_norm": 49.43931579589844, "learning_rate": 1e-06, "loss": 1.1577, "num_input_tokens_seen": 23190832, "step": 414 }, { "epoch": 0.9220489977728286, "loss": 1.1161892414093018, "loss_ce": 0.009988099336624146, "loss_iou": 0.431640625, "loss_num": 0.04833984375, "loss_xval": 1.109375, "num_input_tokens_seen": 23190832, "step": 414 }, { "epoch": 0.9242761692650334, "grad_norm": 31.02313232421875, "learning_rate": 1e-06, "loss": 1.4122, "num_input_tokens_seen": 23247304, "step": 415 }, { "epoch": 0.9242761692650334, "loss": 1.5239001512527466, "loss_ce": 0.003880674485117197, "loss_iou": 0.640625, "loss_num": 0.04833984375, "loss_xval": 1.5234375, "num_input_tokens_seen": 23247304, "step": 415 }, { "epoch": 0.9265033407572383, "grad_norm": 24.66490936279297, "learning_rate": 1e-06, "loss": 1.3191, "num_input_tokens_seen": 23302872, "step": 416 }, { "epoch": 0.9265033407572383, "loss": 1.2839527130126953, "loss_ce": 0.00563238188624382, "loss_iou": 0.51953125, "loss_num": 0.04833984375, "loss_xval": 1.28125, "num_input_tokens_seen": 23302872, "step": 416 }, { "epoch": 0.9287305122494433, "grad_norm": 29.012544631958008, "learning_rate": 1e-06, "loss": 1.4372, "num_input_tokens_seen": 23361228, "step": 417 }, { "epoch": 0.9287305122494433, "loss": 1.4470521211624146, "loss_ce": 0.008087254129350185, "loss_iou": 0.578125, "loss_num": 0.056640625, "loss_xval": 1.4375, "num_input_tokens_seen": 23361228, "step": 417 }, { "epoch": 0.9309576837416481, "grad_norm": 29.133703231811523, "learning_rate": 1e-06, "loss": 1.3035, "num_input_tokens_seen": 23417480, "step": 418 }, { "epoch": 0.9309576837416481, "loss": 1.2170838117599487, "loss_ce": 0.001263482728973031, "loss_iou": 0.482421875, "loss_num": 0.050537109375, "loss_xval": 1.21875, "num_input_tokens_seen": 23417480, "step": 418 }, { "epoch": 0.933184855233853, "grad_norm": 24.859573364257812, "learning_rate": 1e-06, "loss": 1.6284, "num_input_tokens_seen": 23474768, "step": 419 }, { "epoch": 0.933184855233853, "loss": 1.9370301961898804, "loss_ce": 0.03078020177781582, "loss_iou": 0.76171875, "loss_num": 0.0771484375, "loss_xval": 1.90625, "num_input_tokens_seen": 23474768, "step": 419 }, { "epoch": 0.9354120267260579, "grad_norm": 25.39378547668457, "learning_rate": 1e-06, "loss": 1.1607, "num_input_tokens_seen": 23530896, "step": 420 }, { "epoch": 0.9354120267260579, "loss": 1.2405550479888916, "loss_ce": 0.00471511110663414, "loss_iou": 0.53515625, "loss_num": 0.033935546875, "loss_xval": 1.234375, "num_input_tokens_seen": 23530896, "step": 420 }, { "epoch": 0.9376391982182628, "grad_norm": 39.46179962158203, "learning_rate": 1e-06, "loss": 1.4029, "num_input_tokens_seen": 23587564, "step": 421 }, { "epoch": 0.9376391982182628, "loss": 1.2013354301452637, "loss_ce": 0.0026050377637147903, "loss_iou": 0.466796875, "loss_num": 0.053466796875, "loss_xval": 1.1953125, "num_input_tokens_seen": 23587564, "step": 421 }, { "epoch": 0.9398663697104677, "grad_norm": 20.98834991455078, "learning_rate": 1e-06, "loss": 1.4864, "num_input_tokens_seen": 23641996, "step": 422 }, { "epoch": 0.9398663697104677, "loss": 1.4933122396469116, "loss_ce": 0.005030961707234383, "loss_iou": 0.6015625, "loss_num": 0.057373046875, "loss_xval": 1.484375, "num_input_tokens_seen": 23641996, "step": 422 }, { "epoch": 0.9420935412026726, "grad_norm": 28.44550895690918, "learning_rate": 1e-06, "loss": 1.3014, "num_input_tokens_seen": 23695464, "step": 423 }, { "epoch": 0.9420935412026726, "loss": 1.5061261653900146, "loss_ce": 0.021751180291175842, "loss_iou": 0.61328125, "loss_num": 0.05224609375, "loss_xval": 1.484375, "num_input_tokens_seen": 23695464, "step": 423 }, { "epoch": 0.9443207126948775, "grad_norm": 24.33575439453125, "learning_rate": 1e-06, "loss": 1.104, "num_input_tokens_seen": 23747676, "step": 424 }, { "epoch": 0.9443207126948775, "loss": 1.0683302879333496, "loss_ce": 0.0016799941658973694, "loss_iou": 0.439453125, "loss_num": 0.03759765625, "loss_xval": 1.0703125, "num_input_tokens_seen": 23747676, "step": 424 }, { "epoch": 0.9465478841870824, "grad_norm": 29.6976375579834, "learning_rate": 1e-06, "loss": 1.1804, "num_input_tokens_seen": 23805080, "step": 425 }, { "epoch": 0.9465478841870824, "loss": 1.1131433248519897, "loss_ce": 0.00132695899810642, "loss_iou": 0.453125, "loss_num": 0.041748046875, "loss_xval": 1.109375, "num_input_tokens_seen": 23805080, "step": 425 }, { "epoch": 0.9487750556792873, "grad_norm": 24.570606231689453, "learning_rate": 1e-06, "loss": 1.0864, "num_input_tokens_seen": 23862348, "step": 426 }, { "epoch": 0.9487750556792873, "loss": 0.9711905717849731, "loss_ce": 0.000975710921920836, "loss_iou": 0.416015625, "loss_num": 0.0277099609375, "loss_xval": 0.96875, "num_input_tokens_seen": 23862348, "step": 426 }, { "epoch": 0.9510022271714922, "grad_norm": 34.240631103515625, "learning_rate": 1e-06, "loss": 1.1878, "num_input_tokens_seen": 23919428, "step": 427 }, { "epoch": 0.9510022271714922, "loss": 1.3001813888549805, "loss_ce": 0.0015973602421581745, "loss_iou": 0.53125, "loss_num": 0.04736328125, "loss_xval": 1.296875, "num_input_tokens_seen": 23919428, "step": 427 }, { "epoch": 0.9532293986636972, "grad_norm": 22.28962516784668, "learning_rate": 1e-06, "loss": 1.0995, "num_input_tokens_seen": 23975564, "step": 428 }, { "epoch": 0.9532293986636972, "loss": 1.0272424221038818, "loss_ce": 0.01918586902320385, "loss_iou": 0.41796875, "loss_num": 0.0341796875, "loss_xval": 1.0078125, "num_input_tokens_seen": 23975564, "step": 428 }, { "epoch": 0.955456570155902, "grad_norm": 50.77242660522461, "learning_rate": 1e-06, "loss": 1.4387, "num_input_tokens_seen": 24032664, "step": 429 }, { "epoch": 0.955456570155902, "loss": 1.2324780225753784, "loss_ce": 0.019099093973636627, "loss_iou": 0.451171875, "loss_num": 0.0625, "loss_xval": 1.2109375, "num_input_tokens_seen": 24032664, "step": 429 }, { "epoch": 0.9576837416481069, "grad_norm": 17.031267166137695, "learning_rate": 1e-06, "loss": 1.0689, "num_input_tokens_seen": 24087656, "step": 430 }, { "epoch": 0.9576837416481069, "loss": 1.104911208152771, "loss_ce": 0.008475645445287228, "loss_iou": 0.4296875, "loss_num": 0.0478515625, "loss_xval": 1.09375, "num_input_tokens_seen": 24087656, "step": 430 }, { "epoch": 0.9599109131403119, "grad_norm": 29.331024169921875, "learning_rate": 1e-06, "loss": 1.404, "num_input_tokens_seen": 24141584, "step": 431 }, { "epoch": 0.9599109131403119, "loss": 1.457468032836914, "loss_ce": 0.027536382898688316, "loss_iou": 0.55859375, "loss_num": 0.06298828125, "loss_xval": 1.4296875, "num_input_tokens_seen": 24141584, "step": 431 }, { "epoch": 0.9621380846325167, "grad_norm": 23.636056900024414, "learning_rate": 1e-06, "loss": 1.379, "num_input_tokens_seen": 24197980, "step": 432 }, { "epoch": 0.9621380846325167, "loss": 1.2647449970245361, "loss_ce": 0.002049737609922886, "loss_iou": 0.51171875, "loss_num": 0.04833984375, "loss_xval": 1.265625, "num_input_tokens_seen": 24197980, "step": 432 }, { "epoch": 0.9643652561247216, "grad_norm": 94.4756851196289, "learning_rate": 1e-06, "loss": 1.3875, "num_input_tokens_seen": 24252980, "step": 433 }, { "epoch": 0.9643652561247216, "loss": 1.2504823207855225, "loss_ce": 0.0009705987758934498, "loss_iou": 0.474609375, "loss_num": 0.059814453125, "loss_xval": 1.25, "num_input_tokens_seen": 24252980, "step": 433 }, { "epoch": 0.9665924276169265, "grad_norm": 29.35396957397461, "learning_rate": 1e-06, "loss": 1.313, "num_input_tokens_seen": 24310388, "step": 434 }, { "epoch": 0.9665924276169265, "loss": 1.3304047584533691, "loss_ce": 0.015951739624142647, "loss_iou": 0.484375, "loss_num": 0.06884765625, "loss_xval": 1.3125, "num_input_tokens_seen": 24310388, "step": 434 }, { "epoch": 0.9688195991091314, "grad_norm": 19.95311737060547, "learning_rate": 1e-06, "loss": 1.0571, "num_input_tokens_seen": 24367988, "step": 435 }, { "epoch": 0.9688195991091314, "loss": 0.8543475270271301, "loss_ce": 0.0008319243206642568, "loss_iou": 0.345703125, "loss_num": 0.03271484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 24367988, "step": 435 }, { "epoch": 0.9710467706013363, "grad_norm": 17.18592071533203, "learning_rate": 1e-06, "loss": 1.4076, "num_input_tokens_seen": 24421072, "step": 436 }, { "epoch": 0.9710467706013363, "loss": 1.2863093614578247, "loss_ce": 0.0038386958185583353, "loss_iou": 0.5078125, "loss_num": 0.053466796875, "loss_xval": 1.28125, "num_input_tokens_seen": 24421072, "step": 436 }, { "epoch": 0.9732739420935412, "grad_norm": 45.679954528808594, "learning_rate": 1e-06, "loss": 1.4846, "num_input_tokens_seen": 24476364, "step": 437 }, { "epoch": 0.9732739420935412, "loss": 1.2888422012329102, "loss_ce": 0.0017329129623249173, "loss_iou": 0.5078125, "loss_num": 0.0537109375, "loss_xval": 1.2890625, "num_input_tokens_seen": 24476364, "step": 437 }, { "epoch": 0.9755011135857461, "grad_norm": 26.56547737121582, "learning_rate": 1e-06, "loss": 1.4019, "num_input_tokens_seen": 24532516, "step": 438 }, { "epoch": 0.9755011135857461, "loss": 1.554681420326233, "loss_ce": 0.003900158451870084, "loss_iou": 0.609375, "loss_num": 0.06640625, "loss_xval": 1.546875, "num_input_tokens_seen": 24532516, "step": 438 }, { "epoch": 0.977728285077951, "grad_norm": 37.293983459472656, "learning_rate": 1e-06, "loss": 1.2445, "num_input_tokens_seen": 24586972, "step": 439 }, { "epoch": 0.977728285077951, "loss": 1.148227572441101, "loss_ce": 0.0100439777597785, "loss_iou": 0.4140625, "loss_num": 0.06201171875, "loss_xval": 1.140625, "num_input_tokens_seen": 24586972, "step": 439 }, { "epoch": 0.9799554565701559, "grad_norm": 29.18305778503418, "learning_rate": 1e-06, "loss": 1.1585, "num_input_tokens_seen": 24643012, "step": 440 }, { "epoch": 0.9799554565701559, "loss": 1.3668557405471802, "loss_ce": 0.004306901711970568, "loss_iou": 0.5625, "loss_num": 0.0478515625, "loss_xval": 1.359375, "num_input_tokens_seen": 24643012, "step": 440 }, { "epoch": 0.9821826280623608, "grad_norm": 47.63218688964844, "learning_rate": 1e-06, "loss": 1.4442, "num_input_tokens_seen": 24697816, "step": 441 }, { "epoch": 0.9821826280623608, "loss": 1.5267517566680908, "loss_ce": 0.0018494933610782027, "loss_iou": 0.60546875, "loss_num": 0.0625, "loss_xval": 1.5234375, "num_input_tokens_seen": 24697816, "step": 441 }, { "epoch": 0.9844097995545658, "grad_norm": 21.678260803222656, "learning_rate": 1e-06, "loss": 1.3717, "num_input_tokens_seen": 24752152, "step": 442 }, { "epoch": 0.9844097995545658, "loss": 1.6792147159576416, "loss_ce": 0.005386614240705967, "loss_iou": 0.60546875, "loss_num": 0.0927734375, "loss_xval": 1.671875, "num_input_tokens_seen": 24752152, "step": 442 }, { "epoch": 0.9866369710467706, "grad_norm": 52.10433578491211, "learning_rate": 1e-06, "loss": 1.3047, "num_input_tokens_seen": 24810384, "step": 443 }, { "epoch": 0.9866369710467706, "loss": 1.4768739938735962, "loss_ce": 0.005682523362338543, "loss_iou": 0.58203125, "loss_num": 0.06201171875, "loss_xval": 1.46875, "num_input_tokens_seen": 24810384, "step": 443 }, { "epoch": 0.9888641425389755, "grad_norm": 35.90444564819336, "learning_rate": 1e-06, "loss": 1.4123, "num_input_tokens_seen": 24864948, "step": 444 }, { "epoch": 0.9888641425389755, "loss": 1.7221654653549194, "loss_ce": 0.0039037105161696672, "loss_iou": 0.69921875, "loss_num": 0.06494140625, "loss_xval": 1.71875, "num_input_tokens_seen": 24864948, "step": 444 }, { "epoch": 0.9910913140311804, "grad_norm": 20.43412208557129, "learning_rate": 1e-06, "loss": 1.3585, "num_input_tokens_seen": 24919520, "step": 445 }, { "epoch": 0.9910913140311804, "loss": 1.4697803258895874, "loss_ce": 0.01421389076858759, "loss_iou": 0.58984375, "loss_num": 0.05517578125, "loss_xval": 1.453125, "num_input_tokens_seen": 24919520, "step": 445 }, { "epoch": 0.9933184855233853, "grad_norm": 48.228004455566406, "learning_rate": 1e-06, "loss": 1.6479, "num_input_tokens_seen": 24977308, "step": 446 }, { "epoch": 0.9933184855233853, "loss": 1.6608734130859375, "loss_ce": 0.0016936406027525663, "loss_iou": 0.67578125, "loss_num": 0.0615234375, "loss_xval": 1.65625, "num_input_tokens_seen": 24977308, "step": 446 }, { "epoch": 0.9955456570155902, "grad_norm": 43.12616729736328, "learning_rate": 1e-06, "loss": 1.6302, "num_input_tokens_seen": 25033208, "step": 447 }, { "epoch": 0.9955456570155902, "loss": 1.7152773141860962, "loss_ce": 0.004828128032386303, "loss_iou": 0.61328125, "loss_num": 0.0966796875, "loss_xval": 1.7109375, "num_input_tokens_seen": 25033208, "step": 447 }, { "epoch": 0.9977728285077951, "grad_norm": 32.99639892578125, "learning_rate": 1e-06, "loss": 1.43, "num_input_tokens_seen": 25089796, "step": 448 }, { "epoch": 0.9977728285077951, "loss": 1.0105788707733154, "loss_ce": 0.005207820795476437, "loss_iou": 0.423828125, "loss_num": 0.031494140625, "loss_xval": 1.0078125, "num_input_tokens_seen": 25089796, "step": 448 }, { "epoch": 1.0, "grad_norm": 22.11530303955078, "learning_rate": 1e-06, "loss": 1.0883, "num_input_tokens_seen": 25146032, "step": 449 }, { "epoch": 1.0, "loss": 1.1784168481826782, "loss_ce": 0.006053549237549305, "loss_iou": 0.4609375, "loss_num": 0.05029296875, "loss_xval": 1.171875, "num_input_tokens_seen": 25146032, "step": 449 }, { "epoch": 1.0022271714922049, "grad_norm": 18.13511848449707, "learning_rate": 1e-06, "loss": 1.2001, "num_input_tokens_seen": 25204016, "step": 450 }, { "epoch": 1.0022271714922049, "loss": 1.3148114681243896, "loss_ce": 0.0042645614594221115, "loss_iou": 0.55078125, "loss_num": 0.042236328125, "loss_xval": 1.3125, "num_input_tokens_seen": 25204016, "step": 450 }, { "epoch": 1.0044543429844097, "grad_norm": 72.61564636230469, "learning_rate": 1e-06, "loss": 1.3781, "num_input_tokens_seen": 25258404, "step": 451 }, { "epoch": 1.0044543429844097, "loss": 1.4480741024017334, "loss_ce": 0.015945199877023697, "loss_iou": 0.55078125, "loss_num": 0.06689453125, "loss_xval": 1.4296875, "num_input_tokens_seen": 25258404, "step": 451 }, { "epoch": 1.0066815144766148, "grad_norm": 40.180419921875, "learning_rate": 1e-06, "loss": 1.4411, "num_input_tokens_seen": 25315080, "step": 452 }, { "epoch": 1.0066815144766148, "loss": 1.4863982200622559, "loss_ce": 0.005441202782094479, "loss_iou": 0.5703125, "loss_num": 0.068359375, "loss_xval": 1.484375, "num_input_tokens_seen": 25315080, "step": 452 }, { "epoch": 1.0089086859688197, "grad_norm": 28.12888526916504, "learning_rate": 1e-06, "loss": 1.1451, "num_input_tokens_seen": 25370644, "step": 453 }, { "epoch": 1.0089086859688197, "loss": 1.1297539472579956, "loss_ce": 0.01940234750509262, "loss_iou": 0.4453125, "loss_num": 0.043701171875, "loss_xval": 1.109375, "num_input_tokens_seen": 25370644, "step": 453 }, { "epoch": 1.0111358574610245, "grad_norm": 26.153356552124023, "learning_rate": 1e-06, "loss": 1.2351, "num_input_tokens_seen": 25425148, "step": 454 }, { "epoch": 1.0111358574610245, "loss": 1.6115047931671143, "loss_ce": 0.002618103986606002, "loss_iou": 0.5859375, "loss_num": 0.08837890625, "loss_xval": 1.609375, "num_input_tokens_seen": 25425148, "step": 454 }, { "epoch": 1.0133630289532294, "grad_norm": 41.62250900268555, "learning_rate": 1e-06, "loss": 1.4871, "num_input_tokens_seen": 25478780, "step": 455 }, { "epoch": 1.0133630289532294, "loss": 1.8347097635269165, "loss_ce": 0.004143323749303818, "loss_iou": 0.703125, "loss_num": 0.08544921875, "loss_xval": 1.828125, "num_input_tokens_seen": 25478780, "step": 455 }, { "epoch": 1.0155902004454342, "grad_norm": 54.628746032714844, "learning_rate": 1e-06, "loss": 1.2763, "num_input_tokens_seen": 25533068, "step": 456 }, { "epoch": 1.0155902004454342, "loss": 1.017959713935852, "loss_ce": 0.007217581383883953, "loss_iou": 0.384765625, "loss_num": 0.048583984375, "loss_xval": 1.0078125, "num_input_tokens_seen": 25533068, "step": 456 }, { "epoch": 1.017817371937639, "grad_norm": 19.17976951599121, "learning_rate": 1e-06, "loss": 1.1245, "num_input_tokens_seen": 25588116, "step": 457 }, { "epoch": 1.017817371937639, "loss": 1.1571969985961914, "loss_ce": 0.0024117890279740095, "loss_iou": 0.5078125, "loss_num": 0.02734375, "loss_xval": 1.15625, "num_input_tokens_seen": 25588116, "step": 457 }, { "epoch": 1.0200445434298442, "grad_norm": 59.96177291870117, "learning_rate": 1e-06, "loss": 1.213, "num_input_tokens_seen": 25645984, "step": 458 }, { "epoch": 1.0200445434298442, "loss": 1.2157493829727173, "loss_ce": 0.006276742089539766, "loss_iou": 0.455078125, "loss_num": 0.06005859375, "loss_xval": 1.2109375, "num_input_tokens_seen": 25645984, "step": 458 }, { "epoch": 1.022271714922049, "grad_norm": 22.60318946838379, "learning_rate": 1e-06, "loss": 1.5715, "num_input_tokens_seen": 25701456, "step": 459 }, { "epoch": 1.022271714922049, "loss": 1.8298416137695312, "loss_ce": 0.006599403452128172, "loss_iou": 0.75390625, "loss_num": 0.06298828125, "loss_xval": 1.8203125, "num_input_tokens_seen": 25701456, "step": 459 }, { "epoch": 1.024498886414254, "grad_norm": 30.009733200073242, "learning_rate": 1e-06, "loss": 1.2751, "num_input_tokens_seen": 25760304, "step": 460 }, { "epoch": 1.024498886414254, "loss": 0.9823180437088013, "loss_ce": 0.008441124111413956, "loss_iou": 0.388671875, "loss_num": 0.0390625, "loss_xval": 0.97265625, "num_input_tokens_seen": 25760304, "step": 460 }, { "epoch": 1.0267260579064588, "grad_norm": 29.346010208129883, "learning_rate": 1e-06, "loss": 1.0664, "num_input_tokens_seen": 25812724, "step": 461 }, { "epoch": 1.0267260579064588, "loss": 0.9884135723114014, "loss_ce": 0.006968258880078793, "loss_iou": 0.396484375, "loss_num": 0.03759765625, "loss_xval": 0.98046875, "num_input_tokens_seen": 25812724, "step": 461 }, { "epoch": 1.0289532293986636, "grad_norm": 75.70279693603516, "learning_rate": 1e-06, "loss": 1.2436, "num_input_tokens_seen": 25866680, "step": 462 }, { "epoch": 1.0289532293986636, "loss": 1.3562824726104736, "loss_ce": 0.010579358786344528, "loss_iou": 0.5625, "loss_num": 0.04443359375, "loss_xval": 1.34375, "num_input_tokens_seen": 25866680, "step": 462 }, { "epoch": 1.0311804008908685, "grad_norm": 54.88785171508789, "learning_rate": 1e-06, "loss": 1.2655, "num_input_tokens_seen": 25920128, "step": 463 }, { "epoch": 1.0311804008908685, "loss": 1.265822172164917, "loss_ce": 0.002150336978957057, "loss_iou": 0.486328125, "loss_num": 0.05810546875, "loss_xval": 1.265625, "num_input_tokens_seen": 25920128, "step": 463 }, { "epoch": 1.0334075723830736, "grad_norm": 24.761333465576172, "learning_rate": 1e-06, "loss": 1.2714, "num_input_tokens_seen": 25973036, "step": 464 }, { "epoch": 1.0334075723830736, "loss": 1.3659954071044922, "loss_ce": 0.0012494358234107494, "loss_iou": 0.515625, "loss_num": 0.0673828125, "loss_xval": 1.3671875, "num_input_tokens_seen": 25973036, "step": 464 }, { "epoch": 1.0356347438752784, "grad_norm": 23.98271369934082, "learning_rate": 1e-06, "loss": 1.3715, "num_input_tokens_seen": 26025288, "step": 465 }, { "epoch": 1.0356347438752784, "loss": 1.3842101097106934, "loss_ce": 0.007867315784096718, "loss_iou": 0.546875, "loss_num": 0.056640625, "loss_xval": 1.375, "num_input_tokens_seen": 26025288, "step": 465 }, { "epoch": 1.0378619153674833, "grad_norm": 21.38801383972168, "learning_rate": 1e-06, "loss": 1.2095, "num_input_tokens_seen": 26082480, "step": 466 }, { "epoch": 1.0378619153674833, "loss": 1.2851924896240234, "loss_ce": 0.0010128666181117296, "loss_iou": 0.50390625, "loss_num": 0.054931640625, "loss_xval": 1.28125, "num_input_tokens_seen": 26082480, "step": 466 }, { "epoch": 1.0400890868596881, "grad_norm": 25.396892547607422, "learning_rate": 1e-06, "loss": 1.0663, "num_input_tokens_seen": 26139504, "step": 467 }, { "epoch": 1.0400890868596881, "loss": 1.0588932037353516, "loss_ce": 0.0007877358002588153, "loss_iou": 0.44140625, "loss_num": 0.03466796875, "loss_xval": 1.0546875, "num_input_tokens_seen": 26139504, "step": 467 }, { "epoch": 1.042316258351893, "grad_norm": 18.479541778564453, "learning_rate": 1e-06, "loss": 1.2228, "num_input_tokens_seen": 26196448, "step": 468 }, { "epoch": 1.042316258351893, "loss": 1.2266954183578491, "loss_ce": 0.0011094606015831232, "loss_iou": 0.482421875, "loss_num": 0.052001953125, "loss_xval": 1.2265625, "num_input_tokens_seen": 26196448, "step": 468 }, { "epoch": 1.044543429844098, "grad_norm": 19.95421028137207, "learning_rate": 1e-06, "loss": 1.1337, "num_input_tokens_seen": 26250848, "step": 469 }, { "epoch": 1.044543429844098, "loss": 1.1349093914031982, "loss_ce": 0.0011203193571418524, "loss_iou": 0.421875, "loss_num": 0.05859375, "loss_xval": 1.1328125, "num_input_tokens_seen": 26250848, "step": 469 }, { "epoch": 1.046770601336303, "grad_norm": 29.218976974487305, "learning_rate": 1e-06, "loss": 1.0506, "num_input_tokens_seen": 26304848, "step": 470 }, { "epoch": 1.046770601336303, "loss": 0.7857218980789185, "loss_ce": 0.0015422652941197157, "loss_iou": 0.267578125, "loss_num": 0.049560546875, "loss_xval": 0.78515625, "num_input_tokens_seen": 26304848, "step": 470 }, { "epoch": 1.0489977728285078, "grad_norm": 26.657873153686523, "learning_rate": 1e-06, "loss": 1.2585, "num_input_tokens_seen": 26362040, "step": 471 }, { "epoch": 1.0489977728285078, "loss": 1.1070148944854736, "loss_ce": 0.0015461579896509647, "loss_iou": 0.4453125, "loss_num": 0.04345703125, "loss_xval": 1.109375, "num_input_tokens_seen": 26362040, "step": 471 }, { "epoch": 1.0512249443207127, "grad_norm": 24.098112106323242, "learning_rate": 1e-06, "loss": 1.2629, "num_input_tokens_seen": 26418744, "step": 472 }, { "epoch": 1.0512249443207127, "loss": 1.3726317882537842, "loss_ce": 0.008373986929655075, "loss_iou": 0.53125, "loss_num": 0.0595703125, "loss_xval": 1.3671875, "num_input_tokens_seen": 26418744, "step": 472 }, { "epoch": 1.0534521158129175, "grad_norm": 18.4561767578125, "learning_rate": 1e-06, "loss": 1.0389, "num_input_tokens_seen": 26476200, "step": 473 }, { "epoch": 1.0534521158129175, "loss": 1.1621673107147217, "loss_ce": 0.0010345308110117912, "loss_iou": 0.453125, "loss_num": 0.051025390625, "loss_xval": 1.1640625, "num_input_tokens_seen": 26476200, "step": 473 }, { "epoch": 1.0556792873051224, "grad_norm": 22.120088577270508, "learning_rate": 1e-06, "loss": 1.2004, "num_input_tokens_seen": 26532420, "step": 474 }, { "epoch": 1.0556792873051224, "loss": 1.0202289819717407, "loss_ce": 0.0014302136842161417, "loss_iou": 0.384765625, "loss_num": 0.050048828125, "loss_xval": 1.015625, "num_input_tokens_seen": 26532420, "step": 474 }, { "epoch": 1.0579064587973275, "grad_norm": 28.964649200439453, "learning_rate": 1e-06, "loss": 1.2759, "num_input_tokens_seen": 26585900, "step": 475 }, { "epoch": 1.0579064587973275, "loss": 1.5081238746643066, "loss_ce": 0.0017761469352990389, "loss_iou": 0.57421875, "loss_num": 0.072265625, "loss_xval": 1.5078125, "num_input_tokens_seen": 26585900, "step": 475 }, { "epoch": 1.0601336302895323, "grad_norm": 18.955963134765625, "learning_rate": 1e-06, "loss": 1.2483, "num_input_tokens_seen": 26640324, "step": 476 }, { "epoch": 1.0601336302895323, "loss": 1.3863422870635986, "loss_ce": 0.0040180860087275505, "loss_iou": 0.6015625, "loss_num": 0.03564453125, "loss_xval": 1.3828125, "num_input_tokens_seen": 26640324, "step": 476 }, { "epoch": 1.0623608017817372, "grad_norm": 29.491914749145508, "learning_rate": 1e-06, "loss": 1.2475, "num_input_tokens_seen": 26694532, "step": 477 }, { "epoch": 1.0623608017817372, "loss": 0.9849135875701904, "loss_ce": 0.0007827761583030224, "loss_iou": 0.369140625, "loss_num": 0.048828125, "loss_xval": 0.984375, "num_input_tokens_seen": 26694532, "step": 477 }, { "epoch": 1.064587973273942, "grad_norm": 65.8954849243164, "learning_rate": 1e-06, "loss": 1.4668, "num_input_tokens_seen": 26749932, "step": 478 }, { "epoch": 1.064587973273942, "loss": 1.4816560745239258, "loss_ce": 0.001187233254313469, "loss_iou": 0.5703125, "loss_num": 0.06787109375, "loss_xval": 1.484375, "num_input_tokens_seen": 26749932, "step": 478 }, { "epoch": 1.066815144766147, "grad_norm": 19.884841918945312, "learning_rate": 1e-06, "loss": 1.1807, "num_input_tokens_seen": 26804456, "step": 479 }, { "epoch": 1.066815144766147, "loss": 1.2711074352264404, "loss_ce": 0.002064512576907873, "loss_iou": 0.5234375, "loss_num": 0.044189453125, "loss_xval": 1.265625, "num_input_tokens_seen": 26804456, "step": 479 }, { "epoch": 1.069042316258352, "grad_norm": 22.818063735961914, "learning_rate": 1e-06, "loss": 1.3075, "num_input_tokens_seen": 26860280, "step": 480 }, { "epoch": 1.069042316258352, "loss": 1.333120584487915, "loss_ce": 0.0010893936268985271, "loss_iou": 0.5546875, "loss_num": 0.045166015625, "loss_xval": 1.328125, "num_input_tokens_seen": 26860280, "step": 480 }, { "epoch": 1.0712694877505569, "grad_norm": 21.60323715209961, "learning_rate": 1e-06, "loss": 1.1227, "num_input_tokens_seen": 26915848, "step": 481 }, { "epoch": 1.0712694877505569, "loss": 1.1749460697174072, "loss_ce": 0.002582841319963336, "loss_iou": 0.498046875, "loss_num": 0.03564453125, "loss_xval": 1.171875, "num_input_tokens_seen": 26915848, "step": 481 }, { "epoch": 1.0734966592427617, "grad_norm": 17.057666778564453, "learning_rate": 1e-06, "loss": 0.963, "num_input_tokens_seen": 26973348, "step": 482 }, { "epoch": 1.0734966592427617, "loss": 1.024942398071289, "loss_ce": 0.001016615773551166, "loss_iou": 0.4296875, "loss_num": 0.033203125, "loss_xval": 1.0234375, "num_input_tokens_seen": 26973348, "step": 482 }, { "epoch": 1.0757238307349666, "grad_norm": 26.440898895263672, "learning_rate": 1e-06, "loss": 1.2304, "num_input_tokens_seen": 27030864, "step": 483 }, { "epoch": 1.0757238307349666, "loss": 1.3861130475997925, "loss_ce": 0.00134742702357471, "loss_iou": 0.52734375, "loss_num": 0.06640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 27030864, "step": 483 }, { "epoch": 1.0779510022271714, "grad_norm": 19.473052978515625, "learning_rate": 1e-06, "loss": 1.494, "num_input_tokens_seen": 27084476, "step": 484 }, { "epoch": 1.0779510022271714, "loss": 1.7679762840270996, "loss_ce": 0.0008863758994266391, "loss_iou": 0.70703125, "loss_num": 0.07080078125, "loss_xval": 1.765625, "num_input_tokens_seen": 27084476, "step": 484 }, { "epoch": 1.0801781737193763, "grad_norm": 22.302900314331055, "learning_rate": 1e-06, "loss": 1.3245, "num_input_tokens_seen": 27138164, "step": 485 }, { "epoch": 1.0801781737193763, "loss": 1.4490606784820557, "loss_ce": 0.0008185390033759177, "loss_iou": 0.6171875, "loss_num": 0.042724609375, "loss_xval": 1.4453125, "num_input_tokens_seen": 27138164, "step": 485 }, { "epoch": 1.0824053452115814, "grad_norm": 27.68915557861328, "learning_rate": 1e-06, "loss": 0.9771, "num_input_tokens_seen": 27195028, "step": 486 }, { "epoch": 1.0824053452115814, "loss": 0.880718469619751, "loss_ce": 0.0018122631590813398, "loss_iou": 0.369140625, "loss_num": 0.0281982421875, "loss_xval": 0.87890625, "num_input_tokens_seen": 27195028, "step": 486 }, { "epoch": 1.0846325167037862, "grad_norm": 27.754865646362305, "learning_rate": 1e-06, "loss": 1.3244, "num_input_tokens_seen": 27250504, "step": 487 }, { "epoch": 1.0846325167037862, "loss": 1.085121750831604, "loss_ce": 0.0006490740925073624, "loss_iou": 0.4296875, "loss_num": 0.044677734375, "loss_xval": 1.0859375, "num_input_tokens_seen": 27250504, "step": 487 }, { "epoch": 1.086859688195991, "grad_norm": 29.184783935546875, "learning_rate": 1e-06, "loss": 1.131, "num_input_tokens_seen": 27306092, "step": 488 }, { "epoch": 1.086859688195991, "loss": 1.2078437805175781, "loss_ce": 0.007404354866594076, "loss_iou": 0.453125, "loss_num": 0.058837890625, "loss_xval": 1.203125, "num_input_tokens_seen": 27306092, "step": 488 }, { "epoch": 1.089086859688196, "grad_norm": 16.471162796020508, "learning_rate": 1e-06, "loss": 1.1771, "num_input_tokens_seen": 27362708, "step": 489 }, { "epoch": 1.089086859688196, "loss": 1.2551052570343018, "loss_ce": 0.0007107860874384642, "loss_iou": 0.52734375, "loss_num": 0.040283203125, "loss_xval": 1.2578125, "num_input_tokens_seen": 27362708, "step": 489 }, { "epoch": 1.0913140311804008, "grad_norm": 36.433353424072266, "learning_rate": 1e-06, "loss": 1.1646, "num_input_tokens_seen": 27421108, "step": 490 }, { "epoch": 1.0913140311804008, "loss": 1.1176813840866089, "loss_ce": 0.0014704548520967364, "loss_iou": 0.46484375, "loss_num": 0.037841796875, "loss_xval": 1.1171875, "num_input_tokens_seen": 27421108, "step": 490 }, { "epoch": 1.093541202672606, "grad_norm": 21.38726806640625, "learning_rate": 1e-06, "loss": 1.1923, "num_input_tokens_seen": 27475860, "step": 491 }, { "epoch": 1.093541202672606, "loss": 1.053163766860962, "loss_ce": 0.0058004953898489475, "loss_iou": 0.41796875, "loss_num": 0.042236328125, "loss_xval": 1.046875, "num_input_tokens_seen": 27475860, "step": 491 }, { "epoch": 1.0957683741648108, "grad_norm": 32.688072204589844, "learning_rate": 1e-06, "loss": 1.0674, "num_input_tokens_seen": 27533372, "step": 492 }, { "epoch": 1.0957683741648108, "loss": 0.7940797805786133, "loss_ce": 0.01527119055390358, "loss_iou": 0.3359375, "loss_num": 0.021240234375, "loss_xval": 0.77734375, "num_input_tokens_seen": 27533372, "step": 492 }, { "epoch": 1.0979955456570156, "grad_norm": 20.838420867919922, "learning_rate": 1e-06, "loss": 1.4187, "num_input_tokens_seen": 27588776, "step": 493 }, { "epoch": 1.0979955456570156, "loss": 1.4859492778778076, "loss_ce": 0.0015742178075015545, "loss_iou": 0.5625, "loss_num": 0.0712890625, "loss_xval": 1.484375, "num_input_tokens_seen": 27588776, "step": 493 }, { "epoch": 1.1002227171492205, "grad_norm": 48.34200668334961, "learning_rate": 1e-06, "loss": 1.3155, "num_input_tokens_seen": 27646412, "step": 494 }, { "epoch": 1.1002227171492205, "loss": 1.6553877592086792, "loss_ce": 0.004997197538614273, "loss_iou": 0.65234375, "loss_num": 0.06884765625, "loss_xval": 1.6484375, "num_input_tokens_seen": 27646412, "step": 494 }, { "epoch": 1.1024498886414253, "grad_norm": 23.207468032836914, "learning_rate": 1e-06, "loss": 1.1153, "num_input_tokens_seen": 27702240, "step": 495 }, { "epoch": 1.1024498886414253, "loss": 1.4165308475494385, "loss_ce": 0.003933266270905733, "loss_iou": 0.57421875, "loss_num": 0.05322265625, "loss_xval": 1.4140625, "num_input_tokens_seen": 27702240, "step": 495 }, { "epoch": 1.1046770601336302, "grad_norm": 15.092357635498047, "learning_rate": 1e-06, "loss": 0.9602, "num_input_tokens_seen": 27759140, "step": 496 }, { "epoch": 1.1046770601336302, "loss": 1.09522545337677, "loss_ce": 0.002940312959253788, "loss_iou": 0.439453125, "loss_num": 0.04248046875, "loss_xval": 1.09375, "num_input_tokens_seen": 27759140, "step": 496 }, { "epoch": 1.1069042316258353, "grad_norm": 23.996767044067383, "learning_rate": 1e-06, "loss": 1.2124, "num_input_tokens_seen": 27816900, "step": 497 }, { "epoch": 1.1069042316258353, "loss": 1.2346551418304443, "loss_ce": 0.004186346661299467, "loss_iou": 0.53125, "loss_num": 0.033447265625, "loss_xval": 1.234375, "num_input_tokens_seen": 27816900, "step": 497 }, { "epoch": 1.1091314031180401, "grad_norm": 154.3430633544922, "learning_rate": 1e-06, "loss": 1.2454, "num_input_tokens_seen": 27875428, "step": 498 }, { "epoch": 1.1091314031180401, "loss": 1.4324363470077515, "loss_ce": 0.0007956642657518387, "loss_iou": 0.59765625, "loss_num": 0.047607421875, "loss_xval": 1.4296875, "num_input_tokens_seen": 27875428, "step": 498 }, { "epoch": 1.111358574610245, "grad_norm": 30.189279556274414, "learning_rate": 1e-06, "loss": 1.4237, "num_input_tokens_seen": 27930704, "step": 499 }, { "epoch": 1.111358574610245, "loss": 1.6740535497665405, "loss_ce": 0.0021784906275570393, "loss_iou": 0.62109375, "loss_num": 0.08642578125, "loss_xval": 1.671875, "num_input_tokens_seen": 27930704, "step": 499 }, { "epoch": 1.1135857461024499, "grad_norm": 49.67326354980469, "learning_rate": 1e-06, "loss": 1.2495, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "eval_seeclick_web_CIoU": 0.48640576004981995, "eval_seeclick_web_GIoU": 0.47375138103961945, "eval_seeclick_web_IoU": 0.5116834044456482, "eval_seeclick_web_MAE_all": 0.01661441382020712, "eval_seeclick_web_MAE_h": 0.01039309287443757, "eval_seeclick_web_MAE_w": 0.017452615778893232, "eval_seeclick_web_MAE_x_boxes": 0.010556747671216726, "eval_seeclick_web_MAE_y_boxes": 0.020513739669695497, "eval_seeclick_web_inside_bbox": 0.8576388955116272, "eval_seeclick_web_loss": 1.1193333864212036, "eval_seeclick_web_loss_ce": 0.0005516507662832737, "eval_seeclick_web_loss_iou": 0.519775390625, "eval_seeclick_web_loss_num": 0.014141082763671875, "eval_seeclick_web_loss_xval": 1.110107421875, "eval_seeclick_web_runtime": 19.1191, "eval_seeclick_web_samples_per_second": 2.615, "eval_seeclick_web_steps_per_second": 0.105, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "eval_icons_CIoU": 0.3478478938341141, "eval_icons_GIoU": 0.39389291405677795, "eval_icons_IoU": 0.4208338111639023, "eval_icons_MAE_all": 0.06633740290999413, "eval_icons_MAE_h": 0.03294991096481681, "eval_icons_MAE_w": 0.06817016191780567, "eval_icons_MAE_x_boxes": 0.06793619319796562, "eval_icons_MAE_y_boxes": 0.033266451209783554, "eval_icons_inside_bbox": 0.7048611044883728, "eval_icons_loss": 1.5932660102844238, "eval_icons_loss_ce": 0.0014974797377362847, "eval_icons_loss_iou": 0.614501953125, "eval_icons_loss_num": 0.06303977966308594, "eval_icons_loss_xval": 1.544677734375, "eval_icons_runtime": 16.8196, "eval_icons_samples_per_second": 2.973, "eval_icons_steps_per_second": 0.119, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "eval_screenspot_CIoU": 0.2469616780678431, "eval_screenspot_GIoU": 0.2680433491865794, "eval_screenspot_IoU": 0.33675894141197205, "eval_screenspot_MAE_all": 0.09882631152868271, "eval_screenspot_MAE_h": 0.06025169417262077, "eval_screenspot_MAE_w": 0.10315311948458354, "eval_screenspot_MAE_x_boxes": 0.11736861368020375, "eval_screenspot_MAE_y_boxes": 0.0693823571006457, "eval_screenspot_inside_bbox": 0.5570833285649618, "eval_screenspot_loss": 1.9871058464050293, "eval_screenspot_loss_ce": 0.006458223797380924, "eval_screenspot_loss_iou": 0.7589518229166666, "eval_screenspot_loss_num": 0.10675303141276042, "eval_screenspot_loss_xval": 2.0504557291666665, "eval_screenspot_runtime": 27.6396, "eval_screenspot_samples_per_second": 3.22, "eval_screenspot_steps_per_second": 0.109, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "eval_compot_CIoU": 0.2672244608402252, "eval_compot_GIoU": 0.2929573655128479, "eval_compot_IoU": 0.333434134721756, "eval_compot_MAE_all": 0.03275496046990156, "eval_compot_MAE_h": 0.015514453873038292, "eval_compot_MAE_w": 0.043023936450481415, "eval_compot_MAE_x_boxes": 0.04298969078809023, "eval_compot_MAE_y_boxes": 0.009457055712118745, "eval_compot_inside_bbox": 0.5590277910232544, "eval_compot_loss": 1.6115740537643433, "eval_compot_loss_ce": 0.0006396450917236507, "eval_compot_loss_iou": 0.701904296875, "eval_compot_loss_num": 0.02944183349609375, "eval_compot_loss_xval": 1.55078125, "eval_compot_runtime": 18.007, "eval_compot_samples_per_second": 2.777, "eval_compot_steps_per_second": 0.111, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "eval_custom_ui_val_CIoU": 0.3800675223271052, "eval_custom_ui_val_GIoU": 0.4050305353270637, "eval_custom_ui_val_IoU": 0.43855932023790145, "eval_custom_ui_val_MAE_all": 0.046337926760315895, "eval_custom_ui_val_MAE_h": 0.026893117154637974, "eval_custom_ui_val_MAE_w": 0.05207906601329645, "eval_custom_ui_val_MAE_x_boxes": 0.046116245082683034, "eval_custom_ui_val_MAE_y_boxes": 0.030045698396861553, "eval_custom_ui_val_inside_bbox": 0.6608796318372091, "eval_custom_ui_val_loss": 1.4429850578308105, "eval_custom_ui_val_loss_ce": 0.0016363520019998152, "eval_custom_ui_val_loss_iou": 0.5924343532986112, "eval_custom_ui_val_loss_num": 0.04485532972547743, "eval_custom_ui_val_loss_xval": 1.4086642795138888, "eval_custom_ui_val_runtime": 55.8893, "eval_custom_ui_val_samples_per_second": 4.742, "eval_custom_ui_val_steps_per_second": 0.161, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1135857461024499, "loss": 1.1450822353363037, "loss_ce": 0.0012833788059651852, "loss_iou": 0.482421875, "loss_num": 0.0361328125, "loss_xval": 1.140625, "num_input_tokens_seen": 27984260, "step": 500 }, { "epoch": 1.1158129175946547, "grad_norm": 604.319580078125, "learning_rate": 1e-06, "loss": 1.4076, "num_input_tokens_seen": 28041640, "step": 501 }, { "epoch": 1.1158129175946547, "loss": 1.3554702997207642, "loss_ce": 0.0037856735289096832, "loss_iou": 0.55078125, "loss_num": 0.0498046875, "loss_xval": 1.3515625, "num_input_tokens_seen": 28041640, "step": 501 }, { "epoch": 1.1180400890868596, "grad_norm": 25.75654411315918, "learning_rate": 1e-06, "loss": 1.1217, "num_input_tokens_seen": 28095584, "step": 502 }, { "epoch": 1.1180400890868596, "loss": 1.170928955078125, "loss_ce": 0.01809690147638321, "loss_iou": 0.45703125, "loss_num": 0.047607421875, "loss_xval": 1.15625, "num_input_tokens_seen": 28095584, "step": 502 }, { "epoch": 1.1202672605790647, "grad_norm": 25.978004455566406, "learning_rate": 1e-06, "loss": 1.3928, "num_input_tokens_seen": 28152476, "step": 503 }, { "epoch": 1.1202672605790647, "loss": 1.4479464292526245, "loss_ce": 0.001657373970374465, "loss_iou": 0.5625, "loss_num": 0.06396484375, "loss_xval": 1.4453125, "num_input_tokens_seen": 28152476, "step": 503 }, { "epoch": 1.1224944320712695, "grad_norm": 26.65591049194336, "learning_rate": 1e-06, "loss": 1.2271, "num_input_tokens_seen": 28207068, "step": 504 }, { "epoch": 1.1224944320712695, "loss": 1.1432322263717651, "loss_ce": 0.007001784630119801, "loss_iou": 0.455078125, "loss_num": 0.045654296875, "loss_xval": 1.1328125, "num_input_tokens_seen": 28207068, "step": 504 }, { "epoch": 1.1247216035634744, "grad_norm": 54.66550064086914, "learning_rate": 1e-06, "loss": 1.1682, "num_input_tokens_seen": 28259960, "step": 505 }, { "epoch": 1.1247216035634744, "loss": 1.2690539360046387, "loss_ce": 0.0009875732939690351, "loss_iou": 0.51953125, "loss_num": 0.0458984375, "loss_xval": 1.265625, "num_input_tokens_seen": 28259960, "step": 505 }, { "epoch": 1.1269487750556793, "grad_norm": 47.31111145019531, "learning_rate": 1e-06, "loss": 1.1786, "num_input_tokens_seen": 28320020, "step": 506 }, { "epoch": 1.1269487750556793, "loss": 1.4288506507873535, "loss_ce": 0.0045341607183218, "loss_iou": 0.58984375, "loss_num": 0.04931640625, "loss_xval": 1.421875, "num_input_tokens_seen": 28320020, "step": 506 }, { "epoch": 1.1291759465478841, "grad_norm": 19.817325592041016, "learning_rate": 1e-06, "loss": 1.2798, "num_input_tokens_seen": 28376252, "step": 507 }, { "epoch": 1.1291759465478841, "loss": 0.7173053622245789, "loss_ce": 0.0009967784862965345, "loss_iou": 0.294921875, "loss_num": 0.025634765625, "loss_xval": 0.71484375, "num_input_tokens_seen": 28376252, "step": 507 }, { "epoch": 1.131403118040089, "grad_norm": 29.8699951171875, "learning_rate": 1e-06, "loss": 1.0068, "num_input_tokens_seen": 28428956, "step": 508 }, { "epoch": 1.131403118040089, "loss": 1.0889060497283936, "loss_ce": 0.0010153307812288404, "loss_iou": 0.46484375, "loss_num": 0.031982421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 28428956, "step": 508 }, { "epoch": 1.133630289532294, "grad_norm": 33.13240051269531, "learning_rate": 1e-06, "loss": 1.3036, "num_input_tokens_seen": 28484500, "step": 509 }, { "epoch": 1.133630289532294, "loss": 1.068528652191162, "loss_ce": 0.003343157237395644, "loss_iou": 0.4375, "loss_num": 0.037841796875, "loss_xval": 1.0625, "num_input_tokens_seen": 28484500, "step": 509 }, { "epoch": 1.135857461024499, "grad_norm": 28.9699764251709, "learning_rate": 1e-06, "loss": 0.9363, "num_input_tokens_seen": 28537936, "step": 510 }, { "epoch": 1.135857461024499, "loss": 0.9706677198410034, "loss_ce": 0.005823939107358456, "loss_iou": 0.419921875, "loss_num": 0.0252685546875, "loss_xval": 0.96484375, "num_input_tokens_seen": 28537936, "step": 510 }, { "epoch": 1.1380846325167038, "grad_norm": 24.593473434448242, "learning_rate": 1e-06, "loss": 1.2115, "num_input_tokens_seen": 28595228, "step": 511 }, { "epoch": 1.1380846325167038, "loss": 1.0602378845214844, "loss_ce": 0.0006675816257484257, "loss_iou": 0.455078125, "loss_num": 0.0294189453125, "loss_xval": 1.0625, "num_input_tokens_seen": 28595228, "step": 511 }, { "epoch": 1.1403118040089086, "grad_norm": 24.511798858642578, "learning_rate": 1e-06, "loss": 1.2554, "num_input_tokens_seen": 28650712, "step": 512 }, { "epoch": 1.1403118040089086, "loss": 1.273465871810913, "loss_ce": 0.0049112411215901375, "loss_iou": 0.55859375, "loss_num": 0.0306396484375, "loss_xval": 1.265625, "num_input_tokens_seen": 28650712, "step": 512 }, { "epoch": 1.1425389755011135, "grad_norm": 23.49129295349121, "learning_rate": 1e-06, "loss": 1.1208, "num_input_tokens_seen": 28708008, "step": 513 }, { "epoch": 1.1425389755011135, "loss": 1.1443027257919312, "loss_ce": 0.005630870349705219, "loss_iou": 0.46875, "loss_num": 0.040283203125, "loss_xval": 1.140625, "num_input_tokens_seen": 28708008, "step": 513 }, { "epoch": 1.1447661469933186, "grad_norm": 22.09214973449707, "learning_rate": 1e-06, "loss": 1.2605, "num_input_tokens_seen": 28762704, "step": 514 }, { "epoch": 1.1447661469933186, "loss": 1.2734076976776123, "loss_ce": 0.018280737102031708, "loss_iou": 0.484375, "loss_num": 0.05712890625, "loss_xval": 1.2578125, "num_input_tokens_seen": 28762704, "step": 514 }, { "epoch": 1.1469933184855234, "grad_norm": 36.97422790527344, "learning_rate": 1e-06, "loss": 1.2453, "num_input_tokens_seen": 28820084, "step": 515 }, { "epoch": 1.1469933184855234, "loss": 1.241039514541626, "loss_ce": 0.0012934368569403887, "loss_iou": 0.52734375, "loss_num": 0.037109375, "loss_xval": 1.2421875, "num_input_tokens_seen": 28820084, "step": 515 }, { "epoch": 1.1492204899777283, "grad_norm": 19.85295867919922, "learning_rate": 1e-06, "loss": 1.346, "num_input_tokens_seen": 28874500, "step": 516 }, { "epoch": 1.1492204899777283, "loss": 1.231957197189331, "loss_ce": 0.0005119675770401955, "loss_iou": 0.5, "loss_num": 0.046630859375, "loss_xval": 1.234375, "num_input_tokens_seen": 28874500, "step": 516 }, { "epoch": 1.1514476614699332, "grad_norm": 34.39645767211914, "learning_rate": 1e-06, "loss": 1.3985, "num_input_tokens_seen": 28931164, "step": 517 }, { "epoch": 1.1514476614699332, "loss": 1.2988076210021973, "loss_ce": 0.0019326311303302646, "loss_iou": 0.50390625, "loss_num": 0.05810546875, "loss_xval": 1.296875, "num_input_tokens_seen": 28931164, "step": 517 }, { "epoch": 1.153674832962138, "grad_norm": 20.85076141357422, "learning_rate": 1e-06, "loss": 1.3304, "num_input_tokens_seen": 28987576, "step": 518 }, { "epoch": 1.153674832962138, "loss": 1.326310157775879, "loss_ce": 0.005509458482265472, "loss_iou": 0.5234375, "loss_num": 0.0546875, "loss_xval": 1.3203125, "num_input_tokens_seen": 28987576, "step": 518 }, { "epoch": 1.1559020044543429, "grad_norm": 25.91316795349121, "learning_rate": 1e-06, "loss": 1.0788, "num_input_tokens_seen": 29045416, "step": 519 }, { "epoch": 1.1559020044543429, "loss": 0.9822598695755005, "loss_ce": 0.0008145694737322628, "loss_iou": 0.4296875, "loss_num": 0.0242919921875, "loss_xval": 0.98046875, "num_input_tokens_seen": 29045416, "step": 519 }, { "epoch": 1.158129175946548, "grad_norm": 19.741249084472656, "learning_rate": 1e-06, "loss": 1.179, "num_input_tokens_seen": 29100408, "step": 520 }, { "epoch": 1.158129175946548, "loss": 1.2618613243103027, "loss_ce": 0.0020956983789801598, "loss_iou": 0.486328125, "loss_num": 0.05712890625, "loss_xval": 1.2578125, "num_input_tokens_seen": 29100408, "step": 520 }, { "epoch": 1.1603563474387528, "grad_norm": 18.815519332885742, "learning_rate": 1e-06, "loss": 1.0117, "num_input_tokens_seen": 29158568, "step": 521 }, { "epoch": 1.1603563474387528, "loss": 1.15644371509552, "loss_ce": 0.00605308311060071, "loss_iou": 0.482421875, "loss_num": 0.037109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 29158568, "step": 521 }, { "epoch": 1.1625835189309577, "grad_norm": 37.82133865356445, "learning_rate": 1e-06, "loss": 1.617, "num_input_tokens_seen": 29216180, "step": 522 }, { "epoch": 1.1625835189309577, "loss": 1.670715570449829, "loss_ce": 0.003723357105627656, "loss_iou": 0.63671875, "loss_num": 0.0791015625, "loss_xval": 1.6640625, "num_input_tokens_seen": 29216180, "step": 522 }, { "epoch": 1.1648106904231625, "grad_norm": 31.2655029296875, "learning_rate": 1e-06, "loss": 1.1777, "num_input_tokens_seen": 29270996, "step": 523 }, { "epoch": 1.1648106904231625, "loss": 1.0179691314697266, "loss_ce": 0.0013676062226295471, "loss_iou": 0.419921875, "loss_num": 0.035400390625, "loss_xval": 1.015625, "num_input_tokens_seen": 29270996, "step": 523 }, { "epoch": 1.1670378619153674, "grad_norm": 32.841373443603516, "learning_rate": 1e-06, "loss": 1.4131, "num_input_tokens_seen": 29327600, "step": 524 }, { "epoch": 1.1670378619153674, "loss": 1.0806894302368164, "loss_ce": 0.0006113001727499068, "loss_iou": 0.439453125, "loss_num": 0.039794921875, "loss_xval": 1.078125, "num_input_tokens_seen": 29327600, "step": 524 }, { "epoch": 1.1692650334075725, "grad_norm": 14.620657920837402, "learning_rate": 1e-06, "loss": 1.228, "num_input_tokens_seen": 29383468, "step": 525 }, { "epoch": 1.1692650334075725, "loss": 1.1438016891479492, "loss_ce": 0.0007352291722781956, "loss_iou": 0.49609375, "loss_num": 0.030517578125, "loss_xval": 1.140625, "num_input_tokens_seen": 29383468, "step": 525 }, { "epoch": 1.1714922048997773, "grad_norm": 38.983917236328125, "learning_rate": 1e-06, "loss": 1.0997, "num_input_tokens_seen": 29438716, "step": 526 }, { "epoch": 1.1714922048997773, "loss": 1.1212483644485474, "loss_ce": 0.005525712855160236, "loss_iou": 0.447265625, "loss_num": 0.04443359375, "loss_xval": 1.1171875, "num_input_tokens_seen": 29438716, "step": 526 }, { "epoch": 1.1737193763919822, "grad_norm": 38.69413375854492, "learning_rate": 1e-06, "loss": 1.2774, "num_input_tokens_seen": 29494100, "step": 527 }, { "epoch": 1.1737193763919822, "loss": 0.9909718036651611, "loss_ce": 0.0007374268025159836, "loss_iou": 0.375, "loss_num": 0.0478515625, "loss_xval": 0.9921875, "num_input_tokens_seen": 29494100, "step": 527 }, { "epoch": 1.175946547884187, "grad_norm": 20.045331954956055, "learning_rate": 1e-06, "loss": 1.0815, "num_input_tokens_seen": 29549828, "step": 528 }, { "epoch": 1.175946547884187, "loss": 0.9373371005058289, "loss_ce": 0.004231642000377178, "loss_iou": 0.376953125, "loss_num": 0.03564453125, "loss_xval": 0.93359375, "num_input_tokens_seen": 29549828, "step": 528 }, { "epoch": 1.178173719376392, "grad_norm": 29.08791732788086, "learning_rate": 1e-06, "loss": 1.0875, "num_input_tokens_seen": 29605928, "step": 529 }, { "epoch": 1.178173719376392, "loss": 1.0599498748779297, "loss_ce": 0.0018443877343088388, "loss_iou": 0.404296875, "loss_num": 0.050537109375, "loss_xval": 1.0546875, "num_input_tokens_seen": 29605928, "step": 529 }, { "epoch": 1.1804008908685968, "grad_norm": 19.786157608032227, "learning_rate": 1e-06, "loss": 1.2501, "num_input_tokens_seen": 29661192, "step": 530 }, { "epoch": 1.1804008908685968, "loss": 1.2494618892669678, "loss_ce": 0.0014150183415040374, "loss_iou": 0.46484375, "loss_num": 0.0634765625, "loss_xval": 1.25, "num_input_tokens_seen": 29661192, "step": 530 }, { "epoch": 1.1826280623608019, "grad_norm": 56.539695739746094, "learning_rate": 1e-06, "loss": 1.2396, "num_input_tokens_seen": 29716704, "step": 531 }, { "epoch": 1.1826280623608019, "loss": 1.2581884860992432, "loss_ce": 0.0008642220636829734, "loss_iou": 0.546875, "loss_num": 0.033447265625, "loss_xval": 1.2578125, "num_input_tokens_seen": 29716704, "step": 531 }, { "epoch": 1.1848552338530067, "grad_norm": 15.872360229492188, "learning_rate": 1e-06, "loss": 1.0118, "num_input_tokens_seen": 29775024, "step": 532 }, { "epoch": 1.1848552338530067, "loss": 0.7084531784057617, "loss_ce": 0.0009336533839814365, "loss_iou": 0.296875, "loss_num": 0.0228271484375, "loss_xval": 0.70703125, "num_input_tokens_seen": 29775024, "step": 532 }, { "epoch": 1.1870824053452116, "grad_norm": 20.187984466552734, "learning_rate": 1e-06, "loss": 1.2427, "num_input_tokens_seen": 29830880, "step": 533 }, { "epoch": 1.1870824053452116, "loss": 1.1666477918624878, "loss_ce": 0.001608791295439005, "loss_iou": 0.50390625, "loss_num": 0.03125, "loss_xval": 1.1640625, "num_input_tokens_seen": 29830880, "step": 533 }, { "epoch": 1.1893095768374164, "grad_norm": 30.441001892089844, "learning_rate": 1e-06, "loss": 0.9994, "num_input_tokens_seen": 29888668, "step": 534 }, { "epoch": 1.1893095768374164, "loss": 1.0735273361206055, "loss_ce": 0.00077339808922261, "loss_iou": 0.453125, "loss_num": 0.033203125, "loss_xval": 1.0703125, "num_input_tokens_seen": 29888668, "step": 534 }, { "epoch": 1.1915367483296213, "grad_norm": 24.198322296142578, "learning_rate": 1e-06, "loss": 0.9697, "num_input_tokens_seen": 29946592, "step": 535 }, { "epoch": 1.1915367483296213, "loss": 0.9628783464431763, "loss_ce": 0.0009642738732509315, "loss_iou": 0.376953125, "loss_num": 0.04150390625, "loss_xval": 0.9609375, "num_input_tokens_seen": 29946592, "step": 535 }, { "epoch": 1.1937639198218264, "grad_norm": 30.70039939880371, "learning_rate": 1e-06, "loss": 1.2909, "num_input_tokens_seen": 30004576, "step": 536 }, { "epoch": 1.1937639198218264, "loss": 1.1397030353546143, "loss_ce": 0.0010311845690011978, "loss_iou": 0.45703125, "loss_num": 0.04541015625, "loss_xval": 1.140625, "num_input_tokens_seen": 30004576, "step": 536 }, { "epoch": 1.1959910913140313, "grad_norm": 40.41281509399414, "learning_rate": 1e-06, "loss": 1.3186, "num_input_tokens_seen": 30056972, "step": 537 }, { "epoch": 1.1959910913140313, "loss": 1.1098320484161377, "loss_ce": 0.0004569512093439698, "loss_iou": 0.490234375, "loss_num": 0.0260009765625, "loss_xval": 1.109375, "num_input_tokens_seen": 30056972, "step": 537 }, { "epoch": 1.1982182628062361, "grad_norm": 25.64579963684082, "learning_rate": 1e-06, "loss": 1.2267, "num_input_tokens_seen": 30111776, "step": 538 }, { "epoch": 1.1982182628062361, "loss": 1.4003875255584717, "loss_ce": 0.002682518446817994, "loss_iou": 0.5625, "loss_num": 0.05517578125, "loss_xval": 1.3984375, "num_input_tokens_seen": 30111776, "step": 538 }, { "epoch": 1.200445434298441, "grad_norm": 62.563392639160156, "learning_rate": 1e-06, "loss": 1.1343, "num_input_tokens_seen": 30168204, "step": 539 }, { "epoch": 1.200445434298441, "loss": 1.2498741149902344, "loss_ce": 0.00353623297996819, "loss_iou": 0.48828125, "loss_num": 0.053955078125, "loss_xval": 1.25, "num_input_tokens_seen": 30168204, "step": 539 }, { "epoch": 1.2026726057906458, "grad_norm": 24.099092483520508, "learning_rate": 1e-06, "loss": 1.5677, "num_input_tokens_seen": 30222172, "step": 540 }, { "epoch": 1.2026726057906458, "loss": 1.189444899559021, "loss_ce": 0.001456591533496976, "loss_iou": 0.470703125, "loss_num": 0.049560546875, "loss_xval": 1.1875, "num_input_tokens_seen": 30222172, "step": 540 }, { "epoch": 1.2048997772828507, "grad_norm": 14.803204536437988, "learning_rate": 1e-06, "loss": 1.2895, "num_input_tokens_seen": 30277260, "step": 541 }, { "epoch": 1.2048997772828507, "loss": 1.3928616046905518, "loss_ce": 0.0027249492704868317, "loss_iou": 0.5625, "loss_num": 0.052490234375, "loss_xval": 1.390625, "num_input_tokens_seen": 30277260, "step": 541 }, { "epoch": 1.2071269487750558, "grad_norm": 56.08818054199219, "learning_rate": 1e-06, "loss": 1.4108, "num_input_tokens_seen": 30333120, "step": 542 }, { "epoch": 1.2071269487750558, "loss": 1.4470124244689941, "loss_ce": 0.001211727038025856, "loss_iou": 0.59765625, "loss_num": 0.049560546875, "loss_xval": 1.4453125, "num_input_tokens_seen": 30333120, "step": 542 }, { "epoch": 1.2093541202672606, "grad_norm": 33.44521713256836, "learning_rate": 1e-06, "loss": 1.0997, "num_input_tokens_seen": 30385752, "step": 543 }, { "epoch": 1.2093541202672606, "loss": 0.8862332105636597, "loss_ce": 0.0009793277131393552, "loss_iou": 0.375, "loss_num": 0.027099609375, "loss_xval": 0.88671875, "num_input_tokens_seen": 30385752, "step": 543 }, { "epoch": 1.2115812917594655, "grad_norm": 20.260774612426758, "learning_rate": 1e-06, "loss": 1.3352, "num_input_tokens_seen": 30441532, "step": 544 }, { "epoch": 1.2115812917594655, "loss": 1.403501033782959, "loss_ce": 0.0006689808797091246, "loss_iou": 0.5234375, "loss_num": 0.0712890625, "loss_xval": 1.40625, "num_input_tokens_seen": 30441532, "step": 544 }, { "epoch": 1.2138084632516704, "grad_norm": 20.101301193237305, "learning_rate": 1e-06, "loss": 1.0824, "num_input_tokens_seen": 30495056, "step": 545 }, { "epoch": 1.2138084632516704, "loss": 1.0261245965957642, "loss_ce": 0.0004898015176877379, "loss_iou": 0.431640625, "loss_num": 0.0322265625, "loss_xval": 1.0234375, "num_input_tokens_seen": 30495056, "step": 545 }, { "epoch": 1.2160356347438752, "grad_norm": 27.02719497680664, "learning_rate": 1e-06, "loss": 1.1653, "num_input_tokens_seen": 30550680, "step": 546 }, { "epoch": 1.2160356347438752, "loss": 1.1834897994995117, "loss_ce": 0.0008725329535081983, "loss_iou": 0.5078125, "loss_num": 0.033447265625, "loss_xval": 1.1796875, "num_input_tokens_seen": 30550680, "step": 546 }, { "epoch": 1.2182628062360803, "grad_norm": 17.25623893737793, "learning_rate": 1e-06, "loss": 1.4252, "num_input_tokens_seen": 30606964, "step": 547 }, { "epoch": 1.2182628062360803, "loss": 1.1636087894439697, "loss_ce": 0.0036967378109693527, "loss_iou": 0.45703125, "loss_num": 0.049072265625, "loss_xval": 1.15625, "num_input_tokens_seen": 30606964, "step": 547 }, { "epoch": 1.2204899777282852, "grad_norm": 19.519060134887695, "learning_rate": 1e-06, "loss": 1.1066, "num_input_tokens_seen": 30663460, "step": 548 }, { "epoch": 1.2204899777282852, "loss": 1.1835308074951172, "loss_ce": 0.0018901234725490212, "loss_iou": 0.50390625, "loss_num": 0.034912109375, "loss_xval": 1.1796875, "num_input_tokens_seen": 30663460, "step": 548 }, { "epoch": 1.22271714922049, "grad_norm": 25.55568504333496, "learning_rate": 1e-06, "loss": 1.2514, "num_input_tokens_seen": 30721628, "step": 549 }, { "epoch": 1.22271714922049, "loss": 1.3971433639526367, "loss_ce": 0.0011472116457298398, "loss_iou": 0.5625, "loss_num": 0.0546875, "loss_xval": 1.3984375, "num_input_tokens_seen": 30721628, "step": 549 }, { "epoch": 1.2249443207126949, "grad_norm": 31.964738845825195, "learning_rate": 1e-06, "loss": 1.3476, "num_input_tokens_seen": 30777708, "step": 550 }, { "epoch": 1.2249443207126949, "loss": 1.316117286682129, "loss_ce": 0.0006876069819554687, "loss_iou": 0.5546875, "loss_num": 0.041259765625, "loss_xval": 1.3125, "num_input_tokens_seen": 30777708, "step": 550 }, { "epoch": 1.2271714922048997, "grad_norm": 19.710111618041992, "learning_rate": 1e-06, "loss": 1.3899, "num_input_tokens_seen": 30829420, "step": 551 }, { "epoch": 1.2271714922048997, "loss": 1.4299864768981934, "loss_ce": 0.01006464846432209, "loss_iou": 0.57421875, "loss_num": 0.053955078125, "loss_xval": 1.421875, "num_input_tokens_seen": 30829420, "step": 551 }, { "epoch": 1.2293986636971046, "grad_norm": 49.47341537475586, "learning_rate": 1e-06, "loss": 1.1639, "num_input_tokens_seen": 30887668, "step": 552 }, { "epoch": 1.2293986636971046, "loss": 1.0400408506393433, "loss_ce": 0.0014665467897430062, "loss_iou": 0.396484375, "loss_num": 0.04931640625, "loss_xval": 1.0390625, "num_input_tokens_seen": 30887668, "step": 552 }, { "epoch": 1.2316258351893095, "grad_norm": 21.37660789489746, "learning_rate": 1e-06, "loss": 1.2537, "num_input_tokens_seen": 30942632, "step": 553 }, { "epoch": 1.2316258351893095, "loss": 1.3482673168182373, "loss_ce": 0.0020758796017616987, "loss_iou": 0.53515625, "loss_num": 0.0556640625, "loss_xval": 1.34375, "num_input_tokens_seen": 30942632, "step": 553 }, { "epoch": 1.2338530066815145, "grad_norm": 15.992125511169434, "learning_rate": 1e-06, "loss": 1.1998, "num_input_tokens_seen": 31000952, "step": 554 }, { "epoch": 1.2338530066815145, "loss": 1.3668153285980225, "loss_ce": 0.0006044998299330473, "loss_iou": 0.5859375, "loss_num": 0.039306640625, "loss_xval": 1.3671875, "num_input_tokens_seen": 31000952, "step": 554 }, { "epoch": 1.2360801781737194, "grad_norm": 22.379846572875977, "learning_rate": 1e-06, "loss": 1.3172, "num_input_tokens_seen": 31058164, "step": 555 }, { "epoch": 1.2360801781737194, "loss": 1.7307417392730713, "loss_ce": 0.004179192706942558, "loss_iou": 0.71484375, "loss_num": 0.0595703125, "loss_xval": 1.7265625, "num_input_tokens_seen": 31058164, "step": 555 }, { "epoch": 1.2383073496659243, "grad_norm": 17.48279571533203, "learning_rate": 1e-06, "loss": 1.0391, "num_input_tokens_seen": 31113116, "step": 556 }, { "epoch": 1.2383073496659243, "loss": 1.0837606191635132, "loss_ce": 0.0009969680104404688, "loss_iou": 0.380859375, "loss_num": 0.06396484375, "loss_xval": 1.0859375, "num_input_tokens_seen": 31113116, "step": 556 }, { "epoch": 1.2405345211581291, "grad_norm": 26.064071655273438, "learning_rate": 1e-06, "loss": 1.2376, "num_input_tokens_seen": 31172128, "step": 557 }, { "epoch": 1.2405345211581291, "loss": 1.2718275785446167, "loss_ce": 0.0022963983938097954, "loss_iou": 0.52734375, "loss_num": 0.04248046875, "loss_xval": 1.265625, "num_input_tokens_seen": 31172128, "step": 557 }, { "epoch": 1.242761692650334, "grad_norm": 27.61939811706543, "learning_rate": 1e-06, "loss": 0.9737, "num_input_tokens_seen": 31227572, "step": 558 }, { "epoch": 1.242761692650334, "loss": 1.1241004467010498, "loss_ce": 0.0010536747286096215, "loss_iou": 0.47265625, "loss_num": 0.03564453125, "loss_xval": 1.125, "num_input_tokens_seen": 31227572, "step": 558 }, { "epoch": 1.244988864142539, "grad_norm": 23.97410774230957, "learning_rate": 1e-06, "loss": 1.2226, "num_input_tokens_seen": 31282092, "step": 559 }, { "epoch": 1.244988864142539, "loss": 1.2522716522216797, "loss_ce": 0.000806744210422039, "loss_iou": 0.484375, "loss_num": 0.056396484375, "loss_xval": 1.25, "num_input_tokens_seen": 31282092, "step": 559 }, { "epoch": 1.247216035634744, "grad_norm": 18.71125602722168, "learning_rate": 1e-06, "loss": 1.0672, "num_input_tokens_seen": 31340076, "step": 560 }, { "epoch": 1.247216035634744, "loss": 1.1468454599380493, "loss_ce": 0.0008493656641803682, "loss_iou": 0.482421875, "loss_num": 0.036376953125, "loss_xval": 1.1484375, "num_input_tokens_seen": 31340076, "step": 560 }, { "epoch": 1.2494432071269488, "grad_norm": 70.7640151977539, "learning_rate": 1e-06, "loss": 1.2216, "num_input_tokens_seen": 31396448, "step": 561 }, { "epoch": 1.2494432071269488, "loss": 1.2371618747711182, "loss_ce": 0.013528996147215366, "loss_iou": 0.515625, "loss_num": 0.038330078125, "loss_xval": 1.2265625, "num_input_tokens_seen": 31396448, "step": 561 }, { "epoch": 1.2516703786191536, "grad_norm": 19.904014587402344, "learning_rate": 1e-06, "loss": 1.354, "num_input_tokens_seen": 31453460, "step": 562 }, { "epoch": 1.2516703786191536, "loss": 1.3166056871414185, "loss_ce": 0.00117595330812037, "loss_iou": 0.5234375, "loss_num": 0.05322265625, "loss_xval": 1.3125, "num_input_tokens_seen": 31453460, "step": 562 }, { "epoch": 1.2538975501113585, "grad_norm": 24.498027801513672, "learning_rate": 1e-06, "loss": 1.0603, "num_input_tokens_seen": 31509260, "step": 563 }, { "epoch": 1.2538975501113585, "loss": 1.2601665258407593, "loss_ce": 0.000645035644993186, "loss_iou": 0.53515625, "loss_num": 0.037841796875, "loss_xval": 1.2578125, "num_input_tokens_seen": 31509260, "step": 563 }, { "epoch": 1.2561247216035634, "grad_norm": 17.997339248657227, "learning_rate": 1e-06, "loss": 1.2419, "num_input_tokens_seen": 31561832, "step": 564 }, { "epoch": 1.2561247216035634, "loss": 1.050682783126831, "loss_ce": 0.002831157995387912, "loss_iou": 0.4453125, "loss_num": 0.031494140625, "loss_xval": 1.046875, "num_input_tokens_seen": 31561832, "step": 564 }, { "epoch": 1.2583518930957684, "grad_norm": 25.669340133666992, "learning_rate": 1e-06, "loss": 1.0692, "num_input_tokens_seen": 31619832, "step": 565 }, { "epoch": 1.2583518930957684, "loss": 1.0688444375991821, "loss_ce": 0.0009733220795169473, "loss_iou": 0.4453125, "loss_num": 0.035888671875, "loss_xval": 1.0703125, "num_input_tokens_seen": 31619832, "step": 565 }, { "epoch": 1.2605790645879733, "grad_norm": 16.837244033813477, "learning_rate": 1e-06, "loss": 1.2095, "num_input_tokens_seen": 31676760, "step": 566 }, { "epoch": 1.2605790645879733, "loss": 1.536379337310791, "loss_ce": 0.0017113613430410624, "loss_iou": 0.61328125, "loss_num": 0.0615234375, "loss_xval": 1.53125, "num_input_tokens_seen": 31676760, "step": 566 }, { "epoch": 1.2628062360801782, "grad_norm": 16.443872451782227, "learning_rate": 1e-06, "loss": 1.4209, "num_input_tokens_seen": 31730956, "step": 567 }, { "epoch": 1.2628062360801782, "loss": 1.459883213043213, "loss_ce": 0.0011430047452449799, "loss_iou": 0.56640625, "loss_num": 0.06591796875, "loss_xval": 1.4609375, "num_input_tokens_seen": 31730956, "step": 567 }, { "epoch": 1.265033407572383, "grad_norm": 24.123031616210938, "learning_rate": 1e-06, "loss": 1.0963, "num_input_tokens_seen": 31786932, "step": 568 }, { "epoch": 1.265033407572383, "loss": 1.188434362411499, "loss_ce": 0.0009342934936285019, "loss_iou": 0.47265625, "loss_num": 0.048828125, "loss_xval": 1.1875, "num_input_tokens_seen": 31786932, "step": 568 }, { "epoch": 1.267260579064588, "grad_norm": 20.135202407836914, "learning_rate": 1e-06, "loss": 1.1357, "num_input_tokens_seen": 31843536, "step": 569 }, { "epoch": 1.267260579064588, "loss": 1.4373984336853027, "loss_ce": 0.001607370562851429, "loss_iou": 0.53125, "loss_num": 0.07470703125, "loss_xval": 1.4375, "num_input_tokens_seen": 31843536, "step": 569 }, { "epoch": 1.269487750556793, "grad_norm": 34.270626068115234, "learning_rate": 1e-06, "loss": 1.2551, "num_input_tokens_seen": 31898080, "step": 570 }, { "epoch": 1.269487750556793, "loss": 1.2200250625610352, "loss_ce": 0.0007868170505389571, "loss_iou": 0.478515625, "loss_num": 0.05224609375, "loss_xval": 1.21875, "num_input_tokens_seen": 31898080, "step": 570 }, { "epoch": 1.2717149220489978, "grad_norm": 19.55361557006836, "learning_rate": 1e-06, "loss": 1.1386, "num_input_tokens_seen": 31951824, "step": 571 }, { "epoch": 1.2717149220489978, "loss": 1.242485761642456, "loss_ce": 0.0007865370716899633, "loss_iou": 0.5078125, "loss_num": 0.045654296875, "loss_xval": 1.2421875, "num_input_tokens_seen": 31951824, "step": 571 }, { "epoch": 1.2739420935412027, "grad_norm": 64.2752456665039, "learning_rate": 1e-06, "loss": 1.3344, "num_input_tokens_seen": 32006760, "step": 572 }, { "epoch": 1.2739420935412027, "loss": 0.9924131035804749, "loss_ce": 0.002422844059765339, "loss_iou": 0.41796875, "loss_num": 0.03076171875, "loss_xval": 0.98828125, "num_input_tokens_seen": 32006760, "step": 572 }, { "epoch": 1.2761692650334076, "grad_norm": 18.450927734375, "learning_rate": 1e-06, "loss": 1.1749, "num_input_tokens_seen": 32065980, "step": 573 }, { "epoch": 1.2761692650334076, "loss": 1.0965838432312012, "loss_ce": 0.0023455810733139515, "loss_iou": 0.42578125, "loss_num": 0.04833984375, "loss_xval": 1.09375, "num_input_tokens_seen": 32065980, "step": 573 }, { "epoch": 1.2783964365256124, "grad_norm": 20.623170852661133, "learning_rate": 1e-06, "loss": 1.0463, "num_input_tokens_seen": 32120756, "step": 574 }, { "epoch": 1.2783964365256124, "loss": 1.1162598133087158, "loss_ce": 0.0005371532752178609, "loss_iou": 0.48046875, "loss_num": 0.0306396484375, "loss_xval": 1.1171875, "num_input_tokens_seen": 32120756, "step": 574 }, { "epoch": 1.2806236080178173, "grad_norm": 30.17593002319336, "learning_rate": 1e-06, "loss": 1.0251, "num_input_tokens_seen": 32175548, "step": 575 }, { "epoch": 1.2806236080178173, "loss": 1.20136296749115, "loss_ce": 0.000679333577863872, "loss_iou": 0.5078125, "loss_num": 0.037353515625, "loss_xval": 1.203125, "num_input_tokens_seen": 32175548, "step": 575 }, { "epoch": 1.2828507795100224, "grad_norm": 24.09958839416504, "learning_rate": 1e-06, "loss": 1.2353, "num_input_tokens_seen": 32230428, "step": 576 }, { "epoch": 1.2828507795100224, "loss": 1.3486398458480835, "loss_ce": 0.002448498737066984, "loss_iou": 0.5625, "loss_num": 0.0439453125, "loss_xval": 1.34375, "num_input_tokens_seen": 32230428, "step": 576 }, { "epoch": 1.2850779510022272, "grad_norm": 25.068777084350586, "learning_rate": 1e-06, "loss": 1.1451, "num_input_tokens_seen": 32282324, "step": 577 }, { "epoch": 1.2850779510022272, "loss": 1.3363144397735596, "loss_ce": 0.0011094561778008938, "loss_iou": 0.498046875, "loss_num": 0.06787109375, "loss_xval": 1.3359375, "num_input_tokens_seen": 32282324, "step": 577 }, { "epoch": 1.287305122494432, "grad_norm": 21.740449905395508, "learning_rate": 1e-06, "loss": 0.9655, "num_input_tokens_seen": 32339156, "step": 578 }, { "epoch": 1.287305122494432, "loss": 0.8180431127548218, "loss_ce": 0.0009044220205396414, "loss_iou": 0.33984375, "loss_num": 0.02783203125, "loss_xval": 0.81640625, "num_input_tokens_seen": 32339156, "step": 578 }, { "epoch": 1.289532293986637, "grad_norm": 70.63128662109375, "learning_rate": 1e-06, "loss": 1.2932, "num_input_tokens_seen": 32394596, "step": 579 }, { "epoch": 1.289532293986637, "loss": 1.4371894598007202, "loss_ce": 0.0006660318467766047, "loss_iou": 0.5546875, "loss_num": 0.06640625, "loss_xval": 1.4375, "num_input_tokens_seen": 32394596, "step": 579 }, { "epoch": 1.2917594654788418, "grad_norm": 23.278512954711914, "learning_rate": 1e-06, "loss": 1.1461, "num_input_tokens_seen": 32452512, "step": 580 }, { "epoch": 1.2917594654788418, "loss": 1.3186371326446533, "loss_ce": 0.003695748746395111, "loss_iou": 0.494140625, "loss_num": 0.0654296875, "loss_xval": 1.3125, "num_input_tokens_seen": 32452512, "step": 580 }, { "epoch": 1.2939866369710469, "grad_norm": 21.15460205078125, "learning_rate": 1e-06, "loss": 1.1878, "num_input_tokens_seen": 32507568, "step": 581 }, { "epoch": 1.2939866369710469, "loss": 1.161703109741211, "loss_ce": 0.0027676241006702185, "loss_iou": 0.474609375, "loss_num": 0.0419921875, "loss_xval": 1.15625, "num_input_tokens_seen": 32507568, "step": 581 }, { "epoch": 1.2962138084632517, "grad_norm": 20.2482852935791, "learning_rate": 1e-06, "loss": 1.0799, "num_input_tokens_seen": 32564432, "step": 582 }, { "epoch": 1.2962138084632517, "loss": 1.0239415168762207, "loss_ce": 0.0012363542336970568, "loss_iou": 0.419921875, "loss_num": 0.036376953125, "loss_xval": 1.0234375, "num_input_tokens_seen": 32564432, "step": 582 }, { "epoch": 1.2984409799554566, "grad_norm": 39.36937713623047, "learning_rate": 1e-06, "loss": 0.9524, "num_input_tokens_seen": 32618096, "step": 583 }, { "epoch": 1.2984409799554566, "loss": 0.9080791473388672, "loss_ce": 0.0008525372250005603, "loss_iou": 0.34375, "loss_num": 0.044189453125, "loss_xval": 0.90625, "num_input_tokens_seen": 32618096, "step": 583 }, { "epoch": 1.3006681514476615, "grad_norm": 21.902511596679688, "learning_rate": 1e-06, "loss": 1.3062, "num_input_tokens_seen": 32674700, "step": 584 }, { "epoch": 1.3006681514476615, "loss": 1.2775499820709229, "loss_ce": 0.004112505819648504, "loss_iou": 0.5078125, "loss_num": 0.051025390625, "loss_xval": 1.2734375, "num_input_tokens_seen": 32674700, "step": 584 }, { "epoch": 1.3028953229398663, "grad_norm": 18.999221801757812, "learning_rate": 1e-06, "loss": 1.1638, "num_input_tokens_seen": 32732620, "step": 585 }, { "epoch": 1.3028953229398663, "loss": 1.1583878993988037, "loss_ce": 0.0018937456188723445, "loss_iou": 0.431640625, "loss_num": 0.05859375, "loss_xval": 1.15625, "num_input_tokens_seen": 32732620, "step": 585 }, { "epoch": 1.3051224944320712, "grad_norm": 29.080703735351562, "learning_rate": 1e-06, "loss": 1.1476, "num_input_tokens_seen": 32790356, "step": 586 }, { "epoch": 1.3051224944320712, "loss": 1.168489933013916, "loss_ce": 0.0005212133983150125, "loss_iou": 0.48828125, "loss_num": 0.0380859375, "loss_xval": 1.171875, "num_input_tokens_seen": 32790356, "step": 586 }, { "epoch": 1.307349665924276, "grad_norm": 26.149927139282227, "learning_rate": 1e-06, "loss": 1.1816, "num_input_tokens_seen": 32846304, "step": 587 }, { "epoch": 1.307349665924276, "loss": 1.3775864839553833, "loss_ce": 0.0006334002828225493, "loss_iou": 0.56640625, "loss_num": 0.04931640625, "loss_xval": 1.375, "num_input_tokens_seen": 32846304, "step": 587 }, { "epoch": 1.3095768374164811, "grad_norm": 15.33914566040039, "learning_rate": 1e-06, "loss": 0.9549, "num_input_tokens_seen": 32901992, "step": 588 }, { "epoch": 1.3095768374164811, "loss": 0.846169114112854, "loss_ce": 0.001198451267555356, "loss_iou": 0.345703125, "loss_num": 0.03125, "loss_xval": 0.84375, "num_input_tokens_seen": 32901992, "step": 588 }, { "epoch": 1.311804008908686, "grad_norm": 45.818695068359375, "learning_rate": 1e-06, "loss": 1.2682, "num_input_tokens_seen": 32961136, "step": 589 }, { "epoch": 1.311804008908686, "loss": 1.3090804815292358, "loss_ce": 0.00048678729217499495, "loss_iou": 0.5703125, "loss_num": 0.033203125, "loss_xval": 1.3125, "num_input_tokens_seen": 32961136, "step": 589 }, { "epoch": 1.3140311804008908, "grad_norm": 31.00553321838379, "learning_rate": 1e-06, "loss": 1.1881, "num_input_tokens_seen": 33015672, "step": 590 }, { "epoch": 1.3140311804008908, "loss": 1.2077548503875732, "loss_ce": 0.0007235349621623755, "loss_iou": 0.48828125, "loss_num": 0.04541015625, "loss_xval": 1.203125, "num_input_tokens_seen": 33015672, "step": 590 }, { "epoch": 1.3162583518930957, "grad_norm": 32.82173156738281, "learning_rate": 1e-06, "loss": 1.4302, "num_input_tokens_seen": 33071504, "step": 591 }, { "epoch": 1.3162583518930957, "loss": 1.373945713043213, "loss_ce": 0.00041063432581722736, "loss_iou": 0.546875, "loss_num": 0.056640625, "loss_xval": 1.375, "num_input_tokens_seen": 33071504, "step": 591 }, { "epoch": 1.3184855233853008, "grad_norm": 19.010250091552734, "learning_rate": 1e-06, "loss": 0.9531, "num_input_tokens_seen": 33126464, "step": 592 }, { "epoch": 1.3184855233853008, "loss": 0.9081621766090393, "loss_ce": 0.002156274626031518, "loss_iou": 0.37109375, "loss_num": 0.032958984375, "loss_xval": 0.90625, "num_input_tokens_seen": 33126464, "step": 592 }, { "epoch": 1.3207126948775056, "grad_norm": 19.802019119262695, "learning_rate": 1e-06, "loss": 1.3826, "num_input_tokens_seen": 33182964, "step": 593 }, { "epoch": 1.3207126948775056, "loss": 1.3006547689437866, "loss_ce": 0.0008500526309944689, "loss_iou": 0.52734375, "loss_num": 0.049560546875, "loss_xval": 1.296875, "num_input_tokens_seen": 33182964, "step": 593 }, { "epoch": 1.3229398663697105, "grad_norm": 19.390378952026367, "learning_rate": 1e-06, "loss": 1.3825, "num_input_tokens_seen": 33237572, "step": 594 }, { "epoch": 1.3229398663697105, "loss": 1.126906156539917, "loss_ce": 0.0004412978305481374, "loss_iou": 0.486328125, "loss_num": 0.031005859375, "loss_xval": 1.125, "num_input_tokens_seen": 33237572, "step": 594 }, { "epoch": 1.3251670378619154, "grad_norm": 17.320398330688477, "learning_rate": 1e-06, "loss": 0.8407, "num_input_tokens_seen": 33292472, "step": 595 }, { "epoch": 1.3251670378619154, "loss": 0.7980577945709229, "loss_ce": 0.00679800333455205, "loss_iou": 0.33984375, "loss_num": 0.0223388671875, "loss_xval": 0.79296875, "num_input_tokens_seen": 33292472, "step": 595 }, { "epoch": 1.3273942093541202, "grad_norm": 22.934152603149414, "learning_rate": 1e-06, "loss": 1.2177, "num_input_tokens_seen": 33349680, "step": 596 }, { "epoch": 1.3273942093541202, "loss": 1.237870216369629, "loss_ce": 0.000809596327599138, "loss_iou": 0.50390625, "loss_num": 0.046875, "loss_xval": 1.234375, "num_input_tokens_seen": 33349680, "step": 596 }, { "epoch": 1.329621380846325, "grad_norm": 19.58114242553711, "learning_rate": 1e-06, "loss": 0.9529, "num_input_tokens_seen": 33407408, "step": 597 }, { "epoch": 1.329621380846325, "loss": 1.1202585697174072, "loss_ce": 0.0006297538056969643, "loss_iou": 0.443359375, "loss_num": 0.046630859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 33407408, "step": 597 }, { "epoch": 1.33184855233853, "grad_norm": 34.02892303466797, "learning_rate": 1e-06, "loss": 1.4897, "num_input_tokens_seen": 33461988, "step": 598 }, { "epoch": 1.33184855233853, "loss": 1.4317845106124878, "loss_ce": 0.0016087474068626761, "loss_iou": 0.59765625, "loss_num": 0.04736328125, "loss_xval": 1.4296875, "num_input_tokens_seen": 33461988, "step": 598 }, { "epoch": 1.334075723830735, "grad_norm": 51.48478698730469, "learning_rate": 1e-06, "loss": 1.156, "num_input_tokens_seen": 33517444, "step": 599 }, { "epoch": 1.334075723830735, "loss": 0.9608005881309509, "loss_ce": 0.0010837747249752283, "loss_iou": 0.400390625, "loss_num": 0.03173828125, "loss_xval": 0.9609375, "num_input_tokens_seen": 33517444, "step": 599 }, { "epoch": 1.3363028953229399, "grad_norm": 24.541122436523438, "learning_rate": 1e-06, "loss": 1.1167, "num_input_tokens_seen": 33573432, "step": 600 }, { "epoch": 1.3363028953229399, "loss": 1.3346822261810303, "loss_ce": 0.0014302851632237434, "loss_iou": 0.578125, "loss_num": 0.035888671875, "loss_xval": 1.3359375, "num_input_tokens_seen": 33573432, "step": 600 }, { "epoch": 1.3385300668151447, "grad_norm": 15.516777038574219, "learning_rate": 1e-06, "loss": 1.2706, "num_input_tokens_seen": 33627888, "step": 601 }, { "epoch": 1.3385300668151447, "loss": 1.197658896446228, "loss_ce": 0.0008815636392682791, "loss_iou": 0.484375, "loss_num": 0.04541015625, "loss_xval": 1.1953125, "num_input_tokens_seen": 33627888, "step": 601 }, { "epoch": 1.3407572383073496, "grad_norm": 22.7963924407959, "learning_rate": 1e-06, "loss": 1.2063, "num_input_tokens_seen": 33685960, "step": 602 }, { "epoch": 1.3407572383073496, "loss": 1.1104426383972168, "loss_ce": 0.0008235453860834241, "loss_iou": 0.46484375, "loss_num": 0.0361328125, "loss_xval": 1.109375, "num_input_tokens_seen": 33685960, "step": 602 }, { "epoch": 1.3429844097995547, "grad_norm": 32.18126678466797, "learning_rate": 1e-06, "loss": 1.2209, "num_input_tokens_seen": 33742816, "step": 603 }, { "epoch": 1.3429844097995547, "loss": 1.1211140155792236, "loss_ce": 0.0007527406560257077, "loss_iou": 0.453125, "loss_num": 0.042724609375, "loss_xval": 1.1171875, "num_input_tokens_seen": 33742816, "step": 603 }, { "epoch": 1.3452115812917596, "grad_norm": 24.347137451171875, "learning_rate": 1e-06, "loss": 1.1429, "num_input_tokens_seen": 33800708, "step": 604 }, { "epoch": 1.3452115812917596, "loss": 1.146965742111206, "loss_ce": 0.0012137566227465868, "loss_iou": 0.453125, "loss_num": 0.047607421875, "loss_xval": 1.1484375, "num_input_tokens_seen": 33800708, "step": 604 }, { "epoch": 1.3474387527839644, "grad_norm": 27.303401947021484, "learning_rate": 1e-06, "loss": 1.1817, "num_input_tokens_seen": 33853544, "step": 605 }, { "epoch": 1.3474387527839644, "loss": 1.0051369667053223, "loss_ce": 0.006113563664257526, "loss_iou": 0.390625, "loss_num": 0.04345703125, "loss_xval": 1.0, "num_input_tokens_seen": 33853544, "step": 605 }, { "epoch": 1.3496659242761693, "grad_norm": 24.224193572998047, "learning_rate": 1e-06, "loss": 1.0283, "num_input_tokens_seen": 33908172, "step": 606 }, { "epoch": 1.3496659242761693, "loss": 1.0841628313064575, "loss_ce": 0.0006667570560239255, "loss_iou": 0.41015625, "loss_num": 0.05224609375, "loss_xval": 1.0859375, "num_input_tokens_seen": 33908172, "step": 606 }, { "epoch": 1.3518930957683741, "grad_norm": 23.933223724365234, "learning_rate": 1e-06, "loss": 1.3531, "num_input_tokens_seen": 33964128, "step": 607 }, { "epoch": 1.3518930957683741, "loss": 1.0083250999450684, "loss_ce": 0.0005124981980770826, "loss_iou": 0.42578125, "loss_num": 0.03125, "loss_xval": 1.0078125, "num_input_tokens_seen": 33964128, "step": 607 }, { "epoch": 1.354120267260579, "grad_norm": 49.8543815612793, "learning_rate": 1e-06, "loss": 0.9476, "num_input_tokens_seen": 34018964, "step": 608 }, { "epoch": 1.354120267260579, "loss": 1.1164300441741943, "loss_ce": 0.0011956640519201756, "loss_iou": 0.447265625, "loss_num": 0.043701171875, "loss_xval": 1.1171875, "num_input_tokens_seen": 34018964, "step": 608 }, { "epoch": 1.3563474387527839, "grad_norm": 20.391830444335938, "learning_rate": 1e-06, "loss": 1.0838, "num_input_tokens_seen": 34074960, "step": 609 }, { "epoch": 1.3563474387527839, "loss": 1.0016207695007324, "loss_ce": 0.0008883203845471144, "loss_iou": 0.390625, "loss_num": 0.044189453125, "loss_xval": 1.0, "num_input_tokens_seen": 34074960, "step": 609 }, { "epoch": 1.358574610244989, "grad_norm": 21.543336868286133, "learning_rate": 1e-06, "loss": 1.0178, "num_input_tokens_seen": 34131548, "step": 610 }, { "epoch": 1.358574610244989, "loss": 0.7443816661834717, "loss_ce": 0.0036590369418263435, "loss_iou": 0.2890625, "loss_num": 0.03271484375, "loss_xval": 0.7421875, "num_input_tokens_seen": 34131548, "step": 610 }, { "epoch": 1.3608017817371938, "grad_norm": 205.24061584472656, "learning_rate": 1e-06, "loss": 1.1072, "num_input_tokens_seen": 34187164, "step": 611 }, { "epoch": 1.3608017817371938, "loss": 1.3798058032989502, "loss_ce": 0.0013879577163606882, "loss_iou": 0.5625, "loss_num": 0.05078125, "loss_xval": 1.375, "num_input_tokens_seen": 34187164, "step": 611 }, { "epoch": 1.3630289532293987, "grad_norm": 50.390380859375, "learning_rate": 1e-06, "loss": 1.4146, "num_input_tokens_seen": 34240892, "step": 612 }, { "epoch": 1.3630289532293987, "loss": 1.688087821006775, "loss_ce": 0.00644719647243619, "loss_iou": 0.6328125, "loss_num": 0.08251953125, "loss_xval": 1.6796875, "num_input_tokens_seen": 34240892, "step": 612 }, { "epoch": 1.3652561247216035, "grad_norm": 22.16488265991211, "learning_rate": 1e-06, "loss": 1.0745, "num_input_tokens_seen": 34296472, "step": 613 }, { "epoch": 1.3652561247216035, "loss": 1.2694756984710693, "loss_ce": 0.0006769584724679589, "loss_iou": 0.5078125, "loss_num": 0.050537109375, "loss_xval": 1.265625, "num_input_tokens_seen": 34296472, "step": 613 }, { "epoch": 1.3674832962138086, "grad_norm": 44.119998931884766, "learning_rate": 1e-06, "loss": 1.1704, "num_input_tokens_seen": 34352164, "step": 614 }, { "epoch": 1.3674832962138086, "loss": 1.0769069194793701, "loss_ce": 0.0012233321322128177, "loss_iou": 0.431640625, "loss_num": 0.04248046875, "loss_xval": 1.078125, "num_input_tokens_seen": 34352164, "step": 614 }, { "epoch": 1.3697104677060135, "grad_norm": 38.12681198120117, "learning_rate": 1e-06, "loss": 1.2304, "num_input_tokens_seen": 34408956, "step": 615 }, { "epoch": 1.3697104677060135, "loss": 1.0864746570587158, "loss_ce": 0.0005371640436351299, "loss_iou": 0.447265625, "loss_num": 0.038330078125, "loss_xval": 1.0859375, "num_input_tokens_seen": 34408956, "step": 615 }, { "epoch": 1.3719376391982183, "grad_norm": 22.228532791137695, "learning_rate": 1e-06, "loss": 1.0929, "num_input_tokens_seen": 34462868, "step": 616 }, { "epoch": 1.3719376391982183, "loss": 1.1881909370422363, "loss_ce": 0.0011792225996032357, "loss_iou": 0.458984375, "loss_num": 0.053955078125, "loss_xval": 1.1875, "num_input_tokens_seen": 34462868, "step": 616 }, { "epoch": 1.3741648106904232, "grad_norm": 16.62993812561035, "learning_rate": 1e-06, "loss": 0.8739, "num_input_tokens_seen": 34519148, "step": 617 }, { "epoch": 1.3741648106904232, "loss": 0.939282238483429, "loss_ce": 0.0008056251681409776, "loss_iou": 0.3828125, "loss_num": 0.0341796875, "loss_xval": 0.9375, "num_input_tokens_seen": 34519148, "step": 617 }, { "epoch": 1.376391982182628, "grad_norm": 26.92157745361328, "learning_rate": 1e-06, "loss": 1.16, "num_input_tokens_seen": 34576480, "step": 618 }, { "epoch": 1.376391982182628, "loss": 1.1556055545806885, "loss_ce": 0.002041085623204708, "loss_iou": 0.494140625, "loss_num": 0.033447265625, "loss_xval": 1.15625, "num_input_tokens_seen": 34576480, "step": 618 }, { "epoch": 1.378619153674833, "grad_norm": 78.8863525390625, "learning_rate": 1e-06, "loss": 0.8745, "num_input_tokens_seen": 34631844, "step": 619 }, { "epoch": 1.378619153674833, "loss": 0.6508793234825134, "loss_ce": 0.0004887055838480592, "loss_iou": 0.263671875, "loss_num": 0.0242919921875, "loss_xval": 0.6484375, "num_input_tokens_seen": 34631844, "step": 619 }, { "epoch": 1.3808463251670378, "grad_norm": 18.949325561523438, "learning_rate": 1e-06, "loss": 0.8934, "num_input_tokens_seen": 34687740, "step": 620 }, { "epoch": 1.3808463251670378, "loss": 0.8919047117233276, "loss_ce": 0.000791468657553196, "loss_iou": 0.373046875, "loss_num": 0.029052734375, "loss_xval": 0.890625, "num_input_tokens_seen": 34687740, "step": 620 }, { "epoch": 1.3830734966592428, "grad_norm": 21.888832092285156, "learning_rate": 1e-06, "loss": 1.0868, "num_input_tokens_seen": 34744896, "step": 621 }, { "epoch": 1.3830734966592428, "loss": 1.120755910873413, "loss_ce": 0.0006387863541021943, "loss_iou": 0.486328125, "loss_num": 0.0291748046875, "loss_xval": 1.1171875, "num_input_tokens_seen": 34744896, "step": 621 }, { "epoch": 1.3853006681514477, "grad_norm": 62.403011322021484, "learning_rate": 1e-06, "loss": 0.8124, "num_input_tokens_seen": 34800684, "step": 622 }, { "epoch": 1.3853006681514477, "loss": 0.9201839566230774, "loss_ce": 0.0012385983718559146, "loss_iou": 0.3828125, "loss_num": 0.031005859375, "loss_xval": 0.91796875, "num_input_tokens_seen": 34800684, "step": 622 }, { "epoch": 1.3875278396436526, "grad_norm": 28.83528709411621, "learning_rate": 1e-06, "loss": 1.2444, "num_input_tokens_seen": 34856832, "step": 623 }, { "epoch": 1.3875278396436526, "loss": 1.0172494649887085, "loss_ce": 0.00113617442548275, "loss_iou": 0.41796875, "loss_num": 0.0361328125, "loss_xval": 1.015625, "num_input_tokens_seen": 34856832, "step": 623 }, { "epoch": 1.3897550111358574, "grad_norm": 62.715023040771484, "learning_rate": 1e-06, "loss": 1.3, "num_input_tokens_seen": 34914236, "step": 624 }, { "epoch": 1.3897550111358574, "loss": 1.4728524684906006, "loss_ce": 0.002149291103705764, "loss_iou": 0.5859375, "loss_num": 0.060302734375, "loss_xval": 1.46875, "num_input_tokens_seen": 34914236, "step": 624 }, { "epoch": 1.3919821826280623, "grad_norm": 22.02700424194336, "learning_rate": 1e-06, "loss": 1.218, "num_input_tokens_seen": 34970880, "step": 625 }, { "epoch": 1.3919821826280623, "loss": 1.4092342853546143, "loss_ce": 0.0010311320656910539, "loss_iou": 0.59375, "loss_num": 0.044189453125, "loss_xval": 1.40625, "num_input_tokens_seen": 34970880, "step": 625 }, { "epoch": 1.3942093541202674, "grad_norm": 82.06532287597656, "learning_rate": 1e-06, "loss": 1.2257, "num_input_tokens_seen": 35026436, "step": 626 }, { "epoch": 1.3942093541202674, "loss": 1.3661468029022217, "loss_ce": 0.0018889284692704678, "loss_iou": 0.4921875, "loss_num": 0.0751953125, "loss_xval": 1.3671875, "num_input_tokens_seen": 35026436, "step": 626 }, { "epoch": 1.3964365256124722, "grad_norm": 21.959980010986328, "learning_rate": 1e-06, "loss": 1.0485, "num_input_tokens_seen": 35083640, "step": 627 }, { "epoch": 1.3964365256124722, "loss": 1.2543073892593384, "loss_ce": 0.0008894825004972517, "loss_iou": 0.47265625, "loss_num": 0.061767578125, "loss_xval": 1.25, "num_input_tokens_seen": 35083640, "step": 627 }, { "epoch": 1.398663697104677, "grad_norm": 69.06570434570312, "learning_rate": 1e-06, "loss": 1.1878, "num_input_tokens_seen": 35139632, "step": 628 }, { "epoch": 1.398663697104677, "loss": 1.040403127670288, "loss_ce": 0.003171587362885475, "loss_iou": 0.4375, "loss_num": 0.032958984375, "loss_xval": 1.0390625, "num_input_tokens_seen": 35139632, "step": 628 }, { "epoch": 1.400890868596882, "grad_norm": 32.16525650024414, "learning_rate": 1e-06, "loss": 1.4466, "num_input_tokens_seen": 35194568, "step": 629 }, { "epoch": 1.400890868596882, "loss": 1.3845007419586182, "loss_ce": 0.006571032106876373, "loss_iou": 0.474609375, "loss_num": 0.0849609375, "loss_xval": 1.375, "num_input_tokens_seen": 35194568, "step": 629 }, { "epoch": 1.4031180400890868, "grad_norm": 19.882808685302734, "learning_rate": 1e-06, "loss": 0.7151, "num_input_tokens_seen": 35252680, "step": 630 }, { "epoch": 1.4031180400890868, "loss": 0.6794017553329468, "loss_ce": 0.00044663704466074705, "loss_iou": 0.251953125, "loss_num": 0.034912109375, "loss_xval": 0.6796875, "num_input_tokens_seen": 35252680, "step": 630 }, { "epoch": 1.4053452115812917, "grad_norm": 23.41751480102539, "learning_rate": 1e-06, "loss": 0.8731, "num_input_tokens_seen": 35308480, "step": 631 }, { "epoch": 1.4053452115812917, "loss": 0.9022737741470337, "loss_ce": 0.0009065663907676935, "loss_iou": 0.376953125, "loss_num": 0.02978515625, "loss_xval": 0.90234375, "num_input_tokens_seen": 35308480, "step": 631 }, { "epoch": 1.4075723830734965, "grad_norm": 23.227956771850586, "learning_rate": 1e-06, "loss": 1.2639, "num_input_tokens_seen": 35364808, "step": 632 }, { "epoch": 1.4075723830734965, "loss": 1.2337548732757568, "loss_ce": 0.0023096189834177494, "loss_iou": 0.5, "loss_num": 0.045654296875, "loss_xval": 1.234375, "num_input_tokens_seen": 35364808, "step": 632 }, { "epoch": 1.4097995545657016, "grad_norm": 55.23591232299805, "learning_rate": 1e-06, "loss": 1.1625, "num_input_tokens_seen": 35420704, "step": 633 }, { "epoch": 1.4097995545657016, "loss": 0.9497365951538086, "loss_ce": 0.0005178386345505714, "loss_iou": 0.365234375, "loss_num": 0.043701171875, "loss_xval": 0.94921875, "num_input_tokens_seen": 35420704, "step": 633 }, { "epoch": 1.4120267260579065, "grad_norm": 26.891746520996094, "learning_rate": 1e-06, "loss": 1.1738, "num_input_tokens_seen": 35473184, "step": 634 }, { "epoch": 1.4120267260579065, "loss": 1.1008825302124023, "loss_ce": 0.0012731605675071478, "loss_iou": 0.453125, "loss_num": 0.03857421875, "loss_xval": 1.1015625, "num_input_tokens_seen": 35473184, "step": 634 }, { "epoch": 1.4142538975501113, "grad_norm": 17.678611755371094, "learning_rate": 1e-06, "loss": 1.1334, "num_input_tokens_seen": 35530148, "step": 635 }, { "epoch": 1.4142538975501113, "loss": 1.0327593088150024, "loss_ce": 0.0005327487015165389, "loss_iou": 0.443359375, "loss_num": 0.0294189453125, "loss_xval": 1.03125, "num_input_tokens_seen": 35530148, "step": 635 }, { "epoch": 1.4164810690423162, "grad_norm": 313.0267028808594, "learning_rate": 1e-06, "loss": 1.3829, "num_input_tokens_seen": 35583856, "step": 636 }, { "epoch": 1.4164810690423162, "loss": 1.357082486152649, "loss_ce": 0.0023461126256734133, "loss_iou": 0.5625, "loss_num": 0.046142578125, "loss_xval": 1.3515625, "num_input_tokens_seen": 35583856, "step": 636 }, { "epoch": 1.4187082405345213, "grad_norm": 20.430328369140625, "learning_rate": 1e-06, "loss": 1.2449, "num_input_tokens_seen": 35640836, "step": 637 }, { "epoch": 1.4187082405345213, "loss": 1.4005608558654785, "loss_ce": 0.0006584060029126704, "loss_iou": 0.56640625, "loss_num": 0.052978515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 35640836, "step": 637 }, { "epoch": 1.4209354120267261, "grad_norm": 23.457950592041016, "learning_rate": 1e-06, "loss": 0.8546, "num_input_tokens_seen": 35695288, "step": 638 }, { "epoch": 1.4209354120267261, "loss": 0.8239110112190247, "loss_ce": 0.0017674551345407963, "loss_iou": 0.34375, "loss_num": 0.02685546875, "loss_xval": 0.8203125, "num_input_tokens_seen": 35695288, "step": 638 }, { "epoch": 1.423162583518931, "grad_norm": 22.952999114990234, "learning_rate": 1e-06, "loss": 0.8464, "num_input_tokens_seen": 35751468, "step": 639 }, { "epoch": 1.423162583518931, "loss": 1.025329351425171, "loss_ce": 0.0009152949205599725, "loss_iou": 0.423828125, "loss_num": 0.03515625, "loss_xval": 1.0234375, "num_input_tokens_seen": 35751468, "step": 639 }, { "epoch": 1.4253897550111359, "grad_norm": 17.500404357910156, "learning_rate": 1e-06, "loss": 0.8751, "num_input_tokens_seen": 35809384, "step": 640 }, { "epoch": 1.4253897550111359, "loss": 0.8379029035568237, "loss_ce": 0.0005006167921237648, "loss_iou": 0.34765625, "loss_num": 0.0286865234375, "loss_xval": 0.8359375, "num_input_tokens_seen": 35809384, "step": 640 }, { "epoch": 1.4276169265033407, "grad_norm": 30.681381225585938, "learning_rate": 1e-06, "loss": 1.1528, "num_input_tokens_seen": 35865544, "step": 641 }, { "epoch": 1.4276169265033407, "loss": 1.2849559783935547, "loss_ce": 0.0005320889176800847, "loss_iou": 0.55078125, "loss_num": 0.037353515625, "loss_xval": 1.28125, "num_input_tokens_seen": 35865544, "step": 641 }, { "epoch": 1.4298440979955456, "grad_norm": 19.769329071044922, "learning_rate": 1e-06, "loss": 1.2354, "num_input_tokens_seen": 35921472, "step": 642 }, { "epoch": 1.4298440979955456, "loss": 1.344813346862793, "loss_ce": 0.0008191849919967353, "loss_iou": 0.5703125, "loss_num": 0.041259765625, "loss_xval": 1.34375, "num_input_tokens_seen": 35921472, "step": 642 }, { "epoch": 1.4320712694877504, "grad_norm": 166.1317901611328, "learning_rate": 1e-06, "loss": 1.3808, "num_input_tokens_seen": 35978028, "step": 643 }, { "epoch": 1.4320712694877504, "loss": 1.7419133186340332, "loss_ce": 0.002655471907928586, "loss_iou": 0.68359375, "loss_num": 0.0751953125, "loss_xval": 1.7421875, "num_input_tokens_seen": 35978028, "step": 643 }, { "epoch": 1.4342984409799555, "grad_norm": 21.949459075927734, "learning_rate": 1e-06, "loss": 0.9716, "num_input_tokens_seen": 36033152, "step": 644 }, { "epoch": 1.4342984409799555, "loss": 0.8689587116241455, "loss_ce": 0.0007946894620545208, "loss_iou": 0.376953125, "loss_num": 0.022705078125, "loss_xval": 0.8671875, "num_input_tokens_seen": 36033152, "step": 644 }, { "epoch": 1.4365256124721604, "grad_norm": 53.461631774902344, "learning_rate": 1e-06, "loss": 1.3388, "num_input_tokens_seen": 36088644, "step": 645 }, { "epoch": 1.4365256124721604, "loss": 1.3191097974777222, "loss_ce": 0.0017270214157178998, "loss_iou": 0.490234375, "loss_num": 0.0673828125, "loss_xval": 1.3203125, "num_input_tokens_seen": 36088644, "step": 645 }, { "epoch": 1.4387527839643652, "grad_norm": 17.125713348388672, "learning_rate": 1e-06, "loss": 1.1615, "num_input_tokens_seen": 36147156, "step": 646 }, { "epoch": 1.4387527839643652, "loss": 1.204886555671692, "loss_ce": 0.0012732010800391436, "loss_iou": 0.46875, "loss_num": 0.052978515625, "loss_xval": 1.203125, "num_input_tokens_seen": 36147156, "step": 646 }, { "epoch": 1.44097995545657, "grad_norm": 20.655792236328125, "learning_rate": 1e-06, "loss": 0.8804, "num_input_tokens_seen": 36205528, "step": 647 }, { "epoch": 1.44097995545657, "loss": 0.9184082746505737, "loss_ce": 0.0004395167634356767, "loss_iou": 0.36328125, "loss_num": 0.037841796875, "loss_xval": 0.91796875, "num_input_tokens_seen": 36205528, "step": 647 }, { "epoch": 1.4432071269487752, "grad_norm": 27.488887786865234, "learning_rate": 1e-06, "loss": 1.0496, "num_input_tokens_seen": 36263608, "step": 648 }, { "epoch": 1.4432071269487752, "loss": 1.052678108215332, "loss_ce": 0.000431923137512058, "loss_iou": 0.423828125, "loss_num": 0.041259765625, "loss_xval": 1.0546875, "num_input_tokens_seen": 36263608, "step": 648 }, { "epoch": 1.44543429844098, "grad_norm": 19.03914451599121, "learning_rate": 1e-06, "loss": 1.0058, "num_input_tokens_seen": 36320740, "step": 649 }, { "epoch": 1.44543429844098, "loss": 1.1877658367156982, "loss_ce": 0.0014864858239889145, "loss_iou": 0.46484375, "loss_num": 0.05126953125, "loss_xval": 1.1875, "num_input_tokens_seen": 36320740, "step": 649 }, { "epoch": 1.447661469933185, "grad_norm": 30.350322723388672, "learning_rate": 1e-06, "loss": 1.1674, "num_input_tokens_seen": 36375948, "step": 650 }, { "epoch": 1.447661469933185, "loss": 1.6142632961273193, "loss_ce": 0.0014703237684443593, "loss_iou": 0.6484375, "loss_num": 0.0625, "loss_xval": 1.609375, "num_input_tokens_seen": 36375948, "step": 650 }, { "epoch": 1.4498886414253898, "grad_norm": 31.322311401367188, "learning_rate": 1e-06, "loss": 1.0709, "num_input_tokens_seen": 36433352, "step": 651 }, { "epoch": 1.4498886414253898, "loss": 1.1504631042480469, "loss_ce": 0.0005606971681118011, "loss_iou": 0.482421875, "loss_num": 0.037109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 36433352, "step": 651 }, { "epoch": 1.4521158129175946, "grad_norm": 58.9162712097168, "learning_rate": 1e-06, "loss": 1.1301, "num_input_tokens_seen": 36489816, "step": 652 }, { "epoch": 1.4521158129175946, "loss": 1.2538143396377563, "loss_ce": 0.001372913713566959, "loss_iou": 0.494140625, "loss_num": 0.052490234375, "loss_xval": 1.25, "num_input_tokens_seen": 36489816, "step": 652 }, { "epoch": 1.4543429844097995, "grad_norm": 17.665956497192383, "learning_rate": 1e-06, "loss": 1.1987, "num_input_tokens_seen": 36542532, "step": 653 }, { "epoch": 1.4543429844097995, "loss": 1.2465870380401611, "loss_ce": 0.0004933135933242738, "loss_iou": 0.50390625, "loss_num": 0.047607421875, "loss_xval": 1.25, "num_input_tokens_seen": 36542532, "step": 653 }, { "epoch": 1.4565701559020043, "grad_norm": 63.455204010009766, "learning_rate": 1e-06, "loss": 1.0736, "num_input_tokens_seen": 36599520, "step": 654 }, { "epoch": 1.4565701559020043, "loss": 1.2124230861663818, "loss_ce": 0.0005090509075671434, "loss_iou": 0.4765625, "loss_num": 0.0517578125, "loss_xval": 1.2109375, "num_input_tokens_seen": 36599520, "step": 654 }, { "epoch": 1.4587973273942094, "grad_norm": 26.223678588867188, "learning_rate": 1e-06, "loss": 1.0227, "num_input_tokens_seen": 36656080, "step": 655 }, { "epoch": 1.4587973273942094, "loss": 0.9443738460540771, "loss_ce": 0.000526183401234448, "loss_iou": 0.353515625, "loss_num": 0.047119140625, "loss_xval": 0.9453125, "num_input_tokens_seen": 36656080, "step": 655 }, { "epoch": 1.4610244988864143, "grad_norm": 41.6849365234375, "learning_rate": 1e-06, "loss": 1.0155, "num_input_tokens_seen": 36712280, "step": 656 }, { "epoch": 1.4610244988864143, "loss": 1.218764066696167, "loss_ce": 0.008314890787005424, "loss_iou": 0.47265625, "loss_num": 0.052978515625, "loss_xval": 1.2109375, "num_input_tokens_seen": 36712280, "step": 656 }, { "epoch": 1.4632516703786191, "grad_norm": 26.14752197265625, "learning_rate": 1e-06, "loss": 1.06, "num_input_tokens_seen": 36769340, "step": 657 }, { "epoch": 1.4632516703786191, "loss": 1.0224517583847046, "loss_ce": 0.0007232209318317473, "loss_iou": 0.39453125, "loss_num": 0.04638671875, "loss_xval": 1.0234375, "num_input_tokens_seen": 36769340, "step": 657 }, { "epoch": 1.465478841870824, "grad_norm": 25.274215698242188, "learning_rate": 1e-06, "loss": 1.2905, "num_input_tokens_seen": 36824936, "step": 658 }, { "epoch": 1.465478841870824, "loss": 1.3570466041564941, "loss_ce": 0.0010896207531914115, "loss_iou": 0.57421875, "loss_num": 0.041748046875, "loss_xval": 1.359375, "num_input_tokens_seen": 36824936, "step": 658 }, { "epoch": 1.467706013363029, "grad_norm": 18.70472526550293, "learning_rate": 1e-06, "loss": 0.9063, "num_input_tokens_seen": 36883996, "step": 659 }, { "epoch": 1.467706013363029, "loss": 0.9597538709640503, "loss_ce": 0.005408107303082943, "loss_iou": 0.376953125, "loss_num": 0.0400390625, "loss_xval": 0.953125, "num_input_tokens_seen": 36883996, "step": 659 }, { "epoch": 1.469933184855234, "grad_norm": 24.98056411743164, "learning_rate": 1e-06, "loss": 0.9572, "num_input_tokens_seen": 36936316, "step": 660 }, { "epoch": 1.469933184855234, "loss": 0.7073796391487122, "loss_ce": 0.006451886147260666, "loss_iou": 0.283203125, "loss_num": 0.02685546875, "loss_xval": 0.69921875, "num_input_tokens_seen": 36936316, "step": 660 }, { "epoch": 1.4721603563474388, "grad_norm": 22.775863647460938, "learning_rate": 1e-06, "loss": 1.0446, "num_input_tokens_seen": 36991328, "step": 661 }, { "epoch": 1.4721603563474388, "loss": 1.084336757659912, "loss_ce": 0.0008406001143157482, "loss_iou": 0.4296875, "loss_num": 0.044921875, "loss_xval": 1.0859375, "num_input_tokens_seen": 36991328, "step": 661 }, { "epoch": 1.4743875278396437, "grad_norm": 25.190065383911133, "learning_rate": 1e-06, "loss": 1.2837, "num_input_tokens_seen": 37044136, "step": 662 }, { "epoch": 1.4743875278396437, "loss": 1.5641846656799316, "loss_ce": 0.0026612321380525827, "loss_iou": 0.609375, "loss_num": 0.06787109375, "loss_xval": 1.5625, "num_input_tokens_seen": 37044136, "step": 662 }, { "epoch": 1.4766146993318485, "grad_norm": 28.881362915039062, "learning_rate": 1e-06, "loss": 0.8291, "num_input_tokens_seen": 37100336, "step": 663 }, { "epoch": 1.4766146993318485, "loss": 0.8968852758407593, "loss_ce": 0.003330609295517206, "loss_iou": 0.35546875, "loss_num": 0.036376953125, "loss_xval": 0.89453125, "num_input_tokens_seen": 37100336, "step": 663 }, { "epoch": 1.4788418708240534, "grad_norm": 24.36873435974121, "learning_rate": 1e-06, "loss": 1.2756, "num_input_tokens_seen": 37154992, "step": 664 }, { "epoch": 1.4788418708240534, "loss": 1.3726325035095215, "loss_ce": 0.0030035879462957382, "loss_iou": 0.5546875, "loss_num": 0.052001953125, "loss_xval": 1.3671875, "num_input_tokens_seen": 37154992, "step": 664 }, { "epoch": 1.4810690423162582, "grad_norm": 40.89757537841797, "learning_rate": 1e-06, "loss": 1.1327, "num_input_tokens_seen": 37208516, "step": 665 }, { "epoch": 1.4810690423162582, "loss": 0.8330258131027222, "loss_ce": 0.0005062957643531263, "loss_iou": 0.35546875, "loss_num": 0.024169921875, "loss_xval": 0.83203125, "num_input_tokens_seen": 37208516, "step": 665 }, { "epoch": 1.4832962138084633, "grad_norm": 33.48430252075195, "learning_rate": 1e-06, "loss": 1.0491, "num_input_tokens_seen": 37263476, "step": 666 }, { "epoch": 1.4832962138084633, "loss": 1.249821424484253, "loss_ce": 0.001530370325781405, "loss_iou": 0.55078125, "loss_num": 0.02978515625, "loss_xval": 1.25, "num_input_tokens_seen": 37263476, "step": 666 }, { "epoch": 1.4855233853006682, "grad_norm": 31.53962516784668, "learning_rate": 1e-06, "loss": 0.8733, "num_input_tokens_seen": 37319864, "step": 667 }, { "epoch": 1.4855233853006682, "loss": 0.6637530326843262, "loss_ce": 0.00213192543014884, "loss_iou": 0.27734375, "loss_num": 0.0218505859375, "loss_xval": 0.66015625, "num_input_tokens_seen": 37319864, "step": 667 }, { "epoch": 1.487750556792873, "grad_norm": 19.535804748535156, "learning_rate": 1e-06, "loss": 1.0933, "num_input_tokens_seen": 37378472, "step": 668 }, { "epoch": 1.487750556792873, "loss": 1.066902756690979, "loss_ce": 0.000496495165862143, "loss_iou": 0.451171875, "loss_num": 0.03271484375, "loss_xval": 1.0625, "num_input_tokens_seen": 37378472, "step": 668 }, { "epoch": 1.489977728285078, "grad_norm": 35.252281188964844, "learning_rate": 1e-06, "loss": 1.4544, "num_input_tokens_seen": 37433128, "step": 669 }, { "epoch": 1.489977728285078, "loss": 1.5701709985733032, "loss_ce": 0.0008350461139343679, "loss_iou": 0.640625, "loss_num": 0.05712890625, "loss_xval": 1.5703125, "num_input_tokens_seen": 37433128, "step": 669 }, { "epoch": 1.492204899777283, "grad_norm": 22.478748321533203, "learning_rate": 1e-06, "loss": 1.0887, "num_input_tokens_seen": 37487148, "step": 670 }, { "epoch": 1.492204899777283, "loss": 0.9821290373802185, "loss_ce": 0.0026368550024926662, "loss_iou": 0.333984375, "loss_num": 0.06201171875, "loss_xval": 0.98046875, "num_input_tokens_seen": 37487148, "step": 670 }, { "epoch": 1.4944320712694878, "grad_norm": 26.4521484375, "learning_rate": 1e-06, "loss": 1.0069, "num_input_tokens_seen": 37545644, "step": 671 }, { "epoch": 1.4944320712694878, "loss": 1.1284525394439697, "loss_ce": 0.0176127590239048, "loss_iou": 0.423828125, "loss_num": 0.052734375, "loss_xval": 1.109375, "num_input_tokens_seen": 37545644, "step": 671 }, { "epoch": 1.4966592427616927, "grad_norm": 20.00921058654785, "learning_rate": 1e-06, "loss": 1.1729, "num_input_tokens_seen": 37602744, "step": 672 }, { "epoch": 1.4966592427616927, "loss": 1.4309265613555908, "loss_ce": 0.0017273698467761278, "loss_iou": 0.57421875, "loss_num": 0.05615234375, "loss_xval": 1.4296875, "num_input_tokens_seen": 37602744, "step": 672 }, { "epoch": 1.4988864142538976, "grad_norm": 21.240440368652344, "learning_rate": 1e-06, "loss": 0.9773, "num_input_tokens_seen": 37658080, "step": 673 }, { "epoch": 1.4988864142538976, "loss": 1.0557523965835571, "loss_ce": 0.0005766096874140203, "loss_iou": 0.42578125, "loss_num": 0.040771484375, "loss_xval": 1.0546875, "num_input_tokens_seen": 37658080, "step": 673 }, { "epoch": 1.5011135857461024, "grad_norm": 20.119047164916992, "learning_rate": 1e-06, "loss": 0.8459, "num_input_tokens_seen": 37713888, "step": 674 }, { "epoch": 1.5011135857461024, "loss": 0.8226086497306824, "loss_ce": 0.0013196287909522653, "loss_iou": 0.34375, "loss_num": 0.0263671875, "loss_xval": 0.8203125, "num_input_tokens_seen": 37713888, "step": 674 }, { "epoch": 1.5033407572383073, "grad_norm": 29.80697250366211, "learning_rate": 1e-06, "loss": 0.9022, "num_input_tokens_seen": 37768756, "step": 675 }, { "epoch": 1.5033407572383073, "loss": 0.9731925129890442, "loss_ce": 0.001024498138576746, "loss_iou": 0.423828125, "loss_num": 0.0247802734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 37768756, "step": 675 }, { "epoch": 1.5055679287305122, "grad_norm": 17.169862747192383, "learning_rate": 1e-06, "loss": 1.2668, "num_input_tokens_seen": 37821224, "step": 676 }, { "epoch": 1.5055679287305122, "loss": 1.339667558670044, "loss_ce": 0.0008004190749488771, "loss_iou": 0.5390625, "loss_num": 0.052490234375, "loss_xval": 1.3359375, "num_input_tokens_seen": 37821224, "step": 676 }, { "epoch": 1.507795100222717, "grad_norm": 48.648136138916016, "learning_rate": 1e-06, "loss": 1.1283, "num_input_tokens_seen": 37876864, "step": 677 }, { "epoch": 1.507795100222717, "loss": 1.0091381072998047, "loss_ce": 0.00047117803478613496, "loss_iou": 0.412109375, "loss_num": 0.03662109375, "loss_xval": 1.0078125, "num_input_tokens_seen": 37876864, "step": 677 }, { "epoch": 1.510022271714922, "grad_norm": 31.37755012512207, "learning_rate": 1e-06, "loss": 1.0842, "num_input_tokens_seen": 37934036, "step": 678 }, { "epoch": 1.510022271714922, "loss": 0.8647042512893677, "loss_ce": 0.0004463824152480811, "loss_iou": 0.37109375, "loss_num": 0.0245361328125, "loss_xval": 0.86328125, "num_input_tokens_seen": 37934036, "step": 678 }, { "epoch": 1.512249443207127, "grad_norm": 75.93875885009766, "learning_rate": 1e-06, "loss": 1.3985, "num_input_tokens_seen": 37987492, "step": 679 }, { "epoch": 1.512249443207127, "loss": 1.21080482006073, "loss_ce": 0.0013321400620043278, "loss_iou": 0.5, "loss_num": 0.041015625, "loss_xval": 1.2109375, "num_input_tokens_seen": 37987492, "step": 679 }, { "epoch": 1.5144766146993318, "grad_norm": 28.709468841552734, "learning_rate": 1e-06, "loss": 1.0242, "num_input_tokens_seen": 38046268, "step": 680 }, { "epoch": 1.5144766146993318, "loss": 0.7978775501251221, "loss_ce": 0.0007584316190332174, "loss_iou": 0.3046875, "loss_num": 0.0380859375, "loss_xval": 0.796875, "num_input_tokens_seen": 38046268, "step": 680 }, { "epoch": 1.516703786191537, "grad_norm": 15.11223030090332, "learning_rate": 1e-06, "loss": 1.0458, "num_input_tokens_seen": 38096816, "step": 681 }, { "epoch": 1.516703786191537, "loss": 0.8654755353927612, "loss_ce": 0.0004853087302763015, "loss_iou": 0.3515625, "loss_num": 0.032470703125, "loss_xval": 0.86328125, "num_input_tokens_seen": 38096816, "step": 681 }, { "epoch": 1.5189309576837418, "grad_norm": 28.030420303344727, "learning_rate": 1e-06, "loss": 1.1786, "num_input_tokens_seen": 38152668, "step": 682 }, { "epoch": 1.5189309576837418, "loss": 0.9609849452972412, "loss_ce": 0.0010240338742733002, "loss_iou": 0.392578125, "loss_num": 0.034912109375, "loss_xval": 0.9609375, "num_input_tokens_seen": 38152668, "step": 682 }, { "epoch": 1.5211581291759466, "grad_norm": 18.825443267822266, "learning_rate": 1e-06, "loss": 1.1633, "num_input_tokens_seen": 38208028, "step": 683 }, { "epoch": 1.5211581291759466, "loss": 1.3014280796051025, "loss_ce": 0.0006468580104410648, "loss_iou": 0.5390625, "loss_num": 0.04541015625, "loss_xval": 1.296875, "num_input_tokens_seen": 38208028, "step": 683 }, { "epoch": 1.5233853006681515, "grad_norm": 33.09317398071289, "learning_rate": 1e-06, "loss": 1.0005, "num_input_tokens_seen": 38266628, "step": 684 }, { "epoch": 1.5233853006681515, "loss": 0.8423627614974976, "loss_ce": 0.0005658682784996927, "loss_iou": 0.326171875, "loss_num": 0.037841796875, "loss_xval": 0.84375, "num_input_tokens_seen": 38266628, "step": 684 }, { "epoch": 1.5256124721603563, "grad_norm": 21.19887924194336, "learning_rate": 1e-06, "loss": 1.2159, "num_input_tokens_seen": 38321396, "step": 685 }, { "epoch": 1.5256124721603563, "loss": 1.512146234512329, "loss_ce": 0.001403960632160306, "loss_iou": 0.6015625, "loss_num": 0.06201171875, "loss_xval": 1.5078125, "num_input_tokens_seen": 38321396, "step": 685 }, { "epoch": 1.5278396436525612, "grad_norm": 108.64661407470703, "learning_rate": 1e-06, "loss": 1.1361, "num_input_tokens_seen": 38376092, "step": 686 }, { "epoch": 1.5278396436525612, "loss": 1.0967187881469727, "loss_ce": 0.0010156568605452776, "loss_iou": 0.453125, "loss_num": 0.038330078125, "loss_xval": 1.09375, "num_input_tokens_seen": 38376092, "step": 686 }, { "epoch": 1.530066815144766, "grad_norm": 18.550582885742188, "learning_rate": 1e-06, "loss": 1.2406, "num_input_tokens_seen": 38432308, "step": 687 }, { "epoch": 1.530066815144766, "loss": 1.231999158859253, "loss_ce": 0.0005538459517993033, "loss_iou": 0.515625, "loss_num": 0.040283203125, "loss_xval": 1.234375, "num_input_tokens_seen": 38432308, "step": 687 }, { "epoch": 1.532293986636971, "grad_norm": 54.29788589477539, "learning_rate": 1e-06, "loss": 0.9656, "num_input_tokens_seen": 38488264, "step": 688 }, { "epoch": 1.532293986636971, "loss": 0.8684132099151611, "loss_ce": 0.0007374430424533784, "loss_iou": 0.318359375, "loss_num": 0.046142578125, "loss_xval": 0.8671875, "num_input_tokens_seen": 38488264, "step": 688 }, { "epoch": 1.534521158129176, "grad_norm": 26.847867965698242, "learning_rate": 1e-06, "loss": 1.318, "num_input_tokens_seen": 38544928, "step": 689 }, { "epoch": 1.534521158129176, "loss": 1.185499906539917, "loss_ce": 0.0019061192870140076, "loss_iou": 0.49609375, "loss_num": 0.03857421875, "loss_xval": 1.1875, "num_input_tokens_seen": 38544928, "step": 689 }, { "epoch": 1.5367483296213809, "grad_norm": 17.0640926361084, "learning_rate": 1e-06, "loss": 1.1527, "num_input_tokens_seen": 38600752, "step": 690 }, { "epoch": 1.5367483296213809, "loss": 1.185793399810791, "loss_ce": 0.0007348479703068733, "loss_iou": 0.484375, "loss_num": 0.04345703125, "loss_xval": 1.1875, "num_input_tokens_seen": 38600752, "step": 690 }, { "epoch": 1.5389755011135857, "grad_norm": 125.89933013916016, "learning_rate": 1e-06, "loss": 1.053, "num_input_tokens_seen": 38656252, "step": 691 }, { "epoch": 1.5389755011135857, "loss": 1.032954454421997, "loss_ce": 0.0009720420930534601, "loss_iou": 0.39453125, "loss_num": 0.048583984375, "loss_xval": 1.03125, "num_input_tokens_seen": 38656252, "step": 691 }, { "epoch": 1.5412026726057908, "grad_norm": 50.515708923339844, "learning_rate": 1e-06, "loss": 1.1837, "num_input_tokens_seen": 38712256, "step": 692 }, { "epoch": 1.5412026726057908, "loss": 1.369874358177185, "loss_ce": 0.001466137240640819, "loss_iou": 0.53515625, "loss_num": 0.05908203125, "loss_xval": 1.3671875, "num_input_tokens_seen": 38712256, "step": 692 }, { "epoch": 1.5434298440979957, "grad_norm": 25.205232620239258, "learning_rate": 1e-06, "loss": 1.059, "num_input_tokens_seen": 38769176, "step": 693 }, { "epoch": 1.5434298440979957, "loss": 1.1137714385986328, "loss_ce": 0.0004902197397314012, "loss_iou": 0.462890625, "loss_num": 0.037353515625, "loss_xval": 1.109375, "num_input_tokens_seen": 38769176, "step": 693 }, { "epoch": 1.5456570155902005, "grad_norm": 17.223934173583984, "learning_rate": 1e-06, "loss": 1.1755, "num_input_tokens_seen": 38825140, "step": 694 }, { "epoch": 1.5456570155902005, "loss": 1.2859344482421875, "loss_ce": 0.0017547393217682838, "loss_iou": 0.48828125, "loss_num": 0.0615234375, "loss_xval": 1.28125, "num_input_tokens_seen": 38825140, "step": 694 }, { "epoch": 1.5478841870824054, "grad_norm": 36.42241287231445, "learning_rate": 1e-06, "loss": 1.0896, "num_input_tokens_seen": 38881164, "step": 695 }, { "epoch": 1.5478841870824054, "loss": 0.9976003170013428, "loss_ce": 0.0010183160193264484, "loss_iou": 0.416015625, "loss_num": 0.033203125, "loss_xval": 0.99609375, "num_input_tokens_seen": 38881164, "step": 695 }, { "epoch": 1.5501113585746102, "grad_norm": 53.83267593383789, "learning_rate": 1e-06, "loss": 1.0186, "num_input_tokens_seen": 38937620, "step": 696 }, { "epoch": 1.5501113585746102, "loss": 1.0366376638412476, "loss_ce": 0.0014814026653766632, "loss_iou": 0.435546875, "loss_num": 0.032958984375, "loss_xval": 1.03125, "num_input_tokens_seen": 38937620, "step": 696 }, { "epoch": 1.552338530066815, "grad_norm": 26.581382751464844, "learning_rate": 1e-06, "loss": 0.9886, "num_input_tokens_seen": 38990964, "step": 697 }, { "epoch": 1.552338530066815, "loss": 1.0419948101043701, "loss_ce": 0.0004909674171358347, "loss_iou": 0.42578125, "loss_num": 0.0380859375, "loss_xval": 1.0390625, "num_input_tokens_seen": 38990964, "step": 697 }, { "epoch": 1.55456570155902, "grad_norm": 20.961688995361328, "learning_rate": 1e-06, "loss": 1.2405, "num_input_tokens_seen": 39046004, "step": 698 }, { "epoch": 1.55456570155902, "loss": 1.3255722522735596, "loss_ce": 0.0023301024921238422, "loss_iou": 0.57421875, "loss_num": 0.035400390625, "loss_xval": 1.3203125, "num_input_tokens_seen": 39046004, "step": 698 }, { "epoch": 1.5567928730512248, "grad_norm": 18.719867706298828, "learning_rate": 1e-06, "loss": 1.0962, "num_input_tokens_seen": 39104440, "step": 699 }, { "epoch": 1.5567928730512248, "loss": 0.9274605512619019, "loss_ce": 0.0011909835739061236, "loss_iou": 0.38671875, "loss_num": 0.0311279296875, "loss_xval": 0.92578125, "num_input_tokens_seen": 39104440, "step": 699 }, { "epoch": 1.5590200445434297, "grad_norm": 27.276357650756836, "learning_rate": 1e-06, "loss": 1.0064, "num_input_tokens_seen": 39161992, "step": 700 }, { "epoch": 1.5590200445434297, "loss": 0.9984862208366394, "loss_ce": 0.0011718107853084803, "loss_iou": 0.408203125, "loss_num": 0.036376953125, "loss_xval": 0.99609375, "num_input_tokens_seen": 39161992, "step": 700 }, { "epoch": 1.5612472160356348, "grad_norm": 17.313262939453125, "learning_rate": 1e-06, "loss": 1.0717, "num_input_tokens_seen": 39216700, "step": 701 }, { "epoch": 1.5612472160356348, "loss": 0.8593438863754272, "loss_ce": 0.0034632175229489803, "loss_iou": 0.35546875, "loss_num": 0.0289306640625, "loss_xval": 0.85546875, "num_input_tokens_seen": 39216700, "step": 701 }, { "epoch": 1.5634743875278396, "grad_norm": 26.77741241455078, "learning_rate": 1e-06, "loss": 0.9991, "num_input_tokens_seen": 39273188, "step": 702 }, { "epoch": 1.5634743875278396, "loss": 1.1402983665466309, "loss_ce": 0.0006498623406514525, "loss_iou": 0.48828125, "loss_num": 0.032958984375, "loss_xval": 1.140625, "num_input_tokens_seen": 39273188, "step": 702 }, { "epoch": 1.5657015590200447, "grad_norm": 25.94588851928711, "learning_rate": 1e-06, "loss": 1.2285, "num_input_tokens_seen": 39326596, "step": 703 }, { "epoch": 1.5657015590200447, "loss": 1.107351541519165, "loss_ce": 0.0016386474017053843, "loss_iou": 0.392578125, "loss_num": 0.064453125, "loss_xval": 1.109375, "num_input_tokens_seen": 39326596, "step": 703 }, { "epoch": 1.5679287305122496, "grad_norm": 108.70765686035156, "learning_rate": 1e-06, "loss": 1.2865, "num_input_tokens_seen": 39381292, "step": 704 }, { "epoch": 1.5679287305122496, "loss": 1.2144091129302979, "loss_ce": 0.0024950681254267693, "loss_iou": 0.4765625, "loss_num": 0.051513671875, "loss_xval": 1.2109375, "num_input_tokens_seen": 39381292, "step": 704 }, { "epoch": 1.5701559020044544, "grad_norm": 27.831453323364258, "learning_rate": 1e-06, "loss": 1.1255, "num_input_tokens_seen": 39438056, "step": 705 }, { "epoch": 1.5701559020044544, "loss": 1.3021111488342285, "loss_ce": 0.0015739843947812915, "loss_iou": 0.5078125, "loss_num": 0.056884765625, "loss_xval": 1.296875, "num_input_tokens_seen": 39438056, "step": 705 }, { "epoch": 1.5723830734966593, "grad_norm": 35.32048797607422, "learning_rate": 1e-06, "loss": 0.9482, "num_input_tokens_seen": 39492676, "step": 706 }, { "epoch": 1.5723830734966593, "loss": 0.9542930722236633, "loss_ce": 0.0006797942915000021, "loss_iou": 0.390625, "loss_num": 0.03515625, "loss_xval": 0.953125, "num_input_tokens_seen": 39492676, "step": 706 }, { "epoch": 1.5746102449888641, "grad_norm": 65.93453216552734, "learning_rate": 1e-06, "loss": 1.0207, "num_input_tokens_seen": 39551388, "step": 707 }, { "epoch": 1.5746102449888641, "loss": 0.9525820016860962, "loss_ce": 0.0006776798400096595, "loss_iou": 0.421875, "loss_num": 0.021484375, "loss_xval": 0.953125, "num_input_tokens_seen": 39551388, "step": 707 }, { "epoch": 1.576837416481069, "grad_norm": 14.823631286621094, "learning_rate": 1e-06, "loss": 1.1039, "num_input_tokens_seen": 39607732, "step": 708 }, { "epoch": 1.576837416481069, "loss": 0.9811808466911316, "loss_ce": 0.00046793223009444773, "loss_iou": 0.404296875, "loss_num": 0.03466796875, "loss_xval": 0.98046875, "num_input_tokens_seen": 39607732, "step": 708 }, { "epoch": 1.5790645879732739, "grad_norm": 33.79949188232422, "learning_rate": 1e-06, "loss": 1.0272, "num_input_tokens_seen": 39664892, "step": 709 }, { "epoch": 1.5790645879732739, "loss": 0.8985086679458618, "loss_ce": 0.0005594115937128663, "loss_iou": 0.392578125, "loss_num": 0.0228271484375, "loss_xval": 0.8984375, "num_input_tokens_seen": 39664892, "step": 709 }, { "epoch": 1.5812917594654787, "grad_norm": 44.01935958862305, "learning_rate": 1e-06, "loss": 1.2837, "num_input_tokens_seen": 39720468, "step": 710 }, { "epoch": 1.5812917594654787, "loss": 1.2381861209869385, "loss_ce": 0.0008813057793304324, "loss_iou": 0.435546875, "loss_num": 0.0732421875, "loss_xval": 1.234375, "num_input_tokens_seen": 39720468, "step": 710 }, { "epoch": 1.5835189309576836, "grad_norm": 16.639955520629883, "learning_rate": 1e-06, "loss": 1.2093, "num_input_tokens_seen": 39776680, "step": 711 }, { "epoch": 1.5835189309576836, "loss": 1.147930383682251, "loss_ce": 0.001445973408408463, "loss_iou": 0.453125, "loss_num": 0.04833984375, "loss_xval": 1.1484375, "num_input_tokens_seen": 39776680, "step": 711 }, { "epoch": 1.5857461024498887, "grad_norm": 21.6805477142334, "learning_rate": 1e-06, "loss": 1.0205, "num_input_tokens_seen": 39830636, "step": 712 }, { "epoch": 1.5857461024498887, "loss": 1.0399500131607056, "loss_ce": 0.0016199484234675765, "loss_iou": 0.427734375, "loss_num": 0.036376953125, "loss_xval": 1.0390625, "num_input_tokens_seen": 39830636, "step": 712 }, { "epoch": 1.5879732739420935, "grad_norm": 50.981754302978516, "learning_rate": 1e-06, "loss": 1.0619, "num_input_tokens_seen": 39887336, "step": 713 }, { "epoch": 1.5879732739420935, "loss": 1.1285226345062256, "loss_ce": 0.0005930241313762963, "loss_iou": 0.43359375, "loss_num": 0.052001953125, "loss_xval": 1.125, "num_input_tokens_seen": 39887336, "step": 713 }, { "epoch": 1.5902004454342984, "grad_norm": 1745.5362548828125, "learning_rate": 1e-06, "loss": 1.0216, "num_input_tokens_seen": 39945364, "step": 714 }, { "epoch": 1.5902004454342984, "loss": 0.8426003456115723, "loss_ce": 0.0008034344646148384, "loss_iou": 0.34375, "loss_num": 0.031005859375, "loss_xval": 0.84375, "num_input_tokens_seen": 39945364, "step": 714 }, { "epoch": 1.5924276169265035, "grad_norm": 141.83999633789062, "learning_rate": 1e-06, "loss": 0.9186, "num_input_tokens_seen": 39998372, "step": 715 }, { "epoch": 1.5924276169265035, "loss": 0.9666658639907837, "loss_ce": 0.00035729241790249944, "loss_iou": 0.376953125, "loss_num": 0.04296875, "loss_xval": 0.96484375, "num_input_tokens_seen": 39998372, "step": 715 }, { "epoch": 1.5946547884187083, "grad_norm": 16.73200225830078, "learning_rate": 1e-06, "loss": 1.0519, "num_input_tokens_seen": 40053772, "step": 716 }, { "epoch": 1.5946547884187083, "loss": 1.3080520629882812, "loss_ce": 0.005317714065313339, "loss_iou": 0.55078125, "loss_num": 0.040771484375, "loss_xval": 1.3046875, "num_input_tokens_seen": 40053772, "step": 716 }, { "epoch": 1.5968819599109132, "grad_norm": 19.21235466003418, "learning_rate": 1e-06, "loss": 0.903, "num_input_tokens_seen": 40111152, "step": 717 }, { "epoch": 1.5968819599109132, "loss": 0.8331512808799744, "loss_ce": 0.00649112556129694, "loss_iou": 0.341796875, "loss_num": 0.0283203125, "loss_xval": 0.828125, "num_input_tokens_seen": 40111152, "step": 717 }, { "epoch": 1.599109131403118, "grad_norm": 26.08073616027832, "learning_rate": 1e-06, "loss": 1.0622, "num_input_tokens_seen": 40170092, "step": 718 }, { "epoch": 1.599109131403118, "loss": 1.0961337089538574, "loss_ce": 0.0006747127044945955, "loss_iou": 0.43359375, "loss_num": 0.045654296875, "loss_xval": 1.09375, "num_input_tokens_seen": 40170092, "step": 718 }, { "epoch": 1.601336302895323, "grad_norm": 38.96022033691406, "learning_rate": 1e-06, "loss": 1.0349, "num_input_tokens_seen": 40222380, "step": 719 }, { "epoch": 1.601336302895323, "loss": 1.0462639331817627, "loss_ce": 0.00048747030086815357, "loss_iou": 0.43359375, "loss_num": 0.03564453125, "loss_xval": 1.046875, "num_input_tokens_seen": 40222380, "step": 719 }, { "epoch": 1.6035634743875278, "grad_norm": 15.093145370483398, "learning_rate": 1e-06, "loss": 1.0315, "num_input_tokens_seen": 40279864, "step": 720 }, { "epoch": 1.6035634743875278, "loss": 1.010891079902649, "loss_ce": 0.0040551056154072285, "loss_iou": 0.416015625, "loss_num": 0.034912109375, "loss_xval": 1.0078125, "num_input_tokens_seen": 40279864, "step": 720 }, { "epoch": 1.6057906458797326, "grad_norm": 24.701709747314453, "learning_rate": 1e-06, "loss": 1.314, "num_input_tokens_seen": 40334520, "step": 721 }, { "epoch": 1.6057906458797326, "loss": 1.5099828243255615, "loss_ce": 0.0011937202652916312, "loss_iou": 0.5625, "loss_num": 0.07763671875, "loss_xval": 1.5078125, "num_input_tokens_seen": 40334520, "step": 721 }, { "epoch": 1.6080178173719375, "grad_norm": 18.33966064453125, "learning_rate": 1e-06, "loss": 1.0704, "num_input_tokens_seen": 40389100, "step": 722 }, { "epoch": 1.6080178173719375, "loss": 0.9851089715957642, "loss_ce": 0.0004898360930383205, "loss_iou": 0.43359375, "loss_num": 0.0233154296875, "loss_xval": 0.984375, "num_input_tokens_seen": 40389100, "step": 722 }, { "epoch": 1.6102449888641426, "grad_norm": 51.41325759887695, "learning_rate": 1e-06, "loss": 0.8882, "num_input_tokens_seen": 40446016, "step": 723 }, { "epoch": 1.6102449888641426, "loss": 1.013216257095337, "loss_ce": 0.0005209506489336491, "loss_iou": 0.431640625, "loss_num": 0.0296630859375, "loss_xval": 1.015625, "num_input_tokens_seen": 40446016, "step": 723 }, { "epoch": 1.6124721603563474, "grad_norm": 22.967365264892578, "learning_rate": 1e-06, "loss": 1.1492, "num_input_tokens_seen": 40500448, "step": 724 }, { "epoch": 1.6124721603563474, "loss": 1.168558120727539, "loss_ce": 0.0005894272471778095, "loss_iou": 0.4453125, "loss_num": 0.0556640625, "loss_xval": 1.171875, "num_input_tokens_seen": 40500448, "step": 724 }, { "epoch": 1.6146993318485523, "grad_norm": 54.2728157043457, "learning_rate": 1e-06, "loss": 1.0244, "num_input_tokens_seen": 40557300, "step": 725 }, { "epoch": 1.6146993318485523, "loss": 0.8771815896034241, "loss_ce": 0.0009608692489564419, "loss_iou": 0.353515625, "loss_num": 0.033935546875, "loss_xval": 0.875, "num_input_tokens_seen": 40557300, "step": 725 }, { "epoch": 1.6169265033407574, "grad_norm": 55.31407928466797, "learning_rate": 1e-06, "loss": 1.0433, "num_input_tokens_seen": 40613104, "step": 726 }, { "epoch": 1.6169265033407574, "loss": 1.0284351110458374, "loss_ce": 0.003044519107788801, "loss_iou": 0.4140625, "loss_num": 0.0390625, "loss_xval": 1.0234375, "num_input_tokens_seen": 40613104, "step": 726 }, { "epoch": 1.6191536748329622, "grad_norm": 24.382633209228516, "learning_rate": 1e-06, "loss": 0.9911, "num_input_tokens_seen": 40667412, "step": 727 }, { "epoch": 1.6191536748329622, "loss": 0.7859052419662476, "loss_ce": 0.0005047998856753111, "loss_iou": 0.33984375, "loss_num": 0.0216064453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 40667412, "step": 727 }, { "epoch": 1.621380846325167, "grad_norm": 22.375062942504883, "learning_rate": 1e-06, "loss": 0.8341, "num_input_tokens_seen": 40724616, "step": 728 }, { "epoch": 1.621380846325167, "loss": 0.7646250128746033, "loss_ce": 0.0004648742906283587, "loss_iou": 0.328125, "loss_num": 0.021728515625, "loss_xval": 0.765625, "num_input_tokens_seen": 40724616, "step": 728 }, { "epoch": 1.623608017817372, "grad_norm": 15.496011734008789, "learning_rate": 1e-06, "loss": 1.1697, "num_input_tokens_seen": 40780212, "step": 729 }, { "epoch": 1.623608017817372, "loss": 1.1705524921417236, "loss_ce": 0.0006306255236268044, "loss_iou": 0.484375, "loss_num": 0.04052734375, "loss_xval": 1.171875, "num_input_tokens_seen": 40780212, "step": 729 }, { "epoch": 1.6258351893095768, "grad_norm": 24.535715103149414, "learning_rate": 1e-06, "loss": 1.0096, "num_input_tokens_seen": 40837012, "step": 730 }, { "epoch": 1.6258351893095768, "loss": 0.9559507369995117, "loss_ce": 0.0008726270170882344, "loss_iou": 0.39453125, "loss_num": 0.033203125, "loss_xval": 0.953125, "num_input_tokens_seen": 40837012, "step": 730 }, { "epoch": 1.6280623608017817, "grad_norm": 16.106016159057617, "learning_rate": 1e-06, "loss": 1.144, "num_input_tokens_seen": 40891652, "step": 731 }, { "epoch": 1.6280623608017817, "loss": 1.1018345355987549, "loss_ce": 0.000516209052875638, "loss_iou": 0.482421875, "loss_num": 0.0274658203125, "loss_xval": 1.1015625, "num_input_tokens_seen": 40891652, "step": 731 }, { "epoch": 1.6302895322939865, "grad_norm": 34.02077102661133, "learning_rate": 1e-06, "loss": 1.173, "num_input_tokens_seen": 40947216, "step": 732 }, { "epoch": 1.6302895322939865, "loss": 1.2337870597839355, "loss_ce": 0.003562505356967449, "loss_iou": 0.45703125, "loss_num": 0.06298828125, "loss_xval": 1.2265625, "num_input_tokens_seen": 40947216, "step": 732 }, { "epoch": 1.6325167037861914, "grad_norm": 23.39971351623535, "learning_rate": 1e-06, "loss": 0.7367, "num_input_tokens_seen": 41005556, "step": 733 }, { "epoch": 1.6325167037861914, "loss": 0.6531933546066284, "loss_ce": 0.000361293728929013, "loss_iou": 0.2578125, "loss_num": 0.02783203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 41005556, "step": 733 }, { "epoch": 1.6347438752783965, "grad_norm": 15.164018630981445, "learning_rate": 1e-06, "loss": 1.2361, "num_input_tokens_seen": 41060208, "step": 734 }, { "epoch": 1.6347438752783965, "loss": 1.2857666015625, "loss_ce": 0.001342757255770266, "loss_iou": 0.484375, "loss_num": 0.06298828125, "loss_xval": 1.28125, "num_input_tokens_seen": 41060208, "step": 734 }, { "epoch": 1.6369710467706013, "grad_norm": 24.00090980529785, "learning_rate": 1e-06, "loss": 1.0606, "num_input_tokens_seen": 41116160, "step": 735 }, { "epoch": 1.6369710467706013, "loss": 0.6998146772384644, "loss_ce": 0.001450410927645862, "loss_iou": 0.283203125, "loss_num": 0.0262451171875, "loss_xval": 0.69921875, "num_input_tokens_seen": 41116160, "step": 735 }, { "epoch": 1.6391982182628062, "grad_norm": 42.26320266723633, "learning_rate": 1e-06, "loss": 0.9948, "num_input_tokens_seen": 41173264, "step": 736 }, { "epoch": 1.6391982182628062, "loss": 0.8883851766586304, "loss_ce": 0.0006898957653902471, "loss_iou": 0.380859375, "loss_num": 0.0252685546875, "loss_xval": 0.88671875, "num_input_tokens_seen": 41173264, "step": 736 }, { "epoch": 1.6414253897550113, "grad_norm": 17.52292823791504, "learning_rate": 1e-06, "loss": 1.1069, "num_input_tokens_seen": 41228928, "step": 737 }, { "epoch": 1.6414253897550113, "loss": 1.1075830459594727, "loss_ce": 0.001137720886617899, "loss_iou": 0.427734375, "loss_num": 0.050048828125, "loss_xval": 1.109375, "num_input_tokens_seen": 41228928, "step": 737 }, { "epoch": 1.6436525612472161, "grad_norm": 17.622894287109375, "learning_rate": 1e-06, "loss": 1.1064, "num_input_tokens_seen": 41285968, "step": 738 }, { "epoch": 1.6436525612472161, "loss": 0.900823712348938, "loss_ce": 0.0004331024829298258, "loss_iou": 0.37890625, "loss_num": 0.0281982421875, "loss_xval": 0.8984375, "num_input_tokens_seen": 41285968, "step": 738 }, { "epoch": 1.645879732739421, "grad_norm": 40.00740051269531, "learning_rate": 1e-06, "loss": 0.9503, "num_input_tokens_seen": 41341372, "step": 739 }, { "epoch": 1.645879732739421, "loss": 0.6320021152496338, "loss_ce": 0.0004103146493434906, "loss_iou": 0.2734375, "loss_num": 0.0167236328125, "loss_xval": 0.6328125, "num_input_tokens_seen": 41341372, "step": 739 }, { "epoch": 1.6481069042316259, "grad_norm": 23.410932540893555, "learning_rate": 1e-06, "loss": 1.1302, "num_input_tokens_seen": 41394872, "step": 740 }, { "epoch": 1.6481069042316259, "loss": 1.1568620204925537, "loss_ce": 0.001100304420106113, "loss_iou": 0.43359375, "loss_num": 0.05810546875, "loss_xval": 1.15625, "num_input_tokens_seen": 41394872, "step": 740 }, { "epoch": 1.6503340757238307, "grad_norm": 15.308507919311523, "learning_rate": 1e-06, "loss": 1.0673, "num_input_tokens_seen": 41451676, "step": 741 }, { "epoch": 1.6503340757238307, "loss": 1.1007647514343262, "loss_ce": 0.0033525261096656322, "loss_iou": 0.416015625, "loss_num": 0.052734375, "loss_xval": 1.09375, "num_input_tokens_seen": 41451676, "step": 741 }, { "epoch": 1.6525612472160356, "grad_norm": 18.964244842529297, "learning_rate": 1e-06, "loss": 1.0727, "num_input_tokens_seen": 41509472, "step": 742 }, { "epoch": 1.6525612472160356, "loss": 0.9488592743873596, "loss_ce": 0.001105397124774754, "loss_iou": 0.404296875, "loss_num": 0.028076171875, "loss_xval": 0.94921875, "num_input_tokens_seen": 41509472, "step": 742 }, { "epoch": 1.6547884187082404, "grad_norm": 25.588586807250977, "learning_rate": 1e-06, "loss": 0.9794, "num_input_tokens_seen": 41565372, "step": 743 }, { "epoch": 1.6547884187082404, "loss": 0.8954076766967773, "loss_ce": 0.00038817772292532027, "loss_iou": 0.3125, "loss_num": 0.054443359375, "loss_xval": 0.89453125, "num_input_tokens_seen": 41565372, "step": 743 }, { "epoch": 1.6570155902004453, "grad_norm": 58.01374435424805, "learning_rate": 1e-06, "loss": 1.0396, "num_input_tokens_seen": 41622096, "step": 744 }, { "epoch": 1.6570155902004453, "loss": 1.068095088005066, "loss_ce": 0.00046813933295197785, "loss_iou": 0.44140625, "loss_num": 0.036865234375, "loss_xval": 1.0703125, "num_input_tokens_seen": 41622096, "step": 744 }, { "epoch": 1.6592427616926502, "grad_norm": 19.471540451049805, "learning_rate": 1e-06, "loss": 1.2403, "num_input_tokens_seen": 41679236, "step": 745 }, { "epoch": 1.6592427616926502, "loss": 1.3742785453796387, "loss_ce": 0.00123158423230052, "loss_iou": 0.5546875, "loss_num": 0.05322265625, "loss_xval": 1.375, "num_input_tokens_seen": 41679236, "step": 745 }, { "epoch": 1.6614699331848553, "grad_norm": 28.21709442138672, "learning_rate": 1e-06, "loss": 1.0587, "num_input_tokens_seen": 41736156, "step": 746 }, { "epoch": 1.6614699331848553, "loss": 0.78886479139328, "loss_ce": 0.0005347341066226363, "loss_iou": 0.33203125, "loss_num": 0.02490234375, "loss_xval": 0.7890625, "num_input_tokens_seen": 41736156, "step": 746 }, { "epoch": 1.6636971046770601, "grad_norm": 18.029945373535156, "learning_rate": 1e-06, "loss": 1.2296, "num_input_tokens_seen": 41791268, "step": 747 }, { "epoch": 1.6636971046770601, "loss": 1.125550389289856, "loss_ce": 0.0007945015095174313, "loss_iou": 0.4609375, "loss_num": 0.041015625, "loss_xval": 1.125, "num_input_tokens_seen": 41791268, "step": 747 }, { "epoch": 1.6659242761692652, "grad_norm": 19.177194595336914, "learning_rate": 1e-06, "loss": 1.143, "num_input_tokens_seen": 41845948, "step": 748 }, { "epoch": 1.6659242761692652, "loss": 1.0622222423553467, "loss_ce": 0.0006987220258451998, "loss_iou": 0.451171875, "loss_num": 0.0322265625, "loss_xval": 1.0625, "num_input_tokens_seen": 41845948, "step": 748 }, { "epoch": 1.66815144766147, "grad_norm": 18.510656356811523, "learning_rate": 1e-06, "loss": 0.9679, "num_input_tokens_seen": 41901836, "step": 749 }, { "epoch": 1.66815144766147, "loss": 1.2074395418167114, "loss_ce": 0.0008966219611465931, "loss_iou": 0.490234375, "loss_num": 0.04541015625, "loss_xval": 1.203125, "num_input_tokens_seen": 41901836, "step": 749 }, { "epoch": 1.670378619153675, "grad_norm": 18.269397735595703, "learning_rate": 1e-06, "loss": 0.8143, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "eval_seeclick_web_CIoU": 0.5411946177482605, "eval_seeclick_web_GIoU": 0.5299433916807175, "eval_seeclick_web_IoU": 0.560671478509903, "eval_seeclick_web_MAE_all": 0.01778533821925521, "eval_seeclick_web_MAE_h": 0.011779951397329569, "eval_seeclick_web_MAE_w": 0.017373694106936455, "eval_seeclick_web_MAE_x_boxes": 0.01111887488514185, "eval_seeclick_web_MAE_y_boxes": 0.02299651806242764, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 1.0158196687698364, "eval_seeclick_web_loss_ce": 0.00048441681428812444, "eval_seeclick_web_loss_iou": 0.462646484375, "eval_seeclick_web_loss_num": 0.014404296875, "eval_seeclick_web_loss_xval": 0.9970703125, "eval_seeclick_web_runtime": 17.3473, "eval_seeclick_web_samples_per_second": 2.882, "eval_seeclick_web_steps_per_second": 0.115, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "eval_icons_CIoU": 0.3312116116285324, "eval_icons_GIoU": 0.36676979064941406, "eval_icons_IoU": 0.40202146768569946, "eval_icons_MAE_all": 0.06723485328257084, "eval_icons_MAE_h": 0.03537856135517359, "eval_icons_MAE_w": 0.07818298228085041, "eval_icons_MAE_x_boxes": 0.058233313262462616, "eval_icons_MAE_y_boxes": 0.03735906444489956, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.6458972692489624, "eval_icons_loss_ce": 0.0018369617173448205, "eval_icons_loss_iou": 0.6297607421875, "eval_icons_loss_num": 0.06496238708496094, "eval_icons_loss_xval": 1.583984375, "eval_icons_runtime": 17.556, "eval_icons_samples_per_second": 2.848, "eval_icons_steps_per_second": 0.114, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "eval_screenspot_CIoU": 0.2800278961658478, "eval_screenspot_GIoU": 0.3007667362689972, "eval_screenspot_IoU": 0.36394914984703064, "eval_screenspot_MAE_all": 0.08516304691632588, "eval_screenspot_MAE_h": 0.04900899901986122, "eval_screenspot_MAE_w": 0.08881760636965434, "eval_screenspot_MAE_x_boxes": 0.11530703057845433, "eval_screenspot_MAE_y_boxes": 0.05522619063655535, "eval_screenspot_inside_bbox": 0.5808333357175192, "eval_screenspot_loss": 1.8781194686889648, "eval_screenspot_loss_ce": 0.00343730168727537, "eval_screenspot_loss_iou": 0.73583984375, "eval_screenspot_loss_num": 0.09470876057942708, "eval_screenspot_loss_xval": 1.9451497395833333, "eval_screenspot_runtime": 27.4038, "eval_screenspot_samples_per_second": 3.248, "eval_screenspot_steps_per_second": 0.109, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "eval_compot_CIoU": 0.3159261643886566, "eval_compot_GIoU": 0.3490176349878311, "eval_compot_IoU": 0.380667582154274, "eval_compot_MAE_all": 0.03044109046459198, "eval_compot_MAE_h": 0.012972671538591385, "eval_compot_MAE_w": 0.040612708777189255, "eval_compot_MAE_x_boxes": 0.03857684042304754, "eval_compot_MAE_y_boxes": 0.007390682585537434, "eval_compot_inside_bbox": 0.5868055522441864, "eval_compot_loss": 1.4768078327178955, "eval_compot_loss_ce": 0.0005472496850416064, "eval_compot_loss_iou": 0.6441650390625, "eval_compot_loss_num": 0.027063369750976562, "eval_compot_loss_xval": 1.423095703125, "eval_compot_runtime": 17.7125, "eval_compot_samples_per_second": 2.823, "eval_compot_steps_per_second": 0.113, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "eval_custom_ui_val_CIoU": 0.3929840202132861, "eval_custom_ui_val_GIoU": 0.4221853729751375, "eval_custom_ui_val_IoU": 0.45630496740341187, "eval_custom_ui_val_MAE_all": 0.043078110449843936, "eval_custom_ui_val_MAE_h": 0.025929110849069223, "eval_custom_ui_val_MAE_w": 0.048152227161659136, "eval_custom_ui_val_MAE_x_boxes": 0.04743420394758383, "eval_custom_ui_val_MAE_y_boxes": 0.02334741482304202, "eval_custom_ui_val_inside_bbox": 0.6608796318372091, "eval_custom_ui_val_loss": 1.3928289413452148, "eval_custom_ui_val_loss_ce": 0.0016487750755105582, "eval_custom_ui_val_loss_iou": 0.5744357638888888, "eval_custom_ui_val_loss_num": 0.0423272450764974, "eval_custom_ui_val_loss_xval": 1.3607584635416667, "eval_custom_ui_val_runtime": 56.9195, "eval_custom_ui_val_samples_per_second": 4.656, "eval_custom_ui_val_steps_per_second": 0.158, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.670378619153675, "loss": 1.1135873794555664, "loss_ce": 0.0010384945198893547, "loss_iou": 0.466796875, "loss_num": 0.03515625, "loss_xval": 1.109375, "num_input_tokens_seen": 41960764, "step": 750 }, { "epoch": 1.6726057906458798, "grad_norm": 22.51813507080078, "learning_rate": 1e-06, "loss": 1.0466, "num_input_tokens_seen": 42015972, "step": 751 }, { "epoch": 1.6726057906458798, "loss": 1.0174639225006104, "loss_ce": 0.0008623974863439798, "loss_iou": 0.419921875, "loss_num": 0.034912109375, "loss_xval": 1.015625, "num_input_tokens_seen": 42015972, "step": 751 }, { "epoch": 1.6748329621380846, "grad_norm": 22.246463775634766, "learning_rate": 1e-06, "loss": 1.4559, "num_input_tokens_seen": 42071584, "step": 752 }, { "epoch": 1.6748329621380846, "loss": 1.3008112907409668, "loss_ce": 0.001006676466204226, "loss_iou": 0.4921875, "loss_num": 0.06298828125, "loss_xval": 1.296875, "num_input_tokens_seen": 42071584, "step": 752 }, { "epoch": 1.6770601336302895, "grad_norm": 25.3890380859375, "learning_rate": 1e-06, "loss": 1.0625, "num_input_tokens_seen": 42129356, "step": 753 }, { "epoch": 1.6770601336302895, "loss": 0.8968948721885681, "loss_ce": 0.0004105077823624015, "loss_iou": 0.37109375, "loss_num": 0.0311279296875, "loss_xval": 0.8984375, "num_input_tokens_seen": 42129356, "step": 753 }, { "epoch": 1.6792873051224944, "grad_norm": 19.59492301940918, "learning_rate": 1e-06, "loss": 0.9136, "num_input_tokens_seen": 42186100, "step": 754 }, { "epoch": 1.6792873051224944, "loss": 0.7982625961303711, "loss_ce": 0.0004110607551410794, "loss_iou": 0.33203125, "loss_num": 0.0267333984375, "loss_xval": 0.796875, "num_input_tokens_seen": 42186100, "step": 754 }, { "epoch": 1.6815144766146992, "grad_norm": 24.47226905822754, "learning_rate": 1e-06, "loss": 1.1686, "num_input_tokens_seen": 42240244, "step": 755 }, { "epoch": 1.6815144766146992, "loss": 1.1603378057479858, "loss_ce": 0.0004257457912899554, "loss_iou": 0.470703125, "loss_num": 0.043701171875, "loss_xval": 1.15625, "num_input_tokens_seen": 42240244, "step": 755 }, { "epoch": 1.683741648106904, "grad_norm": 23.5756893157959, "learning_rate": 1e-06, "loss": 1.1523, "num_input_tokens_seen": 42299200, "step": 756 }, { "epoch": 1.683741648106904, "loss": 0.9354584217071533, "loss_ce": 0.00039978878339752555, "loss_iou": 0.388671875, "loss_num": 0.03125, "loss_xval": 0.93359375, "num_input_tokens_seen": 42299200, "step": 756 }, { "epoch": 1.6859688195991092, "grad_norm": 29.683124542236328, "learning_rate": 1e-06, "loss": 1.2437, "num_input_tokens_seen": 42355712, "step": 757 }, { "epoch": 1.6859688195991092, "loss": 1.3413095474243164, "loss_ce": 0.0007333762478083372, "loss_iou": 0.55078125, "loss_num": 0.046875, "loss_xval": 1.34375, "num_input_tokens_seen": 42355712, "step": 757 }, { "epoch": 1.688195991091314, "grad_norm": 15.45508861541748, "learning_rate": 1e-06, "loss": 0.7795, "num_input_tokens_seen": 42413232, "step": 758 }, { "epoch": 1.688195991091314, "loss": 0.9429343938827515, "loss_ce": 0.0005515510565601289, "loss_iou": 0.396484375, "loss_num": 0.0299072265625, "loss_xval": 0.94140625, "num_input_tokens_seen": 42413232, "step": 758 }, { "epoch": 1.6904231625835189, "grad_norm": 24.212417602539062, "learning_rate": 1e-06, "loss": 0.9237, "num_input_tokens_seen": 42470224, "step": 759 }, { "epoch": 1.6904231625835189, "loss": 1.025328516960144, "loss_ce": 0.005308972671627998, "loss_iou": 0.419921875, "loss_num": 0.035888671875, "loss_xval": 1.0234375, "num_input_tokens_seen": 42470224, "step": 759 }, { "epoch": 1.692650334075724, "grad_norm": 18.754596710205078, "learning_rate": 1e-06, "loss": 1.0195, "num_input_tokens_seen": 42525456, "step": 760 }, { "epoch": 1.692650334075724, "loss": 1.1706150770187378, "loss_ce": 0.0036228555254638195, "loss_iou": 0.4921875, "loss_num": 0.036376953125, "loss_xval": 1.1640625, "num_input_tokens_seen": 42525456, "step": 760 }, { "epoch": 1.6948775055679288, "grad_norm": 19.588882446289062, "learning_rate": 1e-06, "loss": 0.695, "num_input_tokens_seen": 42582136, "step": 761 }, { "epoch": 1.6948775055679288, "loss": 0.852080225944519, "loss_ce": 0.0005177696002647281, "loss_iou": 0.376953125, "loss_num": 0.019287109375, "loss_xval": 0.8515625, "num_input_tokens_seen": 42582136, "step": 761 }, { "epoch": 1.6971046770601337, "grad_norm": 32.498355865478516, "learning_rate": 1e-06, "loss": 1.1963, "num_input_tokens_seen": 42637868, "step": 762 }, { "epoch": 1.6971046770601337, "loss": 1.1873914003372192, "loss_ce": 0.0008679982274770737, "loss_iou": 0.484375, "loss_num": 0.04345703125, "loss_xval": 1.1875, "num_input_tokens_seen": 42637868, "step": 762 }, { "epoch": 1.6993318485523385, "grad_norm": 24.11650848388672, "learning_rate": 1e-06, "loss": 1.1282, "num_input_tokens_seen": 42689848, "step": 763 }, { "epoch": 1.6993318485523385, "loss": 1.3757023811340332, "loss_ce": 0.0007023118087090552, "loss_iou": 0.578125, "loss_num": 0.04443359375, "loss_xval": 1.375, "num_input_tokens_seen": 42689848, "step": 763 }, { "epoch": 1.7015590200445434, "grad_norm": 17.393308639526367, "learning_rate": 1e-06, "loss": 1.0435, "num_input_tokens_seen": 42745660, "step": 764 }, { "epoch": 1.7015590200445434, "loss": 0.8240088224411011, "loss_ce": 0.0014990322524681687, "loss_iou": 0.34375, "loss_num": 0.02685546875, "loss_xval": 0.82421875, "num_input_tokens_seen": 42745660, "step": 764 }, { "epoch": 1.7037861915367483, "grad_norm": 26.520469665527344, "learning_rate": 1e-06, "loss": 1.0743, "num_input_tokens_seen": 42802272, "step": 765 }, { "epoch": 1.7037861915367483, "loss": 1.2456159591674805, "loss_ce": 0.0019635630305856466, "loss_iou": 0.5, "loss_num": 0.048095703125, "loss_xval": 1.2421875, "num_input_tokens_seen": 42802272, "step": 765 }, { "epoch": 1.7060133630289531, "grad_norm": 34.5315055847168, "learning_rate": 1e-06, "loss": 1.1088, "num_input_tokens_seen": 42859164, "step": 766 }, { "epoch": 1.7060133630289531, "loss": 1.120169758796692, "loss_ce": 0.0015174560248851776, "loss_iou": 0.47265625, "loss_num": 0.03466796875, "loss_xval": 1.1171875, "num_input_tokens_seen": 42859164, "step": 766 }, { "epoch": 1.708240534521158, "grad_norm": 28.206703186035156, "learning_rate": 1e-06, "loss": 1.1139, "num_input_tokens_seen": 42911928, "step": 767 }, { "epoch": 1.708240534521158, "loss": 1.1372349262237549, "loss_ce": 0.0010045571252703667, "loss_iou": 0.4765625, "loss_num": 0.03662109375, "loss_xval": 1.1328125, "num_input_tokens_seen": 42911928, "step": 767 }, { "epoch": 1.710467706013363, "grad_norm": 44.07477951049805, "learning_rate": 1e-06, "loss": 0.6435, "num_input_tokens_seen": 42970112, "step": 768 }, { "epoch": 1.710467706013363, "loss": 0.7950579524040222, "loss_ce": 0.0003801731509156525, "loss_iou": 0.318359375, "loss_num": 0.031494140625, "loss_xval": 0.79296875, "num_input_tokens_seen": 42970112, "step": 768 }, { "epoch": 1.712694877505568, "grad_norm": 39.26905059814453, "learning_rate": 1e-06, "loss": 1.1469, "num_input_tokens_seen": 43025648, "step": 769 }, { "epoch": 1.712694877505568, "loss": 1.2926554679870605, "loss_ce": 0.0006633971352130175, "loss_iou": 0.50390625, "loss_num": 0.05712890625, "loss_xval": 1.2890625, "num_input_tokens_seen": 43025648, "step": 769 }, { "epoch": 1.7149220489977728, "grad_norm": 39.54010009765625, "learning_rate": 1e-06, "loss": 1.1173, "num_input_tokens_seen": 43083808, "step": 770 }, { "epoch": 1.7149220489977728, "loss": 1.2310657501220703, "loss_ce": 0.0010852674022316933, "loss_iou": 0.482421875, "loss_num": 0.052978515625, "loss_xval": 1.2265625, "num_input_tokens_seen": 43083808, "step": 770 }, { "epoch": 1.7171492204899779, "grad_norm": 19.132938385009766, "learning_rate": 1e-06, "loss": 1.06, "num_input_tokens_seen": 43142508, "step": 771 }, { "epoch": 1.7171492204899779, "loss": 0.9760845303535461, "loss_ce": 0.0004985497798770666, "loss_iou": 0.412109375, "loss_num": 0.0308837890625, "loss_xval": 0.9765625, "num_input_tokens_seen": 43142508, "step": 771 }, { "epoch": 1.7193763919821827, "grad_norm": 23.097333908081055, "learning_rate": 1e-06, "loss": 1.0792, "num_input_tokens_seen": 43196132, "step": 772 }, { "epoch": 1.7193763919821827, "loss": 0.7085492014884949, "loss_ce": 0.0005413593607954681, "loss_iou": 0.28515625, "loss_num": 0.027099609375, "loss_xval": 0.70703125, "num_input_tokens_seen": 43196132, "step": 772 }, { "epoch": 1.7216035634743876, "grad_norm": 30.94361686706543, "learning_rate": 1e-06, "loss": 1.0867, "num_input_tokens_seen": 43253588, "step": 773 }, { "epoch": 1.7216035634743876, "loss": 0.9410488605499268, "loss_ce": 0.0011074627982452512, "loss_iou": 0.3828125, "loss_num": 0.034912109375, "loss_xval": 0.94140625, "num_input_tokens_seen": 43253588, "step": 773 }, { "epoch": 1.7238307349665924, "grad_norm": 23.98365592956543, "learning_rate": 1e-06, "loss": 1.3351, "num_input_tokens_seen": 43308080, "step": 774 }, { "epoch": 1.7238307349665924, "loss": 1.3561934232711792, "loss_ce": 0.0007246616296470165, "loss_iou": 0.515625, "loss_num": 0.0654296875, "loss_xval": 1.359375, "num_input_tokens_seen": 43308080, "step": 774 }, { "epoch": 1.7260579064587973, "grad_norm": 21.451553344726562, "learning_rate": 1e-06, "loss": 0.835, "num_input_tokens_seen": 43365784, "step": 775 }, { "epoch": 1.7260579064587973, "loss": 0.7267694473266602, "loss_ce": 0.0019159411313012242, "loss_iou": 0.306640625, "loss_num": 0.0224609375, "loss_xval": 0.7265625, "num_input_tokens_seen": 43365784, "step": 775 }, { "epoch": 1.7282850779510022, "grad_norm": 32.42961502075195, "learning_rate": 1e-06, "loss": 0.7633, "num_input_tokens_seen": 43420724, "step": 776 }, { "epoch": 1.7282850779510022, "loss": 0.8495993614196777, "loss_ce": 0.0009665663237683475, "loss_iou": 0.345703125, "loss_num": 0.031494140625, "loss_xval": 0.84765625, "num_input_tokens_seen": 43420724, "step": 776 }, { "epoch": 1.730512249443207, "grad_norm": 12.944496154785156, "learning_rate": 1e-06, "loss": 1.1678, "num_input_tokens_seen": 43478128, "step": 777 }, { "epoch": 1.730512249443207, "loss": 1.2062785625457764, "loss_ce": 0.0004680473357439041, "loss_iou": 0.5, "loss_num": 0.041259765625, "loss_xval": 1.203125, "num_input_tokens_seen": 43478128, "step": 777 }, { "epoch": 1.732739420935412, "grad_norm": 24.311969757080078, "learning_rate": 1e-06, "loss": 1.3848, "num_input_tokens_seen": 43535104, "step": 778 }, { "epoch": 1.732739420935412, "loss": 1.19753098487854, "loss_ce": 0.0007536107441410422, "loss_iou": 0.486328125, "loss_num": 0.04443359375, "loss_xval": 1.1953125, "num_input_tokens_seen": 43535104, "step": 778 }, { "epoch": 1.734966592427617, "grad_norm": 24.397178649902344, "learning_rate": 1e-06, "loss": 1.0169, "num_input_tokens_seen": 43592232, "step": 779 }, { "epoch": 1.734966592427617, "loss": 1.0153961181640625, "loss_ce": 0.003433296922594309, "loss_iou": 0.40625, "loss_num": 0.040283203125, "loss_xval": 1.015625, "num_input_tokens_seen": 43592232, "step": 779 }, { "epoch": 1.7371937639198218, "grad_norm": 18.340551376342773, "learning_rate": 1e-06, "loss": 1.0615, "num_input_tokens_seen": 43645688, "step": 780 }, { "epoch": 1.7371937639198218, "loss": 1.023805856704712, "loss_ce": 0.0007346307393163443, "loss_iou": 0.404296875, "loss_num": 0.04248046875, "loss_xval": 1.0234375, "num_input_tokens_seen": 43645688, "step": 780 }, { "epoch": 1.7394209354120267, "grad_norm": 16.935611724853516, "learning_rate": 1e-06, "loss": 1.0056, "num_input_tokens_seen": 43702564, "step": 781 }, { "epoch": 1.7394209354120267, "loss": 0.7179310321807861, "loss_ce": 0.002354835858568549, "loss_iou": 0.291015625, "loss_num": 0.026611328125, "loss_xval": 0.71484375, "num_input_tokens_seen": 43702564, "step": 781 }, { "epoch": 1.7416481069042318, "grad_norm": 22.93832015991211, "learning_rate": 1e-06, "loss": 0.9551, "num_input_tokens_seen": 43760636, "step": 782 }, { "epoch": 1.7416481069042318, "loss": 1.01814603805542, "loss_ce": 0.0010561385424807668, "loss_iou": 0.423828125, "loss_num": 0.033935546875, "loss_xval": 1.015625, "num_input_tokens_seen": 43760636, "step": 782 }, { "epoch": 1.7438752783964366, "grad_norm": 26.112279891967773, "learning_rate": 1e-06, "loss": 1.104, "num_input_tokens_seen": 43818316, "step": 783 }, { "epoch": 1.7438752783964366, "loss": 0.8660245537757874, "loss_ce": 0.0020108623430132866, "loss_iou": 0.37109375, "loss_num": 0.0244140625, "loss_xval": 0.86328125, "num_input_tokens_seen": 43818316, "step": 783 }, { "epoch": 1.7461024498886415, "grad_norm": 20.74223518371582, "learning_rate": 1e-06, "loss": 0.9333, "num_input_tokens_seen": 43874416, "step": 784 }, { "epoch": 1.7461024498886415, "loss": 1.0379929542541504, "loss_ce": 0.0003951968683395535, "loss_iou": 0.42578125, "loss_num": 0.037109375, "loss_xval": 1.0390625, "num_input_tokens_seen": 43874416, "step": 784 }, { "epoch": 1.7483296213808464, "grad_norm": 16.81831169128418, "learning_rate": 1e-06, "loss": 0.8192, "num_input_tokens_seen": 43929028, "step": 785 }, { "epoch": 1.7483296213808464, "loss": 0.7447642087936401, "loss_ce": 0.0003794525982812047, "loss_iou": 0.322265625, "loss_num": 0.020263671875, "loss_xval": 0.74609375, "num_input_tokens_seen": 43929028, "step": 785 }, { "epoch": 1.7505567928730512, "grad_norm": 15.588628768920898, "learning_rate": 1e-06, "loss": 1.0575, "num_input_tokens_seen": 43985228, "step": 786 }, { "epoch": 1.7505567928730512, "loss": 0.9384199380874634, "loss_ce": 0.0004316343110986054, "loss_iou": 0.376953125, "loss_num": 0.03662109375, "loss_xval": 0.9375, "num_input_tokens_seen": 43985228, "step": 786 }, { "epoch": 1.752783964365256, "grad_norm": 47.489871978759766, "learning_rate": 1e-06, "loss": 0.8388, "num_input_tokens_seen": 44042064, "step": 787 }, { "epoch": 1.752783964365256, "loss": 0.9427950382232666, "loss_ce": 0.0006563607603311539, "loss_iou": 0.3828125, "loss_num": 0.034912109375, "loss_xval": 0.94140625, "num_input_tokens_seen": 44042064, "step": 787 }, { "epoch": 1.755011135857461, "grad_norm": 23.84717559814453, "learning_rate": 1e-06, "loss": 1.1627, "num_input_tokens_seen": 44099536, "step": 788 }, { "epoch": 1.755011135857461, "loss": 1.0481215715408325, "loss_ce": 0.0007583041442558169, "loss_iou": 0.42578125, "loss_num": 0.038818359375, "loss_xval": 1.046875, "num_input_tokens_seen": 44099536, "step": 788 }, { "epoch": 1.7572383073496658, "grad_norm": 15.595168113708496, "learning_rate": 1e-06, "loss": 0.9104, "num_input_tokens_seen": 44156140, "step": 789 }, { "epoch": 1.7572383073496658, "loss": 0.9101336002349854, "loss_ce": 0.0004656048258766532, "loss_iou": 0.369140625, "loss_num": 0.034912109375, "loss_xval": 0.91015625, "num_input_tokens_seen": 44156140, "step": 789 }, { "epoch": 1.7594654788418709, "grad_norm": 19.3514404296875, "learning_rate": 1e-06, "loss": 1.3284, "num_input_tokens_seen": 44212504, "step": 790 }, { "epoch": 1.7594654788418709, "loss": 1.7789078950881958, "loss_ce": 0.0010758922435343266, "loss_iou": 0.6953125, "loss_num": 0.07861328125, "loss_xval": 1.78125, "num_input_tokens_seen": 44212504, "step": 790 }, { "epoch": 1.7616926503340757, "grad_norm": 20.171802520751953, "learning_rate": 1e-06, "loss": 0.8222, "num_input_tokens_seen": 44268412, "step": 791 }, { "epoch": 1.7616926503340757, "loss": 0.7054498195648193, "loss_ce": 0.00037169185816310346, "loss_iou": 0.283203125, "loss_num": 0.02783203125, "loss_xval": 0.703125, "num_input_tokens_seen": 44268412, "step": 791 }, { "epoch": 1.7639198218262806, "grad_norm": 26.683834075927734, "learning_rate": 1e-06, "loss": 0.9773, "num_input_tokens_seen": 44326872, "step": 792 }, { "epoch": 1.7639198218262806, "loss": 1.1897196769714355, "loss_ce": 0.0007548188441433012, "loss_iou": 0.47265625, "loss_num": 0.048583984375, "loss_xval": 1.1875, "num_input_tokens_seen": 44326872, "step": 792 }, { "epoch": 1.7661469933184857, "grad_norm": 32.8150749206543, "learning_rate": 1e-06, "loss": 0.9811, "num_input_tokens_seen": 44380344, "step": 793 }, { "epoch": 1.7661469933184857, "loss": 1.1400426626205444, "loss_ce": 0.0006384018342941999, "loss_iou": 0.462890625, "loss_num": 0.042724609375, "loss_xval": 1.140625, "num_input_tokens_seen": 44380344, "step": 793 }, { "epoch": 1.7683741648106905, "grad_norm": 117.90059661865234, "learning_rate": 1e-06, "loss": 1.0223, "num_input_tokens_seen": 44435364, "step": 794 }, { "epoch": 1.7683741648106905, "loss": 0.6915375590324402, "loss_ce": 0.0008637503487989306, "loss_iou": 0.279296875, "loss_num": 0.02685546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 44435364, "step": 794 }, { "epoch": 1.7706013363028954, "grad_norm": 20.04206085205078, "learning_rate": 1e-06, "loss": 0.6872, "num_input_tokens_seen": 44492192, "step": 795 }, { "epoch": 1.7706013363028954, "loss": 0.6449465751647949, "loss_ce": 0.00041528072324581444, "loss_iou": 0.267578125, "loss_num": 0.022216796875, "loss_xval": 0.64453125, "num_input_tokens_seen": 44492192, "step": 795 }, { "epoch": 1.7728285077951003, "grad_norm": 17.540992736816406, "learning_rate": 1e-06, "loss": 0.8579, "num_input_tokens_seen": 44548236, "step": 796 }, { "epoch": 1.7728285077951003, "loss": 0.8379471302032471, "loss_ce": 0.0005447610164992511, "loss_iou": 0.33984375, "loss_num": 0.031494140625, "loss_xval": 0.8359375, "num_input_tokens_seen": 44548236, "step": 796 }, { "epoch": 1.7750556792873051, "grad_norm": 21.848678588867188, "learning_rate": 1e-06, "loss": 0.9379, "num_input_tokens_seen": 44603416, "step": 797 }, { "epoch": 1.7750556792873051, "loss": 1.0255460739135742, "loss_ce": 0.0005215964047238231, "loss_iou": 0.416015625, "loss_num": 0.038818359375, "loss_xval": 1.0234375, "num_input_tokens_seen": 44603416, "step": 797 }, { "epoch": 1.77728285077951, "grad_norm": 17.641189575195312, "learning_rate": 1e-06, "loss": 1.009, "num_input_tokens_seen": 44659364, "step": 798 }, { "epoch": 1.77728285077951, "loss": 1.0162487030029297, "loss_ce": 0.0006236857152543962, "loss_iou": 0.431640625, "loss_num": 0.0301513671875, "loss_xval": 1.015625, "num_input_tokens_seen": 44659364, "step": 798 }, { "epoch": 1.7795100222717148, "grad_norm": 32.69601058959961, "learning_rate": 1e-06, "loss": 1.0617, "num_input_tokens_seen": 44716108, "step": 799 }, { "epoch": 1.7795100222717148, "loss": 1.2807564735412598, "loss_ce": 0.0009712378960102797, "loss_iou": 0.484375, "loss_num": 0.062255859375, "loss_xval": 1.28125, "num_input_tokens_seen": 44716108, "step": 799 }, { "epoch": 1.7817371937639197, "grad_norm": 18.248733520507812, "learning_rate": 1e-06, "loss": 0.9217, "num_input_tokens_seen": 44775240, "step": 800 }, { "epoch": 1.7817371937639197, "loss": 0.9542760252952576, "loss_ce": 0.0026158532127738, "loss_iou": 0.392578125, "loss_num": 0.03369140625, "loss_xval": 0.953125, "num_input_tokens_seen": 44775240, "step": 800 }, { "epoch": 1.7839643652561246, "grad_norm": 31.044193267822266, "learning_rate": 1e-06, "loss": 1.0257, "num_input_tokens_seen": 44830904, "step": 801 }, { "epoch": 1.7839643652561246, "loss": 0.9924131035804749, "loss_ce": 0.000713902700226754, "loss_iou": 0.365234375, "loss_num": 0.0517578125, "loss_xval": 0.9921875, "num_input_tokens_seen": 44830904, "step": 801 }, { "epoch": 1.7861915367483296, "grad_norm": 16.06730842590332, "learning_rate": 1e-06, "loss": 1.0, "num_input_tokens_seen": 44885496, "step": 802 }, { "epoch": 1.7861915367483296, "loss": 1.0914322137832642, "loss_ce": 0.0006118253222666681, "loss_iou": 0.4453125, "loss_num": 0.0400390625, "loss_xval": 1.09375, "num_input_tokens_seen": 44885496, "step": 802 }, { "epoch": 1.7884187082405345, "grad_norm": 18.782161712646484, "learning_rate": 1e-06, "loss": 1.0227, "num_input_tokens_seen": 44939520, "step": 803 }, { "epoch": 1.7884187082405345, "loss": 1.2903541326522827, "loss_ce": 0.0008032987243495882, "loss_iou": 0.53125, "loss_num": 0.045166015625, "loss_xval": 1.2890625, "num_input_tokens_seen": 44939520, "step": 803 }, { "epoch": 1.7906458797327396, "grad_norm": 14.930334091186523, "learning_rate": 1e-06, "loss": 0.9842, "num_input_tokens_seen": 44994976, "step": 804 }, { "epoch": 1.7906458797327396, "loss": 0.6712737679481506, "loss_ce": 0.0028167327400296926, "loss_iou": 0.27734375, "loss_num": 0.0223388671875, "loss_xval": 0.66796875, "num_input_tokens_seen": 44994976, "step": 804 }, { "epoch": 1.7928730512249444, "grad_norm": 21.465778350830078, "learning_rate": 1e-06, "loss": 1.161, "num_input_tokens_seen": 45053556, "step": 805 }, { "epoch": 1.7928730512249444, "loss": 1.2845568656921387, "loss_ce": 0.0006212838925421238, "loss_iou": 0.486328125, "loss_num": 0.062255859375, "loss_xval": 1.28125, "num_input_tokens_seen": 45053556, "step": 805 }, { "epoch": 1.7951002227171493, "grad_norm": 20.920679092407227, "learning_rate": 1e-06, "loss": 1.1158, "num_input_tokens_seen": 45110760, "step": 806 }, { "epoch": 1.7951002227171493, "loss": 1.0931113958358765, "loss_ce": 0.000582136504817754, "loss_iou": 0.44140625, "loss_num": 0.04150390625, "loss_xval": 1.09375, "num_input_tokens_seen": 45110760, "step": 806 }, { "epoch": 1.7973273942093542, "grad_norm": 29.755828857421875, "learning_rate": 1e-06, "loss": 1.2162, "num_input_tokens_seen": 45166492, "step": 807 }, { "epoch": 1.7973273942093542, "loss": 1.4112908840179443, "loss_ce": 0.0006463178433477879, "loss_iou": 0.5703125, "loss_num": 0.0537109375, "loss_xval": 1.4140625, "num_input_tokens_seen": 45166492, "step": 807 }, { "epoch": 1.799554565701559, "grad_norm": 25.334617614746094, "learning_rate": 1e-06, "loss": 1.0345, "num_input_tokens_seen": 45222840, "step": 808 }, { "epoch": 1.799554565701559, "loss": 0.8354628086090088, "loss_ce": 0.0005018864176236093, "loss_iou": 0.32421875, "loss_num": 0.03759765625, "loss_xval": 0.8359375, "num_input_tokens_seen": 45222840, "step": 808 }, { "epoch": 1.8017817371937639, "grad_norm": 12.594086647033691, "learning_rate": 1e-06, "loss": 1.0409, "num_input_tokens_seen": 45278916, "step": 809 }, { "epoch": 1.8017817371937639, "loss": 0.8014848232269287, "loss_ce": 0.0004594190395437181, "loss_iou": 0.326171875, "loss_num": 0.029541015625, "loss_xval": 0.80078125, "num_input_tokens_seen": 45278916, "step": 809 }, { "epoch": 1.8040089086859687, "grad_norm": 19.810697555541992, "learning_rate": 1e-06, "loss": 0.8261, "num_input_tokens_seen": 45331148, "step": 810 }, { "epoch": 1.8040089086859687, "loss": 0.7200347185134888, "loss_ce": 0.0005523251602426171, "loss_iou": 0.279296875, "loss_num": 0.031982421875, "loss_xval": 0.71875, "num_input_tokens_seen": 45331148, "step": 810 }, { "epoch": 1.8062360801781736, "grad_norm": 15.187623023986816, "learning_rate": 1e-06, "loss": 0.9049, "num_input_tokens_seen": 45385580, "step": 811 }, { "epoch": 1.8062360801781736, "loss": 0.8386489748954773, "loss_ce": 0.001979056978598237, "loss_iou": 0.322265625, "loss_num": 0.03857421875, "loss_xval": 0.8359375, "num_input_tokens_seen": 45385580, "step": 811 }, { "epoch": 1.8084632516703785, "grad_norm": 30.983192443847656, "learning_rate": 1e-06, "loss": 1.2259, "num_input_tokens_seen": 45440584, "step": 812 }, { "epoch": 1.8084632516703785, "loss": 0.9607589244842529, "loss_ce": 0.001286255195736885, "loss_iou": 0.41015625, "loss_num": 0.0281982421875, "loss_xval": 0.9609375, "num_input_tokens_seen": 45440584, "step": 812 }, { "epoch": 1.8106904231625836, "grad_norm": 16.581043243408203, "learning_rate": 1e-06, "loss": 0.9445, "num_input_tokens_seen": 45493384, "step": 813 }, { "epoch": 1.8106904231625836, "loss": 0.8210749626159668, "loss_ce": 0.0007623857818543911, "loss_iou": 0.34765625, "loss_num": 0.0252685546875, "loss_xval": 0.8203125, "num_input_tokens_seen": 45493384, "step": 813 }, { "epoch": 1.8129175946547884, "grad_norm": 20.215229034423828, "learning_rate": 1e-06, "loss": 1.0194, "num_input_tokens_seen": 45548060, "step": 814 }, { "epoch": 1.8129175946547884, "loss": 1.2104692459106445, "loss_ce": 0.0029496951028704643, "loss_iou": 0.46484375, "loss_num": 0.055908203125, "loss_xval": 1.2109375, "num_input_tokens_seen": 45548060, "step": 814 }, { "epoch": 1.8151447661469933, "grad_norm": 17.977659225463867, "learning_rate": 1e-06, "loss": 0.9403, "num_input_tokens_seen": 45605948, "step": 815 }, { "epoch": 1.8151447661469933, "loss": 0.7051939964294434, "loss_ce": 0.0003600172349251807, "loss_iou": 0.302734375, "loss_num": 0.019775390625, "loss_xval": 0.703125, "num_input_tokens_seen": 45605948, "step": 815 }, { "epoch": 1.8173719376391984, "grad_norm": 21.293136596679688, "learning_rate": 1e-06, "loss": 0.9601, "num_input_tokens_seen": 45660788, "step": 816 }, { "epoch": 1.8173719376391984, "loss": 0.7573980093002319, "loss_ce": 0.0005621028249152005, "loss_iou": 0.326171875, "loss_num": 0.020751953125, "loss_xval": 0.7578125, "num_input_tokens_seen": 45660788, "step": 816 }, { "epoch": 1.8195991091314032, "grad_norm": 31.912277221679688, "learning_rate": 1e-06, "loss": 1.2548, "num_input_tokens_seen": 45713720, "step": 817 }, { "epoch": 1.8195991091314032, "loss": 1.1719120740890503, "loss_ce": 0.012366149574518204, "loss_iou": 0.490234375, "loss_num": 0.03564453125, "loss_xval": 1.15625, "num_input_tokens_seen": 45713720, "step": 817 }, { "epoch": 1.821826280623608, "grad_norm": 43.602935791015625, "learning_rate": 1e-06, "loss": 1.0844, "num_input_tokens_seen": 45769628, "step": 818 }, { "epoch": 1.821826280623608, "loss": 1.0855807065963745, "loss_ce": 0.0006197973270900548, "loss_iou": 0.451171875, "loss_num": 0.036865234375, "loss_xval": 1.0859375, "num_input_tokens_seen": 45769628, "step": 818 }, { "epoch": 1.824053452115813, "grad_norm": 23.616613388061523, "learning_rate": 1e-06, "loss": 1.1116, "num_input_tokens_seen": 45827072, "step": 819 }, { "epoch": 1.824053452115813, "loss": 1.052821159362793, "loss_ce": 0.0005750858690589666, "loss_iou": 0.4375, "loss_num": 0.03515625, "loss_xval": 1.0546875, "num_input_tokens_seen": 45827072, "step": 819 }, { "epoch": 1.8262806236080178, "grad_norm": 24.936237335205078, "learning_rate": 1e-06, "loss": 1.1643, "num_input_tokens_seen": 45882452, "step": 820 }, { "epoch": 1.8262806236080178, "loss": 1.1679127216339111, "loss_ce": 0.00043235800694674253, "loss_iou": 0.466796875, "loss_num": 0.046630859375, "loss_xval": 1.1640625, "num_input_tokens_seen": 45882452, "step": 820 }, { "epoch": 1.8285077951002227, "grad_norm": 27.94110107421875, "learning_rate": 1e-06, "loss": 1.0027, "num_input_tokens_seen": 45937344, "step": 821 }, { "epoch": 1.8285077951002227, "loss": 1.220947265625, "loss_ce": 0.0007323599420487881, "loss_iou": 0.46875, "loss_num": 0.056396484375, "loss_xval": 1.21875, "num_input_tokens_seen": 45937344, "step": 821 }, { "epoch": 1.8307349665924275, "grad_norm": 19.486064910888672, "learning_rate": 1e-06, "loss": 0.9696, "num_input_tokens_seen": 45994040, "step": 822 }, { "epoch": 1.8307349665924275, "loss": 0.7867767214775085, "loss_ce": 0.0003997594176325947, "loss_iou": 0.32421875, "loss_num": 0.028076171875, "loss_xval": 0.78515625, "num_input_tokens_seen": 45994040, "step": 822 }, { "epoch": 1.8329621380846324, "grad_norm": 33.13214111328125, "learning_rate": 1e-06, "loss": 1.2236, "num_input_tokens_seen": 46049792, "step": 823 }, { "epoch": 1.8329621380846324, "loss": 1.2174769639968872, "loss_ce": 0.0011683637276291847, "loss_iou": 0.486328125, "loss_num": 0.049072265625, "loss_xval": 1.21875, "num_input_tokens_seen": 46049792, "step": 823 }, { "epoch": 1.8351893095768375, "grad_norm": 22.1630802154541, "learning_rate": 1e-06, "loss": 1.0282, "num_input_tokens_seen": 46106908, "step": 824 }, { "epoch": 1.8351893095768375, "loss": 1.025941252708435, "loss_ce": 0.0005506377201527357, "loss_iou": 0.4453125, "loss_num": 0.02685546875, "loss_xval": 1.0234375, "num_input_tokens_seen": 46106908, "step": 824 }, { "epoch": 1.8374164810690423, "grad_norm": 18.732593536376953, "learning_rate": 1e-06, "loss": 1.1442, "num_input_tokens_seen": 46161732, "step": 825 }, { "epoch": 1.8374164810690423, "loss": 1.1100322008132935, "loss_ce": 0.0004130484303459525, "loss_iou": 0.4453125, "loss_num": 0.044189453125, "loss_xval": 1.109375, "num_input_tokens_seen": 46161732, "step": 825 }, { "epoch": 1.8396436525612472, "grad_norm": 16.942766189575195, "learning_rate": 1e-06, "loss": 1.1521, "num_input_tokens_seen": 46217944, "step": 826 }, { "epoch": 1.8396436525612472, "loss": 0.9271703362464905, "loss_ce": 0.0018773877527564764, "loss_iou": 0.3828125, "loss_num": 0.031982421875, "loss_xval": 0.92578125, "num_input_tokens_seen": 46217944, "step": 826 }, { "epoch": 1.8418708240534523, "grad_norm": 19.92278289794922, "learning_rate": 1e-06, "loss": 1.1781, "num_input_tokens_seen": 46274360, "step": 827 }, { "epoch": 1.8418708240534523, "loss": 1.132466197013855, "loss_ce": 0.0008743547950871289, "loss_iou": 0.443359375, "loss_num": 0.04931640625, "loss_xval": 1.1328125, "num_input_tokens_seen": 46274360, "step": 827 }, { "epoch": 1.8440979955456571, "grad_norm": 27.525070190429688, "learning_rate": 1e-06, "loss": 0.9065, "num_input_tokens_seen": 46331236, "step": 828 }, { "epoch": 1.8440979955456571, "loss": 0.9962807893753052, "loss_ce": 0.000431208114605397, "loss_iou": 0.412109375, "loss_num": 0.034423828125, "loss_xval": 0.99609375, "num_input_tokens_seen": 46331236, "step": 828 }, { "epoch": 1.846325167037862, "grad_norm": 19.961835861206055, "learning_rate": 1e-06, "loss": 1.0033, "num_input_tokens_seen": 46387876, "step": 829 }, { "epoch": 1.846325167037862, "loss": 0.8641173243522644, "loss_ce": 0.0008360765059478581, "loss_iou": 0.376953125, "loss_num": 0.021728515625, "loss_xval": 0.86328125, "num_input_tokens_seen": 46387876, "step": 829 }, { "epoch": 1.8485523385300668, "grad_norm": 23.06169891357422, "learning_rate": 1e-06, "loss": 1.2062, "num_input_tokens_seen": 46441284, "step": 830 }, { "epoch": 1.8485523385300668, "loss": 1.086785078048706, "loss_ce": 0.0003593094297684729, "loss_iou": 0.447265625, "loss_num": 0.037841796875, "loss_xval": 1.0859375, "num_input_tokens_seen": 46441284, "step": 830 }, { "epoch": 1.8507795100222717, "grad_norm": 23.462730407714844, "learning_rate": 1e-06, "loss": 0.932, "num_input_tokens_seen": 46498148, "step": 831 }, { "epoch": 1.8507795100222717, "loss": 1.0123460292816162, "loss_ce": 0.005265878979116678, "loss_iou": 0.41015625, "loss_num": 0.037841796875, "loss_xval": 1.0078125, "num_input_tokens_seen": 46498148, "step": 831 }, { "epoch": 1.8530066815144766, "grad_norm": 18.19162368774414, "learning_rate": 1e-06, "loss": 1.1176, "num_input_tokens_seen": 46552656, "step": 832 }, { "epoch": 1.8530066815144766, "loss": 1.0369131565093994, "loss_ce": 0.0007803026819601655, "loss_iou": 0.4296875, "loss_num": 0.03466796875, "loss_xval": 1.0390625, "num_input_tokens_seen": 46552656, "step": 832 }, { "epoch": 1.8552338530066814, "grad_norm": 27.384538650512695, "learning_rate": 1e-06, "loss": 1.327, "num_input_tokens_seen": 46609180, "step": 833 }, { "epoch": 1.8552338530066814, "loss": 1.4005694389343262, "loss_ce": 0.002132008085027337, "loss_iou": 0.55078125, "loss_num": 0.05859375, "loss_xval": 1.3984375, "num_input_tokens_seen": 46609180, "step": 833 }, { "epoch": 1.8574610244988863, "grad_norm": 17.754493713378906, "learning_rate": 1e-06, "loss": 0.8976, "num_input_tokens_seen": 46667088, "step": 834 }, { "epoch": 1.8574610244988863, "loss": 0.7892530560493469, "loss_ce": 0.00043467164505273104, "loss_iou": 0.326171875, "loss_num": 0.0274658203125, "loss_xval": 0.7890625, "num_input_tokens_seen": 46667088, "step": 834 }, { "epoch": 1.8596881959910914, "grad_norm": 22.643587112426758, "learning_rate": 1e-06, "loss": 1.2626, "num_input_tokens_seen": 46722560, "step": 835 }, { "epoch": 1.8596881959910914, "loss": 1.1584439277648926, "loss_ce": 0.0014614604879170656, "loss_iou": 0.4453125, "loss_num": 0.05322265625, "loss_xval": 1.15625, "num_input_tokens_seen": 46722560, "step": 835 }, { "epoch": 1.8619153674832962, "grad_norm": 16.647260665893555, "learning_rate": 1e-06, "loss": 0.9988, "num_input_tokens_seen": 46780772, "step": 836 }, { "epoch": 1.8619153674832962, "loss": 0.8128501176834106, "loss_ce": 0.0010825353674590588, "loss_iou": 0.30859375, "loss_num": 0.03857421875, "loss_xval": 0.8125, "num_input_tokens_seen": 46780772, "step": 836 }, { "epoch": 1.864142538975501, "grad_norm": 17.087894439697266, "learning_rate": 1e-06, "loss": 1.3527, "num_input_tokens_seen": 46837680, "step": 837 }, { "epoch": 1.864142538975501, "loss": 1.4123845100402832, "loss_ce": 0.0007634205976501107, "loss_iou": 0.56640625, "loss_num": 0.05517578125, "loss_xval": 1.4140625, "num_input_tokens_seen": 46837680, "step": 837 }, { "epoch": 1.8663697104677062, "grad_norm": 17.794803619384766, "learning_rate": 1e-06, "loss": 1.1507, "num_input_tokens_seen": 46894204, "step": 838 }, { "epoch": 1.8663697104677062, "loss": 1.2315852642059326, "loss_ce": 0.015764975920319557, "loss_iou": 0.474609375, "loss_num": 0.053466796875, "loss_xval": 1.21875, "num_input_tokens_seen": 46894204, "step": 838 }, { "epoch": 1.868596881959911, "grad_norm": 20.756214141845703, "learning_rate": 1e-06, "loss": 1.0918, "num_input_tokens_seen": 46953264, "step": 839 }, { "epoch": 1.868596881959911, "loss": 1.1783467531204224, "loss_ce": 0.00036822014953941107, "loss_iou": 0.478515625, "loss_num": 0.044189453125, "loss_xval": 1.1796875, "num_input_tokens_seen": 46953264, "step": 839 }, { "epoch": 1.8708240534521159, "grad_norm": 17.135751724243164, "learning_rate": 1e-06, "loss": 0.8869, "num_input_tokens_seen": 47008696, "step": 840 }, { "epoch": 1.8708240534521159, "loss": 0.8081262111663818, "loss_ce": 0.000631108705420047, "loss_iou": 0.302734375, "loss_num": 0.040283203125, "loss_xval": 0.80859375, "num_input_tokens_seen": 47008696, "step": 840 }, { "epoch": 1.8730512249443207, "grad_norm": 24.36521339416504, "learning_rate": 1e-06, "loss": 1.1049, "num_input_tokens_seen": 47066420, "step": 841 }, { "epoch": 1.8730512249443207, "loss": 0.8385207653045654, "loss_ce": 0.00038598667015321553, "loss_iou": 0.373046875, "loss_num": 0.01806640625, "loss_xval": 0.83984375, "num_input_tokens_seen": 47066420, "step": 841 }, { "epoch": 1.8752783964365256, "grad_norm": 29.640390396118164, "learning_rate": 1e-06, "loss": 1.0064, "num_input_tokens_seen": 47123592, "step": 842 }, { "epoch": 1.8752783964365256, "loss": 0.8564976453781128, "loss_ce": 0.0005406375275924802, "loss_iou": 0.34765625, "loss_num": 0.0322265625, "loss_xval": 0.85546875, "num_input_tokens_seen": 47123592, "step": 842 }, { "epoch": 1.8775055679287305, "grad_norm": 23.356630325317383, "learning_rate": 1e-06, "loss": 1.1621, "num_input_tokens_seen": 47178084, "step": 843 }, { "epoch": 1.8775055679287305, "loss": 1.280181646347046, "loss_ce": 0.0003964488860219717, "loss_iou": 0.55859375, "loss_num": 0.032470703125, "loss_xval": 1.28125, "num_input_tokens_seen": 47178084, "step": 843 }, { "epoch": 1.8797327394209353, "grad_norm": 22.576688766479492, "learning_rate": 1e-06, "loss": 0.9159, "num_input_tokens_seen": 47235124, "step": 844 }, { "epoch": 1.8797327394209353, "loss": 0.9658552408218384, "loss_ce": 0.0005232463008724153, "loss_iou": 0.3984375, "loss_num": 0.033935546875, "loss_xval": 0.96484375, "num_input_tokens_seen": 47235124, "step": 844 }, { "epoch": 1.8819599109131402, "grad_norm": 16.825740814208984, "learning_rate": 1e-06, "loss": 1.0477, "num_input_tokens_seen": 47291416, "step": 845 }, { "epoch": 1.8819599109131402, "loss": 1.1294829845428467, "loss_ce": 0.0013091352302581072, "loss_iou": 0.453125, "loss_num": 0.044189453125, "loss_xval": 1.125, "num_input_tokens_seen": 47291416, "step": 845 }, { "epoch": 1.884187082405345, "grad_norm": 26.097557067871094, "learning_rate": 1e-06, "loss": 0.9203, "num_input_tokens_seen": 47348868, "step": 846 }, { "epoch": 1.884187082405345, "loss": 1.0167232751846313, "loss_ce": 0.0013424212811514735, "loss_iou": 0.443359375, "loss_num": 0.025390625, "loss_xval": 1.015625, "num_input_tokens_seen": 47348868, "step": 846 }, { "epoch": 1.8864142538975501, "grad_norm": 25.306549072265625, "learning_rate": 1e-06, "loss": 1.1072, "num_input_tokens_seen": 47405380, "step": 847 }, { "epoch": 1.8864142538975501, "loss": 0.9173081517219543, "loss_ce": 0.0008042958797886968, "loss_iou": 0.359375, "loss_num": 0.039794921875, "loss_xval": 0.91796875, "num_input_tokens_seen": 47405380, "step": 847 }, { "epoch": 1.888641425389755, "grad_norm": 14.741415023803711, "learning_rate": 1e-06, "loss": 0.7495, "num_input_tokens_seen": 47460792, "step": 848 }, { "epoch": 1.888641425389755, "loss": 0.6206876635551453, "loss_ce": 0.0003263273974880576, "loss_iou": 0.265625, "loss_num": 0.0177001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 47460792, "step": 848 }, { "epoch": 1.89086859688196, "grad_norm": 16.20488739013672, "learning_rate": 1e-06, "loss": 0.9648, "num_input_tokens_seen": 47518560, "step": 849 }, { "epoch": 1.89086859688196, "loss": 0.8242032527923584, "loss_ce": 0.010482522659003735, "loss_iou": 0.287109375, "loss_num": 0.0478515625, "loss_xval": 0.8125, "num_input_tokens_seen": 47518560, "step": 849 }, { "epoch": 1.893095768374165, "grad_norm": 19.45951271057129, "learning_rate": 1e-06, "loss": 1.2282, "num_input_tokens_seen": 47574244, "step": 850 }, { "epoch": 1.893095768374165, "loss": 1.3258323669433594, "loss_ce": 0.0013694913359358907, "loss_iou": 0.515625, "loss_num": 0.059326171875, "loss_xval": 1.328125, "num_input_tokens_seen": 47574244, "step": 850 }, { "epoch": 1.8953229398663698, "grad_norm": 18.448118209838867, "learning_rate": 1e-06, "loss": 1.1335, "num_input_tokens_seen": 47627784, "step": 851 }, { "epoch": 1.8953229398663698, "loss": 1.3810503482818604, "loss_ce": 0.001167478272691369, "loss_iou": 0.5625, "loss_num": 0.051025390625, "loss_xval": 1.3828125, "num_input_tokens_seen": 47627784, "step": 851 }, { "epoch": 1.8975501113585747, "grad_norm": 28.44631004333496, "learning_rate": 1e-06, "loss": 0.9616, "num_input_tokens_seen": 47685004, "step": 852 }, { "epoch": 1.8975501113585747, "loss": 0.8197891116142273, "loss_ce": 0.00045313817099668086, "loss_iou": 0.33984375, "loss_num": 0.02783203125, "loss_xval": 0.8203125, "num_input_tokens_seen": 47685004, "step": 852 }, { "epoch": 1.8997772828507795, "grad_norm": 15.586737632751465, "learning_rate": 1e-06, "loss": 1.1863, "num_input_tokens_seen": 47740520, "step": 853 }, { "epoch": 1.8997772828507795, "loss": 1.022131323814392, "loss_ce": 0.00040277119842357934, "loss_iou": 0.431640625, "loss_num": 0.031982421875, "loss_xval": 1.0234375, "num_input_tokens_seen": 47740520, "step": 853 }, { "epoch": 1.9020044543429844, "grad_norm": 35.565025329589844, "learning_rate": 1e-06, "loss": 1.2452, "num_input_tokens_seen": 47793144, "step": 854 }, { "epoch": 1.9020044543429844, "loss": 1.1260898113250732, "loss_ce": 0.000601442065089941, "loss_iou": 0.453125, "loss_num": 0.0439453125, "loss_xval": 1.125, "num_input_tokens_seen": 47793144, "step": 854 }, { "epoch": 1.9042316258351892, "grad_norm": 21.033187866210938, "learning_rate": 1e-06, "loss": 1.1192, "num_input_tokens_seen": 47848176, "step": 855 }, { "epoch": 1.9042316258351892, "loss": 1.057677984237671, "loss_ce": 0.00042704385123215616, "loss_iou": 0.43359375, "loss_num": 0.037841796875, "loss_xval": 1.0546875, "num_input_tokens_seen": 47848176, "step": 855 }, { "epoch": 1.906458797327394, "grad_norm": 16.94105339050293, "learning_rate": 1e-06, "loss": 1.0397, "num_input_tokens_seen": 47901820, "step": 856 }, { "epoch": 1.906458797327394, "loss": 0.8765525817871094, "loss_ce": 0.0005760163185186684, "loss_iou": 0.37890625, "loss_num": 0.02392578125, "loss_xval": 0.875, "num_input_tokens_seen": 47901820, "step": 856 }, { "epoch": 1.908685968819599, "grad_norm": 22.052213668823242, "learning_rate": 1e-06, "loss": 1.058, "num_input_tokens_seen": 47958540, "step": 857 }, { "epoch": 1.908685968819599, "loss": 0.9184067845344543, "loss_ce": 0.0004380404716357589, "loss_iou": 0.376953125, "loss_num": 0.033203125, "loss_xval": 0.91796875, "num_input_tokens_seen": 47958540, "step": 857 }, { "epoch": 1.910913140311804, "grad_norm": 18.550006866455078, "learning_rate": 1e-06, "loss": 0.8463, "num_input_tokens_seen": 48015240, "step": 858 }, { "epoch": 1.910913140311804, "loss": 0.8902982473373413, "loss_ce": 0.0004057343176100403, "loss_iou": 0.330078125, "loss_num": 0.045654296875, "loss_xval": 0.890625, "num_input_tokens_seen": 48015240, "step": 858 }, { "epoch": 1.913140311804009, "grad_norm": 19.243886947631836, "learning_rate": 1e-06, "loss": 0.8471, "num_input_tokens_seen": 48071956, "step": 859 }, { "epoch": 1.913140311804009, "loss": 0.8775541186332703, "loss_ce": 0.0010892475256696343, "loss_iou": 0.36328125, "loss_num": 0.02978515625, "loss_xval": 0.875, "num_input_tokens_seen": 48071956, "step": 859 }, { "epoch": 1.9153674832962138, "grad_norm": 43.45414733886719, "learning_rate": 1e-06, "loss": 0.8823, "num_input_tokens_seen": 48125932, "step": 860 }, { "epoch": 1.9153674832962138, "loss": 0.9675887823104858, "loss_ce": 0.0005477914237417281, "loss_iou": 0.421875, "loss_num": 0.0245361328125, "loss_xval": 0.96875, "num_input_tokens_seen": 48125932, "step": 860 }, { "epoch": 1.9175946547884188, "grad_norm": 17.62973976135254, "learning_rate": 1e-06, "loss": 0.8766, "num_input_tokens_seen": 48183412, "step": 861 }, { "epoch": 1.9175946547884188, "loss": 1.0546504259109497, "loss_ce": 0.0006953147239983082, "loss_iou": 0.439453125, "loss_num": 0.034912109375, "loss_xval": 1.0546875, "num_input_tokens_seen": 48183412, "step": 861 }, { "epoch": 1.9198218262806237, "grad_norm": 15.39229965209961, "learning_rate": 1e-06, "loss": 1.0492, "num_input_tokens_seen": 48237972, "step": 862 }, { "epoch": 1.9198218262806237, "loss": 1.0228043794631958, "loss_ce": 0.0005876163486391306, "loss_iou": 0.396484375, "loss_num": 0.046142578125, "loss_xval": 1.0234375, "num_input_tokens_seen": 48237972, "step": 862 }, { "epoch": 1.9220489977728286, "grad_norm": 35.672637939453125, "learning_rate": 1e-06, "loss": 1.1325, "num_input_tokens_seen": 48294312, "step": 863 }, { "epoch": 1.9220489977728286, "loss": 1.1503536701202393, "loss_ce": 0.00045127171324566007, "loss_iou": 0.474609375, "loss_num": 0.039794921875, "loss_xval": 1.1484375, "num_input_tokens_seen": 48294312, "step": 863 }, { "epoch": 1.9242761692650334, "grad_norm": 18.644657135009766, "learning_rate": 1e-06, "loss": 1.1068, "num_input_tokens_seen": 48352424, "step": 864 }, { "epoch": 1.9242761692650334, "loss": 0.9790323972702026, "loss_ce": 0.001493359450250864, "loss_iou": 0.3984375, "loss_num": 0.0361328125, "loss_xval": 0.9765625, "num_input_tokens_seen": 48352424, "step": 864 }, { "epoch": 1.9265033407572383, "grad_norm": 19.904115676879883, "learning_rate": 1e-06, "loss": 1.2304, "num_input_tokens_seen": 48407952, "step": 865 }, { "epoch": 1.9265033407572383, "loss": 1.5032316446304321, "loss_ce": 0.0037199431098997593, "loss_iou": 0.5703125, "loss_num": 0.072265625, "loss_xval": 1.5, "num_input_tokens_seen": 48407952, "step": 865 }, { "epoch": 1.9287305122494431, "grad_norm": 16.542858123779297, "learning_rate": 1e-06, "loss": 0.8461, "num_input_tokens_seen": 48463412, "step": 866 }, { "epoch": 1.9287305122494431, "loss": 0.9278074502944946, "loss_ce": 0.0025144873652607203, "loss_iou": 0.36328125, "loss_num": 0.039306640625, "loss_xval": 0.92578125, "num_input_tokens_seen": 48463412, "step": 866 }, { "epoch": 1.930957683741648, "grad_norm": 28.734394073486328, "learning_rate": 1e-06, "loss": 1.206, "num_input_tokens_seen": 48521584, "step": 867 }, { "epoch": 1.930957683741648, "loss": 1.0615025758743286, "loss_ce": 0.00046744622522965074, "loss_iou": 0.45703125, "loss_num": 0.0291748046875, "loss_xval": 1.0625, "num_input_tokens_seen": 48521584, "step": 867 }, { "epoch": 1.9331848552338529, "grad_norm": 16.990293502807617, "learning_rate": 1e-06, "loss": 1.0472, "num_input_tokens_seen": 48575076, "step": 868 }, { "epoch": 1.9331848552338529, "loss": 1.2131456136703491, "loss_ce": 0.0012315567582845688, "loss_iou": 0.482421875, "loss_num": 0.049560546875, "loss_xval": 1.2109375, "num_input_tokens_seen": 48575076, "step": 868 }, { "epoch": 1.935412026726058, "grad_norm": 20.22574234008789, "learning_rate": 1e-06, "loss": 1.0024, "num_input_tokens_seen": 48632592, "step": 869 }, { "epoch": 1.935412026726058, "loss": 1.0861210823059082, "loss_ce": 0.0006718788645230234, "loss_iou": 0.474609375, "loss_num": 0.0272216796875, "loss_xval": 1.0859375, "num_input_tokens_seen": 48632592, "step": 869 }, { "epoch": 1.9376391982182628, "grad_norm": 16.520029067993164, "learning_rate": 1e-06, "loss": 0.9542, "num_input_tokens_seen": 48687552, "step": 870 }, { "epoch": 1.9376391982182628, "loss": 1.0474047660827637, "loss_ce": 0.0005297240568324924, "loss_iou": 0.46484375, "loss_num": 0.023193359375, "loss_xval": 1.046875, "num_input_tokens_seen": 48687552, "step": 870 }, { "epoch": 1.9398663697104677, "grad_norm": 20.480531692504883, "learning_rate": 1e-06, "loss": 0.7512, "num_input_tokens_seen": 48742604, "step": 871 }, { "epoch": 1.9398663697104677, "loss": 0.876989483833313, "loss_ce": 0.0005246583605185151, "loss_iou": 0.37890625, "loss_num": 0.024169921875, "loss_xval": 0.875, "num_input_tokens_seen": 48742604, "step": 871 }, { "epoch": 1.9420935412026727, "grad_norm": 24.84157371520996, "learning_rate": 1e-06, "loss": 0.8841, "num_input_tokens_seen": 48800660, "step": 872 }, { "epoch": 1.9420935412026727, "loss": 0.9676753282546997, "loss_ce": 0.0008784872479736805, "loss_iou": 0.40234375, "loss_num": 0.03271484375, "loss_xval": 0.96875, "num_input_tokens_seen": 48800660, "step": 872 }, { "epoch": 1.9443207126948776, "grad_norm": 22.539447784423828, "learning_rate": 1e-06, "loss": 1.1714, "num_input_tokens_seen": 48855408, "step": 873 }, { "epoch": 1.9443207126948776, "loss": 1.1532479524612427, "loss_ce": 0.0004159705131314695, "loss_iou": 0.515625, "loss_num": 0.0247802734375, "loss_xval": 1.15625, "num_input_tokens_seen": 48855408, "step": 873 }, { "epoch": 1.9465478841870825, "grad_norm": 41.367530822753906, "learning_rate": 1e-06, "loss": 1.2651, "num_input_tokens_seen": 48910668, "step": 874 }, { "epoch": 1.9465478841870825, "loss": 1.3369231224060059, "loss_ce": 0.0014739749021828175, "loss_iou": 0.5625, "loss_num": 0.041259765625, "loss_xval": 1.3359375, "num_input_tokens_seen": 48910668, "step": 874 }, { "epoch": 1.9487750556792873, "grad_norm": 15.942052841186523, "learning_rate": 1e-06, "loss": 0.9964, "num_input_tokens_seen": 48967356, "step": 875 }, { "epoch": 1.9487750556792873, "loss": 1.1262295246124268, "loss_ce": 0.0007413048297166824, "loss_iou": 0.427734375, "loss_num": 0.053955078125, "loss_xval": 1.125, "num_input_tokens_seen": 48967356, "step": 875 }, { "epoch": 1.9510022271714922, "grad_norm": 45.24485778808594, "learning_rate": 1e-06, "loss": 1.0602, "num_input_tokens_seen": 49023520, "step": 876 }, { "epoch": 1.9510022271714922, "loss": 1.150200366973877, "loss_ce": 0.0007863342761993408, "loss_iou": 0.5, "loss_num": 0.029296875, "loss_xval": 1.1484375, "num_input_tokens_seen": 49023520, "step": 876 }, { "epoch": 1.953229398663697, "grad_norm": 24.16380500793457, "learning_rate": 1e-06, "loss": 0.9545, "num_input_tokens_seen": 49079940, "step": 877 }, { "epoch": 1.953229398663697, "loss": 0.8877462148666382, "loss_ce": 0.0005392197053879499, "loss_iou": 0.37109375, "loss_num": 0.0289306640625, "loss_xval": 0.88671875, "num_input_tokens_seen": 49079940, "step": 877 }, { "epoch": 1.955456570155902, "grad_norm": 22.12382698059082, "learning_rate": 1e-06, "loss": 1.1521, "num_input_tokens_seen": 49138012, "step": 878 }, { "epoch": 1.955456570155902, "loss": 1.5235247611999512, "loss_ce": 0.0017962402198463678, "loss_iou": 0.59765625, "loss_num": 0.06494140625, "loss_xval": 1.5234375, "num_input_tokens_seen": 49138012, "step": 878 }, { "epoch": 1.9576837416481068, "grad_norm": 18.208831787109375, "learning_rate": 1e-06, "loss": 0.9084, "num_input_tokens_seen": 49194864, "step": 879 }, { "epoch": 1.9576837416481068, "loss": 0.9498248100280762, "loss_ce": 0.0003619292110670358, "loss_iou": 0.3671875, "loss_num": 0.042724609375, "loss_xval": 0.94921875, "num_input_tokens_seen": 49194864, "step": 879 }, { "epoch": 1.9599109131403119, "grad_norm": 16.236581802368164, "learning_rate": 1e-06, "loss": 0.9203, "num_input_tokens_seen": 49251060, "step": 880 }, { "epoch": 1.9599109131403119, "loss": 0.9726754426956177, "loss_ce": 0.0007516111945733428, "loss_iou": 0.396484375, "loss_num": 0.03564453125, "loss_xval": 0.97265625, "num_input_tokens_seen": 49251060, "step": 880 }, { "epoch": 1.9621380846325167, "grad_norm": 23.136600494384766, "learning_rate": 1e-06, "loss": 1.1897, "num_input_tokens_seen": 49309208, "step": 881 }, { "epoch": 1.9621380846325167, "loss": 1.0319995880126953, "loss_ce": 0.002946855966001749, "loss_iou": 0.43359375, "loss_num": 0.032470703125, "loss_xval": 1.03125, "num_input_tokens_seen": 49309208, "step": 881 }, { "epoch": 1.9643652561247216, "grad_norm": 54.83725357055664, "learning_rate": 1e-06, "loss": 1.0684, "num_input_tokens_seen": 49365352, "step": 882 }, { "epoch": 1.9643652561247216, "loss": 1.0005229711532593, "loss_ce": 0.0005229845410212874, "loss_iou": 0.39453125, "loss_num": 0.041748046875, "loss_xval": 1.0, "num_input_tokens_seen": 49365352, "step": 882 }, { "epoch": 1.9665924276169267, "grad_norm": 20.81184959411621, "learning_rate": 1e-06, "loss": 1.3279, "num_input_tokens_seen": 49420316, "step": 883 }, { "epoch": 1.9665924276169267, "loss": 1.0947761535644531, "loss_ce": 0.0005379515350796282, "loss_iou": 0.451171875, "loss_num": 0.03857421875, "loss_xval": 1.09375, "num_input_tokens_seen": 49420316, "step": 883 }, { "epoch": 1.9688195991091315, "grad_norm": 24.571720123291016, "learning_rate": 1e-06, "loss": 0.9046, "num_input_tokens_seen": 49478252, "step": 884 }, { "epoch": 1.9688195991091315, "loss": 1.0495492219924927, "loss_ce": 0.0007211226620711386, "loss_iou": 0.41796875, "loss_num": 0.042236328125, "loss_xval": 1.046875, "num_input_tokens_seen": 49478252, "step": 884 }, { "epoch": 1.9710467706013364, "grad_norm": 17.04405403137207, "learning_rate": 1e-06, "loss": 0.9426, "num_input_tokens_seen": 49533784, "step": 885 }, { "epoch": 1.9710467706013364, "loss": 0.8944860696792603, "loss_ce": 0.0009313831687904894, "loss_iou": 0.361328125, "loss_num": 0.033935546875, "loss_xval": 0.89453125, "num_input_tokens_seen": 49533784, "step": 885 }, { "epoch": 1.9732739420935412, "grad_norm": 21.639575958251953, "learning_rate": 1e-06, "loss": 1.0049, "num_input_tokens_seen": 49589404, "step": 886 }, { "epoch": 1.9732739420935412, "loss": 0.9363880157470703, "loss_ce": 0.0008411717135459185, "loss_iou": 0.369140625, "loss_num": 0.038818359375, "loss_xval": 0.9375, "num_input_tokens_seen": 49589404, "step": 886 }, { "epoch": 1.975501113585746, "grad_norm": 15.142632484436035, "learning_rate": 1e-06, "loss": 1.0436, "num_input_tokens_seen": 49646820, "step": 887 }, { "epoch": 1.975501113585746, "loss": 0.9690762758255005, "loss_ce": 0.010580191388726234, "loss_iou": 0.37890625, "loss_num": 0.039794921875, "loss_xval": 0.95703125, "num_input_tokens_seen": 49646820, "step": 887 }, { "epoch": 1.977728285077951, "grad_norm": 132.77593994140625, "learning_rate": 1e-06, "loss": 1.0238, "num_input_tokens_seen": 49702832, "step": 888 }, { "epoch": 1.977728285077951, "loss": 1.0730714797973633, "loss_ce": 0.0008057774393819273, "loss_iou": 0.44140625, "loss_num": 0.037109375, "loss_xval": 1.0703125, "num_input_tokens_seen": 49702832, "step": 888 }, { "epoch": 1.9799554565701558, "grad_norm": 46.88450622558594, "learning_rate": 1e-06, "loss": 1.0213, "num_input_tokens_seen": 49759608, "step": 889 }, { "epoch": 1.9799554565701558, "loss": 0.5785812139511108, "loss_ce": 0.0004562578978948295, "loss_iou": 0.23828125, "loss_num": 0.020263671875, "loss_xval": 0.578125, "num_input_tokens_seen": 49759608, "step": 889 }, { "epoch": 1.9821826280623607, "grad_norm": 13.415266990661621, "learning_rate": 1e-06, "loss": 0.6749, "num_input_tokens_seen": 49816076, "step": 890 }, { "epoch": 1.9821826280623607, "loss": 0.6280744075775146, "loss_ce": 0.00038887892151251435, "loss_iou": 0.24609375, "loss_num": 0.0269775390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 49816076, "step": 890 }, { "epoch": 1.9844097995545658, "grad_norm": 15.064915657043457, "learning_rate": 1e-06, "loss": 0.9421, "num_input_tokens_seen": 49873896, "step": 891 }, { "epoch": 1.9844097995545658, "loss": 0.9301207065582275, "loss_ce": 0.00043319491669535637, "loss_iou": 0.388671875, "loss_num": 0.0302734375, "loss_xval": 0.9296875, "num_input_tokens_seen": 49873896, "step": 891 }, { "epoch": 1.9866369710467706, "grad_norm": 19.410207748413086, "learning_rate": 1e-06, "loss": 0.8304, "num_input_tokens_seen": 49929944, "step": 892 }, { "epoch": 1.9866369710467706, "loss": 0.8065379858016968, "loss_ce": 0.0003856243856716901, "loss_iou": 0.345703125, "loss_num": 0.0230712890625, "loss_xval": 0.8046875, "num_input_tokens_seen": 49929944, "step": 892 }, { "epoch": 1.9888641425389755, "grad_norm": 18.616121292114258, "learning_rate": 1e-06, "loss": 0.9924, "num_input_tokens_seen": 49987336, "step": 893 }, { "epoch": 1.9888641425389755, "loss": 1.0283457040786743, "loss_ce": 0.0005136135150678456, "loss_iou": 0.392578125, "loss_num": 0.048583984375, "loss_xval": 1.03125, "num_input_tokens_seen": 49987336, "step": 893 }, { "epoch": 1.9910913140311806, "grad_norm": 20.091445922851562, "learning_rate": 1e-06, "loss": 1.0854, "num_input_tokens_seen": 50045032, "step": 894 }, { "epoch": 1.9910913140311806, "loss": 0.9809556007385254, "loss_ce": 0.000730981701053679, "loss_iou": 0.396484375, "loss_num": 0.037353515625, "loss_xval": 0.98046875, "num_input_tokens_seen": 50045032, "step": 894 }, { "epoch": 1.9933184855233854, "grad_norm": 19.74848175048828, "learning_rate": 1e-06, "loss": 1.0705, "num_input_tokens_seen": 50100472, "step": 895 }, { "epoch": 1.9933184855233854, "loss": 0.8635627031326294, "loss_ce": 0.0012580410111695528, "loss_iou": 0.326171875, "loss_num": 0.04248046875, "loss_xval": 0.86328125, "num_input_tokens_seen": 50100472, "step": 895 }, { "epoch": 1.9955456570155903, "grad_norm": 17.26140022277832, "learning_rate": 1e-06, "loss": 0.9529, "num_input_tokens_seen": 50154436, "step": 896 }, { "epoch": 1.9955456570155903, "loss": 0.9841878414154053, "loss_ce": 0.0006672587478533387, "loss_iou": 0.38671875, "loss_num": 0.041748046875, "loss_xval": 0.984375, "num_input_tokens_seen": 50154436, "step": 896 }, { "epoch": 1.9977728285077951, "grad_norm": 20.126981735229492, "learning_rate": 1e-06, "loss": 1.0437, "num_input_tokens_seen": 50211096, "step": 897 }, { "epoch": 1.9977728285077951, "loss": 1.0043690204620361, "loss_ce": 0.0004628162132576108, "loss_iou": 0.408203125, "loss_num": 0.037841796875, "loss_xval": 1.0, "num_input_tokens_seen": 50211096, "step": 897 }, { "epoch": 2.0, "grad_norm": 23.061552047729492, "learning_rate": 1e-06, "loss": 1.0086, "num_input_tokens_seen": 50270028, "step": 898 }, { "epoch": 2.0, "loss": 0.6891332864761353, "loss_ce": 0.0004125749983359128, "loss_iou": 0.294921875, "loss_num": 0.0198974609375, "loss_xval": 0.6875, "num_input_tokens_seen": 50270028, "step": 898 }, { "epoch": 2.002227171492205, "grad_norm": 18.92450523376465, "learning_rate": 1e-06, "loss": 1.1896, "num_input_tokens_seen": 50325468, "step": 899 }, { "epoch": 2.002227171492205, "loss": 1.5556280612945557, "loss_ce": 0.0019170554587617517, "loss_iou": 0.578125, "loss_num": 0.080078125, "loss_xval": 1.5546875, "num_input_tokens_seen": 50325468, "step": 899 }, { "epoch": 2.0044543429844097, "grad_norm": 20.99635887145996, "learning_rate": 1e-06, "loss": 0.8291, "num_input_tokens_seen": 50384784, "step": 900 }, { "epoch": 2.0044543429844097, "loss": 0.8486037254333496, "loss_ce": 0.0004591494216583669, "loss_iou": 0.341796875, "loss_num": 0.033447265625, "loss_xval": 0.84765625, "num_input_tokens_seen": 50384784, "step": 900 }, { "epoch": 2.0066815144766146, "grad_norm": 31.64121437072754, "learning_rate": 1e-06, "loss": 0.8485, "num_input_tokens_seen": 50440300, "step": 901 }, { "epoch": 2.0066815144766146, "loss": 0.8760842084884644, "loss_ce": 0.0008400966180488467, "loss_iou": 0.35546875, "loss_num": 0.033447265625, "loss_xval": 0.875, "num_input_tokens_seen": 50440300, "step": 901 }, { "epoch": 2.0089086859688194, "grad_norm": 17.200613021850586, "learning_rate": 1e-06, "loss": 1.2305, "num_input_tokens_seen": 50497772, "step": 902 }, { "epoch": 2.0089086859688194, "loss": 1.1643800735473633, "loss_ce": 0.0008058917010203004, "loss_iou": 0.46875, "loss_num": 0.044677734375, "loss_xval": 1.1640625, "num_input_tokens_seen": 50497772, "step": 902 }, { "epoch": 2.0111358574610243, "grad_norm": 16.72941780090332, "learning_rate": 1e-06, "loss": 0.7154, "num_input_tokens_seen": 50555944, "step": 903 }, { "epoch": 2.0111358574610243, "loss": 0.8082038164138794, "loss_ce": 0.0008307406678795815, "loss_iou": 0.302734375, "loss_num": 0.040283203125, "loss_xval": 0.80859375, "num_input_tokens_seen": 50555944, "step": 903 }, { "epoch": 2.0133630289532296, "grad_norm": 20.396434783935547, "learning_rate": 1e-06, "loss": 1.1836, "num_input_tokens_seen": 50613104, "step": 904 }, { "epoch": 2.0133630289532296, "loss": 1.1902202367782593, "loss_ce": 0.0012554043205454946, "loss_iou": 0.466796875, "loss_num": 0.051025390625, "loss_xval": 1.1875, "num_input_tokens_seen": 50613104, "step": 904 }, { "epoch": 2.0155902004454345, "grad_norm": 21.122411727905273, "learning_rate": 1e-06, "loss": 0.9534, "num_input_tokens_seen": 50668660, "step": 905 }, { "epoch": 2.0155902004454345, "loss": 0.9574775695800781, "loss_ce": 0.00044635249651037157, "loss_iou": 0.390625, "loss_num": 0.035400390625, "loss_xval": 0.95703125, "num_input_tokens_seen": 50668660, "step": 905 }, { "epoch": 2.0178173719376393, "grad_norm": 12.18447494506836, "learning_rate": 1e-06, "loss": 1.2904, "num_input_tokens_seen": 50721156, "step": 906 }, { "epoch": 2.0178173719376393, "loss": 0.806514322757721, "loss_ce": 0.0004840257461182773, "loss_iou": 0.333984375, "loss_num": 0.0277099609375, "loss_xval": 0.8046875, "num_input_tokens_seen": 50721156, "step": 906 }, { "epoch": 2.020044543429844, "grad_norm": 20.636756896972656, "learning_rate": 1e-06, "loss": 1.0138, "num_input_tokens_seen": 50777508, "step": 907 }, { "epoch": 2.020044543429844, "loss": 0.9786741733551025, "loss_ce": 0.0006468579522334039, "loss_iou": 0.41796875, "loss_num": 0.02880859375, "loss_xval": 0.9765625, "num_input_tokens_seen": 50777508, "step": 907 }, { "epoch": 2.022271714922049, "grad_norm": 41.294925689697266, "learning_rate": 1e-06, "loss": 1.1918, "num_input_tokens_seen": 50833956, "step": 908 }, { "epoch": 2.022271714922049, "loss": 0.883208692073822, "loss_ce": 0.000396185350837186, "loss_iou": 0.322265625, "loss_num": 0.047607421875, "loss_xval": 0.8828125, "num_input_tokens_seen": 50833956, "step": 908 }, { "epoch": 2.024498886414254, "grad_norm": 18.029842376708984, "learning_rate": 1e-06, "loss": 1.185, "num_input_tokens_seen": 50892188, "step": 909 }, { "epoch": 2.024498886414254, "loss": 1.0920612812042236, "loss_ce": 0.0005085701122879982, "loss_iou": 0.451171875, "loss_num": 0.038330078125, "loss_xval": 1.09375, "num_input_tokens_seen": 50892188, "step": 909 }, { "epoch": 2.0267260579064588, "grad_norm": 17.75556755065918, "learning_rate": 1e-06, "loss": 0.9455, "num_input_tokens_seen": 50948976, "step": 910 }, { "epoch": 2.0267260579064588, "loss": 0.8729127645492554, "loss_ce": 0.0005983302253298461, "loss_iou": 0.37109375, "loss_num": 0.0264892578125, "loss_xval": 0.87109375, "num_input_tokens_seen": 50948976, "step": 910 }, { "epoch": 2.0289532293986636, "grad_norm": 22.336694717407227, "learning_rate": 1e-06, "loss": 0.811, "num_input_tokens_seen": 51005372, "step": 911 }, { "epoch": 2.0289532293986636, "loss": 0.8184252977371216, "loss_ce": 0.0010424721986055374, "loss_iou": 0.33203125, "loss_num": 0.0306396484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 51005372, "step": 911 }, { "epoch": 2.0311804008908685, "grad_norm": 24.3977108001709, "learning_rate": 1e-06, "loss": 0.7044, "num_input_tokens_seen": 51060608, "step": 912 }, { "epoch": 2.0311804008908685, "loss": 0.8316826820373535, "loss_ce": 0.00038386776577681303, "loss_iou": 0.349609375, "loss_num": 0.0267333984375, "loss_xval": 0.83203125, "num_input_tokens_seen": 51060608, "step": 912 }, { "epoch": 2.0334075723830733, "grad_norm": 21.548065185546875, "learning_rate": 1e-06, "loss": 0.9748, "num_input_tokens_seen": 51119092, "step": 913 }, { "epoch": 2.0334075723830733, "loss": 0.853911280632019, "loss_ce": 0.0003957064473070204, "loss_iou": 0.34765625, "loss_num": 0.031494140625, "loss_xval": 0.8515625, "num_input_tokens_seen": 51119092, "step": 913 }, { "epoch": 2.035634743875278, "grad_norm": 17.78352928161621, "learning_rate": 1e-06, "loss": 0.8966, "num_input_tokens_seen": 51174088, "step": 914 }, { "epoch": 2.035634743875278, "loss": 0.6430214047431946, "loss_ce": 0.0004432816640473902, "loss_iou": 0.275390625, "loss_num": 0.0184326171875, "loss_xval": 0.640625, "num_input_tokens_seen": 51174088, "step": 914 }, { "epoch": 2.0378619153674835, "grad_norm": 18.09092903137207, "learning_rate": 1e-06, "loss": 1.0737, "num_input_tokens_seen": 51231032, "step": 915 }, { "epoch": 2.0378619153674835, "loss": 1.0431830883026123, "loss_ce": 0.0007026625098660588, "loss_iou": 0.412109375, "loss_num": 0.0439453125, "loss_xval": 1.0390625, "num_input_tokens_seen": 51231032, "step": 915 }, { "epoch": 2.0400890868596884, "grad_norm": 21.492124557495117, "learning_rate": 1e-06, "loss": 0.849, "num_input_tokens_seen": 51288688, "step": 916 }, { "epoch": 2.0400890868596884, "loss": 0.8285926580429077, "loss_ce": 0.0004676440730690956, "loss_iou": 0.3515625, "loss_num": 0.02490234375, "loss_xval": 0.828125, "num_input_tokens_seen": 51288688, "step": 916 }, { "epoch": 2.0423162583518932, "grad_norm": 24.50774574279785, "learning_rate": 1e-06, "loss": 1.1289, "num_input_tokens_seen": 51343280, "step": 917 }, { "epoch": 2.0423162583518932, "loss": 1.1621774435043335, "loss_ce": 0.0005563499871641397, "loss_iou": 0.482421875, "loss_num": 0.0390625, "loss_xval": 1.1640625, "num_input_tokens_seen": 51343280, "step": 917 }, { "epoch": 2.044543429844098, "grad_norm": 23.351022720336914, "learning_rate": 1e-06, "loss": 0.8928, "num_input_tokens_seen": 51399164, "step": 918 }, { "epoch": 2.044543429844098, "loss": 0.76546311378479, "loss_ce": 0.0005705538205802441, "loss_iou": 0.31640625, "loss_num": 0.0263671875, "loss_xval": 0.765625, "num_input_tokens_seen": 51399164, "step": 918 }, { "epoch": 2.046770601336303, "grad_norm": 23.73122215270996, "learning_rate": 1e-06, "loss": 1.2118, "num_input_tokens_seen": 51454772, "step": 919 }, { "epoch": 2.046770601336303, "loss": 1.1581388711929321, "loss_ce": 0.0006681361701339483, "loss_iou": 0.5078125, "loss_num": 0.0281982421875, "loss_xval": 1.15625, "num_input_tokens_seen": 51454772, "step": 919 }, { "epoch": 2.048997772828508, "grad_norm": 18.12936782836914, "learning_rate": 1e-06, "loss": 1.1355, "num_input_tokens_seen": 51508836, "step": 920 }, { "epoch": 2.048997772828508, "loss": 0.9356842041015625, "loss_ce": 0.0016021885676309466, "loss_iou": 0.3359375, "loss_num": 0.05224609375, "loss_xval": 0.93359375, "num_input_tokens_seen": 51508836, "step": 920 }, { "epoch": 2.0512249443207127, "grad_norm": 18.033832550048828, "learning_rate": 1e-06, "loss": 0.897, "num_input_tokens_seen": 51562932, "step": 921 }, { "epoch": 2.0512249443207127, "loss": 0.9213240742683411, "loss_ce": 0.001158121507614851, "loss_iou": 0.4140625, "loss_num": 0.0185546875, "loss_xval": 0.921875, "num_input_tokens_seen": 51562932, "step": 921 }, { "epoch": 2.0534521158129175, "grad_norm": 22.288450241088867, "learning_rate": 1e-06, "loss": 1.1296, "num_input_tokens_seen": 51617208, "step": 922 }, { "epoch": 2.0534521158129175, "loss": 1.1455841064453125, "loss_ce": 0.017166122794151306, "loss_iou": 0.447265625, "loss_num": 0.046630859375, "loss_xval": 1.125, "num_input_tokens_seen": 51617208, "step": 922 }, { "epoch": 2.0556792873051224, "grad_norm": 13.78810977935791, "learning_rate": 1e-06, "loss": 1.0396, "num_input_tokens_seen": 51669524, "step": 923 }, { "epoch": 2.0556792873051224, "loss": 1.045444130897522, "loss_ce": 0.0005222847685217857, "loss_iou": 0.421875, "loss_num": 0.04052734375, "loss_xval": 1.046875, "num_input_tokens_seen": 51669524, "step": 923 }, { "epoch": 2.0579064587973273, "grad_norm": 15.79564380645752, "learning_rate": 1e-06, "loss": 0.9122, "num_input_tokens_seen": 51727060, "step": 924 }, { "epoch": 2.0579064587973273, "loss": 0.7765226364135742, "loss_ce": 0.000399627722799778, "loss_iou": 0.326171875, "loss_num": 0.0245361328125, "loss_xval": 0.77734375, "num_input_tokens_seen": 51727060, "step": 924 }, { "epoch": 2.060133630289532, "grad_norm": 27.499584197998047, "learning_rate": 1e-06, "loss": 1.1524, "num_input_tokens_seen": 51782548, "step": 925 }, { "epoch": 2.060133630289532, "loss": 1.210993766784668, "loss_ce": 0.00042247679084539413, "loss_iou": 0.51171875, "loss_num": 0.037353515625, "loss_xval": 1.2109375, "num_input_tokens_seen": 51782548, "step": 925 }, { "epoch": 2.062360801781737, "grad_norm": 21.81817626953125, "learning_rate": 1e-06, "loss": 0.7765, "num_input_tokens_seen": 51839240, "step": 926 }, { "epoch": 2.062360801781737, "loss": 0.8635718822479248, "loss_ce": 0.0005347240949049592, "loss_iou": 0.365234375, "loss_num": 0.02685546875, "loss_xval": 0.86328125, "num_input_tokens_seen": 51839240, "step": 926 }, { "epoch": 2.0645879732739423, "grad_norm": 27.51004409790039, "learning_rate": 1e-06, "loss": 0.885, "num_input_tokens_seen": 51896564, "step": 927 }, { "epoch": 2.0645879732739423, "loss": 0.7961362600326538, "loss_ce": 0.0007261328864842653, "loss_iou": 0.357421875, "loss_num": 0.016357421875, "loss_xval": 0.796875, "num_input_tokens_seen": 51896564, "step": 927 }, { "epoch": 2.066815144766147, "grad_norm": 19.509262084960938, "learning_rate": 1e-06, "loss": 0.9487, "num_input_tokens_seen": 51954088, "step": 928 }, { "epoch": 2.066815144766147, "loss": 1.0089809894561768, "loss_ce": 0.0006801420240662992, "loss_iou": 0.396484375, "loss_num": 0.04296875, "loss_xval": 1.0078125, "num_input_tokens_seen": 51954088, "step": 928 }, { "epoch": 2.069042316258352, "grad_norm": 21.640626907348633, "learning_rate": 1e-06, "loss": 0.9389, "num_input_tokens_seen": 52010852, "step": 929 }, { "epoch": 2.069042316258352, "loss": 1.0190068483352661, "loss_ce": 0.0014287333469837904, "loss_iou": 0.40625, "loss_num": 0.04150390625, "loss_xval": 1.015625, "num_input_tokens_seen": 52010852, "step": 929 }, { "epoch": 2.071269487750557, "grad_norm": 18.838546752929688, "learning_rate": 1e-06, "loss": 1.213, "num_input_tokens_seen": 52069620, "step": 930 }, { "epoch": 2.071269487750557, "loss": 1.1467416286468506, "loss_ce": 0.0009896388510242105, "loss_iou": 0.498046875, "loss_num": 0.030029296875, "loss_xval": 1.1484375, "num_input_tokens_seen": 52069620, "step": 930 }, { "epoch": 2.0734966592427617, "grad_norm": 22.098485946655273, "learning_rate": 1e-06, "loss": 0.9992, "num_input_tokens_seen": 52125176, "step": 931 }, { "epoch": 2.0734966592427617, "loss": 1.115182638168335, "loss_ce": 0.0004365970380604267, "loss_iou": 0.42578125, "loss_num": 0.05224609375, "loss_xval": 1.1171875, "num_input_tokens_seen": 52125176, "step": 931 }, { "epoch": 2.0757238307349666, "grad_norm": 15.070525169372559, "learning_rate": 1e-06, "loss": 0.9082, "num_input_tokens_seen": 52182404, "step": 932 }, { "epoch": 2.0757238307349666, "loss": 0.8831936717033386, "loss_ce": 0.0005032622721046209, "loss_iou": 0.3828125, "loss_num": 0.023681640625, "loss_xval": 0.8828125, "num_input_tokens_seen": 52182404, "step": 932 }, { "epoch": 2.0779510022271714, "grad_norm": 22.24066925048828, "learning_rate": 1e-06, "loss": 0.9648, "num_input_tokens_seen": 52238920, "step": 933 }, { "epoch": 2.0779510022271714, "loss": 1.1474614143371582, "loss_ce": 0.0004888359108008444, "loss_iou": 0.462890625, "loss_num": 0.044189453125, "loss_xval": 1.1484375, "num_input_tokens_seen": 52238920, "step": 933 }, { "epoch": 2.0801781737193763, "grad_norm": 17.68231964111328, "learning_rate": 1e-06, "loss": 0.9119, "num_input_tokens_seen": 52293392, "step": 934 }, { "epoch": 2.0801781737193763, "loss": 0.9804922342300415, "loss_ce": 0.0010000698966905475, "loss_iou": 0.38671875, "loss_num": 0.041015625, "loss_xval": 0.98046875, "num_input_tokens_seen": 52293392, "step": 934 }, { "epoch": 2.082405345211581, "grad_norm": 16.659244537353516, "learning_rate": 1e-06, "loss": 0.9513, "num_input_tokens_seen": 52345928, "step": 935 }, { "epoch": 2.082405345211581, "loss": 0.7912268042564392, "loss_ce": 0.00045530046918429434, "loss_iou": 0.337890625, "loss_num": 0.0228271484375, "loss_xval": 0.7890625, "num_input_tokens_seen": 52345928, "step": 935 }, { "epoch": 2.084632516703786, "grad_norm": 17.690183639526367, "learning_rate": 1e-06, "loss": 0.8534, "num_input_tokens_seen": 52401488, "step": 936 }, { "epoch": 2.084632516703786, "loss": 0.9090708494186401, "loss_ce": 0.00037944965879432857, "loss_iou": 0.33984375, "loss_num": 0.04541015625, "loss_xval": 0.91015625, "num_input_tokens_seen": 52401488, "step": 936 }, { "epoch": 2.086859688195991, "grad_norm": 18.25249481201172, "learning_rate": 1e-06, "loss": 0.9348, "num_input_tokens_seen": 52454132, "step": 937 }, { "epoch": 2.086859688195991, "loss": 0.9369181990623474, "loss_ce": 0.00039476132951676846, "loss_iou": 0.3828125, "loss_num": 0.0341796875, "loss_xval": 0.9375, "num_input_tokens_seen": 52454132, "step": 937 }, { "epoch": 2.089086859688196, "grad_norm": 23.943071365356445, "learning_rate": 1e-06, "loss": 0.9978, "num_input_tokens_seen": 52508724, "step": 938 }, { "epoch": 2.089086859688196, "loss": 0.9771347045898438, "loss_ce": 0.0025253635831177235, "loss_iou": 0.40234375, "loss_num": 0.033935546875, "loss_xval": 0.9765625, "num_input_tokens_seen": 52508724, "step": 938 }, { "epoch": 2.091314031180401, "grad_norm": 19.138086318969727, "learning_rate": 1e-06, "loss": 0.9259, "num_input_tokens_seen": 52567416, "step": 939 }, { "epoch": 2.091314031180401, "loss": 0.8586698174476624, "loss_ce": 0.0019803589675575495, "loss_iou": 0.3515625, "loss_num": 0.03125, "loss_xval": 0.85546875, "num_input_tokens_seen": 52567416, "step": 939 }, { "epoch": 2.093541202672606, "grad_norm": 29.820850372314453, "learning_rate": 1e-06, "loss": 1.0665, "num_input_tokens_seen": 52625816, "step": 940 }, { "epoch": 2.093541202672606, "loss": 1.4001514911651611, "loss_ce": 0.0012256972258910537, "loss_iou": 0.5703125, "loss_num": 0.051513671875, "loss_xval": 1.3984375, "num_input_tokens_seen": 52625816, "step": 940 }, { "epoch": 2.0957683741648108, "grad_norm": 14.36317253112793, "learning_rate": 1e-06, "loss": 0.5621, "num_input_tokens_seen": 52682968, "step": 941 }, { "epoch": 2.0957683741648108, "loss": 0.5812158584594727, "loss_ce": 0.00040527121745981276, "loss_iou": 0.2197265625, "loss_num": 0.0284423828125, "loss_xval": 0.58203125, "num_input_tokens_seen": 52682968, "step": 941 }, { "epoch": 2.0979955456570156, "grad_norm": 19.567962646484375, "learning_rate": 1e-06, "loss": 0.9139, "num_input_tokens_seen": 52737516, "step": 942 }, { "epoch": 2.0979955456570156, "loss": 1.065826654434204, "loss_ce": 0.00039695383748039603, "loss_iou": 0.453125, "loss_num": 0.031982421875, "loss_xval": 1.0625, "num_input_tokens_seen": 52737516, "step": 942 }, { "epoch": 2.1002227171492205, "grad_norm": 28.01074981689453, "learning_rate": 1e-06, "loss": 1.1929, "num_input_tokens_seen": 52793536, "step": 943 }, { "epoch": 2.1002227171492205, "loss": 1.1947014331817627, "loss_ce": 0.00036561937304213643, "loss_iou": 0.48046875, "loss_num": 0.046630859375, "loss_xval": 1.1953125, "num_input_tokens_seen": 52793536, "step": 943 }, { "epoch": 2.1024498886414253, "grad_norm": 20.295827865600586, "learning_rate": 1e-06, "loss": 1.2702, "num_input_tokens_seen": 52847904, "step": 944 }, { "epoch": 2.1024498886414253, "loss": 1.352550983428955, "loss_ce": 0.0005001933313906193, "loss_iou": 0.578125, "loss_num": 0.0380859375, "loss_xval": 1.3515625, "num_input_tokens_seen": 52847904, "step": 944 }, { "epoch": 2.10467706013363, "grad_norm": 16.32712745666504, "learning_rate": 1e-06, "loss": 1.0087, "num_input_tokens_seen": 52904284, "step": 945 }, { "epoch": 2.10467706013363, "loss": 1.0332987308502197, "loss_ce": 0.0005839248769916594, "loss_iou": 0.404296875, "loss_num": 0.044921875, "loss_xval": 1.03125, "num_input_tokens_seen": 52904284, "step": 945 }, { "epoch": 2.106904231625835, "grad_norm": 29.043960571289062, "learning_rate": 1e-06, "loss": 1.0885, "num_input_tokens_seen": 52960784, "step": 946 }, { "epoch": 2.106904231625835, "loss": 0.9473793506622314, "loss_ce": 0.0006019732682034373, "loss_iou": 0.3671875, "loss_num": 0.04248046875, "loss_xval": 0.9453125, "num_input_tokens_seen": 52960784, "step": 946 }, { "epoch": 2.10913140311804, "grad_norm": 14.747257232666016, "learning_rate": 1e-06, "loss": 0.9877, "num_input_tokens_seen": 53015812, "step": 947 }, { "epoch": 2.10913140311804, "loss": 1.0732710361480713, "loss_ce": 0.0007612318731844425, "loss_iou": 0.42578125, "loss_num": 0.044189453125, "loss_xval": 1.0703125, "num_input_tokens_seen": 53015812, "step": 947 }, { "epoch": 2.111358574610245, "grad_norm": 103.11000061035156, "learning_rate": 1e-06, "loss": 0.9914, "num_input_tokens_seen": 53072308, "step": 948 }, { "epoch": 2.111358574610245, "loss": 0.9887726902961731, "loss_ce": 0.000491408514790237, "loss_iou": 0.447265625, "loss_num": 0.0185546875, "loss_xval": 0.98828125, "num_input_tokens_seen": 53072308, "step": 948 }, { "epoch": 2.11358574610245, "grad_norm": 23.691804885864258, "learning_rate": 1e-06, "loss": 1.2725, "num_input_tokens_seen": 53127836, "step": 949 }, { "epoch": 2.11358574610245, "loss": 1.3882193565368652, "loss_ce": 0.001500481041148305, "loss_iou": 0.51171875, "loss_num": 0.07373046875, "loss_xval": 1.390625, "num_input_tokens_seen": 53127836, "step": 949 }, { "epoch": 2.115812917594655, "grad_norm": 13.513452529907227, "learning_rate": 1e-06, "loss": 0.9153, "num_input_tokens_seen": 53184644, "step": 950 }, { "epoch": 2.115812917594655, "loss": 1.0870821475982666, "loss_ce": 0.0004122781683690846, "loss_iou": 0.435546875, "loss_num": 0.042724609375, "loss_xval": 1.0859375, "num_input_tokens_seen": 53184644, "step": 950 }, { "epoch": 2.11804008908686, "grad_norm": 14.758941650390625, "learning_rate": 1e-06, "loss": 0.8424, "num_input_tokens_seen": 53242056, "step": 951 }, { "epoch": 2.11804008908686, "loss": 0.9238724708557129, "loss_ce": 0.00041059922659769654, "loss_iou": 0.384765625, "loss_num": 0.0311279296875, "loss_xval": 0.921875, "num_input_tokens_seen": 53242056, "step": 951 }, { "epoch": 2.1202672605790647, "grad_norm": 50.94358825683594, "learning_rate": 1e-06, "loss": 0.9876, "num_input_tokens_seen": 53296836, "step": 952 }, { "epoch": 2.1202672605790647, "loss": 0.8739633560180664, "loss_ce": 0.00042815617052838206, "loss_iou": 0.35546875, "loss_num": 0.0322265625, "loss_xval": 0.875, "num_input_tokens_seen": 53296836, "step": 952 }, { "epoch": 2.1224944320712695, "grad_norm": 29.346717834472656, "learning_rate": 1e-06, "loss": 0.8476, "num_input_tokens_seen": 53356080, "step": 953 }, { "epoch": 2.1224944320712695, "loss": 0.7091416120529175, "loss_ce": 0.00040138079202733934, "loss_iou": 0.298828125, "loss_num": 0.022705078125, "loss_xval": 0.70703125, "num_input_tokens_seen": 53356080, "step": 953 }, { "epoch": 2.1247216035634744, "grad_norm": 30.826066970825195, "learning_rate": 1e-06, "loss": 0.8384, "num_input_tokens_seen": 53411060, "step": 954 }, { "epoch": 2.1247216035634744, "loss": 0.8927929401397705, "loss_ce": 0.00045894747017882764, "loss_iou": 0.330078125, "loss_num": 0.04638671875, "loss_xval": 0.890625, "num_input_tokens_seen": 53411060, "step": 954 }, { "epoch": 2.1269487750556793, "grad_norm": 26.217329025268555, "learning_rate": 1e-06, "loss": 1.2109, "num_input_tokens_seen": 53467452, "step": 955 }, { "epoch": 2.1269487750556793, "loss": 1.171452283859253, "loss_ce": 0.0005538875702768564, "loss_iou": 0.50390625, "loss_num": 0.031982421875, "loss_xval": 1.171875, "num_input_tokens_seen": 53467452, "step": 955 }, { "epoch": 2.129175946547884, "grad_norm": 15.275303840637207, "learning_rate": 1e-06, "loss": 1.0093, "num_input_tokens_seen": 53523000, "step": 956 }, { "epoch": 2.129175946547884, "loss": 0.9647861123085022, "loss_ce": 0.0004306259215809405, "loss_iou": 0.3984375, "loss_num": 0.033447265625, "loss_xval": 0.96484375, "num_input_tokens_seen": 53523000, "step": 956 }, { "epoch": 2.131403118040089, "grad_norm": 20.987869262695312, "learning_rate": 1e-06, "loss": 0.8899, "num_input_tokens_seen": 53577268, "step": 957 }, { "epoch": 2.131403118040089, "loss": 0.8194369077682495, "loss_ce": 0.00034510315163061023, "loss_iou": 0.3359375, "loss_num": 0.0296630859375, "loss_xval": 0.8203125, "num_input_tokens_seen": 53577268, "step": 957 }, { "epoch": 2.133630289532294, "grad_norm": 17.244304656982422, "learning_rate": 1e-06, "loss": 0.9678, "num_input_tokens_seen": 53632968, "step": 958 }, { "epoch": 2.133630289532294, "loss": 0.7954371571540833, "loss_ce": 0.0010035325540229678, "loss_iou": 0.32421875, "loss_num": 0.02880859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 53632968, "step": 958 }, { "epoch": 2.1358574610244987, "grad_norm": 27.137948989868164, "learning_rate": 1e-06, "loss": 0.8922, "num_input_tokens_seen": 53686276, "step": 959 }, { "epoch": 2.1358574610244987, "loss": 0.8764593005180359, "loss_ce": 0.007318664342164993, "loss_iou": 0.345703125, "loss_num": 0.035888671875, "loss_xval": 0.8671875, "num_input_tokens_seen": 53686276, "step": 959 }, { "epoch": 2.138084632516704, "grad_norm": 13.352965354919434, "learning_rate": 1e-06, "loss": 1.2179, "num_input_tokens_seen": 53738980, "step": 960 }, { "epoch": 2.138084632516704, "loss": 1.0099034309387207, "loss_ce": 0.0003819413250312209, "loss_iou": 0.4375, "loss_num": 0.0272216796875, "loss_xval": 1.0078125, "num_input_tokens_seen": 53738980, "step": 960 }, { "epoch": 2.140311804008909, "grad_norm": 26.12520980834961, "learning_rate": 1e-06, "loss": 1.0853, "num_input_tokens_seen": 53795408, "step": 961 }, { "epoch": 2.140311804008909, "loss": 1.0113427639007568, "loss_ce": 0.00035642064176499844, "loss_iou": 0.42578125, "loss_num": 0.0322265625, "loss_xval": 1.0078125, "num_input_tokens_seen": 53795408, "step": 961 }, { "epoch": 2.1425389755011137, "grad_norm": 25.219985961914062, "learning_rate": 1e-06, "loss": 1.0355, "num_input_tokens_seen": 53849140, "step": 962 }, { "epoch": 2.1425389755011137, "loss": 0.8350330591201782, "loss_ce": 0.0008045152062550187, "loss_iou": 0.33984375, "loss_num": 0.030517578125, "loss_xval": 0.8359375, "num_input_tokens_seen": 53849140, "step": 962 }, { "epoch": 2.1447661469933186, "grad_norm": 22.237886428833008, "learning_rate": 1e-06, "loss": 1.0859, "num_input_tokens_seen": 53904680, "step": 963 }, { "epoch": 2.1447661469933186, "loss": 0.9965952634811401, "loss_ce": 0.000501530768815428, "loss_iou": 0.41015625, "loss_num": 0.034912109375, "loss_xval": 0.99609375, "num_input_tokens_seen": 53904680, "step": 963 }, { "epoch": 2.1469933184855234, "grad_norm": 27.480417251586914, "learning_rate": 1e-06, "loss": 1.0019, "num_input_tokens_seen": 53962276, "step": 964 }, { "epoch": 2.1469933184855234, "loss": 0.9119249582290649, "loss_ce": 0.0007921320502646267, "loss_iou": 0.373046875, "loss_num": 0.033203125, "loss_xval": 0.91015625, "num_input_tokens_seen": 53962276, "step": 964 }, { "epoch": 2.1492204899777283, "grad_norm": 16.98265266418457, "learning_rate": 1e-06, "loss": 0.9955, "num_input_tokens_seen": 54015980, "step": 965 }, { "epoch": 2.1492204899777283, "loss": 0.8996407985687256, "loss_ce": 0.0004708755586761981, "loss_iou": 0.376953125, "loss_num": 0.029052734375, "loss_xval": 0.8984375, "num_input_tokens_seen": 54015980, "step": 965 }, { "epoch": 2.151447661469933, "grad_norm": 17.36399269104004, "learning_rate": 1e-06, "loss": 0.9589, "num_input_tokens_seen": 54072540, "step": 966 }, { "epoch": 2.151447661469933, "loss": 0.8351460099220276, "loss_ce": 0.00042922317516058683, "loss_iou": 0.341796875, "loss_num": 0.0301513671875, "loss_xval": 0.8359375, "num_input_tokens_seen": 54072540, "step": 966 }, { "epoch": 2.153674832962138, "grad_norm": 25.51936149597168, "learning_rate": 1e-06, "loss": 1.0361, "num_input_tokens_seen": 54128332, "step": 967 }, { "epoch": 2.153674832962138, "loss": 0.8526254892349243, "loss_ce": 0.0005747509421780705, "loss_iou": 0.365234375, "loss_num": 0.02392578125, "loss_xval": 0.8515625, "num_input_tokens_seen": 54128332, "step": 967 }, { "epoch": 2.155902004454343, "grad_norm": 25.46952247619629, "learning_rate": 1e-06, "loss": 1.2554, "num_input_tokens_seen": 54184776, "step": 968 }, { "epoch": 2.155902004454343, "loss": 1.3332043886184692, "loss_ce": 0.0011730894912034273, "loss_iou": 0.52734375, "loss_num": 0.05615234375, "loss_xval": 1.328125, "num_input_tokens_seen": 54184776, "step": 968 }, { "epoch": 2.1581291759465477, "grad_norm": 16.16930389404297, "learning_rate": 1e-06, "loss": 0.8004, "num_input_tokens_seen": 54241804, "step": 969 }, { "epoch": 2.1581291759465477, "loss": 0.612720787525177, "loss_ce": 0.006275475956499577, "loss_iou": 0.2412109375, "loss_num": 0.0245361328125, "loss_xval": 0.60546875, "num_input_tokens_seen": 54241804, "step": 969 }, { "epoch": 2.1603563474387526, "grad_norm": 12.317280769348145, "learning_rate": 1e-06, "loss": 0.7401, "num_input_tokens_seen": 54297804, "step": 970 }, { "epoch": 2.1603563474387526, "loss": 0.8193256855010986, "loss_ce": 0.00047803280176594853, "loss_iou": 0.349609375, "loss_num": 0.0240478515625, "loss_xval": 0.8203125, "num_input_tokens_seen": 54297804, "step": 970 }, { "epoch": 2.1625835189309575, "grad_norm": 25.277143478393555, "learning_rate": 1e-06, "loss": 0.9579, "num_input_tokens_seen": 54352416, "step": 971 }, { "epoch": 2.1625835189309575, "loss": 1.0242624282836914, "loss_ce": 0.0003366165910847485, "loss_iou": 0.435546875, "loss_num": 0.030517578125, "loss_xval": 1.0234375, "num_input_tokens_seen": 54352416, "step": 971 }, { "epoch": 2.1648106904231628, "grad_norm": 23.600387573242188, "learning_rate": 1e-06, "loss": 1.0226, "num_input_tokens_seen": 54408836, "step": 972 }, { "epoch": 2.1648106904231628, "loss": 0.9383513927459717, "loss_ce": 0.0003631227882578969, "loss_iou": 0.392578125, "loss_num": 0.0303955078125, "loss_xval": 0.9375, "num_input_tokens_seen": 54408836, "step": 972 }, { "epoch": 2.1670378619153676, "grad_norm": 28.22263526916504, "learning_rate": 1e-06, "loss": 1.102, "num_input_tokens_seen": 54461188, "step": 973 }, { "epoch": 2.1670378619153676, "loss": 1.2241106033325195, "loss_ce": 0.0004167944425716996, "loss_iou": 0.498046875, "loss_num": 0.04541015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 54461188, "step": 973 }, { "epoch": 2.1692650334075725, "grad_norm": 23.0220947265625, "learning_rate": 1e-06, "loss": 1.0596, "num_input_tokens_seen": 54516224, "step": 974 }, { "epoch": 2.1692650334075725, "loss": 1.1534892320632935, "loss_ce": 0.0006571857957169414, "loss_iou": 0.47265625, "loss_num": 0.04150390625, "loss_xval": 1.15625, "num_input_tokens_seen": 54516224, "step": 974 }, { "epoch": 2.1714922048997773, "grad_norm": 50.8327751159668, "learning_rate": 1e-06, "loss": 0.962, "num_input_tokens_seen": 54571536, "step": 975 }, { "epoch": 2.1714922048997773, "loss": 0.8975195288658142, "loss_ce": 0.0004247867036610842, "loss_iou": 0.3828125, "loss_num": 0.0264892578125, "loss_xval": 0.8984375, "num_input_tokens_seen": 54571536, "step": 975 }, { "epoch": 2.173719376391982, "grad_norm": 18.573394775390625, "learning_rate": 1e-06, "loss": 1.0714, "num_input_tokens_seen": 54624220, "step": 976 }, { "epoch": 2.173719376391982, "loss": 0.7970374822616577, "loss_ce": 0.0011390313738957047, "loss_iou": 0.3046875, "loss_num": 0.037109375, "loss_xval": 0.796875, "num_input_tokens_seen": 54624220, "step": 976 }, { "epoch": 2.175946547884187, "grad_norm": 26.248809814453125, "learning_rate": 1e-06, "loss": 0.9597, "num_input_tokens_seen": 54681684, "step": 977 }, { "epoch": 2.175946547884187, "loss": 0.7498317956924438, "loss_ce": 0.0003200596256647259, "loss_iou": 0.314453125, "loss_num": 0.02392578125, "loss_xval": 0.75, "num_input_tokens_seen": 54681684, "step": 977 }, { "epoch": 2.178173719376392, "grad_norm": 17.984722137451172, "learning_rate": 1e-06, "loss": 0.8119, "num_input_tokens_seen": 54739128, "step": 978 }, { "epoch": 2.178173719376392, "loss": 0.8240878582000732, "loss_ce": 0.00035743031185120344, "loss_iou": 0.345703125, "loss_num": 0.0263671875, "loss_xval": 0.82421875, "num_input_tokens_seen": 54739128, "step": 978 }, { "epoch": 2.180400890868597, "grad_norm": 16.49453353881836, "learning_rate": 1e-06, "loss": 1.1017, "num_input_tokens_seen": 54795068, "step": 979 }, { "epoch": 2.180400890868597, "loss": 1.3098089694976807, "loss_ce": 0.0004827585944440216, "loss_iou": 0.546875, "loss_num": 0.042724609375, "loss_xval": 1.3125, "num_input_tokens_seen": 54795068, "step": 979 }, { "epoch": 2.1826280623608016, "grad_norm": 25.356178283691406, "learning_rate": 1e-06, "loss": 0.9565, "num_input_tokens_seen": 54851036, "step": 980 }, { "epoch": 2.1826280623608016, "loss": 0.794826328754425, "loss_ce": 0.0003927270008716732, "loss_iou": 0.3515625, "loss_num": 0.018310546875, "loss_xval": 0.79296875, "num_input_tokens_seen": 54851036, "step": 980 }, { "epoch": 2.1848552338530065, "grad_norm": 20.565073013305664, "learning_rate": 1e-06, "loss": 0.8048, "num_input_tokens_seen": 54903052, "step": 981 }, { "epoch": 2.1848552338530065, "loss": 0.7406924366950989, "loss_ce": 0.00045808005961589515, "loss_iou": 0.318359375, "loss_num": 0.020751953125, "loss_xval": 0.7421875, "num_input_tokens_seen": 54903052, "step": 981 }, { "epoch": 2.187082405345212, "grad_norm": 25.87790870666504, "learning_rate": 1e-06, "loss": 0.8178, "num_input_tokens_seen": 54960984, "step": 982 }, { "epoch": 2.187082405345212, "loss": 0.7066792249679565, "loss_ce": 0.0006245420081540942, "loss_iou": 0.294921875, "loss_num": 0.0235595703125, "loss_xval": 0.70703125, "num_input_tokens_seen": 54960984, "step": 982 }, { "epoch": 2.1893095768374167, "grad_norm": 19.50031089782715, "learning_rate": 1e-06, "loss": 1.1593, "num_input_tokens_seen": 55017748, "step": 983 }, { "epoch": 2.1893095768374167, "loss": 0.988908588886261, "loss_ce": 0.0003832111251540482, "loss_iou": 0.423828125, "loss_num": 0.028076171875, "loss_xval": 0.98828125, "num_input_tokens_seen": 55017748, "step": 983 }, { "epoch": 2.1915367483296215, "grad_norm": 16.748844146728516, "learning_rate": 1e-06, "loss": 0.8003, "num_input_tokens_seen": 55074808, "step": 984 }, { "epoch": 2.1915367483296215, "loss": 0.9688905477523804, "loss_ce": 0.00038473017048090696, "loss_iou": 0.419921875, "loss_num": 0.0262451171875, "loss_xval": 0.96875, "num_input_tokens_seen": 55074808, "step": 984 }, { "epoch": 2.1937639198218264, "grad_norm": 68.782958984375, "learning_rate": 1e-06, "loss": 1.0795, "num_input_tokens_seen": 55131672, "step": 985 }, { "epoch": 2.1937639198218264, "loss": 0.8397893309593201, "loss_ce": 0.0006780020194128156, "loss_iou": 0.353515625, "loss_num": 0.02685546875, "loss_xval": 0.83984375, "num_input_tokens_seen": 55131672, "step": 985 }, { "epoch": 2.1959910913140313, "grad_norm": 21.74385643005371, "learning_rate": 1e-06, "loss": 0.966, "num_input_tokens_seen": 55186240, "step": 986 }, { "epoch": 2.1959910913140313, "loss": 1.022130012512207, "loss_ce": 0.005528404843062162, "loss_iou": 0.404296875, "loss_num": 0.04150390625, "loss_xval": 1.015625, "num_input_tokens_seen": 55186240, "step": 986 }, { "epoch": 2.198218262806236, "grad_norm": 27.58509063720703, "learning_rate": 1e-06, "loss": 1.0463, "num_input_tokens_seen": 55243732, "step": 987 }, { "epoch": 2.198218262806236, "loss": 1.0700922012329102, "loss_ce": 0.0005121089052408934, "loss_iou": 0.4375, "loss_num": 0.038330078125, "loss_xval": 1.0703125, "num_input_tokens_seen": 55243732, "step": 987 }, { "epoch": 2.200445434298441, "grad_norm": 19.08161735534668, "learning_rate": 1e-06, "loss": 1.0416, "num_input_tokens_seen": 55300384, "step": 988 }, { "epoch": 2.200445434298441, "loss": 1.1714062690734863, "loss_ce": 0.0007519207429140806, "loss_iou": 0.4375, "loss_num": 0.0595703125, "loss_xval": 1.171875, "num_input_tokens_seen": 55300384, "step": 988 }, { "epoch": 2.202672605790646, "grad_norm": 21.197906494140625, "learning_rate": 1e-06, "loss": 0.9063, "num_input_tokens_seen": 55355972, "step": 989 }, { "epoch": 2.202672605790646, "loss": 0.8070752024650574, "loss_ce": 0.0011670144740492105, "loss_iou": 0.330078125, "loss_num": 0.02880859375, "loss_xval": 0.8046875, "num_input_tokens_seen": 55355972, "step": 989 }, { "epoch": 2.2048997772828507, "grad_norm": 57.792137145996094, "learning_rate": 1e-06, "loss": 0.7107, "num_input_tokens_seen": 55411904, "step": 990 }, { "epoch": 2.2048997772828507, "loss": 0.8724934458732605, "loss_ce": 0.00042314609163440764, "loss_iou": 0.357421875, "loss_num": 0.03125, "loss_xval": 0.87109375, "num_input_tokens_seen": 55411904, "step": 990 }, { "epoch": 2.2071269487750556, "grad_norm": 18.81410026550293, "learning_rate": 1e-06, "loss": 0.921, "num_input_tokens_seen": 55469032, "step": 991 }, { "epoch": 2.2071269487750556, "loss": 0.5198028087615967, "loss_ce": 0.0003936740104109049, "loss_iou": 0.2275390625, "loss_num": 0.01287841796875, "loss_xval": 0.51953125, "num_input_tokens_seen": 55469032, "step": 991 }, { "epoch": 2.2093541202672604, "grad_norm": 19.535879135131836, "learning_rate": 1e-06, "loss": 0.8962, "num_input_tokens_seen": 55528000, "step": 992 }, { "epoch": 2.2093541202672604, "loss": 1.013750433921814, "loss_ce": 0.0005668357480317354, "loss_iou": 0.423828125, "loss_num": 0.033203125, "loss_xval": 1.015625, "num_input_tokens_seen": 55528000, "step": 992 }, { "epoch": 2.2115812917594653, "grad_norm": 19.903423309326172, "learning_rate": 1e-06, "loss": 1.1321, "num_input_tokens_seen": 55585248, "step": 993 }, { "epoch": 2.2115812917594653, "loss": 1.1713190078735352, "loss_ce": 0.0009089382365345955, "loss_iou": 0.486328125, "loss_num": 0.03955078125, "loss_xval": 1.171875, "num_input_tokens_seen": 55585248, "step": 993 }, { "epoch": 2.2138084632516706, "grad_norm": 20.668163299560547, "learning_rate": 1e-06, "loss": 1.093, "num_input_tokens_seen": 55643640, "step": 994 }, { "epoch": 2.2138084632516706, "loss": 1.0199007987976074, "loss_ce": 0.0006136804004199803, "loss_iou": 0.400390625, "loss_num": 0.04345703125, "loss_xval": 1.015625, "num_input_tokens_seen": 55643640, "step": 994 }, { "epoch": 2.2160356347438754, "grad_norm": 14.16650676727295, "learning_rate": 1e-06, "loss": 0.8074, "num_input_tokens_seen": 55699880, "step": 995 }, { "epoch": 2.2160356347438754, "loss": 0.7972851991653442, "loss_ce": 0.0004102342645637691, "loss_iou": 0.330078125, "loss_num": 0.0274658203125, "loss_xval": 0.796875, "num_input_tokens_seen": 55699880, "step": 995 }, { "epoch": 2.2182628062360803, "grad_norm": 21.781885147094727, "learning_rate": 1e-06, "loss": 1.2838, "num_input_tokens_seen": 55757468, "step": 996 }, { "epoch": 2.2182628062360803, "loss": 1.5526351928710938, "loss_ce": 0.0008773244917392731, "loss_iou": 0.625, "loss_num": 0.060546875, "loss_xval": 1.5546875, "num_input_tokens_seen": 55757468, "step": 996 }, { "epoch": 2.220489977728285, "grad_norm": 13.922673225402832, "learning_rate": 1e-06, "loss": 0.8959, "num_input_tokens_seen": 55811116, "step": 997 }, { "epoch": 2.220489977728285, "loss": 0.8420233130455017, "loss_ce": 0.00047059194184839725, "loss_iou": 0.3515625, "loss_num": 0.02783203125, "loss_xval": 0.83984375, "num_input_tokens_seen": 55811116, "step": 997 }, { "epoch": 2.22271714922049, "grad_norm": 31.61424446105957, "learning_rate": 1e-06, "loss": 1.0489, "num_input_tokens_seen": 55867056, "step": 998 }, { "epoch": 2.22271714922049, "loss": 0.9875812530517578, "loss_ce": 0.0016193470219150186, "loss_iou": 0.4140625, "loss_num": 0.031982421875, "loss_xval": 0.984375, "num_input_tokens_seen": 55867056, "step": 998 }, { "epoch": 2.224944320712695, "grad_norm": 18.81981658935547, "learning_rate": 1e-06, "loss": 1.1018, "num_input_tokens_seen": 55922508, "step": 999 }, { "epoch": 2.224944320712695, "loss": 1.031872272491455, "loss_ce": 0.0011104578152298927, "loss_iou": 0.41796875, "loss_num": 0.0390625, "loss_xval": 1.03125, "num_input_tokens_seen": 55922508, "step": 999 }, { "epoch": 2.2271714922048997, "grad_norm": 30.99945640563965, "learning_rate": 1e-06, "loss": 0.8712, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "eval_seeclick_web_CIoU": 0.5659106969833374, "eval_seeclick_web_GIoU": 0.5565789341926575, "eval_seeclick_web_IoU": 0.5834561288356781, "eval_seeclick_web_MAE_all": 0.016565547324717045, "eval_seeclick_web_MAE_h": 0.010058594401925802, "eval_seeclick_web_MAE_w": 0.016721216030418873, "eval_seeclick_web_MAE_x_boxes": 0.009843998588621616, "eval_seeclick_web_MAE_y_boxes": 0.02248636749573052, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9639145135879517, "eval_seeclick_web_loss_ce": 0.0004584374401019886, "eval_seeclick_web_loss_iou": 0.4400634765625, "eval_seeclick_web_loss_num": 0.013164520263671875, "eval_seeclick_web_loss_xval": 0.946044921875, "eval_seeclick_web_runtime": 18.6045, "eval_seeclick_web_samples_per_second": 2.688, "eval_seeclick_web_steps_per_second": 0.108, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "eval_icons_CIoU": 0.3126373589038849, "eval_icons_GIoU": 0.3512195348739624, "eval_icons_IoU": 0.38992173969745636, "eval_icons_MAE_all": 0.06972917914390564, "eval_icons_MAE_h": 0.035466980654746294, "eval_icons_MAE_w": 0.07942605763673782, "eval_icons_MAE_x_boxes": 0.06474643759429455, "eval_icons_MAE_y_boxes": 0.03778674267232418, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.6989706754684448, "eval_icons_loss_ce": 0.0012570468825288117, "eval_icons_loss_iou": 0.6531982421875, "eval_icons_loss_num": 0.06581878662109375, "eval_icons_loss_xval": 1.634765625, "eval_icons_runtime": 18.1838, "eval_icons_samples_per_second": 2.75, "eval_icons_steps_per_second": 0.11, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "eval_screenspot_CIoU": 0.30459535121917725, "eval_screenspot_GIoU": 0.326509823401769, "eval_screenspot_IoU": 0.38954149683316547, "eval_screenspot_MAE_all": 0.0834270715713501, "eval_screenspot_MAE_h": 0.04727879042426745, "eval_screenspot_MAE_w": 0.07983010758956273, "eval_screenspot_MAE_x_boxes": 0.12237335244814555, "eval_screenspot_MAE_y_boxes": 0.0545121505856514, "eval_screenspot_inside_bbox": 0.6045833428700765, "eval_screenspot_loss": 1.8311774730682373, "eval_screenspot_loss_ce": 0.002042266229788462, "eval_screenspot_loss_iou": 0.721923828125, "eval_screenspot_loss_num": 0.09384409586588542, "eval_screenspot_loss_xval": 1.9124348958333333, "eval_screenspot_runtime": 27.92, "eval_screenspot_samples_per_second": 3.188, "eval_screenspot_steps_per_second": 0.107, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "eval_compot_CIoU": 0.3376414477825165, "eval_compot_GIoU": 0.3708181381225586, "eval_compot_IoU": 0.40139785408973694, "eval_compot_MAE_all": 0.02684677764773369, "eval_compot_MAE_h": 0.011378975585103035, "eval_compot_MAE_w": 0.037089540623128414, "eval_compot_MAE_x_boxes": 0.034720015712082386, "eval_compot_MAE_y_boxes": 0.007318113464862108, "eval_compot_inside_bbox": 0.5868055522441864, "eval_compot_loss": 1.426712989807129, "eval_compot_loss_ce": 0.0004611587501130998, "eval_compot_loss_iou": 0.6314697265625, "eval_compot_loss_num": 0.024566650390625, "eval_compot_loss_xval": 1.38525390625, "eval_compot_runtime": 18.3333, "eval_compot_samples_per_second": 2.727, "eval_compot_steps_per_second": 0.109, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "eval_custom_ui_val_CIoU": 0.411270619266563, "eval_custom_ui_val_GIoU": 0.4448644651307, "eval_custom_ui_val_IoU": 0.4744107557667626, "eval_custom_ui_val_MAE_all": 0.040074211441808276, "eval_custom_ui_val_MAE_h": 0.023370659496221278, "eval_custom_ui_val_MAE_w": 0.04196054426332315, "eval_custom_ui_val_MAE_x_boxes": 0.04563241203625997, "eval_custom_ui_val_MAE_y_boxes": 0.02151624123669333, "eval_custom_ui_val_inside_bbox": 0.6712962985038757, "eval_custom_ui_val_loss": 1.345447063446045, "eval_custom_ui_val_loss_ce": 0.0011988434901771445, "eval_custom_ui_val_loss_iou": 0.5582411024305556, "eval_custom_ui_val_loss_num": 0.03997251722547743, "eval_custom_ui_val_loss_xval": 1.3163519965277777, "eval_custom_ui_val_runtime": 55.6004, "eval_custom_ui_val_samples_per_second": 4.766, "eval_custom_ui_val_steps_per_second": 0.162, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2271714922048997, "loss": 1.0719767808914185, "loss_ce": 0.000687746680341661, "loss_iou": 0.45703125, "loss_num": 0.03125, "loss_xval": 1.0703125, "num_input_tokens_seen": 55979796, "step": 1000 }, { "epoch": 2.2293986636971046, "grad_norm": 23.139463424682617, "learning_rate": 1e-06, "loss": 1.2981, "num_input_tokens_seen": 56034828, "step": 1001 }, { "epoch": 2.2293986636971046, "loss": 1.173054575920105, "loss_ce": 0.0004471880674827844, "loss_iou": 0.484375, "loss_num": 0.04052734375, "loss_xval": 1.171875, "num_input_tokens_seen": 56034828, "step": 1001 }, { "epoch": 2.2316258351893095, "grad_norm": 14.833909034729004, "learning_rate": 1e-06, "loss": 0.7852, "num_input_tokens_seen": 56090360, "step": 1002 }, { "epoch": 2.2316258351893095, "loss": 0.7579194903373718, "loss_ce": 0.00035115116043016315, "loss_iou": 0.32421875, "loss_num": 0.0216064453125, "loss_xval": 0.7578125, "num_input_tokens_seen": 56090360, "step": 1002 }, { "epoch": 2.2338530066815143, "grad_norm": 15.422577857971191, "learning_rate": 1e-06, "loss": 0.9412, "num_input_tokens_seen": 56146060, "step": 1003 }, { "epoch": 2.2338530066815143, "loss": 1.1035232543945312, "loss_ce": 0.0006180062773637474, "loss_iou": 0.474609375, "loss_num": 0.030517578125, "loss_xval": 1.1015625, "num_input_tokens_seen": 56146060, "step": 1003 }, { "epoch": 2.236080178173719, "grad_norm": 18.446163177490234, "learning_rate": 1e-06, "loss": 0.7755, "num_input_tokens_seen": 56202244, "step": 1004 }, { "epoch": 2.236080178173719, "loss": 0.8598674535751343, "loss_ce": 0.000492453167680651, "loss_iou": 0.349609375, "loss_num": 0.031982421875, "loss_xval": 0.859375, "num_input_tokens_seen": 56202244, "step": 1004 }, { "epoch": 2.2383073496659245, "grad_norm": 15.064460754394531, "learning_rate": 1e-06, "loss": 0.9762, "num_input_tokens_seen": 56257360, "step": 1005 }, { "epoch": 2.2383073496659245, "loss": 1.0436865091323853, "loss_ce": 0.0007178318337537348, "loss_iou": 0.404296875, "loss_num": 0.047119140625, "loss_xval": 1.046875, "num_input_tokens_seen": 56257360, "step": 1005 }, { "epoch": 2.2405345211581293, "grad_norm": 37.368812561035156, "learning_rate": 1e-06, "loss": 1.0937, "num_input_tokens_seen": 56309892, "step": 1006 }, { "epoch": 2.2405345211581293, "loss": 0.8646106123924255, "loss_ce": 0.00035280632437206805, "loss_iou": 0.3671875, "loss_num": 0.0257568359375, "loss_xval": 0.86328125, "num_input_tokens_seen": 56309892, "step": 1006 }, { "epoch": 2.242761692650334, "grad_norm": 20.58150863647461, "learning_rate": 1e-06, "loss": 1.0268, "num_input_tokens_seen": 56365936, "step": 1007 }, { "epoch": 2.242761692650334, "loss": 1.112687587738037, "loss_ce": 0.00038295358535833657, "loss_iou": 0.443359375, "loss_num": 0.044921875, "loss_xval": 1.109375, "num_input_tokens_seen": 56365936, "step": 1007 }, { "epoch": 2.244988864142539, "grad_norm": 19.283090591430664, "learning_rate": 1e-06, "loss": 0.9734, "num_input_tokens_seen": 56422576, "step": 1008 }, { "epoch": 2.244988864142539, "loss": 1.0260212421417236, "loss_ce": 0.00038648463669233024, "loss_iou": 0.4453125, "loss_num": 0.02685546875, "loss_xval": 1.0234375, "num_input_tokens_seen": 56422576, "step": 1008 }, { "epoch": 2.247216035634744, "grad_norm": 15.9085111618042, "learning_rate": 1e-06, "loss": 1.1175, "num_input_tokens_seen": 56481320, "step": 1009 }, { "epoch": 2.247216035634744, "loss": 0.8886697888374329, "loss_ce": 0.0004862197383772582, "loss_iou": 0.33203125, "loss_num": 0.04443359375, "loss_xval": 0.88671875, "num_input_tokens_seen": 56481320, "step": 1009 }, { "epoch": 2.249443207126949, "grad_norm": 20.693801879882812, "learning_rate": 1e-06, "loss": 0.8815, "num_input_tokens_seen": 56537736, "step": 1010 }, { "epoch": 2.249443207126949, "loss": 0.7114344835281372, "loss_ce": 0.000619084108620882, "loss_iou": 0.294921875, "loss_num": 0.024658203125, "loss_xval": 0.7109375, "num_input_tokens_seen": 56537736, "step": 1010 }, { "epoch": 2.2516703786191536, "grad_norm": 16.531198501586914, "learning_rate": 1e-06, "loss": 1.0092, "num_input_tokens_seen": 56594736, "step": 1011 }, { "epoch": 2.2516703786191536, "loss": 0.9720375537872314, "loss_ce": 0.0003578157047741115, "loss_iou": 0.419921875, "loss_num": 0.0264892578125, "loss_xval": 0.97265625, "num_input_tokens_seen": 56594736, "step": 1011 }, { "epoch": 2.2538975501113585, "grad_norm": 36.781532287597656, "learning_rate": 1e-06, "loss": 1.0224, "num_input_tokens_seen": 56651788, "step": 1012 }, { "epoch": 2.2538975501113585, "loss": 1.0697126388549805, "loss_ce": 0.00037672443431802094, "loss_iou": 0.462890625, "loss_num": 0.0281982421875, "loss_xval": 1.0703125, "num_input_tokens_seen": 56651788, "step": 1012 }, { "epoch": 2.2561247216035634, "grad_norm": 18.51349449157715, "learning_rate": 1e-06, "loss": 0.7345, "num_input_tokens_seen": 56707000, "step": 1013 }, { "epoch": 2.2561247216035634, "loss": 0.8490347862243652, "loss_ce": 0.000401980709284544, "loss_iou": 0.3828125, "loss_num": 0.0166015625, "loss_xval": 0.84765625, "num_input_tokens_seen": 56707000, "step": 1013 }, { "epoch": 2.2583518930957682, "grad_norm": 23.15570068359375, "learning_rate": 1e-06, "loss": 1.0084, "num_input_tokens_seen": 56761980, "step": 1014 }, { "epoch": 2.2583518930957682, "loss": 0.8266038298606873, "loss_ce": 0.0005540476413443685, "loss_iou": 0.34375, "loss_num": 0.02734375, "loss_xval": 0.82421875, "num_input_tokens_seen": 56761980, "step": 1014 }, { "epoch": 2.260579064587973, "grad_norm": 34.847137451171875, "learning_rate": 1e-06, "loss": 1.0461, "num_input_tokens_seen": 56820908, "step": 1015 }, { "epoch": 2.260579064587973, "loss": 0.9271408319473267, "loss_ce": 0.0006271736929193139, "loss_iou": 0.36328125, "loss_num": 0.0400390625, "loss_xval": 0.92578125, "num_input_tokens_seen": 56820908, "step": 1015 }, { "epoch": 2.262806236080178, "grad_norm": 34.261474609375, "learning_rate": 1e-06, "loss": 1.0021, "num_input_tokens_seen": 56877908, "step": 1016 }, { "epoch": 2.262806236080178, "loss": 0.912013590335846, "loss_ce": 0.00039250371628440917, "loss_iou": 0.384765625, "loss_num": 0.028076171875, "loss_xval": 0.91015625, "num_input_tokens_seen": 56877908, "step": 1016 }, { "epoch": 2.2650334075723833, "grad_norm": 30.62000846862793, "learning_rate": 1e-06, "loss": 1.1779, "num_input_tokens_seen": 56933112, "step": 1017 }, { "epoch": 2.2650334075723833, "loss": 1.2571220397949219, "loss_ce": 0.0012625895906239748, "loss_iou": 0.5078125, "loss_num": 0.04736328125, "loss_xval": 1.2578125, "num_input_tokens_seen": 56933112, "step": 1017 }, { "epoch": 2.267260579064588, "grad_norm": 21.855445861816406, "learning_rate": 1e-06, "loss": 0.8362, "num_input_tokens_seen": 56988708, "step": 1018 }, { "epoch": 2.267260579064588, "loss": 0.8795033097267151, "loss_ce": 0.00035289014340378344, "loss_iou": 0.380859375, "loss_num": 0.023193359375, "loss_xval": 0.87890625, "num_input_tokens_seen": 56988708, "step": 1018 }, { "epoch": 2.269487750556793, "grad_norm": 26.169057846069336, "learning_rate": 1e-06, "loss": 0.6786, "num_input_tokens_seen": 57046476, "step": 1019 }, { "epoch": 2.269487750556793, "loss": 0.7506150007247925, "loss_ce": 0.00037088984390720725, "loss_iou": 0.328125, "loss_num": 0.0191650390625, "loss_xval": 0.75, "num_input_tokens_seen": 57046476, "step": 1019 }, { "epoch": 2.271714922048998, "grad_norm": 13.190043449401855, "learning_rate": 1e-06, "loss": 0.6864, "num_input_tokens_seen": 57104372, "step": 1020 }, { "epoch": 2.271714922048998, "loss": 0.7020950317382812, "loss_ce": 0.0004348924267105758, "loss_iou": 0.298828125, "loss_num": 0.0211181640625, "loss_xval": 0.703125, "num_input_tokens_seen": 57104372, "step": 1020 }, { "epoch": 2.2739420935412027, "grad_norm": 16.335966110229492, "learning_rate": 1e-06, "loss": 0.7953, "num_input_tokens_seen": 57159292, "step": 1021 }, { "epoch": 2.2739420935412027, "loss": 0.8143704533576965, "loss_ce": 0.0004056024190504104, "loss_iou": 0.359375, "loss_num": 0.01904296875, "loss_xval": 0.8125, "num_input_tokens_seen": 57159292, "step": 1021 }, { "epoch": 2.2761692650334076, "grad_norm": 27.681427001953125, "learning_rate": 1e-06, "loss": 0.9381, "num_input_tokens_seen": 57216432, "step": 1022 }, { "epoch": 2.2761692650334076, "loss": 0.8490380048751831, "loss_ce": 0.0004051811702083796, "loss_iou": 0.369140625, "loss_num": 0.0223388671875, "loss_xval": 0.84765625, "num_input_tokens_seen": 57216432, "step": 1022 }, { "epoch": 2.2783964365256124, "grad_norm": 19.292463302612305, "learning_rate": 1e-06, "loss": 0.9677, "num_input_tokens_seen": 57273288, "step": 1023 }, { "epoch": 2.2783964365256124, "loss": 0.9046714305877686, "loss_ce": 0.000374538212781772, "loss_iou": 0.3515625, "loss_num": 0.04052734375, "loss_xval": 0.90625, "num_input_tokens_seen": 57273288, "step": 1023 }, { "epoch": 2.2806236080178173, "grad_norm": 22.14390754699707, "learning_rate": 1e-06, "loss": 0.8592, "num_input_tokens_seen": 57329664, "step": 1024 }, { "epoch": 2.2806236080178173, "loss": 1.0152591466903687, "loss_ce": 0.00036650500260293484, "loss_iou": 0.427734375, "loss_num": 0.031982421875, "loss_xval": 1.015625, "num_input_tokens_seen": 57329664, "step": 1024 }, { "epoch": 2.282850779510022, "grad_norm": 27.401857376098633, "learning_rate": 1e-06, "loss": 0.936, "num_input_tokens_seen": 57384724, "step": 1025 }, { "epoch": 2.282850779510022, "loss": 0.8641769886016846, "loss_ce": 0.00040744812577031553, "loss_iou": 0.357421875, "loss_num": 0.0294189453125, "loss_xval": 0.86328125, "num_input_tokens_seen": 57384724, "step": 1025 }, { "epoch": 2.285077951002227, "grad_norm": 13.289790153503418, "learning_rate": 1e-06, "loss": 0.8172, "num_input_tokens_seen": 57441660, "step": 1026 }, { "epoch": 2.285077951002227, "loss": 0.7961374521255493, "loss_ce": 0.0017038530204445124, "loss_iou": 0.30859375, "loss_num": 0.03515625, "loss_xval": 0.79296875, "num_input_tokens_seen": 57441660, "step": 1026 }, { "epoch": 2.2873051224944323, "grad_norm": 14.736815452575684, "learning_rate": 1e-06, "loss": 0.8734, "num_input_tokens_seen": 57497360, "step": 1027 }, { "epoch": 2.2873051224944323, "loss": 0.8420823216438293, "loss_ce": 0.0005296375602483749, "loss_iou": 0.345703125, "loss_num": 0.0299072265625, "loss_xval": 0.83984375, "num_input_tokens_seen": 57497360, "step": 1027 }, { "epoch": 2.289532293986637, "grad_norm": 18.238637924194336, "learning_rate": 1e-06, "loss": 1.3031, "num_input_tokens_seen": 57556032, "step": 1028 }, { "epoch": 2.289532293986637, "loss": 1.1549606323242188, "loss_ce": 0.0009079101146198809, "loss_iou": 0.44140625, "loss_num": 0.0546875, "loss_xval": 1.15625, "num_input_tokens_seen": 57556032, "step": 1028 }, { "epoch": 2.291759465478842, "grad_norm": 20.993566513061523, "learning_rate": 1e-06, "loss": 0.9124, "num_input_tokens_seen": 57610720, "step": 1029 }, { "epoch": 2.291759465478842, "loss": 0.8001816272735596, "loss_ce": 0.00031593156745657325, "loss_iou": 0.345703125, "loss_num": 0.0216064453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 57610720, "step": 1029 }, { "epoch": 2.293986636971047, "grad_norm": 31.266921997070312, "learning_rate": 1e-06, "loss": 0.9937, "num_input_tokens_seen": 57669460, "step": 1030 }, { "epoch": 2.293986636971047, "loss": 1.138893485069275, "loss_ce": 0.0021747485734522343, "loss_iou": 0.45703125, "loss_num": 0.044921875, "loss_xval": 1.140625, "num_input_tokens_seen": 57669460, "step": 1030 }, { "epoch": 2.2962138084632517, "grad_norm": 29.804710388183594, "learning_rate": 1e-06, "loss": 0.9726, "num_input_tokens_seen": 57726024, "step": 1031 }, { "epoch": 2.2962138084632517, "loss": 1.0021300315856934, "loss_ce": 0.00042098466656170785, "loss_iou": 0.388671875, "loss_num": 0.045654296875, "loss_xval": 1.0, "num_input_tokens_seen": 57726024, "step": 1031 }, { "epoch": 2.2984409799554566, "grad_norm": 16.084012985229492, "learning_rate": 1e-06, "loss": 1.006, "num_input_tokens_seen": 57783076, "step": 1032 }, { "epoch": 2.2984409799554566, "loss": 1.1425867080688477, "loss_ce": 0.0004969405708834529, "loss_iou": 0.4609375, "loss_num": 0.04443359375, "loss_xval": 1.140625, "num_input_tokens_seen": 57783076, "step": 1032 }, { "epoch": 2.3006681514476615, "grad_norm": 27.19474983215332, "learning_rate": 1e-06, "loss": 0.992, "num_input_tokens_seen": 57837908, "step": 1033 }, { "epoch": 2.3006681514476615, "loss": 0.9429985284805298, "loss_ce": 0.0006157412426546216, "loss_iou": 0.3828125, "loss_num": 0.03564453125, "loss_xval": 0.94140625, "num_input_tokens_seen": 57837908, "step": 1033 }, { "epoch": 2.3028953229398663, "grad_norm": 30.541584014892578, "learning_rate": 1e-06, "loss": 1.1811, "num_input_tokens_seen": 57893948, "step": 1034 }, { "epoch": 2.3028953229398663, "loss": 1.2314056158065796, "loss_ce": 0.00044855731539428234, "loss_iou": 0.490234375, "loss_num": 0.049560546875, "loss_xval": 1.234375, "num_input_tokens_seen": 57893948, "step": 1034 }, { "epoch": 2.305122494432071, "grad_norm": 16.411218643188477, "learning_rate": 1e-06, "loss": 0.8391, "num_input_tokens_seen": 57950452, "step": 1035 }, { "epoch": 2.305122494432071, "loss": 0.8310631513595581, "loss_ce": 0.0004967194981873035, "loss_iou": 0.328125, "loss_num": 0.034423828125, "loss_xval": 0.83203125, "num_input_tokens_seen": 57950452, "step": 1035 }, { "epoch": 2.307349665924276, "grad_norm": 20.609291076660156, "learning_rate": 1e-06, "loss": 0.9038, "num_input_tokens_seen": 58006408, "step": 1036 }, { "epoch": 2.307349665924276, "loss": 0.7673088312149048, "loss_ce": 0.0004631901392713189, "loss_iou": 0.326171875, "loss_num": 0.0230712890625, "loss_xval": 0.765625, "num_input_tokens_seen": 58006408, "step": 1036 }, { "epoch": 2.309576837416481, "grad_norm": 31.944068908691406, "learning_rate": 1e-06, "loss": 1.2189, "num_input_tokens_seen": 58062244, "step": 1037 }, { "epoch": 2.309576837416481, "loss": 0.9620157480239868, "loss_ce": 0.0003457932034507394, "loss_iou": 0.396484375, "loss_num": 0.033447265625, "loss_xval": 0.9609375, "num_input_tokens_seen": 58062244, "step": 1037 }, { "epoch": 2.3118040089086858, "grad_norm": 28.491863250732422, "learning_rate": 1e-06, "loss": 1.0784, "num_input_tokens_seen": 58116528, "step": 1038 }, { "epoch": 2.3118040089086858, "loss": 0.8677387237548828, "loss_ce": 0.0003071002720389515, "loss_iou": 0.359375, "loss_num": 0.0299072265625, "loss_xval": 0.8671875, "num_input_tokens_seen": 58116528, "step": 1038 }, { "epoch": 2.3140311804008906, "grad_norm": 16.422298431396484, "learning_rate": 1e-06, "loss": 1.1027, "num_input_tokens_seen": 58172968, "step": 1039 }, { "epoch": 2.3140311804008906, "loss": 1.250427484512329, "loss_ce": 0.0006715654162690043, "loss_iou": 0.482421875, "loss_num": 0.05712890625, "loss_xval": 1.25, "num_input_tokens_seen": 58172968, "step": 1039 }, { "epoch": 2.316258351893096, "grad_norm": 33.38667297363281, "learning_rate": 1e-06, "loss": 1.3044, "num_input_tokens_seen": 58228600, "step": 1040 }, { "epoch": 2.316258351893096, "loss": 1.3572330474853516, "loss_ce": 0.00029941959655843675, "loss_iou": 0.5703125, "loss_num": 0.042724609375, "loss_xval": 1.359375, "num_input_tokens_seen": 58228600, "step": 1040 }, { "epoch": 2.318485523385301, "grad_norm": 25.701223373413086, "learning_rate": 1e-06, "loss": 0.8746, "num_input_tokens_seen": 58284388, "step": 1041 }, { "epoch": 2.318485523385301, "loss": 0.9398232102394104, "loss_ce": 0.0006142100319266319, "loss_iou": 0.412109375, "loss_num": 0.0233154296875, "loss_xval": 0.9375, "num_input_tokens_seen": 58284388, "step": 1041 }, { "epoch": 2.3207126948775056, "grad_norm": 30.38446807861328, "learning_rate": 1e-06, "loss": 0.9229, "num_input_tokens_seen": 58342404, "step": 1042 }, { "epoch": 2.3207126948775056, "loss": 1.0379189252853394, "loss_ce": 0.0005654151318594813, "loss_iou": 0.43359375, "loss_num": 0.033447265625, "loss_xval": 1.0390625, "num_input_tokens_seen": 58342404, "step": 1042 }, { "epoch": 2.3229398663697105, "grad_norm": 26.981040954589844, "learning_rate": 1e-06, "loss": 1.1894, "num_input_tokens_seen": 58398820, "step": 1043 }, { "epoch": 2.3229398663697105, "loss": 1.1476168632507324, "loss_ce": 0.0004000905028078705, "loss_iou": 0.494140625, "loss_num": 0.031494140625, "loss_xval": 1.1484375, "num_input_tokens_seen": 58398820, "step": 1043 }, { "epoch": 2.3251670378619154, "grad_norm": 17.203502655029297, "learning_rate": 1e-06, "loss": 1.1498, "num_input_tokens_seen": 58455324, "step": 1044 }, { "epoch": 2.3251670378619154, "loss": 1.3662068843841553, "loss_ce": 0.00048417344805784523, "loss_iou": 0.578125, "loss_num": 0.04248046875, "loss_xval": 1.3671875, "num_input_tokens_seen": 58455324, "step": 1044 }, { "epoch": 2.3273942093541202, "grad_norm": 26.704832077026367, "learning_rate": 1e-06, "loss": 1.1157, "num_input_tokens_seen": 58512188, "step": 1045 }, { "epoch": 2.3273942093541202, "loss": 1.0862228870391846, "loss_ce": 0.000529451877810061, "loss_iou": 0.451171875, "loss_num": 0.03662109375, "loss_xval": 1.0859375, "num_input_tokens_seen": 58512188, "step": 1045 }, { "epoch": 2.329621380846325, "grad_norm": 18.440515518188477, "learning_rate": 1e-06, "loss": 0.9097, "num_input_tokens_seen": 58570316, "step": 1046 }, { "epoch": 2.329621380846325, "loss": 0.8449282050132751, "loss_ce": 0.0004457966424524784, "loss_iou": 0.35546875, "loss_num": 0.0262451171875, "loss_xval": 0.84375, "num_input_tokens_seen": 58570316, "step": 1046 }, { "epoch": 2.33184855233853, "grad_norm": 15.037751197814941, "learning_rate": 1e-06, "loss": 0.7533, "num_input_tokens_seen": 58628892, "step": 1047 }, { "epoch": 2.33184855233853, "loss": 0.7818921804428101, "loss_ce": 0.00039804144762456417, "loss_iou": 0.353515625, "loss_num": 0.01507568359375, "loss_xval": 0.78125, "num_input_tokens_seen": 58628892, "step": 1047 }, { "epoch": 2.334075723830735, "grad_norm": 19.363588333129883, "learning_rate": 1e-06, "loss": 0.8876, "num_input_tokens_seen": 58685104, "step": 1048 }, { "epoch": 2.334075723830735, "loss": 1.0165672302246094, "loss_ce": 0.000453831598861143, "loss_iou": 0.408203125, "loss_num": 0.039794921875, "loss_xval": 1.015625, "num_input_tokens_seen": 58685104, "step": 1048 }, { "epoch": 2.33630289532294, "grad_norm": 28.217439651489258, "learning_rate": 1e-06, "loss": 1.1681, "num_input_tokens_seen": 58738936, "step": 1049 }, { "epoch": 2.33630289532294, "loss": 1.4371428489685059, "loss_ce": 0.0006193204899318516, "loss_iou": 0.55078125, "loss_num": 0.06787109375, "loss_xval": 1.4375, "num_input_tokens_seen": 58738936, "step": 1049 }, { "epoch": 2.338530066815145, "grad_norm": 17.534543991088867, "learning_rate": 1e-06, "loss": 0.6902, "num_input_tokens_seen": 58798988, "step": 1050 }, { "epoch": 2.338530066815145, "loss": 0.47515368461608887, "loss_ce": 0.00030017929384484887, "loss_iou": 0.212890625, "loss_num": 0.0098876953125, "loss_xval": 0.474609375, "num_input_tokens_seen": 58798988, "step": 1050 }, { "epoch": 2.34075723830735, "grad_norm": 18.249549865722656, "learning_rate": 1e-06, "loss": 0.9676, "num_input_tokens_seen": 58854660, "step": 1051 }, { "epoch": 2.34075723830735, "loss": 0.977979302406311, "loss_ce": 0.0004402369959279895, "loss_iou": 0.40234375, "loss_num": 0.0341796875, "loss_xval": 0.9765625, "num_input_tokens_seen": 58854660, "step": 1051 }, { "epoch": 2.3429844097995547, "grad_norm": 21.75814437866211, "learning_rate": 1e-06, "loss": 0.9833, "num_input_tokens_seen": 58911232, "step": 1052 }, { "epoch": 2.3429844097995547, "loss": 1.1573677062988281, "loss_ce": 0.0005073855281807482, "loss_iou": 0.484375, "loss_num": 0.0380859375, "loss_xval": 1.15625, "num_input_tokens_seen": 58911232, "step": 1052 }, { "epoch": 2.3452115812917596, "grad_norm": 18.00012969970703, "learning_rate": 1e-06, "loss": 0.7443, "num_input_tokens_seen": 58968392, "step": 1053 }, { "epoch": 2.3452115812917596, "loss": 0.7719112634658813, "loss_ce": 0.0004268469929229468, "loss_iou": 0.314453125, "loss_num": 0.0286865234375, "loss_xval": 0.7734375, "num_input_tokens_seen": 58968392, "step": 1053 }, { "epoch": 2.3474387527839644, "grad_norm": 17.328489303588867, "learning_rate": 1e-06, "loss": 0.9562, "num_input_tokens_seen": 59026080, "step": 1054 }, { "epoch": 2.3474387527839644, "loss": 0.7599896192550659, "loss_ce": 0.00046816159738227725, "loss_iou": 0.330078125, "loss_num": 0.019775390625, "loss_xval": 0.7578125, "num_input_tokens_seen": 59026080, "step": 1054 }, { "epoch": 2.3496659242761693, "grad_norm": 24.498754501342773, "learning_rate": 1e-06, "loss": 1.0251, "num_input_tokens_seen": 59082868, "step": 1055 }, { "epoch": 2.3496659242761693, "loss": 1.0223811864852905, "loss_ce": 0.0024837690871208906, "loss_iou": 0.443359375, "loss_num": 0.0267333984375, "loss_xval": 1.0234375, "num_input_tokens_seen": 59082868, "step": 1055 }, { "epoch": 2.351893095768374, "grad_norm": 258.32928466796875, "learning_rate": 1e-06, "loss": 1.0218, "num_input_tokens_seen": 59139812, "step": 1056 }, { "epoch": 2.351893095768374, "loss": 0.9473181962966919, "loss_ce": 0.0005408285651355982, "loss_iou": 0.3828125, "loss_num": 0.0361328125, "loss_xval": 0.9453125, "num_input_tokens_seen": 59139812, "step": 1056 }, { "epoch": 2.354120267260579, "grad_norm": 41.9471435546875, "learning_rate": 1e-06, "loss": 0.9818, "num_input_tokens_seen": 59195536, "step": 1057 }, { "epoch": 2.354120267260579, "loss": 1.0145529508590698, "loss_ce": 0.0003928400401491672, "loss_iou": 0.44140625, "loss_num": 0.02587890625, "loss_xval": 1.015625, "num_input_tokens_seen": 59195536, "step": 1057 }, { "epoch": 2.356347438752784, "grad_norm": 20.948766708374023, "learning_rate": 1e-06, "loss": 1.0111, "num_input_tokens_seen": 59252604, "step": 1058 }, { "epoch": 2.356347438752784, "loss": 1.2552516460418701, "loss_ce": 0.0003688871511258185, "loss_iou": 0.4921875, "loss_num": 0.053955078125, "loss_xval": 1.2578125, "num_input_tokens_seen": 59252604, "step": 1058 }, { "epoch": 2.3585746102449887, "grad_norm": 33.876991271972656, "learning_rate": 1e-06, "loss": 0.8549, "num_input_tokens_seen": 59308408, "step": 1059 }, { "epoch": 2.3585746102449887, "loss": 0.6373202204704285, "loss_ce": 0.0003573211724869907, "loss_iou": 0.251953125, "loss_num": 0.0269775390625, "loss_xval": 0.63671875, "num_input_tokens_seen": 59308408, "step": 1059 }, { "epoch": 2.3608017817371936, "grad_norm": 18.77747917175293, "learning_rate": 1e-06, "loss": 0.7226, "num_input_tokens_seen": 59365048, "step": 1060 }, { "epoch": 2.3608017817371936, "loss": 0.7437995076179504, "loss_ce": 0.0008795711910352111, "loss_iou": 0.31640625, "loss_num": 0.0220947265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 59365048, "step": 1060 }, { "epoch": 2.3630289532293984, "grad_norm": 28.6199893951416, "learning_rate": 1e-06, "loss": 1.1339, "num_input_tokens_seen": 59421260, "step": 1061 }, { "epoch": 2.3630289532293984, "loss": 0.9527925848960876, "loss_ce": 0.000644176616333425, "loss_iou": 0.396484375, "loss_num": 0.03173828125, "loss_xval": 0.953125, "num_input_tokens_seen": 59421260, "step": 1061 }, { "epoch": 2.3652561247216037, "grad_norm": 60.87717056274414, "learning_rate": 1e-06, "loss": 0.9096, "num_input_tokens_seen": 59479676, "step": 1062 }, { "epoch": 2.3652561247216037, "loss": 0.817761242389679, "loss_ce": 0.0006225479301065207, "loss_iou": 0.34765625, "loss_num": 0.024658203125, "loss_xval": 0.81640625, "num_input_tokens_seen": 59479676, "step": 1062 }, { "epoch": 2.3674832962138086, "grad_norm": 21.296977996826172, "learning_rate": 1e-06, "loss": 0.9039, "num_input_tokens_seen": 59534276, "step": 1063 }, { "epoch": 2.3674832962138086, "loss": 0.9175959825515747, "loss_ce": 0.0003596206079237163, "loss_iou": 0.35546875, "loss_num": 0.041015625, "loss_xval": 0.91796875, "num_input_tokens_seen": 59534276, "step": 1063 }, { "epoch": 2.3697104677060135, "grad_norm": 28.44460105895996, "learning_rate": 1e-06, "loss": 1.0105, "num_input_tokens_seen": 59589900, "step": 1064 }, { "epoch": 2.3697104677060135, "loss": 1.1641950607299805, "loss_ce": 0.0006208861595951021, "loss_iou": 0.482421875, "loss_num": 0.03955078125, "loss_xval": 1.1640625, "num_input_tokens_seen": 59589900, "step": 1064 }, { "epoch": 2.3719376391982183, "grad_norm": 35.73344421386719, "learning_rate": 1e-06, "loss": 0.8737, "num_input_tokens_seen": 59649136, "step": 1065 }, { "epoch": 2.3719376391982183, "loss": 0.921160101890564, "loss_ce": 0.000505772652104497, "loss_iou": 0.37109375, "loss_num": 0.03564453125, "loss_xval": 0.921875, "num_input_tokens_seen": 59649136, "step": 1065 }, { "epoch": 2.374164810690423, "grad_norm": 22.40777015686035, "learning_rate": 1e-06, "loss": 0.9714, "num_input_tokens_seen": 59701696, "step": 1066 }, { "epoch": 2.374164810690423, "loss": 0.939410388469696, "loss_ce": 0.00044557781075127423, "loss_iou": 0.390625, "loss_num": 0.031982421875, "loss_xval": 0.9375, "num_input_tokens_seen": 59701696, "step": 1066 }, { "epoch": 2.376391982182628, "grad_norm": 19.74895668029785, "learning_rate": 1e-06, "loss": 0.7167, "num_input_tokens_seen": 59759464, "step": 1067 }, { "epoch": 2.376391982182628, "loss": 0.7840393781661987, "loss_ce": 0.00034799351124092937, "loss_iou": 0.333984375, "loss_num": 0.0230712890625, "loss_xval": 0.78515625, "num_input_tokens_seen": 59759464, "step": 1067 }, { "epoch": 2.378619153674833, "grad_norm": 45.2192268371582, "learning_rate": 1e-06, "loss": 0.9642, "num_input_tokens_seen": 59813332, "step": 1068 }, { "epoch": 2.378619153674833, "loss": 0.961778998374939, "loss_ce": 0.0003532259142957628, "loss_iou": 0.404296875, "loss_num": 0.030517578125, "loss_xval": 0.9609375, "num_input_tokens_seen": 59813332, "step": 1068 }, { "epoch": 2.3808463251670378, "grad_norm": 21.226028442382812, "learning_rate": 1e-06, "loss": 0.8399, "num_input_tokens_seen": 59867084, "step": 1069 }, { "epoch": 2.3808463251670378, "loss": 0.6262215375900269, "loss_ce": 0.00036704522790387273, "loss_iou": 0.2294921875, "loss_num": 0.033447265625, "loss_xval": 0.625, "num_input_tokens_seen": 59867084, "step": 1069 }, { "epoch": 2.3830734966592426, "grad_norm": 20.79296112060547, "learning_rate": 1e-06, "loss": 0.949, "num_input_tokens_seen": 59923596, "step": 1070 }, { "epoch": 2.3830734966592426, "loss": 0.9344473481178284, "loss_ce": 0.0003653277817647904, "loss_iou": 0.39453125, "loss_num": 0.029052734375, "loss_xval": 0.93359375, "num_input_tokens_seen": 59923596, "step": 1070 }, { "epoch": 2.3853006681514475, "grad_norm": 23.782529830932617, "learning_rate": 1e-06, "loss": 0.9666, "num_input_tokens_seen": 59978588, "step": 1071 }, { "epoch": 2.3853006681514475, "loss": 0.9463720917701721, "loss_ce": 0.0005713239079341292, "loss_iou": 0.3828125, "loss_num": 0.03564453125, "loss_xval": 0.9453125, "num_input_tokens_seen": 59978588, "step": 1071 }, { "epoch": 2.387527839643653, "grad_norm": 25.347000122070312, "learning_rate": 1e-06, "loss": 1.0797, "num_input_tokens_seen": 60035216, "step": 1072 }, { "epoch": 2.387527839643653, "loss": 1.156226396560669, "loss_ce": 0.0004647444002330303, "loss_iou": 0.482421875, "loss_num": 0.037841796875, "loss_xval": 1.15625, "num_input_tokens_seen": 60035216, "step": 1072 }, { "epoch": 2.3897550111358576, "grad_norm": 104.3314208984375, "learning_rate": 1e-06, "loss": 0.8764, "num_input_tokens_seen": 60089212, "step": 1073 }, { "epoch": 2.3897550111358576, "loss": 0.868368923664093, "loss_ce": 0.00044899751082994044, "loss_iou": 0.3671875, "loss_num": 0.02685546875, "loss_xval": 0.8671875, "num_input_tokens_seen": 60089212, "step": 1073 }, { "epoch": 2.3919821826280625, "grad_norm": 17.936288833618164, "learning_rate": 1e-06, "loss": 1.1517, "num_input_tokens_seen": 60145076, "step": 1074 }, { "epoch": 2.3919821826280625, "loss": 1.1004548072814941, "loss_ce": 0.0006012015510350466, "loss_iou": 0.431640625, "loss_num": 0.0478515625, "loss_xval": 1.1015625, "num_input_tokens_seen": 60145076, "step": 1074 }, { "epoch": 2.3942093541202674, "grad_norm": 71.37808227539062, "learning_rate": 1e-06, "loss": 0.9387, "num_input_tokens_seen": 60201880, "step": 1075 }, { "epoch": 2.3942093541202674, "loss": 0.8587861061096191, "loss_ce": 0.00038765568751841784, "loss_iou": 0.35546875, "loss_num": 0.0294189453125, "loss_xval": 0.859375, "num_input_tokens_seen": 60201880, "step": 1075 }, { "epoch": 2.3964365256124722, "grad_norm": 15.348673820495605, "learning_rate": 1e-06, "loss": 0.7725, "num_input_tokens_seen": 60257620, "step": 1076 }, { "epoch": 2.3964365256124722, "loss": 0.8453316688537598, "loss_ce": 0.00036094876122660935, "loss_iou": 0.373046875, "loss_num": 0.0196533203125, "loss_xval": 0.84375, "num_input_tokens_seen": 60257620, "step": 1076 }, { "epoch": 2.398663697104677, "grad_norm": 13.168649673461914, "learning_rate": 1e-06, "loss": 0.7955, "num_input_tokens_seen": 60315036, "step": 1077 }, { "epoch": 2.398663697104677, "loss": 1.0594782829284668, "loss_ce": 0.000884645152837038, "loss_iou": 0.435546875, "loss_num": 0.037353515625, "loss_xval": 1.0625, "num_input_tokens_seen": 60315036, "step": 1077 }, { "epoch": 2.400890868596882, "grad_norm": 22.153902053833008, "learning_rate": 1e-06, "loss": 0.9847, "num_input_tokens_seen": 60371132, "step": 1078 }, { "epoch": 2.400890868596882, "loss": 0.6142248511314392, "loss_ce": 0.0004552791069727391, "loss_iou": 0.2470703125, "loss_num": 0.0240478515625, "loss_xval": 0.61328125, "num_input_tokens_seen": 60371132, "step": 1078 }, { "epoch": 2.403118040089087, "grad_norm": 16.075576782226562, "learning_rate": 1e-06, "loss": 0.9945, "num_input_tokens_seen": 60425052, "step": 1079 }, { "epoch": 2.403118040089087, "loss": 0.8446251153945923, "loss_ce": 0.0003868020430672914, "loss_iou": 0.34765625, "loss_num": 0.030029296875, "loss_xval": 0.84375, "num_input_tokens_seen": 60425052, "step": 1079 }, { "epoch": 2.4053452115812917, "grad_norm": 30.00712013244629, "learning_rate": 1e-06, "loss": 0.9475, "num_input_tokens_seen": 60481796, "step": 1080 }, { "epoch": 2.4053452115812917, "loss": 1.1728054285049438, "loss_ce": 0.0004421064513735473, "loss_iou": 0.46875, "loss_num": 0.047119140625, "loss_xval": 1.171875, "num_input_tokens_seen": 60481796, "step": 1080 }, { "epoch": 2.4075723830734965, "grad_norm": 37.473915100097656, "learning_rate": 1e-06, "loss": 1.0343, "num_input_tokens_seen": 60535540, "step": 1081 }, { "epoch": 2.4075723830734965, "loss": 0.8917477130889893, "loss_ce": 0.00039027928141877055, "loss_iou": 0.369140625, "loss_num": 0.0308837890625, "loss_xval": 0.890625, "num_input_tokens_seen": 60535540, "step": 1081 }, { "epoch": 2.4097995545657014, "grad_norm": 26.055566787719727, "learning_rate": 1e-06, "loss": 0.9813, "num_input_tokens_seen": 60591724, "step": 1082 }, { "epoch": 2.4097995545657014, "loss": 0.8454374670982361, "loss_ce": 0.00046675774501636624, "loss_iou": 0.341796875, "loss_num": 0.0322265625, "loss_xval": 0.84375, "num_input_tokens_seen": 60591724, "step": 1082 }, { "epoch": 2.4120267260579062, "grad_norm": 19.031705856323242, "learning_rate": 1e-06, "loss": 0.9273, "num_input_tokens_seen": 60646416, "step": 1083 }, { "epoch": 2.4120267260579062, "loss": 1.0545891523361206, "loss_ce": 0.000389896216802299, "loss_iou": 0.458984375, "loss_num": 0.0277099609375, "loss_xval": 1.0546875, "num_input_tokens_seen": 60646416, "step": 1083 }, { "epoch": 2.4142538975501115, "grad_norm": 13.565470695495605, "learning_rate": 1e-06, "loss": 0.8106, "num_input_tokens_seen": 60703608, "step": 1084 }, { "epoch": 2.4142538975501115, "loss": 0.821467399597168, "loss_ce": 0.0004224562435410917, "loss_iou": 0.3203125, "loss_num": 0.036376953125, "loss_xval": 0.8203125, "num_input_tokens_seen": 60703608, "step": 1084 }, { "epoch": 2.4164810690423164, "grad_norm": 12.959884643554688, "learning_rate": 1e-06, "loss": 0.8867, "num_input_tokens_seen": 60760392, "step": 1085 }, { "epoch": 2.4164810690423164, "loss": 0.7961212396621704, "loss_ce": 0.00046693626791238785, "loss_iou": 0.310546875, "loss_num": 0.034912109375, "loss_xval": 0.796875, "num_input_tokens_seen": 60760392, "step": 1085 }, { "epoch": 2.4187082405345213, "grad_norm": 15.361568450927734, "learning_rate": 1e-06, "loss": 0.8654, "num_input_tokens_seen": 60817288, "step": 1086 }, { "epoch": 2.4187082405345213, "loss": 0.6870740056037903, "loss_ce": 0.00030639575561508536, "loss_iou": 0.294921875, "loss_num": 0.01953125, "loss_xval": 0.6875, "num_input_tokens_seen": 60817288, "step": 1086 }, { "epoch": 2.420935412026726, "grad_norm": 22.026248931884766, "learning_rate": 1e-06, "loss": 1.1047, "num_input_tokens_seen": 60874444, "step": 1087 }, { "epoch": 2.420935412026726, "loss": 0.8688787221908569, "loss_ce": 0.0007146652205847204, "loss_iou": 0.3828125, "loss_num": 0.0208740234375, "loss_xval": 0.8671875, "num_input_tokens_seen": 60874444, "step": 1087 }, { "epoch": 2.423162583518931, "grad_norm": 20.72212028503418, "learning_rate": 1e-06, "loss": 1.1223, "num_input_tokens_seen": 60932720, "step": 1088 }, { "epoch": 2.423162583518931, "loss": 1.190258264541626, "loss_ce": 0.00080519710900262, "loss_iou": 0.4609375, "loss_num": 0.0537109375, "loss_xval": 1.1875, "num_input_tokens_seen": 60932720, "step": 1088 }, { "epoch": 2.425389755011136, "grad_norm": 22.80421257019043, "learning_rate": 1e-06, "loss": 1.0687, "num_input_tokens_seen": 60990336, "step": 1089 }, { "epoch": 2.425389755011136, "loss": 1.2014228105545044, "loss_ce": 0.0007392432307824492, "loss_iou": 0.4921875, "loss_num": 0.04345703125, "loss_xval": 1.203125, "num_input_tokens_seen": 60990336, "step": 1089 }, { "epoch": 2.4276169265033407, "grad_norm": 23.451295852661133, "learning_rate": 1e-06, "loss": 1.0575, "num_input_tokens_seen": 61045524, "step": 1090 }, { "epoch": 2.4276169265033407, "loss": 1.208905816078186, "loss_ce": 0.000409751373808831, "loss_iou": 0.49609375, "loss_num": 0.043212890625, "loss_xval": 1.2109375, "num_input_tokens_seen": 61045524, "step": 1090 }, { "epoch": 2.4298440979955456, "grad_norm": 17.383296966552734, "learning_rate": 1e-06, "loss": 0.7853, "num_input_tokens_seen": 61104956, "step": 1091 }, { "epoch": 2.4298440979955456, "loss": 0.7447309494018555, "loss_ce": 0.000834420439787209, "loss_iou": 0.32421875, "loss_num": 0.0189208984375, "loss_xval": 0.7421875, "num_input_tokens_seen": 61104956, "step": 1091 }, { "epoch": 2.4320712694877504, "grad_norm": 18.50430679321289, "learning_rate": 1e-06, "loss": 0.9737, "num_input_tokens_seen": 61159236, "step": 1092 }, { "epoch": 2.4320712694877504, "loss": 0.9158762693405151, "loss_ce": 0.00034890550887212157, "loss_iou": 0.38671875, "loss_num": 0.028076171875, "loss_xval": 0.9140625, "num_input_tokens_seen": 61159236, "step": 1092 }, { "epoch": 2.4342984409799553, "grad_norm": 18.052396774291992, "learning_rate": 1e-06, "loss": 1.0082, "num_input_tokens_seen": 61217816, "step": 1093 }, { "epoch": 2.4342984409799553, "loss": 0.8641993999481201, "loss_ce": 0.00042982713785022497, "loss_iou": 0.36328125, "loss_num": 0.027587890625, "loss_xval": 0.86328125, "num_input_tokens_seen": 61217816, "step": 1093 }, { "epoch": 2.4365256124721606, "grad_norm": 20.244873046875, "learning_rate": 1e-06, "loss": 0.5503, "num_input_tokens_seen": 61274332, "step": 1094 }, { "epoch": 2.4365256124721606, "loss": 0.40073591470718384, "loss_ce": 0.0003452802775427699, "loss_iou": 0.1748046875, "loss_num": 0.010009765625, "loss_xval": 0.400390625, "num_input_tokens_seen": 61274332, "step": 1094 }, { "epoch": 2.4387527839643655, "grad_norm": 18.268579483032227, "learning_rate": 1e-06, "loss": 0.9795, "num_input_tokens_seen": 61327972, "step": 1095 }, { "epoch": 2.4387527839643655, "loss": 0.696860671043396, "loss_ce": 0.00044950933079235256, "loss_iou": 0.291015625, "loss_num": 0.02294921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 61327972, "step": 1095 }, { "epoch": 2.4409799554565703, "grad_norm": 13.835185050964355, "learning_rate": 1e-06, "loss": 0.8244, "num_input_tokens_seen": 61384388, "step": 1096 }, { "epoch": 2.4409799554565703, "loss": 0.6497031450271606, "loss_ce": 0.00028911407571285963, "loss_iou": 0.2421875, "loss_num": 0.03271484375, "loss_xval": 0.6484375, "num_input_tokens_seen": 61384388, "step": 1096 }, { "epoch": 2.443207126948775, "grad_norm": 17.01195526123047, "learning_rate": 1e-06, "loss": 0.9698, "num_input_tokens_seen": 61439968, "step": 1097 }, { "epoch": 2.443207126948775, "loss": 1.043675184249878, "loss_ce": 0.0007063635857775807, "loss_iou": 0.45703125, "loss_num": 0.0260009765625, "loss_xval": 1.046875, "num_input_tokens_seen": 61439968, "step": 1097 }, { "epoch": 2.44543429844098, "grad_norm": 12.948113441467285, "learning_rate": 1e-06, "loss": 0.8937, "num_input_tokens_seen": 61496432, "step": 1098 }, { "epoch": 2.44543429844098, "loss": 0.6754014492034912, "loss_ce": 0.00035263324389234185, "loss_iou": 0.291015625, "loss_num": 0.0184326171875, "loss_xval": 0.67578125, "num_input_tokens_seen": 61496432, "step": 1098 }, { "epoch": 2.447661469933185, "grad_norm": 15.429597854614258, "learning_rate": 1e-06, "loss": 1.3256, "num_input_tokens_seen": 61555176, "step": 1099 }, { "epoch": 2.447661469933185, "loss": 1.1492905616760254, "loss_ce": 0.0003646724799182266, "loss_iou": 0.4609375, "loss_num": 0.045654296875, "loss_xval": 1.1484375, "num_input_tokens_seen": 61555176, "step": 1099 }, { "epoch": 2.4498886414253898, "grad_norm": 13.81130313873291, "learning_rate": 1e-06, "loss": 0.9434, "num_input_tokens_seen": 61614452, "step": 1100 }, { "epoch": 2.4498886414253898, "loss": 1.213216781616211, "loss_ce": 0.00032615740201435983, "loss_iou": 0.5234375, "loss_num": 0.03271484375, "loss_xval": 1.2109375, "num_input_tokens_seen": 61614452, "step": 1100 }, { "epoch": 2.4521158129175946, "grad_norm": 36.95311737060547, "learning_rate": 1e-06, "loss": 0.6854, "num_input_tokens_seen": 61671728, "step": 1101 }, { "epoch": 2.4521158129175946, "loss": 0.6432477235794067, "loss_ce": 0.0006696175551041961, "loss_iou": 0.24609375, "loss_num": 0.0302734375, "loss_xval": 0.640625, "num_input_tokens_seen": 61671728, "step": 1101 }, { "epoch": 2.4543429844097995, "grad_norm": 18.458709716796875, "learning_rate": 1e-06, "loss": 0.8445, "num_input_tokens_seen": 61723072, "step": 1102 }, { "epoch": 2.4543429844097995, "loss": 1.0072041749954224, "loss_ce": 0.00036824517883360386, "loss_iou": 0.412109375, "loss_num": 0.036865234375, "loss_xval": 1.0078125, "num_input_tokens_seen": 61723072, "step": 1102 }, { "epoch": 2.4565701559020043, "grad_norm": 15.849419593811035, "learning_rate": 1e-06, "loss": 1.1075, "num_input_tokens_seen": 61780896, "step": 1103 }, { "epoch": 2.4565701559020043, "loss": 0.9099684953689575, "loss_ce": 0.006648160517215729, "loss_iou": 0.3984375, "loss_num": 0.021240234375, "loss_xval": 0.90234375, "num_input_tokens_seen": 61780896, "step": 1103 }, { "epoch": 2.458797327394209, "grad_norm": 19.382238388061523, "learning_rate": 1e-06, "loss": 1.0016, "num_input_tokens_seen": 61836688, "step": 1104 }, { "epoch": 2.458797327394209, "loss": 0.6892969608306885, "loss_ce": 0.0003321617841720581, "loss_iou": 0.28125, "loss_num": 0.025146484375, "loss_xval": 0.6875, "num_input_tokens_seen": 61836688, "step": 1104 }, { "epoch": 2.461024498886414, "grad_norm": 19.609500885009766, "learning_rate": 1e-06, "loss": 1.1058, "num_input_tokens_seen": 61892164, "step": 1105 }, { "epoch": 2.461024498886414, "loss": 1.3998944759368896, "loss_ce": 0.00048047915333881974, "loss_iou": 0.546875, "loss_num": 0.06103515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 61892164, "step": 1105 }, { "epoch": 2.463251670378619, "grad_norm": 19.078765869140625, "learning_rate": 1e-06, "loss": 0.8841, "num_input_tokens_seen": 61948728, "step": 1106 }, { "epoch": 2.463251670378619, "loss": 0.673430323600769, "loss_ce": 0.0003346248995512724, "loss_iou": 0.251953125, "loss_num": 0.033935546875, "loss_xval": 0.671875, "num_input_tokens_seen": 61948728, "step": 1106 }, { "epoch": 2.4654788418708242, "grad_norm": 28.457544326782227, "learning_rate": 1e-06, "loss": 0.7968, "num_input_tokens_seen": 62002888, "step": 1107 }, { "epoch": 2.4654788418708242, "loss": 0.6821703314781189, "loss_ce": 0.00028555351309478283, "loss_iou": 0.251953125, "loss_num": 0.035888671875, "loss_xval": 0.68359375, "num_input_tokens_seen": 62002888, "step": 1107 }, { "epoch": 2.467706013363029, "grad_norm": 15.16339111328125, "learning_rate": 1e-06, "loss": 0.8623, "num_input_tokens_seen": 62058460, "step": 1108 }, { "epoch": 2.467706013363029, "loss": 0.8493123650550842, "loss_ce": 0.00043543853098526597, "loss_iou": 0.3515625, "loss_num": 0.029296875, "loss_xval": 0.84765625, "num_input_tokens_seen": 62058460, "step": 1108 }, { "epoch": 2.469933184855234, "grad_norm": 21.689632415771484, "learning_rate": 1e-06, "loss": 0.8395, "num_input_tokens_seen": 62116612, "step": 1109 }, { "epoch": 2.469933184855234, "loss": 0.6302697658538818, "loss_ce": 0.00038696054252795875, "loss_iou": 0.267578125, "loss_num": 0.0194091796875, "loss_xval": 0.62890625, "num_input_tokens_seen": 62116612, "step": 1109 }, { "epoch": 2.472160356347439, "grad_norm": 32.711299896240234, "learning_rate": 1e-06, "loss": 0.9001, "num_input_tokens_seen": 62173120, "step": 1110 }, { "epoch": 2.472160356347439, "loss": 1.1314702033996582, "loss_ce": 0.0003667679848149419, "loss_iou": 0.5078125, "loss_num": 0.0223388671875, "loss_xval": 1.1328125, "num_input_tokens_seen": 62173120, "step": 1110 }, { "epoch": 2.4743875278396437, "grad_norm": 63.607330322265625, "learning_rate": 1e-06, "loss": 0.928, "num_input_tokens_seen": 62231148, "step": 1111 }, { "epoch": 2.4743875278396437, "loss": 0.8059262037277222, "loss_ce": 0.00038422548095695674, "loss_iou": 0.3359375, "loss_num": 0.026611328125, "loss_xval": 0.8046875, "num_input_tokens_seen": 62231148, "step": 1111 }, { "epoch": 2.4766146993318485, "grad_norm": 17.097545623779297, "learning_rate": 1e-06, "loss": 0.9165, "num_input_tokens_seen": 62288036, "step": 1112 }, { "epoch": 2.4766146993318485, "loss": 0.9176112413406372, "loss_ce": 0.0003749003808479756, "loss_iou": 0.400390625, "loss_num": 0.02294921875, "loss_xval": 0.91796875, "num_input_tokens_seen": 62288036, "step": 1112 }, { "epoch": 2.4788418708240534, "grad_norm": 19.068836212158203, "learning_rate": 1e-06, "loss": 1.0021, "num_input_tokens_seen": 62343840, "step": 1113 }, { "epoch": 2.4788418708240534, "loss": 0.8066169619560242, "loss_ce": 0.0007087617414072156, "loss_iou": 0.326171875, "loss_num": 0.0311279296875, "loss_xval": 0.8046875, "num_input_tokens_seen": 62343840, "step": 1113 }, { "epoch": 2.4810690423162582, "grad_norm": 18.9963321685791, "learning_rate": 1e-06, "loss": 0.8238, "num_input_tokens_seen": 62399636, "step": 1114 }, { "epoch": 2.4810690423162582, "loss": 0.6373310089111328, "loss_ce": 0.00036811293102800846, "loss_iou": 0.27734375, "loss_num": 0.01611328125, "loss_xval": 0.63671875, "num_input_tokens_seen": 62399636, "step": 1114 }, { "epoch": 2.483296213808463, "grad_norm": 18.128734588623047, "learning_rate": 1e-06, "loss": 1.0068, "num_input_tokens_seen": 62457020, "step": 1115 }, { "epoch": 2.483296213808463, "loss": 0.7235212326049805, "loss_ce": 0.0003766651498153806, "loss_iou": 0.30859375, "loss_num": 0.021240234375, "loss_xval": 0.72265625, "num_input_tokens_seen": 62457020, "step": 1115 }, { "epoch": 2.485523385300668, "grad_norm": 53.74859619140625, "learning_rate": 1e-06, "loss": 0.8546, "num_input_tokens_seen": 62515924, "step": 1116 }, { "epoch": 2.485523385300668, "loss": 1.067058801651001, "loss_ce": 0.0006525892531499267, "loss_iou": 0.4375, "loss_num": 0.0380859375, "loss_xval": 1.0625, "num_input_tokens_seen": 62515924, "step": 1116 }, { "epoch": 2.4877505567928733, "grad_norm": 16.628570556640625, "learning_rate": 1e-06, "loss": 0.9406, "num_input_tokens_seen": 62575028, "step": 1117 }, { "epoch": 2.4877505567928733, "loss": 1.1619257926940918, "loss_ce": 0.0005488308379426599, "loss_iou": 0.4765625, "loss_num": 0.041748046875, "loss_xval": 1.1640625, "num_input_tokens_seen": 62575028, "step": 1117 }, { "epoch": 2.489977728285078, "grad_norm": 18.023460388183594, "learning_rate": 1e-06, "loss": 0.9142, "num_input_tokens_seen": 62630184, "step": 1118 }, { "epoch": 2.489977728285078, "loss": 1.1649341583251953, "loss_ce": 0.00038348851376213133, "loss_iou": 0.458984375, "loss_num": 0.049560546875, "loss_xval": 1.1640625, "num_input_tokens_seen": 62630184, "step": 1118 }, { "epoch": 2.492204899777283, "grad_norm": 28.666088104248047, "learning_rate": 1e-06, "loss": 1.1036, "num_input_tokens_seen": 62686600, "step": 1119 }, { "epoch": 2.492204899777283, "loss": 1.091188907623291, "loss_ce": 0.0008568049524910748, "loss_iou": 0.443359375, "loss_num": 0.040771484375, "loss_xval": 1.09375, "num_input_tokens_seen": 62686600, "step": 1119 }, { "epoch": 2.494432071269488, "grad_norm": 34.05298614501953, "learning_rate": 1e-06, "loss": 1.3067, "num_input_tokens_seen": 62740120, "step": 1120 }, { "epoch": 2.494432071269488, "loss": 1.4040180444717407, "loss_ce": 0.00094178831204772, "loss_iou": 0.53125, "loss_num": 0.0673828125, "loss_xval": 1.40625, "num_input_tokens_seen": 62740120, "step": 1120 }, { "epoch": 2.4966592427616927, "grad_norm": 23.138212203979492, "learning_rate": 1e-06, "loss": 0.9225, "num_input_tokens_seen": 62796964, "step": 1121 }, { "epoch": 2.4966592427616927, "loss": 1.013495922088623, "loss_ce": 0.0003123595961369574, "loss_iou": 0.404296875, "loss_num": 0.04052734375, "loss_xval": 1.015625, "num_input_tokens_seen": 62796964, "step": 1121 }, { "epoch": 2.4988864142538976, "grad_norm": 17.127120971679688, "learning_rate": 1e-06, "loss": 1.2497, "num_input_tokens_seen": 62853264, "step": 1122 }, { "epoch": 2.4988864142538976, "loss": 1.4592686891555786, "loss_ce": 0.0007725717732682824, "loss_iou": 0.546875, "loss_num": 0.07373046875, "loss_xval": 1.4609375, "num_input_tokens_seen": 62853264, "step": 1122 }, { "epoch": 2.5011135857461024, "grad_norm": 14.743764877319336, "learning_rate": 1e-06, "loss": 0.8212, "num_input_tokens_seen": 62909164, "step": 1123 }, { "epoch": 2.5011135857461024, "loss": 0.598504900932312, "loss_ce": 0.0003604079829528928, "loss_iou": 0.26171875, "loss_num": 0.01519775390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 62909164, "step": 1123 }, { "epoch": 2.5033407572383073, "grad_norm": 13.89709758758545, "learning_rate": 1e-06, "loss": 0.9177, "num_input_tokens_seen": 62965280, "step": 1124 }, { "epoch": 2.5033407572383073, "loss": 0.7613284587860107, "loss_ce": 0.0003421393339522183, "loss_iou": 0.3046875, "loss_num": 0.0301513671875, "loss_xval": 0.76171875, "num_input_tokens_seen": 62965280, "step": 1124 }, { "epoch": 2.505567928730512, "grad_norm": 27.25965690612793, "learning_rate": 1e-06, "loss": 0.8435, "num_input_tokens_seen": 63019164, "step": 1125 }, { "epoch": 2.505567928730512, "loss": 0.9681985974311829, "loss_ce": 0.0004251442151144147, "loss_iou": 0.390625, "loss_num": 0.037109375, "loss_xval": 0.96875, "num_input_tokens_seen": 63019164, "step": 1125 }, { "epoch": 2.507795100222717, "grad_norm": 19.890287399291992, "learning_rate": 1e-06, "loss": 0.8424, "num_input_tokens_seen": 63073888, "step": 1126 }, { "epoch": 2.507795100222717, "loss": 0.7713284492492676, "loss_ce": 0.00033233320573344827, "loss_iou": 0.32421875, "loss_num": 0.0242919921875, "loss_xval": 0.76953125, "num_input_tokens_seen": 63073888, "step": 1126 }, { "epoch": 2.510022271714922, "grad_norm": 19.229721069335938, "learning_rate": 1e-06, "loss": 0.799, "num_input_tokens_seen": 63131120, "step": 1127 }, { "epoch": 2.510022271714922, "loss": 0.7538760900497437, "loss_ce": 0.0004581384710036218, "loss_iou": 0.306640625, "loss_num": 0.0279541015625, "loss_xval": 0.75390625, "num_input_tokens_seen": 63131120, "step": 1127 }, { "epoch": 2.5122494432071267, "grad_norm": 18.243764877319336, "learning_rate": 1e-06, "loss": 1.0801, "num_input_tokens_seen": 63184712, "step": 1128 }, { "epoch": 2.5122494432071267, "loss": 1.014033555984497, "loss_ce": 0.00036173040280118585, "loss_iou": 0.431640625, "loss_num": 0.0302734375, "loss_xval": 1.015625, "num_input_tokens_seen": 63184712, "step": 1128 }, { "epoch": 2.5144766146993316, "grad_norm": 27.520559310913086, "learning_rate": 1e-06, "loss": 1.0515, "num_input_tokens_seen": 63239260, "step": 1129 }, { "epoch": 2.5144766146993316, "loss": 0.9654926061630249, "loss_ce": 0.0005268162931315601, "loss_iou": 0.369140625, "loss_num": 0.0458984375, "loss_xval": 0.96484375, "num_input_tokens_seen": 63239260, "step": 1129 }, { "epoch": 2.516703786191537, "grad_norm": 23.411195755004883, "learning_rate": 1e-06, "loss": 0.5888, "num_input_tokens_seen": 63294036, "step": 1130 }, { "epoch": 2.516703786191537, "loss": 0.5716757774353027, "loss_ce": 0.00038673574454151094, "loss_iou": 0.248046875, "loss_num": 0.0150146484375, "loss_xval": 0.5703125, "num_input_tokens_seen": 63294036, "step": 1130 }, { "epoch": 2.5189309576837418, "grad_norm": 27.774524688720703, "learning_rate": 1e-06, "loss": 0.9061, "num_input_tokens_seen": 63350920, "step": 1131 }, { "epoch": 2.5189309576837418, "loss": 0.7308274507522583, "loss_ce": 0.00035869883140549064, "loss_iou": 0.298828125, "loss_num": 0.026123046875, "loss_xval": 0.73046875, "num_input_tokens_seen": 63350920, "step": 1131 }, { "epoch": 2.5211581291759466, "grad_norm": 42.71458053588867, "learning_rate": 1e-06, "loss": 1.0052, "num_input_tokens_seen": 63408328, "step": 1132 }, { "epoch": 2.5211581291759466, "loss": 0.9227426052093506, "loss_ce": 0.00037933725980110466, "loss_iou": 0.41015625, "loss_num": 0.0203857421875, "loss_xval": 0.921875, "num_input_tokens_seen": 63408328, "step": 1132 }, { "epoch": 2.5233853006681515, "grad_norm": 21.282625198364258, "learning_rate": 1e-06, "loss": 1.0933, "num_input_tokens_seen": 63463376, "step": 1133 }, { "epoch": 2.5233853006681515, "loss": 0.8489203453063965, "loss_ce": 0.0005317054456099868, "loss_iou": 0.380859375, "loss_num": 0.017333984375, "loss_xval": 0.84765625, "num_input_tokens_seen": 63463376, "step": 1133 }, { "epoch": 2.5256124721603563, "grad_norm": 13.566144943237305, "learning_rate": 1e-06, "loss": 0.9126, "num_input_tokens_seen": 63521748, "step": 1134 }, { "epoch": 2.5256124721603563, "loss": 1.027003288269043, "loss_ce": 0.0006361395353451371, "loss_iou": 0.42578125, "loss_num": 0.03466796875, "loss_xval": 1.0234375, "num_input_tokens_seen": 63521748, "step": 1134 }, { "epoch": 2.527839643652561, "grad_norm": 22.058757781982422, "learning_rate": 1e-06, "loss": 0.8642, "num_input_tokens_seen": 63575808, "step": 1135 }, { "epoch": 2.527839643652561, "loss": 0.5723444223403931, "loss_ce": 0.00032291823299601674, "loss_iou": 0.224609375, "loss_num": 0.024658203125, "loss_xval": 0.5703125, "num_input_tokens_seen": 63575808, "step": 1135 }, { "epoch": 2.530066815144766, "grad_norm": 17.351253509521484, "learning_rate": 1e-06, "loss": 0.9297, "num_input_tokens_seen": 63629380, "step": 1136 }, { "epoch": 2.530066815144766, "loss": 1.1235007047653198, "loss_ce": 0.0006980298785492778, "loss_iou": 0.466796875, "loss_num": 0.037841796875, "loss_xval": 1.125, "num_input_tokens_seen": 63629380, "step": 1136 }, { "epoch": 2.532293986636971, "grad_norm": 25.26835060119629, "learning_rate": 1e-06, "loss": 0.8625, "num_input_tokens_seen": 63685404, "step": 1137 }, { "epoch": 2.532293986636971, "loss": 0.7635213136672974, "loss_ce": 0.0003376836539246142, "loss_iou": 0.314453125, "loss_num": 0.026611328125, "loss_xval": 0.76171875, "num_input_tokens_seen": 63685404, "step": 1137 }, { "epoch": 2.534521158129176, "grad_norm": 23.412189483642578, "learning_rate": 1e-06, "loss": 1.0618, "num_input_tokens_seen": 63742344, "step": 1138 }, { "epoch": 2.534521158129176, "loss": 1.1275867223739624, "loss_ce": 0.0006335656507872045, "loss_iou": 0.470703125, "loss_num": 0.03759765625, "loss_xval": 1.125, "num_input_tokens_seen": 63742344, "step": 1138 }, { "epoch": 2.536748329621381, "grad_norm": 59.32328414916992, "learning_rate": 1e-06, "loss": 0.9894, "num_input_tokens_seen": 63798548, "step": 1139 }, { "epoch": 2.536748329621381, "loss": 0.9086170196533203, "loss_ce": 0.0006579948822036386, "loss_iou": 0.37890625, "loss_num": 0.030029296875, "loss_xval": 0.90625, "num_input_tokens_seen": 63798548, "step": 1139 }, { "epoch": 2.538975501113586, "grad_norm": 34.01222610473633, "learning_rate": 1e-06, "loss": 1.2089, "num_input_tokens_seen": 63851244, "step": 1140 }, { "epoch": 2.538975501113586, "loss": 1.4944491386413574, "loss_ce": 0.0003085851203650236, "loss_iou": 0.66796875, "loss_num": 0.03173828125, "loss_xval": 1.4921875, "num_input_tokens_seen": 63851244, "step": 1140 }, { "epoch": 2.541202672605791, "grad_norm": 23.09809112548828, "learning_rate": 1e-06, "loss": 1.0927, "num_input_tokens_seen": 63908384, "step": 1141 }, { "epoch": 2.541202672605791, "loss": 0.905631959438324, "loss_ce": 0.00035852432483807206, "loss_iou": 0.375, "loss_num": 0.0311279296875, "loss_xval": 0.90625, "num_input_tokens_seen": 63908384, "step": 1141 }, { "epoch": 2.5434298440979957, "grad_norm": 12.117959022521973, "learning_rate": 1e-06, "loss": 1.2241, "num_input_tokens_seen": 63965248, "step": 1142 }, { "epoch": 2.5434298440979957, "loss": 1.4422664642333984, "loss_ce": 0.0006161456694826484, "loss_iou": 0.515625, "loss_num": 0.0830078125, "loss_xval": 1.4453125, "num_input_tokens_seen": 63965248, "step": 1142 }, { "epoch": 2.5456570155902005, "grad_norm": 19.716156005859375, "learning_rate": 1e-06, "loss": 0.9176, "num_input_tokens_seen": 64018792, "step": 1143 }, { "epoch": 2.5456570155902005, "loss": 0.611274778842926, "loss_ce": 0.000434944755397737, "loss_iou": 0.248046875, "loss_num": 0.023193359375, "loss_xval": 0.609375, "num_input_tokens_seen": 64018792, "step": 1143 }, { "epoch": 2.5478841870824054, "grad_norm": 16.11542320251465, "learning_rate": 1e-06, "loss": 0.9638, "num_input_tokens_seen": 64073780, "step": 1144 }, { "epoch": 2.5478841870824054, "loss": 1.0830767154693604, "loss_ce": 0.0004351097741164267, "loss_iou": 0.3984375, "loss_num": 0.05712890625, "loss_xval": 1.0859375, "num_input_tokens_seen": 64073780, "step": 1144 }, { "epoch": 2.5501113585746102, "grad_norm": 27.197479248046875, "learning_rate": 1e-06, "loss": 0.8059, "num_input_tokens_seen": 64130436, "step": 1145 }, { "epoch": 2.5501113585746102, "loss": 1.001615285873413, "loss_ce": 0.0008829243597574532, "loss_iou": 0.39453125, "loss_num": 0.042236328125, "loss_xval": 1.0, "num_input_tokens_seen": 64130436, "step": 1145 }, { "epoch": 2.552338530066815, "grad_norm": 17.01374053955078, "learning_rate": 1e-06, "loss": 0.9329, "num_input_tokens_seen": 64183692, "step": 1146 }, { "epoch": 2.552338530066815, "loss": 0.9256563186645508, "loss_ce": 0.00036334185278974473, "loss_iou": 0.375, "loss_num": 0.034912109375, "loss_xval": 0.92578125, "num_input_tokens_seen": 64183692, "step": 1146 }, { "epoch": 2.55456570155902, "grad_norm": 18.95051383972168, "learning_rate": 1e-06, "loss": 0.9019, "num_input_tokens_seen": 64239220, "step": 1147 }, { "epoch": 2.55456570155902, "loss": 1.1268051862716675, "loss_ce": 0.0003403578884899616, "loss_iou": 0.462890625, "loss_num": 0.040283203125, "loss_xval": 1.125, "num_input_tokens_seen": 64239220, "step": 1147 }, { "epoch": 2.556792873051225, "grad_norm": 26.1760196685791, "learning_rate": 1e-06, "loss": 0.8004, "num_input_tokens_seen": 64292876, "step": 1148 }, { "epoch": 2.556792873051225, "loss": 0.802330493927002, "loss_ce": 0.0004505745891947299, "loss_iou": 0.3125, "loss_num": 0.03564453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 64292876, "step": 1148 }, { "epoch": 2.5590200445434297, "grad_norm": 18.058563232421875, "learning_rate": 1e-06, "loss": 0.9304, "num_input_tokens_seen": 64350980, "step": 1149 }, { "epoch": 2.5590200445434297, "loss": 1.0028624534606934, "loss_ce": 0.00066518341191113, "loss_iou": 0.41796875, "loss_num": 0.032958984375, "loss_xval": 1.0, "num_input_tokens_seen": 64350980, "step": 1149 }, { "epoch": 2.5612472160356345, "grad_norm": 15.238943099975586, "learning_rate": 1e-06, "loss": 0.8567, "num_input_tokens_seen": 64407372, "step": 1150 }, { "epoch": 2.5612472160356345, "loss": 0.6080912351608276, "loss_ce": 0.00042517349356785417, "loss_iou": 0.255859375, "loss_num": 0.0194091796875, "loss_xval": 0.609375, "num_input_tokens_seen": 64407372, "step": 1150 }, { "epoch": 2.5634743875278394, "grad_norm": 24.09518814086914, "learning_rate": 1e-06, "loss": 0.9458, "num_input_tokens_seen": 64459936, "step": 1151 }, { "epoch": 2.5634743875278394, "loss": 0.8809654712677002, "loss_ce": 0.0003502329345792532, "loss_iou": 0.40234375, "loss_num": 0.01507568359375, "loss_xval": 0.87890625, "num_input_tokens_seen": 64459936, "step": 1151 }, { "epoch": 2.5657015590200447, "grad_norm": 18.098283767700195, "learning_rate": 1e-06, "loss": 0.9909, "num_input_tokens_seen": 64516664, "step": 1152 }, { "epoch": 2.5657015590200447, "loss": 0.8294593095779419, "loss_ce": 0.00035776515142060816, "loss_iou": 0.337890625, "loss_num": 0.03076171875, "loss_xval": 0.828125, "num_input_tokens_seen": 64516664, "step": 1152 }, { "epoch": 2.5679287305122496, "grad_norm": 23.110363006591797, "learning_rate": 1e-06, "loss": 0.8105, "num_input_tokens_seen": 64573704, "step": 1153 }, { "epoch": 2.5679287305122496, "loss": 0.7701290845870972, "loss_ce": 0.0003536652075126767, "loss_iou": 0.326171875, "loss_num": 0.0234375, "loss_xval": 0.76953125, "num_input_tokens_seen": 64573704, "step": 1153 }, { "epoch": 2.5701559020044544, "grad_norm": 18.98008155822754, "learning_rate": 1e-06, "loss": 0.8578, "num_input_tokens_seen": 64632268, "step": 1154 }, { "epoch": 2.5701559020044544, "loss": 0.7266561985015869, "loss_ce": 0.00033786421408876777, "loss_iou": 0.32421875, "loss_num": 0.015625, "loss_xval": 0.7265625, "num_input_tokens_seen": 64632268, "step": 1154 }, { "epoch": 2.5723830734966593, "grad_norm": 17.440505981445312, "learning_rate": 1e-06, "loss": 1.0514, "num_input_tokens_seen": 64687228, "step": 1155 }, { "epoch": 2.5723830734966593, "loss": 1.1310064792633057, "loss_ce": 0.0003913280088454485, "loss_iou": 0.470703125, "loss_num": 0.03759765625, "loss_xval": 1.1328125, "num_input_tokens_seen": 64687228, "step": 1155 }, { "epoch": 2.574610244988864, "grad_norm": 17.068138122558594, "learning_rate": 1e-06, "loss": 0.7717, "num_input_tokens_seen": 64744716, "step": 1156 }, { "epoch": 2.574610244988864, "loss": 0.6678210496902466, "loss_ce": 0.00034053760464303195, "loss_iou": 0.291015625, "loss_num": 0.01708984375, "loss_xval": 0.66796875, "num_input_tokens_seen": 64744716, "step": 1156 }, { "epoch": 2.576837416481069, "grad_norm": 16.1224308013916, "learning_rate": 1e-06, "loss": 0.894, "num_input_tokens_seen": 64803444, "step": 1157 }, { "epoch": 2.576837416481069, "loss": 0.783063530921936, "loss_ce": 0.0003486335917841643, "loss_iou": 0.34375, "loss_num": 0.0194091796875, "loss_xval": 0.78125, "num_input_tokens_seen": 64803444, "step": 1157 }, { "epoch": 2.579064587973274, "grad_norm": 31.78223991394043, "learning_rate": 1e-06, "loss": 0.8672, "num_input_tokens_seen": 64859080, "step": 1158 }, { "epoch": 2.579064587973274, "loss": 0.8566364645957947, "loss_ce": 0.0004353098920546472, "loss_iou": 0.33984375, "loss_num": 0.03515625, "loss_xval": 0.85546875, "num_input_tokens_seen": 64859080, "step": 1158 }, { "epoch": 2.5812917594654787, "grad_norm": 22.2586612701416, "learning_rate": 1e-06, "loss": 1.2238, "num_input_tokens_seen": 64916436, "step": 1159 }, { "epoch": 2.5812917594654787, "loss": 1.3102490901947021, "loss_ce": 0.0009229998104274273, "loss_iou": 0.5078125, "loss_num": 0.059326171875, "loss_xval": 1.3125, "num_input_tokens_seen": 64916436, "step": 1159 }, { "epoch": 2.5835189309576836, "grad_norm": 47.00844955444336, "learning_rate": 1e-06, "loss": 1.2237, "num_input_tokens_seen": 64972836, "step": 1160 }, { "epoch": 2.5835189309576836, "loss": 1.0932590961456299, "loss_ce": 0.0004857148160226643, "loss_iou": 0.4375, "loss_num": 0.04345703125, "loss_xval": 1.09375, "num_input_tokens_seen": 64972836, "step": 1160 }, { "epoch": 2.585746102449889, "grad_norm": 20.3150634765625, "learning_rate": 1e-06, "loss": 1.236, "num_input_tokens_seen": 65027704, "step": 1161 }, { "epoch": 2.585746102449889, "loss": 1.0992555618286133, "loss_ce": 0.001111081801354885, "loss_iou": 0.474609375, "loss_num": 0.0296630859375, "loss_xval": 1.1015625, "num_input_tokens_seen": 65027704, "step": 1161 }, { "epoch": 2.5879732739420938, "grad_norm": 18.011165618896484, "learning_rate": 1e-06, "loss": 0.932, "num_input_tokens_seen": 65083788, "step": 1162 }, { "epoch": 2.5879732739420938, "loss": 1.0766229629516602, "loss_ce": 0.00045114755630493164, "loss_iou": 0.470703125, "loss_num": 0.0267333984375, "loss_xval": 1.078125, "num_input_tokens_seen": 65083788, "step": 1162 }, { "epoch": 2.5902004454342986, "grad_norm": 82.84613037109375, "learning_rate": 1e-06, "loss": 1.2092, "num_input_tokens_seen": 65139956, "step": 1163 }, { "epoch": 2.5902004454342986, "loss": 1.3553651571273804, "loss_ce": 0.00038471657899208367, "loss_iou": 0.546875, "loss_num": 0.051513671875, "loss_xval": 1.3515625, "num_input_tokens_seen": 65139956, "step": 1163 }, { "epoch": 2.5924276169265035, "grad_norm": 17.478727340698242, "learning_rate": 1e-06, "loss": 0.9212, "num_input_tokens_seen": 65192748, "step": 1164 }, { "epoch": 2.5924276169265035, "loss": 0.8342557549476624, "loss_ce": 0.0005154828540980816, "loss_iou": 0.337890625, "loss_num": 0.03173828125, "loss_xval": 0.83203125, "num_input_tokens_seen": 65192748, "step": 1164 }, { "epoch": 2.5946547884187083, "grad_norm": 16.476207733154297, "learning_rate": 1e-06, "loss": 0.9718, "num_input_tokens_seen": 65247340, "step": 1165 }, { "epoch": 2.5946547884187083, "loss": 0.9732043743133545, "loss_ce": 0.002012967597693205, "loss_iou": 0.4140625, "loss_num": 0.029052734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 65247340, "step": 1165 }, { "epoch": 2.596881959910913, "grad_norm": 22.207536697387695, "learning_rate": 1e-06, "loss": 1.1978, "num_input_tokens_seen": 65304100, "step": 1166 }, { "epoch": 2.596881959910913, "loss": 1.1994106769561768, "loss_ce": 0.00043600943172350526, "loss_iou": 0.49609375, "loss_num": 0.04150390625, "loss_xval": 1.1953125, "num_input_tokens_seen": 65304100, "step": 1166 }, { "epoch": 2.599109131403118, "grad_norm": 28.934432983398438, "learning_rate": 1e-06, "loss": 0.7938, "num_input_tokens_seen": 65362144, "step": 1167 }, { "epoch": 2.599109131403118, "loss": 0.9534372091293335, "loss_ce": 0.0051950025372207165, "loss_iou": 0.400390625, "loss_num": 0.0294189453125, "loss_xval": 0.94921875, "num_input_tokens_seen": 65362144, "step": 1167 }, { "epoch": 2.601336302895323, "grad_norm": 18.83047103881836, "learning_rate": 1e-06, "loss": 0.8324, "num_input_tokens_seen": 65417596, "step": 1168 }, { "epoch": 2.601336302895323, "loss": 1.0699725151062012, "loss_ce": 0.0008807817357592285, "loss_iou": 0.421875, "loss_num": 0.044677734375, "loss_xval": 1.0703125, "num_input_tokens_seen": 65417596, "step": 1168 }, { "epoch": 2.6035634743875278, "grad_norm": 24.358144760131836, "learning_rate": 1e-06, "loss": 1.0814, "num_input_tokens_seen": 65470364, "step": 1169 }, { "epoch": 2.6035634743875278, "loss": 1.0574841499328613, "loss_ce": 0.0003552237758412957, "loss_iou": 0.421875, "loss_num": 0.04296875, "loss_xval": 1.0546875, "num_input_tokens_seen": 65470364, "step": 1169 }, { "epoch": 2.6057906458797326, "grad_norm": 93.62918090820312, "learning_rate": 1e-06, "loss": 1.0645, "num_input_tokens_seen": 65526196, "step": 1170 }, { "epoch": 2.6057906458797326, "loss": 0.9251736998558044, "loss_ce": 0.0003689858131110668, "loss_iou": 0.40625, "loss_num": 0.0223388671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 65526196, "step": 1170 }, { "epoch": 2.6080178173719375, "grad_norm": 18.0775146484375, "learning_rate": 1e-06, "loss": 0.9244, "num_input_tokens_seen": 65580796, "step": 1171 }, { "epoch": 2.6080178173719375, "loss": 0.8135091662406921, "loss_ce": 0.0005208852817304432, "loss_iou": 0.330078125, "loss_num": 0.03076171875, "loss_xval": 0.8125, "num_input_tokens_seen": 65580796, "step": 1171 }, { "epoch": 2.6102449888641424, "grad_norm": 20.29970932006836, "learning_rate": 1e-06, "loss": 0.9885, "num_input_tokens_seen": 65633880, "step": 1172 }, { "epoch": 2.6102449888641424, "loss": 0.7842642068862915, "loss_ce": 0.00032866618130356073, "loss_iou": 0.318359375, "loss_num": 0.0294189453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 65633880, "step": 1172 }, { "epoch": 2.612472160356347, "grad_norm": 16.221195220947266, "learning_rate": 1e-06, "loss": 0.7178, "num_input_tokens_seen": 65689656, "step": 1173 }, { "epoch": 2.612472160356347, "loss": 0.6070936322212219, "loss_ce": 0.00040419274591840804, "loss_iou": 0.2578125, "loss_num": 0.0185546875, "loss_xval": 0.60546875, "num_input_tokens_seen": 65689656, "step": 1173 }, { "epoch": 2.614699331848552, "grad_norm": 21.689422607421875, "learning_rate": 1e-06, "loss": 0.9887, "num_input_tokens_seen": 65748144, "step": 1174 }, { "epoch": 2.614699331848552, "loss": 0.829987108707428, "loss_ce": 0.00039724778616800904, "loss_iou": 0.369140625, "loss_num": 0.0179443359375, "loss_xval": 0.828125, "num_input_tokens_seen": 65748144, "step": 1174 }, { "epoch": 2.6169265033407574, "grad_norm": 26.423198699951172, "learning_rate": 1e-06, "loss": 1.0744, "num_input_tokens_seen": 65804144, "step": 1175 }, { "epoch": 2.6169265033407574, "loss": 1.1194689273834229, "loss_ce": 0.00032840511994436383, "loss_iou": 0.4609375, "loss_num": 0.03955078125, "loss_xval": 1.1171875, "num_input_tokens_seen": 65804144, "step": 1175 }, { "epoch": 2.6191536748329622, "grad_norm": 24.602550506591797, "learning_rate": 1e-06, "loss": 1.1684, "num_input_tokens_seen": 65858964, "step": 1176 }, { "epoch": 2.6191536748329622, "loss": 0.9794027805328369, "loss_ce": 0.00039887180901132524, "loss_iou": 0.40625, "loss_num": 0.033203125, "loss_xval": 0.98046875, "num_input_tokens_seen": 65858964, "step": 1176 }, { "epoch": 2.621380846325167, "grad_norm": 20.516910552978516, "learning_rate": 1e-06, "loss": 0.8049, "num_input_tokens_seen": 65914988, "step": 1177 }, { "epoch": 2.621380846325167, "loss": 0.8118576407432556, "loss_ce": 0.00033423834247514606, "loss_iou": 0.3515625, "loss_num": 0.0216064453125, "loss_xval": 0.8125, "num_input_tokens_seen": 65914988, "step": 1177 }, { "epoch": 2.623608017817372, "grad_norm": 20.05683135986328, "learning_rate": 1e-06, "loss": 1.0382, "num_input_tokens_seen": 65971964, "step": 1178 }, { "epoch": 2.623608017817372, "loss": 1.1247992515563965, "loss_ce": 0.0005316782626323402, "loss_iou": 0.451171875, "loss_num": 0.044189453125, "loss_xval": 1.125, "num_input_tokens_seen": 65971964, "step": 1178 }, { "epoch": 2.625835189309577, "grad_norm": 33.51689910888672, "learning_rate": 1e-06, "loss": 0.919, "num_input_tokens_seen": 66030480, "step": 1179 }, { "epoch": 2.625835189309577, "loss": 1.0413250923156738, "loss_ce": 0.00030942526063881814, "loss_iou": 0.435546875, "loss_num": 0.03369140625, "loss_xval": 1.0390625, "num_input_tokens_seen": 66030480, "step": 1179 }, { "epoch": 2.6280623608017817, "grad_norm": 21.715831756591797, "learning_rate": 1e-06, "loss": 1.1688, "num_input_tokens_seen": 66084552, "step": 1180 }, { "epoch": 2.6280623608017817, "loss": 1.3229314088821411, "loss_ce": 0.0006658075144514441, "loss_iou": 0.52734375, "loss_num": 0.052978515625, "loss_xval": 1.3203125, "num_input_tokens_seen": 66084552, "step": 1180 }, { "epoch": 2.6302895322939865, "grad_norm": 16.5938777923584, "learning_rate": 1e-06, "loss": 0.8147, "num_input_tokens_seen": 66140764, "step": 1181 }, { "epoch": 2.6302895322939865, "loss": 0.7720546722412109, "loss_ce": 0.0005702448543161154, "loss_iou": 0.306640625, "loss_num": 0.031494140625, "loss_xval": 0.7734375, "num_input_tokens_seen": 66140764, "step": 1181 }, { "epoch": 2.6325167037861914, "grad_norm": 20.419330596923828, "learning_rate": 1e-06, "loss": 0.9905, "num_input_tokens_seen": 66196076, "step": 1182 }, { "epoch": 2.6325167037861914, "loss": 1.1305346488952637, "loss_ce": 0.0006517736474052072, "loss_iou": 0.50390625, "loss_num": 0.0245361328125, "loss_xval": 1.1328125, "num_input_tokens_seen": 66196076, "step": 1182 }, { "epoch": 2.6347438752783967, "grad_norm": 27.849132537841797, "learning_rate": 1e-06, "loss": 0.7988, "num_input_tokens_seen": 66247728, "step": 1183 }, { "epoch": 2.6347438752783967, "loss": 0.901054859161377, "loss_ce": 0.000420065043726936, "loss_iou": 0.375, "loss_num": 0.0296630859375, "loss_xval": 0.90234375, "num_input_tokens_seen": 66247728, "step": 1183 }, { "epoch": 2.6369710467706016, "grad_norm": 20.54474449157715, "learning_rate": 1e-06, "loss": 1.0772, "num_input_tokens_seen": 66301456, "step": 1184 }, { "epoch": 2.6369710467706016, "loss": 1.1666040420532227, "loss_ce": 0.000588378170505166, "loss_iou": 0.43359375, "loss_num": 0.060302734375, "loss_xval": 1.1640625, "num_input_tokens_seen": 66301456, "step": 1184 }, { "epoch": 2.6391982182628064, "grad_norm": 21.712364196777344, "learning_rate": 1e-06, "loss": 0.8525, "num_input_tokens_seen": 66358172, "step": 1185 }, { "epoch": 2.6391982182628064, "loss": 0.7973392009735107, "loss_ce": 0.0003421990550123155, "loss_iou": 0.35546875, "loss_num": 0.017333984375, "loss_xval": 0.796875, "num_input_tokens_seen": 66358172, "step": 1185 }, { "epoch": 2.6414253897550113, "grad_norm": 46.89886474609375, "learning_rate": 1e-06, "loss": 1.2667, "num_input_tokens_seen": 66415012, "step": 1186 }, { "epoch": 2.6414253897550113, "loss": 1.1698817014694214, "loss_ce": 0.0004481581272557378, "loss_iou": 0.47265625, "loss_num": 0.045166015625, "loss_xval": 1.171875, "num_input_tokens_seen": 66415012, "step": 1186 }, { "epoch": 2.643652561247216, "grad_norm": 18.3448429107666, "learning_rate": 1e-06, "loss": 1.0143, "num_input_tokens_seen": 66468628, "step": 1187 }, { "epoch": 2.643652561247216, "loss": 1.03020179271698, "loss_ce": 0.0004165967693552375, "loss_iou": 0.439453125, "loss_num": 0.030029296875, "loss_xval": 1.03125, "num_input_tokens_seen": 66468628, "step": 1187 }, { "epoch": 2.645879732739421, "grad_norm": 36.016719818115234, "learning_rate": 1e-06, "loss": 0.8041, "num_input_tokens_seen": 66524504, "step": 1188 }, { "epoch": 2.645879732739421, "loss": 0.7011793851852417, "loss_ce": 0.0004957778146490455, "loss_iou": 0.275390625, "loss_num": 0.0301513671875, "loss_xval": 0.69921875, "num_input_tokens_seen": 66524504, "step": 1188 }, { "epoch": 2.648106904231626, "grad_norm": 31.499238967895508, "learning_rate": 1e-06, "loss": 0.923, "num_input_tokens_seen": 66580192, "step": 1189 }, { "epoch": 2.648106904231626, "loss": 0.7815631031990051, "loss_ce": 0.00031307380413636565, "loss_iou": 0.287109375, "loss_num": 0.0419921875, "loss_xval": 0.78125, "num_input_tokens_seen": 66580192, "step": 1189 }, { "epoch": 2.6503340757238307, "grad_norm": 17.818838119506836, "learning_rate": 1e-06, "loss": 1.0473, "num_input_tokens_seen": 66637228, "step": 1190 }, { "epoch": 2.6503340757238307, "loss": 0.8321281671524048, "loss_ce": 0.0003410530334804207, "loss_iou": 0.3515625, "loss_num": 0.0260009765625, "loss_xval": 0.83203125, "num_input_tokens_seen": 66637228, "step": 1190 }, { "epoch": 2.6525612472160356, "grad_norm": 21.78716278076172, "learning_rate": 1e-06, "loss": 0.8932, "num_input_tokens_seen": 66695384, "step": 1191 }, { "epoch": 2.6525612472160356, "loss": 0.9930351376533508, "loss_ce": 0.0003593713045120239, "loss_iou": 0.41796875, "loss_num": 0.03125, "loss_xval": 0.9921875, "num_input_tokens_seen": 66695384, "step": 1191 }, { "epoch": 2.6547884187082404, "grad_norm": 19.491619110107422, "learning_rate": 1e-06, "loss": 0.8489, "num_input_tokens_seen": 66749192, "step": 1192 }, { "epoch": 2.6547884187082404, "loss": 0.7878109216690063, "loss_ce": 0.0011898394441232085, "loss_iou": 0.3359375, "loss_num": 0.023193359375, "loss_xval": 0.78515625, "num_input_tokens_seen": 66749192, "step": 1192 }, { "epoch": 2.6570155902004453, "grad_norm": 26.39838981628418, "learning_rate": 1e-06, "loss": 1.1329, "num_input_tokens_seen": 66803576, "step": 1193 }, { "epoch": 2.6570155902004453, "loss": 1.0078489780426025, "loss_ce": 0.0005247670924291015, "loss_iou": 0.421875, "loss_num": 0.03271484375, "loss_xval": 1.0078125, "num_input_tokens_seen": 66803576, "step": 1193 }, { "epoch": 2.65924276169265, "grad_norm": 23.46156883239746, "learning_rate": 1e-06, "loss": 0.8324, "num_input_tokens_seen": 66859052, "step": 1194 }, { "epoch": 2.65924276169265, "loss": 0.7302192449569702, "loss_ce": 0.000482962466776371, "loss_iou": 0.296875, "loss_num": 0.0274658203125, "loss_xval": 0.73046875, "num_input_tokens_seen": 66859052, "step": 1194 }, { "epoch": 2.661469933184855, "grad_norm": 16.806400299072266, "learning_rate": 1e-06, "loss": 1.0672, "num_input_tokens_seen": 66914164, "step": 1195 }, { "epoch": 2.661469933184855, "loss": 1.1170461177825928, "loss_ce": 0.000591110554523766, "loss_iou": 0.427734375, "loss_num": 0.052001953125, "loss_xval": 1.1171875, "num_input_tokens_seen": 66914164, "step": 1195 }, { "epoch": 2.66369710467706, "grad_norm": 18.408832550048828, "learning_rate": 1e-06, "loss": 1.0065, "num_input_tokens_seen": 66969884, "step": 1196 }, { "epoch": 2.66369710467706, "loss": 0.8114081621170044, "loss_ce": 0.00037299515679478645, "loss_iou": 0.341796875, "loss_num": 0.02587890625, "loss_xval": 0.8125, "num_input_tokens_seen": 66969884, "step": 1196 }, { "epoch": 2.665924276169265, "grad_norm": 23.257755279541016, "learning_rate": 1e-06, "loss": 1.0099, "num_input_tokens_seen": 67028592, "step": 1197 }, { "epoch": 2.665924276169265, "loss": 1.1483575105667114, "loss_ce": 0.0004082891682628542, "loss_iou": 0.451171875, "loss_num": 0.04931640625, "loss_xval": 1.1484375, "num_input_tokens_seen": 67028592, "step": 1197 }, { "epoch": 2.66815144766147, "grad_norm": 14.403682708740234, "learning_rate": 1e-06, "loss": 0.917, "num_input_tokens_seen": 67085904, "step": 1198 }, { "epoch": 2.66815144766147, "loss": 0.916401207447052, "loss_ce": 0.000385561550501734, "loss_iou": 0.3984375, "loss_num": 0.0234375, "loss_xval": 0.9140625, "num_input_tokens_seen": 67085904, "step": 1198 }, { "epoch": 2.670378619153675, "grad_norm": 16.39507484436035, "learning_rate": 1e-06, "loss": 0.7198, "num_input_tokens_seen": 67143124, "step": 1199 }, { "epoch": 2.670378619153675, "loss": 0.6678426861763, "loss_ce": 0.0003622480435296893, "loss_iou": 0.27734375, "loss_num": 0.0224609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 67143124, "step": 1199 }, { "epoch": 2.6726057906458798, "grad_norm": 12.098701477050781, "learning_rate": 1e-06, "loss": 0.867, "num_input_tokens_seen": 67196720, "step": 1200 }, { "epoch": 2.6726057906458798, "loss": 1.0529296398162842, "loss_ce": 0.0005615358240902424, "loss_iou": 0.40625, "loss_num": 0.0478515625, "loss_xval": 1.0546875, "num_input_tokens_seen": 67196720, "step": 1200 }, { "epoch": 2.6748329621380846, "grad_norm": 23.761484146118164, "learning_rate": 1e-06, "loss": 0.9273, "num_input_tokens_seen": 67251244, "step": 1201 }, { "epoch": 2.6748329621380846, "loss": 0.9254390001296997, "loss_ce": 0.0003901528543792665, "loss_iou": 0.3359375, "loss_num": 0.051025390625, "loss_xval": 0.92578125, "num_input_tokens_seen": 67251244, "step": 1201 }, { "epoch": 2.6770601336302895, "grad_norm": 13.946866989135742, "learning_rate": 1e-06, "loss": 0.9489, "num_input_tokens_seen": 67305792, "step": 1202 }, { "epoch": 2.6770601336302895, "loss": 1.0536152124404907, "loss_ce": 0.0003925645723938942, "loss_iou": 0.439453125, "loss_num": 0.03466796875, "loss_xval": 1.0546875, "num_input_tokens_seen": 67305792, "step": 1202 }, { "epoch": 2.6792873051224944, "grad_norm": 14.783041954040527, "learning_rate": 1e-06, "loss": 0.7948, "num_input_tokens_seen": 67363808, "step": 1203 }, { "epoch": 2.6792873051224944, "loss": 0.7142338156700134, "loss_ce": 0.0010990574955940247, "loss_iou": 0.30859375, "loss_num": 0.0191650390625, "loss_xval": 0.71484375, "num_input_tokens_seen": 67363808, "step": 1203 }, { "epoch": 2.681514476614699, "grad_norm": 85.64546966552734, "learning_rate": 1e-06, "loss": 0.7912, "num_input_tokens_seen": 67419668, "step": 1204 }, { "epoch": 2.681514476614699, "loss": 0.7728584408760071, "loss_ce": 0.0003975207218900323, "loss_iou": 0.296875, "loss_num": 0.036376953125, "loss_xval": 0.7734375, "num_input_tokens_seen": 67419668, "step": 1204 }, { "epoch": 2.683741648106904, "grad_norm": 26.301057815551758, "learning_rate": 1e-06, "loss": 0.7419, "num_input_tokens_seen": 67477960, "step": 1205 }, { "epoch": 2.683741648106904, "loss": 0.5447392463684082, "loss_ce": 0.0003056719433516264, "loss_iou": 0.2138671875, "loss_num": 0.0235595703125, "loss_xval": 0.54296875, "num_input_tokens_seen": 67477960, "step": 1205 }, { "epoch": 2.6859688195991094, "grad_norm": 20.807857513427734, "learning_rate": 1e-06, "loss": 1.0459, "num_input_tokens_seen": 67530876, "step": 1206 }, { "epoch": 2.6859688195991094, "loss": 1.1529654264450073, "loss_ce": 0.0003775313380174339, "loss_iou": 0.50390625, "loss_num": 0.029052734375, "loss_xval": 1.15625, "num_input_tokens_seen": 67530876, "step": 1206 }, { "epoch": 2.6881959910913142, "grad_norm": 23.6107234954834, "learning_rate": 1e-06, "loss": 0.826, "num_input_tokens_seen": 67586056, "step": 1207 }, { "epoch": 2.6881959910913142, "loss": 0.9727563858032227, "loss_ce": 0.0005884337006136775, "loss_iou": 0.392578125, "loss_num": 0.037353515625, "loss_xval": 0.97265625, "num_input_tokens_seen": 67586056, "step": 1207 }, { "epoch": 2.690423162583519, "grad_norm": 17.839683532714844, "learning_rate": 1e-06, "loss": 0.9884, "num_input_tokens_seen": 67639676, "step": 1208 }, { "epoch": 2.690423162583519, "loss": 0.9732263088226318, "loss_ce": 0.0003259408404119313, "loss_iou": 0.40625, "loss_num": 0.0322265625, "loss_xval": 0.97265625, "num_input_tokens_seen": 67639676, "step": 1208 }, { "epoch": 2.692650334075724, "grad_norm": 71.87188720703125, "learning_rate": 1e-06, "loss": 1.0553, "num_input_tokens_seen": 67695312, "step": 1209 }, { "epoch": 2.692650334075724, "loss": 1.0907628536224365, "loss_ce": 0.0004308174247853458, "loss_iou": 0.4296875, "loss_num": 0.046142578125, "loss_xval": 1.09375, "num_input_tokens_seen": 67695312, "step": 1209 }, { "epoch": 2.694877505567929, "grad_norm": 21.322093963623047, "learning_rate": 1e-06, "loss": 0.7315, "num_input_tokens_seen": 67754484, "step": 1210 }, { "epoch": 2.694877505567929, "loss": 0.6453206539154053, "loss_ce": 0.00030111317755654454, "loss_iou": 0.279296875, "loss_num": 0.0169677734375, "loss_xval": 0.64453125, "num_input_tokens_seen": 67754484, "step": 1210 }, { "epoch": 2.6971046770601337, "grad_norm": 17.93378448486328, "learning_rate": 1e-06, "loss": 0.834, "num_input_tokens_seen": 67806972, "step": 1211 }, { "epoch": 2.6971046770601337, "loss": 0.879258930683136, "loss_ce": 0.00035265146289020777, "loss_iou": 0.375, "loss_num": 0.0257568359375, "loss_xval": 0.87890625, "num_input_tokens_seen": 67806972, "step": 1211 }, { "epoch": 2.6993318485523385, "grad_norm": 17.423023223876953, "learning_rate": 1e-06, "loss": 0.9924, "num_input_tokens_seen": 67865540, "step": 1212 }, { "epoch": 2.6993318485523385, "loss": 1.1357271671295166, "loss_ce": 0.0004733309615403414, "loss_iou": 0.46875, "loss_num": 0.0390625, "loss_xval": 1.1328125, "num_input_tokens_seen": 67865540, "step": 1212 }, { "epoch": 2.7015590200445434, "grad_norm": 16.1553955078125, "learning_rate": 1e-06, "loss": 0.8851, "num_input_tokens_seen": 67923660, "step": 1213 }, { "epoch": 2.7015590200445434, "loss": 0.8159765005111694, "loss_ce": 0.0004247480828780681, "loss_iou": 0.357421875, "loss_num": 0.0201416015625, "loss_xval": 0.81640625, "num_input_tokens_seen": 67923660, "step": 1213 }, { "epoch": 2.7037861915367483, "grad_norm": 27.426773071289062, "learning_rate": 1e-06, "loss": 1.0762, "num_input_tokens_seen": 67979792, "step": 1214 }, { "epoch": 2.7037861915367483, "loss": 0.9264326095581055, "loss_ce": 0.00040718415402807295, "loss_iou": 0.361328125, "loss_num": 0.04052734375, "loss_xval": 0.92578125, "num_input_tokens_seen": 67979792, "step": 1214 }, { "epoch": 2.706013363028953, "grad_norm": 143.9352569580078, "learning_rate": 1e-06, "loss": 1.2101, "num_input_tokens_seen": 68036028, "step": 1215 }, { "epoch": 2.706013363028953, "loss": 1.3111499547958374, "loss_ce": 0.0006030529621057212, "loss_iou": 0.53125, "loss_num": 0.0498046875, "loss_xval": 1.3125, "num_input_tokens_seen": 68036028, "step": 1215 }, { "epoch": 2.708240534521158, "grad_norm": 15.904016494750977, "learning_rate": 1e-06, "loss": 0.8784, "num_input_tokens_seen": 68090580, "step": 1216 }, { "epoch": 2.708240534521158, "loss": 0.8531776070594788, "loss_ce": 0.00039442608249373734, "loss_iou": 0.384765625, "loss_num": 0.016845703125, "loss_xval": 0.8515625, "num_input_tokens_seen": 68090580, "step": 1216 }, { "epoch": 2.710467706013363, "grad_norm": 16.765676498413086, "learning_rate": 1e-06, "loss": 0.7818, "num_input_tokens_seen": 68146632, "step": 1217 }, { "epoch": 2.710467706013363, "loss": 0.8600921630859375, "loss_ce": 0.00047299021389335394, "loss_iou": 0.3671875, "loss_num": 0.02490234375, "loss_xval": 0.859375, "num_input_tokens_seen": 68146632, "step": 1217 }, { "epoch": 2.7126948775055677, "grad_norm": 16.13719367980957, "learning_rate": 1e-06, "loss": 0.8074, "num_input_tokens_seen": 68202376, "step": 1218 }, { "epoch": 2.7126948775055677, "loss": 0.9842748045921326, "loss_ce": 0.00038809922989457846, "loss_iou": 0.40625, "loss_num": 0.0341796875, "loss_xval": 0.984375, "num_input_tokens_seen": 68202376, "step": 1218 }, { "epoch": 2.7149220489977726, "grad_norm": 16.392423629760742, "learning_rate": 1e-06, "loss": 0.832, "num_input_tokens_seen": 68257756, "step": 1219 }, { "epoch": 2.7149220489977726, "loss": 0.6619054079055786, "loss_ce": 0.00028435979038476944, "loss_iou": 0.275390625, "loss_num": 0.0224609375, "loss_xval": 0.66015625, "num_input_tokens_seen": 68257756, "step": 1219 }, { "epoch": 2.717149220489978, "grad_norm": 39.93339920043945, "learning_rate": 1e-06, "loss": 0.8111, "num_input_tokens_seen": 68317524, "step": 1220 }, { "epoch": 2.717149220489978, "loss": 0.8016022443771362, "loss_ce": 0.0003327628946863115, "loss_iou": 0.3203125, "loss_num": 0.0322265625, "loss_xval": 0.80078125, "num_input_tokens_seen": 68317524, "step": 1220 }, { "epoch": 2.7193763919821827, "grad_norm": 32.62158203125, "learning_rate": 1e-06, "loss": 0.8964, "num_input_tokens_seen": 68370704, "step": 1221 }, { "epoch": 2.7193763919821827, "loss": 0.9595242738723755, "loss_ce": 0.0005399275105446577, "loss_iou": 0.404296875, "loss_num": 0.02978515625, "loss_xval": 0.9609375, "num_input_tokens_seen": 68370704, "step": 1221 }, { "epoch": 2.7216035634743876, "grad_norm": 26.76487159729004, "learning_rate": 1e-06, "loss": 1.0057, "num_input_tokens_seen": 68426772, "step": 1222 }, { "epoch": 2.7216035634743876, "loss": 0.91068434715271, "loss_ce": 0.00028396639390848577, "loss_iou": 0.37890625, "loss_num": 0.0303955078125, "loss_xval": 0.91015625, "num_input_tokens_seen": 68426772, "step": 1222 }, { "epoch": 2.7238307349665924, "grad_norm": 18.720125198364258, "learning_rate": 1e-06, "loss": 0.8459, "num_input_tokens_seen": 68483180, "step": 1223 }, { "epoch": 2.7238307349665924, "loss": 0.9395471215248108, "loss_ce": 0.0003381132846698165, "loss_iou": 0.380859375, "loss_num": 0.03564453125, "loss_xval": 0.9375, "num_input_tokens_seen": 68483180, "step": 1223 }, { "epoch": 2.7260579064587973, "grad_norm": 15.632884979248047, "learning_rate": 1e-06, "loss": 1.093, "num_input_tokens_seen": 68538720, "step": 1224 }, { "epoch": 2.7260579064587973, "loss": 1.2165559530258179, "loss_ce": 0.0004914908786304295, "loss_iou": 0.484375, "loss_num": 0.0498046875, "loss_xval": 1.21875, "num_input_tokens_seen": 68538720, "step": 1224 }, { "epoch": 2.728285077951002, "grad_norm": 16.02979850769043, "learning_rate": 1e-06, "loss": 0.8688, "num_input_tokens_seen": 68596884, "step": 1225 }, { "epoch": 2.728285077951002, "loss": 1.0046154260635376, "loss_ce": 0.00046502824989147484, "loss_iou": 0.427734375, "loss_num": 0.0299072265625, "loss_xval": 1.0078125, "num_input_tokens_seen": 68596884, "step": 1225 }, { "epoch": 2.730512249443207, "grad_norm": 19.00936508178711, "learning_rate": 1e-06, "loss": 0.8638, "num_input_tokens_seen": 68654332, "step": 1226 }, { "epoch": 2.730512249443207, "loss": 0.9798787832260132, "loss_ce": 0.00038662960287183523, "loss_iou": 0.373046875, "loss_num": 0.046875, "loss_xval": 0.98046875, "num_input_tokens_seen": 68654332, "step": 1226 }, { "epoch": 2.732739420935412, "grad_norm": 18.884357452392578, "learning_rate": 1e-06, "loss": 0.7695, "num_input_tokens_seen": 68713232, "step": 1227 }, { "epoch": 2.732739420935412, "loss": 0.8073458671569824, "loss_ce": 0.00046107626985758543, "loss_iou": 0.349609375, "loss_num": 0.0213623046875, "loss_xval": 0.80859375, "num_input_tokens_seen": 68713232, "step": 1227 }, { "epoch": 2.734966592427617, "grad_norm": 14.613882064819336, "learning_rate": 1e-06, "loss": 0.8354, "num_input_tokens_seen": 68768872, "step": 1228 }, { "epoch": 2.734966592427617, "loss": 0.870397686958313, "loss_ce": 0.0005245659267529845, "loss_iou": 0.353515625, "loss_num": 0.032470703125, "loss_xval": 0.87109375, "num_input_tokens_seen": 68768872, "step": 1228 }, { "epoch": 2.737193763919822, "grad_norm": 16.955501556396484, "learning_rate": 1e-06, "loss": 1.1796, "num_input_tokens_seen": 68822780, "step": 1229 }, { "epoch": 2.737193763919822, "loss": 1.0951062440872192, "loss_ce": 0.0003797074896283448, "loss_iou": 0.439453125, "loss_num": 0.043701171875, "loss_xval": 1.09375, "num_input_tokens_seen": 68822780, "step": 1229 }, { "epoch": 2.739420935412027, "grad_norm": 20.716533660888672, "learning_rate": 1e-06, "loss": 1.0516, "num_input_tokens_seen": 68877892, "step": 1230 }, { "epoch": 2.739420935412027, "loss": 1.0252530574798584, "loss_ce": 0.0008389821741729975, "loss_iou": 0.412109375, "loss_num": 0.0400390625, "loss_xval": 1.0234375, "num_input_tokens_seen": 68877892, "step": 1230 }, { "epoch": 2.7416481069042318, "grad_norm": 40.377017974853516, "learning_rate": 1e-06, "loss": 0.7578, "num_input_tokens_seen": 68935384, "step": 1231 }, { "epoch": 2.7416481069042318, "loss": 0.8238745331764221, "loss_ce": 0.0003882101736962795, "loss_iou": 0.34375, "loss_num": 0.0274658203125, "loss_xval": 0.82421875, "num_input_tokens_seen": 68935384, "step": 1231 }, { "epoch": 2.7438752783964366, "grad_norm": 20.555801391601562, "learning_rate": 1e-06, "loss": 0.9902, "num_input_tokens_seen": 68991268, "step": 1232 }, { "epoch": 2.7438752783964366, "loss": 1.0188990831375122, "loss_ce": 0.0003444222966209054, "loss_iou": 0.453125, "loss_num": 0.0225830078125, "loss_xval": 1.015625, "num_input_tokens_seen": 68991268, "step": 1232 }, { "epoch": 2.7461024498886415, "grad_norm": 17.75653648376465, "learning_rate": 1e-06, "loss": 0.9883, "num_input_tokens_seen": 69048292, "step": 1233 }, { "epoch": 2.7461024498886415, "loss": 1.4018105268478394, "loss_ce": 0.0009315803181380033, "loss_iou": 0.5390625, "loss_num": 0.06494140625, "loss_xval": 1.3984375, "num_input_tokens_seen": 69048292, "step": 1233 }, { "epoch": 2.7483296213808464, "grad_norm": 25.2138614654541, "learning_rate": 1e-06, "loss": 0.9916, "num_input_tokens_seen": 69102780, "step": 1234 }, { "epoch": 2.7483296213808464, "loss": 1.0661447048187256, "loss_ce": 0.00047087084385566413, "loss_iou": 0.421875, "loss_num": 0.044677734375, "loss_xval": 1.0625, "num_input_tokens_seen": 69102780, "step": 1234 }, { "epoch": 2.750556792873051, "grad_norm": 41.74817657470703, "learning_rate": 1e-06, "loss": 0.7839, "num_input_tokens_seen": 69157904, "step": 1235 }, { "epoch": 2.750556792873051, "loss": 0.9228337407112122, "loss_ce": 0.00047044423990882933, "loss_iou": 0.3828125, "loss_num": 0.03173828125, "loss_xval": 0.921875, "num_input_tokens_seen": 69157904, "step": 1235 }, { "epoch": 2.752783964365256, "grad_norm": 17.140514373779297, "learning_rate": 1e-06, "loss": 0.7179, "num_input_tokens_seen": 69215832, "step": 1236 }, { "epoch": 2.752783964365256, "loss": 0.8632500171661377, "loss_ce": 0.00045710656559094787, "loss_iou": 0.330078125, "loss_num": 0.040283203125, "loss_xval": 0.86328125, "num_input_tokens_seen": 69215832, "step": 1236 }, { "epoch": 2.755011135857461, "grad_norm": 43.60929870605469, "learning_rate": 1e-06, "loss": 0.9365, "num_input_tokens_seen": 69271512, "step": 1237 }, { "epoch": 2.755011135857461, "loss": 1.1083848476409912, "loss_ce": 0.0004747234925162047, "loss_iou": 0.486328125, "loss_num": 0.0272216796875, "loss_xval": 1.109375, "num_input_tokens_seen": 69271512, "step": 1237 }, { "epoch": 2.757238307349666, "grad_norm": 82.30256652832031, "learning_rate": 1e-06, "loss": 0.9312, "num_input_tokens_seen": 69328124, "step": 1238 }, { "epoch": 2.757238307349666, "loss": 1.1663775444030762, "loss_ce": 0.000606063287705183, "loss_iou": 0.466796875, "loss_num": 0.046142578125, "loss_xval": 1.1640625, "num_input_tokens_seen": 69328124, "step": 1238 }, { "epoch": 2.7594654788418707, "grad_norm": 18.209518432617188, "learning_rate": 1e-06, "loss": 0.9739, "num_input_tokens_seen": 69385368, "step": 1239 }, { "epoch": 2.7594654788418707, "loss": 1.1721243858337402, "loss_ce": 0.0022025578655302525, "loss_iou": 0.466796875, "loss_num": 0.047119140625, "loss_xval": 1.171875, "num_input_tokens_seen": 69385368, "step": 1239 }, { "epoch": 2.7616926503340755, "grad_norm": 27.468372344970703, "learning_rate": 1e-06, "loss": 0.7438, "num_input_tokens_seen": 69440864, "step": 1240 }, { "epoch": 2.7616926503340755, "loss": 0.7869477868080139, "loss_ce": 0.00032670435030013323, "loss_iou": 0.333984375, "loss_num": 0.0233154296875, "loss_xval": 0.78515625, "num_input_tokens_seen": 69440864, "step": 1240 }, { "epoch": 2.7639198218262804, "grad_norm": 29.165250778198242, "learning_rate": 1e-06, "loss": 0.9046, "num_input_tokens_seen": 69497304, "step": 1241 }, { "epoch": 2.7639198218262804, "loss": 0.9364851117134094, "loss_ce": 0.0004500235663726926, "loss_iou": 0.34375, "loss_num": 0.0498046875, "loss_xval": 0.9375, "num_input_tokens_seen": 69497304, "step": 1241 }, { "epoch": 2.7661469933184857, "grad_norm": 17.164466857910156, "learning_rate": 1e-06, "loss": 0.8518, "num_input_tokens_seen": 69554524, "step": 1242 }, { "epoch": 2.7661469933184857, "loss": 0.9595236778259277, "loss_ce": 0.0005393511964939535, "loss_iou": 0.404296875, "loss_num": 0.0301513671875, "loss_xval": 0.9609375, "num_input_tokens_seen": 69554524, "step": 1242 }, { "epoch": 2.7683741648106905, "grad_norm": 19.568635940551758, "learning_rate": 1e-06, "loss": 0.8746, "num_input_tokens_seen": 69611204, "step": 1243 }, { "epoch": 2.7683741648106905, "loss": 1.1134837865829468, "loss_ce": 0.004352906718850136, "loss_iou": 0.47265625, "loss_num": 0.033203125, "loss_xval": 1.109375, "num_input_tokens_seen": 69611204, "step": 1243 }, { "epoch": 2.7706013363028954, "grad_norm": 16.17546844482422, "learning_rate": 1e-06, "loss": 0.7277, "num_input_tokens_seen": 69668100, "step": 1244 }, { "epoch": 2.7706013363028954, "loss": 0.7887986898422241, "loss_ce": 0.00046862097224220634, "loss_iou": 0.318359375, "loss_num": 0.0301513671875, "loss_xval": 0.7890625, "num_input_tokens_seen": 69668100, "step": 1244 }, { "epoch": 2.7728285077951003, "grad_norm": 22.0662784576416, "learning_rate": 1e-06, "loss": 0.9972, "num_input_tokens_seen": 69723688, "step": 1245 }, { "epoch": 2.7728285077951003, "loss": 1.2928619384765625, "loss_ce": 0.00038146309088915586, "loss_iou": 0.5234375, "loss_num": 0.04931640625, "loss_xval": 1.2890625, "num_input_tokens_seen": 69723688, "step": 1245 }, { "epoch": 2.775055679287305, "grad_norm": 19.460460662841797, "learning_rate": 1e-06, "loss": 0.601, "num_input_tokens_seen": 69778896, "step": 1246 }, { "epoch": 2.775055679287305, "loss": 0.5189080238342285, "loss_ce": 0.000353323295712471, "loss_iou": 0.2197265625, "loss_num": 0.015869140625, "loss_xval": 0.51953125, "num_input_tokens_seen": 69778896, "step": 1246 }, { "epoch": 2.77728285077951, "grad_norm": 20.321802139282227, "learning_rate": 1e-06, "loss": 1.1584, "num_input_tokens_seen": 69836796, "step": 1247 }, { "epoch": 2.77728285077951, "loss": 1.031623125076294, "loss_ce": 0.00037302449345588684, "loss_iou": 0.4296875, "loss_num": 0.034912109375, "loss_xval": 1.03125, "num_input_tokens_seen": 69836796, "step": 1247 }, { "epoch": 2.779510022271715, "grad_norm": 15.81376838684082, "learning_rate": 1e-06, "loss": 0.8184, "num_input_tokens_seen": 69893548, "step": 1248 }, { "epoch": 2.779510022271715, "loss": 0.9024643898010254, "loss_ce": 0.00036481593269854784, "loss_iou": 0.365234375, "loss_num": 0.03466796875, "loss_xval": 0.90234375, "num_input_tokens_seen": 69893548, "step": 1248 }, { "epoch": 2.7817371937639197, "grad_norm": 131.67137145996094, "learning_rate": 1e-06, "loss": 1.0096, "num_input_tokens_seen": 69944348, "step": 1249 }, { "epoch": 2.7817371937639197, "loss": 0.9452086687088013, "loss_ce": 0.00038447632687166333, "loss_iou": 0.40625, "loss_num": 0.0264892578125, "loss_xval": 0.9453125, "num_input_tokens_seen": 69944348, "step": 1249 }, { "epoch": 2.7839643652561246, "grad_norm": 13.716216087341309, "learning_rate": 1e-06, "loss": 1.1417, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "eval_seeclick_web_CIoU": 0.5651521682739258, "eval_seeclick_web_GIoU": 0.5592525601387024, "eval_seeclick_web_IoU": 0.5811098515987396, "eval_seeclick_web_MAE_all": 0.017407238017767668, "eval_seeclick_web_MAE_h": 0.01108331186696887, "eval_seeclick_web_MAE_w": 0.01834576530382037, "eval_seeclick_web_MAE_x_boxes": 0.008968821726739407, "eval_seeclick_web_MAE_y_boxes": 0.02286715735681355, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9634284973144531, "eval_seeclick_web_loss_ce": 0.00043979176552966237, "eval_seeclick_web_loss_iou": 0.439697265625, "eval_seeclick_web_loss_num": 0.013711929321289062, "eval_seeclick_web_loss_xval": 0.9478759765625, "eval_seeclick_web_runtime": 32.3264, "eval_seeclick_web_samples_per_second": 1.547, "eval_seeclick_web_steps_per_second": 0.062, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "eval_icons_CIoU": 0.3112401217222214, "eval_icons_GIoU": 0.33725370466709137, "eval_icons_IoU": 0.38187770545482635, "eval_icons_MAE_all": 0.06834794208407402, "eval_icons_MAE_h": 0.03893335722386837, "eval_icons_MAE_w": 0.08004930429160595, "eval_icons_MAE_x_boxes": 0.05875684879720211, "eval_icons_MAE_y_boxes": 0.039277092553675175, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.721555471420288, "eval_icons_loss_ce": 0.0008201654127333313, "eval_icons_loss_iou": 0.66845703125, "eval_icons_loss_num": 0.06448173522949219, "eval_icons_loss_xval": 1.65869140625, "eval_icons_runtime": 30.6205, "eval_icons_samples_per_second": 1.633, "eval_icons_steps_per_second": 0.065, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "eval_screenspot_CIoU": 0.31326837340990704, "eval_screenspot_GIoU": 0.3382245600223541, "eval_screenspot_IoU": 0.3957456350326538, "eval_screenspot_MAE_all": 0.08052034179369609, "eval_screenspot_MAE_h": 0.04156584106385708, "eval_screenspot_MAE_w": 0.0848269909620285, "eval_screenspot_MAE_x_boxes": 0.11255322148402531, "eval_screenspot_MAE_y_boxes": 0.04932925725976626, "eval_screenspot_inside_bbox": 0.6045833428700765, "eval_screenspot_loss": 1.7774311304092407, "eval_screenspot_loss_ce": 0.0008034493657760322, "eval_screenspot_loss_iou": 0.7025553385416666, "eval_screenspot_loss_num": 0.09097544352213542, "eval_screenspot_loss_xval": 1.8603515625, "eval_screenspot_runtime": 54.3229, "eval_screenspot_samples_per_second": 1.638, "eval_screenspot_steps_per_second": 0.055, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "eval_compot_CIoU": 0.3437846302986145, "eval_compot_GIoU": 0.3697910010814667, "eval_compot_IoU": 0.4004169702529907, "eval_compot_MAE_all": 0.021469497121870518, "eval_compot_MAE_h": 0.011028026696294546, "eval_compot_MAE_w": 0.02703993208706379, "eval_compot_MAE_x_boxes": 0.030306325759738684, "eval_compot_MAE_y_boxes": 0.007093302207067609, "eval_compot_inside_bbox": 0.6145833432674408, "eval_compot_loss": 1.3977539539337158, "eval_compot_loss_ce": 0.0004101828089915216, "eval_compot_loss_iou": 0.63037109375, "eval_compot_loss_num": 0.020694732666015625, "eval_compot_loss_xval": 1.36376953125, "eval_compot_runtime": 32.0716, "eval_compot_samples_per_second": 1.559, "eval_compot_steps_per_second": 0.062, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "eval_custom_ui_val_CIoU": 0.41552355140447617, "eval_custom_ui_val_GIoU": 0.44775263799561393, "eval_custom_ui_val_IoU": 0.47426238159338635, "eval_custom_ui_val_MAE_all": 0.03807516168389055, "eval_custom_ui_val_MAE_h": 0.020889455763002236, "eval_custom_ui_val_MAE_w": 0.04122109152376652, "eval_custom_ui_val_MAE_x_boxes": 0.04385989842315515, "eval_custom_ui_val_MAE_y_boxes": 0.020979333875907794, "eval_custom_ui_val_inside_bbox": 0.6678240762816535, "eval_custom_ui_val_loss": 1.312791347503662, "eval_custom_ui_val_loss_ce": 0.0007190946586585293, "eval_custom_ui_val_loss_iou": 0.5456407335069444, "eval_custom_ui_val_loss_num": 0.03783988952636719, "eval_custom_ui_val_loss_xval": 1.2806260850694444, "eval_custom_ui_val_runtime": 95.8302, "eval_custom_ui_val_samples_per_second": 2.765, "eval_custom_ui_val_steps_per_second": 0.094, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.7839643652561246, "loss": 1.004927635192871, "loss_ce": 0.0005332073196768761, "loss_iou": 0.4296875, "loss_num": 0.0286865234375, "loss_xval": 1.0078125, "num_input_tokens_seen": 70001912, "step": 1250 }, { "epoch": 2.78619153674833, "grad_norm": 19.672489166259766, "learning_rate": 1e-06, "loss": 0.8286, "num_input_tokens_seen": 70058016, "step": 1251 }, { "epoch": 2.78619153674833, "loss": 0.6981078386306763, "loss_ce": 0.00035395551822148263, "loss_iou": 0.294921875, "loss_num": 0.0213623046875, "loss_xval": 0.69921875, "num_input_tokens_seen": 70058016, "step": 1251 }, { "epoch": 2.7884187082405347, "grad_norm": 17.608375549316406, "learning_rate": 1e-06, "loss": 0.9146, "num_input_tokens_seen": 70112088, "step": 1252 }, { "epoch": 2.7884187082405347, "loss": 0.9438179135322571, "loss_ce": 0.0003364117001183331, "loss_iou": 0.38671875, "loss_num": 0.033935546875, "loss_xval": 0.9453125, "num_input_tokens_seen": 70112088, "step": 1252 }, { "epoch": 2.7906458797327396, "grad_norm": 14.264594078063965, "learning_rate": 1e-06, "loss": 0.8837, "num_input_tokens_seen": 70171184, "step": 1253 }, { "epoch": 2.7906458797327396, "loss": 0.9383166432380676, "loss_ce": 0.00032835284946486354, "loss_iou": 0.400390625, "loss_num": 0.0272216796875, "loss_xval": 0.9375, "num_input_tokens_seen": 70171184, "step": 1253 }, { "epoch": 2.7928730512249444, "grad_norm": 21.34050750732422, "learning_rate": 1e-06, "loss": 1.0205, "num_input_tokens_seen": 70227628, "step": 1254 }, { "epoch": 2.7928730512249444, "loss": 1.0269427299499512, "loss_ce": 0.0004534609033726156, "loss_iou": 0.4296875, "loss_num": 0.03369140625, "loss_xval": 1.0234375, "num_input_tokens_seen": 70227628, "step": 1254 }, { "epoch": 2.7951002227171493, "grad_norm": 14.754793167114258, "learning_rate": 1e-06, "loss": 0.9247, "num_input_tokens_seen": 70283180, "step": 1255 }, { "epoch": 2.7951002227171493, "loss": 0.9754384160041809, "loss_ce": 0.00034071545815095305, "loss_iou": 0.392578125, "loss_num": 0.037841796875, "loss_xval": 0.9765625, "num_input_tokens_seen": 70283180, "step": 1255 }, { "epoch": 2.797327394209354, "grad_norm": 21.97988510131836, "learning_rate": 1e-06, "loss": 1.0107, "num_input_tokens_seen": 70335640, "step": 1256 }, { "epoch": 2.797327394209354, "loss": 1.1570931673049927, "loss_ce": 0.0003548713284544647, "loss_iou": 0.50390625, "loss_num": 0.029541015625, "loss_xval": 1.15625, "num_input_tokens_seen": 70335640, "step": 1256 }, { "epoch": 2.799554565701559, "grad_norm": 38.17330551147461, "learning_rate": 1e-06, "loss": 0.7453, "num_input_tokens_seen": 70390748, "step": 1257 }, { "epoch": 2.799554565701559, "loss": 0.8263010382652283, "loss_ce": 0.00037330458872020245, "loss_iou": 0.37109375, "loss_num": 0.01611328125, "loss_xval": 0.82421875, "num_input_tokens_seen": 70390748, "step": 1257 }, { "epoch": 2.801781737193764, "grad_norm": 22.141918182373047, "learning_rate": 1e-06, "loss": 1.0046, "num_input_tokens_seen": 70446116, "step": 1258 }, { "epoch": 2.801781737193764, "loss": 1.124394178390503, "loss_ce": 0.0003707860014401376, "loss_iou": 0.4609375, "loss_num": 0.040283203125, "loss_xval": 1.125, "num_input_tokens_seen": 70446116, "step": 1258 }, { "epoch": 2.8040089086859687, "grad_norm": 115.46956634521484, "learning_rate": 1e-06, "loss": 0.9891, "num_input_tokens_seen": 70503256, "step": 1259 }, { "epoch": 2.8040089086859687, "loss": 0.8390759229660034, "loss_ce": 0.0004528118879534304, "loss_iou": 0.3828125, "loss_num": 0.0147705078125, "loss_xval": 0.83984375, "num_input_tokens_seen": 70503256, "step": 1259 }, { "epoch": 2.8062360801781736, "grad_norm": 18.191608428955078, "learning_rate": 1e-06, "loss": 0.6494, "num_input_tokens_seen": 70559428, "step": 1260 }, { "epoch": 2.8062360801781736, "loss": 0.4827492833137512, "loss_ce": 0.0003274133778177202, "loss_iou": 0.2041015625, "loss_num": 0.014892578125, "loss_xval": 0.482421875, "num_input_tokens_seen": 70559428, "step": 1260 }, { "epoch": 2.8084632516703785, "grad_norm": 47.72921371459961, "learning_rate": 1e-06, "loss": 0.937, "num_input_tokens_seen": 70614552, "step": 1261 }, { "epoch": 2.8084632516703785, "loss": 0.9824411869049072, "loss_ce": 0.0036814198829233646, "loss_iou": 0.41015625, "loss_num": 0.031494140625, "loss_xval": 0.98046875, "num_input_tokens_seen": 70614552, "step": 1261 }, { "epoch": 2.8106904231625833, "grad_norm": 15.726812362670898, "learning_rate": 1e-06, "loss": 0.8683, "num_input_tokens_seen": 70669544, "step": 1262 }, { "epoch": 2.8106904231625833, "loss": 1.0436663627624512, "loss_ce": 0.0004534028994385153, "loss_iou": 0.42578125, "loss_num": 0.0380859375, "loss_xval": 1.046875, "num_input_tokens_seen": 70669544, "step": 1262 }, { "epoch": 2.812917594654788, "grad_norm": 42.43077087402344, "learning_rate": 1e-06, "loss": 0.8308, "num_input_tokens_seen": 70725468, "step": 1263 }, { "epoch": 2.812917594654788, "loss": 1.070866346359253, "loss_ce": 0.0005538529367186129, "loss_iou": 0.400390625, "loss_num": 0.0537109375, "loss_xval": 1.0703125, "num_input_tokens_seen": 70725468, "step": 1263 }, { "epoch": 2.815144766146993, "grad_norm": 15.935296058654785, "learning_rate": 1e-06, "loss": 0.8946, "num_input_tokens_seen": 70780344, "step": 1264 }, { "epoch": 2.815144766146993, "loss": 0.5159022212028503, "loss_ce": 0.00027721430524252355, "loss_iou": 0.2197265625, "loss_num": 0.015380859375, "loss_xval": 0.515625, "num_input_tokens_seen": 70780344, "step": 1264 }, { "epoch": 2.8173719376391984, "grad_norm": 14.334254264831543, "learning_rate": 1e-06, "loss": 0.7528, "num_input_tokens_seen": 70836060, "step": 1265 }, { "epoch": 2.8173719376391984, "loss": 0.7817806601524353, "loss_ce": 0.0002865034039132297, "loss_iou": 0.33203125, "loss_num": 0.0233154296875, "loss_xval": 0.78125, "num_input_tokens_seen": 70836060, "step": 1265 }, { "epoch": 2.819599109131403, "grad_norm": 19.307138442993164, "learning_rate": 1e-06, "loss": 0.8431, "num_input_tokens_seen": 70893088, "step": 1266 }, { "epoch": 2.819599109131403, "loss": 0.9788370132446289, "loss_ce": 0.00032134755747392774, "loss_iou": 0.412109375, "loss_num": 0.030517578125, "loss_xval": 0.9765625, "num_input_tokens_seen": 70893088, "step": 1266 }, { "epoch": 2.821826280623608, "grad_norm": 25.13027000427246, "learning_rate": 1e-06, "loss": 0.9889, "num_input_tokens_seen": 70949580, "step": 1267 }, { "epoch": 2.821826280623608, "loss": 1.048335313796997, "loss_ce": 0.00048371872981078923, "loss_iou": 0.421875, "loss_num": 0.040771484375, "loss_xval": 1.046875, "num_input_tokens_seen": 70949580, "step": 1267 }, { "epoch": 2.824053452115813, "grad_norm": 23.69377899169922, "learning_rate": 1e-06, "loss": 0.7746, "num_input_tokens_seen": 71005544, "step": 1268 }, { "epoch": 2.824053452115813, "loss": 0.871030330657959, "loss_ce": 0.00042489959741942585, "loss_iou": 0.3203125, "loss_num": 0.046142578125, "loss_xval": 0.87109375, "num_input_tokens_seen": 71005544, "step": 1268 }, { "epoch": 2.826280623608018, "grad_norm": 17.32742691040039, "learning_rate": 1e-06, "loss": 0.6273, "num_input_tokens_seen": 71061960, "step": 1269 }, { "epoch": 2.826280623608018, "loss": 0.5913735628128052, "loss_ce": 0.0003091061080340296, "loss_iou": 0.25, "loss_num": 0.0184326171875, "loss_xval": 0.58984375, "num_input_tokens_seen": 71061960, "step": 1269 }, { "epoch": 2.8285077951002227, "grad_norm": 14.68806266784668, "learning_rate": 1e-06, "loss": 0.9952, "num_input_tokens_seen": 71118024, "step": 1270 }, { "epoch": 2.8285077951002227, "loss": 0.9375334978103638, "loss_ce": 0.00027759268414229155, "loss_iou": 0.400390625, "loss_num": 0.0274658203125, "loss_xval": 0.9375, "num_input_tokens_seen": 71118024, "step": 1270 }, { "epoch": 2.8307349665924275, "grad_norm": 11.978171348571777, "learning_rate": 1e-06, "loss": 0.933, "num_input_tokens_seen": 71170604, "step": 1271 }, { "epoch": 2.8307349665924275, "loss": 1.082184076309204, "loss_ce": 0.0006410967325791717, "loss_iou": 0.455078125, "loss_num": 0.033935546875, "loss_xval": 1.078125, "num_input_tokens_seen": 71170604, "step": 1271 }, { "epoch": 2.8329621380846324, "grad_norm": 16.340866088867188, "learning_rate": 1e-06, "loss": 0.7549, "num_input_tokens_seen": 71227240, "step": 1272 }, { "epoch": 2.8329621380846324, "loss": 0.6341564059257507, "loss_ce": 0.000367343716789037, "loss_iou": 0.27734375, "loss_num": 0.01556396484375, "loss_xval": 0.6328125, "num_input_tokens_seen": 71227240, "step": 1272 }, { "epoch": 2.8351893095768377, "grad_norm": 21.42453384399414, "learning_rate": 1e-06, "loss": 1.3223, "num_input_tokens_seen": 71281800, "step": 1273 }, { "epoch": 2.8351893095768377, "loss": 1.39503812789917, "loss_ce": 0.0005069249891676009, "loss_iou": 0.609375, "loss_num": 0.034912109375, "loss_xval": 1.390625, "num_input_tokens_seen": 71281800, "step": 1273 }, { "epoch": 2.8374164810690425, "grad_norm": 14.59418773651123, "learning_rate": 1e-06, "loss": 0.858, "num_input_tokens_seen": 71338428, "step": 1274 }, { "epoch": 2.8374164810690425, "loss": 0.6660067439079285, "loss_ce": 0.00047936852206476033, "loss_iou": 0.302734375, "loss_num": 0.011962890625, "loss_xval": 0.6640625, "num_input_tokens_seen": 71338428, "step": 1274 }, { "epoch": 2.8396436525612474, "grad_norm": 23.267770767211914, "learning_rate": 1e-06, "loss": 1.0233, "num_input_tokens_seen": 71393332, "step": 1275 }, { "epoch": 2.8396436525612474, "loss": 1.2549712657928467, "loss_ce": 0.0005767274415120482, "loss_iou": 0.4765625, "loss_num": 0.06103515625, "loss_xval": 1.2578125, "num_input_tokens_seen": 71393332, "step": 1275 }, { "epoch": 2.8418708240534523, "grad_norm": 19.297468185424805, "learning_rate": 1e-06, "loss": 0.863, "num_input_tokens_seen": 71450628, "step": 1276 }, { "epoch": 2.8418708240534523, "loss": 0.8579794764518738, "loss_ce": 0.00031341775320470333, "loss_iou": 0.349609375, "loss_num": 0.03125, "loss_xval": 0.859375, "num_input_tokens_seen": 71450628, "step": 1276 }, { "epoch": 2.844097995545657, "grad_norm": 20.925310134887695, "learning_rate": 1e-06, "loss": 0.785, "num_input_tokens_seen": 71509096, "step": 1277 }, { "epoch": 2.844097995545657, "loss": 0.7992924451828003, "loss_ce": 0.0003422028967179358, "loss_iou": 0.310546875, "loss_num": 0.03564453125, "loss_xval": 0.80078125, "num_input_tokens_seen": 71509096, "step": 1277 }, { "epoch": 2.846325167037862, "grad_norm": 21.453073501586914, "learning_rate": 1e-06, "loss": 0.7625, "num_input_tokens_seen": 71567296, "step": 1278 }, { "epoch": 2.846325167037862, "loss": 0.7048360705375671, "loss_ce": 0.0004903356893919408, "loss_iou": 0.3046875, "loss_num": 0.01904296875, "loss_xval": 0.703125, "num_input_tokens_seen": 71567296, "step": 1278 }, { "epoch": 2.848552338530067, "grad_norm": 33.181209564208984, "learning_rate": 1e-06, "loss": 0.891, "num_input_tokens_seen": 71623012, "step": 1279 }, { "epoch": 2.848552338530067, "loss": 0.5747618675231934, "loss_ce": 0.00029898268985562027, "loss_iou": 0.23828125, "loss_num": 0.0194091796875, "loss_xval": 0.57421875, "num_input_tokens_seen": 71623012, "step": 1279 }, { "epoch": 2.8507795100222717, "grad_norm": 17.72443199157715, "learning_rate": 1e-06, "loss": 0.8168, "num_input_tokens_seen": 71680860, "step": 1280 }, { "epoch": 2.8507795100222717, "loss": 0.709847629070282, "loss_ce": 0.00037496426375582814, "loss_iou": 0.3046875, "loss_num": 0.0205078125, "loss_xval": 0.7109375, "num_input_tokens_seen": 71680860, "step": 1280 }, { "epoch": 2.8530066815144766, "grad_norm": 24.76962661743164, "learning_rate": 1e-06, "loss": 0.9463, "num_input_tokens_seen": 71734508, "step": 1281 }, { "epoch": 2.8530066815144766, "loss": 0.9339801669120789, "loss_ce": 0.0003863995661959052, "loss_iou": 0.3828125, "loss_num": 0.03369140625, "loss_xval": 0.93359375, "num_input_tokens_seen": 71734508, "step": 1281 }, { "epoch": 2.8552338530066814, "grad_norm": 22.486467361450195, "learning_rate": 1e-06, "loss": 0.7574, "num_input_tokens_seen": 71790040, "step": 1282 }, { "epoch": 2.8552338530066814, "loss": 0.8373603820800781, "loss_ce": 0.000446313846623525, "loss_iou": 0.3359375, "loss_num": 0.032470703125, "loss_xval": 0.8359375, "num_input_tokens_seen": 71790040, "step": 1282 }, { "epoch": 2.8574610244988863, "grad_norm": 22.6174373626709, "learning_rate": 1e-06, "loss": 0.9784, "num_input_tokens_seen": 71843828, "step": 1283 }, { "epoch": 2.8574610244988863, "loss": 1.0318081378936768, "loss_ce": 0.0005582119338214397, "loss_iou": 0.40625, "loss_num": 0.043212890625, "loss_xval": 1.03125, "num_input_tokens_seen": 71843828, "step": 1283 }, { "epoch": 2.859688195991091, "grad_norm": 18.821727752685547, "learning_rate": 1e-06, "loss": 0.939, "num_input_tokens_seen": 71900448, "step": 1284 }, { "epoch": 2.859688195991091, "loss": 0.8391966819763184, "loss_ce": 0.0006956788711249828, "loss_iou": 0.32421875, "loss_num": 0.037841796875, "loss_xval": 0.83984375, "num_input_tokens_seen": 71900448, "step": 1284 }, { "epoch": 2.861915367483296, "grad_norm": 13.810822486877441, "learning_rate": 1e-06, "loss": 0.9658, "num_input_tokens_seen": 71958344, "step": 1285 }, { "epoch": 2.861915367483296, "loss": 0.921851396560669, "loss_ce": 0.0004646632296498865, "loss_iou": 0.380859375, "loss_num": 0.031494140625, "loss_xval": 0.921875, "num_input_tokens_seen": 71958344, "step": 1285 }, { "epoch": 2.864142538975501, "grad_norm": 19.463180541992188, "learning_rate": 1e-06, "loss": 0.9619, "num_input_tokens_seen": 72014732, "step": 1286 }, { "epoch": 2.864142538975501, "loss": 1.0936338901519775, "loss_ce": 0.0003722285036928952, "loss_iou": 0.44140625, "loss_num": 0.041748046875, "loss_xval": 1.09375, "num_input_tokens_seen": 72014732, "step": 1286 }, { "epoch": 2.866369710467706, "grad_norm": 20.0224666595459, "learning_rate": 1e-06, "loss": 0.8855, "num_input_tokens_seen": 72071312, "step": 1287 }, { "epoch": 2.866369710467706, "loss": 0.9935523271560669, "loss_ce": 0.0011207055067643523, "loss_iou": 0.36328125, "loss_num": 0.053466796875, "loss_xval": 0.9921875, "num_input_tokens_seen": 72071312, "step": 1287 }, { "epoch": 2.868596881959911, "grad_norm": 18.678556442260742, "learning_rate": 1e-06, "loss": 0.8911, "num_input_tokens_seen": 72126324, "step": 1288 }, { "epoch": 2.868596881959911, "loss": 0.7727195024490356, "loss_ce": 0.00025854576961137354, "loss_iou": 0.3203125, "loss_num": 0.0260009765625, "loss_xval": 0.7734375, "num_input_tokens_seen": 72126324, "step": 1288 }, { "epoch": 2.870824053452116, "grad_norm": 13.80396556854248, "learning_rate": 1e-06, "loss": 0.9222, "num_input_tokens_seen": 72181044, "step": 1289 }, { "epoch": 2.870824053452116, "loss": 1.003150224685669, "loss_ce": 0.0004646561574190855, "loss_iou": 0.3984375, "loss_num": 0.041259765625, "loss_xval": 1.0, "num_input_tokens_seen": 72181044, "step": 1289 }, { "epoch": 2.8730512249443207, "grad_norm": 30.7203369140625, "learning_rate": 1e-06, "loss": 1.0782, "num_input_tokens_seen": 72238480, "step": 1290 }, { "epoch": 2.8730512249443207, "loss": 1.1529991626739502, "loss_ce": 0.0004112512688152492, "loss_iou": 0.51171875, "loss_num": 0.0262451171875, "loss_xval": 1.15625, "num_input_tokens_seen": 72238480, "step": 1290 }, { "epoch": 2.8752783964365256, "grad_norm": 25.847972869873047, "learning_rate": 1e-06, "loss": 0.8204, "num_input_tokens_seen": 72293396, "step": 1291 }, { "epoch": 2.8752783964365256, "loss": 1.011704444885254, "loss_ce": 0.0009622069192118943, "loss_iou": 0.380859375, "loss_num": 0.0498046875, "loss_xval": 1.0078125, "num_input_tokens_seen": 72293396, "step": 1291 }, { "epoch": 2.8775055679287305, "grad_norm": 100.3687744140625, "learning_rate": 1e-06, "loss": 0.8099, "num_input_tokens_seen": 72349376, "step": 1292 }, { "epoch": 2.8775055679287305, "loss": 0.7852460741996765, "loss_ce": 0.0003339627292007208, "loss_iou": 0.287109375, "loss_num": 0.0419921875, "loss_xval": 0.78515625, "num_input_tokens_seen": 72349376, "step": 1292 }, { "epoch": 2.8797327394209353, "grad_norm": 31.29021644592285, "learning_rate": 1e-06, "loss": 0.8552, "num_input_tokens_seen": 72405340, "step": 1293 }, { "epoch": 2.8797327394209353, "loss": 0.8164255619049072, "loss_ce": 0.0005075936205685139, "loss_iou": 0.333984375, "loss_num": 0.0296630859375, "loss_xval": 0.81640625, "num_input_tokens_seen": 72405340, "step": 1293 }, { "epoch": 2.88195991091314, "grad_norm": 18.81956672668457, "learning_rate": 1e-06, "loss": 1.1343, "num_input_tokens_seen": 72462008, "step": 1294 }, { "epoch": 2.88195991091314, "loss": 0.831671416759491, "loss_ce": 0.0003726041177287698, "loss_iou": 0.3359375, "loss_num": 0.031982421875, "loss_xval": 0.83203125, "num_input_tokens_seen": 72462008, "step": 1294 }, { "epoch": 2.884187082405345, "grad_norm": 28.06816291809082, "learning_rate": 1e-06, "loss": 0.7352, "num_input_tokens_seen": 72520860, "step": 1295 }, { "epoch": 2.884187082405345, "loss": 0.8308737277984619, "loss_ce": 0.0005514706717804074, "loss_iou": 0.3671875, "loss_num": 0.0191650390625, "loss_xval": 0.83203125, "num_input_tokens_seen": 72520860, "step": 1295 }, { "epoch": 2.8864142538975504, "grad_norm": 119.70653533935547, "learning_rate": 1e-06, "loss": 0.9987, "num_input_tokens_seen": 72578972, "step": 1296 }, { "epoch": 2.8864142538975504, "loss": 1.2035748958587646, "loss_ce": 0.00044986687134951353, "loss_iou": 0.482421875, "loss_num": 0.04736328125, "loss_xval": 1.203125, "num_input_tokens_seen": 72578972, "step": 1296 }, { "epoch": 2.888641425389755, "grad_norm": 23.493093490600586, "learning_rate": 1e-06, "loss": 0.9778, "num_input_tokens_seen": 72634868, "step": 1297 }, { "epoch": 2.888641425389755, "loss": 0.8210999965667725, "loss_ce": 0.0002992242225445807, "loss_iou": 0.353515625, "loss_num": 0.0224609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 72634868, "step": 1297 }, { "epoch": 2.89086859688196, "grad_norm": 112.7999038696289, "learning_rate": 1e-06, "loss": 1.1468, "num_input_tokens_seen": 72686616, "step": 1298 }, { "epoch": 2.89086859688196, "loss": 1.0459107160568237, "loss_ce": 0.0007446431554853916, "loss_iou": 0.4140625, "loss_num": 0.04345703125, "loss_xval": 1.046875, "num_input_tokens_seen": 72686616, "step": 1298 }, { "epoch": 2.893095768374165, "grad_norm": 17.558181762695312, "learning_rate": 1e-06, "loss": 0.9396, "num_input_tokens_seen": 72740388, "step": 1299 }, { "epoch": 2.893095768374165, "loss": 0.843862771987915, "loss_ce": 0.0008451773319393396, "loss_iou": 0.359375, "loss_num": 0.0250244140625, "loss_xval": 0.84375, "num_input_tokens_seen": 72740388, "step": 1299 }, { "epoch": 2.89532293986637, "grad_norm": 20.879600524902344, "learning_rate": 1e-06, "loss": 0.9484, "num_input_tokens_seen": 72796572, "step": 1300 }, { "epoch": 2.89532293986637, "loss": 1.0628635883331299, "loss_ce": 0.00036358111537992954, "loss_iou": 0.4296875, "loss_num": 0.041015625, "loss_xval": 1.0625, "num_input_tokens_seen": 72796572, "step": 1300 }, { "epoch": 2.8975501113585747, "grad_norm": 24.84024429321289, "learning_rate": 1e-06, "loss": 0.8657, "num_input_tokens_seen": 72854948, "step": 1301 }, { "epoch": 2.8975501113585747, "loss": 0.7214281558990479, "loss_ce": 0.00035886449040845037, "loss_iou": 0.306640625, "loss_num": 0.021728515625, "loss_xval": 0.72265625, "num_input_tokens_seen": 72854948, "step": 1301 }, { "epoch": 2.8997772828507795, "grad_norm": 22.32291030883789, "learning_rate": 1e-06, "loss": 1.0913, "num_input_tokens_seen": 72907880, "step": 1302 }, { "epoch": 2.8997772828507795, "loss": 1.042357325553894, "loss_ce": 0.00036513677332550287, "loss_iou": 0.458984375, "loss_num": 0.0245361328125, "loss_xval": 1.0390625, "num_input_tokens_seen": 72907880, "step": 1302 }, { "epoch": 2.9020044543429844, "grad_norm": 94.72842407226562, "learning_rate": 1e-06, "loss": 0.8327, "num_input_tokens_seen": 72965036, "step": 1303 }, { "epoch": 2.9020044543429844, "loss": 0.7842406034469604, "loss_ce": 0.00030507519841194153, "loss_iou": 0.3203125, "loss_num": 0.028564453125, "loss_xval": 0.78515625, "num_input_tokens_seen": 72965036, "step": 1303 }, { "epoch": 2.9042316258351892, "grad_norm": 64.44385528564453, "learning_rate": 1e-06, "loss": 1.0017, "num_input_tokens_seen": 73019812, "step": 1304 }, { "epoch": 2.9042316258351892, "loss": 1.2140803337097168, "loss_ce": 0.0009456113912165165, "loss_iou": 0.46875, "loss_num": 0.055419921875, "loss_xval": 1.2109375, "num_input_tokens_seen": 73019812, "step": 1304 }, { "epoch": 2.906458797327394, "grad_norm": 63.43607711791992, "learning_rate": 1e-06, "loss": 0.6413, "num_input_tokens_seen": 73077144, "step": 1305 }, { "epoch": 2.906458797327394, "loss": 0.6670837998390198, "loss_ce": 0.0003357888199388981, "loss_iou": 0.302734375, "loss_num": 0.01202392578125, "loss_xval": 0.66796875, "num_input_tokens_seen": 73077144, "step": 1305 }, { "epoch": 2.908685968819599, "grad_norm": 19.118846893310547, "learning_rate": 1e-06, "loss": 0.8712, "num_input_tokens_seen": 73134624, "step": 1306 }, { "epoch": 2.908685968819599, "loss": 0.8654381036758423, "loss_ce": 0.00044786007492803037, "loss_iou": 0.37890625, "loss_num": 0.0208740234375, "loss_xval": 0.86328125, "num_input_tokens_seen": 73134624, "step": 1306 }, { "epoch": 2.910913140311804, "grad_norm": 15.382525444030762, "learning_rate": 1e-06, "loss": 0.9095, "num_input_tokens_seen": 73189224, "step": 1307 }, { "epoch": 2.910913140311804, "loss": 0.9012154340744019, "loss_ce": 0.0003364897274877876, "loss_iou": 0.384765625, "loss_num": 0.0260009765625, "loss_xval": 0.90234375, "num_input_tokens_seen": 73189224, "step": 1307 }, { "epoch": 2.9131403118040087, "grad_norm": 19.829851150512695, "learning_rate": 1e-06, "loss": 0.6906, "num_input_tokens_seen": 73245992, "step": 1308 }, { "epoch": 2.9131403118040087, "loss": 0.6446969509124756, "loss_ce": 0.00040984569932334125, "loss_iou": 0.267578125, "loss_num": 0.02197265625, "loss_xval": 0.64453125, "num_input_tokens_seen": 73245992, "step": 1308 }, { "epoch": 2.9153674832962135, "grad_norm": 29.750280380249023, "learning_rate": 1e-06, "loss": 0.9183, "num_input_tokens_seen": 73299640, "step": 1309 }, { "epoch": 2.9153674832962135, "loss": 0.7756353616714478, "loss_ce": 0.0004888884141109884, "loss_iou": 0.33203125, "loss_num": 0.0224609375, "loss_xval": 0.7734375, "num_input_tokens_seen": 73299640, "step": 1309 }, { "epoch": 2.917594654788419, "grad_norm": 14.211082458496094, "learning_rate": 1e-06, "loss": 0.8288, "num_input_tokens_seen": 73354816, "step": 1310 }, { "epoch": 2.917594654788419, "loss": 0.7674408555030823, "loss_ce": 0.0003510417591314763, "loss_iou": 0.328125, "loss_num": 0.0220947265625, "loss_xval": 0.765625, "num_input_tokens_seen": 73354816, "step": 1310 }, { "epoch": 2.9198218262806237, "grad_norm": 23.373992919921875, "learning_rate": 1e-06, "loss": 0.789, "num_input_tokens_seen": 73412668, "step": 1311 }, { "epoch": 2.9198218262806237, "loss": 0.7085351943969727, "loss_ce": 0.0005274016875773668, "loss_iou": 0.3125, "loss_num": 0.016845703125, "loss_xval": 0.70703125, "num_input_tokens_seen": 73412668, "step": 1311 }, { "epoch": 2.9220489977728286, "grad_norm": 16.870405197143555, "learning_rate": 1e-06, "loss": 1.0438, "num_input_tokens_seen": 73468408, "step": 1312 }, { "epoch": 2.9220489977728286, "loss": 0.9538618922233582, "loss_ce": 0.002201691037043929, "loss_iou": 0.421875, "loss_num": 0.0218505859375, "loss_xval": 0.953125, "num_input_tokens_seen": 73468408, "step": 1312 }, { "epoch": 2.9242761692650334, "grad_norm": 17.644731521606445, "learning_rate": 1e-06, "loss": 0.9705, "num_input_tokens_seen": 73525120, "step": 1313 }, { "epoch": 2.9242761692650334, "loss": 1.1021685600280762, "loss_ce": 0.0006060994928702712, "loss_iou": 0.4296875, "loss_num": 0.048828125, "loss_xval": 1.1015625, "num_input_tokens_seen": 73525120, "step": 1313 }, { "epoch": 2.9265033407572383, "grad_norm": 12.963172912597656, "learning_rate": 1e-06, "loss": 0.6575, "num_input_tokens_seen": 73581328, "step": 1314 }, { "epoch": 2.9265033407572383, "loss": 0.7298256754875183, "loss_ce": 0.00033350015291944146, "loss_iou": 0.30859375, "loss_num": 0.022216796875, "loss_xval": 0.73046875, "num_input_tokens_seen": 73581328, "step": 1314 }, { "epoch": 2.928730512249443, "grad_norm": 15.870047569274902, "learning_rate": 1e-06, "loss": 0.9587, "num_input_tokens_seen": 73636896, "step": 1315 }, { "epoch": 2.928730512249443, "loss": 0.8428421020507812, "loss_ce": 0.0003127763920929283, "loss_iou": 0.376953125, "loss_num": 0.0179443359375, "loss_xval": 0.84375, "num_input_tokens_seen": 73636896, "step": 1315 }, { "epoch": 2.930957683741648, "grad_norm": 14.364209175109863, "learning_rate": 1e-06, "loss": 0.8652, "num_input_tokens_seen": 73693568, "step": 1316 }, { "epoch": 2.930957683741648, "loss": 0.7832823395729065, "loss_ce": 0.0003234214964322746, "loss_iou": 0.337890625, "loss_num": 0.021240234375, "loss_xval": 0.78125, "num_input_tokens_seen": 73693568, "step": 1316 }, { "epoch": 2.933184855233853, "grad_norm": 32.526336669921875, "learning_rate": 1e-06, "loss": 0.9016, "num_input_tokens_seen": 73752056, "step": 1317 }, { "epoch": 2.933184855233853, "loss": 0.7434287071228027, "loss_ce": 0.0003867132472805679, "loss_iou": 0.33203125, "loss_num": 0.015869140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 73752056, "step": 1317 }, { "epoch": 2.935412026726058, "grad_norm": 16.808130264282227, "learning_rate": 1e-06, "loss": 0.8978, "num_input_tokens_seen": 73809184, "step": 1318 }, { "epoch": 2.935412026726058, "loss": 0.7791517972946167, "loss_ce": 0.0003431940567679703, "loss_iou": 0.326171875, "loss_num": 0.0255126953125, "loss_xval": 0.77734375, "num_input_tokens_seen": 73809184, "step": 1318 }, { "epoch": 2.937639198218263, "grad_norm": 25.0362548828125, "learning_rate": 1e-06, "loss": 0.9062, "num_input_tokens_seen": 73863224, "step": 1319 }, { "epoch": 2.937639198218263, "loss": 0.8815177083015442, "loss_ce": 0.00041416779276914895, "loss_iou": 0.375, "loss_num": 0.026123046875, "loss_xval": 0.8828125, "num_input_tokens_seen": 73863224, "step": 1319 }, { "epoch": 2.939866369710468, "grad_norm": 23.442148208618164, "learning_rate": 1e-06, "loss": 1.0457, "num_input_tokens_seen": 73919812, "step": 1320 }, { "epoch": 2.939866369710468, "loss": 0.9350742101669312, "loss_ce": 0.0002597759012132883, "loss_iou": 0.388671875, "loss_num": 0.03125, "loss_xval": 0.93359375, "num_input_tokens_seen": 73919812, "step": 1320 }, { "epoch": 2.9420935412026727, "grad_norm": 39.34425735473633, "learning_rate": 1e-06, "loss": 0.7751, "num_input_tokens_seen": 73974388, "step": 1321 }, { "epoch": 2.9420935412026727, "loss": 0.6952590346336365, "loss_ce": 0.00031273809145204723, "loss_iou": 0.302734375, "loss_num": 0.0179443359375, "loss_xval": 0.6953125, "num_input_tokens_seen": 73974388, "step": 1321 }, { "epoch": 2.9443207126948776, "grad_norm": 15.462008476257324, "learning_rate": 1e-06, "loss": 0.9421, "num_input_tokens_seen": 74032576, "step": 1322 }, { "epoch": 2.9443207126948776, "loss": 0.9447818994522095, "loss_ce": 0.0009342257399111986, "loss_iou": 0.33984375, "loss_num": 0.052978515625, "loss_xval": 0.9453125, "num_input_tokens_seen": 74032576, "step": 1322 }, { "epoch": 2.9465478841870825, "grad_norm": 14.060518264770508, "learning_rate": 1e-06, "loss": 0.7786, "num_input_tokens_seen": 74087264, "step": 1323 }, { "epoch": 2.9465478841870825, "loss": 0.6352978944778442, "loss_ce": 0.0005322670331224799, "loss_iou": 0.28125, "loss_num": 0.01483154296875, "loss_xval": 0.6328125, "num_input_tokens_seen": 74087264, "step": 1323 }, { "epoch": 2.9487750556792873, "grad_norm": 22.72332763671875, "learning_rate": 1e-06, "loss": 1.0088, "num_input_tokens_seen": 74142868, "step": 1324 }, { "epoch": 2.9487750556792873, "loss": 1.0528690814971924, "loss_ce": 0.0006230201106518507, "loss_iou": 0.455078125, "loss_num": 0.028564453125, "loss_xval": 1.0546875, "num_input_tokens_seen": 74142868, "step": 1324 }, { "epoch": 2.951002227171492, "grad_norm": 12.463337898254395, "learning_rate": 1e-06, "loss": 1.0772, "num_input_tokens_seen": 74195720, "step": 1325 }, { "epoch": 2.951002227171492, "loss": 1.2419848442077637, "loss_ce": 0.0002855784259736538, "loss_iou": 0.53125, "loss_num": 0.036376953125, "loss_xval": 1.2421875, "num_input_tokens_seen": 74195720, "step": 1325 }, { "epoch": 2.953229398663697, "grad_norm": 21.965835571289062, "learning_rate": 1e-06, "loss": 1.1917, "num_input_tokens_seen": 74249536, "step": 1326 }, { "epoch": 2.953229398663697, "loss": 0.998505711555481, "loss_ce": 0.00045889458851888776, "loss_iou": 0.4140625, "loss_num": 0.033935546875, "loss_xval": 1.0, "num_input_tokens_seen": 74249536, "step": 1326 }, { "epoch": 2.955456570155902, "grad_norm": 19.335357666015625, "learning_rate": 1e-06, "loss": 0.79, "num_input_tokens_seen": 74306892, "step": 1327 }, { "epoch": 2.955456570155902, "loss": 0.7617478370666504, "loss_ce": 0.0003953152918256819, "loss_iou": 0.314453125, "loss_num": 0.026611328125, "loss_xval": 0.76171875, "num_input_tokens_seen": 74306892, "step": 1327 }, { "epoch": 2.9576837416481068, "grad_norm": 17.01285743713379, "learning_rate": 1e-06, "loss": 1.0855, "num_input_tokens_seen": 74363500, "step": 1328 }, { "epoch": 2.9576837416481068, "loss": 1.1959009170532227, "loss_ce": 0.0015648790867999196, "loss_iou": 0.46484375, "loss_num": 0.05224609375, "loss_xval": 1.1953125, "num_input_tokens_seen": 74363500, "step": 1328 }, { "epoch": 2.9599109131403116, "grad_norm": 15.02244758605957, "learning_rate": 1e-06, "loss": 0.8086, "num_input_tokens_seen": 74420444, "step": 1329 }, { "epoch": 2.9599109131403116, "loss": 0.6169714331626892, "loss_ce": 0.00027223769575357437, "loss_iou": 0.265625, "loss_num": 0.0172119140625, "loss_xval": 0.6171875, "num_input_tokens_seen": 74420444, "step": 1329 }, { "epoch": 2.9621380846325165, "grad_norm": 28.502256393432617, "learning_rate": 1e-06, "loss": 0.9974, "num_input_tokens_seen": 74475096, "step": 1330 }, { "epoch": 2.9621380846325165, "loss": 1.2459745407104492, "loss_ce": 0.00036911843926645815, "loss_iou": 0.53515625, "loss_num": 0.03564453125, "loss_xval": 1.2421875, "num_input_tokens_seen": 74475096, "step": 1330 }, { "epoch": 2.9643652561247213, "grad_norm": 78.01403045654297, "learning_rate": 1e-06, "loss": 0.695, "num_input_tokens_seen": 74532852, "step": 1331 }, { "epoch": 2.9643652561247213, "loss": 0.6845568418502808, "loss_ce": 0.000352742470568046, "loss_iou": 0.306640625, "loss_num": 0.014404296875, "loss_xval": 0.68359375, "num_input_tokens_seen": 74532852, "step": 1331 }, { "epoch": 2.9665924276169267, "grad_norm": 15.447741508483887, "learning_rate": 1e-06, "loss": 0.778, "num_input_tokens_seen": 74588216, "step": 1332 }, { "epoch": 2.9665924276169267, "loss": 0.9468116760253906, "loss_ce": 0.00027843567659147084, "loss_iou": 0.388671875, "loss_num": 0.0341796875, "loss_xval": 0.9453125, "num_input_tokens_seen": 74588216, "step": 1332 }, { "epoch": 2.9688195991091315, "grad_norm": 20.878141403198242, "learning_rate": 1e-06, "loss": 0.8829, "num_input_tokens_seen": 74645576, "step": 1333 }, { "epoch": 2.9688195991091315, "loss": 1.0510952472686768, "loss_ce": 0.0003140345506835729, "loss_iou": 0.443359375, "loss_num": 0.033447265625, "loss_xval": 1.046875, "num_input_tokens_seen": 74645576, "step": 1333 }, { "epoch": 2.9710467706013364, "grad_norm": 15.299370765686035, "learning_rate": 1e-06, "loss": 1.2055, "num_input_tokens_seen": 74704232, "step": 1334 }, { "epoch": 2.9710467706013364, "loss": 1.2119064331054688, "loss_ce": 0.0012130287941545248, "loss_iou": 0.51171875, "loss_num": 0.037353515625, "loss_xval": 1.2109375, "num_input_tokens_seen": 74704232, "step": 1334 }, { "epoch": 2.9732739420935412, "grad_norm": 19.733963012695312, "learning_rate": 1e-06, "loss": 0.8997, "num_input_tokens_seen": 74757716, "step": 1335 }, { "epoch": 2.9732739420935412, "loss": 0.8220304250717163, "loss_ce": 0.0004972000606358051, "loss_iou": 0.345703125, "loss_num": 0.0257568359375, "loss_xval": 0.8203125, "num_input_tokens_seen": 74757716, "step": 1335 }, { "epoch": 2.975501113585746, "grad_norm": 23.857521057128906, "learning_rate": 1e-06, "loss": 0.9514, "num_input_tokens_seen": 74811252, "step": 1336 }, { "epoch": 2.975501113585746, "loss": 1.0858557224273682, "loss_ce": 0.0004064875829499215, "loss_iou": 0.455078125, "loss_num": 0.035400390625, "loss_xval": 1.0859375, "num_input_tokens_seen": 74811252, "step": 1336 }, { "epoch": 2.977728285077951, "grad_norm": 17.06792449951172, "learning_rate": 1e-06, "loss": 0.8545, "num_input_tokens_seen": 74867424, "step": 1337 }, { "epoch": 2.977728285077951, "loss": 0.698322057723999, "loss_ce": 0.0003239895449951291, "loss_iou": 0.32421875, "loss_num": 0.009765625, "loss_xval": 0.69921875, "num_input_tokens_seen": 74867424, "step": 1337 }, { "epoch": 2.979955456570156, "grad_norm": 21.56948471069336, "learning_rate": 1e-06, "loss": 0.8931, "num_input_tokens_seen": 74923808, "step": 1338 }, { "epoch": 2.979955456570156, "loss": 1.2361738681793213, "loss_ce": 0.0003339699178468436, "loss_iou": 0.494140625, "loss_num": 0.0498046875, "loss_xval": 1.234375, "num_input_tokens_seen": 74923808, "step": 1338 }, { "epoch": 2.9821826280623607, "grad_norm": 18.15468978881836, "learning_rate": 1e-06, "loss": 0.7672, "num_input_tokens_seen": 74978584, "step": 1339 }, { "epoch": 2.9821826280623607, "loss": 0.5524653792381287, "loss_ce": 0.0003413406084291637, "loss_iou": 0.232421875, "loss_num": 0.0174560546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 74978584, "step": 1339 }, { "epoch": 2.984409799554566, "grad_norm": 19.710803985595703, "learning_rate": 1e-06, "loss": 0.9056, "num_input_tokens_seen": 75033200, "step": 1340 }, { "epoch": 2.984409799554566, "loss": 0.7972690463066101, "loss_ce": 0.00039405166171491146, "loss_iou": 0.328125, "loss_num": 0.02783203125, "loss_xval": 0.796875, "num_input_tokens_seen": 75033200, "step": 1340 }, { "epoch": 2.986636971046771, "grad_norm": 17.216930389404297, "learning_rate": 1e-06, "loss": 0.7153, "num_input_tokens_seen": 75088396, "step": 1341 }, { "epoch": 2.986636971046771, "loss": 0.7058947682380676, "loss_ce": 0.00032835971796885133, "loss_iou": 0.291015625, "loss_num": 0.0247802734375, "loss_xval": 0.70703125, "num_input_tokens_seen": 75088396, "step": 1341 }, { "epoch": 2.9888641425389757, "grad_norm": 40.40977478027344, "learning_rate": 1e-06, "loss": 1.0308, "num_input_tokens_seen": 75146988, "step": 1342 }, { "epoch": 2.9888641425389757, "loss": 0.981564462184906, "loss_ce": 0.000363294588169083, "loss_iou": 0.42578125, "loss_num": 0.0255126953125, "loss_xval": 0.98046875, "num_input_tokens_seen": 75146988, "step": 1342 }, { "epoch": 2.9910913140311806, "grad_norm": 409.5078430175781, "learning_rate": 1e-06, "loss": 1.0702, "num_input_tokens_seen": 75204000, "step": 1343 }, { "epoch": 2.9910913140311806, "loss": 0.9847346544265747, "loss_ce": 0.0003596529713831842, "loss_iou": 0.3984375, "loss_num": 0.037841796875, "loss_xval": 0.984375, "num_input_tokens_seen": 75204000, "step": 1343 }, { "epoch": 2.9933184855233854, "grad_norm": 14.489014625549316, "learning_rate": 1e-06, "loss": 0.9728, "num_input_tokens_seen": 75260556, "step": 1344 }, { "epoch": 2.9933184855233854, "loss": 1.0310882329940796, "loss_ce": 0.0003265210543759167, "loss_iou": 0.390625, "loss_num": 0.050048828125, "loss_xval": 1.03125, "num_input_tokens_seen": 75260556, "step": 1344 }, { "epoch": 2.9955456570155903, "grad_norm": 21.714834213256836, "learning_rate": 1e-06, "loss": 0.8475, "num_input_tokens_seen": 75318464, "step": 1345 }, { "epoch": 2.9955456570155903, "loss": 0.8694126605987549, "loss_ce": 0.0005162069573998451, "loss_iou": 0.384765625, "loss_num": 0.0198974609375, "loss_xval": 0.8671875, "num_input_tokens_seen": 75318464, "step": 1345 }, { "epoch": 2.997772828507795, "grad_norm": 18.179824829101562, "learning_rate": 1e-06, "loss": 0.8263, "num_input_tokens_seen": 75375036, "step": 1346 }, { "epoch": 2.997772828507795, "loss": 0.9615136384963989, "loss_ce": 0.0003319892566651106, "loss_iou": 0.376953125, "loss_num": 0.04150390625, "loss_xval": 0.9609375, "num_input_tokens_seen": 75375036, "step": 1346 }, { "epoch": 3.0, "grad_norm": 17.222766876220703, "learning_rate": 1e-06, "loss": 0.969, "num_input_tokens_seen": 75431008, "step": 1347 }, { "epoch": 3.0, "loss": 0.4970862567424774, "loss_ce": 0.00038217363180592656, "loss_iou": 0.2158203125, "loss_num": 0.01287841796875, "loss_xval": 0.49609375, "num_input_tokens_seen": 75431008, "step": 1347 }, { "epoch": 3.002227171492205, "grad_norm": 15.791449546813965, "learning_rate": 1e-06, "loss": 1.1072, "num_input_tokens_seen": 75486424, "step": 1348 }, { "epoch": 3.002227171492205, "loss": 1.2740671634674072, "loss_ce": 0.0006296620704233646, "loss_iou": 0.515625, "loss_num": 0.04833984375, "loss_xval": 1.2734375, "num_input_tokens_seen": 75486424, "step": 1348 }, { "epoch": 3.0044543429844097, "grad_norm": 18.678091049194336, "learning_rate": 1e-06, "loss": 0.8753, "num_input_tokens_seen": 75542876, "step": 1349 }, { "epoch": 3.0044543429844097, "loss": 0.7326757907867432, "loss_ce": 0.00025389608344994485, "loss_iou": 0.328125, "loss_num": 0.01513671875, "loss_xval": 0.734375, "num_input_tokens_seen": 75542876, "step": 1349 }, { "epoch": 3.0066815144766146, "grad_norm": 33.28176498413086, "learning_rate": 1e-06, "loss": 0.8996, "num_input_tokens_seen": 75599980, "step": 1350 }, { "epoch": 3.0066815144766146, "loss": 0.8423166275024414, "loss_ce": 0.00051976612303406, "loss_iou": 0.34375, "loss_num": 0.0308837890625, "loss_xval": 0.84375, "num_input_tokens_seen": 75599980, "step": 1350 }, { "epoch": 3.0089086859688194, "grad_norm": 16.385852813720703, "learning_rate": 1e-06, "loss": 0.9101, "num_input_tokens_seen": 75656456, "step": 1351 }, { "epoch": 3.0089086859688194, "loss": 1.0261783599853516, "loss_ce": 0.0005435821949504316, "loss_iou": 0.44140625, "loss_num": 0.0283203125, "loss_xval": 1.0234375, "num_input_tokens_seen": 75656456, "step": 1351 }, { "epoch": 3.0111358574610243, "grad_norm": 23.76874351501465, "learning_rate": 1e-06, "loss": 0.9519, "num_input_tokens_seen": 75715892, "step": 1352 }, { "epoch": 3.0111358574610243, "loss": 0.8311777114868164, "loss_ce": 0.0003672138263937086, "loss_iou": 0.353515625, "loss_num": 0.0244140625, "loss_xval": 0.83203125, "num_input_tokens_seen": 75715892, "step": 1352 }, { "epoch": 3.0133630289532296, "grad_norm": 20.76018714904785, "learning_rate": 1e-06, "loss": 0.8318, "num_input_tokens_seen": 75774108, "step": 1353 }, { "epoch": 3.0133630289532296, "loss": 0.8268052935600281, "loss_ce": 0.0006334498757496476, "loss_iou": 0.328125, "loss_num": 0.034423828125, "loss_xval": 0.828125, "num_input_tokens_seen": 75774108, "step": 1353 }, { "epoch": 3.0155902004454345, "grad_norm": 13.831488609313965, "learning_rate": 1e-06, "loss": 0.8734, "num_input_tokens_seen": 75829580, "step": 1354 }, { "epoch": 3.0155902004454345, "loss": 0.7394604682922363, "loss_ce": 0.00032472447492182255, "loss_iou": 0.30859375, "loss_num": 0.0242919921875, "loss_xval": 0.73828125, "num_input_tokens_seen": 75829580, "step": 1354 }, { "epoch": 3.0178173719376393, "grad_norm": 32.255157470703125, "learning_rate": 1e-06, "loss": 0.7957, "num_input_tokens_seen": 75888104, "step": 1355 }, { "epoch": 3.0178173719376393, "loss": 0.7028149366378784, "loss_ce": 0.0003003134625032544, "loss_iou": 0.29296875, "loss_num": 0.023193359375, "loss_xval": 0.703125, "num_input_tokens_seen": 75888104, "step": 1355 }, { "epoch": 3.020044543429844, "grad_norm": 38.09650802612305, "learning_rate": 1e-06, "loss": 0.8811, "num_input_tokens_seen": 75946020, "step": 1356 }, { "epoch": 3.020044543429844, "loss": 0.7325311899185181, "loss_ce": 0.00035350650432519615, "loss_iou": 0.306640625, "loss_num": 0.0240478515625, "loss_xval": 0.73046875, "num_input_tokens_seen": 75946020, "step": 1356 }, { "epoch": 3.022271714922049, "grad_norm": 16.011274337768555, "learning_rate": 1e-06, "loss": 0.7803, "num_input_tokens_seen": 76003528, "step": 1357 }, { "epoch": 3.022271714922049, "loss": 0.8663822412490845, "loss_ce": 0.0004154411144554615, "loss_iou": 0.380859375, "loss_num": 0.0211181640625, "loss_xval": 0.8671875, "num_input_tokens_seen": 76003528, "step": 1357 }, { "epoch": 3.024498886414254, "grad_norm": 33.57292556762695, "learning_rate": 1e-06, "loss": 0.9545, "num_input_tokens_seen": 76059112, "step": 1358 }, { "epoch": 3.024498886414254, "loss": 0.9820139408111572, "loss_ce": 0.00032445252873003483, "loss_iou": 0.37890625, "loss_num": 0.044921875, "loss_xval": 0.98046875, "num_input_tokens_seen": 76059112, "step": 1358 }, { "epoch": 3.0267260579064588, "grad_norm": 18.756921768188477, "learning_rate": 1e-06, "loss": 0.8035, "num_input_tokens_seen": 76115888, "step": 1359 }, { "epoch": 3.0267260579064588, "loss": 0.6882917284965515, "loss_ce": 0.00030343266553245485, "loss_iou": 0.3046875, "loss_num": 0.0155029296875, "loss_xval": 0.6875, "num_input_tokens_seen": 76115888, "step": 1359 }, { "epoch": 3.0289532293986636, "grad_norm": 46.1540641784668, "learning_rate": 1e-06, "loss": 0.9699, "num_input_tokens_seen": 76175268, "step": 1360 }, { "epoch": 3.0289532293986636, "loss": 0.7430423498153687, "loss_ce": 0.00036659964825958014, "loss_iou": 0.291015625, "loss_num": 0.032470703125, "loss_xval": 0.7421875, "num_input_tokens_seen": 76175268, "step": 1360 }, { "epoch": 3.0311804008908685, "grad_norm": 18.095458984375, "learning_rate": 1e-06, "loss": 1.0666, "num_input_tokens_seen": 76232400, "step": 1361 }, { "epoch": 3.0311804008908685, "loss": 0.8669412732124329, "loss_ce": 0.00048618431901559234, "loss_iou": 0.34765625, "loss_num": 0.0341796875, "loss_xval": 0.8671875, "num_input_tokens_seen": 76232400, "step": 1361 }, { "epoch": 3.0334075723830733, "grad_norm": 16.167116165161133, "learning_rate": 1e-06, "loss": 0.7831, "num_input_tokens_seen": 76287924, "step": 1362 }, { "epoch": 3.0334075723830733, "loss": 0.9040330648422241, "loss_ce": 0.00046859911526553333, "loss_iou": 0.3515625, "loss_num": 0.0400390625, "loss_xval": 0.90234375, "num_input_tokens_seen": 76287924, "step": 1362 }, { "epoch": 3.035634743875278, "grad_norm": 25.74004364013672, "learning_rate": 1e-06, "loss": 1.0122, "num_input_tokens_seen": 76344832, "step": 1363 }, { "epoch": 3.035634743875278, "loss": 1.1795121431350708, "loss_ce": 0.0005570236244238913, "loss_iou": 0.447265625, "loss_num": 0.05712890625, "loss_xval": 1.1796875, "num_input_tokens_seen": 76344832, "step": 1363 }, { "epoch": 3.0378619153674835, "grad_norm": 25.54564094543457, "learning_rate": 1e-06, "loss": 0.9171, "num_input_tokens_seen": 76399332, "step": 1364 }, { "epoch": 3.0378619153674835, "loss": 0.7992980480194092, "loss_ce": 0.0004698993288911879, "loss_iou": 0.359375, "loss_num": 0.0157470703125, "loss_xval": 0.796875, "num_input_tokens_seen": 76399332, "step": 1364 }, { "epoch": 3.0400890868596884, "grad_norm": 14.032575607299805, "learning_rate": 1e-06, "loss": 1.0658, "num_input_tokens_seen": 76452824, "step": 1365 }, { "epoch": 3.0400890868596884, "loss": 1.1477856636047363, "loss_ce": 0.000324697612086311, "loss_iou": 0.486328125, "loss_num": 0.034912109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 76452824, "step": 1365 }, { "epoch": 3.0423162583518932, "grad_norm": 27.30165672302246, "learning_rate": 1e-06, "loss": 0.838, "num_input_tokens_seen": 76507928, "step": 1366 }, { "epoch": 3.0423162583518932, "loss": 1.014754056930542, "loss_ce": 0.00034971325658261776, "loss_iou": 0.41796875, "loss_num": 0.03515625, "loss_xval": 1.015625, "num_input_tokens_seen": 76507928, "step": 1366 }, { "epoch": 3.044543429844098, "grad_norm": 30.125919342041016, "learning_rate": 1e-06, "loss": 0.9891, "num_input_tokens_seen": 76561868, "step": 1367 }, { "epoch": 3.044543429844098, "loss": 1.0369231700897217, "loss_ce": 0.0007904000231064856, "loss_iou": 0.421875, "loss_num": 0.038330078125, "loss_xval": 1.0390625, "num_input_tokens_seen": 76561868, "step": 1367 }, { "epoch": 3.046770601336303, "grad_norm": 15.93181324005127, "learning_rate": 1e-06, "loss": 1.4316, "num_input_tokens_seen": 76615632, "step": 1368 }, { "epoch": 3.046770601336303, "loss": 1.6312259435653687, "loss_ce": 0.00036655933945439756, "loss_iou": 0.68359375, "loss_num": 0.052734375, "loss_xval": 1.6328125, "num_input_tokens_seen": 76615632, "step": 1368 }, { "epoch": 3.048997772828508, "grad_norm": 12.806370735168457, "learning_rate": 1e-06, "loss": 0.6985, "num_input_tokens_seen": 76672624, "step": 1369 }, { "epoch": 3.048997772828508, "loss": 0.7415366172790527, "loss_ce": 0.0003256290510762483, "loss_iou": 0.32421875, "loss_num": 0.018310546875, "loss_xval": 0.7421875, "num_input_tokens_seen": 76672624, "step": 1369 }, { "epoch": 3.0512249443207127, "grad_norm": 19.221446990966797, "learning_rate": 1e-06, "loss": 0.8891, "num_input_tokens_seen": 76729096, "step": 1370 }, { "epoch": 3.0512249443207127, "loss": 0.729907751083374, "loss_ce": 0.0004155657079536468, "loss_iou": 0.3125, "loss_num": 0.0208740234375, "loss_xval": 0.73046875, "num_input_tokens_seen": 76729096, "step": 1370 }, { "epoch": 3.0534521158129175, "grad_norm": 41.225067138671875, "learning_rate": 1e-06, "loss": 1.0878, "num_input_tokens_seen": 76783684, "step": 1371 }, { "epoch": 3.0534521158129175, "loss": 1.0934221744537354, "loss_ce": 0.0004046167596243322, "loss_iou": 0.44921875, "loss_num": 0.03955078125, "loss_xval": 1.09375, "num_input_tokens_seen": 76783684, "step": 1371 }, { "epoch": 3.0556792873051224, "grad_norm": 75.78993225097656, "learning_rate": 1e-06, "loss": 0.736, "num_input_tokens_seen": 76841952, "step": 1372 }, { "epoch": 3.0556792873051224, "loss": 0.587233304977417, "loss_ce": 0.00031919381581246853, "loss_iou": 0.251953125, "loss_num": 0.0162353515625, "loss_xval": 0.5859375, "num_input_tokens_seen": 76841952, "step": 1372 }, { "epoch": 3.0579064587973273, "grad_norm": 21.043071746826172, "learning_rate": 1e-06, "loss": 0.796, "num_input_tokens_seen": 76897836, "step": 1373 }, { "epoch": 3.0579064587973273, "loss": 0.8417444229125977, "loss_ce": 0.0004358206642791629, "loss_iou": 0.31640625, "loss_num": 0.0419921875, "loss_xval": 0.83984375, "num_input_tokens_seen": 76897836, "step": 1373 }, { "epoch": 3.060133630289532, "grad_norm": 15.805767059326172, "learning_rate": 1e-06, "loss": 0.9074, "num_input_tokens_seen": 76955312, "step": 1374 }, { "epoch": 3.060133630289532, "loss": 0.8958030343055725, "loss_ce": 0.0002952259674202651, "loss_iou": 0.369140625, "loss_num": 0.03173828125, "loss_xval": 0.89453125, "num_input_tokens_seen": 76955312, "step": 1374 }, { "epoch": 3.062360801781737, "grad_norm": 17.199810028076172, "learning_rate": 1e-06, "loss": 0.9986, "num_input_tokens_seen": 77010728, "step": 1375 }, { "epoch": 3.062360801781737, "loss": 0.8748060464859009, "loss_ce": 0.00029435683973133564, "loss_iou": 0.390625, "loss_num": 0.018310546875, "loss_xval": 0.875, "num_input_tokens_seen": 77010728, "step": 1375 }, { "epoch": 3.0645879732739423, "grad_norm": 25.19390869140625, "learning_rate": 1e-06, "loss": 0.8589, "num_input_tokens_seen": 77067240, "step": 1376 }, { "epoch": 3.0645879732739423, "loss": 0.8482306003570557, "loss_ce": 0.00033024855656549335, "loss_iou": 0.349609375, "loss_num": 0.0301513671875, "loss_xval": 0.84765625, "num_input_tokens_seen": 77067240, "step": 1376 }, { "epoch": 3.066815144766147, "grad_norm": 32.438499450683594, "learning_rate": 1e-06, "loss": 0.7083, "num_input_tokens_seen": 77124068, "step": 1377 }, { "epoch": 3.066815144766147, "loss": 0.721910297870636, "loss_ce": 0.0035265071783214808, "loss_iou": 0.310546875, "loss_num": 0.019287109375, "loss_xval": 0.71875, "num_input_tokens_seen": 77124068, "step": 1377 }, { "epoch": 3.069042316258352, "grad_norm": 17.08912467956543, "learning_rate": 1e-06, "loss": 0.9961, "num_input_tokens_seen": 77183416, "step": 1378 }, { "epoch": 3.069042316258352, "loss": 1.1389042139053345, "loss_ce": 0.0004764240875374526, "loss_iou": 0.48046875, "loss_num": 0.035400390625, "loss_xval": 1.140625, "num_input_tokens_seen": 77183416, "step": 1378 }, { "epoch": 3.071269487750557, "grad_norm": 23.56962776184082, "learning_rate": 1e-06, "loss": 0.7918, "num_input_tokens_seen": 77241704, "step": 1379 }, { "epoch": 3.071269487750557, "loss": 0.9982140064239502, "loss_ce": 0.000411315995734185, "loss_iou": 0.375, "loss_num": 0.050048828125, "loss_xval": 0.99609375, "num_input_tokens_seen": 77241704, "step": 1379 }, { "epoch": 3.0734966592427617, "grad_norm": 55.8472785949707, "learning_rate": 1e-06, "loss": 0.9511, "num_input_tokens_seen": 77297284, "step": 1380 }, { "epoch": 3.0734966592427617, "loss": 0.7722369432449341, "loss_ce": 0.0005083876312710345, "loss_iou": 0.33984375, "loss_num": 0.01806640625, "loss_xval": 0.7734375, "num_input_tokens_seen": 77297284, "step": 1380 }, { "epoch": 3.0757238307349666, "grad_norm": 19.836666107177734, "learning_rate": 1e-06, "loss": 0.9226, "num_input_tokens_seen": 77351728, "step": 1381 }, { "epoch": 3.0757238307349666, "loss": 0.816044807434082, "loss_ce": 0.0003710235469043255, "loss_iou": 0.333984375, "loss_num": 0.029296875, "loss_xval": 0.81640625, "num_input_tokens_seen": 77351728, "step": 1381 }, { "epoch": 3.0779510022271714, "grad_norm": 32.599266052246094, "learning_rate": 1e-06, "loss": 1.1038, "num_input_tokens_seen": 77408188, "step": 1382 }, { "epoch": 3.0779510022271714, "loss": 1.1422669887542725, "loss_ce": 0.0006654143799096346, "loss_iou": 0.474609375, "loss_num": 0.038818359375, "loss_xval": 1.140625, "num_input_tokens_seen": 77408188, "step": 1382 }, { "epoch": 3.0801781737193763, "grad_norm": 24.89826774597168, "learning_rate": 1e-06, "loss": 0.9169, "num_input_tokens_seen": 77463240, "step": 1383 }, { "epoch": 3.0801781737193763, "loss": 0.7601173520088196, "loss_ce": 0.000351747585227713, "loss_iou": 0.283203125, "loss_num": 0.0390625, "loss_xval": 0.7578125, "num_input_tokens_seen": 77463240, "step": 1383 }, { "epoch": 3.082405345211581, "grad_norm": 11.264007568359375, "learning_rate": 1e-06, "loss": 0.5929, "num_input_tokens_seen": 77520856, "step": 1384 }, { "epoch": 3.082405345211581, "loss": 0.5832231044769287, "loss_ce": 0.0009477226412855089, "loss_iou": 0.2177734375, "loss_num": 0.029052734375, "loss_xval": 0.58203125, "num_input_tokens_seen": 77520856, "step": 1384 }, { "epoch": 3.084632516703786, "grad_norm": 20.889442443847656, "learning_rate": 1e-06, "loss": 0.9322, "num_input_tokens_seen": 77575336, "step": 1385 }, { "epoch": 3.084632516703786, "loss": 0.9303987622261047, "loss_ce": 0.000467123172711581, "loss_iou": 0.39453125, "loss_num": 0.0283203125, "loss_xval": 0.9296875, "num_input_tokens_seen": 77575336, "step": 1385 }, { "epoch": 3.086859688195991, "grad_norm": 21.084487915039062, "learning_rate": 1e-06, "loss": 0.7207, "num_input_tokens_seen": 77631024, "step": 1386 }, { "epoch": 3.086859688195991, "loss": 0.8573043346405029, "loss_ce": 0.0003706898423843086, "loss_iou": 0.359375, "loss_num": 0.0274658203125, "loss_xval": 0.85546875, "num_input_tokens_seen": 77631024, "step": 1386 }, { "epoch": 3.089086859688196, "grad_norm": 27.087024688720703, "learning_rate": 1e-06, "loss": 0.8915, "num_input_tokens_seen": 77689520, "step": 1387 }, { "epoch": 3.089086859688196, "loss": 1.0073137283325195, "loss_ce": 0.00047768885269761086, "loss_iou": 0.416015625, "loss_num": 0.03515625, "loss_xval": 1.0078125, "num_input_tokens_seen": 77689520, "step": 1387 }, { "epoch": 3.091314031180401, "grad_norm": 13.960137367248535, "learning_rate": 1e-06, "loss": 0.8432, "num_input_tokens_seen": 77744956, "step": 1388 }, { "epoch": 3.091314031180401, "loss": 0.9717408418655396, "loss_ce": 0.0003052502288483083, "loss_iou": 0.4140625, "loss_num": 0.029052734375, "loss_xval": 0.97265625, "num_input_tokens_seen": 77744956, "step": 1388 }, { "epoch": 3.093541202672606, "grad_norm": 59.64553451538086, "learning_rate": 1e-06, "loss": 0.8163, "num_input_tokens_seen": 77800484, "step": 1389 }, { "epoch": 3.093541202672606, "loss": 0.7509998679161072, "loss_ce": 0.00026743774651549757, "loss_iou": 0.30859375, "loss_num": 0.02685546875, "loss_xval": 0.75, "num_input_tokens_seen": 77800484, "step": 1389 }, { "epoch": 3.0957683741648108, "grad_norm": 29.264339447021484, "learning_rate": 1e-06, "loss": 1.0133, "num_input_tokens_seen": 77857748, "step": 1390 }, { "epoch": 3.0957683741648108, "loss": 1.0564740896224976, "loss_ce": 0.0003217510529793799, "loss_iou": 0.43359375, "loss_num": 0.0380859375, "loss_xval": 1.0546875, "num_input_tokens_seen": 77857748, "step": 1390 }, { "epoch": 3.0979955456570156, "grad_norm": 16.573060989379883, "learning_rate": 1e-06, "loss": 0.8231, "num_input_tokens_seen": 77912436, "step": 1391 }, { "epoch": 3.0979955456570156, "loss": 0.7225488424301147, "loss_ce": 0.0006250177975744009, "loss_iou": 0.265625, "loss_num": 0.03857421875, "loss_xval": 0.72265625, "num_input_tokens_seen": 77912436, "step": 1391 }, { "epoch": 3.1002227171492205, "grad_norm": 23.289533615112305, "learning_rate": 1e-06, "loss": 0.6688, "num_input_tokens_seen": 77971256, "step": 1392 }, { "epoch": 3.1002227171492205, "loss": 0.6361066102981567, "loss_ce": 0.0015851398929953575, "loss_iou": 0.26171875, "loss_num": 0.02197265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 77971256, "step": 1392 }, { "epoch": 3.1024498886414253, "grad_norm": 37.72678756713867, "learning_rate": 1e-06, "loss": 1.0144, "num_input_tokens_seen": 78026280, "step": 1393 }, { "epoch": 3.1024498886414253, "loss": 0.9221377968788147, "loss_ce": 0.0007510941359214485, "loss_iou": 0.392578125, "loss_num": 0.027587890625, "loss_xval": 0.921875, "num_input_tokens_seen": 78026280, "step": 1393 }, { "epoch": 3.10467706013363, "grad_norm": 62.46157455444336, "learning_rate": 1e-06, "loss": 0.8603, "num_input_tokens_seen": 78083452, "step": 1394 }, { "epoch": 3.10467706013363, "loss": 0.6961342096328735, "loss_ce": 0.000333470175974071, "loss_iou": 0.3125, "loss_num": 0.01434326171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 78083452, "step": 1394 }, { "epoch": 3.106904231625835, "grad_norm": 29.57560920715332, "learning_rate": 1e-06, "loss": 0.8214, "num_input_tokens_seen": 78138744, "step": 1395 }, { "epoch": 3.106904231625835, "loss": 0.8320956230163574, "loss_ce": 0.00030853800126351416, "loss_iou": 0.36328125, "loss_num": 0.0211181640625, "loss_xval": 0.83203125, "num_input_tokens_seen": 78138744, "step": 1395 }, { "epoch": 3.10913140311804, "grad_norm": 23.34572982788086, "learning_rate": 1e-06, "loss": 0.9635, "num_input_tokens_seen": 78194916, "step": 1396 }, { "epoch": 3.10913140311804, "loss": 1.1087517738342285, "loss_ce": 0.0003533945418894291, "loss_iou": 0.458984375, "loss_num": 0.037841796875, "loss_xval": 1.109375, "num_input_tokens_seen": 78194916, "step": 1396 }, { "epoch": 3.111358574610245, "grad_norm": 19.57944679260254, "learning_rate": 1e-06, "loss": 0.7877, "num_input_tokens_seen": 78251972, "step": 1397 }, { "epoch": 3.111358574610245, "loss": 0.6399654150009155, "loss_ce": 0.0003169975243508816, "loss_iou": 0.28515625, "loss_num": 0.01422119140625, "loss_xval": 0.640625, "num_input_tokens_seen": 78251972, "step": 1397 }, { "epoch": 3.11358574610245, "grad_norm": 15.57333755493164, "learning_rate": 1e-06, "loss": 0.7701, "num_input_tokens_seen": 78304736, "step": 1398 }, { "epoch": 3.11358574610245, "loss": 0.7310658693313599, "loss_ce": 0.00035301962634548545, "loss_iou": 0.28125, "loss_num": 0.03369140625, "loss_xval": 0.73046875, "num_input_tokens_seen": 78304736, "step": 1398 }, { "epoch": 3.115812917594655, "grad_norm": 20.79068946838379, "learning_rate": 1e-06, "loss": 0.9479, "num_input_tokens_seen": 78360580, "step": 1399 }, { "epoch": 3.115812917594655, "loss": 0.7796562910079956, "loss_ce": 0.0006035708356648684, "loss_iou": 0.3125, "loss_num": 0.03076171875, "loss_xval": 0.77734375, "num_input_tokens_seen": 78360580, "step": 1399 }, { "epoch": 3.11804008908686, "grad_norm": 21.23663330078125, "learning_rate": 1e-06, "loss": 0.8893, "num_input_tokens_seen": 78417556, "step": 1400 }, { "epoch": 3.11804008908686, "loss": 0.6944925785064697, "loss_ce": 0.0004008029936812818, "loss_iou": 0.30078125, "loss_num": 0.018310546875, "loss_xval": 0.6953125, "num_input_tokens_seen": 78417556, "step": 1400 }, { "epoch": 3.1202672605790647, "grad_norm": 20.82147789001465, "learning_rate": 1e-06, "loss": 0.7757, "num_input_tokens_seen": 78472356, "step": 1401 }, { "epoch": 3.1202672605790647, "loss": 0.6926823258399963, "loss_ce": 0.00029954389901831746, "loss_iou": 0.294921875, "loss_num": 0.0203857421875, "loss_xval": 0.69140625, "num_input_tokens_seen": 78472356, "step": 1401 }, { "epoch": 3.1224944320712695, "grad_norm": 24.593259811401367, "learning_rate": 1e-06, "loss": 1.1074, "num_input_tokens_seen": 78529324, "step": 1402 }, { "epoch": 3.1224944320712695, "loss": 1.332648515701294, "loss_ce": 0.0006172613939270377, "loss_iou": 0.52734375, "loss_num": 0.0556640625, "loss_xval": 1.328125, "num_input_tokens_seen": 78529324, "step": 1402 }, { "epoch": 3.1247216035634744, "grad_norm": 16.219890594482422, "learning_rate": 1e-06, "loss": 0.8194, "num_input_tokens_seen": 78582848, "step": 1403 }, { "epoch": 3.1247216035634744, "loss": 0.8170123100280762, "loss_ce": 0.00036187097430229187, "loss_iou": 0.369140625, "loss_num": 0.01556396484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 78582848, "step": 1403 }, { "epoch": 3.1269487750556793, "grad_norm": 24.382253646850586, "learning_rate": 1e-06, "loss": 0.9872, "num_input_tokens_seen": 78637908, "step": 1404 }, { "epoch": 3.1269487750556793, "loss": 0.9994413256645203, "loss_ce": 0.0004178856033831835, "loss_iou": 0.412109375, "loss_num": 0.03515625, "loss_xval": 1.0, "num_input_tokens_seen": 78637908, "step": 1404 }, { "epoch": 3.129175946547884, "grad_norm": 16.891210556030273, "learning_rate": 1e-06, "loss": 0.7293, "num_input_tokens_seen": 78692304, "step": 1405 }, { "epoch": 3.129175946547884, "loss": 0.6750854253768921, "loss_ce": 0.0002806965203490108, "loss_iou": 0.27734375, "loss_num": 0.0240478515625, "loss_xval": 0.67578125, "num_input_tokens_seen": 78692304, "step": 1405 }, { "epoch": 3.131403118040089, "grad_norm": 22.945178985595703, "learning_rate": 1e-06, "loss": 0.944, "num_input_tokens_seen": 78746900, "step": 1406 }, { "epoch": 3.131403118040089, "loss": 1.02711820602417, "loss_ce": 0.000506936979945749, "loss_iou": 0.431640625, "loss_num": 0.03271484375, "loss_xval": 1.0234375, "num_input_tokens_seen": 78746900, "step": 1406 }, { "epoch": 3.133630289532294, "grad_norm": 17.015195846557617, "learning_rate": 1e-06, "loss": 0.8693, "num_input_tokens_seen": 78804192, "step": 1407 }, { "epoch": 3.133630289532294, "loss": 0.7983866930007935, "loss_ce": 0.0002909940667450428, "loss_iou": 0.36328125, "loss_num": 0.01446533203125, "loss_xval": 0.796875, "num_input_tokens_seen": 78804192, "step": 1407 }, { "epoch": 3.1358574610244987, "grad_norm": 38.74119567871094, "learning_rate": 1e-06, "loss": 0.9071, "num_input_tokens_seen": 78858704, "step": 1408 }, { "epoch": 3.1358574610244987, "loss": 0.9215487241744995, "loss_ce": 0.00028408068465068936, "loss_iou": 0.40625, "loss_num": 0.021484375, "loss_xval": 0.921875, "num_input_tokens_seen": 78858704, "step": 1408 }, { "epoch": 3.138084632516704, "grad_norm": 16.403173446655273, "learning_rate": 1e-06, "loss": 0.8264, "num_input_tokens_seen": 78913596, "step": 1409 }, { "epoch": 3.138084632516704, "loss": 0.7442107796669006, "loss_ce": 0.0003143020730931312, "loss_iou": 0.28515625, "loss_num": 0.03466796875, "loss_xval": 0.7421875, "num_input_tokens_seen": 78913596, "step": 1409 }, { "epoch": 3.140311804008909, "grad_norm": 125.99543762207031, "learning_rate": 1e-06, "loss": 0.7983, "num_input_tokens_seen": 78971496, "step": 1410 }, { "epoch": 3.140311804008909, "loss": 0.7936596870422363, "loss_ce": 0.0006909238873049617, "loss_iou": 0.34765625, "loss_num": 0.01953125, "loss_xval": 0.79296875, "num_input_tokens_seen": 78971496, "step": 1410 }, { "epoch": 3.1425389755011137, "grad_norm": 29.1822509765625, "learning_rate": 1e-06, "loss": 0.7606, "num_input_tokens_seen": 79027964, "step": 1411 }, { "epoch": 3.1425389755011137, "loss": 0.5414670705795288, "loss_ce": 0.00032938801450654864, "loss_iou": 0.22265625, "loss_num": 0.0191650390625, "loss_xval": 0.54296875, "num_input_tokens_seen": 79027964, "step": 1411 }, { "epoch": 3.1447661469933186, "grad_norm": 62.62162399291992, "learning_rate": 1e-06, "loss": 1.0511, "num_input_tokens_seen": 79085588, "step": 1412 }, { "epoch": 3.1447661469933186, "loss": 0.763961911201477, "loss_ce": 0.00029001818620599806, "loss_iou": 0.3359375, "loss_num": 0.0184326171875, "loss_xval": 0.765625, "num_input_tokens_seen": 79085588, "step": 1412 }, { "epoch": 3.1469933184855234, "grad_norm": 20.36224937438965, "learning_rate": 1e-06, "loss": 0.9567, "num_input_tokens_seen": 79143720, "step": 1413 }, { "epoch": 3.1469933184855234, "loss": 0.7871879935264587, "loss_ce": 0.00032272058888338506, "loss_iou": 0.333984375, "loss_num": 0.0234375, "loss_xval": 0.78515625, "num_input_tokens_seen": 79143720, "step": 1413 }, { "epoch": 3.1492204899777283, "grad_norm": 16.99781608581543, "learning_rate": 1e-06, "loss": 0.9043, "num_input_tokens_seen": 79200260, "step": 1414 }, { "epoch": 3.1492204899777283, "loss": 1.0250790119171143, "loss_ce": 0.00042086507892236114, "loss_iou": 0.421875, "loss_num": 0.035888671875, "loss_xval": 1.0234375, "num_input_tokens_seen": 79200260, "step": 1414 }, { "epoch": 3.151447661469933, "grad_norm": 16.20554542541504, "learning_rate": 1e-06, "loss": 1.0504, "num_input_tokens_seen": 79255792, "step": 1415 }, { "epoch": 3.151447661469933, "loss": 1.2998144626617432, "loss_ce": 0.0004980739904567599, "loss_iou": 0.53515625, "loss_num": 0.0458984375, "loss_xval": 1.296875, "num_input_tokens_seen": 79255792, "step": 1415 }, { "epoch": 3.153674832962138, "grad_norm": 25.340084075927734, "learning_rate": 1e-06, "loss": 1.1359, "num_input_tokens_seen": 79312352, "step": 1416 }, { "epoch": 3.153674832962138, "loss": 0.8494139313697815, "loss_ce": 0.0007811552495695651, "loss_iou": 0.353515625, "loss_num": 0.0279541015625, "loss_xval": 0.84765625, "num_input_tokens_seen": 79312352, "step": 1416 }, { "epoch": 3.155902004454343, "grad_norm": 11.992765426635742, "learning_rate": 1e-06, "loss": 0.8142, "num_input_tokens_seen": 79369780, "step": 1417 }, { "epoch": 3.155902004454343, "loss": 0.6079621911048889, "loss_ce": 0.00029617524705827236, "loss_iou": 0.2734375, "loss_num": 0.01220703125, "loss_xval": 0.609375, "num_input_tokens_seen": 79369780, "step": 1417 }, { "epoch": 3.1581291759465477, "grad_norm": 20.040773391723633, "learning_rate": 1e-06, "loss": 0.9713, "num_input_tokens_seen": 79424212, "step": 1418 }, { "epoch": 3.1581291759465477, "loss": 0.855168342590332, "loss_ce": 0.00030997302383184433, "loss_iou": 0.37109375, "loss_num": 0.022705078125, "loss_xval": 0.85546875, "num_input_tokens_seen": 79424212, "step": 1418 }, { "epoch": 3.1603563474387526, "grad_norm": 24.867382049560547, "learning_rate": 1e-06, "loss": 1.0417, "num_input_tokens_seen": 79481792, "step": 1419 }, { "epoch": 3.1603563474387526, "loss": 1.1068477630615234, "loss_ce": 0.00040248059667646885, "loss_iou": 0.4296875, "loss_num": 0.04931640625, "loss_xval": 1.109375, "num_input_tokens_seen": 79481792, "step": 1419 }, { "epoch": 3.1625835189309575, "grad_norm": 26.066076278686523, "learning_rate": 1e-06, "loss": 1.0346, "num_input_tokens_seen": 79538200, "step": 1420 }, { "epoch": 3.1625835189309575, "loss": 0.9904955625534058, "loss_ce": 0.0005052955239079893, "loss_iou": 0.392578125, "loss_num": 0.041015625, "loss_xval": 0.98828125, "num_input_tokens_seen": 79538200, "step": 1420 }, { "epoch": 3.1648106904231628, "grad_norm": 32.84996032714844, "learning_rate": 1e-06, "loss": 0.7832, "num_input_tokens_seen": 79594752, "step": 1421 }, { "epoch": 3.1648106904231628, "loss": 0.6035779714584351, "loss_ce": 0.0003064596385229379, "loss_iou": 0.244140625, "loss_num": 0.0230712890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 79594752, "step": 1421 }, { "epoch": 3.1670378619153676, "grad_norm": 12.997925758361816, "learning_rate": 1e-06, "loss": 0.7086, "num_input_tokens_seen": 79649304, "step": 1422 }, { "epoch": 3.1670378619153676, "loss": 0.6079559326171875, "loss_ce": 0.0002899472019635141, "loss_iou": 0.2578125, "loss_num": 0.0186767578125, "loss_xval": 0.609375, "num_input_tokens_seen": 79649304, "step": 1422 }, { "epoch": 3.1692650334075725, "grad_norm": 14.003304481506348, "learning_rate": 1e-06, "loss": 0.9371, "num_input_tokens_seen": 79705016, "step": 1423 }, { "epoch": 3.1692650334075725, "loss": 0.9424291253089905, "loss_ce": 0.00029046309646219015, "loss_iou": 0.408203125, "loss_num": 0.0255126953125, "loss_xval": 0.94140625, "num_input_tokens_seen": 79705016, "step": 1423 }, { "epoch": 3.1714922048997773, "grad_norm": 15.485200881958008, "learning_rate": 1e-06, "loss": 0.7606, "num_input_tokens_seen": 79760764, "step": 1424 }, { "epoch": 3.1714922048997773, "loss": 0.867828369140625, "loss_ce": 0.0003967147204093635, "loss_iou": 0.375, "loss_num": 0.02294921875, "loss_xval": 0.8671875, "num_input_tokens_seen": 79760764, "step": 1424 }, { "epoch": 3.173719376391982, "grad_norm": 19.317201614379883, "learning_rate": 1e-06, "loss": 0.7716, "num_input_tokens_seen": 79816396, "step": 1425 }, { "epoch": 3.173719376391982, "loss": 0.697672963142395, "loss_ce": 0.0004073456802871078, "loss_iou": 0.291015625, "loss_num": 0.0234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 79816396, "step": 1425 }, { "epoch": 3.175946547884187, "grad_norm": 16.760774612426758, "learning_rate": 1e-06, "loss": 0.8792, "num_input_tokens_seen": 79871868, "step": 1426 }, { "epoch": 3.175946547884187, "loss": 0.954727292060852, "loss_ce": 0.00038158154347911477, "loss_iou": 0.3828125, "loss_num": 0.0380859375, "loss_xval": 0.953125, "num_input_tokens_seen": 79871868, "step": 1426 }, { "epoch": 3.178173719376392, "grad_norm": 18.781694412231445, "learning_rate": 1e-06, "loss": 0.6773, "num_input_tokens_seen": 79928048, "step": 1427 }, { "epoch": 3.178173719376392, "loss": 0.6321383714675903, "loss_ce": 0.0003024227044079453, "loss_iou": 0.2734375, "loss_num": 0.0167236328125, "loss_xval": 0.6328125, "num_input_tokens_seen": 79928048, "step": 1427 }, { "epoch": 3.180400890868597, "grad_norm": 14.42310905456543, "learning_rate": 1e-06, "loss": 0.7926, "num_input_tokens_seen": 79983508, "step": 1428 }, { "epoch": 3.180400890868597, "loss": 0.8372380137443542, "loss_ce": 0.0003239199868403375, "loss_iou": 0.353515625, "loss_num": 0.02587890625, "loss_xval": 0.8359375, "num_input_tokens_seen": 79983508, "step": 1428 }, { "epoch": 3.1826280623608016, "grad_norm": 14.923147201538086, "learning_rate": 1e-06, "loss": 0.9376, "num_input_tokens_seen": 80042264, "step": 1429 }, { "epoch": 3.1826280623608016, "loss": 0.7183116674423218, "loss_ce": 0.00029408183763734996, "loss_iou": 0.294921875, "loss_num": 0.025146484375, "loss_xval": 0.71875, "num_input_tokens_seen": 80042264, "step": 1429 }, { "epoch": 3.1848552338530065, "grad_norm": 14.917871475219727, "learning_rate": 1e-06, "loss": 0.9401, "num_input_tokens_seen": 80098272, "step": 1430 }, { "epoch": 3.1848552338530065, "loss": 1.095562219619751, "loss_ce": 0.00034750672057271004, "loss_iou": 0.451171875, "loss_num": 0.038330078125, "loss_xval": 1.09375, "num_input_tokens_seen": 80098272, "step": 1430 }, { "epoch": 3.187082405345212, "grad_norm": 21.82032585144043, "learning_rate": 1e-06, "loss": 0.813, "num_input_tokens_seen": 80153352, "step": 1431 }, { "epoch": 3.187082405345212, "loss": 0.6784963607788086, "loss_ce": 0.00027368031442165375, "loss_iou": 0.2890625, "loss_num": 0.0196533203125, "loss_xval": 0.6796875, "num_input_tokens_seen": 80153352, "step": 1431 }, { "epoch": 3.1893095768374167, "grad_norm": 20.35378074645996, "learning_rate": 1e-06, "loss": 0.8377, "num_input_tokens_seen": 80209672, "step": 1432 }, { "epoch": 3.1893095768374167, "loss": 0.9991272687911987, "loss_ce": 0.0003479632141534239, "loss_iou": 0.419921875, "loss_num": 0.03173828125, "loss_xval": 1.0, "num_input_tokens_seen": 80209672, "step": 1432 }, { "epoch": 3.1915367483296215, "grad_norm": 18.134105682373047, "learning_rate": 1e-06, "loss": 0.7804, "num_input_tokens_seen": 80264404, "step": 1433 }, { "epoch": 3.1915367483296215, "loss": 0.8298586010932922, "loss_ce": 0.000268744770437479, "loss_iou": 0.349609375, "loss_num": 0.0257568359375, "loss_xval": 0.828125, "num_input_tokens_seen": 80264404, "step": 1433 }, { "epoch": 3.1937639198218264, "grad_norm": 25.565568923950195, "learning_rate": 1e-06, "loss": 0.7445, "num_input_tokens_seen": 80322156, "step": 1434 }, { "epoch": 3.1937639198218264, "loss": 0.8270922303199768, "loss_ce": 0.00043206167174503207, "loss_iou": 0.345703125, "loss_num": 0.0269775390625, "loss_xval": 0.828125, "num_input_tokens_seen": 80322156, "step": 1434 }, { "epoch": 3.1959910913140313, "grad_norm": 19.158845901489258, "learning_rate": 1e-06, "loss": 0.7464, "num_input_tokens_seen": 80378616, "step": 1435 }, { "epoch": 3.1959910913140313, "loss": 0.6876095533370972, "loss_ce": 0.0005977747496217489, "loss_iou": 0.26171875, "loss_num": 0.032470703125, "loss_xval": 0.6875, "num_input_tokens_seen": 80378616, "step": 1435 }, { "epoch": 3.198218262806236, "grad_norm": 12.419325828552246, "learning_rate": 1e-06, "loss": 0.5989, "num_input_tokens_seen": 80437060, "step": 1436 }, { "epoch": 3.198218262806236, "loss": 0.6812313199043274, "loss_ce": 0.00032312102848663926, "loss_iou": 0.287109375, "loss_num": 0.0213623046875, "loss_xval": 0.6796875, "num_input_tokens_seen": 80437060, "step": 1436 }, { "epoch": 3.200445434298441, "grad_norm": 27.797161102294922, "learning_rate": 1e-06, "loss": 0.8508, "num_input_tokens_seen": 80492916, "step": 1437 }, { "epoch": 3.200445434298441, "loss": 0.8674960732460022, "loss_ce": 0.00030855537625029683, "loss_iou": 0.34765625, "loss_num": 0.0341796875, "loss_xval": 0.8671875, "num_input_tokens_seen": 80492916, "step": 1437 }, { "epoch": 3.202672605790646, "grad_norm": 26.252103805541992, "learning_rate": 1e-06, "loss": 0.8936, "num_input_tokens_seen": 80550292, "step": 1438 }, { "epoch": 3.202672605790646, "loss": 0.885981559753418, "loss_ce": 0.00048353534657508135, "loss_iou": 0.384765625, "loss_num": 0.022705078125, "loss_xval": 0.88671875, "num_input_tokens_seen": 80550292, "step": 1438 }, { "epoch": 3.2048997772828507, "grad_norm": 18.954618453979492, "learning_rate": 1e-06, "loss": 1.1562, "num_input_tokens_seen": 80606748, "step": 1439 }, { "epoch": 3.2048997772828507, "loss": 0.9013663530349731, "loss_ce": 0.00048747207620181143, "loss_iou": 0.35546875, "loss_num": 0.038330078125, "loss_xval": 0.90234375, "num_input_tokens_seen": 80606748, "step": 1439 }, { "epoch": 3.2071269487750556, "grad_norm": 17.839231491088867, "learning_rate": 1e-06, "loss": 0.7546, "num_input_tokens_seen": 80665732, "step": 1440 }, { "epoch": 3.2071269487750556, "loss": 0.7411551475524902, "loss_ce": 0.0004324775072745979, "loss_iou": 0.3203125, "loss_num": 0.0198974609375, "loss_xval": 0.7421875, "num_input_tokens_seen": 80665732, "step": 1440 }, { "epoch": 3.2093541202672604, "grad_norm": 16.883333206176758, "learning_rate": 1e-06, "loss": 0.8151, "num_input_tokens_seen": 80720260, "step": 1441 }, { "epoch": 3.2093541202672604, "loss": 1.001312255859375, "loss_ce": 0.00033568451181054115, "loss_iou": 0.40234375, "loss_num": 0.039794921875, "loss_xval": 1.0, "num_input_tokens_seen": 80720260, "step": 1441 }, { "epoch": 3.2115812917594653, "grad_norm": 19.242998123168945, "learning_rate": 1e-06, "loss": 0.8271, "num_input_tokens_seen": 80771032, "step": 1442 }, { "epoch": 3.2115812917594653, "loss": 0.8663139343261719, "loss_ce": 0.0005913155619055033, "loss_iou": 0.345703125, "loss_num": 0.034912109375, "loss_xval": 0.8671875, "num_input_tokens_seen": 80771032, "step": 1442 }, { "epoch": 3.2138084632516706, "grad_norm": 18.97730827331543, "learning_rate": 1e-06, "loss": 0.7771, "num_input_tokens_seen": 80823748, "step": 1443 }, { "epoch": 3.2138084632516706, "loss": 0.7416298389434814, "loss_ce": 0.0002968141343444586, "loss_iou": 0.314453125, "loss_num": 0.0224609375, "loss_xval": 0.7421875, "num_input_tokens_seen": 80823748, "step": 1443 }, { "epoch": 3.2160356347438754, "grad_norm": 71.22569274902344, "learning_rate": 1e-06, "loss": 0.966, "num_input_tokens_seen": 80879232, "step": 1444 }, { "epoch": 3.2160356347438754, "loss": 0.8303946852684021, "loss_ce": 0.000316560675855726, "loss_iou": 0.345703125, "loss_num": 0.02783203125, "loss_xval": 0.828125, "num_input_tokens_seen": 80879232, "step": 1444 }, { "epoch": 3.2182628062360803, "grad_norm": 16.554031372070312, "learning_rate": 1e-06, "loss": 0.772, "num_input_tokens_seen": 80936056, "step": 1445 }, { "epoch": 3.2182628062360803, "loss": 0.7341041564941406, "loss_ce": 0.0004616554651875049, "loss_iou": 0.294921875, "loss_num": 0.0286865234375, "loss_xval": 0.734375, "num_input_tokens_seen": 80936056, "step": 1445 }, { "epoch": 3.220489977728285, "grad_norm": 12.350296020507812, "learning_rate": 1e-06, "loss": 0.983, "num_input_tokens_seen": 80992044, "step": 1446 }, { "epoch": 3.220489977728285, "loss": 1.119089126586914, "loss_ce": 0.00043676019413396716, "loss_iou": 0.4375, "loss_num": 0.048583984375, "loss_xval": 1.1171875, "num_input_tokens_seen": 80992044, "step": 1446 }, { "epoch": 3.22271714922049, "grad_norm": 35.66438293457031, "learning_rate": 1e-06, "loss": 0.9671, "num_input_tokens_seen": 81047568, "step": 1447 }, { "epoch": 3.22271714922049, "loss": 1.1087019443511963, "loss_ce": 0.000303465174511075, "loss_iou": 0.46875, "loss_num": 0.0341796875, "loss_xval": 1.109375, "num_input_tokens_seen": 81047568, "step": 1447 }, { "epoch": 3.224944320712695, "grad_norm": 23.309444427490234, "learning_rate": 1e-06, "loss": 1.0038, "num_input_tokens_seen": 81105220, "step": 1448 }, { "epoch": 3.224944320712695, "loss": 0.9000189304351807, "loss_ce": 0.0008490003529004753, "loss_iou": 0.37109375, "loss_num": 0.031494140625, "loss_xval": 0.8984375, "num_input_tokens_seen": 81105220, "step": 1448 }, { "epoch": 3.2271714922048997, "grad_norm": 20.23189926147461, "learning_rate": 1e-06, "loss": 0.7571, "num_input_tokens_seen": 81157732, "step": 1449 }, { "epoch": 3.2271714922048997, "loss": 0.5965100526809692, "loss_ce": 0.00031864526681602, "loss_iou": 0.2412109375, "loss_num": 0.022705078125, "loss_xval": 0.59765625, "num_input_tokens_seen": 81157732, "step": 1449 }, { "epoch": 3.2293986636971046, "grad_norm": 20.063142776489258, "learning_rate": 1e-06, "loss": 0.7535, "num_input_tokens_seen": 81215292, "step": 1450 }, { "epoch": 3.2293986636971046, "loss": 0.9407631158828735, "loss_ce": 0.00033346362761221826, "loss_iou": 0.40625, "loss_num": 0.0255126953125, "loss_xval": 0.94140625, "num_input_tokens_seen": 81215292, "step": 1450 }, { "epoch": 3.2316258351893095, "grad_norm": 19.31182289123535, "learning_rate": 1e-06, "loss": 0.817, "num_input_tokens_seen": 81272552, "step": 1451 }, { "epoch": 3.2316258351893095, "loss": 1.0058592557907104, "loss_ce": 0.0004881612549070269, "loss_iou": 0.376953125, "loss_num": 0.050048828125, "loss_xval": 1.0078125, "num_input_tokens_seen": 81272552, "step": 1451 }, { "epoch": 3.2338530066815143, "grad_norm": 34.98418426513672, "learning_rate": 1e-06, "loss": 0.9567, "num_input_tokens_seen": 81322452, "step": 1452 }, { "epoch": 3.2338530066815143, "loss": 0.8940277099609375, "loss_ce": 0.000717109243851155, "loss_iou": 0.33203125, "loss_num": 0.04541015625, "loss_xval": 0.89453125, "num_input_tokens_seen": 81322452, "step": 1452 }, { "epoch": 3.236080178173719, "grad_norm": 22.681028366088867, "learning_rate": 1e-06, "loss": 1.0263, "num_input_tokens_seen": 81378452, "step": 1453 }, { "epoch": 3.236080178173719, "loss": 1.1674110889434814, "loss_ce": 0.0006629737326875329, "loss_iou": 0.46484375, "loss_num": 0.047607421875, "loss_xval": 1.1640625, "num_input_tokens_seen": 81378452, "step": 1453 }, { "epoch": 3.2383073496659245, "grad_norm": 15.543540954589844, "learning_rate": 1e-06, "loss": 0.7653, "num_input_tokens_seen": 81432916, "step": 1454 }, { "epoch": 3.2383073496659245, "loss": 0.5583458542823792, "loss_ce": 0.0002403960534138605, "loss_iou": 0.2392578125, "loss_num": 0.015869140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 81432916, "step": 1454 }, { "epoch": 3.2405345211581293, "grad_norm": 35.686893463134766, "learning_rate": 1e-06, "loss": 1.0985, "num_input_tokens_seen": 81488412, "step": 1455 }, { "epoch": 3.2405345211581293, "loss": 1.1553540229797363, "loss_ce": 0.0003246957203373313, "loss_iou": 0.490234375, "loss_num": 0.034423828125, "loss_xval": 1.15625, "num_input_tokens_seen": 81488412, "step": 1455 }, { "epoch": 3.242761692650334, "grad_norm": 16.909992218017578, "learning_rate": 1e-06, "loss": 0.962, "num_input_tokens_seen": 81543048, "step": 1456 }, { "epoch": 3.242761692650334, "loss": 1.0606622695922852, "loss_ce": 0.00035947078140452504, "loss_iou": 0.41796875, "loss_num": 0.045654296875, "loss_xval": 1.0625, "num_input_tokens_seen": 81543048, "step": 1456 }, { "epoch": 3.244988864142539, "grad_norm": 17.404476165771484, "learning_rate": 1e-06, "loss": 0.5719, "num_input_tokens_seen": 81599572, "step": 1457 }, { "epoch": 3.244988864142539, "loss": 0.43446803092956543, "loss_ce": 0.00026391935534775257, "loss_iou": 0.1845703125, "loss_num": 0.01312255859375, "loss_xval": 0.43359375, "num_input_tokens_seen": 81599572, "step": 1457 }, { "epoch": 3.247216035634744, "grad_norm": 15.075811386108398, "learning_rate": 1e-06, "loss": 0.8541, "num_input_tokens_seen": 81658816, "step": 1458 }, { "epoch": 3.247216035634744, "loss": 0.969578206539154, "loss_ce": 0.0003399454872123897, "loss_iou": 0.400390625, "loss_num": 0.0341796875, "loss_xval": 0.96875, "num_input_tokens_seen": 81658816, "step": 1458 }, { "epoch": 3.249443207126949, "grad_norm": 189.14511108398438, "learning_rate": 1e-06, "loss": 0.767, "num_input_tokens_seen": 81714620, "step": 1459 }, { "epoch": 3.249443207126949, "loss": 0.8307961225509644, "loss_ce": 0.00022976743639446795, "loss_iou": 0.361328125, "loss_num": 0.0213623046875, "loss_xval": 0.83203125, "num_input_tokens_seen": 81714620, "step": 1459 }, { "epoch": 3.2516703786191536, "grad_norm": 16.0401611328125, "learning_rate": 1e-06, "loss": 0.6961, "num_input_tokens_seen": 81768192, "step": 1460 }, { "epoch": 3.2516703786191536, "loss": 0.4611513614654541, "loss_ce": 0.00027491431683301926, "loss_iou": 0.1875, "loss_num": 0.0172119140625, "loss_xval": 0.4609375, "num_input_tokens_seen": 81768192, "step": 1460 }, { "epoch": 3.2538975501113585, "grad_norm": 24.038867950439453, "learning_rate": 1e-06, "loss": 0.945, "num_input_tokens_seen": 81823076, "step": 1461 }, { "epoch": 3.2538975501113585, "loss": 1.0662281513214111, "loss_ce": 0.00043232861207798123, "loss_iou": 0.453125, "loss_num": 0.031494140625, "loss_xval": 1.0625, "num_input_tokens_seen": 81823076, "step": 1461 }, { "epoch": 3.2561247216035634, "grad_norm": 27.399744033813477, "learning_rate": 1e-06, "loss": 0.8288, "num_input_tokens_seen": 81878532, "step": 1462 }, { "epoch": 3.2561247216035634, "loss": 1.0180044174194336, "loss_ce": 0.00042639480670914054, "loss_iou": 0.4453125, "loss_num": 0.0252685546875, "loss_xval": 1.015625, "num_input_tokens_seen": 81878532, "step": 1462 }, { "epoch": 3.2583518930957682, "grad_norm": 30.450820922851562, "learning_rate": 1e-06, "loss": 0.9573, "num_input_tokens_seen": 81934472, "step": 1463 }, { "epoch": 3.2583518930957682, "loss": 0.7863764762878418, "loss_ce": 0.00024365229182876647, "loss_iou": 0.310546875, "loss_num": 0.03271484375, "loss_xval": 0.78515625, "num_input_tokens_seen": 81934472, "step": 1463 }, { "epoch": 3.260579064587973, "grad_norm": 73.2469482421875, "learning_rate": 1e-06, "loss": 0.8654, "num_input_tokens_seen": 81992352, "step": 1464 }, { "epoch": 3.260579064587973, "loss": 0.8775367736816406, "loss_ce": 0.0003394762461539358, "loss_iou": 0.373046875, "loss_num": 0.0264892578125, "loss_xval": 0.87890625, "num_input_tokens_seen": 81992352, "step": 1464 }, { "epoch": 3.262806236080178, "grad_norm": 17.928956985473633, "learning_rate": 1e-06, "loss": 0.9042, "num_input_tokens_seen": 82047880, "step": 1465 }, { "epoch": 3.262806236080178, "loss": 0.8798074722290039, "loss_ce": 0.009446126408874989, "loss_iou": 0.37109375, "loss_num": 0.025390625, "loss_xval": 0.87109375, "num_input_tokens_seen": 82047880, "step": 1465 }, { "epoch": 3.2650334075723833, "grad_norm": 13.953981399536133, "learning_rate": 1e-06, "loss": 0.7979, "num_input_tokens_seen": 82105036, "step": 1466 }, { "epoch": 3.2650334075723833, "loss": 0.9124147891998291, "loss_ce": 0.0003053955442737788, "loss_iou": 0.408203125, "loss_num": 0.01953125, "loss_xval": 0.9140625, "num_input_tokens_seen": 82105036, "step": 1466 }, { "epoch": 3.267260579064588, "grad_norm": 19.903533935546875, "learning_rate": 1e-06, "loss": 0.7328, "num_input_tokens_seen": 82161524, "step": 1467 }, { "epoch": 3.267260579064588, "loss": 0.7695513963699341, "loss_ce": 0.0002642880426719785, "loss_iou": 0.328125, "loss_num": 0.0225830078125, "loss_xval": 0.76953125, "num_input_tokens_seen": 82161524, "step": 1467 }, { "epoch": 3.269487750556793, "grad_norm": 16.431427001953125, "learning_rate": 1e-06, "loss": 0.7701, "num_input_tokens_seen": 82216912, "step": 1468 }, { "epoch": 3.269487750556793, "loss": 0.7241719365119934, "loss_ce": 0.00029497878858819604, "loss_iou": 0.3125, "loss_num": 0.0194091796875, "loss_xval": 0.72265625, "num_input_tokens_seen": 82216912, "step": 1468 }, { "epoch": 3.271714922048998, "grad_norm": 21.81812286376953, "learning_rate": 1e-06, "loss": 0.7118, "num_input_tokens_seen": 82273656, "step": 1469 }, { "epoch": 3.271714922048998, "loss": 0.6574879884719849, "loss_ce": 0.0002613977121654898, "loss_iou": 0.28125, "loss_num": 0.0191650390625, "loss_xval": 0.65625, "num_input_tokens_seen": 82273656, "step": 1469 }, { "epoch": 3.2739420935412027, "grad_norm": 16.43152618408203, "learning_rate": 1e-06, "loss": 0.8491, "num_input_tokens_seen": 82329848, "step": 1470 }, { "epoch": 3.2739420935412027, "loss": 0.6564966440200806, "loss_ce": 0.0002466381702106446, "loss_iou": 0.26171875, "loss_num": 0.0263671875, "loss_xval": 0.65625, "num_input_tokens_seen": 82329848, "step": 1470 }, { "epoch": 3.2761692650334076, "grad_norm": 26.318092346191406, "learning_rate": 1e-06, "loss": 0.7104, "num_input_tokens_seen": 82388348, "step": 1471 }, { "epoch": 3.2761692650334076, "loss": 0.7334426045417786, "loss_ce": 0.00028831767849624157, "loss_iou": 0.283203125, "loss_num": 0.033447265625, "loss_xval": 0.734375, "num_input_tokens_seen": 82388348, "step": 1471 }, { "epoch": 3.2783964365256124, "grad_norm": 15.957210540771484, "learning_rate": 1e-06, "loss": 0.5911, "num_input_tokens_seen": 82447224, "step": 1472 }, { "epoch": 3.2783964365256124, "loss": 0.5700989961624146, "loss_ce": 0.0002747434191405773, "loss_iou": 0.2451171875, "loss_num": 0.0159912109375, "loss_xval": 0.5703125, "num_input_tokens_seen": 82447224, "step": 1472 }, { "epoch": 3.2806236080178173, "grad_norm": 21.27912712097168, "learning_rate": 1e-06, "loss": 0.9417, "num_input_tokens_seen": 82503100, "step": 1473 }, { "epoch": 3.2806236080178173, "loss": 1.111638069152832, "loss_ce": 0.0003100019530393183, "loss_iou": 0.490234375, "loss_num": 0.0257568359375, "loss_xval": 1.109375, "num_input_tokens_seen": 82503100, "step": 1473 }, { "epoch": 3.282850779510022, "grad_norm": 36.66477584838867, "learning_rate": 1e-06, "loss": 0.8626, "num_input_tokens_seen": 82560744, "step": 1474 }, { "epoch": 3.282850779510022, "loss": 0.839104413986206, "loss_ce": 0.00048139755381271243, "loss_iou": 0.359375, "loss_num": 0.0244140625, "loss_xval": 0.83984375, "num_input_tokens_seen": 82560744, "step": 1474 }, { "epoch": 3.285077951002227, "grad_norm": 20.039661407470703, "learning_rate": 1e-06, "loss": 0.8975, "num_input_tokens_seen": 82617300, "step": 1475 }, { "epoch": 3.285077951002227, "loss": 1.0146886110305786, "loss_ce": 0.0002843354013748467, "loss_iou": 0.439453125, "loss_num": 0.0274658203125, "loss_xval": 1.015625, "num_input_tokens_seen": 82617300, "step": 1475 }, { "epoch": 3.2873051224944323, "grad_norm": 15.592321395874023, "learning_rate": 1e-06, "loss": 0.93, "num_input_tokens_seen": 82670780, "step": 1476 }, { "epoch": 3.2873051224944323, "loss": 0.9146976470947266, "loss_ce": 0.0008792462758719921, "loss_iou": 0.373046875, "loss_num": 0.03369140625, "loss_xval": 0.9140625, "num_input_tokens_seen": 82670780, "step": 1476 }, { "epoch": 3.289532293986637, "grad_norm": 15.493447303771973, "learning_rate": 1e-06, "loss": 0.7573, "num_input_tokens_seen": 82729732, "step": 1477 }, { "epoch": 3.289532293986637, "loss": 0.6953732371330261, "loss_ce": 0.0003048654180020094, "loss_iou": 0.291015625, "loss_num": 0.022705078125, "loss_xval": 0.6953125, "num_input_tokens_seen": 82729732, "step": 1477 }, { "epoch": 3.291759465478842, "grad_norm": 26.018573760986328, "learning_rate": 1e-06, "loss": 0.8349, "num_input_tokens_seen": 82787736, "step": 1478 }, { "epoch": 3.291759465478842, "loss": 0.6441195607185364, "loss_ce": 0.0003207349800504744, "loss_iou": 0.28515625, "loss_num": 0.01446533203125, "loss_xval": 0.64453125, "num_input_tokens_seen": 82787736, "step": 1478 }, { "epoch": 3.293986636971047, "grad_norm": 18.60807991027832, "learning_rate": 1e-06, "loss": 0.9953, "num_input_tokens_seen": 82843952, "step": 1479 }, { "epoch": 3.293986636971047, "loss": 0.9405885934829712, "loss_ce": 0.0004030146519653499, "loss_iou": 0.375, "loss_num": 0.038330078125, "loss_xval": 0.94140625, "num_input_tokens_seen": 82843952, "step": 1479 }, { "epoch": 3.2962138084632517, "grad_norm": 71.17398834228516, "learning_rate": 1e-06, "loss": 0.9742, "num_input_tokens_seen": 82901868, "step": 1480 }, { "epoch": 3.2962138084632517, "loss": 1.149244785308838, "loss_ce": 0.00031894395942799747, "loss_iou": 0.50390625, "loss_num": 0.0283203125, "loss_xval": 1.1484375, "num_input_tokens_seen": 82901868, "step": 1480 }, { "epoch": 3.2984409799554566, "grad_norm": 53.66946792602539, "learning_rate": 1e-06, "loss": 0.941, "num_input_tokens_seen": 82955420, "step": 1481 }, { "epoch": 3.2984409799554566, "loss": 1.0035812854766846, "loss_ce": 0.004924064036458731, "loss_iou": 0.39453125, "loss_num": 0.042236328125, "loss_xval": 1.0, "num_input_tokens_seen": 82955420, "step": 1481 }, { "epoch": 3.3006681514476615, "grad_norm": 26.24509048461914, "learning_rate": 1e-06, "loss": 0.9412, "num_input_tokens_seen": 83009936, "step": 1482 }, { "epoch": 3.3006681514476615, "loss": 1.0405406951904297, "loss_ce": 0.0006236102781258523, "loss_iou": 0.4296875, "loss_num": 0.036376953125, "loss_xval": 1.0390625, "num_input_tokens_seen": 83009936, "step": 1482 }, { "epoch": 3.3028953229398663, "grad_norm": 14.86413860321045, "learning_rate": 1e-06, "loss": 0.8586, "num_input_tokens_seen": 83067024, "step": 1483 }, { "epoch": 3.3028953229398663, "loss": 0.9363436102867126, "loss_ce": 0.00030847761081531644, "loss_iou": 0.404296875, "loss_num": 0.0255126953125, "loss_xval": 0.9375, "num_input_tokens_seen": 83067024, "step": 1483 }, { "epoch": 3.305122494432071, "grad_norm": 16.618406295776367, "learning_rate": 1e-06, "loss": 0.8834, "num_input_tokens_seen": 83122648, "step": 1484 }, { "epoch": 3.305122494432071, "loss": 1.071757197380066, "loss_ce": 0.0004681579302996397, "loss_iou": 0.435546875, "loss_num": 0.039794921875, "loss_xval": 1.0703125, "num_input_tokens_seen": 83122648, "step": 1484 }, { "epoch": 3.307349665924276, "grad_norm": 23.63321876525879, "learning_rate": 1e-06, "loss": 1.0184, "num_input_tokens_seen": 83176484, "step": 1485 }, { "epoch": 3.307349665924276, "loss": 0.7948580980300903, "loss_ce": 0.000424501282395795, "loss_iou": 0.328125, "loss_num": 0.0277099609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 83176484, "step": 1485 }, { "epoch": 3.309576837416481, "grad_norm": 18.189393997192383, "learning_rate": 1e-06, "loss": 0.8297, "num_input_tokens_seen": 83233140, "step": 1486 }, { "epoch": 3.309576837416481, "loss": 0.7806228399276733, "loss_ce": 0.0003493690746836364, "loss_iou": 0.31640625, "loss_num": 0.02978515625, "loss_xval": 0.78125, "num_input_tokens_seen": 83233140, "step": 1486 }, { "epoch": 3.3118040089086858, "grad_norm": 21.735517501831055, "learning_rate": 1e-06, "loss": 0.8333, "num_input_tokens_seen": 83288588, "step": 1487 }, { "epoch": 3.3118040089086858, "loss": 0.6785666942596436, "loss_ce": 0.0005881287506781518, "loss_iou": 0.287109375, "loss_num": 0.0203857421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 83288588, "step": 1487 }, { "epoch": 3.3140311804008906, "grad_norm": 239.3049774169922, "learning_rate": 1e-06, "loss": 0.896, "num_input_tokens_seen": 83343812, "step": 1488 }, { "epoch": 3.3140311804008906, "loss": 0.8566187024116516, "loss_ce": 0.00041753414552658796, "loss_iou": 0.349609375, "loss_num": 0.031494140625, "loss_xval": 0.85546875, "num_input_tokens_seen": 83343812, "step": 1488 }, { "epoch": 3.316258351893096, "grad_norm": 29.564838409423828, "learning_rate": 1e-06, "loss": 0.9014, "num_input_tokens_seen": 83399608, "step": 1489 }, { "epoch": 3.316258351893096, "loss": 0.8447275161743164, "loss_ce": 0.0004892901633866131, "loss_iou": 0.294921875, "loss_num": 0.05126953125, "loss_xval": 0.84375, "num_input_tokens_seen": 83399608, "step": 1489 }, { "epoch": 3.318485523385301, "grad_norm": 27.595678329467773, "learning_rate": 1e-06, "loss": 0.918, "num_input_tokens_seen": 83455260, "step": 1490 }, { "epoch": 3.318485523385301, "loss": 0.9408430457115173, "loss_ce": 0.00041330509702675045, "loss_iou": 0.37890625, "loss_num": 0.036865234375, "loss_xval": 0.94140625, "num_input_tokens_seen": 83455260, "step": 1490 }, { "epoch": 3.3207126948775056, "grad_norm": 12.939767837524414, "learning_rate": 1e-06, "loss": 0.7262, "num_input_tokens_seen": 83512012, "step": 1491 }, { "epoch": 3.3207126948775056, "loss": 0.773056149482727, "loss_ce": 0.00035112208570353687, "loss_iou": 0.298828125, "loss_num": 0.034912109375, "loss_xval": 0.7734375, "num_input_tokens_seen": 83512012, "step": 1491 }, { "epoch": 3.3229398663697105, "grad_norm": 17.990427017211914, "learning_rate": 1e-06, "loss": 0.9087, "num_input_tokens_seen": 83566724, "step": 1492 }, { "epoch": 3.3229398663697105, "loss": 0.7485735416412354, "loss_ce": 0.00028249542810954154, "loss_iou": 0.28515625, "loss_num": 0.035400390625, "loss_xval": 0.75, "num_input_tokens_seen": 83566724, "step": 1492 }, { "epoch": 3.3251670378619154, "grad_norm": 22.296096801757812, "learning_rate": 1e-06, "loss": 0.8386, "num_input_tokens_seen": 83623084, "step": 1493 }, { "epoch": 3.3251670378619154, "loss": 0.6770757436752319, "loss_ce": 0.0003179589402861893, "loss_iou": 0.283203125, "loss_num": 0.0223388671875, "loss_xval": 0.67578125, "num_input_tokens_seen": 83623084, "step": 1493 }, { "epoch": 3.3273942093541202, "grad_norm": 23.940916061401367, "learning_rate": 1e-06, "loss": 0.8714, "num_input_tokens_seen": 83679600, "step": 1494 }, { "epoch": 3.3273942093541202, "loss": 0.8882294297218323, "loss_ce": 0.0002899998507928103, "loss_iou": 0.396484375, "loss_num": 0.018798828125, "loss_xval": 0.88671875, "num_input_tokens_seen": 83679600, "step": 1494 }, { "epoch": 3.329621380846325, "grad_norm": 27.720958709716797, "learning_rate": 1e-06, "loss": 0.7311, "num_input_tokens_seen": 83736204, "step": 1495 }, { "epoch": 3.329621380846325, "loss": 0.7086691856384277, "loss_ce": 0.00041724531911313534, "loss_iou": 0.302734375, "loss_num": 0.0206298828125, "loss_xval": 0.70703125, "num_input_tokens_seen": 83736204, "step": 1495 }, { "epoch": 3.33184855233853, "grad_norm": 23.420194625854492, "learning_rate": 1e-06, "loss": 0.8413, "num_input_tokens_seen": 83792900, "step": 1496 }, { "epoch": 3.33184855233853, "loss": 0.7663967609405518, "loss_ce": 0.00028348196065053344, "loss_iou": 0.328125, "loss_num": 0.021728515625, "loss_xval": 0.765625, "num_input_tokens_seen": 83792900, "step": 1496 }, { "epoch": 3.334075723830735, "grad_norm": 35.49727249145508, "learning_rate": 1e-06, "loss": 0.8009, "num_input_tokens_seen": 83847828, "step": 1497 }, { "epoch": 3.334075723830735, "loss": 0.8171854615211487, "loss_ce": 0.0002908838796429336, "loss_iou": 0.34765625, "loss_num": 0.024169921875, "loss_xval": 0.81640625, "num_input_tokens_seen": 83847828, "step": 1497 }, { "epoch": 3.33630289532294, "grad_norm": 20.75282859802246, "learning_rate": 1e-06, "loss": 0.9144, "num_input_tokens_seen": 83903316, "step": 1498 }, { "epoch": 3.33630289532294, "loss": 1.1358669996261597, "loss_ce": 0.00036891031777486205, "loss_iou": 0.431640625, "loss_num": 0.05517578125, "loss_xval": 1.1328125, "num_input_tokens_seen": 83903316, "step": 1498 }, { "epoch": 3.338530066815145, "grad_norm": 28.5906925201416, "learning_rate": 1e-06, "loss": 0.8748, "num_input_tokens_seen": 83958736, "step": 1499 }, { "epoch": 3.338530066815145, "loss": 1.1274135112762451, "loss_ce": 0.00046035420382395387, "loss_iou": 0.4921875, "loss_num": 0.028564453125, "loss_xval": 1.125, "num_input_tokens_seen": 83958736, "step": 1499 }, { "epoch": 3.34075723830735, "grad_norm": 23.704599380493164, "learning_rate": 1e-06, "loss": 0.8677, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "eval_seeclick_web_CIoU": 0.5594667792320251, "eval_seeclick_web_GIoU": 0.5541926324367523, "eval_seeclick_web_IoU": 0.5765488147735596, "eval_seeclick_web_MAE_all": 0.017576972022652626, "eval_seeclick_web_MAE_h": 0.010464820079505444, "eval_seeclick_web_MAE_w": 0.018719857558608055, "eval_seeclick_web_MAE_x_boxes": 0.00861422996968031, "eval_seeclick_web_MAE_y_boxes": 0.022560626734048128, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9647981524467468, "eval_seeclick_web_loss_ce": 0.0004042425425723195, "eval_seeclick_web_loss_iou": 0.4376220703125, "eval_seeclick_web_loss_num": 0.013916015625, "eval_seeclick_web_loss_xval": 0.94482421875, "eval_seeclick_web_runtime": 33.8762, "eval_seeclick_web_samples_per_second": 1.476, "eval_seeclick_web_steps_per_second": 0.059, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "eval_icons_CIoU": 0.3017665892839432, "eval_icons_GIoU": 0.3255244195461273, "eval_icons_IoU": 0.37552230060100555, "eval_icons_MAE_all": 0.06774608045816422, "eval_icons_MAE_h": 0.03980293497443199, "eval_icons_MAE_w": 0.08278198912739754, "eval_icons_MAE_x_boxes": 0.05543145537376404, "eval_icons_MAE_y_boxes": 0.038841537199914455, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.737123966217041, "eval_icons_loss_ce": 0.0005422734539024532, "eval_icons_loss_iou": 0.670654296875, "eval_icons_loss_num": 0.05875968933105469, "eval_icons_loss_xval": 1.63427734375, "eval_icons_runtime": 35.2512, "eval_icons_samples_per_second": 1.418, "eval_icons_steps_per_second": 0.057, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "eval_screenspot_CIoU": 0.31236544251441956, "eval_screenspot_GIoU": 0.3288701077302297, "eval_screenspot_IoU": 0.3995972275733948, "eval_screenspot_MAE_all": 0.0782718521853288, "eval_screenspot_MAE_h": 0.04242397534350554, "eval_screenspot_MAE_w": 0.088284266491731, "eval_screenspot_MAE_x_boxes": 0.10724017272392909, "eval_screenspot_MAE_y_boxes": 0.047414361188809075, "eval_screenspot_inside_bbox": 0.6150000095367432, "eval_screenspot_loss": 1.7993109226226807, "eval_screenspot_loss_ce": 0.0005223362823016942, "eval_screenspot_loss_iou": 0.7171223958333334, "eval_screenspot_loss_num": 0.08854929606119792, "eval_screenspot_loss_xval": 1.8779296875, "eval_screenspot_runtime": 49.5236, "eval_screenspot_samples_per_second": 1.797, "eval_screenspot_steps_per_second": 0.061, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "eval_compot_CIoU": 0.34500832855701447, "eval_compot_GIoU": 0.37410005927085876, "eval_compot_IoU": 0.40422166883945465, "eval_compot_MAE_all": 0.020579061470925808, "eval_compot_MAE_h": 0.009703563060611486, "eval_compot_MAE_w": 0.026576916687190533, "eval_compot_MAE_x_boxes": 0.03108334168791771, "eval_compot_MAE_y_boxes": 0.006412426475435495, "eval_compot_inside_bbox": 0.6145833432674408, "eval_compot_loss": 1.364414930343628, "eval_compot_loss_ce": 0.0003521185863064602, "eval_compot_loss_iou": 0.6151123046875, "eval_compot_loss_num": 0.019611358642578125, "eval_compot_loss_xval": 1.329833984375, "eval_compot_runtime": 23.3555, "eval_compot_samples_per_second": 2.141, "eval_compot_steps_per_second": 0.086, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "eval_custom_ui_val_CIoU": 0.4252634909417894, "eval_custom_ui_val_GIoU": 0.4529141320122613, "eval_custom_ui_val_IoU": 0.48057351344161564, "eval_custom_ui_val_MAE_all": 0.03534992608345217, "eval_custom_ui_val_MAE_h": 0.020278693590727117, "eval_custom_ui_val_MAE_w": 0.039640864771273404, "eval_custom_ui_val_MAE_x_boxes": 0.03883321676403284, "eval_custom_ui_val_MAE_y_boxes": 0.020278344189541206, "eval_custom_ui_val_inside_bbox": 0.6658950646718343, "eval_custom_ui_val_loss": 1.297371745109558, "eval_custom_ui_val_loss_ce": 0.0004658611885841108, "eval_custom_ui_val_loss_iou": 0.5472547743055556, "eval_custom_ui_val_loss_num": 0.03508186340332031, "eval_custom_ui_val_loss_xval": 1.2700737847222223, "eval_custom_ui_val_runtime": 65.9243, "eval_custom_ui_val_samples_per_second": 4.02, "eval_custom_ui_val_steps_per_second": 0.137, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.34075723830735, "loss": 1.047553539276123, "loss_ce": 0.00043441198067739606, "loss_iou": 0.453125, "loss_num": 0.0279541015625, "loss_xval": 1.046875, "num_input_tokens_seen": 84012288, "step": 1500 }, { "epoch": 3.3429844097995547, "grad_norm": 17.67667007446289, "learning_rate": 1e-06, "loss": 0.8425, "num_input_tokens_seen": 84069404, "step": 1501 }, { "epoch": 3.3429844097995547, "loss": 0.8640942573547363, "loss_ce": 0.00032469953293912113, "loss_iou": 0.33203125, "loss_num": 0.039794921875, "loss_xval": 0.86328125, "num_input_tokens_seen": 84069404, "step": 1501 }, { "epoch": 3.3452115812917596, "grad_norm": 17.004056930541992, "learning_rate": 1e-06, "loss": 0.9548, "num_input_tokens_seen": 84124248, "step": 1502 }, { "epoch": 3.3452115812917596, "loss": 1.012810468673706, "loss_ce": 0.0003593353903852403, "loss_iou": 0.4296875, "loss_num": 0.0306396484375, "loss_xval": 1.015625, "num_input_tokens_seen": 84124248, "step": 1502 }, { "epoch": 3.3474387527839644, "grad_norm": 17.8640193939209, "learning_rate": 1e-06, "loss": 0.8929, "num_input_tokens_seen": 84182520, "step": 1503 }, { "epoch": 3.3474387527839644, "loss": 1.041400671005249, "loss_ce": 0.0003850944631267339, "loss_iou": 0.453125, "loss_num": 0.02685546875, "loss_xval": 1.0390625, "num_input_tokens_seen": 84182520, "step": 1503 }, { "epoch": 3.3496659242761693, "grad_norm": 31.79290199279785, "learning_rate": 1e-06, "loss": 0.6244, "num_input_tokens_seen": 84241328, "step": 1504 }, { "epoch": 3.3496659242761693, "loss": 0.537376880645752, "loss_ce": 0.0002675021532922983, "loss_iou": 0.2177734375, "loss_num": 0.0201416015625, "loss_xval": 0.5390625, "num_input_tokens_seen": 84241328, "step": 1504 }, { "epoch": 3.351893095768374, "grad_norm": 19.212589263916016, "learning_rate": 1e-06, "loss": 0.8643, "num_input_tokens_seen": 84296620, "step": 1505 }, { "epoch": 3.351893095768374, "loss": 0.6515594124794006, "loss_ce": 0.0004363722400739789, "loss_iou": 0.236328125, "loss_num": 0.035400390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 84296620, "step": 1505 }, { "epoch": 3.354120267260579, "grad_norm": 20.98340606689453, "learning_rate": 1e-06, "loss": 0.6714, "num_input_tokens_seen": 84350084, "step": 1506 }, { "epoch": 3.354120267260579, "loss": 0.7122939825057983, "loss_ce": 0.0003799225087277591, "loss_iou": 0.296875, "loss_num": 0.023681640625, "loss_xval": 0.7109375, "num_input_tokens_seen": 84350084, "step": 1506 }, { "epoch": 3.356347438752784, "grad_norm": 16.530118942260742, "learning_rate": 1e-06, "loss": 0.8061, "num_input_tokens_seen": 84402640, "step": 1507 }, { "epoch": 3.356347438752784, "loss": 0.8870806694030762, "loss_ce": 0.0003618546761572361, "loss_iou": 0.375, "loss_num": 0.027587890625, "loss_xval": 0.88671875, "num_input_tokens_seen": 84402640, "step": 1507 }, { "epoch": 3.3585746102449887, "grad_norm": 22.20184326171875, "learning_rate": 1e-06, "loss": 0.818, "num_input_tokens_seen": 84458176, "step": 1508 }, { "epoch": 3.3585746102449887, "loss": 0.7754631638526917, "loss_ce": 0.0003166621900163591, "loss_iou": 0.341796875, "loss_num": 0.0181884765625, "loss_xval": 0.7734375, "num_input_tokens_seen": 84458176, "step": 1508 }, { "epoch": 3.3608017817371936, "grad_norm": 17.622278213500977, "learning_rate": 1e-06, "loss": 0.6034, "num_input_tokens_seen": 84514432, "step": 1509 }, { "epoch": 3.3608017817371936, "loss": 0.4961128532886505, "loss_ce": 0.0002632543910294771, "loss_iou": 0.2109375, "loss_num": 0.01507568359375, "loss_xval": 0.49609375, "num_input_tokens_seen": 84514432, "step": 1509 }, { "epoch": 3.3630289532293984, "grad_norm": 19.41647720336914, "learning_rate": 1e-06, "loss": 0.9464, "num_input_tokens_seen": 84572280, "step": 1510 }, { "epoch": 3.3630289532293984, "loss": 0.9257339239120483, "loss_ce": 0.0004409342654980719, "loss_iou": 0.3984375, "loss_num": 0.0257568359375, "loss_xval": 0.92578125, "num_input_tokens_seen": 84572280, "step": 1510 }, { "epoch": 3.3652561247216037, "grad_norm": 30.21278190612793, "learning_rate": 1e-06, "loss": 0.8469, "num_input_tokens_seen": 84629116, "step": 1511 }, { "epoch": 3.3652561247216037, "loss": 0.769070029258728, "loss_ce": 0.0002712323039304465, "loss_iou": 0.333984375, "loss_num": 0.0203857421875, "loss_xval": 0.76953125, "num_input_tokens_seen": 84629116, "step": 1511 }, { "epoch": 3.3674832962138086, "grad_norm": 27.09176254272461, "learning_rate": 1e-06, "loss": 1.0643, "num_input_tokens_seen": 84685628, "step": 1512 }, { "epoch": 3.3674832962138086, "loss": 1.0953569412231445, "loss_ce": 0.0003862777375616133, "loss_iou": 0.421875, "loss_num": 0.050048828125, "loss_xval": 1.09375, "num_input_tokens_seen": 84685628, "step": 1512 }, { "epoch": 3.3697104677060135, "grad_norm": 29.004039764404297, "learning_rate": 1e-06, "loss": 0.8344, "num_input_tokens_seen": 84743456, "step": 1513 }, { "epoch": 3.3697104677060135, "loss": 1.0513304471969604, "loss_ce": 0.0003050968807656318, "loss_iou": 0.40625, "loss_num": 0.048095703125, "loss_xval": 1.0546875, "num_input_tokens_seen": 84743456, "step": 1513 }, { "epoch": 3.3719376391982183, "grad_norm": 14.021499633789062, "learning_rate": 1e-06, "loss": 0.7724, "num_input_tokens_seen": 84800152, "step": 1514 }, { "epoch": 3.3719376391982183, "loss": 0.7707792520523071, "loss_ce": 0.004665941931307316, "loss_iou": 0.296875, "loss_num": 0.0341796875, "loss_xval": 0.765625, "num_input_tokens_seen": 84800152, "step": 1514 }, { "epoch": 3.374164810690423, "grad_norm": 21.85736656188965, "learning_rate": 1e-06, "loss": 0.9805, "num_input_tokens_seen": 84856756, "step": 1515 }, { "epoch": 3.374164810690423, "loss": 1.0318880081176758, "loss_ce": 0.0016146359266713262, "loss_iou": 0.404296875, "loss_num": 0.044189453125, "loss_xval": 1.03125, "num_input_tokens_seen": 84856756, "step": 1515 }, { "epoch": 3.376391982182628, "grad_norm": 17.330366134643555, "learning_rate": 1e-06, "loss": 0.8845, "num_input_tokens_seen": 84913964, "step": 1516 }, { "epoch": 3.376391982182628, "loss": 0.9630565643310547, "loss_ce": 0.00041007917025126517, "loss_iou": 0.4140625, "loss_num": 0.0264892578125, "loss_xval": 0.9609375, "num_input_tokens_seen": 84913964, "step": 1516 }, { "epoch": 3.378619153674833, "grad_norm": 28.434722900390625, "learning_rate": 1e-06, "loss": 0.7971, "num_input_tokens_seen": 84973372, "step": 1517 }, { "epoch": 3.378619153674833, "loss": 0.6269816160202026, "loss_ce": 0.0002726696548052132, "loss_iou": 0.255859375, "loss_num": 0.023193359375, "loss_xval": 0.625, "num_input_tokens_seen": 84973372, "step": 1517 }, { "epoch": 3.3808463251670378, "grad_norm": 22.015655517578125, "learning_rate": 1e-06, "loss": 0.7052, "num_input_tokens_seen": 85029976, "step": 1518 }, { "epoch": 3.3808463251670378, "loss": 0.7049768567085266, "loss_ce": 0.00038701502489857376, "loss_iou": 0.314453125, "loss_num": 0.01513671875, "loss_xval": 0.703125, "num_input_tokens_seen": 85029976, "step": 1518 }, { "epoch": 3.3830734966592426, "grad_norm": 18.153146743774414, "learning_rate": 1e-06, "loss": 0.7883, "num_input_tokens_seen": 85085816, "step": 1519 }, { "epoch": 3.3830734966592426, "loss": 0.9505753517150879, "loss_ce": 0.00025792684755288064, "loss_iou": 0.390625, "loss_num": 0.0341796875, "loss_xval": 0.94921875, "num_input_tokens_seen": 85085816, "step": 1519 }, { "epoch": 3.3853006681514475, "grad_norm": 21.065345764160156, "learning_rate": 1e-06, "loss": 0.9315, "num_input_tokens_seen": 85143668, "step": 1520 }, { "epoch": 3.3853006681514475, "loss": 0.7659118175506592, "loss_ce": 0.00028681475669145584, "loss_iou": 0.3359375, "loss_num": 0.0185546875, "loss_xval": 0.765625, "num_input_tokens_seen": 85143668, "step": 1520 }, { "epoch": 3.387527839643653, "grad_norm": 25.462947845458984, "learning_rate": 1e-06, "loss": 0.9036, "num_input_tokens_seen": 85199320, "step": 1521 }, { "epoch": 3.387527839643653, "loss": 0.8738066554069519, "loss_ce": 0.00027149327797815204, "loss_iou": 0.380859375, "loss_num": 0.0224609375, "loss_xval": 0.875, "num_input_tokens_seen": 85199320, "step": 1521 }, { "epoch": 3.3897550111358576, "grad_norm": 22.21038055419922, "learning_rate": 1e-06, "loss": 0.8248, "num_input_tokens_seen": 85256340, "step": 1522 }, { "epoch": 3.3897550111358576, "loss": 0.8254960179328918, "loss_ce": 0.0003007104678545147, "loss_iou": 0.359375, "loss_num": 0.0208740234375, "loss_xval": 0.82421875, "num_input_tokens_seen": 85256340, "step": 1522 }, { "epoch": 3.3919821826280625, "grad_norm": 17.12187385559082, "learning_rate": 1e-06, "loss": 0.826, "num_input_tokens_seen": 85312876, "step": 1523 }, { "epoch": 3.3919821826280625, "loss": 0.9279178380966187, "loss_ce": 0.0004276382096577436, "loss_iou": 0.37890625, "loss_num": 0.033935546875, "loss_xval": 0.92578125, "num_input_tokens_seen": 85312876, "step": 1523 }, { "epoch": 3.3942093541202674, "grad_norm": 20.59052085876465, "learning_rate": 1e-06, "loss": 0.8034, "num_input_tokens_seen": 85369068, "step": 1524 }, { "epoch": 3.3942093541202674, "loss": 0.9275302886962891, "loss_ce": 0.00028415530687198043, "loss_iou": 0.36328125, "loss_num": 0.0400390625, "loss_xval": 0.92578125, "num_input_tokens_seen": 85369068, "step": 1524 }, { "epoch": 3.3964365256124722, "grad_norm": 23.074607849121094, "learning_rate": 1e-06, "loss": 1.0088, "num_input_tokens_seen": 85424964, "step": 1525 }, { "epoch": 3.3964365256124722, "loss": 1.4848394393920898, "loss_ce": 0.00046440563164651394, "loss_iou": 0.5859375, "loss_num": 0.0634765625, "loss_xval": 1.484375, "num_input_tokens_seen": 85424964, "step": 1525 }, { "epoch": 3.398663697104677, "grad_norm": 103.47501373291016, "learning_rate": 1e-06, "loss": 0.7614, "num_input_tokens_seen": 85481460, "step": 1526 }, { "epoch": 3.398663697104677, "loss": 0.9770767688751221, "loss_ce": 0.00027014350052922964, "loss_iou": 0.400390625, "loss_num": 0.03564453125, "loss_xval": 0.9765625, "num_input_tokens_seen": 85481460, "step": 1526 }, { "epoch": 3.400890868596882, "grad_norm": 16.694969177246094, "learning_rate": 1e-06, "loss": 0.7637, "num_input_tokens_seen": 85538076, "step": 1527 }, { "epoch": 3.400890868596882, "loss": 0.7700480818748474, "loss_ce": 0.00027271179715171456, "loss_iou": 0.341796875, "loss_num": 0.01708984375, "loss_xval": 0.76953125, "num_input_tokens_seen": 85538076, "step": 1527 }, { "epoch": 3.403118040089087, "grad_norm": 15.084866523742676, "learning_rate": 1e-06, "loss": 0.8101, "num_input_tokens_seen": 85593752, "step": 1528 }, { "epoch": 3.403118040089087, "loss": 0.7312592267990112, "loss_ce": 0.0003021882730536163, "loss_iou": 0.30859375, "loss_num": 0.0225830078125, "loss_xval": 0.73046875, "num_input_tokens_seen": 85593752, "step": 1528 }, { "epoch": 3.4053452115812917, "grad_norm": 137.56297302246094, "learning_rate": 1e-06, "loss": 1.0569, "num_input_tokens_seen": 85649224, "step": 1529 }, { "epoch": 3.4053452115812917, "loss": 0.6406855583190918, "loss_ce": 0.00030470843194052577, "loss_iou": 0.2890625, "loss_num": 0.0125732421875, "loss_xval": 0.640625, "num_input_tokens_seen": 85649224, "step": 1529 }, { "epoch": 3.4075723830734965, "grad_norm": 17.245445251464844, "learning_rate": 1e-06, "loss": 0.8775, "num_input_tokens_seen": 85704284, "step": 1530 }, { "epoch": 3.4075723830734965, "loss": 0.7688406109809875, "loss_ce": 0.000285913614789024, "loss_iou": 0.326171875, "loss_num": 0.0233154296875, "loss_xval": 0.76953125, "num_input_tokens_seen": 85704284, "step": 1530 }, { "epoch": 3.4097995545657014, "grad_norm": 28.163143157958984, "learning_rate": 1e-06, "loss": 0.6158, "num_input_tokens_seen": 85758312, "step": 1531 }, { "epoch": 3.4097995545657014, "loss": 0.6164954900741577, "loss_ce": 0.000772814848460257, "loss_iou": 0.28125, "loss_num": 0.01080322265625, "loss_xval": 0.6171875, "num_input_tokens_seen": 85758312, "step": 1531 }, { "epoch": 3.4120267260579062, "grad_norm": 20.04214096069336, "learning_rate": 1e-06, "loss": 0.8842, "num_input_tokens_seen": 85814628, "step": 1532 }, { "epoch": 3.4120267260579062, "loss": 1.005671739578247, "loss_ce": 0.00030057106050662696, "loss_iou": 0.4296875, "loss_num": 0.029052734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 85814628, "step": 1532 }, { "epoch": 3.4142538975501115, "grad_norm": 31.450151443481445, "learning_rate": 1e-06, "loss": 0.8459, "num_input_tokens_seen": 85870152, "step": 1533 }, { "epoch": 3.4142538975501115, "loss": 1.0091707706451416, "loss_ce": 0.0003816866665147245, "loss_iou": 0.41796875, "loss_num": 0.0341796875, "loss_xval": 1.0078125, "num_input_tokens_seen": 85870152, "step": 1533 }, { "epoch": 3.4164810690423164, "grad_norm": 16.301393508911133, "learning_rate": 1e-06, "loss": 1.1544, "num_input_tokens_seen": 85923552, "step": 1534 }, { "epoch": 3.4164810690423164, "loss": 1.0733964443206787, "loss_ce": 0.0003984392969869077, "loss_iou": 0.412109375, "loss_num": 0.04931640625, "loss_xval": 1.0703125, "num_input_tokens_seen": 85923552, "step": 1534 }, { "epoch": 3.4187082405345213, "grad_norm": 22.28922462463379, "learning_rate": 1e-06, "loss": 1.2395, "num_input_tokens_seen": 85978024, "step": 1535 }, { "epoch": 3.4187082405345213, "loss": 1.1963934898376465, "loss_ce": 0.0005926065496169031, "loss_iou": 0.4609375, "loss_num": 0.0546875, "loss_xval": 1.1953125, "num_input_tokens_seen": 85978024, "step": 1535 }, { "epoch": 3.420935412026726, "grad_norm": 17.531831741333008, "learning_rate": 1e-06, "loss": 0.857, "num_input_tokens_seen": 86032808, "step": 1536 }, { "epoch": 3.420935412026726, "loss": 0.6863161325454712, "loss_ce": 0.00028098217444494367, "loss_iou": 0.267578125, "loss_num": 0.0299072265625, "loss_xval": 0.6875, "num_input_tokens_seen": 86032808, "step": 1536 }, { "epoch": 3.423162583518931, "grad_norm": 40.43157958984375, "learning_rate": 1e-06, "loss": 0.9083, "num_input_tokens_seen": 86084988, "step": 1537 }, { "epoch": 3.423162583518931, "loss": 1.0269807577133179, "loss_ce": 0.00036941259168088436, "loss_iou": 0.416015625, "loss_num": 0.038330078125, "loss_xval": 1.0234375, "num_input_tokens_seen": 86084988, "step": 1537 }, { "epoch": 3.425389755011136, "grad_norm": 13.929858207702637, "learning_rate": 1e-06, "loss": 0.5897, "num_input_tokens_seen": 86141804, "step": 1538 }, { "epoch": 3.425389755011136, "loss": 0.5110272765159607, "loss_ce": 0.00028507091337814927, "loss_iou": 0.228515625, "loss_num": 0.0106201171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 86141804, "step": 1538 }, { "epoch": 3.4276169265033407, "grad_norm": 22.480680465698242, "learning_rate": 1e-06, "loss": 1.0234, "num_input_tokens_seen": 86196392, "step": 1539 }, { "epoch": 3.4276169265033407, "loss": 1.0525072813034058, "loss_ce": 0.0002611761447042227, "loss_iou": 0.3828125, "loss_num": 0.05712890625, "loss_xval": 1.0546875, "num_input_tokens_seen": 86196392, "step": 1539 }, { "epoch": 3.4298440979955456, "grad_norm": 20.415674209594727, "learning_rate": 1e-06, "loss": 0.7179, "num_input_tokens_seen": 86254652, "step": 1540 }, { "epoch": 3.4298440979955456, "loss": 0.7557092905044556, "loss_ce": 0.0003381772548891604, "loss_iou": 0.3125, "loss_num": 0.025390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 86254652, "step": 1540 }, { "epoch": 3.4320712694877504, "grad_norm": 14.296008110046387, "learning_rate": 1e-06, "loss": 0.9989, "num_input_tokens_seen": 86310292, "step": 1541 }, { "epoch": 3.4320712694877504, "loss": 1.1000525951385498, "loss_ce": 0.00044316527782939374, "loss_iou": 0.44921875, "loss_num": 0.0400390625, "loss_xval": 1.1015625, "num_input_tokens_seen": 86310292, "step": 1541 }, { "epoch": 3.4342984409799553, "grad_norm": 15.6137056350708, "learning_rate": 1e-06, "loss": 0.825, "num_input_tokens_seen": 86365832, "step": 1542 }, { "epoch": 3.4342984409799553, "loss": 0.7097923755645752, "loss_ce": 0.00031970589770935476, "loss_iou": 0.32421875, "loss_num": 0.01190185546875, "loss_xval": 0.7109375, "num_input_tokens_seen": 86365832, "step": 1542 }, { "epoch": 3.4365256124721606, "grad_norm": 405.90081787109375, "learning_rate": 1e-06, "loss": 0.8, "num_input_tokens_seen": 86423384, "step": 1543 }, { "epoch": 3.4365256124721606, "loss": 1.0093533992767334, "loss_ce": 0.0008084863657131791, "loss_iou": 0.40625, "loss_num": 0.0390625, "loss_xval": 1.0078125, "num_input_tokens_seen": 86423384, "step": 1543 }, { "epoch": 3.4387527839643655, "grad_norm": 23.693382263183594, "learning_rate": 1e-06, "loss": 0.9363, "num_input_tokens_seen": 86479144, "step": 1544 }, { "epoch": 3.4387527839643655, "loss": 0.854164719581604, "loss_ce": 0.0004049596609547734, "loss_iou": 0.37109375, "loss_num": 0.022216796875, "loss_xval": 0.85546875, "num_input_tokens_seen": 86479144, "step": 1544 }, { "epoch": 3.4409799554565703, "grad_norm": 16.630285263061523, "learning_rate": 1e-06, "loss": 1.0941, "num_input_tokens_seen": 86535676, "step": 1545 }, { "epoch": 3.4409799554565703, "loss": 1.3825310468673706, "loss_ce": 0.00032891728915274143, "loss_iou": 0.5546875, "loss_num": 0.053955078125, "loss_xval": 1.3828125, "num_input_tokens_seen": 86535676, "step": 1545 }, { "epoch": 3.443207126948775, "grad_norm": 15.714624404907227, "learning_rate": 1e-06, "loss": 0.9159, "num_input_tokens_seen": 86593604, "step": 1546 }, { "epoch": 3.443207126948775, "loss": 0.6130160093307495, "loss_ce": 0.00028408144135028124, "loss_iou": 0.259765625, "loss_num": 0.018310546875, "loss_xval": 0.61328125, "num_input_tokens_seen": 86593604, "step": 1546 }, { "epoch": 3.44543429844098, "grad_norm": 13.893007278442383, "learning_rate": 1e-06, "loss": 0.7666, "num_input_tokens_seen": 86649364, "step": 1547 }, { "epoch": 3.44543429844098, "loss": 0.6706589460372925, "loss_ce": 0.0002487250021658838, "loss_iou": 0.271484375, "loss_num": 0.025390625, "loss_xval": 0.671875, "num_input_tokens_seen": 86649364, "step": 1547 }, { "epoch": 3.447661469933185, "grad_norm": 19.675296783447266, "learning_rate": 1e-06, "loss": 0.8681, "num_input_tokens_seen": 86703940, "step": 1548 }, { "epoch": 3.447661469933185, "loss": 0.9216609001159668, "loss_ce": 0.0002741321222856641, "loss_iou": 0.412109375, "loss_num": 0.0198974609375, "loss_xval": 0.921875, "num_input_tokens_seen": 86703940, "step": 1548 }, { "epoch": 3.4498886414253898, "grad_norm": 19.515134811401367, "learning_rate": 1e-06, "loss": 1.096, "num_input_tokens_seen": 86760584, "step": 1549 }, { "epoch": 3.4498886414253898, "loss": 1.3511126041412354, "loss_ce": 0.00028249574825167656, "loss_iou": 0.51953125, "loss_num": 0.06201171875, "loss_xval": 1.3515625, "num_input_tokens_seen": 86760584, "step": 1549 }, { "epoch": 3.4521158129175946, "grad_norm": 17.653553009033203, "learning_rate": 1e-06, "loss": 0.7243, "num_input_tokens_seen": 86815208, "step": 1550 }, { "epoch": 3.4521158129175946, "loss": 0.7034174203872681, "loss_ce": 0.0002923937572631985, "loss_iou": 0.318359375, "loss_num": 0.01348876953125, "loss_xval": 0.703125, "num_input_tokens_seen": 86815208, "step": 1550 }, { "epoch": 3.4543429844097995, "grad_norm": 13.833768844604492, "learning_rate": 1e-06, "loss": 0.9415, "num_input_tokens_seen": 86869804, "step": 1551 }, { "epoch": 3.4543429844097995, "loss": 0.8110692501068115, "loss_ce": 0.00027830369072034955, "loss_iou": 0.3515625, "loss_num": 0.0218505859375, "loss_xval": 0.8125, "num_input_tokens_seen": 86869804, "step": 1551 }, { "epoch": 3.4565701559020043, "grad_norm": 21.14474868774414, "learning_rate": 1e-06, "loss": 0.8695, "num_input_tokens_seen": 86925632, "step": 1552 }, { "epoch": 3.4565701559020043, "loss": 0.7101556062698364, "loss_ce": 0.0004388463275972754, "loss_iou": 0.26953125, "loss_num": 0.0341796875, "loss_xval": 0.7109375, "num_input_tokens_seen": 86925632, "step": 1552 }, { "epoch": 3.458797327394209, "grad_norm": 23.79280662536621, "learning_rate": 1e-06, "loss": 1.0303, "num_input_tokens_seen": 86983484, "step": 1553 }, { "epoch": 3.458797327394209, "loss": 1.0085933208465576, "loss_ce": 0.0002924925647675991, "loss_iou": 0.443359375, "loss_num": 0.0244140625, "loss_xval": 1.0078125, "num_input_tokens_seen": 86983484, "step": 1553 }, { "epoch": 3.461024498886414, "grad_norm": 21.37234115600586, "learning_rate": 1e-06, "loss": 0.6182, "num_input_tokens_seen": 87040560, "step": 1554 }, { "epoch": 3.461024498886414, "loss": 0.8163036108016968, "loss_ce": 0.0003856316034216434, "loss_iou": 0.33203125, "loss_num": 0.0306396484375, "loss_xval": 0.81640625, "num_input_tokens_seen": 87040560, "step": 1554 }, { "epoch": 3.463251670378619, "grad_norm": 22.6509952545166, "learning_rate": 1e-06, "loss": 0.7905, "num_input_tokens_seen": 87096772, "step": 1555 }, { "epoch": 3.463251670378619, "loss": 0.7041604518890381, "loss_ce": 0.00030306505504995584, "loss_iou": 0.28515625, "loss_num": 0.0267333984375, "loss_xval": 0.703125, "num_input_tokens_seen": 87096772, "step": 1555 }, { "epoch": 3.4654788418708242, "grad_norm": 16.446748733520508, "learning_rate": 1e-06, "loss": 0.9045, "num_input_tokens_seen": 87153824, "step": 1556 }, { "epoch": 3.4654788418708242, "loss": 0.9084920287132263, "loss_ce": 0.0002888740273192525, "loss_iou": 0.392578125, "loss_num": 0.0244140625, "loss_xval": 0.90625, "num_input_tokens_seen": 87153824, "step": 1556 }, { "epoch": 3.467706013363029, "grad_norm": 13.511149406433105, "learning_rate": 1e-06, "loss": 1.0573, "num_input_tokens_seen": 87209524, "step": 1557 }, { "epoch": 3.467706013363029, "loss": 1.2329661846160889, "loss_ce": 0.0003001574077643454, "loss_iou": 0.5, "loss_num": 0.045654296875, "loss_xval": 1.234375, "num_input_tokens_seen": 87209524, "step": 1557 }, { "epoch": 3.469933184855234, "grad_norm": 57.4884147644043, "learning_rate": 1e-06, "loss": 0.8035, "num_input_tokens_seen": 87264100, "step": 1558 }, { "epoch": 3.469933184855234, "loss": 0.8297716379165649, "loss_ce": 0.00030388019513338804, "loss_iou": 0.359375, "loss_num": 0.0224609375, "loss_xval": 0.828125, "num_input_tokens_seen": 87264100, "step": 1558 }, { "epoch": 3.472160356347439, "grad_norm": 15.150367736816406, "learning_rate": 1e-06, "loss": 0.9682, "num_input_tokens_seen": 87321728, "step": 1559 }, { "epoch": 3.472160356347439, "loss": 1.013202428817749, "loss_ce": 0.0002630281960591674, "loss_iou": 0.435546875, "loss_num": 0.0286865234375, "loss_xval": 1.015625, "num_input_tokens_seen": 87321728, "step": 1559 }, { "epoch": 3.4743875278396437, "grad_norm": 26.730871200561523, "learning_rate": 1e-06, "loss": 0.8033, "num_input_tokens_seen": 87378128, "step": 1560 }, { "epoch": 3.4743875278396437, "loss": 0.7272953391075134, "loss_ce": 0.0004887018585577607, "loss_iou": 0.326171875, "loss_num": 0.014892578125, "loss_xval": 0.7265625, "num_input_tokens_seen": 87378128, "step": 1560 }, { "epoch": 3.4766146993318485, "grad_norm": 63.709983825683594, "learning_rate": 1e-06, "loss": 0.9815, "num_input_tokens_seen": 87432972, "step": 1561 }, { "epoch": 3.4766146993318485, "loss": 0.9485345482826233, "loss_ce": 0.0002923659631051123, "loss_iou": 0.3828125, "loss_num": 0.03662109375, "loss_xval": 0.94921875, "num_input_tokens_seen": 87432972, "step": 1561 }, { "epoch": 3.4788418708240534, "grad_norm": 19.27259063720703, "learning_rate": 1e-06, "loss": 0.7692, "num_input_tokens_seen": 87486680, "step": 1562 }, { "epoch": 3.4788418708240534, "loss": 0.7349532842636108, "loss_ce": 0.00033419817918911576, "loss_iou": 0.33203125, "loss_num": 0.01409912109375, "loss_xval": 0.734375, "num_input_tokens_seen": 87486680, "step": 1562 }, { "epoch": 3.4810690423162582, "grad_norm": 17.00948143005371, "learning_rate": 1e-06, "loss": 0.8309, "num_input_tokens_seen": 87543352, "step": 1563 }, { "epoch": 3.4810690423162582, "loss": 0.9362764954566956, "loss_ce": 0.00024133155238814652, "loss_iou": 0.412109375, "loss_num": 0.022216796875, "loss_xval": 0.9375, "num_input_tokens_seen": 87543352, "step": 1563 }, { "epoch": 3.483296213808463, "grad_norm": 24.209060668945312, "learning_rate": 1e-06, "loss": 0.9046, "num_input_tokens_seen": 87600896, "step": 1564 }, { "epoch": 3.483296213808463, "loss": 1.042458176612854, "loss_ce": 0.00046601821668446064, "loss_iou": 0.43359375, "loss_num": 0.03515625, "loss_xval": 1.0390625, "num_input_tokens_seen": 87600896, "step": 1564 }, { "epoch": 3.485523385300668, "grad_norm": 14.757556915283203, "learning_rate": 1e-06, "loss": 0.9321, "num_input_tokens_seen": 87658544, "step": 1565 }, { "epoch": 3.485523385300668, "loss": 0.7475967407226562, "loss_ce": 0.000282272812910378, "loss_iou": 0.3046875, "loss_num": 0.02783203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 87658544, "step": 1565 }, { "epoch": 3.4877505567928733, "grad_norm": 29.213010787963867, "learning_rate": 1e-06, "loss": 0.9172, "num_input_tokens_seen": 87715960, "step": 1566 }, { "epoch": 3.4877505567928733, "loss": 0.8977096676826477, "loss_ce": 0.00049288832815364, "loss_iou": 0.3515625, "loss_num": 0.038818359375, "loss_xval": 0.8984375, "num_input_tokens_seen": 87715960, "step": 1566 }, { "epoch": 3.489977728285078, "grad_norm": 13.978538513183594, "learning_rate": 1e-06, "loss": 0.8842, "num_input_tokens_seen": 87769772, "step": 1567 }, { "epoch": 3.489977728285078, "loss": 0.8801678419113159, "loss_ce": 0.0002850402379408479, "loss_iou": 0.37109375, "loss_num": 0.0272216796875, "loss_xval": 0.87890625, "num_input_tokens_seen": 87769772, "step": 1567 }, { "epoch": 3.492204899777283, "grad_norm": 28.59236717224121, "learning_rate": 1e-06, "loss": 0.646, "num_input_tokens_seen": 87825804, "step": 1568 }, { "epoch": 3.492204899777283, "loss": 0.6284915208816528, "loss_ce": 0.00031768600456416607, "loss_iou": 0.28125, "loss_num": 0.0130615234375, "loss_xval": 0.62890625, "num_input_tokens_seen": 87825804, "step": 1568 }, { "epoch": 3.494432071269488, "grad_norm": 27.629867553710938, "learning_rate": 1e-06, "loss": 0.8455, "num_input_tokens_seen": 87881064, "step": 1569 }, { "epoch": 3.494432071269488, "loss": 0.7841451168060303, "loss_ce": 0.00027057109400629997, "loss_iou": 0.34375, "loss_num": 0.0191650390625, "loss_xval": 0.78515625, "num_input_tokens_seen": 87881064, "step": 1569 }, { "epoch": 3.4966592427616927, "grad_norm": 28.423208236694336, "learning_rate": 1e-06, "loss": 1.0515, "num_input_tokens_seen": 87938884, "step": 1570 }, { "epoch": 3.4966592427616927, "loss": 1.0150330066680908, "loss_ce": 0.00038457888877019286, "loss_iou": 0.4375, "loss_num": 0.0279541015625, "loss_xval": 1.015625, "num_input_tokens_seen": 87938884, "step": 1570 }, { "epoch": 3.4988864142538976, "grad_norm": 80.1189956665039, "learning_rate": 1e-06, "loss": 0.8306, "num_input_tokens_seen": 87991052, "step": 1571 }, { "epoch": 3.4988864142538976, "loss": 0.9712834358215332, "loss_ce": 0.0005803282838314772, "loss_iou": 0.412109375, "loss_num": 0.0294189453125, "loss_xval": 0.96875, "num_input_tokens_seen": 87991052, "step": 1571 }, { "epoch": 3.5011135857461024, "grad_norm": 33.21288299560547, "learning_rate": 1e-06, "loss": 0.999, "num_input_tokens_seen": 88047996, "step": 1572 }, { "epoch": 3.5011135857461024, "loss": 0.9822986721992493, "loss_ce": 0.00036505749449133873, "loss_iou": 0.412109375, "loss_num": 0.031982421875, "loss_xval": 0.98046875, "num_input_tokens_seen": 88047996, "step": 1572 }, { "epoch": 3.5033407572383073, "grad_norm": 27.994529724121094, "learning_rate": 1e-06, "loss": 0.8276, "num_input_tokens_seen": 88102512, "step": 1573 }, { "epoch": 3.5033407572383073, "loss": 0.9002130031585693, "loss_ce": 0.0003106549265794456, "loss_iou": 0.392578125, "loss_num": 0.0233154296875, "loss_xval": 0.8984375, "num_input_tokens_seen": 88102512, "step": 1573 }, { "epoch": 3.505567928730512, "grad_norm": 29.116310119628906, "learning_rate": 1e-06, "loss": 1.0666, "num_input_tokens_seen": 88156400, "step": 1574 }, { "epoch": 3.505567928730512, "loss": 1.0999196767807007, "loss_ce": 0.0003103648195974529, "loss_iou": 0.47265625, "loss_num": 0.031005859375, "loss_xval": 1.1015625, "num_input_tokens_seen": 88156400, "step": 1574 }, { "epoch": 3.507795100222717, "grad_norm": 18.990516662597656, "learning_rate": 1e-06, "loss": 0.7131, "num_input_tokens_seen": 88211188, "step": 1575 }, { "epoch": 3.507795100222717, "loss": 0.8792534470558167, "loss_ce": 0.000347210094332695, "loss_iou": 0.388671875, "loss_num": 0.02001953125, "loss_xval": 0.87890625, "num_input_tokens_seen": 88211188, "step": 1575 }, { "epoch": 3.510022271714922, "grad_norm": 21.322250366210938, "learning_rate": 1e-06, "loss": 0.8679, "num_input_tokens_seen": 88267384, "step": 1576 }, { "epoch": 3.510022271714922, "loss": 1.0999305248260498, "loss_ce": 0.0003211447037756443, "loss_iou": 0.482421875, "loss_num": 0.0272216796875, "loss_xval": 1.1015625, "num_input_tokens_seen": 88267384, "step": 1576 }, { "epoch": 3.5122494432071267, "grad_norm": 26.06310272216797, "learning_rate": 1e-06, "loss": 0.9818, "num_input_tokens_seen": 88323956, "step": 1577 }, { "epoch": 3.5122494432071267, "loss": 1.0118680000305176, "loss_ce": 0.0003933944390155375, "loss_iou": 0.40625, "loss_num": 0.03955078125, "loss_xval": 1.0078125, "num_input_tokens_seen": 88323956, "step": 1577 }, { "epoch": 3.5144766146993316, "grad_norm": 17.4585018157959, "learning_rate": 1e-06, "loss": 0.9207, "num_input_tokens_seen": 88377432, "step": 1578 }, { "epoch": 3.5144766146993316, "loss": 0.7224873304367065, "loss_ce": 0.00031937778112478554, "loss_iou": 0.314453125, "loss_num": 0.01904296875, "loss_xval": 0.72265625, "num_input_tokens_seen": 88377432, "step": 1578 }, { "epoch": 3.516703786191537, "grad_norm": 24.705142974853516, "learning_rate": 1e-06, "loss": 0.9077, "num_input_tokens_seen": 88433012, "step": 1579 }, { "epoch": 3.516703786191537, "loss": 1.0539125204086304, "loss_ce": 0.00044574099592864513, "loss_iou": 0.400390625, "loss_num": 0.05078125, "loss_xval": 1.0546875, "num_input_tokens_seen": 88433012, "step": 1579 }, { "epoch": 3.5189309576837418, "grad_norm": 30.141464233398438, "learning_rate": 1e-06, "loss": 0.7654, "num_input_tokens_seen": 88490756, "step": 1580 }, { "epoch": 3.5189309576837418, "loss": 0.8266535997390747, "loss_ce": 0.00048178687575273216, "loss_iou": 0.306640625, "loss_num": 0.042236328125, "loss_xval": 0.828125, "num_input_tokens_seen": 88490756, "step": 1580 }, { "epoch": 3.5211581291759466, "grad_norm": 21.32522964477539, "learning_rate": 1e-06, "loss": 0.8354, "num_input_tokens_seen": 88548592, "step": 1581 }, { "epoch": 3.5211581291759466, "loss": 0.9959642291069031, "loss_ce": 0.0006028971401974559, "loss_iou": 0.443359375, "loss_num": 0.021484375, "loss_xval": 0.99609375, "num_input_tokens_seen": 88548592, "step": 1581 }, { "epoch": 3.5233853006681515, "grad_norm": 26.187313079833984, "learning_rate": 1e-06, "loss": 0.7145, "num_input_tokens_seen": 88605516, "step": 1582 }, { "epoch": 3.5233853006681515, "loss": 0.4769740104675293, "loss_ce": 0.0002894522622227669, "loss_iou": 0.212890625, "loss_num": 0.01025390625, "loss_xval": 0.4765625, "num_input_tokens_seen": 88605516, "step": 1582 }, { "epoch": 3.5256124721603563, "grad_norm": 27.296201705932617, "learning_rate": 1e-06, "loss": 0.8584, "num_input_tokens_seen": 88660980, "step": 1583 }, { "epoch": 3.5256124721603563, "loss": 0.7954574823379517, "loss_ce": 0.0002915282384492457, "loss_iou": 0.33203125, "loss_num": 0.026123046875, "loss_xval": 0.796875, "num_input_tokens_seen": 88660980, "step": 1583 }, { "epoch": 3.527839643652561, "grad_norm": 18.88905143737793, "learning_rate": 1e-06, "loss": 0.898, "num_input_tokens_seen": 88715208, "step": 1584 }, { "epoch": 3.527839643652561, "loss": 0.7566571235656738, "loss_ce": 0.0003094491839874536, "loss_iou": 0.328125, "loss_num": 0.02001953125, "loss_xval": 0.7578125, "num_input_tokens_seen": 88715208, "step": 1584 }, { "epoch": 3.530066815144766, "grad_norm": 17.91330337524414, "learning_rate": 1e-06, "loss": 0.7627, "num_input_tokens_seen": 88773392, "step": 1585 }, { "epoch": 3.530066815144766, "loss": 0.8976699113845825, "loss_ce": 0.0004530604637693614, "loss_iou": 0.380859375, "loss_num": 0.02685546875, "loss_xval": 0.8984375, "num_input_tokens_seen": 88773392, "step": 1585 }, { "epoch": 3.532293986636971, "grad_norm": 15.070040702819824, "learning_rate": 1e-06, "loss": 1.0047, "num_input_tokens_seen": 88830772, "step": 1586 }, { "epoch": 3.532293986636971, "loss": 0.675557017326355, "loss_ce": 0.0002640365855768323, "loss_iou": 0.30078125, "loss_num": 0.0145263671875, "loss_xval": 0.67578125, "num_input_tokens_seen": 88830772, "step": 1586 }, { "epoch": 3.534521158129176, "grad_norm": 15.70621109008789, "learning_rate": 1e-06, "loss": 0.9061, "num_input_tokens_seen": 88887288, "step": 1587 }, { "epoch": 3.534521158129176, "loss": 1.0725452899932861, "loss_ce": 0.000279579107882455, "loss_iou": 0.431640625, "loss_num": 0.041259765625, "loss_xval": 1.0703125, "num_input_tokens_seen": 88887288, "step": 1587 }, { "epoch": 3.536748329621381, "grad_norm": 16.636899948120117, "learning_rate": 1e-06, "loss": 0.7762, "num_input_tokens_seen": 88943304, "step": 1588 }, { "epoch": 3.536748329621381, "loss": 0.7620242834091187, "loss_ce": 0.00030552022508345544, "loss_iou": 0.333984375, "loss_num": 0.0189208984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 88943304, "step": 1588 }, { "epoch": 3.538975501113586, "grad_norm": 19.53682518005371, "learning_rate": 1e-06, "loss": 0.7223, "num_input_tokens_seen": 89000188, "step": 1589 }, { "epoch": 3.538975501113586, "loss": 0.6042957305908203, "loss_ce": 0.00029182256548665464, "loss_iou": 0.24609375, "loss_num": 0.0224609375, "loss_xval": 0.60546875, "num_input_tokens_seen": 89000188, "step": 1589 }, { "epoch": 3.541202672605791, "grad_norm": 16.928892135620117, "learning_rate": 1e-06, "loss": 1.0069, "num_input_tokens_seen": 89055864, "step": 1590 }, { "epoch": 3.541202672605791, "loss": 1.0560284852981567, "loss_ce": 0.0003644293174147606, "loss_iou": 0.439453125, "loss_num": 0.035400390625, "loss_xval": 1.0546875, "num_input_tokens_seen": 89055864, "step": 1590 }, { "epoch": 3.5434298440979957, "grad_norm": 21.396242141723633, "learning_rate": 1e-06, "loss": 0.7472, "num_input_tokens_seen": 89114124, "step": 1591 }, { "epoch": 3.5434298440979957, "loss": 0.6990212202072144, "loss_ce": 0.00029076545615680516, "loss_iou": 0.314453125, "loss_num": 0.013916015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 89114124, "step": 1591 }, { "epoch": 3.5456570155902005, "grad_norm": 14.654166221618652, "learning_rate": 1e-06, "loss": 1.0152, "num_input_tokens_seen": 89170248, "step": 1592 }, { "epoch": 3.5456570155902005, "loss": 0.6828731894493103, "loss_ce": 0.0002559710410423577, "loss_iou": 0.263671875, "loss_num": 0.031494140625, "loss_xval": 0.68359375, "num_input_tokens_seen": 89170248, "step": 1592 }, { "epoch": 3.5478841870824054, "grad_norm": 15.999966621398926, "learning_rate": 1e-06, "loss": 0.7778, "num_input_tokens_seen": 89226156, "step": 1593 }, { "epoch": 3.5478841870824054, "loss": 0.9013996124267578, "loss_ce": 0.0002766078105196357, "loss_iou": 0.3984375, "loss_num": 0.0203857421875, "loss_xval": 0.90234375, "num_input_tokens_seen": 89226156, "step": 1593 }, { "epoch": 3.5501113585746102, "grad_norm": 39.26863479614258, "learning_rate": 1e-06, "loss": 1.2987, "num_input_tokens_seen": 89281032, "step": 1594 }, { "epoch": 3.5501113585746102, "loss": 1.2463330030441284, "loss_ce": 0.00048335548490285873, "loss_iou": 0.466796875, "loss_num": 0.06298828125, "loss_xval": 1.2421875, "num_input_tokens_seen": 89281032, "step": 1594 }, { "epoch": 3.552338530066815, "grad_norm": 18.40961456298828, "learning_rate": 1e-06, "loss": 1.1405, "num_input_tokens_seen": 89338460, "step": 1595 }, { "epoch": 3.552338530066815, "loss": 1.0916733741760254, "loss_ce": 0.000364768726285547, "loss_iou": 0.466796875, "loss_num": 0.031494140625, "loss_xval": 1.09375, "num_input_tokens_seen": 89338460, "step": 1595 }, { "epoch": 3.55456570155902, "grad_norm": 21.472373962402344, "learning_rate": 1e-06, "loss": 0.9278, "num_input_tokens_seen": 89396420, "step": 1596 }, { "epoch": 3.55456570155902, "loss": 1.1413955688476562, "loss_ce": 0.0002824011608026922, "loss_iou": 0.474609375, "loss_num": 0.03857421875, "loss_xval": 1.140625, "num_input_tokens_seen": 89396420, "step": 1596 }, { "epoch": 3.556792873051225, "grad_norm": 14.41667366027832, "learning_rate": 1e-06, "loss": 0.8159, "num_input_tokens_seen": 89450448, "step": 1597 }, { "epoch": 3.556792873051225, "loss": 1.1159937381744385, "loss_ce": 0.00027105180197395384, "loss_iou": 0.447265625, "loss_num": 0.044677734375, "loss_xval": 1.1171875, "num_input_tokens_seen": 89450448, "step": 1597 }, { "epoch": 3.5590200445434297, "grad_norm": 20.48485565185547, "learning_rate": 1e-06, "loss": 1.0086, "num_input_tokens_seen": 89508572, "step": 1598 }, { "epoch": 3.5590200445434297, "loss": 0.9943912029266357, "loss_ce": 0.00025058950996026397, "loss_iou": 0.41796875, "loss_num": 0.031982421875, "loss_xval": 0.9921875, "num_input_tokens_seen": 89508572, "step": 1598 }, { "epoch": 3.5612472160356345, "grad_norm": 33.4490852355957, "learning_rate": 1e-06, "loss": 1.206, "num_input_tokens_seen": 89561964, "step": 1599 }, { "epoch": 3.5612472160356345, "loss": 1.290135383605957, "loss_ce": 0.0003403525915928185, "loss_iou": 0.50390625, "loss_num": 0.055908203125, "loss_xval": 1.2890625, "num_input_tokens_seen": 89561964, "step": 1599 }, { "epoch": 3.5634743875278394, "grad_norm": 14.543143272399902, "learning_rate": 1e-06, "loss": 0.748, "num_input_tokens_seen": 89616600, "step": 1600 }, { "epoch": 3.5634743875278394, "loss": 0.8427092432975769, "loss_ce": 0.0009123453055508435, "loss_iou": 0.328125, "loss_num": 0.036865234375, "loss_xval": 0.84375, "num_input_tokens_seen": 89616600, "step": 1600 }, { "epoch": 3.5657015590200447, "grad_norm": 16.943578720092773, "learning_rate": 1e-06, "loss": 0.8685, "num_input_tokens_seen": 89672620, "step": 1601 }, { "epoch": 3.5657015590200447, "loss": 0.8163316249847412, "loss_ce": 0.00041370143298991024, "loss_iou": 0.337890625, "loss_num": 0.028076171875, "loss_xval": 0.81640625, "num_input_tokens_seen": 89672620, "step": 1601 }, { "epoch": 3.5679287305122496, "grad_norm": 16.56560707092285, "learning_rate": 1e-06, "loss": 0.8267, "num_input_tokens_seen": 89727488, "step": 1602 }, { "epoch": 3.5679287305122496, "loss": 0.9919605255126953, "loss_ce": 0.000261296343524009, "loss_iou": 0.423828125, "loss_num": 0.0291748046875, "loss_xval": 0.9921875, "num_input_tokens_seen": 89727488, "step": 1602 }, { "epoch": 3.5701559020044544, "grad_norm": 17.976058959960938, "learning_rate": 1e-06, "loss": 0.8624, "num_input_tokens_seen": 89781992, "step": 1603 }, { "epoch": 3.5701559020044544, "loss": 0.9566484093666077, "loss_ce": 0.0003496097633615136, "loss_iou": 0.40234375, "loss_num": 0.030029296875, "loss_xval": 0.95703125, "num_input_tokens_seen": 89781992, "step": 1603 }, { "epoch": 3.5723830734966593, "grad_norm": 21.834300994873047, "learning_rate": 1e-06, "loss": 1.0587, "num_input_tokens_seen": 89837948, "step": 1604 }, { "epoch": 3.5723830734966593, "loss": 1.1624748706817627, "loss_ce": 0.00036550615914165974, "loss_iou": 0.50390625, "loss_num": 0.0302734375, "loss_xval": 1.1640625, "num_input_tokens_seen": 89837948, "step": 1604 }, { "epoch": 3.574610244988864, "grad_norm": 33.87417221069336, "learning_rate": 1e-06, "loss": 0.9384, "num_input_tokens_seen": 89892432, "step": 1605 }, { "epoch": 3.574610244988864, "loss": 1.0320757627487183, "loss_ce": 0.0003374941006768495, "loss_iou": 0.4453125, "loss_num": 0.0284423828125, "loss_xval": 1.03125, "num_input_tokens_seen": 89892432, "step": 1605 }, { "epoch": 3.576837416481069, "grad_norm": 31.87815284729004, "learning_rate": 1e-06, "loss": 1.0271, "num_input_tokens_seen": 89949104, "step": 1606 }, { "epoch": 3.576837416481069, "loss": 0.6741030216217041, "loss_ce": 0.0002749357954598963, "loss_iou": 0.302734375, "loss_num": 0.01397705078125, "loss_xval": 0.671875, "num_input_tokens_seen": 89949104, "step": 1606 }, { "epoch": 3.579064587973274, "grad_norm": 19.64597511291504, "learning_rate": 1e-06, "loss": 0.688, "num_input_tokens_seen": 90007552, "step": 1607 }, { "epoch": 3.579064587973274, "loss": 0.7588503956794739, "loss_ce": 0.0003054735716432333, "loss_iou": 0.345703125, "loss_num": 0.0135498046875, "loss_xval": 0.7578125, "num_input_tokens_seen": 90007552, "step": 1607 }, { "epoch": 3.5812917594654787, "grad_norm": 16.6953067779541, "learning_rate": 1e-06, "loss": 0.841, "num_input_tokens_seen": 90065300, "step": 1608 }, { "epoch": 3.5812917594654787, "loss": 1.0172404050827026, "loss_ce": 0.00039471167838200927, "loss_iou": 0.42578125, "loss_num": 0.03271484375, "loss_xval": 1.015625, "num_input_tokens_seen": 90065300, "step": 1608 }, { "epoch": 3.5835189309576836, "grad_norm": 18.6095027923584, "learning_rate": 1e-06, "loss": 0.8472, "num_input_tokens_seen": 90117120, "step": 1609 }, { "epoch": 3.5835189309576836, "loss": 0.7410043478012085, "loss_ce": 0.00028169897268526256, "loss_iou": 0.310546875, "loss_num": 0.023681640625, "loss_xval": 0.7421875, "num_input_tokens_seen": 90117120, "step": 1609 }, { "epoch": 3.585746102449889, "grad_norm": 19.11724090576172, "learning_rate": 1e-06, "loss": 0.9566, "num_input_tokens_seen": 90171580, "step": 1610 }, { "epoch": 3.585746102449889, "loss": 0.7324100136756897, "loss_ce": 0.0004764144541695714, "loss_iou": 0.30859375, "loss_num": 0.0233154296875, "loss_xval": 0.73046875, "num_input_tokens_seen": 90171580, "step": 1610 }, { "epoch": 3.5879732739420938, "grad_norm": 47.91438674926758, "learning_rate": 1e-06, "loss": 0.9755, "num_input_tokens_seen": 90227144, "step": 1611 }, { "epoch": 3.5879732739420938, "loss": 1.2943049669265747, "loss_ce": 0.000359668250894174, "loss_iou": 0.54296875, "loss_num": 0.04248046875, "loss_xval": 1.296875, "num_input_tokens_seen": 90227144, "step": 1611 }, { "epoch": 3.5902004454342986, "grad_norm": 14.350851058959961, "learning_rate": 1e-06, "loss": 0.7686, "num_input_tokens_seen": 90285220, "step": 1612 }, { "epoch": 3.5902004454342986, "loss": 0.6935353875160217, "loss_ce": 0.0004201638512313366, "loss_iou": 0.3046875, "loss_num": 0.0166015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 90285220, "step": 1612 }, { "epoch": 3.5924276169265035, "grad_norm": 87.72432708740234, "learning_rate": 1e-06, "loss": 0.9391, "num_input_tokens_seen": 90341444, "step": 1613 }, { "epoch": 3.5924276169265035, "loss": 0.8298938274383545, "loss_ce": 0.00030400152900256217, "loss_iou": 0.345703125, "loss_num": 0.0274658203125, "loss_xval": 0.828125, "num_input_tokens_seen": 90341444, "step": 1613 }, { "epoch": 3.5946547884187083, "grad_norm": 18.14371681213379, "learning_rate": 1e-06, "loss": 0.8685, "num_input_tokens_seen": 90398880, "step": 1614 }, { "epoch": 3.5946547884187083, "loss": 0.9817934632301331, "loss_ce": 0.0003481835883576423, "loss_iou": 0.400390625, "loss_num": 0.0361328125, "loss_xval": 0.98046875, "num_input_tokens_seen": 90398880, "step": 1614 }, { "epoch": 3.596881959910913, "grad_norm": 18.2519588470459, "learning_rate": 1e-06, "loss": 0.8095, "num_input_tokens_seen": 90453864, "step": 1615 }, { "epoch": 3.596881959910913, "loss": 0.7134523391723633, "loss_ce": 0.00031755882082507014, "loss_iou": 0.31640625, "loss_num": 0.016357421875, "loss_xval": 0.71484375, "num_input_tokens_seen": 90453864, "step": 1615 }, { "epoch": 3.599109131403118, "grad_norm": 11.235553741455078, "learning_rate": 1e-06, "loss": 0.9908, "num_input_tokens_seen": 90508336, "step": 1616 }, { "epoch": 3.599109131403118, "loss": 1.2815724611282349, "loss_ce": 0.0003224927349947393, "loss_iou": 0.5390625, "loss_num": 0.041015625, "loss_xval": 1.28125, "num_input_tokens_seen": 90508336, "step": 1616 }, { "epoch": 3.601336302895323, "grad_norm": 19.534269332885742, "learning_rate": 1e-06, "loss": 0.9323, "num_input_tokens_seen": 90565836, "step": 1617 }, { "epoch": 3.601336302895323, "loss": 1.1978850364685059, "loss_ce": 0.0006193737499415874, "loss_iou": 0.435546875, "loss_num": 0.06494140625, "loss_xval": 1.1953125, "num_input_tokens_seen": 90565836, "step": 1617 }, { "epoch": 3.6035634743875278, "grad_norm": 16.99414825439453, "learning_rate": 1e-06, "loss": 0.791, "num_input_tokens_seen": 90621756, "step": 1618 }, { "epoch": 3.6035634743875278, "loss": 0.6094855070114136, "loss_ce": 0.0003546725492924452, "loss_iou": 0.265625, "loss_num": 0.015869140625, "loss_xval": 0.609375, "num_input_tokens_seen": 90621756, "step": 1618 }, { "epoch": 3.6057906458797326, "grad_norm": 12.391351699829102, "learning_rate": 1e-06, "loss": 0.736, "num_input_tokens_seen": 90679300, "step": 1619 }, { "epoch": 3.6057906458797326, "loss": 0.6363743543624878, "loss_ce": 0.0002659702149685472, "loss_iou": 0.26953125, "loss_num": 0.01904296875, "loss_xval": 0.63671875, "num_input_tokens_seen": 90679300, "step": 1619 }, { "epoch": 3.6080178173719375, "grad_norm": 19.21337127685547, "learning_rate": 1e-06, "loss": 0.9495, "num_input_tokens_seen": 90738552, "step": 1620 }, { "epoch": 3.6080178173719375, "loss": 0.9752398729324341, "loss_ce": 0.00038639939157292247, "loss_iou": 0.41796875, "loss_num": 0.0279541015625, "loss_xval": 0.9765625, "num_input_tokens_seen": 90738552, "step": 1620 }, { "epoch": 3.6102449888641424, "grad_norm": 14.483777046203613, "learning_rate": 1e-06, "loss": 0.7827, "num_input_tokens_seen": 90796600, "step": 1621 }, { "epoch": 3.6102449888641424, "loss": 0.7163407802581787, "loss_ce": 0.00027631394914351404, "loss_iou": 0.298828125, "loss_num": 0.0235595703125, "loss_xval": 0.71484375, "num_input_tokens_seen": 90796600, "step": 1621 }, { "epoch": 3.612472160356347, "grad_norm": 18.561866760253906, "learning_rate": 1e-06, "loss": 0.9083, "num_input_tokens_seen": 90852108, "step": 1622 }, { "epoch": 3.612472160356347, "loss": 0.7986100912094116, "loss_ce": 0.00027019885601475835, "loss_iou": 0.345703125, "loss_num": 0.021240234375, "loss_xval": 0.796875, "num_input_tokens_seen": 90852108, "step": 1622 }, { "epoch": 3.614699331848552, "grad_norm": 13.72581672668457, "learning_rate": 1e-06, "loss": 0.9149, "num_input_tokens_seen": 90909780, "step": 1623 }, { "epoch": 3.614699331848552, "loss": 0.8790796995162964, "loss_ce": 0.0002955020754598081, "loss_iou": 0.3515625, "loss_num": 0.035400390625, "loss_xval": 0.87890625, "num_input_tokens_seen": 90909780, "step": 1623 }, { "epoch": 3.6169265033407574, "grad_norm": 19.92896842956543, "learning_rate": 1e-06, "loss": 0.8137, "num_input_tokens_seen": 90964636, "step": 1624 }, { "epoch": 3.6169265033407574, "loss": 0.4973217248916626, "loss_ce": 0.00025139018543995917, "loss_iou": 0.2001953125, "loss_num": 0.0194091796875, "loss_xval": 0.49609375, "num_input_tokens_seen": 90964636, "step": 1624 }, { "epoch": 3.6191536748329622, "grad_norm": 24.191030502319336, "learning_rate": 1e-06, "loss": 0.8298, "num_input_tokens_seen": 91020328, "step": 1625 }, { "epoch": 3.6191536748329622, "loss": 0.8796758651733398, "loss_ce": 0.0002813548780977726, "loss_iou": 0.34765625, "loss_num": 0.036865234375, "loss_xval": 0.87890625, "num_input_tokens_seen": 91020328, "step": 1625 }, { "epoch": 3.621380846325167, "grad_norm": 18.71809959411621, "learning_rate": 1e-06, "loss": 0.7477, "num_input_tokens_seen": 91076856, "step": 1626 }, { "epoch": 3.621380846325167, "loss": 0.6939845085144043, "loss_ce": 0.0003809723712038249, "loss_iou": 0.27734375, "loss_num": 0.027587890625, "loss_xval": 0.6953125, "num_input_tokens_seen": 91076856, "step": 1626 }, { "epoch": 3.623608017817372, "grad_norm": 15.862205505371094, "learning_rate": 1e-06, "loss": 0.8313, "num_input_tokens_seen": 91136100, "step": 1627 }, { "epoch": 3.623608017817372, "loss": 0.8305182456970215, "loss_ce": 0.0007453373400494456, "loss_iou": 0.3359375, "loss_num": 0.0311279296875, "loss_xval": 0.828125, "num_input_tokens_seen": 91136100, "step": 1627 }, { "epoch": 3.625835189309577, "grad_norm": 23.61324691772461, "learning_rate": 1e-06, "loss": 0.8207, "num_input_tokens_seen": 91191620, "step": 1628 }, { "epoch": 3.625835189309577, "loss": 0.6336687207221985, "loss_ce": 0.0002458438102621585, "loss_iou": 0.27734375, "loss_num": 0.0157470703125, "loss_xval": 0.6328125, "num_input_tokens_seen": 91191620, "step": 1628 }, { "epoch": 3.6280623608017817, "grad_norm": 18.18329620361328, "learning_rate": 1e-06, "loss": 0.9281, "num_input_tokens_seen": 91250588, "step": 1629 }, { "epoch": 3.6280623608017817, "loss": 1.0516316890716553, "loss_ce": 0.0003620931529439986, "loss_iou": 0.421875, "loss_num": 0.04150390625, "loss_xval": 1.0546875, "num_input_tokens_seen": 91250588, "step": 1629 }, { "epoch": 3.6302895322939865, "grad_norm": 18.133012771606445, "learning_rate": 1e-06, "loss": 0.5779, "num_input_tokens_seen": 91308444, "step": 1630 }, { "epoch": 3.6302895322939865, "loss": 0.7188634872436523, "loss_ce": 0.0002355621982133016, "loss_iou": 0.29296875, "loss_num": 0.026611328125, "loss_xval": 0.71875, "num_input_tokens_seen": 91308444, "step": 1630 }, { "epoch": 3.6325167037861914, "grad_norm": 21.32465934753418, "learning_rate": 1e-06, "loss": 0.7386, "num_input_tokens_seen": 91363304, "step": 1631 }, { "epoch": 3.6325167037861914, "loss": 0.7637060284614563, "loss_ce": 0.0002782873052638024, "loss_iou": 0.337890625, "loss_num": 0.0172119140625, "loss_xval": 0.76171875, "num_input_tokens_seen": 91363304, "step": 1631 }, { "epoch": 3.6347438752783967, "grad_norm": 15.17698860168457, "learning_rate": 1e-06, "loss": 1.0197, "num_input_tokens_seen": 91417932, "step": 1632 }, { "epoch": 3.6347438752783967, "loss": 0.6608976125717163, "loss_ce": 0.0002531085046939552, "loss_iou": 0.244140625, "loss_num": 0.034423828125, "loss_xval": 0.66015625, "num_input_tokens_seen": 91417932, "step": 1632 }, { "epoch": 3.6369710467706016, "grad_norm": 19.845325469970703, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 91474376, "step": 1633 }, { "epoch": 3.6369710467706016, "loss": 0.8098554611206055, "loss_ce": 0.0002851108438335359, "loss_iou": 0.3359375, "loss_num": 0.02783203125, "loss_xval": 0.80859375, "num_input_tokens_seen": 91474376, "step": 1633 }, { "epoch": 3.6391982182628064, "grad_norm": 18.74712371826172, "learning_rate": 1e-06, "loss": 0.8073, "num_input_tokens_seen": 91529408, "step": 1634 }, { "epoch": 3.6391982182628064, "loss": 0.799332857131958, "loss_ce": 0.0002606004709377885, "loss_iou": 0.33984375, "loss_num": 0.024169921875, "loss_xval": 0.80078125, "num_input_tokens_seen": 91529408, "step": 1634 }, { "epoch": 3.6414253897550113, "grad_norm": 16.073997497558594, "learning_rate": 1e-06, "loss": 0.6983, "num_input_tokens_seen": 91584964, "step": 1635 }, { "epoch": 3.6414253897550113, "loss": 0.5875762104988098, "loss_ce": 0.000418026524130255, "loss_iou": 0.271484375, "loss_num": 0.0091552734375, "loss_xval": 0.5859375, "num_input_tokens_seen": 91584964, "step": 1635 }, { "epoch": 3.643652561247216, "grad_norm": 26.88153076171875, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 91642164, "step": 1636 }, { "epoch": 3.643652561247216, "loss": 0.8176705837249756, "loss_ce": 0.00028783181915059686, "loss_iou": 0.33203125, "loss_num": 0.03076171875, "loss_xval": 0.81640625, "num_input_tokens_seen": 91642164, "step": 1636 }, { "epoch": 3.645879732739421, "grad_norm": 29.69915771484375, "learning_rate": 1e-06, "loss": 1.0198, "num_input_tokens_seen": 91697608, "step": 1637 }, { "epoch": 3.645879732739421, "loss": 0.971588134765625, "loss_ce": 0.0003966961521655321, "loss_iou": 0.39453125, "loss_num": 0.0361328125, "loss_xval": 0.97265625, "num_input_tokens_seen": 91697608, "step": 1637 }, { "epoch": 3.648106904231626, "grad_norm": 14.602875709533691, "learning_rate": 1e-06, "loss": 0.7628, "num_input_tokens_seen": 91755904, "step": 1638 }, { "epoch": 3.648106904231626, "loss": 0.7990860342979431, "loss_ce": 0.0002579537685960531, "loss_iou": 0.34765625, "loss_num": 0.0208740234375, "loss_xval": 0.796875, "num_input_tokens_seen": 91755904, "step": 1638 }, { "epoch": 3.6503340757238307, "grad_norm": 19.639419555664062, "learning_rate": 1e-06, "loss": 0.9729, "num_input_tokens_seen": 91813308, "step": 1639 }, { "epoch": 3.6503340757238307, "loss": 0.7160975933074951, "loss_ce": 0.0002772239677142352, "loss_iou": 0.30078125, "loss_num": 0.0225830078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 91813308, "step": 1639 }, { "epoch": 3.6525612472160356, "grad_norm": 26.63214683532715, "learning_rate": 1e-06, "loss": 0.7171, "num_input_tokens_seen": 91870888, "step": 1640 }, { "epoch": 3.6525612472160356, "loss": 0.6993926763534546, "loss_ce": 0.0004180770483799279, "loss_iou": 0.30859375, "loss_num": 0.0166015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 91870888, "step": 1640 }, { "epoch": 3.6547884187082404, "grad_norm": 24.945594787597656, "learning_rate": 1e-06, "loss": 0.7824, "num_input_tokens_seen": 91928644, "step": 1641 }, { "epoch": 3.6547884187082404, "loss": 0.8066414594650269, "loss_ce": 0.00036706856917589903, "loss_iou": 0.322265625, "loss_num": 0.0322265625, "loss_xval": 0.8046875, "num_input_tokens_seen": 91928644, "step": 1641 }, { "epoch": 3.6570155902004453, "grad_norm": 28.210344314575195, "learning_rate": 1e-06, "loss": 0.7365, "num_input_tokens_seen": 91982148, "step": 1642 }, { "epoch": 3.6570155902004453, "loss": 0.6948254704475403, "loss_ce": 0.0014660632004961371, "loss_iou": 0.3046875, "loss_num": 0.016357421875, "loss_xval": 0.6953125, "num_input_tokens_seen": 91982148, "step": 1642 }, { "epoch": 3.65924276169265, "grad_norm": 83.1476821899414, "learning_rate": 1e-06, "loss": 0.8233, "num_input_tokens_seen": 92038604, "step": 1643 }, { "epoch": 3.65924276169265, "loss": 1.0671532154083252, "loss_ce": 0.000502894283272326, "loss_iou": 0.453125, "loss_num": 0.031982421875, "loss_xval": 1.0703125, "num_input_tokens_seen": 92038604, "step": 1643 }, { "epoch": 3.661469933184855, "grad_norm": 19.53736114501953, "learning_rate": 1e-06, "loss": 0.6483, "num_input_tokens_seen": 92094664, "step": 1644 }, { "epoch": 3.661469933184855, "loss": 0.4442579448223114, "loss_ce": 0.0002882296103052795, "loss_iou": 0.1650390625, "loss_num": 0.022705078125, "loss_xval": 0.443359375, "num_input_tokens_seen": 92094664, "step": 1644 }, { "epoch": 3.66369710467706, "grad_norm": 30.144088745117188, "learning_rate": 1e-06, "loss": 0.8099, "num_input_tokens_seen": 92149264, "step": 1645 }, { "epoch": 3.66369710467706, "loss": 0.6731590032577515, "loss_ce": 0.0003074193373322487, "loss_iou": 0.275390625, "loss_num": 0.0242919921875, "loss_xval": 0.671875, "num_input_tokens_seen": 92149264, "step": 1645 }, { "epoch": 3.665924276169265, "grad_norm": 24.664552688598633, "learning_rate": 1e-06, "loss": 0.9853, "num_input_tokens_seen": 92207820, "step": 1646 }, { "epoch": 3.665924276169265, "loss": 0.8198744654655457, "loss_ce": 0.000294416124233976, "loss_iou": 0.3515625, "loss_num": 0.0235595703125, "loss_xval": 0.8203125, "num_input_tokens_seen": 92207820, "step": 1646 }, { "epoch": 3.66815144766147, "grad_norm": 17.04414939880371, "learning_rate": 1e-06, "loss": 0.6132, "num_input_tokens_seen": 92263900, "step": 1647 }, { "epoch": 3.66815144766147, "loss": 0.5955994725227356, "loss_ce": 0.0003235829062759876, "loss_iou": 0.2451171875, "loss_num": 0.0211181640625, "loss_xval": 0.59375, "num_input_tokens_seen": 92263900, "step": 1647 }, { "epoch": 3.670378619153675, "grad_norm": 26.547033309936523, "learning_rate": 1e-06, "loss": 0.7967, "num_input_tokens_seen": 92319188, "step": 1648 }, { "epoch": 3.670378619153675, "loss": 0.6533565521240234, "loss_ce": 0.00028039264725521207, "loss_iou": 0.291015625, "loss_num": 0.01446533203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 92319188, "step": 1648 }, { "epoch": 3.6726057906458798, "grad_norm": 20.542510986328125, "learning_rate": 1e-06, "loss": 0.804, "num_input_tokens_seen": 92375980, "step": 1649 }, { "epoch": 3.6726057906458798, "loss": 0.8124038577079773, "loss_ce": 0.00039214000571519136, "loss_iou": 0.30859375, "loss_num": 0.0390625, "loss_xval": 0.8125, "num_input_tokens_seen": 92375980, "step": 1649 }, { "epoch": 3.6748329621380846, "grad_norm": 77.64947509765625, "learning_rate": 1e-06, "loss": 1.049, "num_input_tokens_seen": 92427792, "step": 1650 }, { "epoch": 3.6748329621380846, "loss": 0.7283252477645874, "loss_ce": 0.00029791187262162566, "loss_iou": 0.31640625, "loss_num": 0.019287109375, "loss_xval": 0.7265625, "num_input_tokens_seen": 92427792, "step": 1650 }, { "epoch": 3.6770601336302895, "grad_norm": 20.271570205688477, "learning_rate": 1e-06, "loss": 0.7777, "num_input_tokens_seen": 92484212, "step": 1651 }, { "epoch": 3.6770601336302895, "loss": 0.6192773580551147, "loss_ce": 0.00025881448527798057, "loss_iou": 0.267578125, "loss_num": 0.0172119140625, "loss_xval": 0.6171875, "num_input_tokens_seen": 92484212, "step": 1651 }, { "epoch": 3.6792873051224944, "grad_norm": 15.280570030212402, "learning_rate": 1e-06, "loss": 0.7805, "num_input_tokens_seen": 92538032, "step": 1652 }, { "epoch": 3.6792873051224944, "loss": 0.6725317239761353, "loss_ce": 0.00029047008138149977, "loss_iou": 0.283203125, "loss_num": 0.0211181640625, "loss_xval": 0.671875, "num_input_tokens_seen": 92538032, "step": 1652 }, { "epoch": 3.681514476614699, "grad_norm": 17.816753387451172, "learning_rate": 1e-06, "loss": 1.012, "num_input_tokens_seen": 92592792, "step": 1653 }, { "epoch": 3.681514476614699, "loss": 0.8765089511871338, "loss_ce": 0.0002882035914808512, "loss_iou": 0.384765625, "loss_num": 0.0213623046875, "loss_xval": 0.875, "num_input_tokens_seen": 92592792, "step": 1653 }, { "epoch": 3.683741648106904, "grad_norm": 34.85762405395508, "learning_rate": 1e-06, "loss": 0.9599, "num_input_tokens_seen": 92649552, "step": 1654 }, { "epoch": 3.683741648106904, "loss": 1.0684025287628174, "loss_ce": 0.00028726100572384894, "loss_iou": 0.453125, "loss_num": 0.0322265625, "loss_xval": 1.0703125, "num_input_tokens_seen": 92649552, "step": 1654 }, { "epoch": 3.6859688195991094, "grad_norm": 16.367671966552734, "learning_rate": 1e-06, "loss": 0.7476, "num_input_tokens_seen": 92705912, "step": 1655 }, { "epoch": 3.6859688195991094, "loss": 0.467061311006546, "loss_ce": 0.0002644392370712012, "loss_iou": 0.2080078125, "loss_num": 0.01025390625, "loss_xval": 0.466796875, "num_input_tokens_seen": 92705912, "step": 1655 }, { "epoch": 3.6881959910913142, "grad_norm": 28.718902587890625, "learning_rate": 1e-06, "loss": 0.8724, "num_input_tokens_seen": 92764576, "step": 1656 }, { "epoch": 3.6881959910913142, "loss": 0.7795916795730591, "loss_ce": 0.0002947830653283745, "loss_iou": 0.328125, "loss_num": 0.0244140625, "loss_xval": 0.78125, "num_input_tokens_seen": 92764576, "step": 1656 }, { "epoch": 3.690423162583519, "grad_norm": 25.93492317199707, "learning_rate": 1e-06, "loss": 0.9498, "num_input_tokens_seen": 92818528, "step": 1657 }, { "epoch": 3.690423162583519, "loss": 1.08326256275177, "loss_ce": 0.00025476596783846617, "loss_iou": 0.474609375, "loss_num": 0.026611328125, "loss_xval": 1.0859375, "num_input_tokens_seen": 92818528, "step": 1657 }, { "epoch": 3.692650334075724, "grad_norm": 16.00531768798828, "learning_rate": 1e-06, "loss": 1.0849, "num_input_tokens_seen": 92874168, "step": 1658 }, { "epoch": 3.692650334075724, "loss": 0.8632932901382446, "loss_ce": 0.0002562026202213019, "loss_iou": 0.384765625, "loss_num": 0.018798828125, "loss_xval": 0.86328125, "num_input_tokens_seen": 92874168, "step": 1658 }, { "epoch": 3.694877505567929, "grad_norm": 48.529300689697266, "learning_rate": 1e-06, "loss": 0.9435, "num_input_tokens_seen": 92926172, "step": 1659 }, { "epoch": 3.694877505567929, "loss": 0.9439380168914795, "loss_ce": 0.000822762493044138, "loss_iou": 0.40234375, "loss_num": 0.02783203125, "loss_xval": 0.94140625, "num_input_tokens_seen": 92926172, "step": 1659 }, { "epoch": 3.6971046770601337, "grad_norm": 16.016910552978516, "learning_rate": 1e-06, "loss": 0.7698, "num_input_tokens_seen": 92981500, "step": 1660 }, { "epoch": 3.6971046770601337, "loss": 0.7871308326721191, "loss_ce": 0.0002656061842571944, "loss_iou": 0.341796875, "loss_num": 0.0201416015625, "loss_xval": 0.78515625, "num_input_tokens_seen": 92981500, "step": 1660 }, { "epoch": 3.6993318485523385, "grad_norm": 18.289608001708984, "learning_rate": 1e-06, "loss": 1.0147, "num_input_tokens_seen": 93036804, "step": 1661 }, { "epoch": 3.6993318485523385, "loss": 1.0226683616638184, "loss_ce": 0.00032947887666523457, "loss_iou": 0.4296875, "loss_num": 0.031982421875, "loss_xval": 1.0234375, "num_input_tokens_seen": 93036804, "step": 1661 }, { "epoch": 3.7015590200445434, "grad_norm": 15.377629280090332, "learning_rate": 1e-06, "loss": 0.7615, "num_input_tokens_seen": 93093888, "step": 1662 }, { "epoch": 3.7015590200445434, "loss": 0.8022950887680054, "loss_ce": 0.00029318686574697495, "loss_iou": 0.328125, "loss_num": 0.0291748046875, "loss_xval": 0.80078125, "num_input_tokens_seen": 93093888, "step": 1662 }, { "epoch": 3.7037861915367483, "grad_norm": 16.651514053344727, "learning_rate": 1e-06, "loss": 0.8527, "num_input_tokens_seen": 93151972, "step": 1663 }, { "epoch": 3.7037861915367483, "loss": 0.8089163303375244, "loss_ce": 0.0003225764958187938, "loss_iou": 0.353515625, "loss_num": 0.0203857421875, "loss_xval": 0.80859375, "num_input_tokens_seen": 93151972, "step": 1663 }, { "epoch": 3.706013363028953, "grad_norm": 87.34024047851562, "learning_rate": 1e-06, "loss": 0.9063, "num_input_tokens_seen": 93208668, "step": 1664 }, { "epoch": 3.706013363028953, "loss": 0.6086684465408325, "loss_ce": 0.00026995883672498167, "loss_iou": 0.25390625, "loss_num": 0.019775390625, "loss_xval": 0.609375, "num_input_tokens_seen": 93208668, "step": 1664 }, { "epoch": 3.708240534521158, "grad_norm": 22.259021759033203, "learning_rate": 1e-06, "loss": 0.9715, "num_input_tokens_seen": 93266120, "step": 1665 }, { "epoch": 3.708240534521158, "loss": 1.0723727941513062, "loss_ce": 0.0003513463889248669, "loss_iou": 0.4453125, "loss_num": 0.03662109375, "loss_xval": 1.0703125, "num_input_tokens_seen": 93266120, "step": 1665 }, { "epoch": 3.710467706013363, "grad_norm": 13.237698554992676, "learning_rate": 1e-06, "loss": 0.7824, "num_input_tokens_seen": 93322796, "step": 1666 }, { "epoch": 3.710467706013363, "loss": 0.803238034248352, "loss_ce": 0.0002595084370113909, "loss_iou": 0.33984375, "loss_num": 0.0245361328125, "loss_xval": 0.8046875, "num_input_tokens_seen": 93322796, "step": 1666 }, { "epoch": 3.7126948775055677, "grad_norm": 30.929845809936523, "learning_rate": 1e-06, "loss": 0.9966, "num_input_tokens_seen": 93378408, "step": 1667 }, { "epoch": 3.7126948775055677, "loss": 0.9339191913604736, "loss_ce": 0.0003254116454627365, "loss_iou": 0.423828125, "loss_num": 0.0172119140625, "loss_xval": 0.93359375, "num_input_tokens_seen": 93378408, "step": 1667 }, { "epoch": 3.7149220489977726, "grad_norm": 17.648576736450195, "learning_rate": 1e-06, "loss": 0.9418, "num_input_tokens_seen": 93435448, "step": 1668 }, { "epoch": 3.7149220489977726, "loss": 0.8993349075317383, "loss_ce": 0.0013856550212949514, "loss_iou": 0.373046875, "loss_num": 0.0306396484375, "loss_xval": 0.8984375, "num_input_tokens_seen": 93435448, "step": 1668 }, { "epoch": 3.717149220489978, "grad_norm": 18.18744659423828, "learning_rate": 1e-06, "loss": 0.5543, "num_input_tokens_seen": 93490744, "step": 1669 }, { "epoch": 3.717149220489978, "loss": 0.5734395980834961, "loss_ce": 0.00044156977673992515, "loss_iou": 0.2470703125, "loss_num": 0.0157470703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 93490744, "step": 1669 }, { "epoch": 3.7193763919821827, "grad_norm": 26.080154418945312, "learning_rate": 1e-06, "loss": 0.8468, "num_input_tokens_seen": 93546192, "step": 1670 }, { "epoch": 3.7193763919821827, "loss": 1.0555299520492554, "loss_ce": 0.00035410295822657645, "loss_iou": 0.455078125, "loss_num": 0.0286865234375, "loss_xval": 1.0546875, "num_input_tokens_seen": 93546192, "step": 1670 }, { "epoch": 3.7216035634743876, "grad_norm": 19.251693725585938, "learning_rate": 1e-06, "loss": 0.9076, "num_input_tokens_seen": 93604348, "step": 1671 }, { "epoch": 3.7216035634743876, "loss": 0.8526378273963928, "loss_ce": 0.00034295275690965354, "loss_iou": 0.361328125, "loss_num": 0.0263671875, "loss_xval": 0.8515625, "num_input_tokens_seen": 93604348, "step": 1671 }, { "epoch": 3.7238307349665924, "grad_norm": 15.625646591186523, "learning_rate": 1e-06, "loss": 0.881, "num_input_tokens_seen": 93659012, "step": 1672 }, { "epoch": 3.7238307349665924, "loss": 0.9302690625190735, "loss_ce": 0.00033744628308340907, "loss_iou": 0.3984375, "loss_num": 0.0262451171875, "loss_xval": 0.9296875, "num_input_tokens_seen": 93659012, "step": 1672 }, { "epoch": 3.7260579064587973, "grad_norm": 47.977176666259766, "learning_rate": 1e-06, "loss": 1.0519, "num_input_tokens_seen": 93710680, "step": 1673 }, { "epoch": 3.7260579064587973, "loss": 0.8930314779281616, "loss_ce": 0.0006975086871534586, "loss_iou": 0.375, "loss_num": 0.028564453125, "loss_xval": 0.890625, "num_input_tokens_seen": 93710680, "step": 1673 }, { "epoch": 3.728285077951002, "grad_norm": 27.666378021240234, "learning_rate": 1e-06, "loss": 0.8055, "num_input_tokens_seen": 93766956, "step": 1674 }, { "epoch": 3.728285077951002, "loss": 0.6055097579956055, "loss_ce": 0.000285172660369426, "loss_iou": 0.2490234375, "loss_num": 0.0213623046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 93766956, "step": 1674 }, { "epoch": 3.730512249443207, "grad_norm": 28.261253356933594, "learning_rate": 1e-06, "loss": 0.6353, "num_input_tokens_seen": 93824432, "step": 1675 }, { "epoch": 3.730512249443207, "loss": 0.7097867727279663, "loss_ce": 0.00031414441764354706, "loss_iou": 0.30078125, "loss_num": 0.021240234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 93824432, "step": 1675 }, { "epoch": 3.732739420935412, "grad_norm": 17.860763549804688, "learning_rate": 1e-06, "loss": 0.7996, "num_input_tokens_seen": 93882600, "step": 1676 }, { "epoch": 3.732739420935412, "loss": 0.6038820743560791, "loss_ce": 0.00021385436411947012, "loss_iou": 0.24609375, "loss_num": 0.022216796875, "loss_xval": 0.60546875, "num_input_tokens_seen": 93882600, "step": 1676 }, { "epoch": 3.734966592427617, "grad_norm": 22.68053436279297, "learning_rate": 1e-06, "loss": 1.0956, "num_input_tokens_seen": 93938548, "step": 1677 }, { "epoch": 3.734966592427617, "loss": 0.991020917892456, "loss_ce": 0.00029826798709109426, "loss_iou": 0.44140625, "loss_num": 0.0216064453125, "loss_xval": 0.9921875, "num_input_tokens_seen": 93938548, "step": 1677 }, { "epoch": 3.737193763919822, "grad_norm": 22.026281356811523, "learning_rate": 1e-06, "loss": 0.7681, "num_input_tokens_seen": 93993212, "step": 1678 }, { "epoch": 3.737193763919822, "loss": 0.945850670337677, "loss_ce": 0.000294047174975276, "loss_iou": 0.365234375, "loss_num": 0.04296875, "loss_xval": 0.9453125, "num_input_tokens_seen": 93993212, "step": 1678 }, { "epoch": 3.739420935412027, "grad_norm": 25.18975067138672, "learning_rate": 1e-06, "loss": 0.8155, "num_input_tokens_seen": 94051056, "step": 1679 }, { "epoch": 3.739420935412027, "loss": 0.9602963328361511, "loss_ce": 0.00106780044734478, "loss_iou": 0.384765625, "loss_num": 0.038330078125, "loss_xval": 0.9609375, "num_input_tokens_seen": 94051056, "step": 1679 }, { "epoch": 3.7416481069042318, "grad_norm": 22.7869873046875, "learning_rate": 1e-06, "loss": 0.7987, "num_input_tokens_seen": 94110436, "step": 1680 }, { "epoch": 3.7416481069042318, "loss": 0.8000843524932861, "loss_ce": 0.0002796592016238719, "loss_iou": 0.337890625, "loss_num": 0.024658203125, "loss_xval": 0.80078125, "num_input_tokens_seen": 94110436, "step": 1680 }, { "epoch": 3.7438752783964366, "grad_norm": 24.386417388916016, "learning_rate": 1e-06, "loss": 1.0109, "num_input_tokens_seen": 94167244, "step": 1681 }, { "epoch": 3.7438752783964366, "loss": 1.1265201568603516, "loss_ce": 0.0005436294013634324, "loss_iou": 0.4453125, "loss_num": 0.047119140625, "loss_xval": 1.125, "num_input_tokens_seen": 94167244, "step": 1681 }, { "epoch": 3.7461024498886415, "grad_norm": 19.943164825439453, "learning_rate": 1e-06, "loss": 1.1193, "num_input_tokens_seen": 94223920, "step": 1682 }, { "epoch": 3.7461024498886415, "loss": 0.9446117877960205, "loss_ce": 0.00027577788569033146, "loss_iou": 0.41796875, "loss_num": 0.021728515625, "loss_xval": 0.9453125, "num_input_tokens_seen": 94223920, "step": 1682 }, { "epoch": 3.7483296213808464, "grad_norm": 20.862585067749023, "learning_rate": 1e-06, "loss": 0.6461, "num_input_tokens_seen": 94279276, "step": 1683 }, { "epoch": 3.7483296213808464, "loss": 0.621061384677887, "loss_ce": 0.0002728351391851902, "loss_iou": 0.267578125, "loss_num": 0.01708984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 94279276, "step": 1683 }, { "epoch": 3.750556792873051, "grad_norm": 19.76258087158203, "learning_rate": 1e-06, "loss": 0.8812, "num_input_tokens_seen": 94333312, "step": 1684 }, { "epoch": 3.750556792873051, "loss": 1.0115703344345093, "loss_ce": 0.00033991390955634415, "loss_iou": 0.4375, "loss_num": 0.0277099609375, "loss_xval": 1.0078125, "num_input_tokens_seen": 94333312, "step": 1684 }, { "epoch": 3.752783964365256, "grad_norm": 13.520242691040039, "learning_rate": 1e-06, "loss": 0.9022, "num_input_tokens_seen": 94389944, "step": 1685 }, { "epoch": 3.752783964365256, "loss": 1.0272419452667236, "loss_ce": 0.00038654671516269445, "loss_iou": 0.439453125, "loss_num": 0.0296630859375, "loss_xval": 1.0234375, "num_input_tokens_seen": 94389944, "step": 1685 }, { "epoch": 3.755011135857461, "grad_norm": 13.537060737609863, "learning_rate": 1e-06, "loss": 0.7416, "num_input_tokens_seen": 94445592, "step": 1686 }, { "epoch": 3.755011135857461, "loss": 0.7466691136360168, "loss_ce": 0.00033120866282843053, "loss_iou": 0.33203125, "loss_num": 0.016357421875, "loss_xval": 0.74609375, "num_input_tokens_seen": 94445592, "step": 1686 }, { "epoch": 3.757238307349666, "grad_norm": 17.105846405029297, "learning_rate": 1e-06, "loss": 0.8223, "num_input_tokens_seen": 94504800, "step": 1687 }, { "epoch": 3.757238307349666, "loss": 0.8430700302124023, "loss_ce": 0.0002966249012388289, "loss_iou": 0.337890625, "loss_num": 0.033447265625, "loss_xval": 0.84375, "num_input_tokens_seen": 94504800, "step": 1687 }, { "epoch": 3.7594654788418707, "grad_norm": 18.66309356689453, "learning_rate": 1e-06, "loss": 1.0448, "num_input_tokens_seen": 94560456, "step": 1688 }, { "epoch": 3.7594654788418707, "loss": 0.9472213983535767, "loss_ce": 0.00044402258936315775, "loss_iou": 0.412109375, "loss_num": 0.0245361328125, "loss_xval": 0.9453125, "num_input_tokens_seen": 94560456, "step": 1688 }, { "epoch": 3.7616926503340755, "grad_norm": 16.696016311645508, "learning_rate": 1e-06, "loss": 0.7974, "num_input_tokens_seen": 94617952, "step": 1689 }, { "epoch": 3.7616926503340755, "loss": 0.7895834445953369, "loss_ce": 0.0003989200631622225, "loss_iou": 0.322265625, "loss_num": 0.029052734375, "loss_xval": 0.7890625, "num_input_tokens_seen": 94617952, "step": 1689 }, { "epoch": 3.7639198218262804, "grad_norm": 27.074752807617188, "learning_rate": 1e-06, "loss": 0.7939, "num_input_tokens_seen": 94673300, "step": 1690 }, { "epoch": 3.7639198218262804, "loss": 0.47075653076171875, "loss_ce": 0.00029751902911812067, "loss_iou": 0.2041015625, "loss_num": 0.012451171875, "loss_xval": 0.470703125, "num_input_tokens_seen": 94673300, "step": 1690 }, { "epoch": 3.7661469933184857, "grad_norm": 23.333097457885742, "learning_rate": 1e-06, "loss": 0.901, "num_input_tokens_seen": 94727112, "step": 1691 }, { "epoch": 3.7661469933184857, "loss": 0.8041844964027405, "loss_ce": 0.0004735640832222998, "loss_iou": 0.349609375, "loss_num": 0.0213623046875, "loss_xval": 0.8046875, "num_input_tokens_seen": 94727112, "step": 1691 }, { "epoch": 3.7683741648106905, "grad_norm": 14.201781272888184, "learning_rate": 1e-06, "loss": 0.8346, "num_input_tokens_seen": 94784200, "step": 1692 }, { "epoch": 3.7683741648106905, "loss": 0.82025545835495, "loss_ce": 0.0003091579128522426, "loss_iou": 0.322265625, "loss_num": 0.034912109375, "loss_xval": 0.8203125, "num_input_tokens_seen": 94784200, "step": 1692 }, { "epoch": 3.7706013363028954, "grad_norm": 17.050172805786133, "learning_rate": 1e-06, "loss": 0.9037, "num_input_tokens_seen": 94842204, "step": 1693 }, { "epoch": 3.7706013363028954, "loss": 0.7915303707122803, "loss_ce": 0.0002705698716454208, "loss_iou": 0.3359375, "loss_num": 0.02392578125, "loss_xval": 0.79296875, "num_input_tokens_seen": 94842204, "step": 1693 }, { "epoch": 3.7728285077951003, "grad_norm": 20.24613380432129, "learning_rate": 1e-06, "loss": 0.6938, "num_input_tokens_seen": 94897320, "step": 1694 }, { "epoch": 3.7728285077951003, "loss": 0.6334647536277771, "loss_ce": 0.000285994668956846, "loss_iou": 0.275390625, "loss_num": 0.0164794921875, "loss_xval": 0.6328125, "num_input_tokens_seen": 94897320, "step": 1694 }, { "epoch": 3.775055679287305, "grad_norm": 23.602615356445312, "learning_rate": 1e-06, "loss": 0.8466, "num_input_tokens_seen": 94956064, "step": 1695 }, { "epoch": 3.775055679287305, "loss": 0.8750972747802734, "loss_ce": 0.00034144739038310945, "loss_iou": 0.380859375, "loss_num": 0.0230712890625, "loss_xval": 0.875, "num_input_tokens_seen": 94956064, "step": 1695 }, { "epoch": 3.77728285077951, "grad_norm": 19.40206527709961, "learning_rate": 1e-06, "loss": 0.7123, "num_input_tokens_seen": 95011940, "step": 1696 }, { "epoch": 3.77728285077951, "loss": 0.9245999455451965, "loss_ce": 0.0002835210179910064, "loss_iou": 0.375, "loss_num": 0.03466796875, "loss_xval": 0.92578125, "num_input_tokens_seen": 95011940, "step": 1696 }, { "epoch": 3.779510022271715, "grad_norm": 22.941545486450195, "learning_rate": 1e-06, "loss": 0.7155, "num_input_tokens_seen": 95065572, "step": 1697 }, { "epoch": 3.779510022271715, "loss": 0.5341686606407166, "loss_ce": 0.00023310747928917408, "loss_iou": 0.21484375, "loss_num": 0.0208740234375, "loss_xval": 0.53515625, "num_input_tokens_seen": 95065572, "step": 1697 }, { "epoch": 3.7817371937639197, "grad_norm": 19.74363899230957, "learning_rate": 1e-06, "loss": 0.7572, "num_input_tokens_seen": 95120584, "step": 1698 }, { "epoch": 3.7817371937639197, "loss": 0.7215688228607178, "loss_ce": 0.00037740974221378565, "loss_iou": 0.294921875, "loss_num": 0.0263671875, "loss_xval": 0.72265625, "num_input_tokens_seen": 95120584, "step": 1698 }, { "epoch": 3.7839643652561246, "grad_norm": 34.252037048339844, "learning_rate": 1e-06, "loss": 0.8692, "num_input_tokens_seen": 95176160, "step": 1699 }, { "epoch": 3.7839643652561246, "loss": 0.9719327688217163, "loss_ce": 0.0002530916826799512, "loss_iou": 0.421875, "loss_num": 0.025634765625, "loss_xval": 0.97265625, "num_input_tokens_seen": 95176160, "step": 1699 }, { "epoch": 3.78619153674833, "grad_norm": 17.124095916748047, "learning_rate": 1e-06, "loss": 0.7826, "num_input_tokens_seen": 95230472, "step": 1700 }, { "epoch": 3.78619153674833, "loss": 0.7893081903457642, "loss_ce": 0.0007340057054534554, "loss_iou": 0.330078125, "loss_num": 0.0255126953125, "loss_xval": 0.7890625, "num_input_tokens_seen": 95230472, "step": 1700 }, { "epoch": 3.7884187082405347, "grad_norm": 18.04917335510254, "learning_rate": 1e-06, "loss": 0.7732, "num_input_tokens_seen": 95286580, "step": 1701 }, { "epoch": 3.7884187082405347, "loss": 0.8537644743919373, "loss_ce": 0.0004929610877297819, "loss_iou": 0.361328125, "loss_num": 0.0257568359375, "loss_xval": 0.8515625, "num_input_tokens_seen": 95286580, "step": 1701 }, { "epoch": 3.7906458797327396, "grad_norm": 19.060016632080078, "learning_rate": 1e-06, "loss": 0.7842, "num_input_tokens_seen": 95341332, "step": 1702 }, { "epoch": 3.7906458797327396, "loss": 0.5456702709197998, "loss_ce": 0.00026007683482021093, "loss_iou": 0.2236328125, "loss_num": 0.01953125, "loss_xval": 0.546875, "num_input_tokens_seen": 95341332, "step": 1702 }, { "epoch": 3.7928730512249444, "grad_norm": 14.60232925415039, "learning_rate": 1e-06, "loss": 0.6544, "num_input_tokens_seen": 95397800, "step": 1703 }, { "epoch": 3.7928730512249444, "loss": 0.5783922076225281, "loss_ce": 0.00026723096380010247, "loss_iou": 0.248046875, "loss_num": 0.016357421875, "loss_xval": 0.578125, "num_input_tokens_seen": 95397800, "step": 1703 }, { "epoch": 3.7951002227171493, "grad_norm": 237.25967407226562, "learning_rate": 1e-06, "loss": 0.8609, "num_input_tokens_seen": 95453292, "step": 1704 }, { "epoch": 3.7951002227171493, "loss": 0.7244041562080383, "loss_ce": 0.0002830714511219412, "loss_iou": 0.314453125, "loss_num": 0.018798828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 95453292, "step": 1704 }, { "epoch": 3.797327394209354, "grad_norm": 23.924535751342773, "learning_rate": 1e-06, "loss": 0.8929, "num_input_tokens_seen": 95509636, "step": 1705 }, { "epoch": 3.797327394209354, "loss": 0.9049976468086243, "loss_ce": 0.00045664224307984114, "loss_iou": 0.376953125, "loss_num": 0.0299072265625, "loss_xval": 0.90625, "num_input_tokens_seen": 95509636, "step": 1705 }, { "epoch": 3.799554565701559, "grad_norm": 13.076998710632324, "learning_rate": 1e-06, "loss": 0.7502, "num_input_tokens_seen": 95567220, "step": 1706 }, { "epoch": 3.799554565701559, "loss": 0.8397418260574341, "loss_ce": 0.0006305105634965003, "loss_iou": 0.3359375, "loss_num": 0.03369140625, "loss_xval": 0.83984375, "num_input_tokens_seen": 95567220, "step": 1706 }, { "epoch": 3.801781737193764, "grad_norm": 16.36588478088379, "learning_rate": 1e-06, "loss": 0.8995, "num_input_tokens_seen": 95621892, "step": 1707 }, { "epoch": 3.801781737193764, "loss": 0.8961795568466187, "loss_ce": 0.00042754405876621604, "loss_iou": 0.36328125, "loss_num": 0.034423828125, "loss_xval": 0.89453125, "num_input_tokens_seen": 95621892, "step": 1707 }, { "epoch": 3.8040089086859687, "grad_norm": 31.586313247680664, "learning_rate": 1e-06, "loss": 0.8272, "num_input_tokens_seen": 95678952, "step": 1708 }, { "epoch": 3.8040089086859687, "loss": 0.7548704147338867, "loss_ce": 0.0002317242615390569, "loss_iou": 0.333984375, "loss_num": 0.0169677734375, "loss_xval": 0.75390625, "num_input_tokens_seen": 95678952, "step": 1708 }, { "epoch": 3.8062360801781736, "grad_norm": 21.018543243408203, "learning_rate": 1e-06, "loss": 0.8788, "num_input_tokens_seen": 95734860, "step": 1709 }, { "epoch": 3.8062360801781736, "loss": 0.8822214603424072, "loss_ce": 0.0011179400607943535, "loss_iou": 0.37109375, "loss_num": 0.0279541015625, "loss_xval": 0.8828125, "num_input_tokens_seen": 95734860, "step": 1709 }, { "epoch": 3.8084632516703785, "grad_norm": 46.00482177734375, "learning_rate": 1e-06, "loss": 0.6713, "num_input_tokens_seen": 95791268, "step": 1710 }, { "epoch": 3.8084632516703785, "loss": 0.6450284123420715, "loss_ce": 0.00025300466222688556, "loss_iou": 0.296875, "loss_num": 0.01031494140625, "loss_xval": 0.64453125, "num_input_tokens_seen": 95791268, "step": 1710 }, { "epoch": 3.8106904231625833, "grad_norm": 19.820987701416016, "learning_rate": 1e-06, "loss": 0.9956, "num_input_tokens_seen": 95847960, "step": 1711 }, { "epoch": 3.8106904231625833, "loss": 0.8101856708526611, "loss_ce": 0.00037119118496775627, "loss_iou": 0.34765625, "loss_num": 0.0230712890625, "loss_xval": 0.80859375, "num_input_tokens_seen": 95847960, "step": 1711 }, { "epoch": 3.812917594654788, "grad_norm": 16.08196449279785, "learning_rate": 1e-06, "loss": 1.0883, "num_input_tokens_seen": 95904500, "step": 1712 }, { "epoch": 3.812917594654788, "loss": 0.8286195993423462, "loss_ce": 0.0004946578992530704, "loss_iou": 0.33984375, "loss_num": 0.029541015625, "loss_xval": 0.828125, "num_input_tokens_seen": 95904500, "step": 1712 }, { "epoch": 3.815144766146993, "grad_norm": 25.859769821166992, "learning_rate": 1e-06, "loss": 0.747, "num_input_tokens_seen": 95959000, "step": 1713 }, { "epoch": 3.815144766146993, "loss": 0.749795138835907, "loss_ce": 0.00028341758297756314, "loss_iou": 0.31640625, "loss_num": 0.0235595703125, "loss_xval": 0.75, "num_input_tokens_seen": 95959000, "step": 1713 }, { "epoch": 3.8173719376391984, "grad_norm": 16.63954734802246, "learning_rate": 1e-06, "loss": 0.7589, "num_input_tokens_seen": 96015556, "step": 1714 }, { "epoch": 3.8173719376391984, "loss": 0.7214926481246948, "loss_ce": 0.00030121111194603145, "loss_iou": 0.32421875, "loss_num": 0.01458740234375, "loss_xval": 0.72265625, "num_input_tokens_seen": 96015556, "step": 1714 }, { "epoch": 3.819599109131403, "grad_norm": 21.721033096313477, "learning_rate": 1e-06, "loss": 0.7904, "num_input_tokens_seen": 96072212, "step": 1715 }, { "epoch": 3.819599109131403, "loss": 0.7724882364273071, "loss_ce": 0.00027147267246618867, "loss_iou": 0.333984375, "loss_num": 0.0206298828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 96072212, "step": 1715 }, { "epoch": 3.821826280623608, "grad_norm": 13.081016540527344, "learning_rate": 1e-06, "loss": 0.7134, "num_input_tokens_seen": 96129256, "step": 1716 }, { "epoch": 3.821826280623608, "loss": 0.8351633548736572, "loss_ce": 0.00026344467187300324, "loss_iou": 0.375, "loss_num": 0.0167236328125, "loss_xval": 0.8359375, "num_input_tokens_seen": 96129256, "step": 1716 }, { "epoch": 3.824053452115813, "grad_norm": 23.20547866821289, "learning_rate": 1e-06, "loss": 0.9529, "num_input_tokens_seen": 96186100, "step": 1717 }, { "epoch": 3.824053452115813, "loss": 1.0769813060760498, "loss_ce": 0.00032106111757457256, "loss_iou": 0.42578125, "loss_num": 0.044677734375, "loss_xval": 1.078125, "num_input_tokens_seen": 96186100, "step": 1717 }, { "epoch": 3.826280623608018, "grad_norm": 19.76165008544922, "learning_rate": 1e-06, "loss": 0.9206, "num_input_tokens_seen": 96242056, "step": 1718 }, { "epoch": 3.826280623608018, "loss": 0.7002924680709839, "loss_ce": 0.0003413489321246743, "loss_iou": 0.310546875, "loss_num": 0.0155029296875, "loss_xval": 0.69921875, "num_input_tokens_seen": 96242056, "step": 1718 }, { "epoch": 3.8285077951002227, "grad_norm": 19.560970306396484, "learning_rate": 1e-06, "loss": 0.9795, "num_input_tokens_seen": 96296820, "step": 1719 }, { "epoch": 3.8285077951002227, "loss": 1.1053590774536133, "loss_ce": 0.00037851842353120446, "loss_iou": 0.453125, "loss_num": 0.039306640625, "loss_xval": 1.1015625, "num_input_tokens_seen": 96296820, "step": 1719 }, { "epoch": 3.8307349665924275, "grad_norm": 13.943714141845703, "learning_rate": 1e-06, "loss": 0.7494, "num_input_tokens_seen": 96352772, "step": 1720 }, { "epoch": 3.8307349665924275, "loss": 0.9070804715156555, "loss_ce": 0.00034218025393784046, "loss_iou": 0.369140625, "loss_num": 0.033935546875, "loss_xval": 0.90625, "num_input_tokens_seen": 96352772, "step": 1720 }, { "epoch": 3.8329621380846324, "grad_norm": 24.83785629272461, "learning_rate": 1e-06, "loss": 0.8379, "num_input_tokens_seen": 96409184, "step": 1721 }, { "epoch": 3.8329621380846324, "loss": 0.9676505327224731, "loss_ce": 0.0003653773164842278, "loss_iou": 0.4140625, "loss_num": 0.027587890625, "loss_xval": 0.96875, "num_input_tokens_seen": 96409184, "step": 1721 }, { "epoch": 3.8351893095768377, "grad_norm": 18.294523239135742, "learning_rate": 1e-06, "loss": 0.7214, "num_input_tokens_seen": 96465040, "step": 1722 }, { "epoch": 3.8351893095768377, "loss": 0.5652261972427368, "loss_ce": 0.00028477917658165097, "loss_iou": 0.240234375, "loss_num": 0.0169677734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 96465040, "step": 1722 }, { "epoch": 3.8374164810690425, "grad_norm": 14.921290397644043, "learning_rate": 1e-06, "loss": 0.7268, "num_input_tokens_seen": 96523672, "step": 1723 }, { "epoch": 3.8374164810690425, "loss": 0.7978357076644897, "loss_ce": 0.00022831102251075208, "loss_iou": 0.33203125, "loss_num": 0.0269775390625, "loss_xval": 0.796875, "num_input_tokens_seen": 96523672, "step": 1723 }, { "epoch": 3.8396436525612474, "grad_norm": 25.57061767578125, "learning_rate": 1e-06, "loss": 0.7983, "num_input_tokens_seen": 96579016, "step": 1724 }, { "epoch": 3.8396436525612474, "loss": 0.9255920648574829, "loss_ce": 0.0002990873181261122, "loss_iou": 0.404296875, "loss_num": 0.0235595703125, "loss_xval": 0.92578125, "num_input_tokens_seen": 96579016, "step": 1724 }, { "epoch": 3.8418708240534523, "grad_norm": 25.671611785888672, "learning_rate": 1e-06, "loss": 0.7852, "num_input_tokens_seen": 96633780, "step": 1725 }, { "epoch": 3.8418708240534523, "loss": 0.7190070152282715, "loss_ce": 0.0002570503856986761, "loss_iou": 0.3203125, "loss_num": 0.0157470703125, "loss_xval": 0.71875, "num_input_tokens_seen": 96633780, "step": 1725 }, { "epoch": 3.844097995545657, "grad_norm": 21.268661499023438, "learning_rate": 1e-06, "loss": 0.5661, "num_input_tokens_seen": 96689492, "step": 1726 }, { "epoch": 3.844097995545657, "loss": 0.563983678817749, "loss_ce": 0.0002629789523780346, "loss_iou": 0.2294921875, "loss_num": 0.02099609375, "loss_xval": 0.5625, "num_input_tokens_seen": 96689492, "step": 1726 }, { "epoch": 3.846325167037862, "grad_norm": 26.356407165527344, "learning_rate": 1e-06, "loss": 0.7125, "num_input_tokens_seen": 96746656, "step": 1727 }, { "epoch": 3.846325167037862, "loss": 0.8865300416946411, "loss_ce": 0.0004216782108414918, "loss_iou": 0.375, "loss_num": 0.027587890625, "loss_xval": 0.88671875, "num_input_tokens_seen": 96746656, "step": 1727 }, { "epoch": 3.848552338530067, "grad_norm": 16.691015243530273, "learning_rate": 1e-06, "loss": 0.9871, "num_input_tokens_seen": 96803292, "step": 1728 }, { "epoch": 3.848552338530067, "loss": 0.9029386043548584, "loss_ce": 0.0003506758948788047, "loss_iou": 0.380859375, "loss_num": 0.0279541015625, "loss_xval": 0.90234375, "num_input_tokens_seen": 96803292, "step": 1728 }, { "epoch": 3.8507795100222717, "grad_norm": 25.19491195678711, "learning_rate": 1e-06, "loss": 0.723, "num_input_tokens_seen": 96859896, "step": 1729 }, { "epoch": 3.8507795100222717, "loss": 0.7351480722427368, "loss_ce": 0.00028477725572884083, "loss_iou": 0.2890625, "loss_num": 0.0311279296875, "loss_xval": 0.734375, "num_input_tokens_seen": 96859896, "step": 1729 }, { "epoch": 3.8530066815144766, "grad_norm": 16.253820419311523, "learning_rate": 1e-06, "loss": 0.6062, "num_input_tokens_seen": 96917348, "step": 1730 }, { "epoch": 3.8530066815144766, "loss": 0.6677459478378296, "loss_ce": 0.0002654629643075168, "loss_iou": 0.28515625, "loss_num": 0.0189208984375, "loss_xval": 0.66796875, "num_input_tokens_seen": 96917348, "step": 1730 }, { "epoch": 3.8552338530066814, "grad_norm": 12.308496475219727, "learning_rate": 1e-06, "loss": 0.7134, "num_input_tokens_seen": 96975804, "step": 1731 }, { "epoch": 3.8552338530066814, "loss": 0.6045119166374207, "loss_ce": 0.0003859363787341863, "loss_iou": 0.248046875, "loss_num": 0.021484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 96975804, "step": 1731 }, { "epoch": 3.8574610244988863, "grad_norm": 26.0831298828125, "learning_rate": 1e-06, "loss": 1.1291, "num_input_tokens_seen": 97033224, "step": 1732 }, { "epoch": 3.8574610244988863, "loss": 1.0747888088226318, "loss_ce": 0.00032590571208857, "loss_iou": 0.435546875, "loss_num": 0.04052734375, "loss_xval": 1.078125, "num_input_tokens_seen": 97033224, "step": 1732 }, { "epoch": 3.859688195991091, "grad_norm": 22.14068031311035, "learning_rate": 1e-06, "loss": 0.9682, "num_input_tokens_seen": 97092012, "step": 1733 }, { "epoch": 3.859688195991091, "loss": 1.2439754009246826, "loss_ce": 0.00032304698834195733, "loss_iou": 0.515625, "loss_num": 0.042724609375, "loss_xval": 1.2421875, "num_input_tokens_seen": 97092012, "step": 1733 }, { "epoch": 3.861915367483296, "grad_norm": 17.057979583740234, "learning_rate": 1e-06, "loss": 0.893, "num_input_tokens_seen": 97145840, "step": 1734 }, { "epoch": 3.861915367483296, "loss": 1.0669821500778198, "loss_ce": 0.0003318190574645996, "loss_iou": 0.39453125, "loss_num": 0.05517578125, "loss_xval": 1.0703125, "num_input_tokens_seen": 97145840, "step": 1734 }, { "epoch": 3.864142538975501, "grad_norm": 14.770302772521973, "learning_rate": 1e-06, "loss": 0.8305, "num_input_tokens_seen": 97201400, "step": 1735 }, { "epoch": 3.864142538975501, "loss": 0.9496909379959106, "loss_ce": 0.00047216590610332787, "loss_iou": 0.39453125, "loss_num": 0.031982421875, "loss_xval": 0.94921875, "num_input_tokens_seen": 97201400, "step": 1735 }, { "epoch": 3.866369710467706, "grad_norm": 19.580652236938477, "learning_rate": 1e-06, "loss": 0.8005, "num_input_tokens_seen": 97258904, "step": 1736 }, { "epoch": 3.866369710467706, "loss": 0.6937247514724731, "loss_ce": 0.0003653843014035374, "loss_iou": 0.298828125, "loss_num": 0.0194091796875, "loss_xval": 0.6953125, "num_input_tokens_seen": 97258904, "step": 1736 }, { "epoch": 3.868596881959911, "grad_norm": 29.482715606689453, "learning_rate": 1e-06, "loss": 0.8335, "num_input_tokens_seen": 97317044, "step": 1737 }, { "epoch": 3.868596881959911, "loss": 0.874514102935791, "loss_ce": 0.00024650723207741976, "loss_iou": 0.39453125, "loss_num": 0.017333984375, "loss_xval": 0.875, "num_input_tokens_seen": 97317044, "step": 1737 }, { "epoch": 3.870824053452116, "grad_norm": 17.633045196533203, "learning_rate": 1e-06, "loss": 0.965, "num_input_tokens_seen": 97372048, "step": 1738 }, { "epoch": 3.870824053452116, "loss": 1.020100474357605, "loss_ce": 0.0005692073609679937, "loss_iou": 0.40234375, "loss_num": 0.04248046875, "loss_xval": 1.015625, "num_input_tokens_seen": 97372048, "step": 1738 }, { "epoch": 3.8730512249443207, "grad_norm": 18.022729873657227, "learning_rate": 1e-06, "loss": 0.7467, "num_input_tokens_seen": 97427968, "step": 1739 }, { "epoch": 3.8730512249443207, "loss": 0.7238982915878296, "loss_ce": 0.0002655313292052597, "loss_iou": 0.294921875, "loss_num": 0.0267333984375, "loss_xval": 0.72265625, "num_input_tokens_seen": 97427968, "step": 1739 }, { "epoch": 3.8752783964365256, "grad_norm": 20.802419662475586, "learning_rate": 1e-06, "loss": 0.8692, "num_input_tokens_seen": 97483504, "step": 1740 }, { "epoch": 3.8752783964365256, "loss": 0.9595529437065125, "loss_ce": 0.00032444443786516786, "loss_iou": 0.419921875, "loss_num": 0.0240478515625, "loss_xval": 0.9609375, "num_input_tokens_seen": 97483504, "step": 1740 }, { "epoch": 3.8775055679287305, "grad_norm": 25.31464385986328, "learning_rate": 1e-06, "loss": 0.9623, "num_input_tokens_seen": 97539960, "step": 1741 }, { "epoch": 3.8775055679287305, "loss": 0.9504677057266235, "loss_ce": 0.0002723511715885252, "loss_iou": 0.3828125, "loss_num": 0.037109375, "loss_xval": 0.94921875, "num_input_tokens_seen": 97539960, "step": 1741 }, { "epoch": 3.8797327394209353, "grad_norm": 18.857027053833008, "learning_rate": 1e-06, "loss": 0.6928, "num_input_tokens_seen": 97597248, "step": 1742 }, { "epoch": 3.8797327394209353, "loss": 0.7760539650917053, "loss_ce": 0.00023611923097632825, "loss_iou": 0.33984375, "loss_num": 0.0196533203125, "loss_xval": 0.77734375, "num_input_tokens_seen": 97597248, "step": 1742 }, { "epoch": 3.88195991091314, "grad_norm": 24.135141372680664, "learning_rate": 1e-06, "loss": 0.7342, "num_input_tokens_seen": 97652256, "step": 1743 }, { "epoch": 3.88195991091314, "loss": 0.636237621307373, "loss_ce": 0.00025132749578915536, "loss_iou": 0.26171875, "loss_num": 0.02197265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 97652256, "step": 1743 }, { "epoch": 3.884187082405345, "grad_norm": 27.896411895751953, "learning_rate": 1e-06, "loss": 0.7845, "num_input_tokens_seen": 97706708, "step": 1744 }, { "epoch": 3.884187082405345, "loss": 0.7319508790969849, "loss_ce": 0.00026141630951315165, "loss_iou": 0.328125, "loss_num": 0.01544189453125, "loss_xval": 0.73046875, "num_input_tokens_seen": 97706708, "step": 1744 }, { "epoch": 3.8864142538975504, "grad_norm": 16.71023178100586, "learning_rate": 1e-06, "loss": 0.7643, "num_input_tokens_seen": 97765116, "step": 1745 }, { "epoch": 3.8864142538975504, "loss": 0.8230664730072021, "loss_ce": 0.00031261841650120914, "loss_iou": 0.328125, "loss_num": 0.033447265625, "loss_xval": 0.82421875, "num_input_tokens_seen": 97765116, "step": 1745 }, { "epoch": 3.888641425389755, "grad_norm": 17.422271728515625, "learning_rate": 1e-06, "loss": 0.7545, "num_input_tokens_seen": 97822628, "step": 1746 }, { "epoch": 3.888641425389755, "loss": 0.6438745260238647, "loss_ce": 0.0003198395133949816, "loss_iou": 0.2734375, "loss_num": 0.0191650390625, "loss_xval": 0.64453125, "num_input_tokens_seen": 97822628, "step": 1746 }, { "epoch": 3.89086859688196, "grad_norm": 14.700139999389648, "learning_rate": 1e-06, "loss": 0.8227, "num_input_tokens_seen": 97878372, "step": 1747 }, { "epoch": 3.89086859688196, "loss": 0.8928521275520325, "loss_ce": 0.00027394542121328413, "loss_iou": 0.380859375, "loss_num": 0.0260009765625, "loss_xval": 0.890625, "num_input_tokens_seen": 97878372, "step": 1747 }, { "epoch": 3.893095768374165, "grad_norm": 25.434099197387695, "learning_rate": 1e-06, "loss": 0.6795, "num_input_tokens_seen": 97932476, "step": 1748 }, { "epoch": 3.893095768374165, "loss": 0.6719607710838318, "loss_ce": 0.0013064806116744876, "loss_iou": 0.2578125, "loss_num": 0.03076171875, "loss_xval": 0.671875, "num_input_tokens_seen": 97932476, "step": 1748 }, { "epoch": 3.89532293986637, "grad_norm": 12.71288776397705, "learning_rate": 1e-06, "loss": 0.9539, "num_input_tokens_seen": 97990880, "step": 1749 }, { "epoch": 3.89532293986637, "loss": 0.8964526653289795, "loss_ce": 0.000578734208829701, "loss_iou": 0.3828125, "loss_num": 0.02587890625, "loss_xval": 0.89453125, "num_input_tokens_seen": 97990880, "step": 1749 }, { "epoch": 3.8975501113585747, "grad_norm": 20.1357421875, "learning_rate": 1e-06, "loss": 0.8062, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "eval_seeclick_web_CIoU": 0.5617943108081818, "eval_seeclick_web_GIoU": 0.558159202337265, "eval_seeclick_web_IoU": 0.5777357220649719, "eval_seeclick_web_MAE_all": 0.017627435736358166, "eval_seeclick_web_MAE_h": 0.010515023721382022, "eval_seeclick_web_MAE_w": 0.018624153919517994, "eval_seeclick_web_MAE_x_boxes": 0.00829980755224824, "eval_seeclick_web_MAE_y_boxes": 0.02257556258700788, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9613375663757324, "eval_seeclick_web_loss_ce": 0.0003832996590062976, "eval_seeclick_web_loss_iou": 0.43701171875, "eval_seeclick_web_loss_num": 0.013935089111328125, "eval_seeclick_web_loss_xval": 0.9443359375, "eval_seeclick_web_runtime": 27.2225, "eval_seeclick_web_samples_per_second": 1.837, "eval_seeclick_web_steps_per_second": 0.073, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "eval_icons_CIoU": 0.3064361959695816, "eval_icons_GIoU": 0.32649458944797516, "eval_icons_IoU": 0.38111720979213715, "eval_icons_MAE_all": 0.06658957526087761, "eval_icons_MAE_h": 0.040302242152392864, "eval_icons_MAE_w": 0.07293413020670414, "eval_icons_MAE_x_boxes": 0.06224925257265568, "eval_icons_MAE_y_boxes": 0.03964635543525219, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7142175436019897, "eval_icons_loss_ce": 0.0004412867419887334, "eval_icons_loss_iou": 0.6680908203125, "eval_icons_loss_num": 0.06289100646972656, "eval_icons_loss_xval": 1.64990234375, "eval_icons_runtime": 24.1165, "eval_icons_samples_per_second": 2.073, "eval_icons_steps_per_second": 0.083, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "eval_screenspot_CIoU": 0.31633803248405457, "eval_screenspot_GIoU": 0.3353947401046753, "eval_screenspot_IoU": 0.3984935482343038, "eval_screenspot_MAE_all": 0.06795181334018707, "eval_screenspot_MAE_h": 0.03754324403901895, "eval_screenspot_MAE_w": 0.07950195794304211, "eval_screenspot_MAE_x_boxes": 0.08205063392718633, "eval_screenspot_MAE_y_boxes": 0.044876331463456154, "eval_screenspot_inside_bbox": 0.6358333428700765, "eval_screenspot_loss": 1.7439180612564087, "eval_screenspot_loss_ce": 0.0004279320516313116, "eval_screenspot_loss_iou": 0.712890625, "eval_screenspot_loss_num": 0.07890574137369792, "eval_screenspot_loss_xval": 1.8204752604166667, "eval_screenspot_runtime": 34.5027, "eval_screenspot_samples_per_second": 2.58, "eval_screenspot_steps_per_second": 0.087, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "eval_compot_CIoU": 0.3428885042667389, "eval_compot_GIoU": 0.3697793632745743, "eval_compot_IoU": 0.40237635374069214, "eval_compot_MAE_all": 0.019974621944129467, "eval_compot_MAE_h": 0.009356681257486343, "eval_compot_MAE_w": 0.02669452875852585, "eval_compot_MAE_x_boxes": 0.0306751299649477, "eval_compot_MAE_y_boxes": 0.00634155492298305, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3990836143493652, "eval_compot_loss_ce": 0.000333975360263139, "eval_compot_loss_iou": 0.637939453125, "eval_compot_loss_num": 0.018901824951171875, "eval_compot_loss_xval": 1.37109375, "eval_compot_runtime": 19.3662, "eval_compot_samples_per_second": 2.582, "eval_compot_steps_per_second": 0.103, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "eval_custom_ui_val_CIoU": 0.4409982098473443, "eval_custom_ui_val_GIoU": 0.46577325297726524, "eval_custom_ui_val_IoU": 0.49479154580169254, "eval_custom_ui_val_MAE_all": 0.03411407251324919, "eval_custom_ui_val_MAE_h": 0.0205486583419972, "eval_custom_ui_val_MAE_w": 0.03924656597276529, "eval_custom_ui_val_MAE_x_boxes": 0.03744201848490371, "eval_custom_ui_val_MAE_y_boxes": 0.019695010320800874, "eval_custom_ui_val_inside_bbox": 0.6867283980051676, "eval_custom_ui_val_loss": 1.2667858600616455, "eval_custom_ui_val_loss_ce": 0.0004089692018977884, "eval_custom_ui_val_loss_iou": 0.5344373914930556, "eval_custom_ui_val_loss_num": 0.03371832105848524, "eval_custom_ui_val_loss_xval": 1.2374403211805556, "eval_custom_ui_val_runtime": 56.4523, "eval_custom_ui_val_samples_per_second": 4.694, "eval_custom_ui_val_steps_per_second": 0.159, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8975501113585747, "loss": 0.9943009614944458, "loss_ce": 0.00040448884828947484, "loss_iou": 0.42578125, "loss_num": 0.0283203125, "loss_xval": 0.9921875, "num_input_tokens_seen": 98046088, "step": 1750 }, { "epoch": 3.8997772828507795, "grad_norm": 17.32667350769043, "learning_rate": 1e-06, "loss": 0.9158, "num_input_tokens_seen": 98101832, "step": 1751 }, { "epoch": 3.8997772828507795, "loss": 0.8142361640930176, "loss_ce": 0.00027136015705764294, "loss_iou": 0.365234375, "loss_num": 0.0167236328125, "loss_xval": 0.8125, "num_input_tokens_seen": 98101832, "step": 1751 }, { "epoch": 3.9020044543429844, "grad_norm": 14.840397834777832, "learning_rate": 1e-06, "loss": 0.7732, "num_input_tokens_seen": 98158656, "step": 1752 }, { "epoch": 3.9020044543429844, "loss": 0.9086735248565674, "loss_ce": 0.0004703607992269099, "loss_iou": 0.341796875, "loss_num": 0.044677734375, "loss_xval": 0.90625, "num_input_tokens_seen": 98158656, "step": 1752 }, { "epoch": 3.9042316258351892, "grad_norm": 16.115385055541992, "learning_rate": 1e-06, "loss": 0.7102, "num_input_tokens_seen": 98216120, "step": 1753 }, { "epoch": 3.9042316258351892, "loss": 0.6780398488044739, "loss_ce": 0.0003054844564758241, "loss_iou": 0.2890625, "loss_num": 0.020263671875, "loss_xval": 0.6796875, "num_input_tokens_seen": 98216120, "step": 1753 }, { "epoch": 3.906458797327394, "grad_norm": 22.08925437927246, "learning_rate": 1e-06, "loss": 0.8575, "num_input_tokens_seen": 98273000, "step": 1754 }, { "epoch": 3.906458797327394, "loss": 0.6279443502426147, "loss_ce": 0.0002587907947599888, "loss_iou": 0.291015625, "loss_num": 0.00958251953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 98273000, "step": 1754 }, { "epoch": 3.908685968819599, "grad_norm": 15.868555068969727, "learning_rate": 1e-06, "loss": 0.8731, "num_input_tokens_seen": 98330372, "step": 1755 }, { "epoch": 3.908685968819599, "loss": 1.0122852325439453, "loss_ce": 0.00032239314168691635, "loss_iou": 0.443359375, "loss_num": 0.0250244140625, "loss_xval": 1.015625, "num_input_tokens_seen": 98330372, "step": 1755 }, { "epoch": 3.910913140311804, "grad_norm": 20.903287887573242, "learning_rate": 1e-06, "loss": 1.0159, "num_input_tokens_seen": 98387768, "step": 1756 }, { "epoch": 3.910913140311804, "loss": 0.9633511900901794, "loss_ce": 0.00046056750579737127, "loss_iou": 0.384765625, "loss_num": 0.038818359375, "loss_xval": 0.9609375, "num_input_tokens_seen": 98387768, "step": 1756 }, { "epoch": 3.9131403118040087, "grad_norm": 14.697265625, "learning_rate": 1e-06, "loss": 1.0258, "num_input_tokens_seen": 98443212, "step": 1757 }, { "epoch": 3.9131403118040087, "loss": 1.067274808883667, "loss_ce": 0.00038028520066291094, "loss_iou": 0.435546875, "loss_num": 0.03857421875, "loss_xval": 1.0703125, "num_input_tokens_seen": 98443212, "step": 1757 }, { "epoch": 3.9153674832962135, "grad_norm": 18.442344665527344, "learning_rate": 1e-06, "loss": 0.8647, "num_input_tokens_seen": 98498060, "step": 1758 }, { "epoch": 3.9153674832962135, "loss": 0.6922543048858643, "loss_ce": 0.0002377206110395491, "loss_iou": 0.27734375, "loss_num": 0.027587890625, "loss_xval": 0.69140625, "num_input_tokens_seen": 98498060, "step": 1758 }, { "epoch": 3.917594654788419, "grad_norm": 22.285242080688477, "learning_rate": 1e-06, "loss": 0.716, "num_input_tokens_seen": 98553040, "step": 1759 }, { "epoch": 3.917594654788419, "loss": 0.8192723393440247, "loss_ce": 0.0006687932182103395, "loss_iou": 0.36328125, "loss_num": 0.0181884765625, "loss_xval": 0.8203125, "num_input_tokens_seen": 98553040, "step": 1759 }, { "epoch": 3.9198218262806237, "grad_norm": 19.2437801361084, "learning_rate": 1e-06, "loss": 0.8274, "num_input_tokens_seen": 98607992, "step": 1760 }, { "epoch": 3.9198218262806237, "loss": 1.0185580253601074, "loss_ce": 0.00024743605172261596, "loss_iou": 0.451171875, "loss_num": 0.0234375, "loss_xval": 1.015625, "num_input_tokens_seen": 98607992, "step": 1760 }, { "epoch": 3.9220489977728286, "grad_norm": 28.81540298461914, "learning_rate": 1e-06, "loss": 0.8937, "num_input_tokens_seen": 98663688, "step": 1761 }, { "epoch": 3.9220489977728286, "loss": 0.7943629026412964, "loss_ce": 0.0004176311194896698, "loss_iou": 0.32421875, "loss_num": 0.0291748046875, "loss_xval": 0.79296875, "num_input_tokens_seen": 98663688, "step": 1761 }, { "epoch": 3.9242761692650334, "grad_norm": 13.910033226013184, "learning_rate": 1e-06, "loss": 0.8205, "num_input_tokens_seen": 98720668, "step": 1762 }, { "epoch": 3.9242761692650334, "loss": 0.7007368803024292, "loss_ce": 0.0023726352956146, "loss_iou": 0.298828125, "loss_num": 0.0205078125, "loss_xval": 0.69921875, "num_input_tokens_seen": 98720668, "step": 1762 }, { "epoch": 3.9265033407572383, "grad_norm": 19.399320602416992, "learning_rate": 1e-06, "loss": 0.6872, "num_input_tokens_seen": 98777756, "step": 1763 }, { "epoch": 3.9265033407572383, "loss": 0.4689710736274719, "loss_ce": 0.00022106988762971014, "loss_iou": 0.203125, "loss_num": 0.0123291015625, "loss_xval": 0.46875, "num_input_tokens_seen": 98777756, "step": 1763 }, { "epoch": 3.928730512249443, "grad_norm": 18.18318748474121, "learning_rate": 1e-06, "loss": 0.7284, "num_input_tokens_seen": 98835320, "step": 1764 }, { "epoch": 3.928730512249443, "loss": 0.7946980595588684, "loss_ce": 0.0002644615597091615, "loss_iou": 0.359375, "loss_num": 0.01495361328125, "loss_xval": 0.79296875, "num_input_tokens_seen": 98835320, "step": 1764 }, { "epoch": 3.930957683741648, "grad_norm": 17.789981842041016, "learning_rate": 1e-06, "loss": 0.846, "num_input_tokens_seen": 98891900, "step": 1765 }, { "epoch": 3.930957683741648, "loss": 0.5826168060302734, "loss_ce": 0.0005855775089003146, "loss_iou": 0.255859375, "loss_num": 0.01446533203125, "loss_xval": 0.58203125, "num_input_tokens_seen": 98891900, "step": 1765 }, { "epoch": 3.933184855233853, "grad_norm": 14.104592323303223, "learning_rate": 1e-06, "loss": 0.8345, "num_input_tokens_seen": 98948704, "step": 1766 }, { "epoch": 3.933184855233853, "loss": 1.0315115451812744, "loss_ce": 0.0002615266712382436, "loss_iou": 0.43359375, "loss_num": 0.033203125, "loss_xval": 1.03125, "num_input_tokens_seen": 98948704, "step": 1766 }, { "epoch": 3.935412026726058, "grad_norm": 17.459924697875977, "learning_rate": 1e-06, "loss": 0.8304, "num_input_tokens_seen": 99005660, "step": 1767 }, { "epoch": 3.935412026726058, "loss": 0.9141278266906738, "loss_ce": 0.00030949406209401786, "loss_iou": 0.404296875, "loss_num": 0.0208740234375, "loss_xval": 0.9140625, "num_input_tokens_seen": 99005660, "step": 1767 }, { "epoch": 3.937639198218263, "grad_norm": 18.547863006591797, "learning_rate": 1e-06, "loss": 1.1333, "num_input_tokens_seen": 99061216, "step": 1768 }, { "epoch": 3.937639198218263, "loss": 0.909751296043396, "loss_ce": 0.00032748148078098893, "loss_iou": 0.380859375, "loss_num": 0.029541015625, "loss_xval": 0.91015625, "num_input_tokens_seen": 99061216, "step": 1768 }, { "epoch": 3.939866369710468, "grad_norm": 17.022930145263672, "learning_rate": 1e-06, "loss": 0.7887, "num_input_tokens_seen": 99120308, "step": 1769 }, { "epoch": 3.939866369710468, "loss": 0.8347407579421997, "loss_ce": 0.0002681155747268349, "loss_iou": 0.36328125, "loss_num": 0.0216064453125, "loss_xval": 0.8359375, "num_input_tokens_seen": 99120308, "step": 1769 }, { "epoch": 3.9420935412026727, "grad_norm": 18.062122344970703, "learning_rate": 1e-06, "loss": 0.846, "num_input_tokens_seen": 99176104, "step": 1770 }, { "epoch": 3.9420935412026727, "loss": 0.7344276905059814, "loss_ce": 0.00029679658473469317, "loss_iou": 0.29296875, "loss_num": 0.030029296875, "loss_xval": 0.734375, "num_input_tokens_seen": 99176104, "step": 1770 }, { "epoch": 3.9443207126948776, "grad_norm": 24.9909610748291, "learning_rate": 1e-06, "loss": 0.9439, "num_input_tokens_seen": 99230876, "step": 1771 }, { "epoch": 3.9443207126948776, "loss": 0.9261212944984436, "loss_ce": 0.0003400646965019405, "loss_iou": 0.41015625, "loss_num": 0.0211181640625, "loss_xval": 0.92578125, "num_input_tokens_seen": 99230876, "step": 1771 }, { "epoch": 3.9465478841870825, "grad_norm": 14.309945106506348, "learning_rate": 1e-06, "loss": 0.7448, "num_input_tokens_seen": 99286860, "step": 1772 }, { "epoch": 3.9465478841870825, "loss": 0.7100058794021606, "loss_ce": 0.000289075484033674, "loss_iou": 0.310546875, "loss_num": 0.017578125, "loss_xval": 0.7109375, "num_input_tokens_seen": 99286860, "step": 1772 }, { "epoch": 3.9487750556792873, "grad_norm": 12.438628196716309, "learning_rate": 1e-06, "loss": 1.0524, "num_input_tokens_seen": 99344372, "step": 1773 }, { "epoch": 3.9487750556792873, "loss": 1.3829472064971924, "loss_ce": 0.00037877040449529886, "loss_iou": 0.59375, "loss_num": 0.038330078125, "loss_xval": 1.3828125, "num_input_tokens_seen": 99344372, "step": 1773 }, { "epoch": 3.951002227171492, "grad_norm": 19.387874603271484, "learning_rate": 1e-06, "loss": 0.8822, "num_input_tokens_seen": 99401420, "step": 1774 }, { "epoch": 3.951002227171492, "loss": 1.1286416053771973, "loss_ce": 0.0004677603137679398, "loss_iou": 0.455078125, "loss_num": 0.043701171875, "loss_xval": 1.125, "num_input_tokens_seen": 99401420, "step": 1774 }, { "epoch": 3.953229398663697, "grad_norm": 16.427181243896484, "learning_rate": 1e-06, "loss": 0.9915, "num_input_tokens_seen": 99456744, "step": 1775 }, { "epoch": 3.953229398663697, "loss": 0.9648089408874512, "loss_ce": 0.00045345089165493846, "loss_iou": 0.40234375, "loss_num": 0.03173828125, "loss_xval": 0.96484375, "num_input_tokens_seen": 99456744, "step": 1775 }, { "epoch": 3.955456570155902, "grad_norm": 16.38910675048828, "learning_rate": 1e-06, "loss": 0.8111, "num_input_tokens_seen": 99511464, "step": 1776 }, { "epoch": 3.955456570155902, "loss": 1.0158613920211792, "loss_ce": 0.0002364249958191067, "loss_iou": 0.42578125, "loss_num": 0.03271484375, "loss_xval": 1.015625, "num_input_tokens_seen": 99511464, "step": 1776 }, { "epoch": 3.9576837416481068, "grad_norm": 28.928443908691406, "learning_rate": 1e-06, "loss": 0.8633, "num_input_tokens_seen": 99567452, "step": 1777 }, { "epoch": 3.9576837416481068, "loss": 1.1553375720977783, "loss_ce": 0.0003082446346525103, "loss_iou": 0.49609375, "loss_num": 0.03271484375, "loss_xval": 1.15625, "num_input_tokens_seen": 99567452, "step": 1777 }, { "epoch": 3.9599109131403116, "grad_norm": 21.195846557617188, "learning_rate": 1e-06, "loss": 0.846, "num_input_tokens_seen": 99624932, "step": 1778 }, { "epoch": 3.9599109131403116, "loss": 0.6980462074279785, "loss_ce": 0.0002923067077063024, "loss_iou": 0.2734375, "loss_num": 0.0299072265625, "loss_xval": 0.69921875, "num_input_tokens_seen": 99624932, "step": 1778 }, { "epoch": 3.9621380846325165, "grad_norm": 16.269763946533203, "learning_rate": 1e-06, "loss": 0.6815, "num_input_tokens_seen": 99681052, "step": 1779 }, { "epoch": 3.9621380846325165, "loss": 0.7171025276184082, "loss_ce": 0.00030569382943212986, "loss_iou": 0.291015625, "loss_num": 0.0267333984375, "loss_xval": 0.71875, "num_input_tokens_seen": 99681052, "step": 1779 }, { "epoch": 3.9643652561247213, "grad_norm": 17.27662467956543, "learning_rate": 1e-06, "loss": 0.7603, "num_input_tokens_seen": 99738304, "step": 1780 }, { "epoch": 3.9643652561247213, "loss": 0.9583814740180969, "loss_ce": 0.0002515443484298885, "loss_iou": 0.431640625, "loss_num": 0.01904296875, "loss_xval": 0.95703125, "num_input_tokens_seen": 99738304, "step": 1780 }, { "epoch": 3.9665924276169267, "grad_norm": 16.905935287475586, "learning_rate": 1e-06, "loss": 0.701, "num_input_tokens_seen": 99793864, "step": 1781 }, { "epoch": 3.9665924276169267, "loss": 0.8442895412445068, "loss_ce": 0.0002953645307570696, "loss_iou": 0.359375, "loss_num": 0.025146484375, "loss_xval": 0.84375, "num_input_tokens_seen": 99793864, "step": 1781 }, { "epoch": 3.9688195991091315, "grad_norm": 17.269075393676758, "learning_rate": 1e-06, "loss": 0.6253, "num_input_tokens_seen": 99849596, "step": 1782 }, { "epoch": 3.9688195991091315, "loss": 0.6899313926696777, "loss_ce": 0.0004782522446475923, "loss_iou": 0.279296875, "loss_num": 0.0262451171875, "loss_xval": 0.6875, "num_input_tokens_seen": 99849596, "step": 1782 }, { "epoch": 3.9710467706013364, "grad_norm": 12.376049995422363, "learning_rate": 1e-06, "loss": 0.7759, "num_input_tokens_seen": 99905032, "step": 1783 }, { "epoch": 3.9710467706013364, "loss": 0.8750180602073669, "loss_ce": 0.0002622200991027057, "loss_iou": 0.361328125, "loss_num": 0.0302734375, "loss_xval": 0.875, "num_input_tokens_seen": 99905032, "step": 1783 }, { "epoch": 3.9732739420935412, "grad_norm": 15.985518455505371, "learning_rate": 1e-06, "loss": 0.7939, "num_input_tokens_seen": 99961684, "step": 1784 }, { "epoch": 3.9732739420935412, "loss": 0.7649087905883789, "loss_ce": 0.00026036915369331837, "loss_iou": 0.349609375, "loss_num": 0.0128173828125, "loss_xval": 0.765625, "num_input_tokens_seen": 99961684, "step": 1784 }, { "epoch": 3.975501113585746, "grad_norm": 17.415231704711914, "learning_rate": 1e-06, "loss": 0.9524, "num_input_tokens_seen": 100018708, "step": 1785 }, { "epoch": 3.975501113585746, "loss": 1.1517889499664307, "loss_ce": 0.00042183659388683736, "loss_iou": 0.48828125, "loss_num": 0.034912109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 100018708, "step": 1785 }, { "epoch": 3.977728285077951, "grad_norm": 21.656478881835938, "learning_rate": 1e-06, "loss": 0.7554, "num_input_tokens_seen": 100073244, "step": 1786 }, { "epoch": 3.977728285077951, "loss": 0.634032130241394, "loss_ce": 0.0002430875028949231, "loss_iou": 0.287109375, "loss_num": 0.01214599609375, "loss_xval": 0.6328125, "num_input_tokens_seen": 100073244, "step": 1786 }, { "epoch": 3.979955456570156, "grad_norm": 55.99577713012695, "learning_rate": 1e-06, "loss": 1.0249, "num_input_tokens_seen": 100127112, "step": 1787 }, { "epoch": 3.979955456570156, "loss": 0.8672703504562378, "loss_ce": 0.00032695726258680224, "loss_iou": 0.328125, "loss_num": 0.04248046875, "loss_xval": 0.8671875, "num_input_tokens_seen": 100127112, "step": 1787 }, { "epoch": 3.9821826280623607, "grad_norm": 20.532896041870117, "learning_rate": 1e-06, "loss": 0.8107, "num_input_tokens_seen": 100181880, "step": 1788 }, { "epoch": 3.9821826280623607, "loss": 0.7985996603965759, "loss_ce": 0.000259819149505347, "loss_iou": 0.349609375, "loss_num": 0.02001953125, "loss_xval": 0.796875, "num_input_tokens_seen": 100181880, "step": 1788 }, { "epoch": 3.984409799554566, "grad_norm": 22.649169921875, "learning_rate": 1e-06, "loss": 0.7598, "num_input_tokens_seen": 100238956, "step": 1789 }, { "epoch": 3.984409799554566, "loss": 0.7128958702087402, "loss_ce": 0.00024937212583608925, "loss_iou": 0.30859375, "loss_num": 0.019287109375, "loss_xval": 0.7109375, "num_input_tokens_seen": 100238956, "step": 1789 }, { "epoch": 3.986636971046771, "grad_norm": 37.473751068115234, "learning_rate": 1e-06, "loss": 0.9002, "num_input_tokens_seen": 100293580, "step": 1790 }, { "epoch": 3.986636971046771, "loss": 0.7505241632461548, "loss_ce": 0.0005241355975158513, "loss_iou": 0.330078125, "loss_num": 0.017822265625, "loss_xval": 0.75, "num_input_tokens_seen": 100293580, "step": 1790 }, { "epoch": 3.9888641425389757, "grad_norm": 17.84164810180664, "learning_rate": 1e-06, "loss": 0.8214, "num_input_tokens_seen": 100347096, "step": 1791 }, { "epoch": 3.9888641425389757, "loss": 0.83009272813797, "loss_ce": 0.000258743908489123, "loss_iou": 0.32421875, "loss_num": 0.035888671875, "loss_xval": 0.828125, "num_input_tokens_seen": 100347096, "step": 1791 }, { "epoch": 3.9910913140311806, "grad_norm": 15.902947425842285, "learning_rate": 1e-06, "loss": 0.8695, "num_input_tokens_seen": 100403084, "step": 1792 }, { "epoch": 3.9910913140311806, "loss": 0.8725968599319458, "loss_ce": 0.0002824169350787997, "loss_iou": 0.376953125, "loss_num": 0.0238037109375, "loss_xval": 0.87109375, "num_input_tokens_seen": 100403084, "step": 1792 }, { "epoch": 3.9933184855233854, "grad_norm": 32.91238784790039, "learning_rate": 1e-06, "loss": 0.6368, "num_input_tokens_seen": 100460528, "step": 1793 }, { "epoch": 3.9933184855233854, "loss": 0.6155407428741455, "loss_ce": 0.00030632468406111, "loss_iou": 0.2578125, "loss_num": 0.020263671875, "loss_xval": 0.6171875, "num_input_tokens_seen": 100460528, "step": 1793 }, { "epoch": 3.9955456570155903, "grad_norm": 17.373069763183594, "learning_rate": 1e-06, "loss": 0.6237, "num_input_tokens_seen": 100518164, "step": 1794 }, { "epoch": 3.9955456570155903, "loss": 0.748991847038269, "loss_ce": 0.00045668811071664095, "loss_iou": 0.3125, "loss_num": 0.024658203125, "loss_xval": 0.75, "num_input_tokens_seen": 100518164, "step": 1794 }, { "epoch": 3.997772828507795, "grad_norm": 18.933780670166016, "learning_rate": 1e-06, "loss": 0.7479, "num_input_tokens_seen": 100576824, "step": 1795 }, { "epoch": 3.997772828507795, "loss": 0.8931920528411865, "loss_ce": 0.0003698251966852695, "loss_iou": 0.390625, "loss_num": 0.0220947265625, "loss_xval": 0.89453125, "num_input_tokens_seen": 100576824, "step": 1795 }, { "epoch": 4.0, "grad_norm": 18.814178466796875, "learning_rate": 1e-06, "loss": 0.8102, "num_input_tokens_seen": 100631512, "step": 1796 }, { "epoch": 4.0, "loss": 0.976672887802124, "loss_ce": 0.0003544443752616644, "loss_iou": 0.419921875, "loss_num": 0.0274658203125, "loss_xval": 0.9765625, "num_input_tokens_seen": 100631512, "step": 1796 }, { "epoch": 4.002227171492205, "grad_norm": 18.784257888793945, "learning_rate": 1e-06, "loss": 1.0043, "num_input_tokens_seen": 100688068, "step": 1797 }, { "epoch": 4.002227171492205, "loss": 1.069665551185608, "loss_ce": 0.00032957567600533366, "loss_iou": 0.431640625, "loss_num": 0.041015625, "loss_xval": 1.0703125, "num_input_tokens_seen": 100688068, "step": 1797 }, { "epoch": 4.00445434298441, "grad_norm": 15.86024284362793, "learning_rate": 1e-06, "loss": 0.9139, "num_input_tokens_seen": 100740940, "step": 1798 }, { "epoch": 4.00445434298441, "loss": 0.6887216567993164, "loss_ce": 0.00024506475892849267, "loss_iou": 0.302734375, "loss_num": 0.0166015625, "loss_xval": 0.6875, "num_input_tokens_seen": 100740940, "step": 1798 }, { "epoch": 4.006681514476615, "grad_norm": 41.829795837402344, "learning_rate": 1e-06, "loss": 0.7138, "num_input_tokens_seen": 100797540, "step": 1799 }, { "epoch": 4.006681514476615, "loss": 0.5916675329208374, "loss_ce": 0.00035895261680707335, "loss_iou": 0.248046875, "loss_num": 0.0191650390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 100797540, "step": 1799 }, { "epoch": 4.008908685968819, "grad_norm": 15.94815731048584, "learning_rate": 1e-06, "loss": 0.8192, "num_input_tokens_seen": 100852428, "step": 1800 }, { "epoch": 4.008908685968819, "loss": 0.628261148929596, "loss_ce": 0.000331463961629197, "loss_iou": 0.259765625, "loss_num": 0.0218505859375, "loss_xval": 0.62890625, "num_input_tokens_seen": 100852428, "step": 1800 }, { "epoch": 4.011135857461024, "grad_norm": 17.08213233947754, "learning_rate": 1e-06, "loss": 0.6745, "num_input_tokens_seen": 100909988, "step": 1801 }, { "epoch": 4.011135857461024, "loss": 0.49927961826324463, "loss_ce": 0.00025619484949856997, "loss_iou": 0.2138671875, "loss_num": 0.0142822265625, "loss_xval": 0.5, "num_input_tokens_seen": 100909988, "step": 1801 }, { "epoch": 4.013363028953229, "grad_norm": 15.652112007141113, "learning_rate": 1e-06, "loss": 0.7803, "num_input_tokens_seen": 100967540, "step": 1802 }, { "epoch": 4.013363028953229, "loss": 0.6135426759719849, "loss_ce": 0.0002614644472487271, "loss_iou": 0.26171875, "loss_num": 0.0179443359375, "loss_xval": 0.61328125, "num_input_tokens_seen": 100967540, "step": 1802 }, { "epoch": 4.015590200445434, "grad_norm": 18.24171257019043, "learning_rate": 1e-06, "loss": 0.9651, "num_input_tokens_seen": 101023900, "step": 1803 }, { "epoch": 4.015590200445434, "loss": 0.9055318832397461, "loss_ce": 0.00025842676404863596, "loss_iou": 0.376953125, "loss_num": 0.030029296875, "loss_xval": 0.90625, "num_input_tokens_seen": 101023900, "step": 1803 }, { "epoch": 4.017817371937639, "grad_norm": 26.98381996154785, "learning_rate": 1e-06, "loss": 0.9335, "num_input_tokens_seen": 101081336, "step": 1804 }, { "epoch": 4.017817371937639, "loss": 0.942715048789978, "loss_ce": 0.00033223340869881213, "loss_iou": 0.3828125, "loss_num": 0.03564453125, "loss_xval": 0.94140625, "num_input_tokens_seen": 101081336, "step": 1804 }, { "epoch": 4.020044543429844, "grad_norm": 14.921833038330078, "learning_rate": 1e-06, "loss": 0.8849, "num_input_tokens_seen": 101139376, "step": 1805 }, { "epoch": 4.020044543429844, "loss": 1.0176324844360352, "loss_ce": 0.0007868300890550017, "loss_iou": 0.44921875, "loss_num": 0.024169921875, "loss_xval": 1.015625, "num_input_tokens_seen": 101139376, "step": 1805 }, { "epoch": 4.022271714922049, "grad_norm": 53.253265380859375, "learning_rate": 1e-06, "loss": 0.6965, "num_input_tokens_seen": 101197336, "step": 1806 }, { "epoch": 4.022271714922049, "loss": 0.5695779323577881, "loss_ce": 0.00024200681946240366, "loss_iou": 0.24609375, "loss_num": 0.0155029296875, "loss_xval": 0.5703125, "num_input_tokens_seen": 101197336, "step": 1806 }, { "epoch": 4.0244988864142535, "grad_norm": 12.943678855895996, "learning_rate": 1e-06, "loss": 0.6534, "num_input_tokens_seen": 101253816, "step": 1807 }, { "epoch": 4.0244988864142535, "loss": 0.6627930402755737, "loss_ce": 0.00043953658314421773, "loss_iou": 0.26953125, "loss_num": 0.0250244140625, "loss_xval": 0.6640625, "num_input_tokens_seen": 101253816, "step": 1807 }, { "epoch": 4.026726057906459, "grad_norm": 45.154788970947266, "learning_rate": 1e-06, "loss": 0.9206, "num_input_tokens_seen": 101307840, "step": 1808 }, { "epoch": 4.026726057906459, "loss": 0.8648847341537476, "loss_ce": 0.0002606944181025028, "loss_iou": 0.34375, "loss_num": 0.03564453125, "loss_xval": 0.86328125, "num_input_tokens_seen": 101307840, "step": 1808 }, { "epoch": 4.028953229398664, "grad_norm": 18.014551162719727, "learning_rate": 1e-06, "loss": 0.7334, "num_input_tokens_seen": 101363116, "step": 1809 }, { "epoch": 4.028953229398664, "loss": 0.5737777352333069, "loss_ce": 0.0002914007636718452, "loss_iou": 0.255859375, "loss_num": 0.0126953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 101363116, "step": 1809 }, { "epoch": 4.031180400890869, "grad_norm": 18.99349021911621, "learning_rate": 1e-06, "loss": 0.8291, "num_input_tokens_seen": 101419912, "step": 1810 }, { "epoch": 4.031180400890869, "loss": 0.7891653180122375, "loss_ce": 0.0003469590446911752, "loss_iou": 0.30859375, "loss_num": 0.034423828125, "loss_xval": 0.7890625, "num_input_tokens_seen": 101419912, "step": 1810 }, { "epoch": 4.033407572383074, "grad_norm": 19.479049682617188, "learning_rate": 1e-06, "loss": 0.6363, "num_input_tokens_seen": 101478732, "step": 1811 }, { "epoch": 4.033407572383074, "loss": 0.5214917659759521, "loss_ce": 0.0002515409141778946, "loss_iou": 0.2236328125, "loss_num": 0.0146484375, "loss_xval": 0.51953125, "num_input_tokens_seen": 101478732, "step": 1811 }, { "epoch": 4.035634743875279, "grad_norm": 21.70351791381836, "learning_rate": 1e-06, "loss": 0.8533, "num_input_tokens_seen": 101536500, "step": 1812 }, { "epoch": 4.035634743875279, "loss": 0.8468989133834839, "loss_ce": 0.0003413098747842014, "loss_iou": 0.365234375, "loss_num": 0.0234375, "loss_xval": 0.84765625, "num_input_tokens_seen": 101536500, "step": 1812 }, { "epoch": 4.0378619153674835, "grad_norm": 15.182201385498047, "learning_rate": 1e-06, "loss": 0.9404, "num_input_tokens_seen": 101592900, "step": 1813 }, { "epoch": 4.0378619153674835, "loss": 1.334275484085083, "loss_ce": 0.00029111921321600676, "loss_iou": 0.59375, "loss_num": 0.030029296875, "loss_xval": 1.3359375, "num_input_tokens_seen": 101592900, "step": 1813 }, { "epoch": 4.040089086859688, "grad_norm": 18.792926788330078, "learning_rate": 1e-06, "loss": 0.7653, "num_input_tokens_seen": 101648332, "step": 1814 }, { "epoch": 4.040089086859688, "loss": 0.7761302590370178, "loss_ce": 0.0002513654180802405, "loss_iou": 0.328125, "loss_num": 0.0245361328125, "loss_xval": 0.77734375, "num_input_tokens_seen": 101648332, "step": 1814 }, { "epoch": 4.042316258351893, "grad_norm": 22.391029357910156, "learning_rate": 1e-06, "loss": 0.7972, "num_input_tokens_seen": 101702948, "step": 1815 }, { "epoch": 4.042316258351893, "loss": 0.9708037376403809, "loss_ce": 0.00034468824742361903, "loss_iou": 0.427734375, "loss_num": 0.0230712890625, "loss_xval": 0.96875, "num_input_tokens_seen": 101702948, "step": 1815 }, { "epoch": 4.044543429844098, "grad_norm": 19.350894927978516, "learning_rate": 1e-06, "loss": 0.8455, "num_input_tokens_seen": 101756712, "step": 1816 }, { "epoch": 4.044543429844098, "loss": 0.6643034219741821, "loss_ce": 0.00024090104852803051, "loss_iou": 0.275390625, "loss_num": 0.0228271484375, "loss_xval": 0.6640625, "num_input_tokens_seen": 101756712, "step": 1816 }, { "epoch": 4.046770601336303, "grad_norm": 18.292842864990234, "learning_rate": 1e-06, "loss": 0.8264, "num_input_tokens_seen": 101812980, "step": 1817 }, { "epoch": 4.046770601336303, "loss": 0.9234464168548584, "loss_ce": 0.00035073645994998515, "loss_iou": 0.392578125, "loss_num": 0.027587890625, "loss_xval": 0.921875, "num_input_tokens_seen": 101812980, "step": 1817 }, { "epoch": 4.048997772828508, "grad_norm": 19.12327003479004, "learning_rate": 1e-06, "loss": 0.7468, "num_input_tokens_seen": 101868528, "step": 1818 }, { "epoch": 4.048997772828508, "loss": 0.8360873460769653, "loss_ce": 0.0003940344322472811, "loss_iou": 0.349609375, "loss_num": 0.02734375, "loss_xval": 0.8359375, "num_input_tokens_seen": 101868528, "step": 1818 }, { "epoch": 4.051224944320713, "grad_norm": 15.702198028564453, "learning_rate": 1e-06, "loss": 0.6871, "num_input_tokens_seen": 101923832, "step": 1819 }, { "epoch": 4.051224944320713, "loss": 0.7370009422302246, "loss_ce": 0.0014051980106160045, "loss_iou": 0.30859375, "loss_num": 0.0234375, "loss_xval": 0.734375, "num_input_tokens_seen": 101923832, "step": 1819 }, { "epoch": 4.0534521158129175, "grad_norm": 21.061260223388672, "learning_rate": 1e-06, "loss": 0.7905, "num_input_tokens_seen": 101978472, "step": 1820 }, { "epoch": 4.0534521158129175, "loss": 0.8799552917480469, "loss_ce": 0.0006828849436715245, "loss_iou": 0.353515625, "loss_num": 0.034423828125, "loss_xval": 0.87890625, "num_input_tokens_seen": 101978472, "step": 1820 }, { "epoch": 4.055679287305122, "grad_norm": 23.500896453857422, "learning_rate": 1e-06, "loss": 0.9895, "num_input_tokens_seen": 102035200, "step": 1821 }, { "epoch": 4.055679287305122, "loss": 0.9768585562705994, "loss_ce": 0.0002960490819532424, "loss_iou": 0.40234375, "loss_num": 0.034423828125, "loss_xval": 0.9765625, "num_input_tokens_seen": 102035200, "step": 1821 }, { "epoch": 4.057906458797327, "grad_norm": 16.685222625732422, "learning_rate": 1e-06, "loss": 0.7959, "num_input_tokens_seen": 102088752, "step": 1822 }, { "epoch": 4.057906458797327, "loss": 0.8308522701263428, "loss_ce": 0.0002858432417269796, "loss_iou": 0.34375, "loss_num": 0.0283203125, "loss_xval": 0.83203125, "num_input_tokens_seen": 102088752, "step": 1822 }, { "epoch": 4.060133630289532, "grad_norm": 36.30082702636719, "learning_rate": 1e-06, "loss": 0.7506, "num_input_tokens_seen": 102139624, "step": 1823 }, { "epoch": 4.060133630289532, "loss": 0.8599013090133667, "loss_ce": 0.0002821852976921946, "loss_iou": 0.375, "loss_num": 0.02197265625, "loss_xval": 0.859375, "num_input_tokens_seen": 102139624, "step": 1823 }, { "epoch": 4.062360801781737, "grad_norm": 17.49234962463379, "learning_rate": 1e-06, "loss": 0.7182, "num_input_tokens_seen": 102196280, "step": 1824 }, { "epoch": 4.062360801781737, "loss": 0.7073689103126526, "loss_ce": 0.0003376654349267483, "loss_iou": 0.3125, "loss_num": 0.016845703125, "loss_xval": 0.70703125, "num_input_tokens_seen": 102196280, "step": 1824 }, { "epoch": 4.064587973273942, "grad_norm": 23.85994529724121, "learning_rate": 1e-06, "loss": 0.9767, "num_input_tokens_seen": 102252520, "step": 1825 }, { "epoch": 4.064587973273942, "loss": 1.1830354928970337, "loss_ce": 0.0002961636637337506, "loss_iou": 0.482421875, "loss_num": 0.043212890625, "loss_xval": 1.1796875, "num_input_tokens_seen": 102252520, "step": 1825 }, { "epoch": 4.066815144766147, "grad_norm": 15.231406211853027, "learning_rate": 1e-06, "loss": 0.7989, "num_input_tokens_seen": 102308852, "step": 1826 }, { "epoch": 4.066815144766147, "loss": 0.7088180780410767, "loss_ce": 0.0003219395875930786, "loss_iou": 0.291015625, "loss_num": 0.02490234375, "loss_xval": 0.70703125, "num_input_tokens_seen": 102308852, "step": 1826 }, { "epoch": 4.0690423162583516, "grad_norm": 20.627899169921875, "learning_rate": 1e-06, "loss": 0.8451, "num_input_tokens_seen": 102363256, "step": 1827 }, { "epoch": 4.0690423162583516, "loss": 0.43590307235717773, "loss_ce": 0.0003562027995940298, "loss_iou": 0.177734375, "loss_num": 0.0159912109375, "loss_xval": 0.435546875, "num_input_tokens_seen": 102363256, "step": 1827 }, { "epoch": 4.071269487750556, "grad_norm": 15.15736198425293, "learning_rate": 1e-06, "loss": 0.8284, "num_input_tokens_seen": 102417712, "step": 1828 }, { "epoch": 4.071269487750556, "loss": 0.876508355140686, "loss_ce": 0.0002876708167605102, "loss_iou": 0.390625, "loss_num": 0.01904296875, "loss_xval": 0.875, "num_input_tokens_seen": 102417712, "step": 1828 }, { "epoch": 4.073496659242761, "grad_norm": 16.91136360168457, "learning_rate": 1e-06, "loss": 0.7477, "num_input_tokens_seen": 102476200, "step": 1829 }, { "epoch": 4.073496659242761, "loss": 0.862647294998169, "loss_ce": 0.0003425973991397768, "loss_iou": 0.3828125, "loss_num": 0.0196533203125, "loss_xval": 0.86328125, "num_input_tokens_seen": 102476200, "step": 1829 }, { "epoch": 4.075723830734967, "grad_norm": 21.360063552856445, "learning_rate": 1e-06, "loss": 0.6998, "num_input_tokens_seen": 102533876, "step": 1830 }, { "epoch": 4.075723830734967, "loss": 0.8524512052536011, "loss_ce": 0.0004004453949164599, "loss_iou": 0.35546875, "loss_num": 0.0284423828125, "loss_xval": 0.8515625, "num_input_tokens_seen": 102533876, "step": 1830 }, { "epoch": 4.077951002227172, "grad_norm": 18.112483978271484, "learning_rate": 1e-06, "loss": 0.6848, "num_input_tokens_seen": 102590972, "step": 1831 }, { "epoch": 4.077951002227172, "loss": 0.6477828621864319, "loss_ce": 0.0003219213103875518, "loss_iou": 0.279296875, "loss_num": 0.0177001953125, "loss_xval": 0.6484375, "num_input_tokens_seen": 102590972, "step": 1831 }, { "epoch": 4.080178173719377, "grad_norm": 20.459156036376953, "learning_rate": 1e-06, "loss": 0.8724, "num_input_tokens_seen": 102648508, "step": 1832 }, { "epoch": 4.080178173719377, "loss": 0.8328180909156799, "loss_ce": 0.00029852997977286577, "loss_iou": 0.345703125, "loss_num": 0.0281982421875, "loss_xval": 0.83203125, "num_input_tokens_seen": 102648508, "step": 1832 }, { "epoch": 4.082405345211582, "grad_norm": 21.345972061157227, "learning_rate": 1e-06, "loss": 0.7649, "num_input_tokens_seen": 102705228, "step": 1833 }, { "epoch": 4.082405345211582, "loss": 0.7076017260551453, "loss_ce": 0.0003263298131059855, "loss_iou": 0.29296875, "loss_num": 0.0238037109375, "loss_xval": 0.70703125, "num_input_tokens_seen": 102705228, "step": 1833 }, { "epoch": 4.0846325167037865, "grad_norm": 17.683818817138672, "learning_rate": 1e-06, "loss": 0.8536, "num_input_tokens_seen": 102761536, "step": 1834 }, { "epoch": 4.0846325167037865, "loss": 0.8223095536231995, "loss_ce": 0.00028804835164919496, "loss_iou": 0.341796875, "loss_num": 0.027099609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 102761536, "step": 1834 }, { "epoch": 4.086859688195991, "grad_norm": 20.6676082611084, "learning_rate": 1e-06, "loss": 0.9164, "num_input_tokens_seen": 102818308, "step": 1835 }, { "epoch": 4.086859688195991, "loss": 0.900152325630188, "loss_ce": 0.0002499477122910321, "loss_iou": 0.384765625, "loss_num": 0.0263671875, "loss_xval": 0.8984375, "num_input_tokens_seen": 102818308, "step": 1835 }, { "epoch": 4.089086859688196, "grad_norm": 16.989944458007812, "learning_rate": 1e-06, "loss": 0.7689, "num_input_tokens_seen": 102870992, "step": 1836 }, { "epoch": 4.089086859688196, "loss": 0.6965099573135376, "loss_ce": 0.00022090264246799052, "loss_iou": 0.2890625, "loss_num": 0.023681640625, "loss_xval": 0.6953125, "num_input_tokens_seen": 102870992, "step": 1836 }, { "epoch": 4.091314031180401, "grad_norm": 18.3848934173584, "learning_rate": 1e-06, "loss": 0.9445, "num_input_tokens_seen": 102925516, "step": 1837 }, { "epoch": 4.091314031180401, "loss": 0.8509833812713623, "loss_ce": 0.00027541263261809945, "loss_iou": 0.345703125, "loss_num": 0.03173828125, "loss_xval": 0.8515625, "num_input_tokens_seen": 102925516, "step": 1837 }, { "epoch": 4.093541202672606, "grad_norm": 19.348962783813477, "learning_rate": 1e-06, "loss": 1.0582, "num_input_tokens_seen": 102980780, "step": 1838 }, { "epoch": 4.093541202672606, "loss": 0.901675820350647, "loss_ce": 0.00030863762367516756, "loss_iou": 0.39453125, "loss_num": 0.022705078125, "loss_xval": 0.90234375, "num_input_tokens_seen": 102980780, "step": 1838 }, { "epoch": 4.095768374164811, "grad_norm": 18.351146697998047, "learning_rate": 1e-06, "loss": 0.6857, "num_input_tokens_seen": 103038208, "step": 1839 }, { "epoch": 4.095768374164811, "loss": 0.7659989595413208, "loss_ce": 0.00025187875144183636, "loss_iou": 0.310546875, "loss_num": 0.02880859375, "loss_xval": 0.765625, "num_input_tokens_seen": 103038208, "step": 1839 }, { "epoch": 4.097995545657016, "grad_norm": 15.127579689025879, "learning_rate": 1e-06, "loss": 0.8049, "num_input_tokens_seen": 103090740, "step": 1840 }, { "epoch": 4.097995545657016, "loss": 0.9890508055686951, "loss_ce": 0.0002812229795381427, "loss_iou": 0.439453125, "loss_num": 0.022216796875, "loss_xval": 0.98828125, "num_input_tokens_seen": 103090740, "step": 1840 }, { "epoch": 4.1002227171492205, "grad_norm": 25.431516647338867, "learning_rate": 1e-06, "loss": 0.7564, "num_input_tokens_seen": 103147724, "step": 1841 }, { "epoch": 4.1002227171492205, "loss": 0.5847120881080627, "loss_ce": 0.00023942730331327766, "loss_iou": 0.25390625, "loss_num": 0.0155029296875, "loss_xval": 0.5859375, "num_input_tokens_seen": 103147724, "step": 1841 }, { "epoch": 4.102449888641425, "grad_norm": 14.01602554321289, "learning_rate": 1e-06, "loss": 0.6801, "num_input_tokens_seen": 103206520, "step": 1842 }, { "epoch": 4.102449888641425, "loss": 0.661648690700531, "loss_ce": 0.004055918660014868, "loss_iou": 0.27734375, "loss_num": 0.0203857421875, "loss_xval": 0.65625, "num_input_tokens_seen": 103206520, "step": 1842 }, { "epoch": 4.10467706013363, "grad_norm": 30.822895050048828, "learning_rate": 1e-06, "loss": 0.8477, "num_input_tokens_seen": 103260592, "step": 1843 }, { "epoch": 4.10467706013363, "loss": 1.0794177055358887, "loss_ce": 0.0003160441410727799, "loss_iou": 0.48046875, "loss_num": 0.023681640625, "loss_xval": 1.078125, "num_input_tokens_seen": 103260592, "step": 1843 }, { "epoch": 4.106904231625835, "grad_norm": 19.740049362182617, "learning_rate": 1e-06, "loss": 0.8343, "num_input_tokens_seen": 103318044, "step": 1844 }, { "epoch": 4.106904231625835, "loss": 0.7112168669700623, "loss_ce": 0.00027938373386859894, "loss_iou": 0.3125, "loss_num": 0.01708984375, "loss_xval": 0.7109375, "num_input_tokens_seen": 103318044, "step": 1844 }, { "epoch": 4.10913140311804, "grad_norm": 23.76945686340332, "learning_rate": 1e-06, "loss": 0.5955, "num_input_tokens_seen": 103371804, "step": 1845 }, { "epoch": 4.10913140311804, "loss": 0.5928046107292175, "loss_ce": 0.00027534179389476776, "loss_iou": 0.240234375, "loss_num": 0.0224609375, "loss_xval": 0.59375, "num_input_tokens_seen": 103371804, "step": 1845 }, { "epoch": 4.111358574610245, "grad_norm": 19.958959579467773, "learning_rate": 1e-06, "loss": 0.7486, "num_input_tokens_seen": 103428372, "step": 1846 }, { "epoch": 4.111358574610245, "loss": 0.8037309646606445, "loss_ce": 0.00026413900195620954, "loss_iou": 0.375, "loss_num": 0.0108642578125, "loss_xval": 0.8046875, "num_input_tokens_seen": 103428372, "step": 1846 }, { "epoch": 4.11358574610245, "grad_norm": 15.336278915405273, "learning_rate": 1e-06, "loss": 0.6876, "num_input_tokens_seen": 103484976, "step": 1847 }, { "epoch": 4.11358574610245, "loss": 0.7304455637931824, "loss_ce": 0.0002209786616731435, "loss_iou": 0.302734375, "loss_num": 0.02490234375, "loss_xval": 0.73046875, "num_input_tokens_seen": 103484976, "step": 1847 }, { "epoch": 4.1158129175946545, "grad_norm": 13.736832618713379, "learning_rate": 1e-06, "loss": 0.8494, "num_input_tokens_seen": 103542500, "step": 1848 }, { "epoch": 4.1158129175946545, "loss": 0.8857908248901367, "loss_ce": 0.00041487094131298363, "loss_iou": 0.35546875, "loss_num": 0.034912109375, "loss_xval": 0.88671875, "num_input_tokens_seen": 103542500, "step": 1848 }, { "epoch": 4.118040089086859, "grad_norm": 18.131500244140625, "learning_rate": 1e-06, "loss": 0.9464, "num_input_tokens_seen": 103597080, "step": 1849 }, { "epoch": 4.118040089086859, "loss": 0.9518570899963379, "loss_ce": 0.0003189902927260846, "loss_iou": 0.38671875, "loss_num": 0.035888671875, "loss_xval": 0.953125, "num_input_tokens_seen": 103597080, "step": 1849 }, { "epoch": 4.120267260579064, "grad_norm": 18.16317367553711, "learning_rate": 1e-06, "loss": 1.0965, "num_input_tokens_seen": 103651008, "step": 1850 }, { "epoch": 4.120267260579064, "loss": 1.1296725273132324, "loss_ce": 0.00027801876422017813, "loss_iou": 0.458984375, "loss_num": 0.042236328125, "loss_xval": 1.1328125, "num_input_tokens_seen": 103651008, "step": 1850 }, { "epoch": 4.122494432071269, "grad_norm": 29.750585556030273, "learning_rate": 1e-06, "loss": 0.8961, "num_input_tokens_seen": 103709900, "step": 1851 }, { "epoch": 4.122494432071269, "loss": 0.8951970934867859, "loss_ce": 0.00042168618529103696, "loss_iou": 0.373046875, "loss_num": 0.0296630859375, "loss_xval": 0.89453125, "num_input_tokens_seen": 103709900, "step": 1851 }, { "epoch": 4.124721603563474, "grad_norm": 27.113773345947266, "learning_rate": 1e-06, "loss": 0.6256, "num_input_tokens_seen": 103766964, "step": 1852 }, { "epoch": 4.124721603563474, "loss": 0.6550110578536987, "loss_ce": 0.00022586948762182146, "loss_iou": 0.298828125, "loss_num": 0.01153564453125, "loss_xval": 0.65625, "num_input_tokens_seen": 103766964, "step": 1852 }, { "epoch": 4.12694877505568, "grad_norm": 15.007980346679688, "learning_rate": 1e-06, "loss": 1.0075, "num_input_tokens_seen": 103821128, "step": 1853 }, { "epoch": 4.12694877505568, "loss": 0.9304366707801819, "loss_ce": 0.00026090393657796085, "loss_iou": 0.3984375, "loss_num": 0.0267333984375, "loss_xval": 0.9296875, "num_input_tokens_seen": 103821128, "step": 1853 }, { "epoch": 4.129175946547885, "grad_norm": 16.54915428161621, "learning_rate": 1e-06, "loss": 0.8283, "num_input_tokens_seen": 103874556, "step": 1854 }, { "epoch": 4.129175946547885, "loss": 0.6098688840866089, "loss_ce": 0.00024977908469736576, "loss_iou": 0.25390625, "loss_num": 0.0201416015625, "loss_xval": 0.609375, "num_input_tokens_seen": 103874556, "step": 1854 }, { "epoch": 4.131403118040089, "grad_norm": 18.67920684814453, "learning_rate": 1e-06, "loss": 0.8078, "num_input_tokens_seen": 103929176, "step": 1855 }, { "epoch": 4.131403118040089, "loss": 0.6376897692680359, "loss_ce": 0.00023860471264924854, "loss_iou": 0.28515625, "loss_num": 0.01318359375, "loss_xval": 0.63671875, "num_input_tokens_seen": 103929176, "step": 1855 }, { "epoch": 4.133630289532294, "grad_norm": 14.864916801452637, "learning_rate": 1e-06, "loss": 0.7531, "num_input_tokens_seen": 103985216, "step": 1856 }, { "epoch": 4.133630289532294, "loss": 0.606116533279419, "loss_ce": 0.0002816014748532325, "loss_iou": 0.244140625, "loss_num": 0.023681640625, "loss_xval": 0.60546875, "num_input_tokens_seen": 103985216, "step": 1856 }, { "epoch": 4.135857461024499, "grad_norm": 15.250370025634766, "learning_rate": 1e-06, "loss": 0.7684, "num_input_tokens_seen": 104043664, "step": 1857 }, { "epoch": 4.135857461024499, "loss": 0.810298502445221, "loss_ce": 0.00023990226327441633, "loss_iou": 0.337890625, "loss_num": 0.02734375, "loss_xval": 0.80859375, "num_input_tokens_seen": 104043664, "step": 1857 }, { "epoch": 4.138084632516704, "grad_norm": 33.826820373535156, "learning_rate": 1e-06, "loss": 0.6527, "num_input_tokens_seen": 104101224, "step": 1858 }, { "epoch": 4.138084632516704, "loss": 0.5385033488273621, "loss_ce": 0.00029532582266256213, "loss_iou": 0.23046875, "loss_num": 0.01519775390625, "loss_xval": 0.5390625, "num_input_tokens_seen": 104101224, "step": 1858 }, { "epoch": 4.140311804008909, "grad_norm": 26.74302101135254, "learning_rate": 1e-06, "loss": 0.7609, "num_input_tokens_seen": 104159992, "step": 1859 }, { "epoch": 4.140311804008909, "loss": 0.7529861927032471, "loss_ce": 0.00030061625875532627, "loss_iou": 0.32421875, "loss_num": 0.0208740234375, "loss_xval": 0.75390625, "num_input_tokens_seen": 104159992, "step": 1859 }, { "epoch": 4.142538975501114, "grad_norm": 16.13300895690918, "learning_rate": 1e-06, "loss": 0.8149, "num_input_tokens_seen": 104217472, "step": 1860 }, { "epoch": 4.142538975501114, "loss": 0.7612708806991577, "loss_ce": 0.00028455047868192196, "loss_iou": 0.328125, "loss_num": 0.02099609375, "loss_xval": 0.76171875, "num_input_tokens_seen": 104217472, "step": 1860 }, { "epoch": 4.144766146993319, "grad_norm": 14.439784049987793, "learning_rate": 1e-06, "loss": 0.7813, "num_input_tokens_seen": 104274900, "step": 1861 }, { "epoch": 4.144766146993319, "loss": 0.7901536226272583, "loss_ce": 0.0003587682731449604, "loss_iou": 0.302734375, "loss_num": 0.037109375, "loss_xval": 0.7890625, "num_input_tokens_seen": 104274900, "step": 1861 }, { "epoch": 4.146993318485523, "grad_norm": 15.760991096496582, "learning_rate": 1e-06, "loss": 0.9137, "num_input_tokens_seen": 104330380, "step": 1862 }, { "epoch": 4.146993318485523, "loss": 0.831478476524353, "loss_ce": 0.0004237755201756954, "loss_iou": 0.33203125, "loss_num": 0.033203125, "loss_xval": 0.83203125, "num_input_tokens_seen": 104330380, "step": 1862 }, { "epoch": 4.149220489977728, "grad_norm": 16.417325973510742, "learning_rate": 1e-06, "loss": 0.796, "num_input_tokens_seen": 104386476, "step": 1863 }, { "epoch": 4.149220489977728, "loss": 0.8149293065071106, "loss_ce": 0.0002320479543413967, "loss_iou": 0.365234375, "loss_num": 0.0169677734375, "loss_xval": 0.81640625, "num_input_tokens_seen": 104386476, "step": 1863 }, { "epoch": 4.151447661469933, "grad_norm": 26.965940475463867, "learning_rate": 1e-06, "loss": 0.6169, "num_input_tokens_seen": 104442536, "step": 1864 }, { "epoch": 4.151447661469933, "loss": 0.6312210559844971, "loss_ce": 0.00023957040684763342, "loss_iou": 0.275390625, "loss_num": 0.0159912109375, "loss_xval": 0.6328125, "num_input_tokens_seen": 104442536, "step": 1864 }, { "epoch": 4.153674832962138, "grad_norm": 26.452974319458008, "learning_rate": 1e-06, "loss": 0.8482, "num_input_tokens_seen": 104497928, "step": 1865 }, { "epoch": 4.153674832962138, "loss": 0.9214756488800049, "loss_ce": 0.00033305271062999964, "loss_iou": 0.388671875, "loss_num": 0.0289306640625, "loss_xval": 0.921875, "num_input_tokens_seen": 104497928, "step": 1865 }, { "epoch": 4.155902004454343, "grad_norm": 22.56464385986328, "learning_rate": 1e-06, "loss": 0.807, "num_input_tokens_seen": 104553604, "step": 1866 }, { "epoch": 4.155902004454343, "loss": 0.9364668130874634, "loss_ce": 0.00030959240393713117, "loss_iou": 0.36328125, "loss_num": 0.042236328125, "loss_xval": 0.9375, "num_input_tokens_seen": 104553604, "step": 1866 }, { "epoch": 4.158129175946548, "grad_norm": 16.754383087158203, "learning_rate": 1e-06, "loss": 0.7331, "num_input_tokens_seen": 104609096, "step": 1867 }, { "epoch": 4.158129175946548, "loss": 0.7379820942878723, "loss_ce": 0.0003112004487775266, "loss_iou": 0.30859375, "loss_num": 0.0244140625, "loss_xval": 0.73828125, "num_input_tokens_seen": 104609096, "step": 1867 }, { "epoch": 4.160356347438753, "grad_norm": 21.52208709716797, "learning_rate": 1e-06, "loss": 0.9237, "num_input_tokens_seen": 104662120, "step": 1868 }, { "epoch": 4.160356347438753, "loss": 0.9280723333358765, "loss_ce": 0.00033793269540183246, "loss_iou": 0.37109375, "loss_num": 0.036865234375, "loss_xval": 0.9296875, "num_input_tokens_seen": 104662120, "step": 1868 }, { "epoch": 4.1625835189309575, "grad_norm": 21.401424407958984, "learning_rate": 1e-06, "loss": 0.6609, "num_input_tokens_seen": 104720356, "step": 1869 }, { "epoch": 4.1625835189309575, "loss": 0.6265453100204468, "loss_ce": 0.00032457264023832977, "loss_iou": 0.27734375, "loss_num": 0.01416015625, "loss_xval": 0.625, "num_input_tokens_seen": 104720356, "step": 1869 }, { "epoch": 4.164810690423162, "grad_norm": 20.714725494384766, "learning_rate": 1e-06, "loss": 0.8871, "num_input_tokens_seen": 104777020, "step": 1870 }, { "epoch": 4.164810690423162, "loss": 0.9072417616844177, "loss_ce": 0.00038138843956403434, "loss_iou": 0.3984375, "loss_num": 0.0220947265625, "loss_xval": 0.90625, "num_input_tokens_seen": 104777020, "step": 1870 }, { "epoch": 4.167037861915367, "grad_norm": 19.34101104736328, "learning_rate": 1e-06, "loss": 0.9844, "num_input_tokens_seen": 104832044, "step": 1871 }, { "epoch": 4.167037861915367, "loss": 1.2446290254592896, "loss_ce": 0.0004884034278802574, "loss_iou": 0.5234375, "loss_num": 0.038818359375, "loss_xval": 1.2421875, "num_input_tokens_seen": 104832044, "step": 1871 }, { "epoch": 4.169265033407572, "grad_norm": 22.785999298095703, "learning_rate": 1e-06, "loss": 0.7377, "num_input_tokens_seen": 104889672, "step": 1872 }, { "epoch": 4.169265033407572, "loss": 0.5561378002166748, "loss_ce": 0.0002295885351486504, "loss_iou": 0.224609375, "loss_num": 0.021484375, "loss_xval": 0.5546875, "num_input_tokens_seen": 104889672, "step": 1872 }, { "epoch": 4.171492204899777, "grad_norm": 24.524354934692383, "learning_rate": 1e-06, "loss": 0.7842, "num_input_tokens_seen": 104946972, "step": 1873 }, { "epoch": 4.171492204899777, "loss": 0.7845892906188965, "loss_ce": 0.0010199534008279443, "loss_iou": 0.314453125, "loss_num": 0.0311279296875, "loss_xval": 0.78515625, "num_input_tokens_seen": 104946972, "step": 1873 }, { "epoch": 4.173719376391982, "grad_norm": 21.119625091552734, "learning_rate": 1e-06, "loss": 0.7332, "num_input_tokens_seen": 105001336, "step": 1874 }, { "epoch": 4.173719376391982, "loss": 0.803102970123291, "loss_ce": 0.00036857047234661877, "loss_iou": 0.33984375, "loss_num": 0.024658203125, "loss_xval": 0.8046875, "num_input_tokens_seen": 105001336, "step": 1874 }, { "epoch": 4.1759465478841875, "grad_norm": 21.471126556396484, "learning_rate": 1e-06, "loss": 0.9933, "num_input_tokens_seen": 105056692, "step": 1875 }, { "epoch": 4.1759465478841875, "loss": 0.8526608943939209, "loss_ce": 0.0003660406800918281, "loss_iou": 0.369140625, "loss_num": 0.0228271484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 105056692, "step": 1875 }, { "epoch": 4.178173719376392, "grad_norm": 16.687490463256836, "learning_rate": 1e-06, "loss": 0.8024, "num_input_tokens_seen": 105113712, "step": 1876 }, { "epoch": 4.178173719376392, "loss": 0.668755054473877, "loss_ce": 0.0002979860291816294, "loss_iou": 0.28515625, "loss_num": 0.01953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 105113712, "step": 1876 }, { "epoch": 4.180400890868597, "grad_norm": 13.53281021118164, "learning_rate": 1e-06, "loss": 0.9623, "num_input_tokens_seen": 105169732, "step": 1877 }, { "epoch": 4.180400890868597, "loss": 0.9789065718650818, "loss_ce": 0.00026888877619057894, "loss_iou": 0.3828125, "loss_num": 0.04248046875, "loss_xval": 0.98046875, "num_input_tokens_seen": 105169732, "step": 1877 }, { "epoch": 4.182628062360802, "grad_norm": 21.907808303833008, "learning_rate": 1e-06, "loss": 0.8223, "num_input_tokens_seen": 105221624, "step": 1878 }, { "epoch": 4.182628062360802, "loss": 0.7880837917327881, "loss_ce": 0.00024200028565246612, "loss_iou": 0.33984375, "loss_num": 0.0216064453125, "loss_xval": 0.7890625, "num_input_tokens_seen": 105221624, "step": 1878 }, { "epoch": 4.184855233853007, "grad_norm": 28.09380340576172, "learning_rate": 1e-06, "loss": 1.0485, "num_input_tokens_seen": 105278504, "step": 1879 }, { "epoch": 4.184855233853007, "loss": 1.0873863697052002, "loss_ce": 0.00035019166534766555, "loss_iou": 0.458984375, "loss_num": 0.03369140625, "loss_xval": 1.0859375, "num_input_tokens_seen": 105278504, "step": 1879 }, { "epoch": 4.187082405345212, "grad_norm": 28.955883026123047, "learning_rate": 1e-06, "loss": 0.7196, "num_input_tokens_seen": 105335440, "step": 1880 }, { "epoch": 4.187082405345212, "loss": 0.7075688242912292, "loss_ce": 0.0002934132644440979, "loss_iou": 0.31640625, "loss_num": 0.01556396484375, "loss_xval": 0.70703125, "num_input_tokens_seen": 105335440, "step": 1880 }, { "epoch": 4.189309576837417, "grad_norm": 16.112701416015625, "learning_rate": 1e-06, "loss": 0.9407, "num_input_tokens_seen": 105391556, "step": 1881 }, { "epoch": 4.189309576837417, "loss": 1.010768175125122, "loss_ce": 0.00027016678359359503, "loss_iou": 0.365234375, "loss_num": 0.056396484375, "loss_xval": 1.0078125, "num_input_tokens_seen": 105391556, "step": 1881 }, { "epoch": 4.1915367483296215, "grad_norm": 12.533539772033691, "learning_rate": 1e-06, "loss": 0.8191, "num_input_tokens_seen": 105444616, "step": 1882 }, { "epoch": 4.1915367483296215, "loss": 0.994451642036438, "loss_ce": 0.00031105236848816276, "loss_iou": 0.431640625, "loss_num": 0.026123046875, "loss_xval": 0.9921875, "num_input_tokens_seen": 105444616, "step": 1882 }, { "epoch": 4.193763919821826, "grad_norm": 18.672292709350586, "learning_rate": 1e-06, "loss": 0.7595, "num_input_tokens_seen": 105502508, "step": 1883 }, { "epoch": 4.193763919821826, "loss": 0.787238359451294, "loss_ce": 0.00025101928622461855, "loss_iou": 0.326171875, "loss_num": 0.026611328125, "loss_xval": 0.78515625, "num_input_tokens_seen": 105502508, "step": 1883 }, { "epoch": 4.195991091314031, "grad_norm": 18.28311538696289, "learning_rate": 1e-06, "loss": 0.7337, "num_input_tokens_seen": 105559380, "step": 1884 }, { "epoch": 4.195991091314031, "loss": 0.6529708504676819, "loss_ce": 0.000993293710052967, "loss_iou": 0.283203125, "loss_num": 0.0169677734375, "loss_xval": 0.65234375, "num_input_tokens_seen": 105559380, "step": 1884 }, { "epoch": 4.198218262806236, "grad_norm": 23.031770706176758, "learning_rate": 1e-06, "loss": 0.7621, "num_input_tokens_seen": 105614328, "step": 1885 }, { "epoch": 4.198218262806236, "loss": 0.542103111743927, "loss_ce": 0.00023299809254240245, "loss_iou": 0.220703125, "loss_num": 0.0198974609375, "loss_xval": 0.54296875, "num_input_tokens_seen": 105614328, "step": 1885 }, { "epoch": 4.200445434298441, "grad_norm": 23.01361656188965, "learning_rate": 1e-06, "loss": 0.7555, "num_input_tokens_seen": 105670768, "step": 1886 }, { "epoch": 4.200445434298441, "loss": 0.7036082744598389, "loss_ce": 0.00023906549904495478, "loss_iou": 0.28125, "loss_num": 0.0281982421875, "loss_xval": 0.703125, "num_input_tokens_seen": 105670768, "step": 1886 }, { "epoch": 4.202672605790646, "grad_norm": 12.780369758605957, "learning_rate": 1e-06, "loss": 0.7383, "num_input_tokens_seen": 105727652, "step": 1887 }, { "epoch": 4.202672605790646, "loss": 0.7529648542404175, "loss_ce": 0.00027933655655942857, "loss_iou": 0.330078125, "loss_num": 0.018310546875, "loss_xval": 0.75390625, "num_input_tokens_seen": 105727652, "step": 1887 }, { "epoch": 4.204899777282851, "grad_norm": 17.18299674987793, "learning_rate": 1e-06, "loss": 0.775, "num_input_tokens_seen": 105784604, "step": 1888 }, { "epoch": 4.204899777282851, "loss": 0.6293338537216187, "loss_ce": 0.0003055296838283539, "loss_iou": 0.2353515625, "loss_num": 0.031494140625, "loss_xval": 0.62890625, "num_input_tokens_seen": 105784604, "step": 1888 }, { "epoch": 4.2071269487750556, "grad_norm": 48.9861946105957, "learning_rate": 1e-06, "loss": 0.9114, "num_input_tokens_seen": 105840236, "step": 1889 }, { "epoch": 4.2071269487750556, "loss": 1.198516607284546, "loss_ce": 0.0002743962104432285, "loss_iou": 0.48828125, "loss_num": 0.043701171875, "loss_xval": 1.1953125, "num_input_tokens_seen": 105840236, "step": 1889 }, { "epoch": 4.20935412026726, "grad_norm": 18.402477264404297, "learning_rate": 1e-06, "loss": 0.6669, "num_input_tokens_seen": 105895768, "step": 1890 }, { "epoch": 4.20935412026726, "loss": 0.6518987417221069, "loss_ce": 0.0002874084748327732, "loss_iou": 0.283203125, "loss_num": 0.017333984375, "loss_xval": 0.65234375, "num_input_tokens_seen": 105895768, "step": 1890 }, { "epoch": 4.211581291759465, "grad_norm": 19.16950035095215, "learning_rate": 1e-06, "loss": 0.9874, "num_input_tokens_seen": 105950604, "step": 1891 }, { "epoch": 4.211581291759465, "loss": 1.1375155448913574, "loss_ce": 0.00030853564385324717, "loss_iou": 0.462890625, "loss_num": 0.042236328125, "loss_xval": 1.140625, "num_input_tokens_seen": 105950604, "step": 1891 }, { "epoch": 4.21380846325167, "grad_norm": 12.357964515686035, "learning_rate": 1e-06, "loss": 0.6145, "num_input_tokens_seen": 106007040, "step": 1892 }, { "epoch": 4.21380846325167, "loss": 0.6721287965774536, "loss_ce": 0.000253824342507869, "loss_iou": 0.291015625, "loss_num": 0.0177001953125, "loss_xval": 0.671875, "num_input_tokens_seen": 106007040, "step": 1892 }, { "epoch": 4.216035634743875, "grad_norm": 16.27143669128418, "learning_rate": 1e-06, "loss": 0.9689, "num_input_tokens_seen": 106063332, "step": 1893 }, { "epoch": 4.216035634743875, "loss": 0.9934124946594238, "loss_ce": 0.00024840355035848916, "loss_iou": 0.43359375, "loss_num": 0.0255126953125, "loss_xval": 0.9921875, "num_input_tokens_seen": 106063332, "step": 1893 }, { "epoch": 4.21826280623608, "grad_norm": 14.465191841125488, "learning_rate": 1e-06, "loss": 0.8634, "num_input_tokens_seen": 106119888, "step": 1894 }, { "epoch": 4.21826280623608, "loss": 0.752957820892334, "loss_ce": 0.0002722431090660393, "loss_iou": 0.3046875, "loss_num": 0.02880859375, "loss_xval": 0.75390625, "num_input_tokens_seen": 106119888, "step": 1894 }, { "epoch": 4.220489977728285, "grad_norm": 13.513664245605469, "learning_rate": 1e-06, "loss": 0.7989, "num_input_tokens_seen": 106177424, "step": 1895 }, { "epoch": 4.220489977728285, "loss": 0.869196891784668, "loss_ce": 0.00030032347422093153, "loss_iou": 0.376953125, "loss_num": 0.022705078125, "loss_xval": 0.8671875, "num_input_tokens_seen": 106177424, "step": 1895 }, { "epoch": 4.22271714922049, "grad_norm": 18.78656005859375, "learning_rate": 1e-06, "loss": 0.7666, "num_input_tokens_seen": 106233892, "step": 1896 }, { "epoch": 4.22271714922049, "loss": 0.7184998989105225, "loss_ce": 0.00023819129273761064, "loss_iou": 0.32421875, "loss_num": 0.0142822265625, "loss_xval": 0.71875, "num_input_tokens_seen": 106233892, "step": 1896 }, { "epoch": 4.224944320712694, "grad_norm": 41.630496978759766, "learning_rate": 1e-06, "loss": 0.8828, "num_input_tokens_seen": 106289772, "step": 1897 }, { "epoch": 4.224944320712694, "loss": 0.8243512511253357, "loss_ce": 0.00037670054007321596, "loss_iou": 0.35546875, "loss_num": 0.0230712890625, "loss_xval": 0.82421875, "num_input_tokens_seen": 106289772, "step": 1897 }, { "epoch": 4.2271714922049, "grad_norm": 14.743143081665039, "learning_rate": 1e-06, "loss": 0.9489, "num_input_tokens_seen": 106344316, "step": 1898 }, { "epoch": 4.2271714922049, "loss": 0.888018012046814, "loss_ce": 0.0003226427361369133, "loss_iou": 0.41015625, "loss_num": 0.0130615234375, "loss_xval": 0.88671875, "num_input_tokens_seen": 106344316, "step": 1898 }, { "epoch": 4.229398663697105, "grad_norm": 16.215017318725586, "learning_rate": 1e-06, "loss": 0.8319, "num_input_tokens_seen": 106399904, "step": 1899 }, { "epoch": 4.229398663697105, "loss": 0.7622650265693665, "loss_ce": 0.00030216382583603263, "loss_iou": 0.349609375, "loss_num": 0.0125732421875, "loss_xval": 0.76171875, "num_input_tokens_seen": 106399904, "step": 1899 }, { "epoch": 4.23162583518931, "grad_norm": 21.640851974487305, "learning_rate": 1e-06, "loss": 0.7937, "num_input_tokens_seen": 106456716, "step": 1900 }, { "epoch": 4.23162583518931, "loss": 0.8867244124412537, "loss_ce": 0.0002498445101082325, "loss_iou": 0.365234375, "loss_num": 0.03173828125, "loss_xval": 0.88671875, "num_input_tokens_seen": 106456716, "step": 1900 }, { "epoch": 4.233853006681515, "grad_norm": 45.25251770019531, "learning_rate": 1e-06, "loss": 0.9745, "num_input_tokens_seen": 106514668, "step": 1901 }, { "epoch": 4.233853006681515, "loss": 0.8965962529182434, "loss_ce": 0.0003560082404874265, "loss_iou": 0.38671875, "loss_num": 0.0242919921875, "loss_xval": 0.89453125, "num_input_tokens_seen": 106514668, "step": 1901 }, { "epoch": 4.23608017817372, "grad_norm": 23.69890594482422, "learning_rate": 1e-06, "loss": 0.6128, "num_input_tokens_seen": 106570596, "step": 1902 }, { "epoch": 4.23608017817372, "loss": 0.7368512153625488, "loss_ce": 0.0002789643476717174, "loss_iou": 0.318359375, "loss_num": 0.0201416015625, "loss_xval": 0.73828125, "num_input_tokens_seen": 106570596, "step": 1902 }, { "epoch": 4.2383073496659245, "grad_norm": 14.473897933959961, "learning_rate": 1e-06, "loss": 0.8265, "num_input_tokens_seen": 106625068, "step": 1903 }, { "epoch": 4.2383073496659245, "loss": 0.7507649660110474, "loss_ce": 0.00027672675787471235, "loss_iou": 0.310546875, "loss_num": 0.025634765625, "loss_xval": 0.75, "num_input_tokens_seen": 106625068, "step": 1903 }, { "epoch": 4.240534521158129, "grad_norm": 15.77379322052002, "learning_rate": 1e-06, "loss": 0.651, "num_input_tokens_seen": 106683920, "step": 1904 }, { "epoch": 4.240534521158129, "loss": 0.5201445817947388, "loss_ce": 0.0002471142797730863, "loss_iou": 0.2265625, "loss_num": 0.0133056640625, "loss_xval": 0.51953125, "num_input_tokens_seen": 106683920, "step": 1904 }, { "epoch": 4.242761692650334, "grad_norm": 16.723033905029297, "learning_rate": 1e-06, "loss": 0.6274, "num_input_tokens_seen": 106740932, "step": 1905 }, { "epoch": 4.242761692650334, "loss": 0.5735186338424683, "loss_ce": 0.00027646025409922004, "loss_iou": 0.2578125, "loss_num": 0.0115966796875, "loss_xval": 0.57421875, "num_input_tokens_seen": 106740932, "step": 1905 }, { "epoch": 4.244988864142539, "grad_norm": 23.549467086791992, "learning_rate": 1e-06, "loss": 0.9234, "num_input_tokens_seen": 106798536, "step": 1906 }, { "epoch": 4.244988864142539, "loss": 0.8834173679351807, "loss_ce": 0.0006048293435014784, "loss_iou": 0.33984375, "loss_num": 0.040771484375, "loss_xval": 0.8828125, "num_input_tokens_seen": 106798536, "step": 1906 }, { "epoch": 4.247216035634744, "grad_norm": 20.045562744140625, "learning_rate": 1e-06, "loss": 0.9631, "num_input_tokens_seen": 106855984, "step": 1907 }, { "epoch": 4.247216035634744, "loss": 1.116750955581665, "loss_ce": 0.0002958837430924177, "loss_iou": 0.423828125, "loss_num": 0.053466796875, "loss_xval": 1.1171875, "num_input_tokens_seen": 106855984, "step": 1907 }, { "epoch": 4.249443207126949, "grad_norm": 20.793867111206055, "learning_rate": 1e-06, "loss": 0.7098, "num_input_tokens_seen": 106910552, "step": 1908 }, { "epoch": 4.249443207126949, "loss": 0.691659152507782, "loss_ce": 0.0002528924378566444, "loss_iou": 0.318359375, "loss_num": 0.01092529296875, "loss_xval": 0.69140625, "num_input_tokens_seen": 106910552, "step": 1908 }, { "epoch": 4.251670378619154, "grad_norm": 31.35016632080078, "learning_rate": 1e-06, "loss": 0.8538, "num_input_tokens_seen": 106966684, "step": 1909 }, { "epoch": 4.251670378619154, "loss": 0.7839844226837158, "loss_ce": 0.000293065735604614, "loss_iou": 0.34765625, "loss_num": 0.0174560546875, "loss_xval": 0.78515625, "num_input_tokens_seen": 106966684, "step": 1909 }, { "epoch": 4.2538975501113585, "grad_norm": 34.71610641479492, "learning_rate": 1e-06, "loss": 0.6133, "num_input_tokens_seen": 107025092, "step": 1910 }, { "epoch": 4.2538975501113585, "loss": 0.6420915722846985, "loss_ce": 0.0002459048992022872, "loss_iou": 0.27734375, "loss_num": 0.0174560546875, "loss_xval": 0.640625, "num_input_tokens_seen": 107025092, "step": 1910 }, { "epoch": 4.256124721603563, "grad_norm": 14.96351146697998, "learning_rate": 1e-06, "loss": 0.6374, "num_input_tokens_seen": 107081992, "step": 1911 }, { "epoch": 4.256124721603563, "loss": 0.4527207911014557, "loss_ce": 0.00032822368666529655, "loss_iou": 0.1943359375, "loss_num": 0.0125732421875, "loss_xval": 0.453125, "num_input_tokens_seen": 107081992, "step": 1911 }, { "epoch": 4.258351893095768, "grad_norm": 14.8378267288208, "learning_rate": 1e-06, "loss": 0.7082, "num_input_tokens_seen": 107135848, "step": 1912 }, { "epoch": 4.258351893095768, "loss": 0.5522253513336182, "loss_ce": 0.00022335897665470839, "loss_iou": 0.21875, "loss_num": 0.0230712890625, "loss_xval": 0.55078125, "num_input_tokens_seen": 107135848, "step": 1912 }, { "epoch": 4.260579064587973, "grad_norm": 21.59320068359375, "learning_rate": 1e-06, "loss": 0.8173, "num_input_tokens_seen": 107191440, "step": 1913 }, { "epoch": 4.260579064587973, "loss": 1.090116024017334, "loss_ce": 0.0002723358920775354, "loss_iou": 0.486328125, "loss_num": 0.0235595703125, "loss_xval": 1.09375, "num_input_tokens_seen": 107191440, "step": 1913 }, { "epoch": 4.262806236080178, "grad_norm": 16.01348876953125, "learning_rate": 1e-06, "loss": 0.8553, "num_input_tokens_seen": 107247028, "step": 1914 }, { "epoch": 4.262806236080178, "loss": 0.773563027381897, "loss_ce": 0.0002476052031852305, "loss_iou": 0.296875, "loss_num": 0.0361328125, "loss_xval": 0.7734375, "num_input_tokens_seen": 107247028, "step": 1914 }, { "epoch": 4.265033407572383, "grad_norm": 16.764503479003906, "learning_rate": 1e-06, "loss": 1.0386, "num_input_tokens_seen": 107301488, "step": 1915 }, { "epoch": 4.265033407572383, "loss": 1.2570042610168457, "loss_ce": 0.0004123870749026537, "loss_iou": 0.515625, "loss_num": 0.04443359375, "loss_xval": 1.2578125, "num_input_tokens_seen": 107301488, "step": 1915 }, { "epoch": 4.267260579064588, "grad_norm": 17.2139949798584, "learning_rate": 1e-06, "loss": 0.7674, "num_input_tokens_seen": 107357008, "step": 1916 }, { "epoch": 4.267260579064588, "loss": 0.8225438594818115, "loss_ce": 0.0002781503717415035, "loss_iou": 0.34765625, "loss_num": 0.0252685546875, "loss_xval": 0.8203125, "num_input_tokens_seen": 107357008, "step": 1916 }, { "epoch": 4.2694877505567925, "grad_norm": 18.4807186126709, "learning_rate": 1e-06, "loss": 0.4431, "num_input_tokens_seen": 107411784, "step": 1917 }, { "epoch": 4.2694877505567925, "loss": 0.4435253143310547, "loss_ce": 0.00041007628897204995, "loss_iou": 0.1630859375, "loss_num": 0.0234375, "loss_xval": 0.443359375, "num_input_tokens_seen": 107411784, "step": 1917 }, { "epoch": 4.271714922048997, "grad_norm": 16.725303649902344, "learning_rate": 1e-06, "loss": 0.7385, "num_input_tokens_seen": 107463544, "step": 1918 }, { "epoch": 4.271714922048997, "loss": 0.7992796897888184, "loss_ce": 0.00032942870166152716, "loss_iou": 0.3515625, "loss_num": 0.01904296875, "loss_xval": 0.80078125, "num_input_tokens_seen": 107463544, "step": 1918 }, { "epoch": 4.273942093541203, "grad_norm": 17.902894973754883, "learning_rate": 1e-06, "loss": 0.6089, "num_input_tokens_seen": 107518944, "step": 1919 }, { "epoch": 4.273942093541203, "loss": 0.7945454120635986, "loss_ce": 0.0002338896010769531, "loss_iou": 0.328125, "loss_num": 0.027587890625, "loss_xval": 0.79296875, "num_input_tokens_seen": 107518944, "step": 1919 }, { "epoch": 4.276169265033408, "grad_norm": 21.469985961914062, "learning_rate": 1e-06, "loss": 0.7481, "num_input_tokens_seen": 107574840, "step": 1920 }, { "epoch": 4.276169265033408, "loss": 0.7355300188064575, "loss_ce": 0.0003004681202583015, "loss_iou": 0.310546875, "loss_num": 0.0228271484375, "loss_xval": 0.734375, "num_input_tokens_seen": 107574840, "step": 1920 }, { "epoch": 4.278396436525613, "grad_norm": 18.499727249145508, "learning_rate": 1e-06, "loss": 0.8113, "num_input_tokens_seen": 107629476, "step": 1921 }, { "epoch": 4.278396436525613, "loss": 0.7343944311141968, "loss_ce": 0.00026357907336205244, "loss_iou": 0.30859375, "loss_num": 0.02294921875, "loss_xval": 0.734375, "num_input_tokens_seen": 107629476, "step": 1921 }, { "epoch": 4.280623608017818, "grad_norm": 25.940811157226562, "learning_rate": 1e-06, "loss": 0.7132, "num_input_tokens_seen": 107686892, "step": 1922 }, { "epoch": 4.280623608017818, "loss": 0.6143178343772888, "loss_ce": 0.0003041609888896346, "loss_iou": 0.2578125, "loss_num": 0.0194091796875, "loss_xval": 0.61328125, "num_input_tokens_seen": 107686892, "step": 1922 }, { "epoch": 4.282850779510023, "grad_norm": 21.650733947753906, "learning_rate": 1e-06, "loss": 0.726, "num_input_tokens_seen": 107743916, "step": 1923 }, { "epoch": 4.282850779510023, "loss": 0.6625887751579285, "loss_ce": 0.00023527286248281598, "loss_iou": 0.27734375, "loss_num": 0.02197265625, "loss_xval": 0.6640625, "num_input_tokens_seen": 107743916, "step": 1923 }, { "epoch": 4.285077951002227, "grad_norm": 15.383871078491211, "learning_rate": 1e-06, "loss": 0.7414, "num_input_tokens_seen": 107800000, "step": 1924 }, { "epoch": 4.285077951002227, "loss": 0.7688758373260498, "loss_ce": 0.0003211286966688931, "loss_iou": 0.33203125, "loss_num": 0.02099609375, "loss_xval": 0.76953125, "num_input_tokens_seen": 107800000, "step": 1924 }, { "epoch": 4.287305122494432, "grad_norm": 18.70506477355957, "learning_rate": 1e-06, "loss": 0.6572, "num_input_tokens_seen": 107856952, "step": 1925 }, { "epoch": 4.287305122494432, "loss": 0.5217354893684387, "loss_ce": 0.0002510941121727228, "loss_iou": 0.2314453125, "loss_num": 0.01177978515625, "loss_xval": 0.5234375, "num_input_tokens_seen": 107856952, "step": 1925 }, { "epoch": 4.289532293986637, "grad_norm": 15.325700759887695, "learning_rate": 1e-06, "loss": 0.8531, "num_input_tokens_seen": 107914656, "step": 1926 }, { "epoch": 4.289532293986637, "loss": 0.7412204146385193, "loss_ce": 0.0002536483807489276, "loss_iou": 0.328125, "loss_num": 0.0169677734375, "loss_xval": 0.7421875, "num_input_tokens_seen": 107914656, "step": 1926 }, { "epoch": 4.291759465478842, "grad_norm": 36.90719985961914, "learning_rate": 1e-06, "loss": 1.0012, "num_input_tokens_seen": 107971084, "step": 1927 }, { "epoch": 4.291759465478842, "loss": 0.8567878007888794, "loss_ce": 0.00034247490111738443, "loss_iou": 0.376953125, "loss_num": 0.0206298828125, "loss_xval": 0.85546875, "num_input_tokens_seen": 107971084, "step": 1927 }, { "epoch": 4.293986636971047, "grad_norm": 21.110570907592773, "learning_rate": 1e-06, "loss": 0.5608, "num_input_tokens_seen": 108027536, "step": 1928 }, { "epoch": 4.293986636971047, "loss": 0.49611085653305054, "loss_ce": 0.0002612844982650131, "loss_iou": 0.2041015625, "loss_num": 0.017578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 108027536, "step": 1928 }, { "epoch": 4.296213808463252, "grad_norm": 19.765779495239258, "learning_rate": 1e-06, "loss": 0.6742, "num_input_tokens_seen": 108086688, "step": 1929 }, { "epoch": 4.296213808463252, "loss": 0.7515699863433838, "loss_ce": 0.0002272275451105088, "loss_iou": 0.32421875, "loss_num": 0.0203857421875, "loss_xval": 0.75, "num_input_tokens_seen": 108086688, "step": 1929 }, { "epoch": 4.298440979955457, "grad_norm": 27.658580780029297, "learning_rate": 1e-06, "loss": 0.8779, "num_input_tokens_seen": 108138612, "step": 1930 }, { "epoch": 4.298440979955457, "loss": 0.9459295272827148, "loss_ce": 0.0002508389297872782, "loss_iou": 0.40625, "loss_num": 0.0267333984375, "loss_xval": 0.9453125, "num_input_tokens_seen": 108138612, "step": 1930 }, { "epoch": 4.3006681514476615, "grad_norm": 18.092632293701172, "learning_rate": 1e-06, "loss": 0.9169, "num_input_tokens_seen": 108197300, "step": 1931 }, { "epoch": 4.3006681514476615, "loss": 1.1985375881195068, "loss_ce": 0.00029541528783738613, "loss_iou": 0.46875, "loss_num": 0.0517578125, "loss_xval": 1.1953125, "num_input_tokens_seen": 108197300, "step": 1931 }, { "epoch": 4.302895322939866, "grad_norm": 22.87906265258789, "learning_rate": 1e-06, "loss": 0.7375, "num_input_tokens_seen": 108253652, "step": 1932 }, { "epoch": 4.302895322939866, "loss": 0.6818982362747192, "loss_ce": 0.0002576185797806829, "loss_iou": 0.30078125, "loss_num": 0.015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 108253652, "step": 1932 }, { "epoch": 4.305122494432071, "grad_norm": 19.443403244018555, "learning_rate": 1e-06, "loss": 0.7365, "num_input_tokens_seen": 108306668, "step": 1933 }, { "epoch": 4.305122494432071, "loss": 0.8623777627944946, "loss_ce": 0.0003171888238284737, "loss_iou": 0.376953125, "loss_num": 0.0216064453125, "loss_xval": 0.86328125, "num_input_tokens_seen": 108306668, "step": 1933 }, { "epoch": 4.307349665924276, "grad_norm": 19.513713836669922, "learning_rate": 1e-06, "loss": 0.8245, "num_input_tokens_seen": 108364872, "step": 1934 }, { "epoch": 4.307349665924276, "loss": 0.8435297608375549, "loss_ce": 0.00026804511435329914, "loss_iou": 0.3671875, "loss_num": 0.0220947265625, "loss_xval": 0.84375, "num_input_tokens_seen": 108364872, "step": 1934 }, { "epoch": 4.309576837416481, "grad_norm": 17.910724639892578, "learning_rate": 1e-06, "loss": 0.8137, "num_input_tokens_seen": 108421172, "step": 1935 }, { "epoch": 4.309576837416481, "loss": 1.0195529460906982, "loss_ce": 0.000510023906826973, "loss_iou": 0.41796875, "loss_num": 0.03662109375, "loss_xval": 1.015625, "num_input_tokens_seen": 108421172, "step": 1935 }, { "epoch": 4.311804008908686, "grad_norm": 20.821613311767578, "learning_rate": 1e-06, "loss": 0.7728, "num_input_tokens_seen": 108476148, "step": 1936 }, { "epoch": 4.311804008908686, "loss": 0.7290493249893188, "loss_ce": 0.00028955648303963244, "loss_iou": 0.2890625, "loss_num": 0.0301513671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 108476148, "step": 1936 }, { "epoch": 4.314031180400891, "grad_norm": 14.239187240600586, "learning_rate": 1e-06, "loss": 0.7459, "num_input_tokens_seen": 108530832, "step": 1937 }, { "epoch": 4.314031180400891, "loss": 0.7683401107788086, "loss_ce": 0.0002737450413405895, "loss_iou": 0.34375, "loss_num": 0.0164794921875, "loss_xval": 0.76953125, "num_input_tokens_seen": 108530832, "step": 1937 }, { "epoch": 4.3162583518930955, "grad_norm": 22.064966201782227, "learning_rate": 1e-06, "loss": 0.827, "num_input_tokens_seen": 108586484, "step": 1938 }, { "epoch": 4.3162583518930955, "loss": 0.7754453420639038, "loss_ce": 0.00029886612901464105, "loss_iou": 0.349609375, "loss_num": 0.0150146484375, "loss_xval": 0.7734375, "num_input_tokens_seen": 108586484, "step": 1938 }, { "epoch": 4.3184855233853, "grad_norm": 22.323854446411133, "learning_rate": 1e-06, "loss": 0.6395, "num_input_tokens_seen": 108643224, "step": 1939 }, { "epoch": 4.3184855233853, "loss": 0.7868392467498779, "loss_ce": 0.000950582732912153, "loss_iou": 0.34765625, "loss_num": 0.0184326171875, "loss_xval": 0.78515625, "num_input_tokens_seen": 108643224, "step": 1939 }, { "epoch": 4.320712694877505, "grad_norm": 18.545169830322266, "learning_rate": 1e-06, "loss": 0.6774, "num_input_tokens_seen": 108699852, "step": 1940 }, { "epoch": 4.320712694877505, "loss": 0.8520459532737732, "loss_ce": 0.0003613817389123142, "loss_iou": 0.337890625, "loss_num": 0.035400390625, "loss_xval": 0.8515625, "num_input_tokens_seen": 108699852, "step": 1940 }, { "epoch": 4.32293986636971, "grad_norm": 20.88629150390625, "learning_rate": 1e-06, "loss": 0.8882, "num_input_tokens_seen": 108755364, "step": 1941 }, { "epoch": 4.32293986636971, "loss": 0.8283944129943848, "loss_ce": 0.000269408046733588, "loss_iou": 0.322265625, "loss_num": 0.037109375, "loss_xval": 0.828125, "num_input_tokens_seen": 108755364, "step": 1941 }, { "epoch": 4.325167037861915, "grad_norm": 26.91864585876465, "learning_rate": 1e-06, "loss": 0.573, "num_input_tokens_seen": 108814096, "step": 1942 }, { "epoch": 4.325167037861915, "loss": 0.6568059921264648, "loss_ce": 0.00031184524414129555, "loss_iou": 0.298828125, "loss_num": 0.0120849609375, "loss_xval": 0.65625, "num_input_tokens_seen": 108814096, "step": 1942 }, { "epoch": 4.327394209354121, "grad_norm": 14.774848937988281, "learning_rate": 1e-06, "loss": 0.9021, "num_input_tokens_seen": 108870164, "step": 1943 }, { "epoch": 4.327394209354121, "loss": 0.8290045261383057, "loss_ce": 0.001856090733781457, "loss_iou": 0.33984375, "loss_num": 0.0294189453125, "loss_xval": 0.828125, "num_input_tokens_seen": 108870164, "step": 1943 }, { "epoch": 4.3296213808463255, "grad_norm": 18.11646270751953, "learning_rate": 1e-06, "loss": 0.8127, "num_input_tokens_seen": 108927584, "step": 1944 }, { "epoch": 4.3296213808463255, "loss": 0.6893248558044434, "loss_ce": 0.0002379524812567979, "loss_iou": 0.29296875, "loss_num": 0.0201416015625, "loss_xval": 0.6875, "num_input_tokens_seen": 108927584, "step": 1944 }, { "epoch": 4.33184855233853, "grad_norm": 14.257638931274414, "learning_rate": 1e-06, "loss": 0.8333, "num_input_tokens_seen": 108983712, "step": 1945 }, { "epoch": 4.33184855233853, "loss": 0.9684985876083374, "loss_ce": 0.0003589991247281432, "loss_iou": 0.400390625, "loss_num": 0.03369140625, "loss_xval": 0.96875, "num_input_tokens_seen": 108983712, "step": 1945 }, { "epoch": 4.334075723830735, "grad_norm": 24.499502182006836, "learning_rate": 1e-06, "loss": 0.5697, "num_input_tokens_seen": 109040176, "step": 1946 }, { "epoch": 4.334075723830735, "loss": 0.4902326464653015, "loss_ce": 0.00024243266670964658, "loss_iou": 0.2158203125, "loss_num": 0.01153564453125, "loss_xval": 0.490234375, "num_input_tokens_seen": 109040176, "step": 1946 }, { "epoch": 4.33630289532294, "grad_norm": 28.52815055847168, "learning_rate": 1e-06, "loss": 0.8584, "num_input_tokens_seen": 109096776, "step": 1947 }, { "epoch": 4.33630289532294, "loss": 0.8685885667800903, "loss_ce": 0.0003024664765689522, "loss_iou": 0.345703125, "loss_num": 0.035400390625, "loss_xval": 0.8671875, "num_input_tokens_seen": 109096776, "step": 1947 }, { "epoch": 4.338530066815145, "grad_norm": 24.575611114501953, "learning_rate": 1e-06, "loss": 1.0167, "num_input_tokens_seen": 109153828, "step": 1948 }, { "epoch": 4.338530066815145, "loss": 0.993535041809082, "loss_ce": 0.0002489146136213094, "loss_iou": 0.43359375, "loss_num": 0.025634765625, "loss_xval": 0.9921875, "num_input_tokens_seen": 109153828, "step": 1948 }, { "epoch": 4.34075723830735, "grad_norm": 15.715718269348145, "learning_rate": 1e-06, "loss": 0.8722, "num_input_tokens_seen": 109212620, "step": 1949 }, { "epoch": 4.34075723830735, "loss": 0.8171886205673218, "loss_ce": 0.00029411769355647266, "loss_iou": 0.34765625, "loss_num": 0.0240478515625, "loss_xval": 0.81640625, "num_input_tokens_seen": 109212620, "step": 1949 }, { "epoch": 4.342984409799555, "grad_norm": 23.913692474365234, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 109270456, "step": 1950 }, { "epoch": 4.342984409799555, "loss": 0.641880989074707, "loss_ce": 0.0002794649626594037, "loss_iou": 0.28125, "loss_num": 0.015625, "loss_xval": 0.640625, "num_input_tokens_seen": 109270456, "step": 1950 }, { "epoch": 4.3452115812917596, "grad_norm": 18.232446670532227, "learning_rate": 1e-06, "loss": 0.768, "num_input_tokens_seen": 109325660, "step": 1951 }, { "epoch": 4.3452115812917596, "loss": 0.9707180857658386, "loss_ce": 0.0002590723452158272, "loss_iou": 0.416015625, "loss_num": 0.0277099609375, "loss_xval": 0.96875, "num_input_tokens_seen": 109325660, "step": 1951 }, { "epoch": 4.347438752783964, "grad_norm": 16.996084213256836, "learning_rate": 1e-06, "loss": 0.9126, "num_input_tokens_seen": 109381844, "step": 1952 }, { "epoch": 4.347438752783964, "loss": 0.7782900333404541, "loss_ce": 0.00027485546888783574, "loss_iou": 0.3515625, "loss_num": 0.01483154296875, "loss_xval": 0.77734375, "num_input_tokens_seen": 109381844, "step": 1952 }, { "epoch": 4.349665924276169, "grad_norm": 17.12710189819336, "learning_rate": 1e-06, "loss": 0.826, "num_input_tokens_seen": 109440512, "step": 1953 }, { "epoch": 4.349665924276169, "loss": 1.0132062435150146, "loss_ce": 0.00026673171669244766, "loss_iou": 0.427734375, "loss_num": 0.03173828125, "loss_xval": 1.015625, "num_input_tokens_seen": 109440512, "step": 1953 }, { "epoch": 4.351893095768374, "grad_norm": 14.912611961364746, "learning_rate": 1e-06, "loss": 0.8326, "num_input_tokens_seen": 109497420, "step": 1954 }, { "epoch": 4.351893095768374, "loss": 0.42840301990509033, "loss_ce": 0.00030242273351177573, "loss_iou": 0.17578125, "loss_num": 0.01519775390625, "loss_xval": 0.427734375, "num_input_tokens_seen": 109497420, "step": 1954 }, { "epoch": 4.354120267260579, "grad_norm": 17.690128326416016, "learning_rate": 1e-06, "loss": 1.0669, "num_input_tokens_seen": 109554340, "step": 1955 }, { "epoch": 4.354120267260579, "loss": 1.157071590423584, "loss_ce": 0.00033330710721202195, "loss_iou": 0.466796875, "loss_num": 0.044677734375, "loss_xval": 1.15625, "num_input_tokens_seen": 109554340, "step": 1955 }, { "epoch": 4.356347438752784, "grad_norm": 279.8905029296875, "learning_rate": 1e-06, "loss": 0.7712, "num_input_tokens_seen": 109611648, "step": 1956 }, { "epoch": 4.356347438752784, "loss": 0.8818769454956055, "loss_ce": 0.0010175781790167093, "loss_iou": 0.384765625, "loss_num": 0.0224609375, "loss_xval": 0.8828125, "num_input_tokens_seen": 109611648, "step": 1956 }, { "epoch": 4.358574610244989, "grad_norm": 21.142581939697266, "learning_rate": 1e-06, "loss": 0.8436, "num_input_tokens_seen": 109670164, "step": 1957 }, { "epoch": 4.358574610244989, "loss": 0.8566855192184448, "loss_ce": 0.00024018189287744462, "loss_iou": 0.353515625, "loss_num": 0.02978515625, "loss_xval": 0.85546875, "num_input_tokens_seen": 109670164, "step": 1957 }, { "epoch": 4.360801781737194, "grad_norm": 23.72075080871582, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 109727780, "step": 1958 }, { "epoch": 4.360801781737194, "loss": 0.7790469527244568, "loss_ce": 0.00023835319734644145, "loss_iou": 0.318359375, "loss_num": 0.0289306640625, "loss_xval": 0.77734375, "num_input_tokens_seen": 109727780, "step": 1958 }, { "epoch": 4.363028953229398, "grad_norm": 22.97592544555664, "learning_rate": 1e-06, "loss": 0.9028, "num_input_tokens_seen": 109786720, "step": 1959 }, { "epoch": 4.363028953229398, "loss": 1.0544261932373047, "loss_ce": 0.00022698812244925648, "loss_iou": 0.4453125, "loss_num": 0.032470703125, "loss_xval": 1.0546875, "num_input_tokens_seen": 109786720, "step": 1959 }, { "epoch": 4.365256124721603, "grad_norm": 35.19451904296875, "learning_rate": 1e-06, "loss": 1.0087, "num_input_tokens_seen": 109840344, "step": 1960 }, { "epoch": 4.365256124721603, "loss": 0.9346234202384949, "loss_ce": 0.00029728777008131146, "loss_iou": 0.412109375, "loss_num": 0.0218505859375, "loss_xval": 0.93359375, "num_input_tokens_seen": 109840344, "step": 1960 }, { "epoch": 4.367483296213808, "grad_norm": 21.005878448486328, "learning_rate": 1e-06, "loss": 0.8335, "num_input_tokens_seen": 109898760, "step": 1961 }, { "epoch": 4.367483296213808, "loss": 0.7780290842056274, "loss_ce": 0.0004412271664477885, "loss_iou": 0.306640625, "loss_num": 0.033203125, "loss_xval": 0.77734375, "num_input_tokens_seen": 109898760, "step": 1961 }, { "epoch": 4.369710467706013, "grad_norm": 20.942707061767578, "learning_rate": 1e-06, "loss": 0.7858, "num_input_tokens_seen": 109956856, "step": 1962 }, { "epoch": 4.369710467706013, "loss": 0.6604050397872925, "loss_ce": 0.0002488172031007707, "loss_iou": 0.296875, "loss_num": 0.0133056640625, "loss_xval": 0.66015625, "num_input_tokens_seen": 109956856, "step": 1962 }, { "epoch": 4.371937639198218, "grad_norm": 14.403862953186035, "learning_rate": 1e-06, "loss": 0.5435, "num_input_tokens_seen": 110010668, "step": 1963 }, { "epoch": 4.371937639198218, "loss": 0.32872194051742554, "loss_ce": 0.00023072000476531684, "loss_iou": 0.1455078125, "loss_num": 0.007476806640625, "loss_xval": 0.328125, "num_input_tokens_seen": 110010668, "step": 1963 }, { "epoch": 4.374164810690424, "grad_norm": 27.356098175048828, "learning_rate": 1e-06, "loss": 0.5899, "num_input_tokens_seen": 110067848, "step": 1964 }, { "epoch": 4.374164810690424, "loss": 0.6242412328720093, "loss_ce": 0.00021780356473755091, "loss_iou": 0.25390625, "loss_num": 0.0233154296875, "loss_xval": 0.625, "num_input_tokens_seen": 110067848, "step": 1964 }, { "epoch": 4.3763919821826285, "grad_norm": 24.027332305908203, "learning_rate": 1e-06, "loss": 0.6839, "num_input_tokens_seen": 110122296, "step": 1965 }, { "epoch": 4.3763919821826285, "loss": 0.6491550207138062, "loss_ce": 0.00022922824427951127, "loss_iou": 0.28125, "loss_num": 0.0169677734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 110122296, "step": 1965 }, { "epoch": 4.378619153674833, "grad_norm": 18.06757354736328, "learning_rate": 1e-06, "loss": 0.8689, "num_input_tokens_seen": 110177296, "step": 1966 }, { "epoch": 4.378619153674833, "loss": 0.7419674396514893, "loss_ce": 0.0002682540216483176, "loss_iou": 0.31640625, "loss_num": 0.021484375, "loss_xval": 0.7421875, "num_input_tokens_seen": 110177296, "step": 1966 }, { "epoch": 4.380846325167038, "grad_norm": 22.51265525817871, "learning_rate": 1e-06, "loss": 0.8991, "num_input_tokens_seen": 110233976, "step": 1967 }, { "epoch": 4.380846325167038, "loss": 0.7884268760681152, "loss_ce": 0.000340936123393476, "loss_iou": 0.318359375, "loss_num": 0.030029296875, "loss_xval": 0.7890625, "num_input_tokens_seen": 110233976, "step": 1967 }, { "epoch": 4.383073496659243, "grad_norm": 34.43399429321289, "learning_rate": 1e-06, "loss": 1.0184, "num_input_tokens_seen": 110289432, "step": 1968 }, { "epoch": 4.383073496659243, "loss": 1.0707261562347412, "loss_ce": 0.00041369907557964325, "loss_iou": 0.4609375, "loss_num": 0.0296630859375, "loss_xval": 1.0703125, "num_input_tokens_seen": 110289432, "step": 1968 }, { "epoch": 4.385300668151448, "grad_norm": 21.057336807250977, "learning_rate": 1e-06, "loss": 0.8572, "num_input_tokens_seen": 110345920, "step": 1969 }, { "epoch": 4.385300668151448, "loss": 0.8917399644851685, "loss_ce": 0.0003824798041023314, "loss_iou": 0.369140625, "loss_num": 0.0303955078125, "loss_xval": 0.890625, "num_input_tokens_seen": 110345920, "step": 1969 }, { "epoch": 4.387527839643653, "grad_norm": 17.0823917388916, "learning_rate": 1e-06, "loss": 0.8178, "num_input_tokens_seen": 110398524, "step": 1970 }, { "epoch": 4.387527839643653, "loss": 0.9053047895431519, "loss_ce": 0.0002754859742708504, "loss_iou": 0.349609375, "loss_num": 0.041015625, "loss_xval": 0.90625, "num_input_tokens_seen": 110398524, "step": 1970 }, { "epoch": 4.389755011135858, "grad_norm": 19.87203598022461, "learning_rate": 1e-06, "loss": 0.89, "num_input_tokens_seen": 110454132, "step": 1971 }, { "epoch": 4.389755011135858, "loss": 0.851262092590332, "loss_ce": 0.00030996621353551745, "loss_iou": 0.3515625, "loss_num": 0.02978515625, "loss_xval": 0.8515625, "num_input_tokens_seen": 110454132, "step": 1971 }, { "epoch": 4.3919821826280625, "grad_norm": 25.85304069519043, "learning_rate": 1e-06, "loss": 0.668, "num_input_tokens_seen": 110510644, "step": 1972 }, { "epoch": 4.3919821826280625, "loss": 0.6331183314323425, "loss_ce": 0.0003058624570257962, "loss_iou": 0.265625, "loss_num": 0.0201416015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 110510644, "step": 1972 }, { "epoch": 4.394209354120267, "grad_norm": 20.608179092407227, "learning_rate": 1e-06, "loss": 0.4726, "num_input_tokens_seen": 110568336, "step": 1973 }, { "epoch": 4.394209354120267, "loss": 0.5503981113433838, "loss_ce": 0.00022721837740391493, "loss_iou": 0.25, "loss_num": 0.00994873046875, "loss_xval": 0.55078125, "num_input_tokens_seen": 110568336, "step": 1973 }, { "epoch": 4.396436525612472, "grad_norm": 27.215190887451172, "learning_rate": 1e-06, "loss": 0.7294, "num_input_tokens_seen": 110624496, "step": 1974 }, { "epoch": 4.396436525612472, "loss": 0.8059403896331787, "loss_ce": 0.00027631851844489574, "loss_iou": 0.341796875, "loss_num": 0.02392578125, "loss_xval": 0.8046875, "num_input_tokens_seen": 110624496, "step": 1974 }, { "epoch": 4.398663697104677, "grad_norm": 17.576311111450195, "learning_rate": 1e-06, "loss": 0.5477, "num_input_tokens_seen": 110679224, "step": 1975 }, { "epoch": 4.398663697104677, "loss": 0.3737618923187256, "loss_ce": 0.0002267223026137799, "loss_iou": 0.1455078125, "loss_num": 0.0167236328125, "loss_xval": 0.373046875, "num_input_tokens_seen": 110679224, "step": 1975 }, { "epoch": 4.400890868596882, "grad_norm": 22.326269149780273, "learning_rate": 1e-06, "loss": 0.7698, "num_input_tokens_seen": 110735720, "step": 1976 }, { "epoch": 4.400890868596882, "loss": 0.7260935306549072, "loss_ce": 0.0002634518896229565, "loss_iou": 0.33203125, "loss_num": 0.0120849609375, "loss_xval": 0.7265625, "num_input_tokens_seen": 110735720, "step": 1976 }, { "epoch": 4.403118040089087, "grad_norm": 31.376184463500977, "learning_rate": 1e-06, "loss": 0.7239, "num_input_tokens_seen": 110790824, "step": 1977 }, { "epoch": 4.403118040089087, "loss": 0.8379002213478088, "loss_ce": 0.0002537188120186329, "loss_iou": 0.359375, "loss_num": 0.024169921875, "loss_xval": 0.8359375, "num_input_tokens_seen": 110790824, "step": 1977 }, { "epoch": 4.405345211581292, "grad_norm": 22.093713760375977, "learning_rate": 1e-06, "loss": 1.2021, "num_input_tokens_seen": 110845432, "step": 1978 }, { "epoch": 4.405345211581292, "loss": 1.1360301971435547, "loss_ce": 0.00028802291490137577, "loss_iou": 0.482421875, "loss_num": 0.033935546875, "loss_xval": 1.1328125, "num_input_tokens_seen": 110845432, "step": 1978 }, { "epoch": 4.4075723830734965, "grad_norm": 21.3291072845459, "learning_rate": 1e-06, "loss": 0.7553, "num_input_tokens_seen": 110901496, "step": 1979 }, { "epoch": 4.4075723830734965, "loss": 0.6370877623558044, "loss_ce": 0.0003689899167511612, "loss_iou": 0.279296875, "loss_num": 0.015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 110901496, "step": 1979 }, { "epoch": 4.409799554565701, "grad_norm": 26.429105758666992, "learning_rate": 1e-06, "loss": 0.8782, "num_input_tokens_seen": 110960112, "step": 1980 }, { "epoch": 4.409799554565701, "loss": 0.8666300773620605, "loss_ce": 0.00029704906046390533, "loss_iou": 0.365234375, "loss_num": 0.02734375, "loss_xval": 0.8671875, "num_input_tokens_seen": 110960112, "step": 1980 }, { "epoch": 4.412026726057906, "grad_norm": 22.83293342590332, "learning_rate": 1e-06, "loss": 0.6349, "num_input_tokens_seen": 111016200, "step": 1981 }, { "epoch": 4.412026726057906, "loss": 0.6940905451774597, "loss_ce": 0.0002428823063382879, "loss_iou": 0.30078125, "loss_num": 0.0184326171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 111016200, "step": 1981 }, { "epoch": 4.414253897550111, "grad_norm": 31.386951446533203, "learning_rate": 1e-06, "loss": 0.9319, "num_input_tokens_seen": 111070980, "step": 1982 }, { "epoch": 4.414253897550111, "loss": 0.7642084956169128, "loss_ce": 0.00029245836776681244, "loss_iou": 0.330078125, "loss_num": 0.0206298828125, "loss_xval": 0.765625, "num_input_tokens_seen": 111070980, "step": 1982 }, { "epoch": 4.416481069042316, "grad_norm": 14.445569038391113, "learning_rate": 1e-06, "loss": 0.7298, "num_input_tokens_seen": 111127432, "step": 1983 }, { "epoch": 4.416481069042316, "loss": 0.7684084177017212, "loss_ce": 0.00028093665605410933, "loss_iou": 0.31640625, "loss_num": 0.02734375, "loss_xval": 0.76953125, "num_input_tokens_seen": 111127432, "step": 1983 }, { "epoch": 4.418708240534521, "grad_norm": 27.805484771728516, "learning_rate": 1e-06, "loss": 0.8484, "num_input_tokens_seen": 111182920, "step": 1984 }, { "epoch": 4.418708240534521, "loss": 1.1093249320983887, "loss_ce": 0.0006822688737884164, "loss_iou": 0.498046875, "loss_num": 0.02294921875, "loss_xval": 1.109375, "num_input_tokens_seen": 111182920, "step": 1984 }, { "epoch": 4.420935412026726, "grad_norm": 18.803524017333984, "learning_rate": 1e-06, "loss": 0.8397, "num_input_tokens_seen": 111236748, "step": 1985 }, { "epoch": 4.420935412026726, "loss": 0.7825398445129395, "loss_ce": 0.0003133030259050429, "loss_iou": 0.3125, "loss_num": 0.03125, "loss_xval": 0.78125, "num_input_tokens_seen": 111236748, "step": 1985 }, { "epoch": 4.4231625835189305, "grad_norm": 15.333048820495605, "learning_rate": 1e-06, "loss": 0.7332, "num_input_tokens_seen": 111292696, "step": 1986 }, { "epoch": 4.4231625835189305, "loss": 0.8349952697753906, "loss_ce": 0.00027839711401611567, "loss_iou": 0.3359375, "loss_num": 0.03271484375, "loss_xval": 0.8359375, "num_input_tokens_seen": 111292696, "step": 1986 }, { "epoch": 4.425389755011135, "grad_norm": 19.713153839111328, "learning_rate": 1e-06, "loss": 0.8684, "num_input_tokens_seen": 111348792, "step": 1987 }, { "epoch": 4.425389755011135, "loss": 0.7836449146270752, "loss_ce": 0.0009300732635892928, "loss_iou": 0.310546875, "loss_num": 0.0322265625, "loss_xval": 0.78125, "num_input_tokens_seen": 111348792, "step": 1987 }, { "epoch": 4.427616926503341, "grad_norm": 16.41890525817871, "learning_rate": 1e-06, "loss": 0.9639, "num_input_tokens_seen": 111405636, "step": 1988 }, { "epoch": 4.427616926503341, "loss": 0.8102682828903198, "loss_ce": 0.0003318004310131073, "loss_iou": 0.345703125, "loss_num": 0.023193359375, "loss_xval": 0.80859375, "num_input_tokens_seen": 111405636, "step": 1988 }, { "epoch": 4.429844097995546, "grad_norm": 17.086889266967773, "learning_rate": 1e-06, "loss": 0.9369, "num_input_tokens_seen": 111463272, "step": 1989 }, { "epoch": 4.429844097995546, "loss": 0.8751001358032227, "loss_ce": 0.0003442912711761892, "loss_iou": 0.36328125, "loss_num": 0.0299072265625, "loss_xval": 0.875, "num_input_tokens_seen": 111463272, "step": 1989 }, { "epoch": 4.432071269487751, "grad_norm": 25.840059280395508, "learning_rate": 1e-06, "loss": 0.8088, "num_input_tokens_seen": 111518036, "step": 1990 }, { "epoch": 4.432071269487751, "loss": 0.8438689708709717, "loss_ce": 0.0003630804130807519, "loss_iou": 0.37109375, "loss_num": 0.020751953125, "loss_xval": 0.84375, "num_input_tokens_seen": 111518036, "step": 1990 }, { "epoch": 4.434298440979956, "grad_norm": 59.44672775268555, "learning_rate": 1e-06, "loss": 0.739, "num_input_tokens_seen": 111575688, "step": 1991 }, { "epoch": 4.434298440979956, "loss": 0.8342432975769043, "loss_ce": 0.00025894519058056176, "loss_iou": 0.3671875, "loss_num": 0.019775390625, "loss_xval": 0.8359375, "num_input_tokens_seen": 111575688, "step": 1991 }, { "epoch": 4.436525612472161, "grad_norm": 18.12961769104004, "learning_rate": 1e-06, "loss": 0.7432, "num_input_tokens_seen": 111628236, "step": 1992 }, { "epoch": 4.436525612472161, "loss": 0.42511090636253357, "loss_ce": 0.00030621426412835717, "loss_iou": 0.1845703125, "loss_num": 0.01092529296875, "loss_xval": 0.42578125, "num_input_tokens_seen": 111628236, "step": 1992 }, { "epoch": 4.4387527839643655, "grad_norm": 34.39860916137695, "learning_rate": 1e-06, "loss": 0.6518, "num_input_tokens_seen": 111685388, "step": 1993 }, { "epoch": 4.4387527839643655, "loss": 0.8032548427581787, "loss_ce": 0.0002763355150818825, "loss_iou": 0.326171875, "loss_num": 0.0299072265625, "loss_xval": 0.8046875, "num_input_tokens_seen": 111685388, "step": 1993 }, { "epoch": 4.44097995545657, "grad_norm": 18.156936645507812, "learning_rate": 1e-06, "loss": 0.6957, "num_input_tokens_seen": 111740404, "step": 1994 }, { "epoch": 4.44097995545657, "loss": 0.7895779013633728, "loss_ce": 0.0002712547720875591, "loss_iou": 0.318359375, "loss_num": 0.0303955078125, "loss_xval": 0.7890625, "num_input_tokens_seen": 111740404, "step": 1994 }, { "epoch": 4.443207126948775, "grad_norm": 16.068462371826172, "learning_rate": 1e-06, "loss": 0.8716, "num_input_tokens_seen": 111795960, "step": 1995 }, { "epoch": 4.443207126948775, "loss": 1.020226001739502, "loss_ce": 0.0004505877732299268, "loss_iou": 0.40625, "loss_num": 0.041259765625, "loss_xval": 1.0234375, "num_input_tokens_seen": 111795960, "step": 1995 }, { "epoch": 4.44543429844098, "grad_norm": 17.97376251220703, "learning_rate": 1e-06, "loss": 0.5944, "num_input_tokens_seen": 111854164, "step": 1996 }, { "epoch": 4.44543429844098, "loss": 0.6652653813362122, "loss_ce": 0.00034838789724744856, "loss_iou": 0.279296875, "loss_num": 0.021484375, "loss_xval": 0.6640625, "num_input_tokens_seen": 111854164, "step": 1996 }, { "epoch": 4.447661469933185, "grad_norm": 17.494909286499023, "learning_rate": 1e-06, "loss": 0.7858, "num_input_tokens_seen": 111908960, "step": 1997 }, { "epoch": 4.447661469933185, "loss": 0.6890178918838501, "loss_ce": 0.0002971593930851668, "loss_iou": 0.30078125, "loss_num": 0.0172119140625, "loss_xval": 0.6875, "num_input_tokens_seen": 111908960, "step": 1997 }, { "epoch": 4.44988864142539, "grad_norm": 41.67517852783203, "learning_rate": 1e-06, "loss": 0.8325, "num_input_tokens_seen": 111961680, "step": 1998 }, { "epoch": 4.44988864142539, "loss": 0.6864081621170044, "loss_ce": 0.0011054326314479113, "loss_iou": 0.2890625, "loss_num": 0.021728515625, "loss_xval": 0.68359375, "num_input_tokens_seen": 111961680, "step": 1998 }, { "epoch": 4.452115812917595, "grad_norm": 31.639928817749023, "learning_rate": 1e-06, "loss": 0.6754, "num_input_tokens_seen": 112020664, "step": 1999 }, { "epoch": 4.452115812917595, "loss": 0.7533044219017029, "loss_ce": 0.00025264680152758956, "loss_iou": 0.326171875, "loss_num": 0.02001953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 112020664, "step": 1999 }, { "epoch": 4.4543429844097995, "grad_norm": 17.85369300842285, "learning_rate": 1e-06, "loss": 0.6786, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "eval_seeclick_web_CIoU": 0.565800279378891, "eval_seeclick_web_GIoU": 0.5634768307209015, "eval_seeclick_web_IoU": 0.581417441368103, "eval_seeclick_web_MAE_all": 0.017455607652664185, "eval_seeclick_web_MAE_h": 0.010133389849215746, "eval_seeclick_web_MAE_w": 0.01889999955892563, "eval_seeclick_web_MAE_x_boxes": 0.00820656050927937, "eval_seeclick_web_MAE_y_boxes": 0.02206642786040902, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9660096764564514, "eval_seeclick_web_loss_ce": 0.0003684775874717161, "eval_seeclick_web_loss_iou": 0.4342041015625, "eval_seeclick_web_loss_num": 0.013675689697265625, "eval_seeclick_web_loss_xval": 0.9365234375, "eval_seeclick_web_runtime": 30.3044, "eval_seeclick_web_samples_per_second": 1.65, "eval_seeclick_web_steps_per_second": 0.066, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "eval_icons_CIoU": 0.30845747888088226, "eval_icons_GIoU": 0.33300746977329254, "eval_icons_IoU": 0.38466861844062805, "eval_icons_MAE_all": 0.06535855308175087, "eval_icons_MAE_h": 0.03866162151098251, "eval_icons_MAE_w": 0.0722741037607193, "eval_icons_MAE_x_boxes": 0.06343554332852364, "eval_icons_MAE_y_boxes": 0.03758078906685114, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.709810495376587, "eval_icons_loss_ce": 0.00041042393422685564, "eval_icons_loss_iou": 0.6688232421875, "eval_icons_loss_num": 0.06202888488769531, "eval_icons_loss_xval": 1.6484375, "eval_icons_runtime": 23.8114, "eval_icons_samples_per_second": 2.1, "eval_icons_steps_per_second": 0.084, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "eval_screenspot_CIoU": 0.3293018539746602, "eval_screenspot_GIoU": 0.34788986047108966, "eval_screenspot_IoU": 0.4084410071372986, "eval_screenspot_MAE_all": 0.06550942113002141, "eval_screenspot_MAE_h": 0.036151558781663574, "eval_screenspot_MAE_w": 0.07488848641514778, "eval_screenspot_MAE_x_boxes": 0.07839654758572578, "eval_screenspot_MAE_y_boxes": 0.04824645258486271, "eval_screenspot_inside_bbox": 0.6462500095367432, "eval_screenspot_loss": 1.6914161443710327, "eval_screenspot_loss_ce": 0.00041740476929893094, "eval_screenspot_loss_iou": 0.695556640625, "eval_screenspot_loss_num": 0.07661692301432292, "eval_screenspot_loss_xval": 1.7740885416666667, "eval_screenspot_runtime": 39.3803, "eval_screenspot_samples_per_second": 2.26, "eval_screenspot_steps_per_second": 0.076, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "eval_compot_CIoU": 0.35170771181583405, "eval_compot_GIoU": 0.37089845538139343, "eval_compot_IoU": 0.4089939594268799, "eval_compot_MAE_all": 0.018402607180178165, "eval_compot_MAE_h": 0.008481500204652548, "eval_compot_MAE_w": 0.02456105500459671, "eval_compot_MAE_x_boxes": 0.029113260563462973, "eval_compot_MAE_y_boxes": 0.0059411004185676575, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3837639093399048, "eval_compot_loss_ce": 0.0003268021682742983, "eval_compot_loss_iou": 0.6363525390625, "eval_compot_loss_num": 0.017194747924804688, "eval_compot_loss_xval": 1.357666015625, "eval_compot_runtime": 23.2732, "eval_compot_samples_per_second": 2.148, "eval_compot_steps_per_second": 0.086, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "eval_custom_ui_val_CIoU": 0.4485127362940047, "eval_custom_ui_val_GIoU": 0.4697931508223216, "eval_custom_ui_val_IoU": 0.5003485398160087, "eval_custom_ui_val_MAE_all": 0.03264946728530857, "eval_custom_ui_val_MAE_h": 0.01890565103126897, "eval_custom_ui_val_MAE_w": 0.03845942123896546, "eval_custom_ui_val_MAE_x_boxes": 0.03562343731108639, "eval_custom_ui_val_MAE_y_boxes": 0.017556848521861766, "eval_custom_ui_val_inside_bbox": 0.7040895091162788, "eval_custom_ui_val_loss": 1.2602308988571167, "eval_custom_ui_val_loss_ce": 0.000424561229819018, "eval_custom_ui_val_loss_iou": 0.533935546875, "eval_custom_ui_val_loss_num": 0.03228992886013455, "eval_custom_ui_val_loss_xval": 1.2295193142361112, "eval_custom_ui_val_runtime": 61.5934, "eval_custom_ui_val_samples_per_second": 4.302, "eval_custom_ui_val_steps_per_second": 0.146, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.4543429844097995, "loss": 0.9637588858604431, "loss_ce": 0.0003799735859502107, "loss_iou": 0.41796875, "loss_num": 0.025146484375, "loss_xval": 0.96484375, "num_input_tokens_seen": 112077028, "step": 2000 }, { "epoch": 4.456570155902004, "grad_norm": 21.966798782348633, "learning_rate": 1e-06, "loss": 0.8968, "num_input_tokens_seen": 112130504, "step": 2001 }, { "epoch": 4.456570155902004, "loss": 0.7630659341812134, "loss_ce": 0.0003706031129695475, "loss_iou": 0.31640625, "loss_num": 0.025634765625, "loss_xval": 0.76171875, "num_input_tokens_seen": 112130504, "step": 2001 }, { "epoch": 4.458797327394209, "grad_norm": 16.523204803466797, "learning_rate": 1e-06, "loss": 0.7768, "num_input_tokens_seen": 112188064, "step": 2002 }, { "epoch": 4.458797327394209, "loss": 0.6959242820739746, "loss_ce": 0.00024555198615416884, "loss_iou": 0.28515625, "loss_num": 0.025390625, "loss_xval": 0.6953125, "num_input_tokens_seen": 112188064, "step": 2002 }, { "epoch": 4.461024498886414, "grad_norm": 14.921442985534668, "learning_rate": 1e-06, "loss": 0.8676, "num_input_tokens_seen": 112245504, "step": 2003 }, { "epoch": 4.461024498886414, "loss": 0.7521045207977295, "loss_ce": 0.00027342155226506293, "loss_iou": 0.322265625, "loss_num": 0.021728515625, "loss_xval": 0.75, "num_input_tokens_seen": 112245504, "step": 2003 }, { "epoch": 4.463251670378619, "grad_norm": 28.89473533630371, "learning_rate": 1e-06, "loss": 0.8408, "num_input_tokens_seen": 112300972, "step": 2004 }, { "epoch": 4.463251670378619, "loss": 0.6384103298187256, "loss_ce": 0.00022677732340525836, "loss_iou": 0.265625, "loss_num": 0.021484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 112300972, "step": 2004 }, { "epoch": 4.465478841870824, "grad_norm": 21.826824188232422, "learning_rate": 1e-06, "loss": 0.9587, "num_input_tokens_seen": 112353748, "step": 2005 }, { "epoch": 4.465478841870824, "loss": 0.8835301399230957, "loss_ce": 0.00022938736947253346, "loss_iou": 0.380859375, "loss_num": 0.0244140625, "loss_xval": 0.8828125, "num_input_tokens_seen": 112353748, "step": 2005 }, { "epoch": 4.467706013363029, "grad_norm": 18.621387481689453, "learning_rate": 1e-06, "loss": 0.857, "num_input_tokens_seen": 112408688, "step": 2006 }, { "epoch": 4.467706013363029, "loss": 0.7712559700012207, "loss_ce": 0.0002599011640995741, "loss_iou": 0.35546875, "loss_num": 0.0123291015625, "loss_xval": 0.76953125, "num_input_tokens_seen": 112408688, "step": 2006 }, { "epoch": 4.4699331848552335, "grad_norm": 20.207508087158203, "learning_rate": 1e-06, "loss": 0.7325, "num_input_tokens_seen": 112465128, "step": 2007 }, { "epoch": 4.4699331848552335, "loss": 0.6955528855323792, "loss_ce": 0.00024040245625656098, "loss_iou": 0.294921875, "loss_num": 0.0211181640625, "loss_xval": 0.6953125, "num_input_tokens_seen": 112465128, "step": 2007 }, { "epoch": 4.472160356347438, "grad_norm": 15.423189163208008, "learning_rate": 1e-06, "loss": 0.7108, "num_input_tokens_seen": 112520084, "step": 2008 }, { "epoch": 4.472160356347438, "loss": 0.7231366038322449, "loss_ce": 0.0002362322702538222, "loss_iou": 0.28125, "loss_num": 0.03173828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 112520084, "step": 2008 }, { "epoch": 4.474387527839644, "grad_norm": 23.1674861907959, "learning_rate": 1e-06, "loss": 0.7111, "num_input_tokens_seen": 112577288, "step": 2009 }, { "epoch": 4.474387527839644, "loss": 0.6543446779251099, "loss_ce": 0.0002919051912613213, "loss_iou": 0.279296875, "loss_num": 0.0194091796875, "loss_xval": 0.65234375, "num_input_tokens_seen": 112577288, "step": 2009 }, { "epoch": 4.476614699331849, "grad_norm": 21.063098907470703, "learning_rate": 1e-06, "loss": 0.8211, "num_input_tokens_seen": 112632456, "step": 2010 }, { "epoch": 4.476614699331849, "loss": 0.9507274627685547, "loss_ce": 0.000532110221683979, "loss_iou": 0.421875, "loss_num": 0.0208740234375, "loss_xval": 0.94921875, "num_input_tokens_seen": 112632456, "step": 2010 }, { "epoch": 4.478841870824054, "grad_norm": 14.694646835327148, "learning_rate": 1e-06, "loss": 0.7868, "num_input_tokens_seen": 112685992, "step": 2011 }, { "epoch": 4.478841870824054, "loss": 0.8718289732933044, "loss_ce": 0.0002469439641572535, "loss_iou": 0.3359375, "loss_num": 0.03955078125, "loss_xval": 0.87109375, "num_input_tokens_seen": 112685992, "step": 2011 }, { "epoch": 4.481069042316259, "grad_norm": 15.614250183105469, "learning_rate": 1e-06, "loss": 0.6904, "num_input_tokens_seen": 112736456, "step": 2012 }, { "epoch": 4.481069042316259, "loss": 0.7561224699020386, "loss_ce": 0.0002630477538332343, "loss_iou": 0.30078125, "loss_num": 0.031005859375, "loss_xval": 0.7578125, "num_input_tokens_seen": 112736456, "step": 2012 }, { "epoch": 4.4832962138084635, "grad_norm": 19.059675216674805, "learning_rate": 1e-06, "loss": 0.7422, "num_input_tokens_seen": 112791472, "step": 2013 }, { "epoch": 4.4832962138084635, "loss": 0.5962854623794556, "loss_ce": 0.00021605889196507633, "loss_iou": 0.267578125, "loss_num": 0.0120849609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 112791472, "step": 2013 }, { "epoch": 4.485523385300668, "grad_norm": 16.322175979614258, "learning_rate": 1e-06, "loss": 0.6247, "num_input_tokens_seen": 112847224, "step": 2014 }, { "epoch": 4.485523385300668, "loss": 0.6646714806556702, "loss_ce": 0.00024277158081531525, "loss_iou": 0.267578125, "loss_num": 0.025390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 112847224, "step": 2014 }, { "epoch": 4.487750556792873, "grad_norm": 35.126644134521484, "learning_rate": 1e-06, "loss": 0.6962, "num_input_tokens_seen": 112905124, "step": 2015 }, { "epoch": 4.487750556792873, "loss": 0.7029978632926941, "loss_ce": 0.00023909028095658869, "loss_iou": 0.291015625, "loss_num": 0.0242919921875, "loss_xval": 0.703125, "num_input_tokens_seen": 112905124, "step": 2015 }, { "epoch": 4.489977728285078, "grad_norm": 17.807477951049805, "learning_rate": 1e-06, "loss": 0.7735, "num_input_tokens_seen": 112963088, "step": 2016 }, { "epoch": 4.489977728285078, "loss": 0.7800430655479431, "loss_ce": 0.0002578938438091427, "loss_iou": 0.3515625, "loss_num": 0.01495361328125, "loss_xval": 0.78125, "num_input_tokens_seen": 112963088, "step": 2016 }, { "epoch": 4.492204899777283, "grad_norm": 25.92805290222168, "learning_rate": 1e-06, "loss": 0.8668, "num_input_tokens_seen": 113012908, "step": 2017 }, { "epoch": 4.492204899777283, "loss": 0.7654862403869629, "loss_ce": 0.0003495719865895808, "loss_iou": 0.3359375, "loss_num": 0.0181884765625, "loss_xval": 0.765625, "num_input_tokens_seen": 113012908, "step": 2017 }, { "epoch": 4.494432071269488, "grad_norm": 26.185386657714844, "learning_rate": 1e-06, "loss": 0.666, "num_input_tokens_seen": 113069828, "step": 2018 }, { "epoch": 4.494432071269488, "loss": 0.5187711715698242, "loss_ce": 0.0003385008021723479, "loss_iou": 0.2294921875, "loss_num": 0.0118408203125, "loss_xval": 0.51953125, "num_input_tokens_seen": 113069828, "step": 2018 }, { "epoch": 4.496659242761693, "grad_norm": 16.547481536865234, "learning_rate": 1e-06, "loss": 0.7828, "num_input_tokens_seen": 113126292, "step": 2019 }, { "epoch": 4.496659242761693, "loss": 0.7228785753250122, "loss_ce": 0.0002223363844677806, "loss_iou": 0.302734375, "loss_num": 0.02294921875, "loss_xval": 0.72265625, "num_input_tokens_seen": 113126292, "step": 2019 }, { "epoch": 4.498886414253898, "grad_norm": 16.149658203125, "learning_rate": 1e-06, "loss": 0.7314, "num_input_tokens_seen": 113180940, "step": 2020 }, { "epoch": 4.498886414253898, "loss": 0.8957573771476746, "loss_ce": 0.0002495555963832885, "loss_iou": 0.392578125, "loss_num": 0.021728515625, "loss_xval": 0.89453125, "num_input_tokens_seen": 113180940, "step": 2020 }, { "epoch": 4.501113585746102, "grad_norm": 19.080398559570312, "learning_rate": 1e-06, "loss": 0.6275, "num_input_tokens_seen": 113237920, "step": 2021 }, { "epoch": 4.501113585746102, "loss": 0.8777415752410889, "loss_ce": 0.0004222409042995423, "loss_iou": 0.396484375, "loss_num": 0.0166015625, "loss_xval": 0.87890625, "num_input_tokens_seen": 113237920, "step": 2021 }, { "epoch": 4.503340757238307, "grad_norm": 18.565778732299805, "learning_rate": 1e-06, "loss": 0.8399, "num_input_tokens_seen": 113292456, "step": 2022 }, { "epoch": 4.503340757238307, "loss": 0.8745211362838745, "loss_ce": 0.0002535720122978091, "loss_iou": 0.373046875, "loss_num": 0.025390625, "loss_xval": 0.875, "num_input_tokens_seen": 113292456, "step": 2022 }, { "epoch": 4.505567928730512, "grad_norm": 16.41952133178711, "learning_rate": 1e-06, "loss": 0.6277, "num_input_tokens_seen": 113348476, "step": 2023 }, { "epoch": 4.505567928730512, "loss": 0.6538951992988586, "loss_ce": 0.0005749051342718303, "loss_iou": 0.302734375, "loss_num": 0.00946044921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 113348476, "step": 2023 }, { "epoch": 4.507795100222717, "grad_norm": 21.629709243774414, "learning_rate": 1e-06, "loss": 0.6398, "num_input_tokens_seen": 113406428, "step": 2024 }, { "epoch": 4.507795100222717, "loss": 0.7275168299674988, "loss_ce": 0.00022192415781319141, "loss_iou": 0.298828125, "loss_num": 0.0260009765625, "loss_xval": 0.7265625, "num_input_tokens_seen": 113406428, "step": 2024 }, { "epoch": 4.510022271714922, "grad_norm": 20.57599449157715, "learning_rate": 1e-06, "loss": 0.7676, "num_input_tokens_seen": 113458468, "step": 2025 }, { "epoch": 4.510022271714922, "loss": 0.8076622486114502, "loss_ce": 0.0002891695185098797, "loss_iou": 0.349609375, "loss_num": 0.0218505859375, "loss_xval": 0.80859375, "num_input_tokens_seen": 113458468, "step": 2025 }, { "epoch": 4.512249443207127, "grad_norm": 17.691875457763672, "learning_rate": 1e-06, "loss": 0.7612, "num_input_tokens_seen": 113514212, "step": 2026 }, { "epoch": 4.512249443207127, "loss": 0.46555206179618835, "loss_ce": 0.00046417216071859, "loss_iou": 0.2060546875, "loss_num": 0.01043701171875, "loss_xval": 0.46484375, "num_input_tokens_seen": 113514212, "step": 2026 }, { "epoch": 4.514476614699332, "grad_norm": 21.262399673461914, "learning_rate": 1e-06, "loss": 0.7058, "num_input_tokens_seen": 113568052, "step": 2027 }, { "epoch": 4.514476614699332, "loss": 0.7981184720993042, "loss_ce": 0.00026689909282140434, "loss_iou": 0.328125, "loss_num": 0.0281982421875, "loss_xval": 0.796875, "num_input_tokens_seen": 113568052, "step": 2027 }, { "epoch": 4.5167037861915365, "grad_norm": 19.464096069335938, "learning_rate": 1e-06, "loss": 0.7808, "num_input_tokens_seen": 113625848, "step": 2028 }, { "epoch": 4.5167037861915365, "loss": 0.9021308422088623, "loss_ce": 0.0016181376995518804, "loss_iou": 0.400390625, "loss_num": 0.019775390625, "loss_xval": 0.90234375, "num_input_tokens_seen": 113625848, "step": 2028 }, { "epoch": 4.518930957683741, "grad_norm": 18.429672241210938, "learning_rate": 1e-06, "loss": 0.723, "num_input_tokens_seen": 113680916, "step": 2029 }, { "epoch": 4.518930957683741, "loss": 0.9203833341598511, "loss_ce": 0.00021731224842369556, "loss_iou": 0.390625, "loss_num": 0.027587890625, "loss_xval": 0.921875, "num_input_tokens_seen": 113680916, "step": 2029 }, { "epoch": 4.521158129175946, "grad_norm": 15.999838829040527, "learning_rate": 1e-06, "loss": 0.8875, "num_input_tokens_seen": 113736420, "step": 2030 }, { "epoch": 4.521158129175946, "loss": 0.799384593963623, "loss_ce": 0.0003123145434074104, "loss_iou": 0.33203125, "loss_num": 0.0267333984375, "loss_xval": 0.80078125, "num_input_tokens_seen": 113736420, "step": 2030 }, { "epoch": 4.523385300668151, "grad_norm": 15.953963279724121, "learning_rate": 1e-06, "loss": 0.7948, "num_input_tokens_seen": 113792556, "step": 2031 }, { "epoch": 4.523385300668151, "loss": 0.6049848794937134, "loss_ce": 0.0002485427539795637, "loss_iou": 0.26171875, "loss_num": 0.0162353515625, "loss_xval": 0.60546875, "num_input_tokens_seen": 113792556, "step": 2031 }, { "epoch": 4.525612472160356, "grad_norm": 24.632400512695312, "learning_rate": 1e-06, "loss": 0.707, "num_input_tokens_seen": 113845716, "step": 2032 }, { "epoch": 4.525612472160356, "loss": 0.9207833409309387, "loss_ce": 0.00025110485148616135, "loss_iou": 0.3984375, "loss_num": 0.0244140625, "loss_xval": 0.921875, "num_input_tokens_seen": 113845716, "step": 2032 }, { "epoch": 4.527839643652561, "grad_norm": 17.756380081176758, "learning_rate": 1e-06, "loss": 1.0081, "num_input_tokens_seen": 113902444, "step": 2033 }, { "epoch": 4.527839643652561, "loss": 1.0257360935211182, "loss_ce": 0.00034554791636765003, "loss_iou": 0.384765625, "loss_num": 0.051025390625, "loss_xval": 1.0234375, "num_input_tokens_seen": 113902444, "step": 2033 }, { "epoch": 4.5300668151447665, "grad_norm": 18.746212005615234, "learning_rate": 1e-06, "loss": 0.731, "num_input_tokens_seen": 113957736, "step": 2034 }, { "epoch": 4.5300668151447665, "loss": 0.6118181943893433, "loss_ce": 0.00024596002185717225, "loss_iou": 0.265625, "loss_num": 0.015869140625, "loss_xval": 0.61328125, "num_input_tokens_seen": 113957736, "step": 2034 }, { "epoch": 4.532293986636971, "grad_norm": 18.45545196533203, "learning_rate": 1e-06, "loss": 0.7164, "num_input_tokens_seen": 114014944, "step": 2035 }, { "epoch": 4.532293986636971, "loss": 0.799105167388916, "loss_ce": 0.00027704003150574863, "loss_iou": 0.353515625, "loss_num": 0.0185546875, "loss_xval": 0.796875, "num_input_tokens_seen": 114014944, "step": 2035 }, { "epoch": 4.534521158129176, "grad_norm": 16.944326400756836, "learning_rate": 1e-06, "loss": 0.7828, "num_input_tokens_seen": 114073000, "step": 2036 }, { "epoch": 4.534521158129176, "loss": 0.7347084283828735, "loss_ce": 0.00033347506541758776, "loss_iou": 0.3203125, "loss_num": 0.018798828125, "loss_xval": 0.734375, "num_input_tokens_seen": 114073000, "step": 2036 }, { "epoch": 4.536748329621381, "grad_norm": 42.41679000854492, "learning_rate": 1e-06, "loss": 0.6489, "num_input_tokens_seen": 114129412, "step": 2037 }, { "epoch": 4.536748329621381, "loss": 0.591286838054657, "loss_ce": 0.0004665802407544106, "loss_iou": 0.2392578125, "loss_num": 0.0223388671875, "loss_xval": 0.58984375, "num_input_tokens_seen": 114129412, "step": 2037 }, { "epoch": 4.538975501113586, "grad_norm": 20.462520599365234, "learning_rate": 1e-06, "loss": 0.8208, "num_input_tokens_seen": 114182588, "step": 2038 }, { "epoch": 4.538975501113586, "loss": 0.692272424697876, "loss_ce": 0.00037790805799886584, "loss_iou": 0.28125, "loss_num": 0.0257568359375, "loss_xval": 0.69140625, "num_input_tokens_seen": 114182588, "step": 2038 }, { "epoch": 4.541202672605791, "grad_norm": 23.208797454833984, "learning_rate": 1e-06, "loss": 1.2438, "num_input_tokens_seen": 114236432, "step": 2039 }, { "epoch": 4.541202672605791, "loss": 1.3942797183990479, "loss_ce": 0.000725053483620286, "loss_iou": 0.55078125, "loss_num": 0.0576171875, "loss_xval": 1.390625, "num_input_tokens_seen": 114236432, "step": 2039 }, { "epoch": 4.543429844097996, "grad_norm": 20.509309768676758, "learning_rate": 1e-06, "loss": 0.894, "num_input_tokens_seen": 114291080, "step": 2040 }, { "epoch": 4.543429844097996, "loss": 0.9274067878723145, "loss_ce": 0.0002827763673849404, "loss_iou": 0.3828125, "loss_num": 0.03271484375, "loss_xval": 0.92578125, "num_input_tokens_seen": 114291080, "step": 2040 }, { "epoch": 4.5456570155902005, "grad_norm": 36.92770767211914, "learning_rate": 1e-06, "loss": 0.7234, "num_input_tokens_seen": 114345108, "step": 2041 }, { "epoch": 4.5456570155902005, "loss": 0.7667504549026489, "loss_ce": 0.00027098384452983737, "loss_iou": 0.3046875, "loss_num": 0.03125, "loss_xval": 0.765625, "num_input_tokens_seen": 114345108, "step": 2041 }, { "epoch": 4.547884187082405, "grad_norm": 21.100217819213867, "learning_rate": 1e-06, "loss": 0.7731, "num_input_tokens_seen": 114399096, "step": 2042 }, { "epoch": 4.547884187082405, "loss": 0.7943999767303467, "loss_ce": 0.00045465261791832745, "loss_iou": 0.349609375, "loss_num": 0.018798828125, "loss_xval": 0.79296875, "num_input_tokens_seen": 114399096, "step": 2042 }, { "epoch": 4.55011135857461, "grad_norm": 20.20854949951172, "learning_rate": 1e-06, "loss": 0.6397, "num_input_tokens_seen": 114454576, "step": 2043 }, { "epoch": 4.55011135857461, "loss": 0.620108962059021, "loss_ce": 0.00023591173521708697, "loss_iou": 0.2431640625, "loss_num": 0.026611328125, "loss_xval": 0.62109375, "num_input_tokens_seen": 114454576, "step": 2043 }, { "epoch": 4.552338530066815, "grad_norm": 14.868428230285645, "learning_rate": 1e-06, "loss": 0.7342, "num_input_tokens_seen": 114508528, "step": 2044 }, { "epoch": 4.552338530066815, "loss": 0.7006996870040894, "loss_ce": 0.0002602189779281616, "loss_iou": 0.30078125, "loss_num": 0.01953125, "loss_xval": 0.69921875, "num_input_tokens_seen": 114508528, "step": 2044 }, { "epoch": 4.55456570155902, "grad_norm": 14.888712882995605, "learning_rate": 1e-06, "loss": 0.8115, "num_input_tokens_seen": 114565380, "step": 2045 }, { "epoch": 4.55456570155902, "loss": 0.7298033237457275, "loss_ce": 0.00031118281185626984, "loss_iou": 0.3203125, "loss_num": 0.017578125, "loss_xval": 0.73046875, "num_input_tokens_seen": 114565380, "step": 2045 }, { "epoch": 4.556792873051225, "grad_norm": 18.843162536621094, "learning_rate": 1e-06, "loss": 0.8365, "num_input_tokens_seen": 114621044, "step": 2046 }, { "epoch": 4.556792873051225, "loss": 0.9204500913619995, "loss_ce": 0.0002840836241375655, "loss_iou": 0.41796875, "loss_num": 0.0167236328125, "loss_xval": 0.921875, "num_input_tokens_seen": 114621044, "step": 2046 }, { "epoch": 4.55902004454343, "grad_norm": 22.14583969116211, "learning_rate": 1e-06, "loss": 0.7857, "num_input_tokens_seen": 114678684, "step": 2047 }, { "epoch": 4.55902004454343, "loss": 0.9415132403373718, "loss_ce": 0.0003511565155349672, "loss_iou": 0.37890625, "loss_num": 0.036865234375, "loss_xval": 0.94140625, "num_input_tokens_seen": 114678684, "step": 2047 }, { "epoch": 4.5612472160356345, "grad_norm": 17.67383575439453, "learning_rate": 1e-06, "loss": 0.8186, "num_input_tokens_seen": 114736248, "step": 2048 }, { "epoch": 4.5612472160356345, "loss": 0.8276271820068359, "loss_ce": 0.0004787354846484959, "loss_iou": 0.35546875, "loss_num": 0.023193359375, "loss_xval": 0.828125, "num_input_tokens_seen": 114736248, "step": 2048 }, { "epoch": 4.563474387527839, "grad_norm": 20.229520797729492, "learning_rate": 1e-06, "loss": 0.7565, "num_input_tokens_seen": 114793940, "step": 2049 }, { "epoch": 4.563474387527839, "loss": 0.8892278671264648, "loss_ce": 0.00031182251404970884, "loss_iou": 0.369140625, "loss_num": 0.02978515625, "loss_xval": 0.890625, "num_input_tokens_seen": 114793940, "step": 2049 }, { "epoch": 4.565701559020044, "grad_norm": 18.42917823791504, "learning_rate": 1e-06, "loss": 0.611, "num_input_tokens_seen": 114851312, "step": 2050 }, { "epoch": 4.565701559020044, "loss": 0.5913413763046265, "loss_ce": 0.0002769285929389298, "loss_iou": 0.267578125, "loss_num": 0.01153564453125, "loss_xval": 0.58984375, "num_input_tokens_seen": 114851312, "step": 2050 }, { "epoch": 4.567928730512249, "grad_norm": 15.126654624938965, "learning_rate": 1e-06, "loss": 0.7946, "num_input_tokens_seen": 114904084, "step": 2051 }, { "epoch": 4.567928730512249, "loss": 0.9705009460449219, "loss_ce": 0.0002861037792172283, "loss_iou": 0.404296875, "loss_num": 0.031982421875, "loss_xval": 0.96875, "num_input_tokens_seen": 114904084, "step": 2051 }, { "epoch": 4.570155902004454, "grad_norm": 101.9856948852539, "learning_rate": 1e-06, "loss": 0.7333, "num_input_tokens_seen": 114958232, "step": 2052 }, { "epoch": 4.570155902004454, "loss": 0.759264349937439, "loss_ce": 0.00023114567738957703, "loss_iou": 0.337890625, "loss_num": 0.016845703125, "loss_xval": 0.7578125, "num_input_tokens_seen": 114958232, "step": 2052 }, { "epoch": 4.57238307349666, "grad_norm": 16.39866065979004, "learning_rate": 1e-06, "loss": 0.6814, "num_input_tokens_seen": 115015688, "step": 2053 }, { "epoch": 4.57238307349666, "loss": 0.8523510694503784, "loss_ce": 0.00030033523216843605, "loss_iou": 0.361328125, "loss_num": 0.025634765625, "loss_xval": 0.8515625, "num_input_tokens_seen": 115015688, "step": 2053 }, { "epoch": 4.574610244988865, "grad_norm": 24.29990005493164, "learning_rate": 1e-06, "loss": 0.8799, "num_input_tokens_seen": 115072520, "step": 2054 }, { "epoch": 4.574610244988865, "loss": 0.7028564214706421, "loss_ce": 0.00046386715257540345, "loss_iou": 0.30859375, "loss_num": 0.0172119140625, "loss_xval": 0.703125, "num_input_tokens_seen": 115072520, "step": 2054 }, { "epoch": 4.5768374164810695, "grad_norm": 42.747581481933594, "learning_rate": 1e-06, "loss": 0.7572, "num_input_tokens_seen": 115128176, "step": 2055 }, { "epoch": 4.5768374164810695, "loss": 0.7119477987289429, "loss_ce": 0.00027786268037743866, "loss_iou": 0.291015625, "loss_num": 0.02587890625, "loss_xval": 0.7109375, "num_input_tokens_seen": 115128176, "step": 2055 }, { "epoch": 4.579064587973274, "grad_norm": 28.751867294311523, "learning_rate": 1e-06, "loss": 0.7886, "num_input_tokens_seen": 115185668, "step": 2056 }, { "epoch": 4.579064587973274, "loss": 0.7783513069152832, "loss_ce": 0.0002751080028247088, "loss_iou": 0.330078125, "loss_num": 0.0238037109375, "loss_xval": 0.77734375, "num_input_tokens_seen": 115185668, "step": 2056 }, { "epoch": 4.581291759465479, "grad_norm": 20.60097885131836, "learning_rate": 1e-06, "loss": 0.8628, "num_input_tokens_seen": 115242368, "step": 2057 }, { "epoch": 4.581291759465479, "loss": 0.954852283000946, "loss_ce": 0.00026240124134346843, "loss_iou": 0.41796875, "loss_num": 0.023681640625, "loss_xval": 0.953125, "num_input_tokens_seen": 115242368, "step": 2057 }, { "epoch": 4.583518930957684, "grad_norm": 31.440357208251953, "learning_rate": 1e-06, "loss": 1.0689, "num_input_tokens_seen": 115297348, "step": 2058 }, { "epoch": 4.583518930957684, "loss": 1.095000982284546, "loss_ce": 0.000274458434432745, "loss_iou": 0.46484375, "loss_num": 0.033203125, "loss_xval": 1.09375, "num_input_tokens_seen": 115297348, "step": 2058 }, { "epoch": 4.585746102449889, "grad_norm": 22.026607513427734, "learning_rate": 1e-06, "loss": 0.9932, "num_input_tokens_seen": 115351760, "step": 2059 }, { "epoch": 4.585746102449889, "loss": 0.8425930738449097, "loss_ce": 0.0003078907902818173, "loss_iou": 0.341796875, "loss_num": 0.03173828125, "loss_xval": 0.84375, "num_input_tokens_seen": 115351760, "step": 2059 }, { "epoch": 4.587973273942094, "grad_norm": 21.124006271362305, "learning_rate": 1e-06, "loss": 0.7942, "num_input_tokens_seen": 115407436, "step": 2060 }, { "epoch": 4.587973273942094, "loss": 0.8991901874542236, "loss_ce": 0.0002643706393428147, "loss_iou": 0.369140625, "loss_num": 0.032470703125, "loss_xval": 0.8984375, "num_input_tokens_seen": 115407436, "step": 2060 }, { "epoch": 4.590200445434299, "grad_norm": 13.804505348205566, "learning_rate": 1e-06, "loss": 0.8642, "num_input_tokens_seen": 115464140, "step": 2061 }, { "epoch": 4.590200445434299, "loss": 0.6700685620307922, "loss_ce": 0.0002687171217985451, "loss_iou": 0.2578125, "loss_num": 0.0308837890625, "loss_xval": 0.66796875, "num_input_tokens_seen": 115464140, "step": 2061 }, { "epoch": 4.5924276169265035, "grad_norm": 30.673608779907227, "learning_rate": 1e-06, "loss": 0.8756, "num_input_tokens_seen": 115517956, "step": 2062 }, { "epoch": 4.5924276169265035, "loss": 0.6949182748794556, "loss_ce": 0.00033817399526014924, "loss_iou": 0.287109375, "loss_num": 0.024169921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 115517956, "step": 2062 }, { "epoch": 4.594654788418708, "grad_norm": 16.69051170349121, "learning_rate": 1e-06, "loss": 0.7354, "num_input_tokens_seen": 115575676, "step": 2063 }, { "epoch": 4.594654788418708, "loss": 0.8584408760070801, "loss_ce": 0.00028658530209213495, "loss_iou": 0.330078125, "loss_num": 0.039306640625, "loss_xval": 0.859375, "num_input_tokens_seen": 115575676, "step": 2063 }, { "epoch": 4.596881959910913, "grad_norm": 16.270042419433594, "learning_rate": 1e-06, "loss": 0.7017, "num_input_tokens_seen": 115631496, "step": 2064 }, { "epoch": 4.596881959910913, "loss": 0.5482097268104553, "loss_ce": 0.0006023210007697344, "loss_iou": 0.212890625, "loss_num": 0.024169921875, "loss_xval": 0.546875, "num_input_tokens_seen": 115631496, "step": 2064 }, { "epoch": 4.599109131403118, "grad_norm": 18.328039169311523, "learning_rate": 1e-06, "loss": 0.9602, "num_input_tokens_seen": 115685200, "step": 2065 }, { "epoch": 4.599109131403118, "loss": 0.9518949389457703, "loss_ce": 0.00023480196250602603, "loss_iou": 0.41796875, "loss_num": 0.022705078125, "loss_xval": 0.953125, "num_input_tokens_seen": 115685200, "step": 2065 }, { "epoch": 4.601336302895323, "grad_norm": 18.395797729492188, "learning_rate": 1e-06, "loss": 0.9051, "num_input_tokens_seen": 115740640, "step": 2066 }, { "epoch": 4.601336302895323, "loss": 0.9198845624923706, "loss_ce": 0.00032886205008253455, "loss_iou": 0.369140625, "loss_num": 0.035888671875, "loss_xval": 0.91796875, "num_input_tokens_seen": 115740640, "step": 2066 }, { "epoch": 4.603563474387528, "grad_norm": 78.51664733886719, "learning_rate": 1e-06, "loss": 0.7135, "num_input_tokens_seen": 115794096, "step": 2067 }, { "epoch": 4.603563474387528, "loss": 0.912857174873352, "loss_ce": 0.00025952988653443754, "loss_iou": 0.392578125, "loss_num": 0.0260009765625, "loss_xval": 0.9140625, "num_input_tokens_seen": 115794096, "step": 2067 }, { "epoch": 4.605790645879733, "grad_norm": 13.404295921325684, "learning_rate": 1e-06, "loss": 0.6822, "num_input_tokens_seen": 115852012, "step": 2068 }, { "epoch": 4.605790645879733, "loss": 0.6375604867935181, "loss_ce": 0.00023134646471589804, "loss_iou": 0.271484375, "loss_num": 0.018798828125, "loss_xval": 0.63671875, "num_input_tokens_seen": 115852012, "step": 2068 }, { "epoch": 4.6080178173719375, "grad_norm": 35.90913772583008, "learning_rate": 1e-06, "loss": 0.9463, "num_input_tokens_seen": 115910272, "step": 2069 }, { "epoch": 4.6080178173719375, "loss": 0.7615780830383301, "loss_ce": 0.0003476125421002507, "loss_iou": 0.33984375, "loss_num": 0.0166015625, "loss_xval": 0.76171875, "num_input_tokens_seen": 115910272, "step": 2069 }, { "epoch": 4.610244988864142, "grad_norm": 48.58989715576172, "learning_rate": 1e-06, "loss": 0.8322, "num_input_tokens_seen": 115966060, "step": 2070 }, { "epoch": 4.610244988864142, "loss": 0.9044246077537537, "loss_ce": 0.00024984654737636447, "loss_iou": 0.3828125, "loss_num": 0.027587890625, "loss_xval": 0.90234375, "num_input_tokens_seen": 115966060, "step": 2070 }, { "epoch": 4.612472160356347, "grad_norm": 24.12740135192871, "learning_rate": 1e-06, "loss": 0.8331, "num_input_tokens_seen": 116023692, "step": 2071 }, { "epoch": 4.612472160356347, "loss": 0.7075637578964233, "loss_ce": 0.0002883510896936059, "loss_iou": 0.296875, "loss_num": 0.0228271484375, "loss_xval": 0.70703125, "num_input_tokens_seen": 116023692, "step": 2071 }, { "epoch": 4.614699331848552, "grad_norm": 28.676658630371094, "learning_rate": 1e-06, "loss": 0.8544, "num_input_tokens_seen": 116078540, "step": 2072 }, { "epoch": 4.614699331848552, "loss": 0.9685863852500916, "loss_ce": 0.00032463445677421987, "loss_iou": 0.408203125, "loss_num": 0.0303955078125, "loss_xval": 0.96875, "num_input_tokens_seen": 116078540, "step": 2072 }, { "epoch": 4.616926503340757, "grad_norm": 19.603900909423828, "learning_rate": 1e-06, "loss": 0.7623, "num_input_tokens_seen": 116133732, "step": 2073 }, { "epoch": 4.616926503340757, "loss": 0.7463170886039734, "loss_ce": 0.0002233087579952553, "loss_iou": 0.27734375, "loss_num": 0.0380859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 116133732, "step": 2073 }, { "epoch": 4.619153674832962, "grad_norm": 15.20378589630127, "learning_rate": 1e-06, "loss": 0.7644, "num_input_tokens_seen": 116188860, "step": 2074 }, { "epoch": 4.619153674832962, "loss": 0.748953104019165, "loss_ce": 0.0004179720126558095, "loss_iou": 0.287109375, "loss_num": 0.034423828125, "loss_xval": 0.75, "num_input_tokens_seen": 116188860, "step": 2074 }, { "epoch": 4.621380846325167, "grad_norm": 15.175333976745605, "learning_rate": 1e-06, "loss": 0.8667, "num_input_tokens_seen": 116243408, "step": 2075 }, { "epoch": 4.621380846325167, "loss": 0.890459418296814, "loss_ce": 0.00032265347545035183, "loss_iou": 0.384765625, "loss_num": 0.024169921875, "loss_xval": 0.890625, "num_input_tokens_seen": 116243408, "step": 2075 }, { "epoch": 4.6236080178173715, "grad_norm": 13.573973655700684, "learning_rate": 1e-06, "loss": 1.0168, "num_input_tokens_seen": 116300164, "step": 2076 }, { "epoch": 4.6236080178173715, "loss": 1.1477470397949219, "loss_ce": 0.0002860655076801777, "loss_iou": 0.486328125, "loss_num": 0.035400390625, "loss_xval": 1.1484375, "num_input_tokens_seen": 116300164, "step": 2076 }, { "epoch": 4.625835189309576, "grad_norm": 19.694929122924805, "learning_rate": 1e-06, "loss": 0.8291, "num_input_tokens_seen": 116357772, "step": 2077 }, { "epoch": 4.625835189309576, "loss": 0.9049163460731506, "loss_ce": 0.00037533161230385303, "loss_iou": 0.35546875, "loss_num": 0.038330078125, "loss_xval": 0.90625, "num_input_tokens_seen": 116357772, "step": 2077 }, { "epoch": 4.628062360801781, "grad_norm": 14.48517894744873, "learning_rate": 1e-06, "loss": 0.8624, "num_input_tokens_seen": 116413892, "step": 2078 }, { "epoch": 4.628062360801781, "loss": 0.7932718992233276, "loss_ce": 0.0005472734337672591, "loss_iou": 0.330078125, "loss_num": 0.02685546875, "loss_xval": 0.79296875, "num_input_tokens_seen": 116413892, "step": 2078 }, { "epoch": 4.630289532293987, "grad_norm": 26.142356872558594, "learning_rate": 1e-06, "loss": 0.9727, "num_input_tokens_seen": 116471888, "step": 2079 }, { "epoch": 4.630289532293987, "loss": 1.051328420639038, "loss_ce": 0.0003031002124771476, "loss_iou": 0.458984375, "loss_num": 0.026611328125, "loss_xval": 1.0546875, "num_input_tokens_seen": 116471888, "step": 2079 }, { "epoch": 4.632516703786192, "grad_norm": 38.62444305419922, "learning_rate": 1e-06, "loss": 0.7796, "num_input_tokens_seen": 116527800, "step": 2080 }, { "epoch": 4.632516703786192, "loss": 0.8833059668540955, "loss_ce": 0.00024934165412560105, "loss_iou": 0.404296875, "loss_num": 0.01519775390625, "loss_xval": 0.8828125, "num_input_tokens_seen": 116527800, "step": 2080 }, { "epoch": 4.634743875278397, "grad_norm": 26.539600372314453, "learning_rate": 1e-06, "loss": 0.9048, "num_input_tokens_seen": 116583864, "step": 2081 }, { "epoch": 4.634743875278397, "loss": 0.9949434995651245, "loss_ce": 0.0005587629275396466, "loss_iou": 0.4453125, "loss_num": 0.0211181640625, "loss_xval": 0.99609375, "num_input_tokens_seen": 116583864, "step": 2081 }, { "epoch": 4.636971046770602, "grad_norm": 19.73139190673828, "learning_rate": 1e-06, "loss": 0.6691, "num_input_tokens_seen": 116635540, "step": 2082 }, { "epoch": 4.636971046770602, "loss": 0.6694454550743103, "loss_ce": 0.0009884194005280733, "loss_iou": 0.2578125, "loss_num": 0.0302734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 116635540, "step": 2082 }, { "epoch": 4.639198218262806, "grad_norm": 22.97101593017578, "learning_rate": 1e-06, "loss": 0.6584, "num_input_tokens_seen": 116691464, "step": 2083 }, { "epoch": 4.639198218262806, "loss": 0.5923573970794678, "loss_ce": 0.0003163928631693125, "loss_iou": 0.265625, "loss_num": 0.012451171875, "loss_xval": 0.59375, "num_input_tokens_seen": 116691464, "step": 2083 }, { "epoch": 4.641425389755011, "grad_norm": 21.242584228515625, "learning_rate": 1e-06, "loss": 0.7134, "num_input_tokens_seen": 116743160, "step": 2084 }, { "epoch": 4.641425389755011, "loss": 0.5542248487472534, "loss_ce": 0.0002697905874811113, "loss_iou": 0.2177734375, "loss_num": 0.02392578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 116743160, "step": 2084 }, { "epoch": 4.643652561247216, "grad_norm": 25.813098907470703, "learning_rate": 1e-06, "loss": 0.7164, "num_input_tokens_seen": 116797988, "step": 2085 }, { "epoch": 4.643652561247216, "loss": 0.5944602489471436, "loss_ce": 0.00022195720521267503, "loss_iou": 0.2578125, "loss_num": 0.0159912109375, "loss_xval": 0.59375, "num_input_tokens_seen": 116797988, "step": 2085 }, { "epoch": 4.645879732739421, "grad_norm": 35.45105743408203, "learning_rate": 1e-06, "loss": 0.9267, "num_input_tokens_seen": 116853140, "step": 2086 }, { "epoch": 4.645879732739421, "loss": 0.6704750657081604, "loss_ce": 0.0005531828501261771, "loss_iou": 0.296875, "loss_num": 0.0155029296875, "loss_xval": 0.671875, "num_input_tokens_seen": 116853140, "step": 2086 }, { "epoch": 4.648106904231626, "grad_norm": 19.315475463867188, "learning_rate": 1e-06, "loss": 0.7122, "num_input_tokens_seen": 116909096, "step": 2087 }, { "epoch": 4.648106904231626, "loss": 0.5913118720054626, "loss_ce": 0.0002473921631462872, "loss_iou": 0.251953125, "loss_num": 0.0177001953125, "loss_xval": 0.58984375, "num_input_tokens_seen": 116909096, "step": 2087 }, { "epoch": 4.650334075723831, "grad_norm": 27.988327026367188, "learning_rate": 1e-06, "loss": 0.8224, "num_input_tokens_seen": 116966212, "step": 2088 }, { "epoch": 4.650334075723831, "loss": 0.9297181367874146, "loss_ce": 0.00027486798353493214, "loss_iou": 0.376953125, "loss_num": 0.03515625, "loss_xval": 0.9296875, "num_input_tokens_seen": 116966212, "step": 2088 }, { "epoch": 4.652561247216036, "grad_norm": 16.152359008789062, "learning_rate": 1e-06, "loss": 0.5982, "num_input_tokens_seen": 117022116, "step": 2089 }, { "epoch": 4.652561247216036, "loss": 0.6331831216812134, "loss_ce": 0.0002486219455022365, "loss_iou": 0.265625, "loss_num": 0.0201416015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 117022116, "step": 2089 }, { "epoch": 4.6547884187082404, "grad_norm": 14.966622352600098, "learning_rate": 1e-06, "loss": 0.8524, "num_input_tokens_seen": 117077144, "step": 2090 }, { "epoch": 4.6547884187082404, "loss": 0.8334739208221436, "loss_ce": 0.0007102236268110573, "loss_iou": 0.341796875, "loss_num": 0.0294189453125, "loss_xval": 0.83203125, "num_input_tokens_seen": 117077144, "step": 2090 }, { "epoch": 4.657015590200445, "grad_norm": 18.069534301757812, "learning_rate": 1e-06, "loss": 0.6994, "num_input_tokens_seen": 117132324, "step": 2091 }, { "epoch": 4.657015590200445, "loss": 0.6397680044174194, "loss_ce": 0.00024168557138182223, "loss_iou": 0.283203125, "loss_num": 0.014404296875, "loss_xval": 0.640625, "num_input_tokens_seen": 117132324, "step": 2091 }, { "epoch": 4.65924276169265, "grad_norm": 16.776460647583008, "learning_rate": 1e-06, "loss": 0.6939, "num_input_tokens_seen": 117192008, "step": 2092 }, { "epoch": 4.65924276169265, "loss": 0.8577221632003784, "loss_ce": 0.0003002659068442881, "loss_iou": 0.3671875, "loss_num": 0.0250244140625, "loss_xval": 0.859375, "num_input_tokens_seen": 117192008, "step": 2092 }, { "epoch": 4.661469933184855, "grad_norm": 12.129549980163574, "learning_rate": 1e-06, "loss": 0.7361, "num_input_tokens_seen": 117248744, "step": 2093 }, { "epoch": 4.661469933184855, "loss": 0.8417858481407166, "loss_ce": 0.00035519120865501463, "loss_iou": 0.3671875, "loss_num": 0.0216064453125, "loss_xval": 0.83984375, "num_input_tokens_seen": 117248744, "step": 2093 }, { "epoch": 4.66369710467706, "grad_norm": 47.98014450073242, "learning_rate": 1e-06, "loss": 1.0221, "num_input_tokens_seen": 117303972, "step": 2094 }, { "epoch": 4.66369710467706, "loss": 0.8582909107208252, "loss_ce": 0.0002586581977084279, "loss_iou": 0.365234375, "loss_num": 0.0255126953125, "loss_xval": 0.859375, "num_input_tokens_seen": 117303972, "step": 2094 }, { "epoch": 4.665924276169265, "grad_norm": 17.894067764282227, "learning_rate": 1e-06, "loss": 0.7274, "num_input_tokens_seen": 117360916, "step": 2095 }, { "epoch": 4.665924276169265, "loss": 0.9711999297142029, "loss_ce": 0.0002527014003135264, "loss_iou": 0.39453125, "loss_num": 0.036376953125, "loss_xval": 0.97265625, "num_input_tokens_seen": 117360916, "step": 2095 }, { "epoch": 4.66815144766147, "grad_norm": 22.916641235351562, "learning_rate": 1e-06, "loss": 0.7064, "num_input_tokens_seen": 117416304, "step": 2096 }, { "epoch": 4.66815144766147, "loss": 0.7692503929138184, "loss_ce": 0.00045159138971939683, "loss_iou": 0.3125, "loss_num": 0.0289306640625, "loss_xval": 0.76953125, "num_input_tokens_seen": 117416304, "step": 2096 }, { "epoch": 4.6703786191536745, "grad_norm": 17.155508041381836, "learning_rate": 1e-06, "loss": 0.9104, "num_input_tokens_seen": 117473004, "step": 2097 }, { "epoch": 4.6703786191536745, "loss": 1.1037812232971191, "loss_ce": 0.0002655387215781957, "loss_iou": 0.453125, "loss_num": 0.03955078125, "loss_xval": 1.1015625, "num_input_tokens_seen": 117473004, "step": 2097 }, { "epoch": 4.67260579064588, "grad_norm": 17.130844116210938, "learning_rate": 1e-06, "loss": 0.7475, "num_input_tokens_seen": 117528424, "step": 2098 }, { "epoch": 4.67260579064588, "loss": 0.8190168738365173, "loss_ce": 0.0004133854527026415, "loss_iou": 0.33984375, "loss_num": 0.02783203125, "loss_xval": 0.8203125, "num_input_tokens_seen": 117528424, "step": 2098 }, { "epoch": 4.674832962138085, "grad_norm": 16.85631561279297, "learning_rate": 1e-06, "loss": 0.5101, "num_input_tokens_seen": 117581876, "step": 2099 }, { "epoch": 4.674832962138085, "loss": 0.469228595495224, "loss_ce": 0.00023444523685611784, "loss_iou": 0.19921875, "loss_num": 0.01416015625, "loss_xval": 0.46875, "num_input_tokens_seen": 117581876, "step": 2099 }, { "epoch": 4.67706013363029, "grad_norm": 17.609556198120117, "learning_rate": 1e-06, "loss": 1.0525, "num_input_tokens_seen": 117638468, "step": 2100 }, { "epoch": 4.67706013363029, "loss": 0.9739843606948853, "loss_ce": 0.0003515969729050994, "loss_iou": 0.43359375, "loss_num": 0.0213623046875, "loss_xval": 0.97265625, "num_input_tokens_seen": 117638468, "step": 2100 }, { "epoch": 4.679287305122495, "grad_norm": 20.728130340576172, "learning_rate": 1e-06, "loss": 0.7308, "num_input_tokens_seen": 117694144, "step": 2101 }, { "epoch": 4.679287305122495, "loss": 0.7154548168182373, "loss_ce": 0.0003668900462798774, "loss_iou": 0.298828125, "loss_num": 0.0238037109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 117694144, "step": 2101 }, { "epoch": 4.6815144766147, "grad_norm": 18.357759475708008, "learning_rate": 1e-06, "loss": 0.9824, "num_input_tokens_seen": 117747644, "step": 2102 }, { "epoch": 4.6815144766147, "loss": 0.903360903263092, "loss_ce": 0.0002847156720235944, "loss_iou": 0.3515625, "loss_num": 0.039794921875, "loss_xval": 0.90234375, "num_input_tokens_seen": 117747644, "step": 2102 }, { "epoch": 4.6837416481069045, "grad_norm": 18.632221221923828, "learning_rate": 1e-06, "loss": 0.7674, "num_input_tokens_seen": 117802256, "step": 2103 }, { "epoch": 4.6837416481069045, "loss": 0.8225224018096924, "loss_ce": 0.00025684869615361094, "loss_iou": 0.337890625, "loss_num": 0.029541015625, "loss_xval": 0.8203125, "num_input_tokens_seen": 117802256, "step": 2103 }, { "epoch": 4.685968819599109, "grad_norm": 22.77741813659668, "learning_rate": 1e-06, "loss": 0.6407, "num_input_tokens_seen": 117857308, "step": 2104 }, { "epoch": 4.685968819599109, "loss": 0.6071910858154297, "loss_ce": 0.0002575043763499707, "loss_iou": 0.2216796875, "loss_num": 0.03271484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 117857308, "step": 2104 }, { "epoch": 4.688195991091314, "grad_norm": 12.895187377929688, "learning_rate": 1e-06, "loss": 0.5916, "num_input_tokens_seen": 117913116, "step": 2105 }, { "epoch": 4.688195991091314, "loss": 0.7664899230003357, "loss_ce": 0.0002545661700423807, "loss_iou": 0.330078125, "loss_num": 0.020751953125, "loss_xval": 0.765625, "num_input_tokens_seen": 117913116, "step": 2105 }, { "epoch": 4.690423162583519, "grad_norm": 36.03581237792969, "learning_rate": 1e-06, "loss": 0.7424, "num_input_tokens_seen": 117970296, "step": 2106 }, { "epoch": 4.690423162583519, "loss": 0.785170316696167, "loss_ce": 0.00025823366013355553, "loss_iou": 0.341796875, "loss_num": 0.0205078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 117970296, "step": 2106 }, { "epoch": 4.692650334075724, "grad_norm": 24.407381057739258, "learning_rate": 1e-06, "loss": 0.7918, "num_input_tokens_seen": 118024948, "step": 2107 }, { "epoch": 4.692650334075724, "loss": 0.6611515283584595, "loss_ce": 0.00026292851543985307, "loss_iou": 0.29296875, "loss_num": 0.01519775390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 118024948, "step": 2107 }, { "epoch": 4.694877505567929, "grad_norm": 30.365440368652344, "learning_rate": 1e-06, "loss": 0.8189, "num_input_tokens_seen": 118081340, "step": 2108 }, { "epoch": 4.694877505567929, "loss": 0.9890696406364441, "loss_ce": 0.0003001365694217384, "loss_iou": 0.384765625, "loss_num": 0.043701171875, "loss_xval": 0.98828125, "num_input_tokens_seen": 118081340, "step": 2108 }, { "epoch": 4.697104677060134, "grad_norm": 14.271038055419922, "learning_rate": 1e-06, "loss": 0.8856, "num_input_tokens_seen": 118139104, "step": 2109 }, { "epoch": 4.697104677060134, "loss": 1.0308204889297485, "loss_ce": 0.0003028618230018765, "loss_iou": 0.4296875, "loss_num": 0.03466796875, "loss_xval": 1.03125, "num_input_tokens_seen": 118139104, "step": 2109 }, { "epoch": 4.6993318485523385, "grad_norm": 22.121456146240234, "learning_rate": 1e-06, "loss": 0.7422, "num_input_tokens_seen": 118194740, "step": 2110 }, { "epoch": 4.6993318485523385, "loss": 0.7307307124137878, "loss_ce": 0.00026196378166787326, "loss_iou": 0.32421875, "loss_num": 0.0167236328125, "loss_xval": 0.73046875, "num_input_tokens_seen": 118194740, "step": 2110 }, { "epoch": 4.701559020044543, "grad_norm": 21.364805221557617, "learning_rate": 1e-06, "loss": 0.6672, "num_input_tokens_seen": 118248584, "step": 2111 }, { "epoch": 4.701559020044543, "loss": 0.8600109815597534, "loss_ce": 0.000391850684536621, "loss_iou": 0.37890625, "loss_num": 0.02001953125, "loss_xval": 0.859375, "num_input_tokens_seen": 118248584, "step": 2111 }, { "epoch": 4.703786191536748, "grad_norm": 21.18739128112793, "learning_rate": 1e-06, "loss": 0.7989, "num_input_tokens_seen": 118306024, "step": 2112 }, { "epoch": 4.703786191536748, "loss": 0.8781961798667908, "loss_ce": 0.00026655703550204635, "loss_iou": 0.39453125, "loss_num": 0.0177001953125, "loss_xval": 0.87890625, "num_input_tokens_seen": 118306024, "step": 2112 }, { "epoch": 4.706013363028953, "grad_norm": 16.3918399810791, "learning_rate": 1e-06, "loss": 0.7998, "num_input_tokens_seen": 118361808, "step": 2113 }, { "epoch": 4.706013363028953, "loss": 0.5792876482009888, "loss_ce": 0.0003081717586610466, "loss_iou": 0.25390625, "loss_num": 0.01397705078125, "loss_xval": 0.578125, "num_input_tokens_seen": 118361808, "step": 2113 }, { "epoch": 4.708240534521158, "grad_norm": 21.2349910736084, "learning_rate": 1e-06, "loss": 0.6949, "num_input_tokens_seen": 118419444, "step": 2114 }, { "epoch": 4.708240534521158, "loss": 0.6933687925338745, "loss_ce": 0.00025352693046443164, "loss_iou": 0.296875, "loss_num": 0.019775390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 118419444, "step": 2114 }, { "epoch": 4.710467706013363, "grad_norm": 20.82132911682129, "learning_rate": 1e-06, "loss": 0.6637, "num_input_tokens_seen": 118476632, "step": 2115 }, { "epoch": 4.710467706013363, "loss": 0.6345095038414001, "loss_ce": 0.0004763126198668033, "loss_iou": 0.28125, "loss_num": 0.01422119140625, "loss_xval": 0.6328125, "num_input_tokens_seen": 118476632, "step": 2115 }, { "epoch": 4.712694877505568, "grad_norm": 17.699792861938477, "learning_rate": 1e-06, "loss": 0.6965, "num_input_tokens_seen": 118532612, "step": 2116 }, { "epoch": 4.712694877505568, "loss": 0.6084548234939575, "loss_ce": 0.0002699895412661135, "loss_iou": 0.265625, "loss_num": 0.015625, "loss_xval": 0.609375, "num_input_tokens_seen": 118532612, "step": 2116 }, { "epoch": 4.714922048997773, "grad_norm": 15.86410903930664, "learning_rate": 1e-06, "loss": 0.9501, "num_input_tokens_seen": 118589372, "step": 2117 }, { "epoch": 4.714922048997773, "loss": 1.0604746341705322, "loss_ce": 0.000416038790717721, "loss_iou": 0.42578125, "loss_num": 0.041748046875, "loss_xval": 1.0625, "num_input_tokens_seen": 118589372, "step": 2117 }, { "epoch": 4.717149220489977, "grad_norm": 20.29158592224121, "learning_rate": 1e-06, "loss": 0.6542, "num_input_tokens_seen": 118644080, "step": 2118 }, { "epoch": 4.717149220489977, "loss": 0.6954843401908875, "loss_ce": 0.00041598049574531615, "loss_iou": 0.29296875, "loss_num": 0.02197265625, "loss_xval": 0.6953125, "num_input_tokens_seen": 118644080, "step": 2118 }, { "epoch": 4.719376391982182, "grad_norm": 16.680042266845703, "learning_rate": 1e-06, "loss": 0.7097, "num_input_tokens_seen": 118698048, "step": 2119 }, { "epoch": 4.719376391982182, "loss": 0.8289152383804321, "loss_ce": 0.00030192872509360313, "loss_iou": 0.365234375, "loss_num": 0.0196533203125, "loss_xval": 0.828125, "num_input_tokens_seen": 118698048, "step": 2119 }, { "epoch": 4.721603563474387, "grad_norm": 20.128711700439453, "learning_rate": 1e-06, "loss": 0.7996, "num_input_tokens_seen": 118754912, "step": 2120 }, { "epoch": 4.721603563474387, "loss": 0.7679637670516968, "loss_ce": 0.0003856367547996342, "loss_iou": 0.33984375, "loss_num": 0.017578125, "loss_xval": 0.765625, "num_input_tokens_seen": 118754912, "step": 2120 }, { "epoch": 4.723830734966592, "grad_norm": 26.825408935546875, "learning_rate": 1e-06, "loss": 1.0429, "num_input_tokens_seen": 118809528, "step": 2121 }, { "epoch": 4.723830734966592, "loss": 0.6894991993904114, "loss_ce": 0.0002902133564930409, "loss_iou": 0.275390625, "loss_num": 0.02783203125, "loss_xval": 0.6875, "num_input_tokens_seen": 118809528, "step": 2121 }, { "epoch": 4.726057906458797, "grad_norm": 35.720130920410156, "learning_rate": 1e-06, "loss": 0.7786, "num_input_tokens_seen": 118866560, "step": 2122 }, { "epoch": 4.726057906458797, "loss": 0.766879677772522, "loss_ce": 0.0002781344810500741, "loss_iou": 0.298828125, "loss_num": 0.033935546875, "loss_xval": 0.765625, "num_input_tokens_seen": 118866560, "step": 2122 }, { "epoch": 4.728285077951003, "grad_norm": 39.63307189941406, "learning_rate": 1e-06, "loss": 0.7441, "num_input_tokens_seen": 118922208, "step": 2123 }, { "epoch": 4.728285077951003, "loss": 0.6776679158210754, "loss_ce": 0.0002997248957399279, "loss_iou": 0.271484375, "loss_num": 0.02685546875, "loss_xval": 0.67578125, "num_input_tokens_seen": 118922208, "step": 2123 }, { "epoch": 4.7305122494432075, "grad_norm": 29.788860321044922, "learning_rate": 1e-06, "loss": 0.7002, "num_input_tokens_seen": 118976912, "step": 2124 }, { "epoch": 4.7305122494432075, "loss": 0.8606002926826477, "loss_ce": 0.00024869441404007375, "loss_iou": 0.3359375, "loss_num": 0.0380859375, "loss_xval": 0.859375, "num_input_tokens_seen": 118976912, "step": 2124 }, { "epoch": 4.732739420935412, "grad_norm": 17.970624923706055, "learning_rate": 1e-06, "loss": 0.6788, "num_input_tokens_seen": 119032532, "step": 2125 }, { "epoch": 4.732739420935412, "loss": 0.9126179814338684, "loss_ce": 0.0005085880402475595, "loss_iou": 0.38671875, "loss_num": 0.0279541015625, "loss_xval": 0.9140625, "num_input_tokens_seen": 119032532, "step": 2125 }, { "epoch": 4.734966592427617, "grad_norm": 40.0213508605957, "learning_rate": 1e-06, "loss": 0.6293, "num_input_tokens_seen": 119089356, "step": 2126 }, { "epoch": 4.734966592427617, "loss": 0.6347508430480957, "loss_ce": 0.00022931784042157233, "loss_iou": 0.28515625, "loss_num": 0.0130615234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 119089356, "step": 2126 }, { "epoch": 4.737193763919822, "grad_norm": 19.045475006103516, "learning_rate": 1e-06, "loss": 0.7774, "num_input_tokens_seen": 119144504, "step": 2127 }, { "epoch": 4.737193763919822, "loss": 0.9141049385070801, "loss_ce": 0.0002866187132894993, "loss_iou": 0.400390625, "loss_num": 0.0223388671875, "loss_xval": 0.9140625, "num_input_tokens_seen": 119144504, "step": 2127 }, { "epoch": 4.739420935412027, "grad_norm": 17.02509880065918, "learning_rate": 1e-06, "loss": 0.7687, "num_input_tokens_seen": 119201984, "step": 2128 }, { "epoch": 4.739420935412027, "loss": 0.7351812124252319, "loss_ce": 0.0003179654595442116, "loss_iou": 0.322265625, "loss_num": 0.018310546875, "loss_xval": 0.734375, "num_input_tokens_seen": 119201984, "step": 2128 }, { "epoch": 4.741648106904232, "grad_norm": 42.980289459228516, "learning_rate": 1e-06, "loss": 0.6333, "num_input_tokens_seen": 119259860, "step": 2129 }, { "epoch": 4.741648106904232, "loss": 0.538473904132843, "loss_ce": 0.0002658716693986207, "loss_iou": 0.24609375, "loss_num": 0.0091552734375, "loss_xval": 0.5390625, "num_input_tokens_seen": 119259860, "step": 2129 }, { "epoch": 4.743875278396437, "grad_norm": 15.055540084838867, "learning_rate": 1e-06, "loss": 0.8069, "num_input_tokens_seen": 119317616, "step": 2130 }, { "epoch": 4.743875278396437, "loss": 0.8891835808753967, "loss_ce": 0.0002675617579370737, "loss_iou": 0.3671875, "loss_num": 0.0306396484375, "loss_xval": 0.890625, "num_input_tokens_seen": 119317616, "step": 2130 }, { "epoch": 4.7461024498886415, "grad_norm": 20.52086067199707, "learning_rate": 1e-06, "loss": 0.8202, "num_input_tokens_seen": 119373908, "step": 2131 }, { "epoch": 4.7461024498886415, "loss": 0.8626622557640076, "loss_ce": 0.00035758258309215307, "loss_iou": 0.359375, "loss_num": 0.0286865234375, "loss_xval": 0.86328125, "num_input_tokens_seen": 119373908, "step": 2131 }, { "epoch": 4.748329621380846, "grad_norm": 24.670711517333984, "learning_rate": 1e-06, "loss": 0.8469, "num_input_tokens_seen": 119429720, "step": 2132 }, { "epoch": 4.748329621380846, "loss": 0.7893211841583252, "loss_ce": 0.00025862548500299454, "loss_iou": 0.34375, "loss_num": 0.0201416015625, "loss_xval": 0.7890625, "num_input_tokens_seen": 119429720, "step": 2132 }, { "epoch": 4.750556792873051, "grad_norm": 21.29181671142578, "learning_rate": 1e-06, "loss": 0.6766, "num_input_tokens_seen": 119484036, "step": 2133 }, { "epoch": 4.750556792873051, "loss": 0.609359860420227, "loss_ce": 0.00022898372844792902, "loss_iou": 0.240234375, "loss_num": 0.02587890625, "loss_xval": 0.609375, "num_input_tokens_seen": 119484036, "step": 2133 }, { "epoch": 4.752783964365256, "grad_norm": 54.08604431152344, "learning_rate": 1e-06, "loss": 0.6534, "num_input_tokens_seen": 119537184, "step": 2134 }, { "epoch": 4.752783964365256, "loss": 0.6004340648651123, "loss_ce": 0.0003363891737535596, "loss_iou": 0.234375, "loss_num": 0.026123046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 119537184, "step": 2134 }, { "epoch": 4.755011135857461, "grad_norm": 24.42340660095215, "learning_rate": 1e-06, "loss": 0.66, "num_input_tokens_seen": 119591936, "step": 2135 }, { "epoch": 4.755011135857461, "loss": 0.762969970703125, "loss_ce": 0.0002747006365098059, "loss_iou": 0.322265625, "loss_num": 0.023681640625, "loss_xval": 0.76171875, "num_input_tokens_seen": 119591936, "step": 2135 }, { "epoch": 4.757238307349666, "grad_norm": 18.103748321533203, "learning_rate": 1e-06, "loss": 0.7105, "num_input_tokens_seen": 119650452, "step": 2136 }, { "epoch": 4.757238307349666, "loss": 0.8064308166503906, "loss_ce": 0.0002785040996968746, "loss_iou": 0.35546875, "loss_num": 0.0191650390625, "loss_xval": 0.8046875, "num_input_tokens_seen": 119650452, "step": 2136 }, { "epoch": 4.759465478841871, "grad_norm": 25.047883987426758, "learning_rate": 1e-06, "loss": 0.9671, "num_input_tokens_seen": 119705364, "step": 2137 }, { "epoch": 4.759465478841871, "loss": 0.9405218958854675, "loss_ce": 0.0003363378345966339, "loss_iou": 0.390625, "loss_num": 0.03173828125, "loss_xval": 0.94140625, "num_input_tokens_seen": 119705364, "step": 2137 }, { "epoch": 4.7616926503340755, "grad_norm": 11.171337127685547, "learning_rate": 1e-06, "loss": 0.4534, "num_input_tokens_seen": 119761084, "step": 2138 }, { "epoch": 4.7616926503340755, "loss": 0.43443742394447327, "loss_ce": 0.00023330794647336006, "loss_iou": 0.1904296875, "loss_num": 0.01055908203125, "loss_xval": 0.43359375, "num_input_tokens_seen": 119761084, "step": 2138 }, { "epoch": 4.76391982182628, "grad_norm": 14.953879356384277, "learning_rate": 1e-06, "loss": 0.8728, "num_input_tokens_seen": 119816688, "step": 2139 }, { "epoch": 4.76391982182628, "loss": 0.6874940991401672, "loss_ce": 0.00029927384457550943, "loss_iou": 0.3046875, "loss_num": 0.0157470703125, "loss_xval": 0.6875, "num_input_tokens_seen": 119816688, "step": 2139 }, { "epoch": 4.766146993318485, "grad_norm": 25.45313262939453, "learning_rate": 1e-06, "loss": 0.8034, "num_input_tokens_seen": 119874648, "step": 2140 }, { "epoch": 4.766146993318485, "loss": 0.8113042116165161, "loss_ce": 0.00026898583746515214, "loss_iou": 0.328125, "loss_num": 0.03125, "loss_xval": 0.8125, "num_input_tokens_seen": 119874648, "step": 2140 }, { "epoch": 4.76837416481069, "grad_norm": 22.319425582885742, "learning_rate": 1e-06, "loss": 0.7391, "num_input_tokens_seen": 119930376, "step": 2141 }, { "epoch": 4.76837416481069, "loss": 0.7610592246055603, "loss_ce": 0.0005611696396954358, "loss_iou": 0.314453125, "loss_num": 0.0267333984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 119930376, "step": 2141 }, { "epoch": 4.770601336302895, "grad_norm": 27.872591018676758, "learning_rate": 1e-06, "loss": 0.7955, "num_input_tokens_seen": 119985268, "step": 2142 }, { "epoch": 4.770601336302895, "loss": 0.5949513912200928, "loss_ce": 0.00022480121697299182, "loss_iou": 0.2490234375, "loss_num": 0.019287109375, "loss_xval": 0.59375, "num_input_tokens_seen": 119985268, "step": 2142 }, { "epoch": 4.772828507795101, "grad_norm": 21.563512802124023, "learning_rate": 1e-06, "loss": 0.6652, "num_input_tokens_seen": 120041572, "step": 2143 }, { "epoch": 4.772828507795101, "loss": 0.6074680089950562, "loss_ce": 0.0002902495616581291, "loss_iou": 0.26171875, "loss_num": 0.0166015625, "loss_xval": 0.60546875, "num_input_tokens_seen": 120041572, "step": 2143 }, { "epoch": 4.775055679287306, "grad_norm": 21.816646575927734, "learning_rate": 1e-06, "loss": 0.7993, "num_input_tokens_seen": 120097280, "step": 2144 }, { "epoch": 4.775055679287306, "loss": 0.8655096292495728, "loss_ce": 0.00027528638020157814, "loss_iou": 0.3828125, "loss_num": 0.01953125, "loss_xval": 0.8671875, "num_input_tokens_seen": 120097280, "step": 2144 }, { "epoch": 4.77728285077951, "grad_norm": 17.00129508972168, "learning_rate": 1e-06, "loss": 0.8654, "num_input_tokens_seen": 120152928, "step": 2145 }, { "epoch": 4.77728285077951, "loss": 1.0312559604644775, "loss_ce": 0.00025006328360177577, "loss_iou": 0.427734375, "loss_num": 0.035400390625, "loss_xval": 1.03125, "num_input_tokens_seen": 120152928, "step": 2145 }, { "epoch": 4.779510022271715, "grad_norm": 23.580554962158203, "learning_rate": 1e-06, "loss": 0.912, "num_input_tokens_seen": 120211296, "step": 2146 }, { "epoch": 4.779510022271715, "loss": 0.8288871645927429, "loss_ce": 0.0005180308944545686, "loss_iou": 0.34375, "loss_num": 0.02783203125, "loss_xval": 0.828125, "num_input_tokens_seen": 120211296, "step": 2146 }, { "epoch": 4.78173719376392, "grad_norm": 18.791494369506836, "learning_rate": 1e-06, "loss": 0.7229, "num_input_tokens_seen": 120268204, "step": 2147 }, { "epoch": 4.78173719376392, "loss": 0.7734935283660889, "loss_ce": 0.000300163053907454, "loss_iou": 0.310546875, "loss_num": 0.0308837890625, "loss_xval": 0.7734375, "num_input_tokens_seen": 120268204, "step": 2147 }, { "epoch": 4.783964365256125, "grad_norm": 17.211793899536133, "learning_rate": 1e-06, "loss": 0.6943, "num_input_tokens_seen": 120320000, "step": 2148 }, { "epoch": 4.783964365256125, "loss": 0.8804046511650085, "loss_ce": 0.00027765982667915523, "loss_iou": 0.357421875, "loss_num": 0.032958984375, "loss_xval": 0.87890625, "num_input_tokens_seen": 120320000, "step": 2148 }, { "epoch": 4.78619153674833, "grad_norm": 16.56837272644043, "learning_rate": 1e-06, "loss": 0.555, "num_input_tokens_seen": 120375736, "step": 2149 }, { "epoch": 4.78619153674833, "loss": 0.4850603938102722, "loss_ce": 0.0003192013828083873, "loss_iou": 0.2138671875, "loss_num": 0.01116943359375, "loss_xval": 0.484375, "num_input_tokens_seen": 120375736, "step": 2149 }, { "epoch": 4.788418708240535, "grad_norm": 60.28794860839844, "learning_rate": 1e-06, "loss": 0.8792, "num_input_tokens_seen": 120429608, "step": 2150 }, { "epoch": 4.788418708240535, "loss": 0.890438437461853, "loss_ce": 0.00030169825186021626, "loss_iou": 0.38671875, "loss_num": 0.023193359375, "loss_xval": 0.890625, "num_input_tokens_seen": 120429608, "step": 2150 }, { "epoch": 4.79064587973274, "grad_norm": 25.588119506835938, "learning_rate": 1e-06, "loss": 0.705, "num_input_tokens_seen": 120484200, "step": 2151 }, { "epoch": 4.79064587973274, "loss": 0.8378915786743164, "loss_ce": 0.00036717430339194834, "loss_iou": 0.345703125, "loss_num": 0.0289306640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 120484200, "step": 2151 }, { "epoch": 4.7928730512249444, "grad_norm": 20.40205955505371, "learning_rate": 1e-06, "loss": 0.7429, "num_input_tokens_seen": 120539644, "step": 2152 }, { "epoch": 4.7928730512249444, "loss": 0.6924837827682495, "loss_ce": 0.00034514523576945066, "loss_iou": 0.302734375, "loss_num": 0.017578125, "loss_xval": 0.69140625, "num_input_tokens_seen": 120539644, "step": 2152 }, { "epoch": 4.795100222717149, "grad_norm": 24.60614013671875, "learning_rate": 1e-06, "loss": 0.7615, "num_input_tokens_seen": 120594500, "step": 2153 }, { "epoch": 4.795100222717149, "loss": 0.7266230583190918, "loss_ce": 0.000487854442326352, "loss_iou": 0.302734375, "loss_num": 0.023681640625, "loss_xval": 0.7265625, "num_input_tokens_seen": 120594500, "step": 2153 }, { "epoch": 4.797327394209354, "grad_norm": 16.96647834777832, "learning_rate": 1e-06, "loss": 0.7819, "num_input_tokens_seen": 120649288, "step": 2154 }, { "epoch": 4.797327394209354, "loss": 0.6350046396255493, "loss_ce": 0.00023901589156594127, "loss_iou": 0.28125, "loss_num": 0.0145263671875, "loss_xval": 0.6328125, "num_input_tokens_seen": 120649288, "step": 2154 }, { "epoch": 4.799554565701559, "grad_norm": 18.894594192504883, "learning_rate": 1e-06, "loss": 0.9572, "num_input_tokens_seen": 120704828, "step": 2155 }, { "epoch": 4.799554565701559, "loss": 0.9965546131134033, "loss_ce": 0.000460872077383101, "loss_iou": 0.41796875, "loss_num": 0.031982421875, "loss_xval": 0.99609375, "num_input_tokens_seen": 120704828, "step": 2155 }, { "epoch": 4.801781737193764, "grad_norm": 19.677793502807617, "learning_rate": 1e-06, "loss": 0.8639, "num_input_tokens_seen": 120760452, "step": 2156 }, { "epoch": 4.801781737193764, "loss": 0.9670339822769165, "loss_ce": 0.00023707067884970456, "loss_iou": 0.40625, "loss_num": 0.0306396484375, "loss_xval": 0.96875, "num_input_tokens_seen": 120760452, "step": 2156 }, { "epoch": 4.804008908685969, "grad_norm": 28.458290100097656, "learning_rate": 1e-06, "loss": 0.8885, "num_input_tokens_seen": 120813932, "step": 2157 }, { "epoch": 4.804008908685969, "loss": 0.9386167526245117, "loss_ce": 0.0004453619185369462, "loss_iou": 0.37109375, "loss_num": 0.039794921875, "loss_xval": 0.9375, "num_input_tokens_seen": 120813932, "step": 2157 }, { "epoch": 4.806236080178174, "grad_norm": 19.169998168945312, "learning_rate": 1e-06, "loss": 0.7322, "num_input_tokens_seen": 120867148, "step": 2158 }, { "epoch": 4.806236080178174, "loss": 0.6552798748016357, "loss_ce": 0.0008609433425590396, "loss_iou": 0.275390625, "loss_num": 0.02099609375, "loss_xval": 0.65625, "num_input_tokens_seen": 120867148, "step": 2158 }, { "epoch": 4.8084632516703785, "grad_norm": 100.8769302368164, "learning_rate": 1e-06, "loss": 0.8376, "num_input_tokens_seen": 120921256, "step": 2159 }, { "epoch": 4.8084632516703785, "loss": 0.7510162591934204, "loss_ce": 0.0002837868523783982, "loss_iou": 0.330078125, "loss_num": 0.0184326171875, "loss_xval": 0.75, "num_input_tokens_seen": 120921256, "step": 2159 }, { "epoch": 4.810690423162583, "grad_norm": 18.4330997467041, "learning_rate": 1e-06, "loss": 0.9006, "num_input_tokens_seen": 120976736, "step": 2160 }, { "epoch": 4.810690423162583, "loss": 1.1066399812698364, "loss_ce": 0.00043876029667444527, "loss_iou": 0.447265625, "loss_num": 0.0419921875, "loss_xval": 1.109375, "num_input_tokens_seen": 120976736, "step": 2160 }, { "epoch": 4.812917594654788, "grad_norm": 19.734941482543945, "learning_rate": 1e-06, "loss": 0.814, "num_input_tokens_seen": 121034524, "step": 2161 }, { "epoch": 4.812917594654788, "loss": 0.8756116628646851, "loss_ce": 0.0006117259617894888, "loss_iou": 0.3671875, "loss_num": 0.0281982421875, "loss_xval": 0.875, "num_input_tokens_seen": 121034524, "step": 2161 }, { "epoch": 4.815144766146993, "grad_norm": 18.92926788330078, "learning_rate": 1e-06, "loss": 0.6339, "num_input_tokens_seen": 121090056, "step": 2162 }, { "epoch": 4.815144766146993, "loss": 0.6168254017829895, "loss_ce": 0.0003703091642819345, "loss_iou": 0.27734375, "loss_num": 0.01251220703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 121090056, "step": 2162 }, { "epoch": 4.817371937639198, "grad_norm": 30.59477996826172, "learning_rate": 1e-06, "loss": 0.8952, "num_input_tokens_seen": 121148352, "step": 2163 }, { "epoch": 4.817371937639198, "loss": 0.9438307881355286, "loss_ce": 0.0004713647358585149, "loss_iou": 0.388671875, "loss_num": 0.03271484375, "loss_xval": 0.9453125, "num_input_tokens_seen": 121148352, "step": 2163 }, { "epoch": 4.819599109131403, "grad_norm": 21.30016326904297, "learning_rate": 1e-06, "loss": 0.9444, "num_input_tokens_seen": 121206584, "step": 2164 }, { "epoch": 4.819599109131403, "loss": 0.7458754777908325, "loss_ce": 0.000392090150853619, "loss_iou": 0.298828125, "loss_num": 0.0296630859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 121206584, "step": 2164 }, { "epoch": 4.821826280623608, "grad_norm": 25.39438247680664, "learning_rate": 1e-06, "loss": 0.7381, "num_input_tokens_seen": 121265252, "step": 2165 }, { "epoch": 4.821826280623608, "loss": 0.7435488700866699, "loss_ce": 0.00026276521384716034, "loss_iou": 0.326171875, "loss_num": 0.01806640625, "loss_xval": 0.7421875, "num_input_tokens_seen": 121265252, "step": 2165 }, { "epoch": 4.8240534521158125, "grad_norm": 36.96885681152344, "learning_rate": 1e-06, "loss": 0.7863, "num_input_tokens_seen": 121321320, "step": 2166 }, { "epoch": 4.8240534521158125, "loss": 0.82174152135849, "loss_ce": 0.0002082870778394863, "loss_iou": 0.34375, "loss_num": 0.0267333984375, "loss_xval": 0.8203125, "num_input_tokens_seen": 121321320, "step": 2166 }, { "epoch": 4.826280623608017, "grad_norm": 18.965267181396484, "learning_rate": 1e-06, "loss": 0.7267, "num_input_tokens_seen": 121375740, "step": 2167 }, { "epoch": 4.826280623608017, "loss": 0.8186416625976562, "loss_ce": 0.0005264327628538013, "loss_iou": 0.3671875, "loss_num": 0.016845703125, "loss_xval": 0.81640625, "num_input_tokens_seen": 121375740, "step": 2167 }, { "epoch": 4.828507795100223, "grad_norm": 14.640192031860352, "learning_rate": 1e-06, "loss": 0.6366, "num_input_tokens_seen": 121433144, "step": 2168 }, { "epoch": 4.828507795100223, "loss": 0.5576098561286926, "loss_ce": 0.00029783969512209296, "loss_iou": 0.2470703125, "loss_num": 0.012451171875, "loss_xval": 0.55859375, "num_input_tokens_seen": 121433144, "step": 2168 }, { "epoch": 4.830734966592428, "grad_norm": 16.91661834716797, "learning_rate": 1e-06, "loss": 0.7211, "num_input_tokens_seen": 121491312, "step": 2169 }, { "epoch": 4.830734966592428, "loss": 0.4785498082637787, "loss_ce": 0.00027831370243802667, "loss_iou": 0.18359375, "loss_num": 0.0220947265625, "loss_xval": 0.478515625, "num_input_tokens_seen": 121491312, "step": 2169 }, { "epoch": 4.832962138084633, "grad_norm": 31.92616844177246, "learning_rate": 1e-06, "loss": 0.9147, "num_input_tokens_seen": 121546584, "step": 2170 }, { "epoch": 4.832962138084633, "loss": 0.9893652200698853, "loss_ce": 0.00035156396916136146, "loss_iou": 0.384765625, "loss_num": 0.044189453125, "loss_xval": 0.98828125, "num_input_tokens_seen": 121546584, "step": 2170 }, { "epoch": 4.835189309576838, "grad_norm": 18.34603500366211, "learning_rate": 1e-06, "loss": 0.7332, "num_input_tokens_seen": 121601848, "step": 2171 }, { "epoch": 4.835189309576838, "loss": 0.6528722047805786, "loss_ce": 0.00022326521866489202, "loss_iou": 0.2578125, "loss_num": 0.0277099609375, "loss_xval": 0.65234375, "num_input_tokens_seen": 121601848, "step": 2171 }, { "epoch": 4.8374164810690425, "grad_norm": 23.596956253051758, "learning_rate": 1e-06, "loss": 0.6937, "num_input_tokens_seen": 121654980, "step": 2172 }, { "epoch": 4.8374164810690425, "loss": 0.7717235088348389, "loss_ce": 0.00023919279919937253, "loss_iou": 0.326171875, "loss_num": 0.0240478515625, "loss_xval": 0.7734375, "num_input_tokens_seen": 121654980, "step": 2172 }, { "epoch": 4.839643652561247, "grad_norm": 17.30280876159668, "learning_rate": 1e-06, "loss": 0.8213, "num_input_tokens_seen": 121710832, "step": 2173 }, { "epoch": 4.839643652561247, "loss": 0.890198290348053, "loss_ce": 0.0003057056455872953, "loss_iou": 0.388671875, "loss_num": 0.022705078125, "loss_xval": 0.890625, "num_input_tokens_seen": 121710832, "step": 2173 }, { "epoch": 4.841870824053452, "grad_norm": 21.682880401611328, "learning_rate": 1e-06, "loss": 0.7604, "num_input_tokens_seen": 121769308, "step": 2174 }, { "epoch": 4.841870824053452, "loss": 0.8933024406433105, "loss_ce": 0.00023604354646522552, "loss_iou": 0.392578125, "loss_num": 0.0216064453125, "loss_xval": 0.89453125, "num_input_tokens_seen": 121769308, "step": 2174 }, { "epoch": 4.844097995545657, "grad_norm": 23.18276596069336, "learning_rate": 1e-06, "loss": 0.852, "num_input_tokens_seen": 121825936, "step": 2175 }, { "epoch": 4.844097995545657, "loss": 0.8222508430480957, "loss_ce": 0.0004735908005386591, "loss_iou": 0.34765625, "loss_num": 0.0255126953125, "loss_xval": 0.8203125, "num_input_tokens_seen": 121825936, "step": 2175 }, { "epoch": 4.846325167037862, "grad_norm": 16.410219192504883, "learning_rate": 1e-06, "loss": 0.5924, "num_input_tokens_seen": 121880128, "step": 2176 }, { "epoch": 4.846325167037862, "loss": 0.5815201997756958, "loss_ce": 0.00022139312932267785, "loss_iou": 0.2451171875, "loss_num": 0.0181884765625, "loss_xval": 0.58203125, "num_input_tokens_seen": 121880128, "step": 2176 }, { "epoch": 4.848552338530067, "grad_norm": 16.55893898010254, "learning_rate": 1e-06, "loss": 0.5514, "num_input_tokens_seen": 121937432, "step": 2177 }, { "epoch": 4.848552338530067, "loss": 0.5693067312240601, "loss_ce": 0.00021495725377462804, "loss_iou": 0.25, "loss_num": 0.013671875, "loss_xval": 0.5703125, "num_input_tokens_seen": 121937432, "step": 2177 }, { "epoch": 4.850779510022272, "grad_norm": 22.6440372467041, "learning_rate": 1e-06, "loss": 0.694, "num_input_tokens_seen": 121993900, "step": 2178 }, { "epoch": 4.850779510022272, "loss": 0.8454983234405518, "loss_ce": 0.0002834856859408319, "loss_iou": 0.359375, "loss_num": 0.0252685546875, "loss_xval": 0.84375, "num_input_tokens_seen": 121993900, "step": 2178 }, { "epoch": 4.853006681514477, "grad_norm": 47.70549392700195, "learning_rate": 1e-06, "loss": 0.8097, "num_input_tokens_seen": 122050492, "step": 2179 }, { "epoch": 4.853006681514477, "loss": 0.8886595964431763, "loss_ce": 0.00035399917396716774, "loss_iou": 0.38671875, "loss_num": 0.0228271484375, "loss_xval": 0.88671875, "num_input_tokens_seen": 122050492, "step": 2179 }, { "epoch": 4.855233853006681, "grad_norm": 15.52509593963623, "learning_rate": 1e-06, "loss": 0.6431, "num_input_tokens_seen": 122105120, "step": 2180 }, { "epoch": 4.855233853006681, "loss": 0.5089912414550781, "loss_ce": 0.00020220015721861273, "loss_iou": 0.2001953125, "loss_num": 0.0218505859375, "loss_xval": 0.5078125, "num_input_tokens_seen": 122105120, "step": 2180 }, { "epoch": 4.857461024498886, "grad_norm": 21.397817611694336, "learning_rate": 1e-06, "loss": 0.5721, "num_input_tokens_seen": 122162916, "step": 2181 }, { "epoch": 4.857461024498886, "loss": 0.5020558834075928, "loss_ce": 0.0002248543023597449, "loss_iou": 0.2236328125, "loss_num": 0.01080322265625, "loss_xval": 0.5, "num_input_tokens_seen": 122162916, "step": 2181 }, { "epoch": 4.859688195991091, "grad_norm": 18.195024490356445, "learning_rate": 1e-06, "loss": 0.6435, "num_input_tokens_seen": 122219996, "step": 2182 }, { "epoch": 4.859688195991091, "loss": 0.6611765027046204, "loss_ce": 0.0002878474479075521, "loss_iou": 0.283203125, "loss_num": 0.0191650390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 122219996, "step": 2182 }, { "epoch": 4.861915367483296, "grad_norm": 72.6725082397461, "learning_rate": 1e-06, "loss": 0.7279, "num_input_tokens_seen": 122274148, "step": 2183 }, { "epoch": 4.861915367483296, "loss": 0.6504148244857788, "loss_ce": 0.0002683540806174278, "loss_iou": 0.30078125, "loss_num": 0.010009765625, "loss_xval": 0.6484375, "num_input_tokens_seen": 122274148, "step": 2183 }, { "epoch": 4.864142538975501, "grad_norm": 31.536542892456055, "learning_rate": 1e-06, "loss": 0.8554, "num_input_tokens_seen": 122329776, "step": 2184 }, { "epoch": 4.864142538975501, "loss": 0.7600322365760803, "loss_ce": 0.00026659879949875176, "loss_iou": 0.3125, "loss_num": 0.027099609375, "loss_xval": 0.7578125, "num_input_tokens_seen": 122329776, "step": 2184 }, { "epoch": 4.866369710467706, "grad_norm": 25.865697860717773, "learning_rate": 1e-06, "loss": 0.8837, "num_input_tokens_seen": 122387312, "step": 2185 }, { "epoch": 4.866369710467706, "loss": 0.8069164752960205, "loss_ce": 0.00027582579059526324, "loss_iou": 0.33203125, "loss_num": 0.02880859375, "loss_xval": 0.8046875, "num_input_tokens_seen": 122387312, "step": 2185 }, { "epoch": 4.868596881959911, "grad_norm": 20.460817337036133, "learning_rate": 1e-06, "loss": 0.7866, "num_input_tokens_seen": 122444908, "step": 2186 }, { "epoch": 4.868596881959911, "loss": 0.9665539264678955, "loss_ce": 0.00036741438088938594, "loss_iou": 0.396484375, "loss_num": 0.03466796875, "loss_xval": 0.96484375, "num_input_tokens_seen": 122444908, "step": 2186 }, { "epoch": 4.870824053452115, "grad_norm": 29.2179012298584, "learning_rate": 1e-06, "loss": 0.6808, "num_input_tokens_seen": 122499836, "step": 2187 }, { "epoch": 4.870824053452115, "loss": 0.8551727533340454, "loss_ce": 0.00043644450488500297, "loss_iou": 0.337890625, "loss_num": 0.035400390625, "loss_xval": 0.85546875, "num_input_tokens_seen": 122499836, "step": 2187 }, { "epoch": 4.873051224944321, "grad_norm": 26.670900344848633, "learning_rate": 1e-06, "loss": 0.7637, "num_input_tokens_seen": 122556144, "step": 2188 }, { "epoch": 4.873051224944321, "loss": 0.7377973794937134, "loss_ce": 0.00024859551922418177, "loss_iou": 0.33203125, "loss_num": 0.01507568359375, "loss_xval": 0.73828125, "num_input_tokens_seen": 122556144, "step": 2188 }, { "epoch": 4.875278396436526, "grad_norm": 24.723434448242188, "learning_rate": 1e-06, "loss": 0.72, "num_input_tokens_seen": 122612748, "step": 2189 }, { "epoch": 4.875278396436526, "loss": 0.6694687008857727, "loss_ce": 0.0002792471495922655, "loss_iou": 0.298828125, "loss_num": 0.01422119140625, "loss_xval": 0.66796875, "num_input_tokens_seen": 122612748, "step": 2189 }, { "epoch": 4.877505567928731, "grad_norm": 15.66889476776123, "learning_rate": 1e-06, "loss": 0.6759, "num_input_tokens_seen": 122668312, "step": 2190 }, { "epoch": 4.877505567928731, "loss": 0.7348698973655701, "loss_ce": 0.00025075351004488766, "loss_iou": 0.326171875, "loss_num": 0.0166015625, "loss_xval": 0.734375, "num_input_tokens_seen": 122668312, "step": 2190 }, { "epoch": 4.879732739420936, "grad_norm": 25.57622718811035, "learning_rate": 1e-06, "loss": 0.8345, "num_input_tokens_seen": 122721432, "step": 2191 }, { "epoch": 4.879732739420936, "loss": 0.7397469282150269, "loss_ce": 0.00024497421691194177, "loss_iou": 0.31640625, "loss_num": 0.021240234375, "loss_xval": 0.73828125, "num_input_tokens_seen": 122721432, "step": 2191 }, { "epoch": 4.881959910913141, "grad_norm": 24.592416763305664, "learning_rate": 1e-06, "loss": 0.6403, "num_input_tokens_seen": 122777480, "step": 2192 }, { "epoch": 4.881959910913141, "loss": 0.6197606325149536, "loss_ce": 0.00037585641257464886, "loss_iou": 0.265625, "loss_num": 0.0177001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 122777480, "step": 2192 }, { "epoch": 4.8841870824053455, "grad_norm": 17.536779403686523, "learning_rate": 1e-06, "loss": 0.6577, "num_input_tokens_seen": 122835032, "step": 2193 }, { "epoch": 4.8841870824053455, "loss": 0.7718772888183594, "loss_ce": 0.0002708572428673506, "loss_iou": 0.337890625, "loss_num": 0.0185546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 122835032, "step": 2193 }, { "epoch": 4.88641425389755, "grad_norm": 32.589019775390625, "learning_rate": 1e-06, "loss": 0.9171, "num_input_tokens_seen": 122888828, "step": 2194 }, { "epoch": 4.88641425389755, "loss": 1.2385810613632202, "loss_ce": 0.00029984532739035785, "loss_iou": 0.5234375, "loss_num": 0.03759765625, "loss_xval": 1.234375, "num_input_tokens_seen": 122888828, "step": 2194 }, { "epoch": 4.888641425389755, "grad_norm": 22.544490814208984, "learning_rate": 1e-06, "loss": 0.7052, "num_input_tokens_seen": 122946980, "step": 2195 }, { "epoch": 4.888641425389755, "loss": 0.7754490971565247, "loss_ce": 0.00030256161699071527, "loss_iou": 0.314453125, "loss_num": 0.0296630859375, "loss_xval": 0.7734375, "num_input_tokens_seen": 122946980, "step": 2195 }, { "epoch": 4.89086859688196, "grad_norm": 16.414432525634766, "learning_rate": 1e-06, "loss": 0.6703, "num_input_tokens_seen": 123006736, "step": 2196 }, { "epoch": 4.89086859688196, "loss": 0.6196362972259521, "loss_ce": 0.00025152770103886724, "loss_iou": 0.263671875, "loss_num": 0.0189208984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 123006736, "step": 2196 }, { "epoch": 4.893095768374165, "grad_norm": 21.864694595336914, "learning_rate": 1e-06, "loss": 0.8546, "num_input_tokens_seen": 123065160, "step": 2197 }, { "epoch": 4.893095768374165, "loss": 0.8675484657287598, "loss_ce": 0.00036098493728786707, "loss_iou": 0.365234375, "loss_num": 0.0274658203125, "loss_xval": 0.8671875, "num_input_tokens_seen": 123065160, "step": 2197 }, { "epoch": 4.89532293986637, "grad_norm": 31.04578971862793, "learning_rate": 1e-06, "loss": 0.735, "num_input_tokens_seen": 123117600, "step": 2198 }, { "epoch": 4.89532293986637, "loss": 0.6225305795669556, "loss_ce": 0.00021616063895635307, "loss_iou": 0.275390625, "loss_num": 0.01409912109375, "loss_xval": 0.62109375, "num_input_tokens_seen": 123117600, "step": 2198 }, { "epoch": 4.897550111358575, "grad_norm": 13.26624870300293, "learning_rate": 1e-06, "loss": 0.6433, "num_input_tokens_seen": 123171304, "step": 2199 }, { "epoch": 4.897550111358575, "loss": 0.6789191961288452, "loss_ce": 0.00023874480393715203, "loss_iou": 0.259765625, "loss_num": 0.03173828125, "loss_xval": 0.6796875, "num_input_tokens_seen": 123171304, "step": 2199 }, { "epoch": 4.8997772828507795, "grad_norm": 19.680660247802734, "learning_rate": 1e-06, "loss": 0.8179, "num_input_tokens_seen": 123227968, "step": 2200 }, { "epoch": 4.8997772828507795, "loss": 1.005420446395874, "loss_ce": 0.0002934019430540502, "loss_iou": 0.421875, "loss_num": 0.0322265625, "loss_xval": 1.0078125, "num_input_tokens_seen": 123227968, "step": 2200 }, { "epoch": 4.902004454342984, "grad_norm": 14.69334888458252, "learning_rate": 1e-06, "loss": 0.6017, "num_input_tokens_seen": 123286024, "step": 2201 }, { "epoch": 4.902004454342984, "loss": 0.7753942012786865, "loss_ce": 0.0002477114903740585, "loss_iou": 0.345703125, "loss_num": 0.0164794921875, "loss_xval": 0.7734375, "num_input_tokens_seen": 123286024, "step": 2201 }, { "epoch": 4.904231625835189, "grad_norm": 23.2365665435791, "learning_rate": 1e-06, "loss": 0.6886, "num_input_tokens_seen": 123337296, "step": 2202 }, { "epoch": 4.904231625835189, "loss": 0.6639537811279297, "loss_ce": 0.00037955871084704995, "loss_iou": 0.287109375, "loss_num": 0.017578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 123337296, "step": 2202 }, { "epoch": 4.906458797327394, "grad_norm": 21.47087287902832, "learning_rate": 1e-06, "loss": 0.7853, "num_input_tokens_seen": 123391896, "step": 2203 }, { "epoch": 4.906458797327394, "loss": 0.8630619049072266, "loss_ce": 0.0002689045504666865, "loss_iou": 0.34375, "loss_num": 0.03466796875, "loss_xval": 0.86328125, "num_input_tokens_seen": 123391896, "step": 2203 }, { "epoch": 4.908685968819599, "grad_norm": 15.741186141967773, "learning_rate": 1e-06, "loss": 0.644, "num_input_tokens_seen": 123448656, "step": 2204 }, { "epoch": 4.908685968819599, "loss": 0.6564854383468628, "loss_ce": 0.00023539297399111092, "loss_iou": 0.2734375, "loss_num": 0.021728515625, "loss_xval": 0.65625, "num_input_tokens_seen": 123448656, "step": 2204 }, { "epoch": 4.910913140311804, "grad_norm": 21.5997257232666, "learning_rate": 1e-06, "loss": 0.823, "num_input_tokens_seen": 123503840, "step": 2205 }, { "epoch": 4.910913140311804, "loss": 0.8479044437408447, "loss_ce": 0.00024817389203235507, "loss_iou": 0.35546875, "loss_num": 0.0269775390625, "loss_xval": 0.84765625, "num_input_tokens_seen": 123503840, "step": 2205 }, { "epoch": 4.913140311804009, "grad_norm": 16.790218353271484, "learning_rate": 1e-06, "loss": 0.5964, "num_input_tokens_seen": 123558308, "step": 2206 }, { "epoch": 4.913140311804009, "loss": 0.6172899007797241, "loss_ce": 0.00022445424110628664, "loss_iou": 0.275390625, "loss_num": 0.01287841796875, "loss_xval": 0.6171875, "num_input_tokens_seen": 123558308, "step": 2206 }, { "epoch": 4.9153674832962135, "grad_norm": 66.33501434326172, "learning_rate": 1e-06, "loss": 0.8602, "num_input_tokens_seen": 123615124, "step": 2207 }, { "epoch": 4.9153674832962135, "loss": 0.8113021850585938, "loss_ce": 0.00026704196352511644, "loss_iou": 0.359375, "loss_num": 0.01806640625, "loss_xval": 0.8125, "num_input_tokens_seen": 123615124, "step": 2207 }, { "epoch": 4.917594654788418, "grad_norm": 16.30253028869629, "learning_rate": 1e-06, "loss": 0.4585, "num_input_tokens_seen": 123671784, "step": 2208 }, { "epoch": 4.917594654788418, "loss": 0.4267365336418152, "loss_ce": 0.00022286101011559367, "loss_iou": 0.181640625, "loss_num": 0.01263427734375, "loss_xval": 0.42578125, "num_input_tokens_seen": 123671784, "step": 2208 }, { "epoch": 4.919821826280623, "grad_norm": 16.062482833862305, "learning_rate": 1e-06, "loss": 0.7222, "num_input_tokens_seen": 123729044, "step": 2209 }, { "epoch": 4.919821826280623, "loss": 0.7194496393203735, "loss_ce": 0.0004554773331619799, "loss_iou": 0.298828125, "loss_num": 0.02392578125, "loss_xval": 0.71875, "num_input_tokens_seen": 123729044, "step": 2209 }, { "epoch": 4.922048997772828, "grad_norm": 19.926166534423828, "learning_rate": 1e-06, "loss": 0.9957, "num_input_tokens_seen": 123784456, "step": 2210 }, { "epoch": 4.922048997772828, "loss": 0.9393208026885986, "loss_ce": 0.0003559546312317252, "loss_iou": 0.376953125, "loss_num": 0.037109375, "loss_xval": 0.9375, "num_input_tokens_seen": 123784456, "step": 2210 }, { "epoch": 4.924276169265033, "grad_norm": 26.751087188720703, "learning_rate": 1e-06, "loss": 0.8433, "num_input_tokens_seen": 123839024, "step": 2211 }, { "epoch": 4.924276169265033, "loss": 0.7800534963607788, "loss_ce": 0.0002683330385480076, "loss_iou": 0.330078125, "loss_num": 0.023681640625, "loss_xval": 0.78125, "num_input_tokens_seen": 123839024, "step": 2211 }, { "epoch": 4.926503340757238, "grad_norm": 20.585493087768555, "learning_rate": 1e-06, "loss": 0.6179, "num_input_tokens_seen": 123895700, "step": 2212 }, { "epoch": 4.926503340757238, "loss": 0.6982933282852173, "loss_ce": 0.000295252597425133, "loss_iou": 0.31640625, "loss_num": 0.01312255859375, "loss_xval": 0.69921875, "num_input_tokens_seen": 123895700, "step": 2212 }, { "epoch": 4.928730512249444, "grad_norm": 16.665607452392578, "learning_rate": 1e-06, "loss": 0.7618, "num_input_tokens_seen": 123950376, "step": 2213 }, { "epoch": 4.928730512249444, "loss": 0.5816483497619629, "loss_ce": 0.0002274463913636282, "loss_iou": 0.2490234375, "loss_num": 0.0167236328125, "loss_xval": 0.58203125, "num_input_tokens_seen": 123950376, "step": 2213 }, { "epoch": 4.9309576837416484, "grad_norm": 21.802547454833984, "learning_rate": 1e-06, "loss": 0.6218, "num_input_tokens_seen": 124006408, "step": 2214 }, { "epoch": 4.9309576837416484, "loss": 0.6234120726585388, "loss_ce": 0.00024313261383213103, "loss_iou": 0.259765625, "loss_num": 0.0206298828125, "loss_xval": 0.625, "num_input_tokens_seen": 124006408, "step": 2214 }, { "epoch": 4.933184855233853, "grad_norm": 50.704349517822266, "learning_rate": 1e-06, "loss": 0.7325, "num_input_tokens_seen": 124064376, "step": 2215 }, { "epoch": 4.933184855233853, "loss": 0.8308193683624268, "loss_ce": 0.0004971576854586601, "loss_iou": 0.337890625, "loss_num": 0.0311279296875, "loss_xval": 0.83203125, "num_input_tokens_seen": 124064376, "step": 2215 }, { "epoch": 4.935412026726058, "grad_norm": 18.29207992553711, "learning_rate": 1e-06, "loss": 0.8372, "num_input_tokens_seen": 124120104, "step": 2216 }, { "epoch": 4.935412026726058, "loss": 0.6730678081512451, "loss_ce": 0.0002162476594094187, "loss_iou": 0.283203125, "loss_num": 0.0211181640625, "loss_xval": 0.671875, "num_input_tokens_seen": 124120104, "step": 2216 }, { "epoch": 4.937639198218263, "grad_norm": 18.550386428833008, "learning_rate": 1e-06, "loss": 0.7834, "num_input_tokens_seen": 124175912, "step": 2217 }, { "epoch": 4.937639198218263, "loss": 0.635769784450531, "loss_ce": 0.00027175506693311036, "loss_iou": 0.27734375, "loss_num": 0.0164794921875, "loss_xval": 0.63671875, "num_input_tokens_seen": 124175912, "step": 2217 }, { "epoch": 4.939866369710468, "grad_norm": 20.139272689819336, "learning_rate": 1e-06, "loss": 0.9253, "num_input_tokens_seen": 124231184, "step": 2218 }, { "epoch": 4.939866369710468, "loss": 0.9445996284484863, "loss_ce": 0.00038581539411097765, "loss_iou": 0.365234375, "loss_num": 0.04248046875, "loss_xval": 0.9453125, "num_input_tokens_seen": 124231184, "step": 2218 }, { "epoch": 4.942093541202673, "grad_norm": 23.283292770385742, "learning_rate": 1e-06, "loss": 0.8611, "num_input_tokens_seen": 124288768, "step": 2219 }, { "epoch": 4.942093541202673, "loss": 0.8784899711608887, "loss_ce": 0.0003162057837471366, "loss_iou": 0.36328125, "loss_num": 0.03076171875, "loss_xval": 0.87890625, "num_input_tokens_seen": 124288768, "step": 2219 }, { "epoch": 4.944320712694878, "grad_norm": 15.636147499084473, "learning_rate": 1e-06, "loss": 0.5499, "num_input_tokens_seen": 124344436, "step": 2220 }, { "epoch": 4.944320712694878, "loss": 0.592854380607605, "loss_ce": 0.0003251142334192991, "loss_iou": 0.267578125, "loss_num": 0.01129150390625, "loss_xval": 0.59375, "num_input_tokens_seen": 124344436, "step": 2220 }, { "epoch": 4.9465478841870825, "grad_norm": 14.876840591430664, "learning_rate": 1e-06, "loss": 0.6573, "num_input_tokens_seen": 124400952, "step": 2221 }, { "epoch": 4.9465478841870825, "loss": 0.7517122626304626, "loss_ce": 0.0002474116045050323, "loss_iou": 0.32421875, "loss_num": 0.0205078125, "loss_xval": 0.75, "num_input_tokens_seen": 124400952, "step": 2221 }, { "epoch": 4.948775055679287, "grad_norm": 28.595050811767578, "learning_rate": 1e-06, "loss": 0.901, "num_input_tokens_seen": 124457384, "step": 2222 }, { "epoch": 4.948775055679287, "loss": 0.9143351316452026, "loss_ce": 0.00027260559727437794, "loss_iou": 0.404296875, "loss_num": 0.0213623046875, "loss_xval": 0.9140625, "num_input_tokens_seen": 124457384, "step": 2222 }, { "epoch": 4.951002227171492, "grad_norm": 24.853782653808594, "learning_rate": 1e-06, "loss": 0.7986, "num_input_tokens_seen": 124514252, "step": 2223 }, { "epoch": 4.951002227171492, "loss": 0.6873869895935059, "loss_ce": 0.00043627433478832245, "loss_iou": 0.302734375, "loss_num": 0.0164794921875, "loss_xval": 0.6875, "num_input_tokens_seen": 124514252, "step": 2223 }, { "epoch": 4.953229398663697, "grad_norm": 22.14551544189453, "learning_rate": 1e-06, "loss": 0.691, "num_input_tokens_seen": 124569752, "step": 2224 }, { "epoch": 4.953229398663697, "loss": 0.6962725520133972, "loss_ce": 0.00022760960564482957, "loss_iou": 0.294921875, "loss_num": 0.021240234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 124569752, "step": 2224 }, { "epoch": 4.955456570155902, "grad_norm": 20.33577537536621, "learning_rate": 1e-06, "loss": 0.6513, "num_input_tokens_seen": 124626644, "step": 2225 }, { "epoch": 4.955456570155902, "loss": 0.7576655149459839, "loss_ce": 0.0003413379890844226, "loss_iou": 0.302734375, "loss_num": 0.0306396484375, "loss_xval": 0.7578125, "num_input_tokens_seen": 124626644, "step": 2225 }, { "epoch": 4.957683741648107, "grad_norm": 19.96550178527832, "learning_rate": 1e-06, "loss": 0.4482, "num_input_tokens_seen": 124684520, "step": 2226 }, { "epoch": 4.957683741648107, "loss": 0.46445581316947937, "loss_ce": 0.0002224127674708143, "loss_iou": 0.201171875, "loss_num": 0.01226806640625, "loss_xval": 0.46484375, "num_input_tokens_seen": 124684520, "step": 2226 }, { "epoch": 4.959910913140312, "grad_norm": 16.052396774291992, "learning_rate": 1e-06, "loss": 0.6533, "num_input_tokens_seen": 124739472, "step": 2227 }, { "epoch": 4.959910913140312, "loss": 0.8050886988639832, "loss_ce": 0.0004011690034531057, "loss_iou": 0.33203125, "loss_num": 0.0277099609375, "loss_xval": 0.8046875, "num_input_tokens_seen": 124739472, "step": 2227 }, { "epoch": 4.9621380846325165, "grad_norm": 19.418689727783203, "learning_rate": 1e-06, "loss": 0.7515, "num_input_tokens_seen": 124795168, "step": 2228 }, { "epoch": 4.9621380846325165, "loss": 0.6984738707542419, "loss_ce": 0.00023169181076809764, "loss_iou": 0.287109375, "loss_num": 0.02490234375, "loss_xval": 0.69921875, "num_input_tokens_seen": 124795168, "step": 2228 }, { "epoch": 4.964365256124721, "grad_norm": 19.798582077026367, "learning_rate": 1e-06, "loss": 0.8594, "num_input_tokens_seen": 124850384, "step": 2229 }, { "epoch": 4.964365256124721, "loss": 0.8433953523635864, "loss_ce": 0.0002557524712756276, "loss_iou": 0.349609375, "loss_num": 0.0291748046875, "loss_xval": 0.84375, "num_input_tokens_seen": 124850384, "step": 2229 }, { "epoch": 4.966592427616926, "grad_norm": 28.708772659301758, "learning_rate": 1e-06, "loss": 0.8574, "num_input_tokens_seen": 124905988, "step": 2230 }, { "epoch": 4.966592427616926, "loss": 0.7451651692390442, "loss_ce": 0.00029212021036073565, "loss_iou": 0.306640625, "loss_num": 0.0267333984375, "loss_xval": 0.74609375, "num_input_tokens_seen": 124905988, "step": 2230 }, { "epoch": 4.968819599109131, "grad_norm": 24.822715759277344, "learning_rate": 1e-06, "loss": 0.7711, "num_input_tokens_seen": 124961660, "step": 2231 }, { "epoch": 4.968819599109131, "loss": 0.6456351280212402, "loss_ce": 0.00018830940825864673, "loss_iou": 0.275390625, "loss_num": 0.018798828125, "loss_xval": 0.64453125, "num_input_tokens_seen": 124961660, "step": 2231 }, { "epoch": 4.971046770601336, "grad_norm": 18.94268035888672, "learning_rate": 1e-06, "loss": 0.9299, "num_input_tokens_seen": 125015652, "step": 2232 }, { "epoch": 4.971046770601336, "loss": 0.9597747325897217, "loss_ce": 0.00030205969233065844, "loss_iou": 0.400390625, "loss_num": 0.031494140625, "loss_xval": 0.9609375, "num_input_tokens_seen": 125015652, "step": 2232 }, { "epoch": 4.973273942093542, "grad_norm": 17.5726318359375, "learning_rate": 1e-06, "loss": 0.7039, "num_input_tokens_seen": 125073992, "step": 2233 }, { "epoch": 4.973273942093542, "loss": 0.5961167216300964, "loss_ce": 0.00023052276810631156, "loss_iou": 0.26953125, "loss_num": 0.0115966796875, "loss_xval": 0.59765625, "num_input_tokens_seen": 125073992, "step": 2233 }, { "epoch": 4.9755011135857465, "grad_norm": 33.96272277832031, "learning_rate": 1e-06, "loss": 0.8093, "num_input_tokens_seen": 125130660, "step": 2234 }, { "epoch": 4.9755011135857465, "loss": 0.8671445846557617, "loss_ce": 0.00020118044631090015, "loss_iou": 0.345703125, "loss_num": 0.034423828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 125130660, "step": 2234 }, { "epoch": 4.977728285077951, "grad_norm": 25.479049682617188, "learning_rate": 1e-06, "loss": 0.7355, "num_input_tokens_seen": 125188292, "step": 2235 }, { "epoch": 4.977728285077951, "loss": 0.6173162460327148, "loss_ce": 0.0002507962053641677, "loss_iou": 0.26953125, "loss_num": 0.0157470703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 125188292, "step": 2235 }, { "epoch": 4.979955456570156, "grad_norm": 17.59937286376953, "learning_rate": 1e-06, "loss": 0.5309, "num_input_tokens_seen": 125245472, "step": 2236 }, { "epoch": 4.979955456570156, "loss": 0.4158274531364441, "loss_ce": 0.0002390348818153143, "loss_iou": 0.17578125, "loss_num": 0.012939453125, "loss_xval": 0.416015625, "num_input_tokens_seen": 125245472, "step": 2236 }, { "epoch": 4.982182628062361, "grad_norm": 24.452638626098633, "learning_rate": 1e-06, "loss": 0.7179, "num_input_tokens_seen": 125302944, "step": 2237 }, { "epoch": 4.982182628062361, "loss": 0.5437171459197998, "loss_ce": 0.0002601124288048595, "loss_iou": 0.251953125, "loss_num": 0.007659912109375, "loss_xval": 0.54296875, "num_input_tokens_seen": 125302944, "step": 2237 }, { "epoch": 4.984409799554566, "grad_norm": 15.169961929321289, "learning_rate": 1e-06, "loss": 0.8227, "num_input_tokens_seen": 125362732, "step": 2238 }, { "epoch": 4.984409799554566, "loss": 0.7834569811820984, "loss_ce": 0.00025388289941474795, "loss_iou": 0.34375, "loss_num": 0.01953125, "loss_xval": 0.78125, "num_input_tokens_seen": 125362732, "step": 2238 }, { "epoch": 4.986636971046771, "grad_norm": 15.975359916687012, "learning_rate": 1e-06, "loss": 0.7685, "num_input_tokens_seen": 125419796, "step": 2239 }, { "epoch": 4.986636971046771, "loss": 0.6306621432304382, "loss_ce": 0.0002910442417487502, "loss_iou": 0.265625, "loss_num": 0.019287109375, "loss_xval": 0.62890625, "num_input_tokens_seen": 125419796, "step": 2239 }, { "epoch": 4.988864142538976, "grad_norm": 41.811767578125, "learning_rate": 1e-06, "loss": 0.5101, "num_input_tokens_seen": 125478088, "step": 2240 }, { "epoch": 4.988864142538976, "loss": 0.5742073059082031, "loss_ce": 0.0002326710382476449, "loss_iou": 0.24609375, "loss_num": 0.016357421875, "loss_xval": 0.57421875, "num_input_tokens_seen": 125478088, "step": 2240 }, { "epoch": 4.991091314031181, "grad_norm": 21.81138801574707, "learning_rate": 1e-06, "loss": 0.8395, "num_input_tokens_seen": 125533344, "step": 2241 }, { "epoch": 4.991091314031181, "loss": 0.7543541789054871, "loss_ce": 0.001912805950269103, "loss_iou": 0.287109375, "loss_num": 0.03564453125, "loss_xval": 0.75390625, "num_input_tokens_seen": 125533344, "step": 2241 }, { "epoch": 4.993318485523385, "grad_norm": 30.80375099182129, "learning_rate": 1e-06, "loss": 0.9426, "num_input_tokens_seen": 125588860, "step": 2242 }, { "epoch": 4.993318485523385, "loss": 0.8981271386146545, "loss_ce": 0.0005441233515739441, "loss_iou": 0.384765625, "loss_num": 0.0260009765625, "loss_xval": 0.8984375, "num_input_tokens_seen": 125588860, "step": 2242 }, { "epoch": 4.99554565701559, "grad_norm": 18.436973571777344, "learning_rate": 1e-06, "loss": 0.6176, "num_input_tokens_seen": 125646052, "step": 2243 }, { "epoch": 4.99554565701559, "loss": 0.5616306066513062, "loss_ce": 0.0002292500576004386, "loss_iou": 0.236328125, "loss_num": 0.017822265625, "loss_xval": 0.5625, "num_input_tokens_seen": 125646052, "step": 2243 }, { "epoch": 4.997772828507795, "grad_norm": 20.010271072387695, "learning_rate": 1e-06, "loss": 0.9739, "num_input_tokens_seen": 125701944, "step": 2244 }, { "epoch": 4.997772828507795, "loss": 0.7831923961639404, "loss_ce": 0.0002333829615963623, "loss_iou": 0.34765625, "loss_num": 0.017333984375, "loss_xval": 0.78125, "num_input_tokens_seen": 125701944, "step": 2244 }, { "epoch": 5.0, "grad_norm": 22.875764846801758, "learning_rate": 1e-06, "loss": 0.8608, "num_input_tokens_seen": 125761180, "step": 2245 }, { "epoch": 5.0, "loss": 0.8516440391540527, "loss_ce": 0.0003256534691900015, "loss_iou": 0.349609375, "loss_num": 0.03076171875, "loss_xval": 0.8515625, "num_input_tokens_seen": 125761180, "step": 2245 }, { "epoch": 5.002227171492205, "grad_norm": 32.292686462402344, "learning_rate": 1e-06, "loss": 0.7312, "num_input_tokens_seen": 125816488, "step": 2246 }, { "epoch": 5.002227171492205, "loss": 0.7869899868965149, "loss_ce": 0.00036892099888063967, "loss_iou": 0.341796875, "loss_num": 0.0205078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 125816488, "step": 2246 }, { "epoch": 5.00445434298441, "grad_norm": 22.563552856445312, "learning_rate": 1e-06, "loss": 0.6971, "num_input_tokens_seen": 125871976, "step": 2247 }, { "epoch": 5.00445434298441, "loss": 0.7504916191101074, "loss_ce": 0.0002475018845871091, "loss_iou": 0.333984375, "loss_num": 0.016357421875, "loss_xval": 0.75, "num_input_tokens_seen": 125871976, "step": 2247 }, { "epoch": 5.006681514476615, "grad_norm": 14.274714469909668, "learning_rate": 1e-06, "loss": 0.7045, "num_input_tokens_seen": 125930388, "step": 2248 }, { "epoch": 5.006681514476615, "loss": 0.598191499710083, "loss_ce": 0.000291119038593024, "loss_iou": 0.26953125, "loss_num": 0.01202392578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 125930388, "step": 2248 }, { "epoch": 5.008908685968819, "grad_norm": 16.532169342041016, "learning_rate": 1e-06, "loss": 0.7869, "num_input_tokens_seen": 125986868, "step": 2249 }, { "epoch": 5.008908685968819, "loss": 0.7695136666297913, "loss_ce": 0.0002265319344587624, "loss_iou": 0.337890625, "loss_num": 0.01904296875, "loss_xval": 0.76953125, "num_input_tokens_seen": 125986868, "step": 2249 }, { "epoch": 5.011135857461024, "grad_norm": 14.793770790100098, "learning_rate": 1e-06, "loss": 0.901, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "eval_seeclick_web_CIoU": 0.5627106428146362, "eval_seeclick_web_GIoU": 0.5593923330307007, "eval_seeclick_web_IoU": 0.5781354308128357, "eval_seeclick_web_MAE_all": 0.01731129875406623, "eval_seeclick_web_MAE_h": 0.009559806901961565, "eval_seeclick_web_MAE_w": 0.01916863350197673, "eval_seeclick_web_MAE_x_boxes": 0.00826259353198111, "eval_seeclick_web_MAE_y_boxes": 0.021791240433230996, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9538028240203857, "eval_seeclick_web_loss_ce": 0.00035019248025491834, "eval_seeclick_web_loss_iou": 0.4365234375, "eval_seeclick_web_loss_num": 0.013622283935546875, "eval_seeclick_web_loss_xval": 0.940673828125, "eval_seeclick_web_runtime": 29.6362, "eval_seeclick_web_samples_per_second": 1.687, "eval_seeclick_web_steps_per_second": 0.067, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "eval_icons_CIoU": 0.3190227448940277, "eval_icons_GIoU": 0.3375135064125061, "eval_icons_IoU": 0.38641299307346344, "eval_icons_MAE_all": 0.06205383501946926, "eval_icons_MAE_h": 0.03881475329399109, "eval_icons_MAE_w": 0.06065435893833637, "eval_icons_MAE_x_boxes": 0.06400249153375626, "eval_icons_MAE_y_boxes": 0.037100257351994514, "eval_icons_inside_bbox": 0.6493055522441864, "eval_icons_loss": 1.6723029613494873, "eval_icons_loss_ce": 0.0004043810913572088, "eval_icons_loss_iou": 0.663818359375, "eval_icons_loss_num": 0.05780982971191406, "eval_icons_loss_xval": 1.615234375, "eval_icons_runtime": 27.5554, "eval_icons_samples_per_second": 1.815, "eval_icons_steps_per_second": 0.073, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "eval_screenspot_CIoU": 0.33845322330792743, "eval_screenspot_GIoU": 0.3534944951534271, "eval_screenspot_IoU": 0.41957201560338336, "eval_screenspot_MAE_all": 0.06564747542142868, "eval_screenspot_MAE_h": 0.03897890945275625, "eval_screenspot_MAE_w": 0.07650155077377956, "eval_screenspot_MAE_x_boxes": 0.07791633903980255, "eval_screenspot_MAE_y_boxes": 0.04749378779282173, "eval_screenspot_inside_bbox": 0.6566666762034098, "eval_screenspot_loss": 1.6820822954177856, "eval_screenspot_loss_ce": 0.00038540839644459385, "eval_screenspot_loss_iou": 0.6896158854166666, "eval_screenspot_loss_num": 0.07676951090494792, "eval_screenspot_loss_xval": 1.7643229166666667, "eval_screenspot_runtime": 47.089, "eval_screenspot_samples_per_second": 1.89, "eval_screenspot_steps_per_second": 0.064, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "eval_compot_CIoU": 0.35792700946331024, "eval_compot_GIoU": 0.37573473155498505, "eval_compot_IoU": 0.4099069982767105, "eval_compot_MAE_all": 0.0185648575425148, "eval_compot_MAE_h": 0.008833811618387699, "eval_compot_MAE_w": 0.02340342104434967, "eval_compot_MAE_x_boxes": 0.02984704216942191, "eval_compot_MAE_y_boxes": 0.006203887518495321, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.367824673652649, "eval_compot_loss_ce": 0.00032587358145974576, "eval_compot_loss_iou": 0.623779296875, "eval_compot_loss_num": 0.017635345458984375, "eval_compot_loss_xval": 1.33447265625, "eval_compot_runtime": 24.6368, "eval_compot_samples_per_second": 2.029, "eval_compot_steps_per_second": 0.081, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "eval_custom_ui_val_CIoU": 0.45421523518032497, "eval_custom_ui_val_GIoU": 0.47301916943656075, "eval_custom_ui_val_IoU": 0.5073628789848752, "eval_custom_ui_val_MAE_all": 0.03390951289070977, "eval_custom_ui_val_MAE_h": 0.020180532294842932, "eval_custom_ui_val_MAE_w": 0.03951867276595698, "eval_custom_ui_val_MAE_x_boxes": 0.039389809800518885, "eval_custom_ui_val_MAE_y_boxes": 0.016320706371011004, "eval_custom_ui_val_inside_bbox": 0.7353395091162788, "eval_custom_ui_val_loss": 1.2387813329696655, "eval_custom_ui_val_loss_ce": 0.0004057693327518387, "eval_custom_ui_val_loss_iou": 0.521484375, "eval_custom_ui_val_loss_num": 0.033151202731662326, "eval_custom_ui_val_loss_xval": 1.2088758680555556, "eval_custom_ui_val_runtime": 73.0296, "eval_custom_ui_val_samples_per_second": 3.629, "eval_custom_ui_val_steps_per_second": 0.123, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.011135857461024, "loss": 0.9642201066017151, "loss_ce": 0.00035298120928928256, "loss_iou": 0.419921875, "loss_num": 0.024169921875, "loss_xval": 0.96484375, "num_input_tokens_seen": 126043356, "step": 2250 }, { "epoch": 5.013363028953229, "grad_norm": 21.67910385131836, "learning_rate": 1e-06, "loss": 0.6784, "num_input_tokens_seen": 126099280, "step": 2251 }, { "epoch": 5.013363028953229, "loss": 0.6512374877929688, "loss_ce": 0.0003585785743780434, "loss_iou": 0.28125, "loss_num": 0.017822265625, "loss_xval": 0.65234375, "num_input_tokens_seen": 126099280, "step": 2251 }, { "epoch": 5.015590200445434, "grad_norm": 29.783891677856445, "learning_rate": 1e-06, "loss": 1.0225, "num_input_tokens_seen": 126154972, "step": 2252 }, { "epoch": 5.015590200445434, "loss": 0.9770213961601257, "loss_ce": 0.0007030742126516998, "loss_iou": 0.380859375, "loss_num": 0.04296875, "loss_xval": 0.9765625, "num_input_tokens_seen": 126154972, "step": 2252 }, { "epoch": 5.017817371937639, "grad_norm": 28.99271011352539, "learning_rate": 1e-06, "loss": 0.713, "num_input_tokens_seen": 126208548, "step": 2253 }, { "epoch": 5.017817371937639, "loss": 0.7910330295562744, "loss_ce": 0.0005056494846940041, "loss_iou": 0.3046875, "loss_num": 0.0361328125, "loss_xval": 0.7890625, "num_input_tokens_seen": 126208548, "step": 2253 }, { "epoch": 5.020044543429844, "grad_norm": 13.615692138671875, "learning_rate": 1e-06, "loss": 0.522, "num_input_tokens_seen": 126262364, "step": 2254 }, { "epoch": 5.020044543429844, "loss": 0.4722355008125305, "loss_ce": 0.0002506262972019613, "loss_iou": 0.1796875, "loss_num": 0.022705078125, "loss_xval": 0.47265625, "num_input_tokens_seen": 126262364, "step": 2254 }, { "epoch": 5.022271714922049, "grad_norm": 13.118451118469238, "learning_rate": 1e-06, "loss": 0.7156, "num_input_tokens_seen": 126319680, "step": 2255 }, { "epoch": 5.022271714922049, "loss": 0.7510231733322144, "loss_ce": 0.00029075576458126307, "loss_iou": 0.31640625, "loss_num": 0.0233154296875, "loss_xval": 0.75, "num_input_tokens_seen": 126319680, "step": 2255 }, { "epoch": 5.0244988864142535, "grad_norm": 51.616050720214844, "learning_rate": 1e-06, "loss": 0.9654, "num_input_tokens_seen": 126375108, "step": 2256 }, { "epoch": 5.0244988864142535, "loss": 0.7489110231399536, "loss_ce": 0.00025383190950378776, "loss_iou": 0.3125, "loss_num": 0.025146484375, "loss_xval": 0.75, "num_input_tokens_seen": 126375108, "step": 2256 }, { "epoch": 5.026726057906459, "grad_norm": 15.720102310180664, "learning_rate": 1e-06, "loss": 0.8735, "num_input_tokens_seen": 126432556, "step": 2257 }, { "epoch": 5.026726057906459, "loss": 0.6794352531433105, "loss_ce": 0.00023599226551596075, "loss_iou": 0.30078125, "loss_num": 0.015380859375, "loss_xval": 0.6796875, "num_input_tokens_seen": 126432556, "step": 2257 }, { "epoch": 5.028953229398664, "grad_norm": 35.91153335571289, "learning_rate": 1e-06, "loss": 0.6807, "num_input_tokens_seen": 126489040, "step": 2258 }, { "epoch": 5.028953229398664, "loss": 0.6047276258468628, "loss_ce": 0.00023541940026916564, "loss_iou": 0.2470703125, "loss_num": 0.0220947265625, "loss_xval": 0.60546875, "num_input_tokens_seen": 126489040, "step": 2258 }, { "epoch": 5.031180400890869, "grad_norm": 19.358016967773438, "learning_rate": 1e-06, "loss": 0.7165, "num_input_tokens_seen": 126546532, "step": 2259 }, { "epoch": 5.031180400890869, "loss": 0.5831573605537415, "loss_ce": 0.0006378005491569638, "loss_iou": 0.25390625, "loss_num": 0.01519775390625, "loss_xval": 0.58203125, "num_input_tokens_seen": 126546532, "step": 2259 }, { "epoch": 5.033407572383074, "grad_norm": 29.59214973449707, "learning_rate": 1e-06, "loss": 0.8328, "num_input_tokens_seen": 126600020, "step": 2260 }, { "epoch": 5.033407572383074, "loss": 0.8578157424926758, "loss_ce": 0.0002718048053793609, "loss_iou": 0.365234375, "loss_num": 0.025634765625, "loss_xval": 0.859375, "num_input_tokens_seen": 126600020, "step": 2260 }, { "epoch": 5.035634743875279, "grad_norm": 24.364622116088867, "learning_rate": 1e-06, "loss": 0.6877, "num_input_tokens_seen": 126658112, "step": 2261 }, { "epoch": 5.035634743875279, "loss": 0.7512416839599609, "loss_ce": 0.00026507957954891026, "loss_iou": 0.333984375, "loss_num": 0.0166015625, "loss_xval": 0.75, "num_input_tokens_seen": 126658112, "step": 2261 }, { "epoch": 5.0378619153674835, "grad_norm": 20.649215698242188, "learning_rate": 1e-06, "loss": 0.8893, "num_input_tokens_seen": 126715528, "step": 2262 }, { "epoch": 5.0378619153674835, "loss": 0.9017385244369507, "loss_ce": 0.0002493016654625535, "loss_iou": 0.3359375, "loss_num": 0.04638671875, "loss_xval": 0.90234375, "num_input_tokens_seen": 126715528, "step": 2262 }, { "epoch": 5.040089086859688, "grad_norm": 27.44195556640625, "learning_rate": 1e-06, "loss": 0.531, "num_input_tokens_seen": 126769300, "step": 2263 }, { "epoch": 5.040089086859688, "loss": 0.6221789121627808, "loss_ce": 0.00023064325796440244, "loss_iou": 0.283203125, "loss_num": 0.01092529296875, "loss_xval": 0.62109375, "num_input_tokens_seen": 126769300, "step": 2263 }, { "epoch": 5.042316258351893, "grad_norm": 16.656320571899414, "learning_rate": 1e-06, "loss": 0.5794, "num_input_tokens_seen": 126825972, "step": 2264 }, { "epoch": 5.042316258351893, "loss": 0.6281655430793762, "loss_ce": 0.0002358591154916212, "loss_iou": 0.271484375, "loss_num": 0.0166015625, "loss_xval": 0.62890625, "num_input_tokens_seen": 126825972, "step": 2264 }, { "epoch": 5.044543429844098, "grad_norm": 29.34036636352539, "learning_rate": 1e-06, "loss": 0.7923, "num_input_tokens_seen": 126883048, "step": 2265 }, { "epoch": 5.044543429844098, "loss": 0.6961706876754761, "loss_ce": 0.000247849035076797, "loss_iou": 0.3046875, "loss_num": 0.0169677734375, "loss_xval": 0.6953125, "num_input_tokens_seen": 126883048, "step": 2265 }, { "epoch": 5.046770601336303, "grad_norm": 28.162872314453125, "learning_rate": 1e-06, "loss": 1.0247, "num_input_tokens_seen": 126937628, "step": 2266 }, { "epoch": 5.046770601336303, "loss": 0.8716781139373779, "loss_ce": 0.0003402004367671907, "loss_iou": 0.392578125, "loss_num": 0.0172119140625, "loss_xval": 0.87109375, "num_input_tokens_seen": 126937628, "step": 2266 }, { "epoch": 5.048997772828508, "grad_norm": 20.128053665161133, "learning_rate": 1e-06, "loss": 1.1153, "num_input_tokens_seen": 126989292, "step": 2267 }, { "epoch": 5.048997772828508, "loss": 1.152024507522583, "loss_ce": 0.0004131349269300699, "loss_iou": 0.51171875, "loss_num": 0.02587890625, "loss_xval": 1.1484375, "num_input_tokens_seen": 126989292, "step": 2267 }, { "epoch": 5.051224944320713, "grad_norm": 17.286222457885742, "learning_rate": 1e-06, "loss": 0.5722, "num_input_tokens_seen": 127046740, "step": 2268 }, { "epoch": 5.051224944320713, "loss": 0.39940914511680603, "loss_ce": 0.00023922644322738051, "loss_iou": 0.16015625, "loss_num": 0.015869140625, "loss_xval": 0.3984375, "num_input_tokens_seen": 127046740, "step": 2268 }, { "epoch": 5.0534521158129175, "grad_norm": 17.838388442993164, "learning_rate": 1e-06, "loss": 0.6537, "num_input_tokens_seen": 127103684, "step": 2269 }, { "epoch": 5.0534521158129175, "loss": 0.643367350101471, "loss_ce": 0.0003009595675393939, "loss_iou": 0.291015625, "loss_num": 0.011962890625, "loss_xval": 0.64453125, "num_input_tokens_seen": 127103684, "step": 2269 }, { "epoch": 5.055679287305122, "grad_norm": 22.380460739135742, "learning_rate": 1e-06, "loss": 0.8209, "num_input_tokens_seen": 127160244, "step": 2270 }, { "epoch": 5.055679287305122, "loss": 0.7935837507247925, "loss_ce": 0.000370887661119923, "loss_iou": 0.32421875, "loss_num": 0.028564453125, "loss_xval": 0.79296875, "num_input_tokens_seen": 127160244, "step": 2270 }, { "epoch": 5.057906458797327, "grad_norm": 19.87263298034668, "learning_rate": 1e-06, "loss": 0.7521, "num_input_tokens_seen": 127215716, "step": 2271 }, { "epoch": 5.057906458797327, "loss": 0.8534216284751892, "loss_ce": 0.0002721815253607929, "loss_iou": 0.357421875, "loss_num": 0.027587890625, "loss_xval": 0.8515625, "num_input_tokens_seen": 127215716, "step": 2271 }, { "epoch": 5.060133630289532, "grad_norm": 24.000696182250977, "learning_rate": 1e-06, "loss": 0.5931, "num_input_tokens_seen": 127274152, "step": 2272 }, { "epoch": 5.060133630289532, "loss": 0.5939878225326538, "loss_ce": 0.00023785245139151812, "loss_iou": 0.2451171875, "loss_num": 0.0208740234375, "loss_xval": 0.59375, "num_input_tokens_seen": 127274152, "step": 2272 }, { "epoch": 5.062360801781737, "grad_norm": 17.99005889892578, "learning_rate": 1e-06, "loss": 0.6256, "num_input_tokens_seen": 127330676, "step": 2273 }, { "epoch": 5.062360801781737, "loss": 0.5732445120811462, "loss_ce": 0.00024645167286507785, "loss_iou": 0.244140625, "loss_num": 0.016845703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 127330676, "step": 2273 }, { "epoch": 5.064587973273942, "grad_norm": 17.27025032043457, "learning_rate": 1e-06, "loss": 0.6703, "num_input_tokens_seen": 127385060, "step": 2274 }, { "epoch": 5.064587973273942, "loss": 0.6232882738113403, "loss_ce": 0.000241378482314758, "loss_iou": 0.287109375, "loss_num": 0.00958251953125, "loss_xval": 0.625, "num_input_tokens_seen": 127385060, "step": 2274 }, { "epoch": 5.066815144766147, "grad_norm": 18.195852279663086, "learning_rate": 1e-06, "loss": 0.7011, "num_input_tokens_seen": 127441056, "step": 2275 }, { "epoch": 5.066815144766147, "loss": 0.6917237043380737, "loss_ce": 0.00031748326728120446, "loss_iou": 0.2734375, "loss_num": 0.0289306640625, "loss_xval": 0.69140625, "num_input_tokens_seen": 127441056, "step": 2275 }, { "epoch": 5.0690423162583516, "grad_norm": 17.40542221069336, "learning_rate": 1e-06, "loss": 0.6412, "num_input_tokens_seen": 127495888, "step": 2276 }, { "epoch": 5.0690423162583516, "loss": 0.7902520895004272, "loss_ce": 0.00021307067072484642, "loss_iou": 0.322265625, "loss_num": 0.029052734375, "loss_xval": 0.7890625, "num_input_tokens_seen": 127495888, "step": 2276 }, { "epoch": 5.071269487750556, "grad_norm": 15.051342010498047, "learning_rate": 1e-06, "loss": 0.9653, "num_input_tokens_seen": 127552560, "step": 2277 }, { "epoch": 5.071269487750556, "loss": 1.024023175239563, "loss_ce": 0.0005856686620973051, "loss_iou": 0.43359375, "loss_num": 0.03173828125, "loss_xval": 1.0234375, "num_input_tokens_seen": 127552560, "step": 2277 }, { "epoch": 5.073496659242761, "grad_norm": 15.868587493896484, "learning_rate": 1e-06, "loss": 0.6495, "num_input_tokens_seen": 127609712, "step": 2278 }, { "epoch": 5.073496659242761, "loss": 0.6852548718452454, "loss_ce": 0.00019630559836514294, "loss_iou": 0.29296875, "loss_num": 0.0198974609375, "loss_xval": 0.68359375, "num_input_tokens_seen": 127609712, "step": 2278 }, { "epoch": 5.075723830734967, "grad_norm": 23.950061798095703, "learning_rate": 1e-06, "loss": 0.6507, "num_input_tokens_seen": 127667116, "step": 2279 }, { "epoch": 5.075723830734967, "loss": 0.6977980136871338, "loss_ce": 0.00028824395849369466, "loss_iou": 0.31640625, "loss_num": 0.01287841796875, "loss_xval": 0.69921875, "num_input_tokens_seen": 127667116, "step": 2279 }, { "epoch": 5.077951002227172, "grad_norm": 83.21546936035156, "learning_rate": 1e-06, "loss": 0.6931, "num_input_tokens_seen": 127724152, "step": 2280 }, { "epoch": 5.077951002227172, "loss": 0.575419545173645, "loss_ce": 0.00022418698063120246, "loss_iou": 0.2578125, "loss_num": 0.01214599609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 127724152, "step": 2280 }, { "epoch": 5.080178173719377, "grad_norm": 16.499300003051758, "learning_rate": 1e-06, "loss": 0.7207, "num_input_tokens_seen": 127779712, "step": 2281 }, { "epoch": 5.080178173719377, "loss": 0.7757115364074707, "loss_ce": 0.00025984214153140783, "loss_iou": 0.30078125, "loss_num": 0.034912109375, "loss_xval": 0.77734375, "num_input_tokens_seen": 127779712, "step": 2281 }, { "epoch": 5.082405345211582, "grad_norm": 21.090255737304688, "learning_rate": 1e-06, "loss": 0.8105, "num_input_tokens_seen": 127835904, "step": 2282 }, { "epoch": 5.082405345211582, "loss": 0.7296441793441772, "loss_ce": 0.00027409923495724797, "loss_iou": 0.328125, "loss_num": 0.0142822265625, "loss_xval": 0.73046875, "num_input_tokens_seen": 127835904, "step": 2282 }, { "epoch": 5.0846325167037865, "grad_norm": 21.14578628540039, "learning_rate": 1e-06, "loss": 0.8618, "num_input_tokens_seen": 127892840, "step": 2283 }, { "epoch": 5.0846325167037865, "loss": 0.9588485360145569, "loss_ce": 0.00023040127416606992, "loss_iou": 0.3984375, "loss_num": 0.0322265625, "loss_xval": 0.95703125, "num_input_tokens_seen": 127892840, "step": 2283 }, { "epoch": 5.086859688195991, "grad_norm": 48.02599334716797, "learning_rate": 1e-06, "loss": 0.5714, "num_input_tokens_seen": 127951528, "step": 2284 }, { "epoch": 5.086859688195991, "loss": 0.5024325847625732, "loss_ce": 0.00023529936152044684, "loss_iou": 0.2265625, "loss_num": 0.00994873046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 127951528, "step": 2284 }, { "epoch": 5.089086859688196, "grad_norm": 15.565524101257324, "learning_rate": 1e-06, "loss": 0.8572, "num_input_tokens_seen": 128008636, "step": 2285 }, { "epoch": 5.089086859688196, "loss": 0.9711748361587524, "loss_ce": 0.0002275363076478243, "loss_iou": 0.40625, "loss_num": 0.03125, "loss_xval": 0.97265625, "num_input_tokens_seen": 128008636, "step": 2285 }, { "epoch": 5.091314031180401, "grad_norm": 23.433475494384766, "learning_rate": 1e-06, "loss": 0.8038, "num_input_tokens_seen": 128066100, "step": 2286 }, { "epoch": 5.091314031180401, "loss": 0.787333607673645, "loss_ce": 0.00022421692847274244, "loss_iou": 0.345703125, "loss_num": 0.0194091796875, "loss_xval": 0.7890625, "num_input_tokens_seen": 128066100, "step": 2286 }, { "epoch": 5.093541202672606, "grad_norm": 24.027969360351562, "learning_rate": 1e-06, "loss": 0.7235, "num_input_tokens_seen": 128121516, "step": 2287 }, { "epoch": 5.093541202672606, "loss": 0.7629618644714355, "loss_ce": 0.0002665651263669133, "loss_iou": 0.349609375, "loss_num": 0.012451171875, "loss_xval": 0.76171875, "num_input_tokens_seen": 128121516, "step": 2287 }, { "epoch": 5.095768374164811, "grad_norm": 19.88593292236328, "learning_rate": 1e-06, "loss": 0.9881, "num_input_tokens_seen": 128174484, "step": 2288 }, { "epoch": 5.095768374164811, "loss": 1.1999242305755615, "loss_ce": 0.00021719752112403512, "loss_iou": 0.4921875, "loss_num": 0.042724609375, "loss_xval": 1.203125, "num_input_tokens_seen": 128174484, "step": 2288 }, { "epoch": 5.097995545657016, "grad_norm": 45.70637130737305, "learning_rate": 1e-06, "loss": 0.5952, "num_input_tokens_seen": 128232784, "step": 2289 }, { "epoch": 5.097995545657016, "loss": 0.6135152578353882, "loss_ce": 0.00023402433726005256, "loss_iou": 0.267578125, "loss_num": 0.015869140625, "loss_xval": 0.61328125, "num_input_tokens_seen": 128232784, "step": 2289 }, { "epoch": 5.1002227171492205, "grad_norm": 16.309289932250977, "learning_rate": 1e-06, "loss": 0.7085, "num_input_tokens_seen": 128287280, "step": 2290 }, { "epoch": 5.1002227171492205, "loss": 0.734100878238678, "loss_ce": 0.0002141737350029871, "loss_iou": 0.30859375, "loss_num": 0.02294921875, "loss_xval": 0.734375, "num_input_tokens_seen": 128287280, "step": 2290 }, { "epoch": 5.102449888641425, "grad_norm": 15.12460994720459, "learning_rate": 1e-06, "loss": 0.5811, "num_input_tokens_seen": 128342260, "step": 2291 }, { "epoch": 5.102449888641425, "loss": 0.5832387208938599, "loss_ce": 0.000230930614634417, "loss_iou": 0.2578125, "loss_num": 0.013916015625, "loss_xval": 0.58203125, "num_input_tokens_seen": 128342260, "step": 2291 }, { "epoch": 5.10467706013363, "grad_norm": 16.05999183654785, "learning_rate": 1e-06, "loss": 0.6387, "num_input_tokens_seen": 128394812, "step": 2292 }, { "epoch": 5.10467706013363, "loss": 0.6987403035163879, "loss_ce": 0.00025396081036888063, "loss_iou": 0.298828125, "loss_num": 0.02001953125, "loss_xval": 0.69921875, "num_input_tokens_seen": 128394812, "step": 2292 }, { "epoch": 5.106904231625835, "grad_norm": 17.23577117919922, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 128451808, "step": 2293 }, { "epoch": 5.106904231625835, "loss": 0.5036342740058899, "loss_ce": 0.0002162978344131261, "loss_iou": 0.212890625, "loss_num": 0.0157470703125, "loss_xval": 0.50390625, "num_input_tokens_seen": 128451808, "step": 2293 }, { "epoch": 5.10913140311804, "grad_norm": 28.054798126220703, "learning_rate": 1e-06, "loss": 1.03, "num_input_tokens_seen": 128505588, "step": 2294 }, { "epoch": 5.10913140311804, "loss": 0.8542524576187134, "loss_ce": 0.0002485929289832711, "loss_iou": 0.365234375, "loss_num": 0.02490234375, "loss_xval": 0.85546875, "num_input_tokens_seen": 128505588, "step": 2294 }, { "epoch": 5.111358574610245, "grad_norm": 15.79360580444336, "learning_rate": 1e-06, "loss": 0.7536, "num_input_tokens_seen": 128564004, "step": 2295 }, { "epoch": 5.111358574610245, "loss": 0.846659779548645, "loss_ce": 0.00022423264454118907, "loss_iou": 0.349609375, "loss_num": 0.029541015625, "loss_xval": 0.84765625, "num_input_tokens_seen": 128564004, "step": 2295 }, { "epoch": 5.11358574610245, "grad_norm": 35.329349517822266, "learning_rate": 1e-06, "loss": 0.8545, "num_input_tokens_seen": 128622824, "step": 2296 }, { "epoch": 5.11358574610245, "loss": 0.8776304721832275, "loss_ce": 0.00043314468348398805, "loss_iou": 0.369140625, "loss_num": 0.02783203125, "loss_xval": 0.87890625, "num_input_tokens_seen": 128622824, "step": 2296 }, { "epoch": 5.1158129175946545, "grad_norm": 17.822633743286133, "learning_rate": 1e-06, "loss": 0.7707, "num_input_tokens_seen": 128679316, "step": 2297 }, { "epoch": 5.1158129175946545, "loss": 0.49904176592826843, "loss_ce": 0.0002624738262966275, "loss_iou": 0.201171875, "loss_num": 0.0194091796875, "loss_xval": 0.498046875, "num_input_tokens_seen": 128679316, "step": 2297 }, { "epoch": 5.118040089086859, "grad_norm": 24.6488094329834, "learning_rate": 1e-06, "loss": 0.7886, "num_input_tokens_seen": 128734780, "step": 2298 }, { "epoch": 5.118040089086859, "loss": 0.8342887163162231, "loss_ce": 0.00030440062982961535, "loss_iou": 0.384765625, "loss_num": 0.0133056640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 128734780, "step": 2298 }, { "epoch": 5.120267260579064, "grad_norm": 21.603078842163086, "learning_rate": 1e-06, "loss": 0.8968, "num_input_tokens_seen": 128790756, "step": 2299 }, { "epoch": 5.120267260579064, "loss": 1.144487738609314, "loss_ce": 0.0004448448889888823, "loss_iou": 0.451171875, "loss_num": 0.048095703125, "loss_xval": 1.140625, "num_input_tokens_seen": 128790756, "step": 2299 }, { "epoch": 5.122494432071269, "grad_norm": 22.150482177734375, "learning_rate": 1e-06, "loss": 0.8125, "num_input_tokens_seen": 128847224, "step": 2300 }, { "epoch": 5.122494432071269, "loss": 0.8337835669517517, "loss_ce": 0.0002874800411518663, "loss_iou": 0.330078125, "loss_num": 0.034423828125, "loss_xval": 0.83203125, "num_input_tokens_seen": 128847224, "step": 2300 }, { "epoch": 5.124721603563474, "grad_norm": 17.031408309936523, "learning_rate": 1e-06, "loss": 0.5427, "num_input_tokens_seen": 128904848, "step": 2301 }, { "epoch": 5.124721603563474, "loss": 0.5903265476226807, "loss_ce": 0.00023866641276981682, "loss_iou": 0.2333984375, "loss_num": 0.02490234375, "loss_xval": 0.58984375, "num_input_tokens_seen": 128904848, "step": 2301 }, { "epoch": 5.12694877505568, "grad_norm": 17.657089233398438, "learning_rate": 1e-06, "loss": 0.5752, "num_input_tokens_seen": 128960196, "step": 2302 }, { "epoch": 5.12694877505568, "loss": 0.4570813775062561, "loss_ce": 0.00023321554181165993, "loss_iou": 0.193359375, "loss_num": 0.01397705078125, "loss_xval": 0.45703125, "num_input_tokens_seen": 128960196, "step": 2302 }, { "epoch": 5.129175946547885, "grad_norm": 16.3359317779541, "learning_rate": 1e-06, "loss": 0.6331, "num_input_tokens_seen": 129017636, "step": 2303 }, { "epoch": 5.129175946547885, "loss": 0.6099591255187988, "loss_ce": 0.0005841399542987347, "loss_iou": 0.263671875, "loss_num": 0.0167236328125, "loss_xval": 0.609375, "num_input_tokens_seen": 129017636, "step": 2303 }, { "epoch": 5.131403118040089, "grad_norm": 21.530765533447266, "learning_rate": 1e-06, "loss": 0.9466, "num_input_tokens_seen": 129073348, "step": 2304 }, { "epoch": 5.131403118040089, "loss": 1.1084952354431152, "loss_ce": 0.00034102535573765635, "loss_iou": 0.474609375, "loss_num": 0.03173828125, "loss_xval": 1.109375, "num_input_tokens_seen": 129073348, "step": 2304 }, { "epoch": 5.133630289532294, "grad_norm": 37.55251693725586, "learning_rate": 1e-06, "loss": 0.9198, "num_input_tokens_seen": 129130900, "step": 2305 }, { "epoch": 5.133630289532294, "loss": 0.638685941696167, "loss_ce": 0.0002581804874353111, "loss_iou": 0.267578125, "loss_num": 0.020751953125, "loss_xval": 0.63671875, "num_input_tokens_seen": 129130900, "step": 2305 }, { "epoch": 5.135857461024499, "grad_norm": 28.42352867126465, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 129183076, "step": 2306 }, { "epoch": 5.135857461024499, "loss": 0.6569973826408386, "loss_ce": 0.0002591205993667245, "loss_iou": 0.29296875, "loss_num": 0.0145263671875, "loss_xval": 0.65625, "num_input_tokens_seen": 129183076, "step": 2306 }, { "epoch": 5.138084632516704, "grad_norm": 15.96066951751709, "learning_rate": 1e-06, "loss": 0.8596, "num_input_tokens_seen": 129239480, "step": 2307 }, { "epoch": 5.138084632516704, "loss": 0.7568236589431763, "loss_ce": 0.00023182888980954885, "loss_iou": 0.330078125, "loss_num": 0.01953125, "loss_xval": 0.7578125, "num_input_tokens_seen": 129239480, "step": 2307 }, { "epoch": 5.140311804008909, "grad_norm": 21.160701751708984, "learning_rate": 1e-06, "loss": 0.7788, "num_input_tokens_seen": 129295984, "step": 2308 }, { "epoch": 5.140311804008909, "loss": 0.9611057043075562, "loss_ce": 0.00041232837247662246, "loss_iou": 0.36328125, "loss_num": 0.04638671875, "loss_xval": 0.9609375, "num_input_tokens_seen": 129295984, "step": 2308 }, { "epoch": 5.142538975501114, "grad_norm": 23.850879669189453, "learning_rate": 1e-06, "loss": 0.8738, "num_input_tokens_seen": 129350436, "step": 2309 }, { "epoch": 5.142538975501114, "loss": 0.835231602191925, "loss_ce": 0.0002706579980440438, "loss_iou": 0.337890625, "loss_num": 0.031982421875, "loss_xval": 0.8359375, "num_input_tokens_seen": 129350436, "step": 2309 }, { "epoch": 5.144766146993319, "grad_norm": 40.127952575683594, "learning_rate": 1e-06, "loss": 0.7161, "num_input_tokens_seen": 129406620, "step": 2310 }, { "epoch": 5.144766146993319, "loss": 0.691180944442749, "loss_ce": 0.00026295060524716973, "loss_iou": 0.3125, "loss_num": 0.01348876953125, "loss_xval": 0.69140625, "num_input_tokens_seen": 129406620, "step": 2310 }, { "epoch": 5.146993318485523, "grad_norm": 21.454145431518555, "learning_rate": 1e-06, "loss": 0.5811, "num_input_tokens_seen": 129462148, "step": 2311 }, { "epoch": 5.146993318485523, "loss": 0.5917257070541382, "loss_ce": 0.0002950755297206342, "loss_iou": 0.234375, "loss_num": 0.024658203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 129462148, "step": 2311 }, { "epoch": 5.149220489977728, "grad_norm": 16.070457458496094, "learning_rate": 1e-06, "loss": 0.458, "num_input_tokens_seen": 129518980, "step": 2312 }, { "epoch": 5.149220489977728, "loss": 0.5424692630767822, "loss_ce": 0.0002329152193851769, "loss_iou": 0.2314453125, "loss_num": 0.015869140625, "loss_xval": 0.54296875, "num_input_tokens_seen": 129518980, "step": 2312 }, { "epoch": 5.151447661469933, "grad_norm": 27.548030853271484, "learning_rate": 1e-06, "loss": 0.7495, "num_input_tokens_seen": 129574880, "step": 2313 }, { "epoch": 5.151447661469933, "loss": 0.7275496125221252, "loss_ce": 0.00025467833620496094, "loss_iou": 0.3203125, "loss_num": 0.0169677734375, "loss_xval": 0.7265625, "num_input_tokens_seen": 129574880, "step": 2313 }, { "epoch": 5.153674832962138, "grad_norm": 17.572242736816406, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 129631804, "step": 2314 }, { "epoch": 5.153674832962138, "loss": 0.7023811340332031, "loss_ce": 0.0007209961186163127, "loss_iou": 0.28125, "loss_num": 0.028076171875, "loss_xval": 0.703125, "num_input_tokens_seen": 129631804, "step": 2314 }, { "epoch": 5.155902004454343, "grad_norm": 17.386499404907227, "learning_rate": 1e-06, "loss": 0.8311, "num_input_tokens_seen": 129684280, "step": 2315 }, { "epoch": 5.155902004454343, "loss": 0.8039573431015015, "loss_ce": 0.00024638883769512177, "loss_iou": 0.357421875, "loss_num": 0.017578125, "loss_xval": 0.8046875, "num_input_tokens_seen": 129684280, "step": 2315 }, { "epoch": 5.158129175946548, "grad_norm": 18.181276321411133, "learning_rate": 1e-06, "loss": 0.8983, "num_input_tokens_seen": 129742428, "step": 2316 }, { "epoch": 5.158129175946548, "loss": 0.8774632811546326, "loss_ce": 0.000266014103544876, "loss_iou": 0.34375, "loss_num": 0.037841796875, "loss_xval": 0.87890625, "num_input_tokens_seen": 129742428, "step": 2316 }, { "epoch": 5.160356347438753, "grad_norm": 21.5535888671875, "learning_rate": 1e-06, "loss": 0.8057, "num_input_tokens_seen": 129796972, "step": 2317 }, { "epoch": 5.160356347438753, "loss": 0.9526360630989075, "loss_ce": 0.00024349387967959046, "loss_iou": 0.38671875, "loss_num": 0.0361328125, "loss_xval": 0.953125, "num_input_tokens_seen": 129796972, "step": 2317 }, { "epoch": 5.1625835189309575, "grad_norm": 21.653419494628906, "learning_rate": 1e-06, "loss": 0.7653, "num_input_tokens_seen": 129851752, "step": 2318 }, { "epoch": 5.1625835189309575, "loss": 0.8452595472335815, "loss_ce": 0.0002888377639465034, "loss_iou": 0.345703125, "loss_num": 0.030517578125, "loss_xval": 0.84375, "num_input_tokens_seen": 129851752, "step": 2318 }, { "epoch": 5.164810690423162, "grad_norm": 23.30936622619629, "learning_rate": 1e-06, "loss": 0.7064, "num_input_tokens_seen": 129907124, "step": 2319 }, { "epoch": 5.164810690423162, "loss": 0.541006326675415, "loss_ce": 0.0002348569978494197, "loss_iou": 0.228515625, "loss_num": 0.0164794921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 129907124, "step": 2319 }, { "epoch": 5.167037861915367, "grad_norm": 16.756088256835938, "learning_rate": 1e-06, "loss": 0.5693, "num_input_tokens_seen": 129963388, "step": 2320 }, { "epoch": 5.167037861915367, "loss": 0.481838583946228, "loss_ce": 0.00027121190214529634, "loss_iou": 0.203125, "loss_num": 0.0150146484375, "loss_xval": 0.482421875, "num_input_tokens_seen": 129963388, "step": 2320 }, { "epoch": 5.169265033407572, "grad_norm": 18.584272384643555, "learning_rate": 1e-06, "loss": 0.896, "num_input_tokens_seen": 130019724, "step": 2321 }, { "epoch": 5.169265033407572, "loss": 0.9863563776016235, "loss_ce": 0.00027232500724494457, "loss_iou": 0.41796875, "loss_num": 0.0299072265625, "loss_xval": 0.984375, "num_input_tokens_seen": 130019724, "step": 2321 }, { "epoch": 5.171492204899777, "grad_norm": 23.582571029663086, "learning_rate": 1e-06, "loss": 0.6948, "num_input_tokens_seen": 130077412, "step": 2322 }, { "epoch": 5.171492204899777, "loss": 0.7178100347518921, "loss_ce": 0.0002807402634061873, "loss_iou": 0.3125, "loss_num": 0.018798828125, "loss_xval": 0.71875, "num_input_tokens_seen": 130077412, "step": 2322 }, { "epoch": 5.173719376391982, "grad_norm": 15.536462783813477, "learning_rate": 1e-06, "loss": 0.8434, "num_input_tokens_seen": 130133952, "step": 2323 }, { "epoch": 5.173719376391982, "loss": 0.7571786642074585, "loss_ce": 0.00076994055416435, "loss_iou": 0.32421875, "loss_num": 0.0216064453125, "loss_xval": 0.7578125, "num_input_tokens_seen": 130133952, "step": 2323 }, { "epoch": 5.1759465478841875, "grad_norm": 21.30929946899414, "learning_rate": 1e-06, "loss": 0.832, "num_input_tokens_seen": 130190140, "step": 2324 }, { "epoch": 5.1759465478841875, "loss": 0.9763731956481934, "loss_ce": 0.0002989959320984781, "loss_iou": 0.380859375, "loss_num": 0.042724609375, "loss_xval": 0.9765625, "num_input_tokens_seen": 130190140, "step": 2324 }, { "epoch": 5.178173719376392, "grad_norm": 55.067665100097656, "learning_rate": 1e-06, "loss": 0.8182, "num_input_tokens_seen": 130244552, "step": 2325 }, { "epoch": 5.178173719376392, "loss": 1.0063856840133667, "loss_ce": 0.0002821519155986607, "loss_iou": 0.41796875, "loss_num": 0.033935546875, "loss_xval": 1.0078125, "num_input_tokens_seen": 130244552, "step": 2325 }, { "epoch": 5.180400890868597, "grad_norm": 19.690275192260742, "learning_rate": 1e-06, "loss": 0.9497, "num_input_tokens_seen": 130301252, "step": 2326 }, { "epoch": 5.180400890868597, "loss": 0.765385627746582, "loss_ce": 0.00024891988141462207, "loss_iou": 0.3359375, "loss_num": 0.018310546875, "loss_xval": 0.765625, "num_input_tokens_seen": 130301252, "step": 2326 }, { "epoch": 5.182628062360802, "grad_norm": 14.636940002441406, "learning_rate": 1e-06, "loss": 0.5755, "num_input_tokens_seen": 130357596, "step": 2327 }, { "epoch": 5.182628062360802, "loss": 0.5990309715270996, "loss_ce": 0.00027604633942246437, "loss_iou": 0.26953125, "loss_num": 0.011474609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 130357596, "step": 2327 }, { "epoch": 5.184855233853007, "grad_norm": 19.635517120361328, "learning_rate": 1e-06, "loss": 0.6214, "num_input_tokens_seen": 130413372, "step": 2328 }, { "epoch": 5.184855233853007, "loss": 0.7523684501647949, "loss_ce": 0.00029323421767912805, "loss_iou": 0.31640625, "loss_num": 0.023681640625, "loss_xval": 0.75390625, "num_input_tokens_seen": 130413372, "step": 2328 }, { "epoch": 5.187082405345212, "grad_norm": 15.182498931884766, "learning_rate": 1e-06, "loss": 0.628, "num_input_tokens_seen": 130468056, "step": 2329 }, { "epoch": 5.187082405345212, "loss": 0.513440728187561, "loss_ce": 0.00025709014153108, "loss_iou": 0.220703125, "loss_num": 0.01434326171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 130468056, "step": 2329 }, { "epoch": 5.189309576837417, "grad_norm": 23.259883880615234, "learning_rate": 1e-06, "loss": 0.5599, "num_input_tokens_seen": 130522728, "step": 2330 }, { "epoch": 5.189309576837417, "loss": 0.483401358127594, "loss_ce": 0.00024706803378649056, "loss_iou": 0.1953125, "loss_num": 0.018798828125, "loss_xval": 0.482421875, "num_input_tokens_seen": 130522728, "step": 2330 }, { "epoch": 5.1915367483296215, "grad_norm": 27.320947647094727, "learning_rate": 1e-06, "loss": 0.7893, "num_input_tokens_seen": 130577656, "step": 2331 }, { "epoch": 5.1915367483296215, "loss": 0.6123491525650024, "loss_ce": 0.00028857935103587806, "loss_iou": 0.271484375, "loss_num": 0.0135498046875, "loss_xval": 0.61328125, "num_input_tokens_seen": 130577656, "step": 2331 }, { "epoch": 5.193763919821826, "grad_norm": 74.38851928710938, "learning_rate": 1e-06, "loss": 0.6629, "num_input_tokens_seen": 130635684, "step": 2332 }, { "epoch": 5.193763919821826, "loss": 0.7268493175506592, "loss_ce": 0.0002868149313144386, "loss_iou": 0.326171875, "loss_num": 0.0146484375, "loss_xval": 0.7265625, "num_input_tokens_seen": 130635684, "step": 2332 }, { "epoch": 5.195991091314031, "grad_norm": 21.843446731567383, "learning_rate": 1e-06, "loss": 0.6575, "num_input_tokens_seen": 130693564, "step": 2333 }, { "epoch": 5.195991091314031, "loss": 0.5599232912063599, "loss_ce": 0.00023093904019333422, "loss_iou": 0.251953125, "loss_num": 0.011474609375, "loss_xval": 0.55859375, "num_input_tokens_seen": 130693564, "step": 2333 }, { "epoch": 5.198218262806236, "grad_norm": 27.58201789855957, "learning_rate": 1e-06, "loss": 0.6391, "num_input_tokens_seen": 130751884, "step": 2334 }, { "epoch": 5.198218262806236, "loss": 0.6432796716690063, "loss_ce": 0.00021332071628421545, "loss_iou": 0.28515625, "loss_num": 0.01409912109375, "loss_xval": 0.64453125, "num_input_tokens_seen": 130751884, "step": 2334 }, { "epoch": 5.200445434298441, "grad_norm": 68.57735443115234, "learning_rate": 1e-06, "loss": 0.6312, "num_input_tokens_seen": 130808940, "step": 2335 }, { "epoch": 5.200445434298441, "loss": 0.45080137252807617, "loss_ce": 0.00023984728613868356, "loss_iou": 0.203125, "loss_num": 0.0087890625, "loss_xval": 0.451171875, "num_input_tokens_seen": 130808940, "step": 2335 }, { "epoch": 5.202672605790646, "grad_norm": 23.095355987548828, "learning_rate": 1e-06, "loss": 0.7388, "num_input_tokens_seen": 130864876, "step": 2336 }, { "epoch": 5.202672605790646, "loss": 0.8970399498939514, "loss_ce": 0.0003114146529696882, "loss_iou": 0.380859375, "loss_num": 0.0269775390625, "loss_xval": 0.8984375, "num_input_tokens_seen": 130864876, "step": 2336 }, { "epoch": 5.204899777282851, "grad_norm": 16.609811782836914, "learning_rate": 1e-06, "loss": 0.8964, "num_input_tokens_seen": 130919176, "step": 2337 }, { "epoch": 5.204899777282851, "loss": 1.08896803855896, "loss_ce": 0.0003450649091973901, "loss_iou": 0.484375, "loss_num": 0.024169921875, "loss_xval": 1.0859375, "num_input_tokens_seen": 130919176, "step": 2337 }, { "epoch": 5.2071269487750556, "grad_norm": 17.34803009033203, "learning_rate": 1e-06, "loss": 0.6675, "num_input_tokens_seen": 130977396, "step": 2338 }, { "epoch": 5.2071269487750556, "loss": 0.661956250667572, "loss_ce": 0.00021310552256181836, "loss_iou": 0.259765625, "loss_num": 0.0281982421875, "loss_xval": 0.66015625, "num_input_tokens_seen": 130977396, "step": 2338 }, { "epoch": 5.20935412026726, "grad_norm": 19.648202896118164, "learning_rate": 1e-06, "loss": 0.8492, "num_input_tokens_seen": 131032596, "step": 2339 }, { "epoch": 5.20935412026726, "loss": 0.8113721013069153, "loss_ce": 0.0003369731712155044, "loss_iou": 0.34765625, "loss_num": 0.022705078125, "loss_xval": 0.8125, "num_input_tokens_seen": 131032596, "step": 2339 }, { "epoch": 5.211581291759465, "grad_norm": 28.624279022216797, "learning_rate": 1e-06, "loss": 0.5556, "num_input_tokens_seen": 131089720, "step": 2340 }, { "epoch": 5.211581291759465, "loss": 0.615825891494751, "loss_ce": 0.0003474131808616221, "loss_iou": 0.25390625, "loss_num": 0.021728515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 131089720, "step": 2340 }, { "epoch": 5.21380846325167, "grad_norm": 32.367496490478516, "learning_rate": 1e-06, "loss": 0.8943, "num_input_tokens_seen": 131147500, "step": 2341 }, { "epoch": 5.21380846325167, "loss": 1.0326426029205322, "loss_ce": 0.000355017080437392, "loss_iou": 0.40234375, "loss_num": 0.045166015625, "loss_xval": 1.03125, "num_input_tokens_seen": 131147500, "step": 2341 }, { "epoch": 5.216035634743875, "grad_norm": 32.042381286621094, "learning_rate": 1e-06, "loss": 0.7757, "num_input_tokens_seen": 131202024, "step": 2342 }, { "epoch": 5.216035634743875, "loss": 0.7637057900428772, "loss_ce": 0.0002780459471978247, "loss_iou": 0.31640625, "loss_num": 0.02587890625, "loss_xval": 0.76171875, "num_input_tokens_seen": 131202024, "step": 2342 }, { "epoch": 5.21826280623608, "grad_norm": 24.583831787109375, "learning_rate": 1e-06, "loss": 0.859, "num_input_tokens_seen": 131258048, "step": 2343 }, { "epoch": 5.21826280623608, "loss": 0.815448522567749, "loss_ce": 0.0002629937371239066, "loss_iou": 0.31640625, "loss_num": 0.036865234375, "loss_xval": 0.81640625, "num_input_tokens_seen": 131258048, "step": 2343 }, { "epoch": 5.220489977728285, "grad_norm": 19.044597625732422, "learning_rate": 1e-06, "loss": 0.7225, "num_input_tokens_seen": 131316644, "step": 2344 }, { "epoch": 5.220489977728285, "loss": 0.6471991539001465, "loss_ce": 0.00022645638091489673, "loss_iou": 0.2451171875, "loss_num": 0.03125, "loss_xval": 0.6484375, "num_input_tokens_seen": 131316644, "step": 2344 }, { "epoch": 5.22271714922049, "grad_norm": 16.071741104125977, "learning_rate": 1e-06, "loss": 0.5489, "num_input_tokens_seen": 131372184, "step": 2345 }, { "epoch": 5.22271714922049, "loss": 0.6769649386405945, "loss_ce": 0.00020714016864076257, "loss_iou": 0.296875, "loss_num": 0.0162353515625, "loss_xval": 0.67578125, "num_input_tokens_seen": 131372184, "step": 2345 }, { "epoch": 5.224944320712694, "grad_norm": 18.349136352539062, "learning_rate": 1e-06, "loss": 0.6446, "num_input_tokens_seen": 131430784, "step": 2346 }, { "epoch": 5.224944320712694, "loss": 0.6362409591674805, "loss_ce": 0.00025463022757321596, "loss_iou": 0.28515625, "loss_num": 0.0133056640625, "loss_xval": 0.63671875, "num_input_tokens_seen": 131430784, "step": 2346 }, { "epoch": 5.2271714922049, "grad_norm": 18.53408432006836, "learning_rate": 1e-06, "loss": 0.5966, "num_input_tokens_seen": 131488492, "step": 2347 }, { "epoch": 5.2271714922049, "loss": 0.5330299139022827, "loss_ce": 0.0003150389820802957, "loss_iou": 0.220703125, "loss_num": 0.0184326171875, "loss_xval": 0.53125, "num_input_tokens_seen": 131488492, "step": 2347 }, { "epoch": 5.229398663697105, "grad_norm": 21.853288650512695, "learning_rate": 1e-06, "loss": 0.5483, "num_input_tokens_seen": 131545288, "step": 2348 }, { "epoch": 5.229398663697105, "loss": 0.47664040327072144, "loss_ce": 0.00019999593496322632, "loss_iou": 0.2080078125, "loss_num": 0.01220703125, "loss_xval": 0.4765625, "num_input_tokens_seen": 131545288, "step": 2348 }, { "epoch": 5.23162583518931, "grad_norm": 31.041902542114258, "learning_rate": 1e-06, "loss": 0.8796, "num_input_tokens_seen": 131601784, "step": 2349 }, { "epoch": 5.23162583518931, "loss": 0.9917435646057129, "loss_ce": 0.0002884728601202369, "loss_iou": 0.390625, "loss_num": 0.042236328125, "loss_xval": 0.9921875, "num_input_tokens_seen": 131601784, "step": 2349 }, { "epoch": 5.233853006681515, "grad_norm": 56.980098724365234, "learning_rate": 1e-06, "loss": 0.7844, "num_input_tokens_seen": 131658228, "step": 2350 }, { "epoch": 5.233853006681515, "loss": 0.7722232341766357, "loss_ce": 0.0002506303135305643, "loss_iou": 0.33203125, "loss_num": 0.021728515625, "loss_xval": 0.7734375, "num_input_tokens_seen": 131658228, "step": 2350 }, { "epoch": 5.23608017817372, "grad_norm": 10.815868377685547, "learning_rate": 1e-06, "loss": 0.6814, "num_input_tokens_seen": 131713244, "step": 2351 }, { "epoch": 5.23608017817372, "loss": 0.8227336406707764, "loss_ce": 0.00034591203439049423, "loss_iou": 0.333984375, "loss_num": 0.03125, "loss_xval": 0.82421875, "num_input_tokens_seen": 131713244, "step": 2351 }, { "epoch": 5.2383073496659245, "grad_norm": 34.09395217895508, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 131771288, "step": 2352 }, { "epoch": 5.2383073496659245, "loss": 0.5100328922271729, "loss_ce": 0.0002672626869753003, "loss_iou": 0.2109375, "loss_num": 0.0177001953125, "loss_xval": 0.5078125, "num_input_tokens_seen": 131771288, "step": 2352 }, { "epoch": 5.240534521158129, "grad_norm": 18.458494186401367, "learning_rate": 1e-06, "loss": 0.7453, "num_input_tokens_seen": 131829120, "step": 2353 }, { "epoch": 5.240534521158129, "loss": 0.4893495738506317, "loss_ce": 0.00021382731210906059, "loss_iou": 0.197265625, "loss_num": 0.0191650390625, "loss_xval": 0.48828125, "num_input_tokens_seen": 131829120, "step": 2353 }, { "epoch": 5.242761692650334, "grad_norm": 21.484622955322266, "learning_rate": 1e-06, "loss": 0.8861, "num_input_tokens_seen": 131884604, "step": 2354 }, { "epoch": 5.242761692650334, "loss": 0.9834548830986023, "loss_ce": 0.0003006171027664095, "loss_iou": 0.416015625, "loss_num": 0.030029296875, "loss_xval": 0.984375, "num_input_tokens_seen": 131884604, "step": 2354 }, { "epoch": 5.244988864142539, "grad_norm": 24.594648361206055, "learning_rate": 1e-06, "loss": 0.7533, "num_input_tokens_seen": 131942424, "step": 2355 }, { "epoch": 5.244988864142539, "loss": 0.7038974761962891, "loss_ce": 0.00028417640714906156, "loss_iou": 0.310546875, "loss_num": 0.0162353515625, "loss_xval": 0.703125, "num_input_tokens_seen": 131942424, "step": 2355 }, { "epoch": 5.247216035634744, "grad_norm": 46.79767990112305, "learning_rate": 1e-06, "loss": 0.6838, "num_input_tokens_seen": 131998944, "step": 2356 }, { "epoch": 5.247216035634744, "loss": 0.5929880738258362, "loss_ce": 0.00021467695478349924, "loss_iou": 0.259765625, "loss_num": 0.01446533203125, "loss_xval": 0.59375, "num_input_tokens_seen": 131998944, "step": 2356 }, { "epoch": 5.249443207126949, "grad_norm": 18.105363845825195, "learning_rate": 1e-06, "loss": 0.6012, "num_input_tokens_seen": 132055476, "step": 2357 }, { "epoch": 5.249443207126949, "loss": 0.45707041025161743, "loss_ce": 0.00028330169152468443, "loss_iou": 0.189453125, "loss_num": 0.015380859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 132055476, "step": 2357 }, { "epoch": 5.251670378619154, "grad_norm": 18.88360595703125, "learning_rate": 1e-06, "loss": 0.8412, "num_input_tokens_seen": 132112904, "step": 2358 }, { "epoch": 5.251670378619154, "loss": 0.8789228200912476, "loss_ce": 0.0002607441274449229, "loss_iou": 0.37109375, "loss_num": 0.02685546875, "loss_xval": 0.87890625, "num_input_tokens_seen": 132112904, "step": 2358 }, { "epoch": 5.2538975501113585, "grad_norm": 26.32551383972168, "learning_rate": 1e-06, "loss": 0.8518, "num_input_tokens_seen": 132168780, "step": 2359 }, { "epoch": 5.2538975501113585, "loss": 0.9214595556259155, "loss_ce": 0.0003170058480463922, "loss_iou": 0.42578125, "loss_num": 0.01373291015625, "loss_xval": 0.921875, "num_input_tokens_seen": 132168780, "step": 2359 }, { "epoch": 5.256124721603563, "grad_norm": 39.1221809387207, "learning_rate": 1e-06, "loss": 0.5728, "num_input_tokens_seen": 132223368, "step": 2360 }, { "epoch": 5.256124721603563, "loss": 0.574551522731781, "loss_ce": 0.00021067832130938768, "loss_iou": 0.2265625, "loss_num": 0.024169921875, "loss_xval": 0.57421875, "num_input_tokens_seen": 132223368, "step": 2360 }, { "epoch": 5.258351893095768, "grad_norm": 24.009830474853516, "learning_rate": 1e-06, "loss": 0.8335, "num_input_tokens_seen": 132279872, "step": 2361 }, { "epoch": 5.258351893095768, "loss": 0.8674226999282837, "loss_ce": 0.00023520024842582643, "loss_iou": 0.388671875, "loss_num": 0.017822265625, "loss_xval": 0.8671875, "num_input_tokens_seen": 132279872, "step": 2361 }, { "epoch": 5.260579064587973, "grad_norm": 16.209796905517578, "learning_rate": 1e-06, "loss": 0.8201, "num_input_tokens_seen": 132337356, "step": 2362 }, { "epoch": 5.260579064587973, "loss": 1.021734356880188, "loss_ce": 0.0004940725630149245, "loss_iou": 0.435546875, "loss_num": 0.0302734375, "loss_xval": 1.0234375, "num_input_tokens_seen": 132337356, "step": 2362 }, { "epoch": 5.262806236080178, "grad_norm": 21.380468368530273, "learning_rate": 1e-06, "loss": 0.6083, "num_input_tokens_seen": 132389212, "step": 2363 }, { "epoch": 5.262806236080178, "loss": 0.5677247643470764, "loss_ce": 0.00034196508931927383, "loss_iou": 0.203125, "loss_num": 0.0322265625, "loss_xval": 0.56640625, "num_input_tokens_seen": 132389212, "step": 2363 }, { "epoch": 5.265033407572383, "grad_norm": 18.931297302246094, "learning_rate": 1e-06, "loss": 1.0995, "num_input_tokens_seen": 132445236, "step": 2364 }, { "epoch": 5.265033407572383, "loss": 0.9282673597335815, "loss_ce": 0.0002889031311497092, "loss_iou": 0.3671875, "loss_num": 0.038818359375, "loss_xval": 0.9296875, "num_input_tokens_seen": 132445236, "step": 2364 }, { "epoch": 5.267260579064588, "grad_norm": 51.42475128173828, "learning_rate": 1e-06, "loss": 0.7182, "num_input_tokens_seen": 132499704, "step": 2365 }, { "epoch": 5.267260579064588, "loss": 0.6144786477088928, "loss_ce": 0.00022083369549363852, "loss_iou": 0.2578125, "loss_num": 0.0196533203125, "loss_xval": 0.61328125, "num_input_tokens_seen": 132499704, "step": 2365 }, { "epoch": 5.2694877505567925, "grad_norm": 15.084627151489258, "learning_rate": 1e-06, "loss": 0.5506, "num_input_tokens_seen": 132557716, "step": 2366 }, { "epoch": 5.2694877505567925, "loss": 0.5173279047012329, "loss_ce": 0.00023811672872398049, "loss_iou": 0.2255859375, "loss_num": 0.01312255859375, "loss_xval": 0.515625, "num_input_tokens_seen": 132557716, "step": 2366 }, { "epoch": 5.271714922048997, "grad_norm": 23.721725463867188, "learning_rate": 1e-06, "loss": 0.8121, "num_input_tokens_seen": 132611180, "step": 2367 }, { "epoch": 5.271714922048997, "loss": 0.6462873220443726, "loss_ce": 0.00029126679874025285, "loss_iou": 0.25390625, "loss_num": 0.02734375, "loss_xval": 0.64453125, "num_input_tokens_seen": 132611180, "step": 2367 }, { "epoch": 5.273942093541203, "grad_norm": 15.91121768951416, "learning_rate": 1e-06, "loss": 0.6395, "num_input_tokens_seen": 132667040, "step": 2368 }, { "epoch": 5.273942093541203, "loss": 0.6589342951774597, "loss_ce": 0.00024289448629133403, "loss_iou": 0.279296875, "loss_num": 0.02001953125, "loss_xval": 0.66015625, "num_input_tokens_seen": 132667040, "step": 2368 }, { "epoch": 5.276169265033408, "grad_norm": 17.777984619140625, "learning_rate": 1e-06, "loss": 0.6871, "num_input_tokens_seen": 132722572, "step": 2369 }, { "epoch": 5.276169265033408, "loss": 0.376442015171051, "loss_ce": 0.00022133463062345982, "loss_iou": 0.16015625, "loss_num": 0.0111083984375, "loss_xval": 0.376953125, "num_input_tokens_seen": 132722572, "step": 2369 }, { "epoch": 5.278396436525613, "grad_norm": 25.825546264648438, "learning_rate": 1e-06, "loss": 0.9053, "num_input_tokens_seen": 132780576, "step": 2370 }, { "epoch": 5.278396436525613, "loss": 0.6252577304840088, "loss_ce": 0.0002577258856035769, "loss_iou": 0.283203125, "loss_num": 0.01141357421875, "loss_xval": 0.625, "num_input_tokens_seen": 132780576, "step": 2370 }, { "epoch": 5.280623608017818, "grad_norm": 20.95437240600586, "learning_rate": 1e-06, "loss": 0.7428, "num_input_tokens_seen": 132836940, "step": 2371 }, { "epoch": 5.280623608017818, "loss": 0.8691422343254089, "loss_ce": 0.00024575780844315886, "loss_iou": 0.36328125, "loss_num": 0.0284423828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 132836940, "step": 2371 }, { "epoch": 5.282850779510023, "grad_norm": 16.127458572387695, "learning_rate": 1e-06, "loss": 0.7024, "num_input_tokens_seen": 132891444, "step": 2372 }, { "epoch": 5.282850779510023, "loss": 0.7618474364280701, "loss_ce": 0.0003727960283868015, "loss_iou": 0.31640625, "loss_num": 0.0255126953125, "loss_xval": 0.76171875, "num_input_tokens_seen": 132891444, "step": 2372 }, { "epoch": 5.285077951002227, "grad_norm": 19.69451141357422, "learning_rate": 1e-06, "loss": 0.9777, "num_input_tokens_seen": 132948012, "step": 2373 }, { "epoch": 5.285077951002227, "loss": 0.8320986032485962, "loss_ce": 0.00031142972875386477, "loss_iou": 0.333984375, "loss_num": 0.032470703125, "loss_xval": 0.83203125, "num_input_tokens_seen": 132948012, "step": 2373 }, { "epoch": 5.287305122494432, "grad_norm": 24.581279754638672, "learning_rate": 1e-06, "loss": 1.0444, "num_input_tokens_seen": 133005612, "step": 2374 }, { "epoch": 5.287305122494432, "loss": 0.9788609743118286, "loss_ce": 0.00034534052247181535, "loss_iou": 0.40625, "loss_num": 0.032958984375, "loss_xval": 0.9765625, "num_input_tokens_seen": 133005612, "step": 2374 }, { "epoch": 5.289532293986637, "grad_norm": 20.55710220336914, "learning_rate": 1e-06, "loss": 0.835, "num_input_tokens_seen": 133062216, "step": 2375 }, { "epoch": 5.289532293986637, "loss": 0.9570013880729675, "loss_ce": 0.00033630896359682083, "loss_iou": 0.390625, "loss_num": 0.03466796875, "loss_xval": 0.95703125, "num_input_tokens_seen": 133062216, "step": 2375 }, { "epoch": 5.291759465478842, "grad_norm": 25.395736694335938, "learning_rate": 1e-06, "loss": 0.7496, "num_input_tokens_seen": 133117644, "step": 2376 }, { "epoch": 5.291759465478842, "loss": 0.5810257196426392, "loss_ce": 0.00021513670799322426, "loss_iou": 0.25, "loss_num": 0.0164794921875, "loss_xval": 0.58203125, "num_input_tokens_seen": 133117644, "step": 2376 }, { "epoch": 5.293986636971047, "grad_norm": 58.38376235961914, "learning_rate": 1e-06, "loss": 0.8065, "num_input_tokens_seen": 133176104, "step": 2377 }, { "epoch": 5.293986636971047, "loss": 0.6448827981948853, "loss_ce": 0.0002294883888680488, "loss_iou": 0.28515625, "loss_num": 0.014404296875, "loss_xval": 0.64453125, "num_input_tokens_seen": 133176104, "step": 2377 }, { "epoch": 5.296213808463252, "grad_norm": 47.06428909301758, "learning_rate": 1e-06, "loss": 0.6075, "num_input_tokens_seen": 133231892, "step": 2378 }, { "epoch": 5.296213808463252, "loss": 0.6965197920799255, "loss_ce": 0.00023071446048561484, "loss_iou": 0.287109375, "loss_num": 0.0245361328125, "loss_xval": 0.6953125, "num_input_tokens_seen": 133231892, "step": 2378 }, { "epoch": 5.298440979955457, "grad_norm": 17.276321411132812, "learning_rate": 1e-06, "loss": 0.7652, "num_input_tokens_seen": 133286408, "step": 2379 }, { "epoch": 5.298440979955457, "loss": 0.8292036652565002, "loss_ce": 0.00034621963277459145, "loss_iou": 0.37109375, "loss_num": 0.01708984375, "loss_xval": 0.828125, "num_input_tokens_seen": 133286408, "step": 2379 }, { "epoch": 5.3006681514476615, "grad_norm": 252.65249633789062, "learning_rate": 1e-06, "loss": 0.7102, "num_input_tokens_seen": 133341916, "step": 2380 }, { "epoch": 5.3006681514476615, "loss": 0.6878687143325806, "loss_ce": 0.00024663194199092686, "loss_iou": 0.30859375, "loss_num": 0.01397705078125, "loss_xval": 0.6875, "num_input_tokens_seen": 133341916, "step": 2380 }, { "epoch": 5.302895322939866, "grad_norm": 22.686996459960938, "learning_rate": 1e-06, "loss": 0.7836, "num_input_tokens_seen": 133397788, "step": 2381 }, { "epoch": 5.302895322939866, "loss": 0.9040675759315491, "loss_ce": 0.0002589549985714257, "loss_iou": 0.41015625, "loss_num": 0.0167236328125, "loss_xval": 0.90234375, "num_input_tokens_seen": 133397788, "step": 2381 }, { "epoch": 5.305122494432071, "grad_norm": 18.216825485229492, "learning_rate": 1e-06, "loss": 0.7332, "num_input_tokens_seen": 133455328, "step": 2382 }, { "epoch": 5.305122494432071, "loss": 0.8532497882843018, "loss_ce": 0.000222432630835101, "loss_iou": 0.36328125, "loss_num": 0.02490234375, "loss_xval": 0.8515625, "num_input_tokens_seen": 133455328, "step": 2382 }, { "epoch": 5.307349665924276, "grad_norm": 66.18022918701172, "learning_rate": 1e-06, "loss": 0.7825, "num_input_tokens_seen": 133509936, "step": 2383 }, { "epoch": 5.307349665924276, "loss": 0.7538729906082153, "loss_ce": 0.0002108832122758031, "loss_iou": 0.31640625, "loss_num": 0.0242919921875, "loss_xval": 0.75390625, "num_input_tokens_seen": 133509936, "step": 2383 }, { "epoch": 5.309576837416481, "grad_norm": 19.18753433227539, "learning_rate": 1e-06, "loss": 0.5276, "num_input_tokens_seen": 133566504, "step": 2384 }, { "epoch": 5.309576837416481, "loss": 0.6280398964881897, "loss_ce": 0.00023228148347698152, "loss_iou": 0.26953125, "loss_num": 0.017822265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 133566504, "step": 2384 }, { "epoch": 5.311804008908686, "grad_norm": 21.568161010742188, "learning_rate": 1e-06, "loss": 0.6863, "num_input_tokens_seen": 133624932, "step": 2385 }, { "epoch": 5.311804008908686, "loss": 0.569631814956665, "loss_ce": 0.00029586919117718935, "loss_iou": 0.251953125, "loss_num": 0.01275634765625, "loss_xval": 0.5703125, "num_input_tokens_seen": 133624932, "step": 2385 }, { "epoch": 5.314031180400891, "grad_norm": 19.216602325439453, "learning_rate": 1e-06, "loss": 0.5267, "num_input_tokens_seen": 133679920, "step": 2386 }, { "epoch": 5.314031180400891, "loss": 0.5746643543243408, "loss_ce": 0.00020149891497567296, "loss_iou": 0.2412109375, "loss_num": 0.0185546875, "loss_xval": 0.57421875, "num_input_tokens_seen": 133679920, "step": 2386 }, { "epoch": 5.3162583518930955, "grad_norm": 25.528783798217773, "learning_rate": 1e-06, "loss": 0.8468, "num_input_tokens_seen": 133735508, "step": 2387 }, { "epoch": 5.3162583518930955, "loss": 0.7472808361053467, "loss_ce": 0.00021054709213785827, "loss_iou": 0.318359375, "loss_num": 0.0216064453125, "loss_xval": 0.74609375, "num_input_tokens_seen": 133735508, "step": 2387 }, { "epoch": 5.3184855233853, "grad_norm": 19.144241333007812, "learning_rate": 1e-06, "loss": 0.7418, "num_input_tokens_seen": 133788248, "step": 2388 }, { "epoch": 5.3184855233853, "loss": 0.7324228286743164, "loss_ce": 0.0002451083273626864, "loss_iou": 0.3203125, "loss_num": 0.0186767578125, "loss_xval": 0.73046875, "num_input_tokens_seen": 133788248, "step": 2388 }, { "epoch": 5.320712694877505, "grad_norm": 26.32198143005371, "learning_rate": 1e-06, "loss": 0.6614, "num_input_tokens_seen": 133843512, "step": 2389 }, { "epoch": 5.320712694877505, "loss": 0.6170359253883362, "loss_ce": 0.00021462509175762534, "loss_iou": 0.26171875, "loss_num": 0.01904296875, "loss_xval": 0.6171875, "num_input_tokens_seen": 133843512, "step": 2389 }, { "epoch": 5.32293986636971, "grad_norm": 17.643627166748047, "learning_rate": 1e-06, "loss": 0.6772, "num_input_tokens_seen": 133898856, "step": 2390 }, { "epoch": 5.32293986636971, "loss": 0.6501243710517883, "loss_ce": 0.00022201667889021337, "loss_iou": 0.27734375, "loss_num": 0.01904296875, "loss_xval": 0.6484375, "num_input_tokens_seen": 133898856, "step": 2390 }, { "epoch": 5.325167037861915, "grad_norm": 31.93773651123047, "learning_rate": 1e-06, "loss": 0.7977, "num_input_tokens_seen": 133956748, "step": 2391 }, { "epoch": 5.325167037861915, "loss": 0.6381719708442688, "loss_ce": 0.00023252921528182924, "loss_iou": 0.28125, "loss_num": 0.01531982421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 133956748, "step": 2391 }, { "epoch": 5.327394209354121, "grad_norm": 19.902048110961914, "learning_rate": 1e-06, "loss": 0.6616, "num_input_tokens_seen": 134011496, "step": 2392 }, { "epoch": 5.327394209354121, "loss": 0.6387029886245728, "loss_ce": 0.0007635352667421103, "loss_iou": 0.2890625, "loss_num": 0.01226806640625, "loss_xval": 0.63671875, "num_input_tokens_seen": 134011496, "step": 2392 }, { "epoch": 5.3296213808463255, "grad_norm": 16.311479568481445, "learning_rate": 1e-06, "loss": 0.4794, "num_input_tokens_seen": 134069204, "step": 2393 }, { "epoch": 5.3296213808463255, "loss": 0.4752798080444336, "loss_ce": 0.00018213533621747047, "loss_iou": 0.19921875, "loss_num": 0.01544189453125, "loss_xval": 0.474609375, "num_input_tokens_seen": 134069204, "step": 2393 }, { "epoch": 5.33184855233853, "grad_norm": 39.13882827758789, "learning_rate": 1e-06, "loss": 0.6946, "num_input_tokens_seen": 134126304, "step": 2394 }, { "epoch": 5.33184855233853, "loss": 0.837151288986206, "loss_ce": 0.00023718301963526756, "loss_iou": 0.34375, "loss_num": 0.02978515625, "loss_xval": 0.8359375, "num_input_tokens_seen": 134126304, "step": 2394 }, { "epoch": 5.334075723830735, "grad_norm": 20.73467445373535, "learning_rate": 1e-06, "loss": 0.7741, "num_input_tokens_seen": 134183476, "step": 2395 }, { "epoch": 5.334075723830735, "loss": 0.7032053470611572, "loss_ce": 0.00032445762190036476, "loss_iou": 0.29296875, "loss_num": 0.023681640625, "loss_xval": 0.703125, "num_input_tokens_seen": 134183476, "step": 2395 }, { "epoch": 5.33630289532294, "grad_norm": 25.634654998779297, "learning_rate": 1e-06, "loss": 0.564, "num_input_tokens_seen": 134240096, "step": 2396 }, { "epoch": 5.33630289532294, "loss": 0.699744701385498, "loss_ce": 0.0002818358479999006, "loss_iou": 0.31640625, "loss_num": 0.01373291015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 134240096, "step": 2396 }, { "epoch": 5.338530066815145, "grad_norm": 19.363285064697266, "learning_rate": 1e-06, "loss": 0.4604, "num_input_tokens_seen": 134296184, "step": 2397 }, { "epoch": 5.338530066815145, "loss": 0.5098617076873779, "loss_ce": 0.00034024479100480676, "loss_iou": 0.224609375, "loss_num": 0.01226806640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 134296184, "step": 2397 }, { "epoch": 5.34075723830735, "grad_norm": 31.034711837768555, "learning_rate": 1e-06, "loss": 0.8775, "num_input_tokens_seen": 134347520, "step": 2398 }, { "epoch": 5.34075723830735, "loss": 0.730765163898468, "loss_ce": 0.00029640039429068565, "loss_iou": 0.322265625, "loss_num": 0.01708984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 134347520, "step": 2398 }, { "epoch": 5.342984409799555, "grad_norm": 21.829448699951172, "learning_rate": 1e-06, "loss": 1.003, "num_input_tokens_seen": 134402984, "step": 2399 }, { "epoch": 5.342984409799555, "loss": 1.1158628463745117, "loss_ce": 0.0006285187555477023, "loss_iou": 0.462890625, "loss_num": 0.0380859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 134402984, "step": 2399 }, { "epoch": 5.3452115812917596, "grad_norm": 21.801376342773438, "learning_rate": 1e-06, "loss": 0.8284, "num_input_tokens_seen": 134457812, "step": 2400 }, { "epoch": 5.3452115812917596, "loss": 0.8780108094215393, "loss_ce": 0.00032530241878703237, "loss_iou": 0.380859375, "loss_num": 0.02294921875, "loss_xval": 0.87890625, "num_input_tokens_seen": 134457812, "step": 2400 }, { "epoch": 5.347438752783964, "grad_norm": 33.11177444458008, "learning_rate": 1e-06, "loss": 0.8474, "num_input_tokens_seen": 134512096, "step": 2401 }, { "epoch": 5.347438752783964, "loss": 0.9323829412460327, "loss_ce": 0.0002540295827202499, "loss_iou": 0.41015625, "loss_num": 0.0223388671875, "loss_xval": 0.93359375, "num_input_tokens_seen": 134512096, "step": 2401 }, { "epoch": 5.349665924276169, "grad_norm": 19.453571319580078, "learning_rate": 1e-06, "loss": 0.7402, "num_input_tokens_seen": 134565996, "step": 2402 }, { "epoch": 5.349665924276169, "loss": 0.7348358631134033, "loss_ce": 0.00021668968838639557, "loss_iou": 0.2890625, "loss_num": 0.031494140625, "loss_xval": 0.734375, "num_input_tokens_seen": 134565996, "step": 2402 }, { "epoch": 5.351893095768374, "grad_norm": 26.344636917114258, "learning_rate": 1e-06, "loss": 1.0379, "num_input_tokens_seen": 134623536, "step": 2403 }, { "epoch": 5.351893095768374, "loss": 0.9678047895431519, "loss_ce": 0.0002755335299298167, "loss_iou": 0.416015625, "loss_num": 0.026611328125, "loss_xval": 0.96875, "num_input_tokens_seen": 134623536, "step": 2403 }, { "epoch": 5.354120267260579, "grad_norm": 24.26504898071289, "learning_rate": 1e-06, "loss": 0.7085, "num_input_tokens_seen": 134676664, "step": 2404 }, { "epoch": 5.354120267260579, "loss": 0.7473124265670776, "loss_ce": 0.00024206144735217094, "loss_iou": 0.283203125, "loss_num": 0.0361328125, "loss_xval": 0.74609375, "num_input_tokens_seen": 134676664, "step": 2404 }, { "epoch": 5.356347438752784, "grad_norm": 37.98905563354492, "learning_rate": 1e-06, "loss": 0.6818, "num_input_tokens_seen": 134734292, "step": 2405 }, { "epoch": 5.356347438752784, "loss": 0.8488801717758179, "loss_ce": 0.00024738311185501516, "loss_iou": 0.34765625, "loss_num": 0.0302734375, "loss_xval": 0.84765625, "num_input_tokens_seen": 134734292, "step": 2405 }, { "epoch": 5.358574610244989, "grad_norm": 110.85530090332031, "learning_rate": 1e-06, "loss": 0.5897, "num_input_tokens_seen": 134791600, "step": 2406 }, { "epoch": 5.358574610244989, "loss": 0.5577570796012878, "loss_ce": 0.0002619822043925524, "loss_iou": 0.25390625, "loss_num": 0.00970458984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 134791600, "step": 2406 }, { "epoch": 5.360801781737194, "grad_norm": 25.807218551635742, "learning_rate": 1e-06, "loss": 0.8237, "num_input_tokens_seen": 134847120, "step": 2407 }, { "epoch": 5.360801781737194, "loss": 0.7333089709281921, "loss_ce": 0.0002767038531601429, "loss_iou": 0.29296875, "loss_num": 0.0294189453125, "loss_xval": 0.734375, "num_input_tokens_seen": 134847120, "step": 2407 }, { "epoch": 5.363028953229398, "grad_norm": 23.73634147644043, "learning_rate": 1e-06, "loss": 0.7855, "num_input_tokens_seen": 134901488, "step": 2408 }, { "epoch": 5.363028953229398, "loss": 0.6962625980377197, "loss_ce": 0.000339773076120764, "loss_iou": 0.2890625, "loss_num": 0.0234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 134901488, "step": 2408 }, { "epoch": 5.365256124721603, "grad_norm": 23.408971786499023, "learning_rate": 1e-06, "loss": 0.7951, "num_input_tokens_seen": 134956808, "step": 2409 }, { "epoch": 5.365256124721603, "loss": 0.8180850744247437, "loss_ce": 0.00021401085541583598, "loss_iou": 0.349609375, "loss_num": 0.0240478515625, "loss_xval": 0.81640625, "num_input_tokens_seen": 134956808, "step": 2409 }, { "epoch": 5.367483296213808, "grad_norm": 24.673824310302734, "learning_rate": 1e-06, "loss": 0.6417, "num_input_tokens_seen": 135013228, "step": 2410 }, { "epoch": 5.367483296213808, "loss": 0.3933447301387787, "loss_ce": 0.0002172911772504449, "loss_iou": 0.162109375, "loss_num": 0.01373291015625, "loss_xval": 0.392578125, "num_input_tokens_seen": 135013228, "step": 2410 }, { "epoch": 5.369710467706013, "grad_norm": 19.17323875427246, "learning_rate": 1e-06, "loss": 0.6747, "num_input_tokens_seen": 135070756, "step": 2411 }, { "epoch": 5.369710467706013, "loss": 0.7497762441635132, "loss_ce": 0.00026456580962985754, "loss_iou": 0.326171875, "loss_num": 0.0189208984375, "loss_xval": 0.75, "num_input_tokens_seen": 135070756, "step": 2411 }, { "epoch": 5.371937639198218, "grad_norm": 21.6716365814209, "learning_rate": 1e-06, "loss": 0.7877, "num_input_tokens_seen": 135123704, "step": 2412 }, { "epoch": 5.371937639198218, "loss": 0.9468029737472534, "loss_ce": 0.0002697420713957399, "loss_iou": 0.40234375, "loss_num": 0.028076171875, "loss_xval": 0.9453125, "num_input_tokens_seen": 135123704, "step": 2412 }, { "epoch": 5.374164810690424, "grad_norm": 28.3837833404541, "learning_rate": 1e-06, "loss": 0.8981, "num_input_tokens_seen": 135177764, "step": 2413 }, { "epoch": 5.374164810690424, "loss": 1.0344269275665283, "loss_ce": 0.00024724419927224517, "loss_iou": 0.421875, "loss_num": 0.0380859375, "loss_xval": 1.03125, "num_input_tokens_seen": 135177764, "step": 2413 }, { "epoch": 5.3763919821826285, "grad_norm": 38.46763229370117, "learning_rate": 1e-06, "loss": 0.7782, "num_input_tokens_seen": 135233172, "step": 2414 }, { "epoch": 5.3763919821826285, "loss": 0.8738818764686584, "loss_ce": 0.00034671538742259145, "loss_iou": 0.37890625, "loss_num": 0.0228271484375, "loss_xval": 0.875, "num_input_tokens_seen": 135233172, "step": 2414 }, { "epoch": 5.378619153674833, "grad_norm": 20.032617568969727, "learning_rate": 1e-06, "loss": 0.661, "num_input_tokens_seen": 135287744, "step": 2415 }, { "epoch": 5.378619153674833, "loss": 0.6660585403442383, "loss_ce": 0.00040915622957982123, "loss_iou": 0.2890625, "loss_num": 0.017822265625, "loss_xval": 0.6640625, "num_input_tokens_seen": 135287744, "step": 2415 }, { "epoch": 5.380846325167038, "grad_norm": 33.80507278442383, "learning_rate": 1e-06, "loss": 0.8263, "num_input_tokens_seen": 135344300, "step": 2416 }, { "epoch": 5.380846325167038, "loss": 0.8908694386482239, "loss_ce": 0.0002443883859086782, "loss_iou": 0.39453125, "loss_num": 0.0206298828125, "loss_xval": 0.890625, "num_input_tokens_seen": 135344300, "step": 2416 }, { "epoch": 5.383073496659243, "grad_norm": 18.646997451782227, "learning_rate": 1e-06, "loss": 0.6062, "num_input_tokens_seen": 135402736, "step": 2417 }, { "epoch": 5.383073496659243, "loss": 0.6154400110244751, "loss_ce": 0.0002056759112747386, "loss_iou": 0.279296875, "loss_num": 0.011474609375, "loss_xval": 0.6171875, "num_input_tokens_seen": 135402736, "step": 2417 }, { "epoch": 5.385300668151448, "grad_norm": 27.358457565307617, "learning_rate": 1e-06, "loss": 0.7446, "num_input_tokens_seen": 135457252, "step": 2418 }, { "epoch": 5.385300668151448, "loss": 0.8736233711242676, "loss_ce": 0.0003323230193927884, "loss_iou": 0.38671875, "loss_num": 0.0201416015625, "loss_xval": 0.875, "num_input_tokens_seen": 135457252, "step": 2418 }, { "epoch": 5.387527839643653, "grad_norm": 37.07065200805664, "learning_rate": 1e-06, "loss": 0.9104, "num_input_tokens_seen": 135514764, "step": 2419 }, { "epoch": 5.387527839643653, "loss": 0.8482790589332581, "loss_ce": 0.000378611555788666, "loss_iou": 0.33984375, "loss_num": 0.033447265625, "loss_xval": 0.84765625, "num_input_tokens_seen": 135514764, "step": 2419 }, { "epoch": 5.389755011135858, "grad_norm": 15.80694580078125, "learning_rate": 1e-06, "loss": 0.7015, "num_input_tokens_seen": 135571288, "step": 2420 }, { "epoch": 5.389755011135858, "loss": 0.6910269856452942, "loss_ce": 0.00023109573521651328, "loss_iou": 0.306640625, "loss_num": 0.015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 135571288, "step": 2420 }, { "epoch": 5.3919821826280625, "grad_norm": 16.644014358520508, "learning_rate": 1e-06, "loss": 0.5917, "num_input_tokens_seen": 135627180, "step": 2421 }, { "epoch": 5.3919821826280625, "loss": 0.6370264291763306, "loss_ce": 0.00030768115539103746, "loss_iou": 0.244140625, "loss_num": 0.0299072265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 135627180, "step": 2421 }, { "epoch": 5.394209354120267, "grad_norm": 23.295394897460938, "learning_rate": 1e-06, "loss": 0.851, "num_input_tokens_seen": 135681732, "step": 2422 }, { "epoch": 5.394209354120267, "loss": 0.890271782875061, "loss_ce": 0.0003792433417402208, "loss_iou": 0.37109375, "loss_num": 0.0296630859375, "loss_xval": 0.890625, "num_input_tokens_seen": 135681732, "step": 2422 }, { "epoch": 5.396436525612472, "grad_norm": 17.34937286376953, "learning_rate": 1e-06, "loss": 0.7305, "num_input_tokens_seen": 135739488, "step": 2423 }, { "epoch": 5.396436525612472, "loss": 0.7664504051208496, "loss_ce": 0.00033708903356455266, "loss_iou": 0.314453125, "loss_num": 0.0272216796875, "loss_xval": 0.765625, "num_input_tokens_seen": 135739488, "step": 2423 }, { "epoch": 5.398663697104677, "grad_norm": 17.406497955322266, "learning_rate": 1e-06, "loss": 0.5762, "num_input_tokens_seen": 135793620, "step": 2424 }, { "epoch": 5.398663697104677, "loss": 0.5314889550209045, "loss_ce": 0.00023894087644293904, "loss_iou": 0.2158203125, "loss_num": 0.019775390625, "loss_xval": 0.53125, "num_input_tokens_seen": 135793620, "step": 2424 }, { "epoch": 5.400890868596882, "grad_norm": 15.964725494384766, "learning_rate": 1e-06, "loss": 0.6212, "num_input_tokens_seen": 135850120, "step": 2425 }, { "epoch": 5.400890868596882, "loss": 0.5310900211334229, "loss_ce": 0.0002062540443148464, "loss_iou": 0.2275390625, "loss_num": 0.01507568359375, "loss_xval": 0.53125, "num_input_tokens_seen": 135850120, "step": 2425 }, { "epoch": 5.403118040089087, "grad_norm": 18.38140296936035, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 135901984, "step": 2426 }, { "epoch": 5.403118040089087, "loss": 0.48569074273109436, "loss_ce": 0.0002171028172597289, "loss_iou": 0.21484375, "loss_num": 0.01123046875, "loss_xval": 0.486328125, "num_input_tokens_seen": 135901984, "step": 2426 }, { "epoch": 5.405345211581292, "grad_norm": 17.759723663330078, "learning_rate": 1e-06, "loss": 0.6546, "num_input_tokens_seen": 135956660, "step": 2427 }, { "epoch": 5.405345211581292, "loss": 0.6573410034179688, "loss_ce": 0.00035854033194482327, "loss_iou": 0.28125, "loss_num": 0.0186767578125, "loss_xval": 0.65625, "num_input_tokens_seen": 135956660, "step": 2427 }, { "epoch": 5.4075723830734965, "grad_norm": 18.03878402709961, "learning_rate": 1e-06, "loss": 0.5775, "num_input_tokens_seen": 136013048, "step": 2428 }, { "epoch": 5.4075723830734965, "loss": 0.592266857624054, "loss_ce": 0.00022585978149436414, "loss_iou": 0.2451171875, "loss_num": 0.0201416015625, "loss_xval": 0.59375, "num_input_tokens_seen": 136013048, "step": 2428 }, { "epoch": 5.409799554565701, "grad_norm": 30.479446411132812, "learning_rate": 1e-06, "loss": 0.7551, "num_input_tokens_seen": 136068760, "step": 2429 }, { "epoch": 5.409799554565701, "loss": 1.0719916820526123, "loss_ce": 0.00033644368522800505, "loss_iou": 0.46875, "loss_num": 0.0263671875, "loss_xval": 1.0703125, "num_input_tokens_seen": 136068760, "step": 2429 }, { "epoch": 5.412026726057906, "grad_norm": 22.04293441772461, "learning_rate": 1e-06, "loss": 0.7282, "num_input_tokens_seen": 136122268, "step": 2430 }, { "epoch": 5.412026726057906, "loss": 0.7684208750724792, "loss_ce": 0.00023237511049956083, "loss_iou": 0.306640625, "loss_num": 0.0306396484375, "loss_xval": 0.76953125, "num_input_tokens_seen": 136122268, "step": 2430 }, { "epoch": 5.414253897550111, "grad_norm": 49.4771614074707, "learning_rate": 1e-06, "loss": 0.5288, "num_input_tokens_seen": 136178720, "step": 2431 }, { "epoch": 5.414253897550111, "loss": 0.4244188964366913, "loss_ce": 0.00022456918668467551, "loss_iou": 0.1728515625, "loss_num": 0.01556396484375, "loss_xval": 0.423828125, "num_input_tokens_seen": 136178720, "step": 2431 }, { "epoch": 5.416481069042316, "grad_norm": 31.579322814941406, "learning_rate": 1e-06, "loss": 0.5512, "num_input_tokens_seen": 136237672, "step": 2432 }, { "epoch": 5.416481069042316, "loss": 0.5803066492080688, "loss_ce": 0.00022852106485515833, "loss_iou": 0.2431640625, "loss_num": 0.018798828125, "loss_xval": 0.578125, "num_input_tokens_seen": 136237672, "step": 2432 }, { "epoch": 5.418708240534521, "grad_norm": 30.84029197692871, "learning_rate": 1e-06, "loss": 0.574, "num_input_tokens_seen": 136293760, "step": 2433 }, { "epoch": 5.418708240534521, "loss": 0.6513895988464355, "loss_ce": 0.00026654996327124536, "loss_iou": 0.265625, "loss_num": 0.02392578125, "loss_xval": 0.65234375, "num_input_tokens_seen": 136293760, "step": 2433 }, { "epoch": 5.420935412026726, "grad_norm": 16.020021438598633, "learning_rate": 1e-06, "loss": 0.5865, "num_input_tokens_seen": 136350844, "step": 2434 }, { "epoch": 5.420935412026726, "loss": 0.5578700304031372, "loss_ce": 0.0002528943296056241, "loss_iou": 0.2353515625, "loss_num": 0.017333984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 136350844, "step": 2434 }, { "epoch": 5.4231625835189305, "grad_norm": 22.808626174926758, "learning_rate": 1e-06, "loss": 0.9401, "num_input_tokens_seen": 136404744, "step": 2435 }, { "epoch": 5.4231625835189305, "loss": 0.8737362027168274, "loss_ce": 0.00020110802142880857, "loss_iou": 0.359375, "loss_num": 0.03076171875, "loss_xval": 0.875, "num_input_tokens_seen": 136404744, "step": 2435 }, { "epoch": 5.425389755011135, "grad_norm": 25.185213088989258, "learning_rate": 1e-06, "loss": 0.5613, "num_input_tokens_seen": 136462516, "step": 2436 }, { "epoch": 5.425389755011135, "loss": 0.5026748776435852, "loss_ce": 0.00023346173111349344, "loss_iou": 0.22265625, "loss_num": 0.0113525390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 136462516, "step": 2436 }, { "epoch": 5.427616926503341, "grad_norm": 16.586254119873047, "learning_rate": 1e-06, "loss": 0.7677, "num_input_tokens_seen": 136520164, "step": 2437 }, { "epoch": 5.427616926503341, "loss": 0.6672698259353638, "loss_ce": 0.0002775907050818205, "loss_iou": 0.275390625, "loss_num": 0.0230712890625, "loss_xval": 0.66796875, "num_input_tokens_seen": 136520164, "step": 2437 }, { "epoch": 5.429844097995546, "grad_norm": 15.319040298461914, "learning_rate": 1e-06, "loss": 0.5602, "num_input_tokens_seen": 136579040, "step": 2438 }, { "epoch": 5.429844097995546, "loss": 0.39814403653144836, "loss_ce": 0.00019482253992464393, "loss_iou": 0.16015625, "loss_num": 0.0155029296875, "loss_xval": 0.3984375, "num_input_tokens_seen": 136579040, "step": 2438 }, { "epoch": 5.432071269487751, "grad_norm": 21.42112159729004, "learning_rate": 1e-06, "loss": 0.9151, "num_input_tokens_seen": 136635020, "step": 2439 }, { "epoch": 5.432071269487751, "loss": 0.9438084363937378, "loss_ce": 0.0003270409069955349, "loss_iou": 0.392578125, "loss_num": 0.0322265625, "loss_xval": 0.9453125, "num_input_tokens_seen": 136635020, "step": 2439 }, { "epoch": 5.434298440979956, "grad_norm": 19.967939376831055, "learning_rate": 1e-06, "loss": 0.9586, "num_input_tokens_seen": 136693516, "step": 2440 }, { "epoch": 5.434298440979956, "loss": 0.9183446168899536, "loss_ce": 0.0003758435195777565, "loss_iou": 0.390625, "loss_num": 0.027587890625, "loss_xval": 0.91796875, "num_input_tokens_seen": 136693516, "step": 2440 }, { "epoch": 5.436525612472161, "grad_norm": 21.792882919311523, "learning_rate": 1e-06, "loss": 0.7106, "num_input_tokens_seen": 136750260, "step": 2441 }, { "epoch": 5.436525612472161, "loss": 0.6200574636459351, "loss_ce": 0.00018441499560140073, "loss_iou": 0.2578125, "loss_num": 0.0206298828125, "loss_xval": 0.62109375, "num_input_tokens_seen": 136750260, "step": 2441 }, { "epoch": 5.4387527839643655, "grad_norm": 18.052919387817383, "learning_rate": 1e-06, "loss": 0.5861, "num_input_tokens_seen": 136804772, "step": 2442 }, { "epoch": 5.4387527839643655, "loss": 0.48152902722358704, "loss_ce": 0.000449933490017429, "loss_iou": 0.2021484375, "loss_num": 0.015625, "loss_xval": 0.48046875, "num_input_tokens_seen": 136804772, "step": 2442 }, { "epoch": 5.44097995545657, "grad_norm": 20.881345748901367, "learning_rate": 1e-06, "loss": 0.8096, "num_input_tokens_seen": 136861740, "step": 2443 }, { "epoch": 5.44097995545657, "loss": 0.6257365942001343, "loss_ce": 0.0002483331772964448, "loss_iou": 0.248046875, "loss_num": 0.02587890625, "loss_xval": 0.625, "num_input_tokens_seen": 136861740, "step": 2443 }, { "epoch": 5.443207126948775, "grad_norm": 17.746784210205078, "learning_rate": 1e-06, "loss": 0.7268, "num_input_tokens_seen": 136920452, "step": 2444 }, { "epoch": 5.443207126948775, "loss": 0.8379260897636414, "loss_ce": 0.00027960725128650665, "loss_iou": 0.349609375, "loss_num": 0.02783203125, "loss_xval": 0.8359375, "num_input_tokens_seen": 136920452, "step": 2444 }, { "epoch": 5.44543429844098, "grad_norm": 17.058738708496094, "learning_rate": 1e-06, "loss": 0.8293, "num_input_tokens_seen": 136980184, "step": 2445 }, { "epoch": 5.44543429844098, "loss": 0.8342658281326294, "loss_ce": 0.00028141395887359977, "loss_iou": 0.357421875, "loss_num": 0.024169921875, "loss_xval": 0.8359375, "num_input_tokens_seen": 136980184, "step": 2445 }, { "epoch": 5.447661469933185, "grad_norm": 14.320577621459961, "learning_rate": 1e-06, "loss": 0.6234, "num_input_tokens_seen": 137037052, "step": 2446 }, { "epoch": 5.447661469933185, "loss": 0.6837434768676758, "loss_ce": 0.00027177410083822906, "loss_iou": 0.2734375, "loss_num": 0.027587890625, "loss_xval": 0.68359375, "num_input_tokens_seen": 137037052, "step": 2446 }, { "epoch": 5.44988864142539, "grad_norm": 19.701732635498047, "learning_rate": 1e-06, "loss": 0.6273, "num_input_tokens_seen": 137094316, "step": 2447 }, { "epoch": 5.44988864142539, "loss": 0.5671048164367676, "loss_ce": 0.00021026638569310308, "loss_iou": 0.2578125, "loss_num": 0.01031494140625, "loss_xval": 0.56640625, "num_input_tokens_seen": 137094316, "step": 2447 }, { "epoch": 5.452115812917595, "grad_norm": 47.633155822753906, "learning_rate": 1e-06, "loss": 0.7027, "num_input_tokens_seen": 137152472, "step": 2448 }, { "epoch": 5.452115812917595, "loss": 0.7645977735519409, "loss_ce": 0.00019351180526427925, "loss_iou": 0.33984375, "loss_num": 0.0172119140625, "loss_xval": 0.765625, "num_input_tokens_seen": 137152472, "step": 2448 }, { "epoch": 5.4543429844097995, "grad_norm": 18.31052017211914, "learning_rate": 1e-06, "loss": 0.8306, "num_input_tokens_seen": 137208136, "step": 2449 }, { "epoch": 5.4543429844097995, "loss": 0.5993661284446716, "loss_ce": 0.0002450407773721963, "loss_iou": 0.263671875, "loss_num": 0.01422119140625, "loss_xval": 0.59765625, "num_input_tokens_seen": 137208136, "step": 2449 }, { "epoch": 5.456570155902004, "grad_norm": 19.636415481567383, "learning_rate": 1e-06, "loss": 0.9006, "num_input_tokens_seen": 137262276, "step": 2450 }, { "epoch": 5.456570155902004, "loss": 0.8564736843109131, "loss_ce": 0.0002724617370404303, "loss_iou": 0.3671875, "loss_num": 0.0242919921875, "loss_xval": 0.85546875, "num_input_tokens_seen": 137262276, "step": 2450 }, { "epoch": 5.458797327394209, "grad_norm": 21.784557342529297, "learning_rate": 1e-06, "loss": 0.6682, "num_input_tokens_seen": 137320112, "step": 2451 }, { "epoch": 5.458797327394209, "loss": 0.6689052581787109, "loss_ce": 0.0002041200641542673, "loss_iou": 0.287109375, "loss_num": 0.0191650390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 137320112, "step": 2451 }, { "epoch": 5.461024498886414, "grad_norm": 19.112836837768555, "learning_rate": 1e-06, "loss": 0.7381, "num_input_tokens_seen": 137375784, "step": 2452 }, { "epoch": 5.461024498886414, "loss": 0.6511021852493286, "loss_ce": 0.00022325036115944386, "loss_iou": 0.259765625, "loss_num": 0.0267333984375, "loss_xval": 0.65234375, "num_input_tokens_seen": 137375784, "step": 2452 }, { "epoch": 5.463251670378619, "grad_norm": 25.689016342163086, "learning_rate": 1e-06, "loss": 0.7655, "num_input_tokens_seen": 137431796, "step": 2453 }, { "epoch": 5.463251670378619, "loss": 0.9367631673812866, "loss_ce": 0.0006059646257199347, "loss_iou": 0.376953125, "loss_num": 0.03662109375, "loss_xval": 0.9375, "num_input_tokens_seen": 137431796, "step": 2453 }, { "epoch": 5.465478841870824, "grad_norm": 24.619190216064453, "learning_rate": 1e-06, "loss": 0.8185, "num_input_tokens_seen": 137487700, "step": 2454 }, { "epoch": 5.465478841870824, "loss": 0.683560311794281, "loss_ce": 0.00021070845832582563, "loss_iou": 0.27734375, "loss_num": 0.02587890625, "loss_xval": 0.68359375, "num_input_tokens_seen": 137487700, "step": 2454 }, { "epoch": 5.467706013363029, "grad_norm": 19.307693481445312, "learning_rate": 1e-06, "loss": 0.6936, "num_input_tokens_seen": 137544564, "step": 2455 }, { "epoch": 5.467706013363029, "loss": 0.6617897748947144, "loss_ce": 0.0002907552698161453, "loss_iou": 0.27734375, "loss_num": 0.021484375, "loss_xval": 0.66015625, "num_input_tokens_seen": 137544564, "step": 2455 }, { "epoch": 5.4699331848552335, "grad_norm": 20.65030860900879, "learning_rate": 1e-06, "loss": 0.8517, "num_input_tokens_seen": 137602024, "step": 2456 }, { "epoch": 5.4699331848552335, "loss": 0.7545545101165771, "loss_ce": 0.0004040825879201293, "loss_iou": 0.29296875, "loss_num": 0.033447265625, "loss_xval": 0.75390625, "num_input_tokens_seen": 137602024, "step": 2456 }, { "epoch": 5.472160356347438, "grad_norm": 19.01409339904785, "learning_rate": 1e-06, "loss": 0.593, "num_input_tokens_seen": 137657656, "step": 2457 }, { "epoch": 5.472160356347438, "loss": 0.577867329120636, "loss_ce": 0.00023062352556735277, "loss_iou": 0.2431640625, "loss_num": 0.018310546875, "loss_xval": 0.578125, "num_input_tokens_seen": 137657656, "step": 2457 }, { "epoch": 5.474387527839644, "grad_norm": 25.600095748901367, "learning_rate": 1e-06, "loss": 0.9298, "num_input_tokens_seen": 137713816, "step": 2458 }, { "epoch": 5.474387527839644, "loss": 0.7569461464881897, "loss_ce": 0.00023227237397804856, "loss_iou": 0.34375, "loss_num": 0.01361083984375, "loss_xval": 0.7578125, "num_input_tokens_seen": 137713816, "step": 2458 }, { "epoch": 5.476614699331849, "grad_norm": 16.993986129760742, "learning_rate": 1e-06, "loss": 0.695, "num_input_tokens_seen": 137771028, "step": 2459 }, { "epoch": 5.476614699331849, "loss": 0.7477880120277405, "loss_ce": 0.00022941670613363385, "loss_iou": 0.302734375, "loss_num": 0.0279541015625, "loss_xval": 0.74609375, "num_input_tokens_seen": 137771028, "step": 2459 }, { "epoch": 5.478841870824054, "grad_norm": 25.62329864501953, "learning_rate": 1e-06, "loss": 0.755, "num_input_tokens_seen": 137826816, "step": 2460 }, { "epoch": 5.478841870824054, "loss": 0.8492061495780945, "loss_ce": 0.0003292199980933219, "loss_iou": 0.35546875, "loss_num": 0.02734375, "loss_xval": 0.84765625, "num_input_tokens_seen": 137826816, "step": 2460 }, { "epoch": 5.481069042316259, "grad_norm": 18.305076599121094, "learning_rate": 1e-06, "loss": 0.6416, "num_input_tokens_seen": 137879896, "step": 2461 }, { "epoch": 5.481069042316259, "loss": 0.6674618721008301, "loss_ce": 0.00022550678113475442, "loss_iou": 0.30078125, "loss_num": 0.01275634765625, "loss_xval": 0.66796875, "num_input_tokens_seen": 137879896, "step": 2461 }, { "epoch": 5.4832962138084635, "grad_norm": 14.69758129119873, "learning_rate": 1e-06, "loss": 0.5432, "num_input_tokens_seen": 137937088, "step": 2462 }, { "epoch": 5.4832962138084635, "loss": 0.4212685227394104, "loss_ce": 0.0002480166731402278, "loss_iou": 0.1875, "loss_num": 0.009033203125, "loss_xval": 0.421875, "num_input_tokens_seen": 137937088, "step": 2462 }, { "epoch": 5.485523385300668, "grad_norm": 15.696910858154297, "learning_rate": 1e-06, "loss": 0.8152, "num_input_tokens_seen": 137994708, "step": 2463 }, { "epoch": 5.485523385300668, "loss": 0.7479099035263062, "loss_ce": 0.00022926190285943449, "loss_iou": 0.328125, "loss_num": 0.0185546875, "loss_xval": 0.74609375, "num_input_tokens_seen": 137994708, "step": 2463 }, { "epoch": 5.487750556792873, "grad_norm": 14.623006820678711, "learning_rate": 1e-06, "loss": 0.5012, "num_input_tokens_seen": 138052660, "step": 2464 }, { "epoch": 5.487750556792873, "loss": 0.48292332887649536, "loss_ce": 0.00025733254733495414, "loss_iou": 0.193359375, "loss_num": 0.019287109375, "loss_xval": 0.482421875, "num_input_tokens_seen": 138052660, "step": 2464 }, { "epoch": 5.489977728285078, "grad_norm": 60.842769622802734, "learning_rate": 1e-06, "loss": 0.7927, "num_input_tokens_seen": 138108180, "step": 2465 }, { "epoch": 5.489977728285078, "loss": 0.8183541297912598, "loss_ce": 0.00023886302369646728, "loss_iou": 0.3203125, "loss_num": 0.035888671875, "loss_xval": 0.81640625, "num_input_tokens_seen": 138108180, "step": 2465 }, { "epoch": 5.492204899777283, "grad_norm": 13.957056045532227, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 138166092, "step": 2466 }, { "epoch": 5.492204899777283, "loss": 0.6444277763366699, "loss_ce": 0.0003848428023047745, "loss_iou": 0.2734375, "loss_num": 0.01953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 138166092, "step": 2466 }, { "epoch": 5.494432071269488, "grad_norm": 21.401012420654297, "learning_rate": 1e-06, "loss": 0.7396, "num_input_tokens_seen": 138223400, "step": 2467 }, { "epoch": 5.494432071269488, "loss": 0.7348067760467529, "loss_ce": 0.0003097508451901376, "loss_iou": 0.3046875, "loss_num": 0.0247802734375, "loss_xval": 0.734375, "num_input_tokens_seen": 138223400, "step": 2467 }, { "epoch": 5.496659242761693, "grad_norm": 31.345884323120117, "learning_rate": 1e-06, "loss": 0.6474, "num_input_tokens_seen": 138281468, "step": 2468 }, { "epoch": 5.496659242761693, "loss": 0.5778509378433228, "loss_ce": 0.00021416423260234296, "loss_iou": 0.255859375, "loss_num": 0.0133056640625, "loss_xval": 0.578125, "num_input_tokens_seen": 138281468, "step": 2468 }, { "epoch": 5.498886414253898, "grad_norm": 14.698542594909668, "learning_rate": 1e-06, "loss": 0.5857, "num_input_tokens_seen": 138337216, "step": 2469 }, { "epoch": 5.498886414253898, "loss": 0.5800629258155823, "loss_ce": 0.00022892668494023383, "loss_iou": 0.2412109375, "loss_num": 0.01953125, "loss_xval": 0.578125, "num_input_tokens_seen": 138337216, "step": 2469 }, { "epoch": 5.501113585746102, "grad_norm": 18.81879425048828, "learning_rate": 1e-06, "loss": 0.5896, "num_input_tokens_seen": 138395452, "step": 2470 }, { "epoch": 5.501113585746102, "loss": 0.6011573672294617, "loss_ce": 0.00020525051513686776, "loss_iou": 0.26171875, "loss_num": 0.015625, "loss_xval": 0.6015625, "num_input_tokens_seen": 138395452, "step": 2470 }, { "epoch": 5.503340757238307, "grad_norm": 14.421637535095215, "learning_rate": 1e-06, "loss": 0.5715, "num_input_tokens_seen": 138453528, "step": 2471 }, { "epoch": 5.503340757238307, "loss": 0.5524652600288391, "loss_ce": 0.00021915776596870273, "loss_iou": 0.2216796875, "loss_num": 0.02197265625, "loss_xval": 0.55078125, "num_input_tokens_seen": 138453528, "step": 2471 }, { "epoch": 5.505567928730512, "grad_norm": 32.5775146484375, "learning_rate": 1e-06, "loss": 0.5319, "num_input_tokens_seen": 138508352, "step": 2472 }, { "epoch": 5.505567928730512, "loss": 0.5072603225708008, "loss_ce": 0.00018024971359409392, "loss_iou": 0.224609375, "loss_num": 0.01141357421875, "loss_xval": 0.5078125, "num_input_tokens_seen": 138508352, "step": 2472 }, { "epoch": 5.507795100222717, "grad_norm": 14.467394828796387, "learning_rate": 1e-06, "loss": 0.9051, "num_input_tokens_seen": 138562664, "step": 2473 }, { "epoch": 5.507795100222717, "loss": 1.1971768140792847, "loss_ce": 0.0003995007718913257, "loss_iou": 0.474609375, "loss_num": 0.04931640625, "loss_xval": 1.1953125, "num_input_tokens_seen": 138562664, "step": 2473 }, { "epoch": 5.510022271714922, "grad_norm": 15.590438842773438, "learning_rate": 1e-06, "loss": 0.7975, "num_input_tokens_seen": 138614632, "step": 2474 }, { "epoch": 5.510022271714922, "loss": 0.7288182973861694, "loss_ce": 0.00030269764829427004, "loss_iou": 0.32421875, "loss_num": 0.0162353515625, "loss_xval": 0.7265625, "num_input_tokens_seen": 138614632, "step": 2474 }, { "epoch": 5.512249443207127, "grad_norm": 16.527565002441406, "learning_rate": 1e-06, "loss": 0.6321, "num_input_tokens_seen": 138672144, "step": 2475 }, { "epoch": 5.512249443207127, "loss": 0.5714784860610962, "loss_ce": 0.00018944795010611415, "loss_iou": 0.2255859375, "loss_num": 0.02392578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 138672144, "step": 2475 }, { "epoch": 5.514476614699332, "grad_norm": 16.16336441040039, "learning_rate": 1e-06, "loss": 0.6376, "num_input_tokens_seen": 138727908, "step": 2476 }, { "epoch": 5.514476614699332, "loss": 0.5442032814025879, "loss_ce": 0.0002579318534117192, "loss_iou": 0.2421875, "loss_num": 0.01177978515625, "loss_xval": 0.54296875, "num_input_tokens_seen": 138727908, "step": 2476 }, { "epoch": 5.5167037861915365, "grad_norm": 19.183643341064453, "learning_rate": 1e-06, "loss": 0.6079, "num_input_tokens_seen": 138783588, "step": 2477 }, { "epoch": 5.5167037861915365, "loss": 0.5135550498962402, "loss_ce": 0.00037142602377571166, "loss_iou": 0.1826171875, "loss_num": 0.029541015625, "loss_xval": 0.51171875, "num_input_tokens_seen": 138783588, "step": 2477 }, { "epoch": 5.518930957683741, "grad_norm": 34.50285339355469, "learning_rate": 1e-06, "loss": 0.588, "num_input_tokens_seen": 138836984, "step": 2478 }, { "epoch": 5.518930957683741, "loss": 0.5181179046630859, "loss_ce": 0.00029565999284386635, "loss_iou": 0.2001953125, "loss_num": 0.0233154296875, "loss_xval": 0.51953125, "num_input_tokens_seen": 138836984, "step": 2478 }, { "epoch": 5.521158129175946, "grad_norm": 16.072481155395508, "learning_rate": 1e-06, "loss": 0.5809, "num_input_tokens_seen": 138895444, "step": 2479 }, { "epoch": 5.521158129175946, "loss": 0.4480636417865753, "loss_ce": 0.00018766321591101587, "loss_iou": 0.1806640625, "loss_num": 0.0174560546875, "loss_xval": 0.447265625, "num_input_tokens_seen": 138895444, "step": 2479 }, { "epoch": 5.523385300668151, "grad_norm": 27.131000518798828, "learning_rate": 1e-06, "loss": 0.8815, "num_input_tokens_seen": 138948860, "step": 2480 }, { "epoch": 5.523385300668151, "loss": 0.9381879568099976, "loss_ce": 0.00019970518769696355, "loss_iou": 0.390625, "loss_num": 0.03173828125, "loss_xval": 0.9375, "num_input_tokens_seen": 138948860, "step": 2480 }, { "epoch": 5.525612472160356, "grad_norm": 21.947586059570312, "learning_rate": 1e-06, "loss": 0.6556, "num_input_tokens_seen": 139003808, "step": 2481 }, { "epoch": 5.525612472160356, "loss": 0.6769753694534302, "loss_ce": 0.0004616759833879769, "loss_iou": 0.287109375, "loss_num": 0.020263671875, "loss_xval": 0.67578125, "num_input_tokens_seen": 139003808, "step": 2481 }, { "epoch": 5.527839643652561, "grad_norm": 18.34388542175293, "learning_rate": 1e-06, "loss": 1.0344, "num_input_tokens_seen": 139060604, "step": 2482 }, { "epoch": 5.527839643652561, "loss": 1.1095937490463257, "loss_ce": 0.00021878261759411544, "loss_iou": 0.4296875, "loss_num": 0.0498046875, "loss_xval": 1.109375, "num_input_tokens_seen": 139060604, "step": 2482 }, { "epoch": 5.5300668151447665, "grad_norm": 28.046768188476562, "learning_rate": 1e-06, "loss": 0.9061, "num_input_tokens_seen": 139115064, "step": 2483 }, { "epoch": 5.5300668151447665, "loss": 0.8196831941604614, "loss_ce": 0.00022515948512591422, "loss_iou": 0.353515625, "loss_num": 0.0225830078125, "loss_xval": 0.8203125, "num_input_tokens_seen": 139115064, "step": 2483 }, { "epoch": 5.532293986636971, "grad_norm": 38.94261169433594, "learning_rate": 1e-06, "loss": 0.5469, "num_input_tokens_seen": 139171664, "step": 2484 }, { "epoch": 5.532293986636971, "loss": 0.418997585773468, "loss_ce": 0.0002963862498290837, "loss_iou": 0.1787109375, "loss_num": 0.01220703125, "loss_xval": 0.41796875, "num_input_tokens_seen": 139171664, "step": 2484 }, { "epoch": 5.534521158129176, "grad_norm": 21.334020614624023, "learning_rate": 1e-06, "loss": 0.7203, "num_input_tokens_seen": 139228464, "step": 2485 }, { "epoch": 5.534521158129176, "loss": 0.8227799534797668, "loss_ce": 0.0002701581106521189, "loss_iou": 0.349609375, "loss_num": 0.0244140625, "loss_xval": 0.82421875, "num_input_tokens_seen": 139228464, "step": 2485 }, { "epoch": 5.536748329621381, "grad_norm": 30.529455184936523, "learning_rate": 1e-06, "loss": 0.6742, "num_input_tokens_seen": 139281716, "step": 2486 }, { "epoch": 5.536748329621381, "loss": 0.769308865070343, "loss_ce": 0.00026593299116939306, "loss_iou": 0.353515625, "loss_num": 0.01190185546875, "loss_xval": 0.76953125, "num_input_tokens_seen": 139281716, "step": 2486 }, { "epoch": 5.538975501113586, "grad_norm": 15.466273307800293, "learning_rate": 1e-06, "loss": 0.5857, "num_input_tokens_seen": 139339380, "step": 2487 }, { "epoch": 5.538975501113586, "loss": 0.6729258298873901, "loss_ce": 0.00031836878042668104, "loss_iou": 0.28515625, "loss_num": 0.0206298828125, "loss_xval": 0.671875, "num_input_tokens_seen": 139339380, "step": 2487 }, { "epoch": 5.541202672605791, "grad_norm": 21.989582061767578, "learning_rate": 1e-06, "loss": 0.6376, "num_input_tokens_seen": 139398944, "step": 2488 }, { "epoch": 5.541202672605791, "loss": 0.7792110443115234, "loss_ce": 0.0004024332156404853, "loss_iou": 0.31640625, "loss_num": 0.029541015625, "loss_xval": 0.77734375, "num_input_tokens_seen": 139398944, "step": 2488 }, { "epoch": 5.543429844097996, "grad_norm": 35.42259979248047, "learning_rate": 1e-06, "loss": 0.8917, "num_input_tokens_seen": 139454540, "step": 2489 }, { "epoch": 5.543429844097996, "loss": 1.0833284854888916, "loss_ce": 0.00032068698783405125, "loss_iou": 0.44140625, "loss_num": 0.0400390625, "loss_xval": 1.0859375, "num_input_tokens_seen": 139454540, "step": 2489 }, { "epoch": 5.5456570155902005, "grad_norm": 20.069564819335938, "learning_rate": 1e-06, "loss": 0.7142, "num_input_tokens_seen": 139510416, "step": 2490 }, { "epoch": 5.5456570155902005, "loss": 0.6588754653930664, "loss_ce": 0.0005502753192558885, "loss_iou": 0.28515625, "loss_num": 0.017822265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 139510416, "step": 2490 }, { "epoch": 5.547884187082405, "grad_norm": 28.32633399963379, "learning_rate": 1e-06, "loss": 0.5564, "num_input_tokens_seen": 139567320, "step": 2491 }, { "epoch": 5.547884187082405, "loss": 0.4439275860786438, "loss_ce": 0.00026305546634830534, "loss_iou": 0.2021484375, "loss_num": 0.00799560546875, "loss_xval": 0.443359375, "num_input_tokens_seen": 139567320, "step": 2491 }, { "epoch": 5.55011135857461, "grad_norm": 29.56279182434082, "learning_rate": 1e-06, "loss": 0.6047, "num_input_tokens_seen": 139625440, "step": 2492 }, { "epoch": 5.55011135857461, "loss": 0.5217064619064331, "loss_ce": 0.00022206196445040405, "loss_iou": 0.2431640625, "loss_num": 0.006988525390625, "loss_xval": 0.5234375, "num_input_tokens_seen": 139625440, "step": 2492 }, { "epoch": 5.552338530066815, "grad_norm": 46.10884475708008, "learning_rate": 1e-06, "loss": 0.5618, "num_input_tokens_seen": 139682200, "step": 2493 }, { "epoch": 5.552338530066815, "loss": 0.41375732421875, "loss_ce": 0.00018312688916921616, "loss_iou": 0.1875, "loss_num": 0.00775146484375, "loss_xval": 0.4140625, "num_input_tokens_seen": 139682200, "step": 2493 }, { "epoch": 5.55456570155902, "grad_norm": 18.429887771606445, "learning_rate": 1e-06, "loss": 0.5575, "num_input_tokens_seen": 139738800, "step": 2494 }, { "epoch": 5.55456570155902, "loss": 0.6128218770027161, "loss_ce": 0.00027302149101160467, "loss_iou": 0.28515625, "loss_num": 0.008544921875, "loss_xval": 0.61328125, "num_input_tokens_seen": 139738800, "step": 2494 }, { "epoch": 5.556792873051225, "grad_norm": 24.69832420349121, "learning_rate": 1e-06, "loss": 0.7079, "num_input_tokens_seen": 139796508, "step": 2495 }, { "epoch": 5.556792873051225, "loss": 0.8797301054000854, "loss_ce": 0.00027458026306703687, "loss_iou": 0.3515625, "loss_num": 0.03466796875, "loss_xval": 0.87890625, "num_input_tokens_seen": 139796508, "step": 2495 }, { "epoch": 5.55902004454343, "grad_norm": 20.031938552856445, "learning_rate": 1e-06, "loss": 0.8567, "num_input_tokens_seen": 139852360, "step": 2496 }, { "epoch": 5.55902004454343, "loss": 0.9052836894989014, "loss_ce": 0.000254325830610469, "loss_iou": 0.412109375, "loss_num": 0.0166015625, "loss_xval": 0.90625, "num_input_tokens_seen": 139852360, "step": 2496 }, { "epoch": 5.5612472160356345, "grad_norm": 24.965023040771484, "learning_rate": 1e-06, "loss": 0.7616, "num_input_tokens_seen": 139907408, "step": 2497 }, { "epoch": 5.5612472160356345, "loss": 0.6985030770301819, "loss_ce": 0.0002608746290206909, "loss_iou": 0.287109375, "loss_num": 0.0250244140625, "loss_xval": 0.69921875, "num_input_tokens_seen": 139907408, "step": 2497 }, { "epoch": 5.563474387527839, "grad_norm": 22.246639251708984, "learning_rate": 1e-06, "loss": 0.9545, "num_input_tokens_seen": 139963936, "step": 2498 }, { "epoch": 5.563474387527839, "loss": 0.8662214875221252, "loss_ce": 0.0002546662581153214, "loss_iou": 0.35546875, "loss_num": 0.031005859375, "loss_xval": 0.8671875, "num_input_tokens_seen": 139963936, "step": 2498 }, { "epoch": 5.565701559020044, "grad_norm": 16.71168327331543, "learning_rate": 1e-06, "loss": 0.5694, "num_input_tokens_seen": 140018732, "step": 2499 }, { "epoch": 5.565701559020044, "loss": 0.6181437373161316, "loss_ce": 0.00022381696908269078, "loss_iou": 0.263671875, "loss_num": 0.0177001953125, "loss_xval": 0.6171875, "num_input_tokens_seen": 140018732, "step": 2499 }, { "epoch": 5.567928730512249, "grad_norm": 17.26616096496582, "learning_rate": 1e-06, "loss": 0.6123, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "eval_seeclick_web_CIoU": 0.5732596218585968, "eval_seeclick_web_GIoU": 0.5691904425621033, "eval_seeclick_web_IoU": 0.5895318686962128, "eval_seeclick_web_MAE_all": 0.016760945785790682, "eval_seeclick_web_MAE_h": 0.009123492753133178, "eval_seeclick_web_MAE_w": 0.01760054472833872, "eval_seeclick_web_MAE_x_boxes": 0.008502837270498276, "eval_seeclick_web_MAE_y_boxes": 0.02224468719214201, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9365757703781128, "eval_seeclick_web_loss_ce": 0.00031520209449809045, "eval_seeclick_web_loss_iou": 0.4267578125, "eval_seeclick_web_loss_num": 0.013166427612304688, "eval_seeclick_web_loss_xval": 0.918701171875, "eval_seeclick_web_runtime": 28.1194, "eval_seeclick_web_samples_per_second": 1.778, "eval_seeclick_web_steps_per_second": 0.071, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "eval_icons_CIoU": 0.32149502635002136, "eval_icons_GIoU": 0.33890318870544434, "eval_icons_IoU": 0.3914993554353714, "eval_icons_MAE_all": 0.05996252968907356, "eval_icons_MAE_h": 0.03914587013423443, "eval_icons_MAE_w": 0.05691290646791458, "eval_icons_MAE_x_boxes": 0.05956794694066048, "eval_icons_MAE_y_boxes": 0.037835730239748955, "eval_icons_inside_bbox": 0.6493055522441864, "eval_icons_loss": 1.6447449922561646, "eval_icons_loss_ce": 0.0003619373310357332, "eval_icons_loss_iou": 0.6527099609375, "eval_icons_loss_num": 0.05636787414550781, "eval_icons_loss_xval": 1.58837890625, "eval_icons_runtime": 25.1446, "eval_icons_samples_per_second": 1.988, "eval_icons_steps_per_second": 0.08, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "eval_screenspot_CIoU": 0.33252301812171936, "eval_screenspot_GIoU": 0.3458707630634308, "eval_screenspot_IoU": 0.4156401753425598, "eval_screenspot_MAE_all": 0.066184946646293, "eval_screenspot_MAE_h": 0.03784913197159767, "eval_screenspot_MAE_w": 0.08022025724252065, "eval_screenspot_MAE_x_boxes": 0.08052412172158559, "eval_screenspot_MAE_y_boxes": 0.0472174392392238, "eval_screenspot_inside_bbox": 0.6462500095367432, "eval_screenspot_loss": 1.7031261920928955, "eval_screenspot_loss_ce": 0.0003634931442017357, "eval_screenspot_loss_iou": 0.6932779947916666, "eval_screenspot_loss_num": 0.07747904459635417, "eval_screenspot_loss_xval": 1.7745768229166667, "eval_screenspot_runtime": 39.8835, "eval_screenspot_samples_per_second": 2.231, "eval_screenspot_steps_per_second": 0.075, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "eval_compot_CIoU": 0.36012406647205353, "eval_compot_GIoU": 0.3780812919139862, "eval_compot_IoU": 0.41496771574020386, "eval_compot_MAE_all": 0.017852995079010725, "eval_compot_MAE_h": 0.007951512467116117, "eval_compot_MAE_w": 0.02203182876110077, "eval_compot_MAE_x_boxes": 0.02926653064787388, "eval_compot_MAE_y_boxes": 0.006283238530158997, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.3696753978729248, "eval_compot_loss_ce": 0.00030196372244972736, "eval_compot_loss_iou": 0.6297607421875, "eval_compot_loss_num": 0.016778945922851562, "eval_compot_loss_xval": 1.343505859375, "eval_compot_runtime": 24.2244, "eval_compot_samples_per_second": 2.064, "eval_compot_steps_per_second": 0.083, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "eval_custom_ui_val_CIoU": 0.4640722307893965, "eval_custom_ui_val_GIoU": 0.48160041703118217, "eval_custom_ui_val_IoU": 0.518558962477578, "eval_custom_ui_val_MAE_all": 0.03118372191157606, "eval_custom_ui_val_MAE_h": 0.018310750601813197, "eval_custom_ui_val_MAE_w": 0.036142679759197764, "eval_custom_ui_val_MAE_x_boxes": 0.03573724896543556, "eval_custom_ui_val_MAE_y_boxes": 0.01598744459139804, "eval_custom_ui_val_inside_bbox": 0.738811731338501, "eval_custom_ui_val_loss": 1.2015693187713623, "eval_custom_ui_val_loss_ce": 0.0003550093032471422, "eval_custom_ui_val_loss_iou": 0.5086941189236112, "eval_custom_ui_val_loss_num": 0.029562632242838543, "eval_custom_ui_val_loss_xval": 1.1652560763888888, "eval_custom_ui_val_runtime": 74.2518, "eval_custom_ui_val_samples_per_second": 3.569, "eval_custom_ui_val_steps_per_second": 0.121, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.567928730512249, "loss": 0.907296895980835, "loss_ce": 0.0003145075461361557, "loss_iou": 0.3984375, "loss_num": 0.0216064453125, "loss_xval": 0.90625, "num_input_tokens_seen": 140075980, "step": 2500 }, { "epoch": 5.570155902004454, "grad_norm": 96.08206939697266, "learning_rate": 1e-06, "loss": 0.6085, "num_input_tokens_seen": 140130244, "step": 2501 }, { "epoch": 5.570155902004454, "loss": 0.6808421611785889, "loss_ce": 0.00023916579084470868, "loss_iou": 0.28125, "loss_num": 0.0234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 140130244, "step": 2501 }, { "epoch": 5.57238307349666, "grad_norm": 23.38459587097168, "learning_rate": 1e-06, "loss": 0.7066, "num_input_tokens_seen": 140187016, "step": 2502 }, { "epoch": 5.57238307349666, "loss": 0.6098702549934387, "loss_ce": 0.0002510999620426446, "loss_iou": 0.267578125, "loss_num": 0.0147705078125, "loss_xval": 0.609375, "num_input_tokens_seen": 140187016, "step": 2502 }, { "epoch": 5.574610244988865, "grad_norm": 18.638948440551758, "learning_rate": 1e-06, "loss": 0.6391, "num_input_tokens_seen": 140245660, "step": 2503 }, { "epoch": 5.574610244988865, "loss": 0.7389647364616394, "loss_ce": 0.0001952238380908966, "loss_iou": 0.287109375, "loss_num": 0.03271484375, "loss_xval": 0.73828125, "num_input_tokens_seen": 140245660, "step": 2503 }, { "epoch": 5.5768374164810695, "grad_norm": 15.6986722946167, "learning_rate": 1e-06, "loss": 0.6478, "num_input_tokens_seen": 140300240, "step": 2504 }, { "epoch": 5.5768374164810695, "loss": 0.4837605357170105, "loss_ce": 0.00024005114391911775, "loss_iou": 0.2177734375, "loss_num": 0.00958251953125, "loss_xval": 0.484375, "num_input_tokens_seen": 140300240, "step": 2504 }, { "epoch": 5.579064587973274, "grad_norm": 15.869756698608398, "learning_rate": 1e-06, "loss": 0.6703, "num_input_tokens_seen": 140357396, "step": 2505 }, { "epoch": 5.579064587973274, "loss": 0.8984197378158569, "loss_ce": 0.00022636953508481383, "loss_iou": 0.365234375, "loss_num": 0.03369140625, "loss_xval": 0.8984375, "num_input_tokens_seen": 140357396, "step": 2505 }, { "epoch": 5.581291759465479, "grad_norm": 15.25345516204834, "learning_rate": 1e-06, "loss": 0.8536, "num_input_tokens_seen": 140413868, "step": 2506 }, { "epoch": 5.581291759465479, "loss": 0.951309084892273, "loss_ce": 0.0006254723994061351, "loss_iou": 0.41015625, "loss_num": 0.0257568359375, "loss_xval": 0.94921875, "num_input_tokens_seen": 140413868, "step": 2506 }, { "epoch": 5.583518930957684, "grad_norm": 35.47723388671875, "learning_rate": 1e-06, "loss": 0.6912, "num_input_tokens_seen": 140471300, "step": 2507 }, { "epoch": 5.583518930957684, "loss": 0.6750940084457397, "loss_ce": 0.0002893685596063733, "loss_iou": 0.306640625, "loss_num": 0.011962890625, "loss_xval": 0.67578125, "num_input_tokens_seen": 140471300, "step": 2507 }, { "epoch": 5.585746102449889, "grad_norm": 45.554256439208984, "learning_rate": 1e-06, "loss": 0.6581, "num_input_tokens_seen": 140528484, "step": 2508 }, { "epoch": 5.585746102449889, "loss": 0.7441803216934204, "loss_ce": 0.00028382547316141427, "loss_iou": 0.326171875, "loss_num": 0.0181884765625, "loss_xval": 0.7421875, "num_input_tokens_seen": 140528484, "step": 2508 }, { "epoch": 5.587973273942094, "grad_norm": 13.767509460449219, "learning_rate": 1e-06, "loss": 0.7014, "num_input_tokens_seen": 140585164, "step": 2509 }, { "epoch": 5.587973273942094, "loss": 0.6096572875976562, "loss_ce": 0.00028234010096639395, "loss_iou": 0.25390625, "loss_num": 0.0201416015625, "loss_xval": 0.609375, "num_input_tokens_seen": 140585164, "step": 2509 }, { "epoch": 5.590200445434299, "grad_norm": 42.34712219238281, "learning_rate": 1e-06, "loss": 0.7912, "num_input_tokens_seen": 140640568, "step": 2510 }, { "epoch": 5.590200445434299, "loss": 0.9159772396087646, "loss_ce": 0.00020570913329720497, "loss_iou": 0.3984375, "loss_num": 0.0234375, "loss_xval": 0.9140625, "num_input_tokens_seen": 140640568, "step": 2510 }, { "epoch": 5.5924276169265035, "grad_norm": 12.886595726013184, "learning_rate": 1e-06, "loss": 0.8791, "num_input_tokens_seen": 140697100, "step": 2511 }, { "epoch": 5.5924276169265035, "loss": 0.7073045372962952, "loss_ce": 0.0002733045257627964, "loss_iou": 0.306640625, "loss_num": 0.0189208984375, "loss_xval": 0.70703125, "num_input_tokens_seen": 140697100, "step": 2511 }, { "epoch": 5.594654788418708, "grad_norm": 20.680938720703125, "learning_rate": 1e-06, "loss": 0.8405, "num_input_tokens_seen": 140754100, "step": 2512 }, { "epoch": 5.594654788418708, "loss": 0.854705274105072, "loss_ce": 0.00021311425371095538, "loss_iou": 0.341796875, "loss_num": 0.034423828125, "loss_xval": 0.85546875, "num_input_tokens_seen": 140754100, "step": 2512 }, { "epoch": 5.596881959910913, "grad_norm": 23.940448760986328, "learning_rate": 1e-06, "loss": 0.8587, "num_input_tokens_seen": 140809880, "step": 2513 }, { "epoch": 5.596881959910913, "loss": 0.8762653470039368, "loss_ce": 0.0002887820010073483, "loss_iou": 0.40625, "loss_num": 0.012451171875, "loss_xval": 0.875, "num_input_tokens_seen": 140809880, "step": 2513 }, { "epoch": 5.599109131403118, "grad_norm": 22.817508697509766, "learning_rate": 1e-06, "loss": 0.6931, "num_input_tokens_seen": 140865016, "step": 2514 }, { "epoch": 5.599109131403118, "loss": 0.8307744860649109, "loss_ce": 0.00020807303371839225, "loss_iou": 0.353515625, "loss_num": 0.0250244140625, "loss_xval": 0.83203125, "num_input_tokens_seen": 140865016, "step": 2514 }, { "epoch": 5.601336302895323, "grad_norm": 27.795007705688477, "learning_rate": 1e-06, "loss": 0.6248, "num_input_tokens_seen": 140921912, "step": 2515 }, { "epoch": 5.601336302895323, "loss": 0.6642851829528809, "loss_ce": 0.00022267791791819036, "loss_iou": 0.294921875, "loss_num": 0.014892578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 140921912, "step": 2515 }, { "epoch": 5.603563474387528, "grad_norm": 17.664352416992188, "learning_rate": 1e-06, "loss": 0.5839, "num_input_tokens_seen": 140976712, "step": 2516 }, { "epoch": 5.603563474387528, "loss": 0.4930035173892975, "loss_ce": 0.00020567109459079802, "loss_iou": 0.2109375, "loss_num": 0.013916015625, "loss_xval": 0.4921875, "num_input_tokens_seen": 140976712, "step": 2516 }, { "epoch": 5.605790645879733, "grad_norm": 16.923248291015625, "learning_rate": 1e-06, "loss": 0.6882, "num_input_tokens_seen": 141030776, "step": 2517 }, { "epoch": 5.605790645879733, "loss": 0.6687592267990112, "loss_ce": 0.0001800994505174458, "loss_iou": 0.275390625, "loss_num": 0.0233154296875, "loss_xval": 0.66796875, "num_input_tokens_seen": 141030776, "step": 2517 }, { "epoch": 5.6080178173719375, "grad_norm": 21.347900390625, "learning_rate": 1e-06, "loss": 0.6674, "num_input_tokens_seen": 141087176, "step": 2518 }, { "epoch": 5.6080178173719375, "loss": 0.7416476011276245, "loss_ce": 0.00019255219376645982, "loss_iou": 0.333984375, "loss_num": 0.01458740234375, "loss_xval": 0.7421875, "num_input_tokens_seen": 141087176, "step": 2518 }, { "epoch": 5.610244988864142, "grad_norm": 29.194229125976562, "learning_rate": 1e-06, "loss": 0.7402, "num_input_tokens_seen": 141143912, "step": 2519 }, { "epoch": 5.610244988864142, "loss": 0.937484860420227, "loss_ce": 0.00022902997443452477, "loss_iou": 0.421875, "loss_num": 0.0186767578125, "loss_xval": 0.9375, "num_input_tokens_seen": 141143912, "step": 2519 }, { "epoch": 5.612472160356347, "grad_norm": 25.27153778076172, "learning_rate": 1e-06, "loss": 0.6045, "num_input_tokens_seen": 141199920, "step": 2520 }, { "epoch": 5.612472160356347, "loss": 0.7792827486991882, "loss_ce": 0.00023000439978204668, "loss_iou": 0.318359375, "loss_num": 0.028564453125, "loss_xval": 0.77734375, "num_input_tokens_seen": 141199920, "step": 2520 }, { "epoch": 5.614699331848552, "grad_norm": 20.523181915283203, "learning_rate": 1e-06, "loss": 0.5798, "num_input_tokens_seen": 141257784, "step": 2521 }, { "epoch": 5.614699331848552, "loss": 0.6049889326095581, "loss_ce": 0.0002525739837437868, "loss_iou": 0.271484375, "loss_num": 0.01226806640625, "loss_xval": 0.60546875, "num_input_tokens_seen": 141257784, "step": 2521 }, { "epoch": 5.616926503340757, "grad_norm": 26.476478576660156, "learning_rate": 1e-06, "loss": 0.7458, "num_input_tokens_seen": 141311380, "step": 2522 }, { "epoch": 5.616926503340757, "loss": 0.8652767539024353, "loss_ce": 0.000286536494968459, "loss_iou": 0.3984375, "loss_num": 0.01385498046875, "loss_xval": 0.86328125, "num_input_tokens_seen": 141311380, "step": 2522 }, { "epoch": 5.619153674832962, "grad_norm": 25.442533493041992, "learning_rate": 1e-06, "loss": 0.8765, "num_input_tokens_seen": 141367500, "step": 2523 }, { "epoch": 5.619153674832962, "loss": 1.0669339895248413, "loss_ce": 0.00028354296227917075, "loss_iou": 0.44140625, "loss_num": 0.037109375, "loss_xval": 1.0703125, "num_input_tokens_seen": 141367500, "step": 2523 }, { "epoch": 5.621380846325167, "grad_norm": 24.357839584350586, "learning_rate": 1e-06, "loss": 0.6908, "num_input_tokens_seen": 141425116, "step": 2524 }, { "epoch": 5.621380846325167, "loss": 0.751206636428833, "loss_ce": 0.0002300894120708108, "loss_iou": 0.3359375, "loss_num": 0.0162353515625, "loss_xval": 0.75, "num_input_tokens_seen": 141425116, "step": 2524 }, { "epoch": 5.6236080178173715, "grad_norm": 17.895536422729492, "learning_rate": 1e-06, "loss": 0.7793, "num_input_tokens_seen": 141481940, "step": 2525 }, { "epoch": 5.6236080178173715, "loss": 0.9199173450469971, "loss_ce": 0.0002396509371465072, "loss_iou": 0.390625, "loss_num": 0.027587890625, "loss_xval": 0.91796875, "num_input_tokens_seen": 141481940, "step": 2525 }, { "epoch": 5.625835189309576, "grad_norm": 13.240893363952637, "learning_rate": 1e-06, "loss": 0.686, "num_input_tokens_seen": 141538040, "step": 2526 }, { "epoch": 5.625835189309576, "loss": 0.52378910779953, "loss_ce": 0.00022953613370191306, "loss_iou": 0.234375, "loss_num": 0.0107421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 141538040, "step": 2526 }, { "epoch": 5.628062360801781, "grad_norm": 16.763574600219727, "learning_rate": 1e-06, "loss": 0.6977, "num_input_tokens_seen": 141595016, "step": 2527 }, { "epoch": 5.628062360801781, "loss": 0.8276025056838989, "loss_ce": 0.00020992739882785827, "loss_iou": 0.32421875, "loss_num": 0.03515625, "loss_xval": 0.828125, "num_input_tokens_seen": 141595016, "step": 2527 }, { "epoch": 5.630289532293987, "grad_norm": 23.006132125854492, "learning_rate": 1e-06, "loss": 0.788, "num_input_tokens_seen": 141653304, "step": 2528 }, { "epoch": 5.630289532293987, "loss": 0.8520069122314453, "loss_ce": 0.0002002965338760987, "loss_iou": 0.35546875, "loss_num": 0.0281982421875, "loss_xval": 0.8515625, "num_input_tokens_seen": 141653304, "step": 2528 }, { "epoch": 5.632516703786192, "grad_norm": 15.624357223510742, "learning_rate": 1e-06, "loss": 0.651, "num_input_tokens_seen": 141710920, "step": 2529 }, { "epoch": 5.632516703786192, "loss": 0.6601657867431641, "loss_ce": 0.0002536678221076727, "loss_iou": 0.296875, "loss_num": 0.01312255859375, "loss_xval": 0.66015625, "num_input_tokens_seen": 141710920, "step": 2529 }, { "epoch": 5.634743875278397, "grad_norm": 18.147720336914062, "learning_rate": 1e-06, "loss": 0.7309, "num_input_tokens_seen": 141765836, "step": 2530 }, { "epoch": 5.634743875278397, "loss": 0.8750065565109253, "loss_ce": 0.000250728742685169, "loss_iou": 0.36328125, "loss_num": 0.029541015625, "loss_xval": 0.875, "num_input_tokens_seen": 141765836, "step": 2530 }, { "epoch": 5.636971046770602, "grad_norm": 18.959402084350586, "learning_rate": 1e-06, "loss": 0.6962, "num_input_tokens_seen": 141821112, "step": 2531 }, { "epoch": 5.636971046770602, "loss": 0.8564548492431641, "loss_ce": 0.00025371278752572834, "loss_iou": 0.357421875, "loss_num": 0.0281982421875, "loss_xval": 0.85546875, "num_input_tokens_seen": 141821112, "step": 2531 }, { "epoch": 5.639198218262806, "grad_norm": 28.660118103027344, "learning_rate": 1e-06, "loss": 0.6739, "num_input_tokens_seen": 141877664, "step": 2532 }, { "epoch": 5.639198218262806, "loss": 0.8321478366851807, "loss_ce": 0.0004828467790503055, "loss_iou": 0.30078125, "loss_num": 0.04541015625, "loss_xval": 0.83203125, "num_input_tokens_seen": 141877664, "step": 2532 }, { "epoch": 5.641425389755011, "grad_norm": 33.3441162109375, "learning_rate": 1e-06, "loss": 0.6838, "num_input_tokens_seen": 141934636, "step": 2533 }, { "epoch": 5.641425389755011, "loss": 0.7307108640670776, "loss_ce": 0.0002421102544758469, "loss_iou": 0.318359375, "loss_num": 0.0185546875, "loss_xval": 0.73046875, "num_input_tokens_seen": 141934636, "step": 2533 }, { "epoch": 5.643652561247216, "grad_norm": 22.631315231323242, "learning_rate": 1e-06, "loss": 0.8627, "num_input_tokens_seen": 141991128, "step": 2534 }, { "epoch": 5.643652561247216, "loss": 0.9368560910224915, "loss_ce": 0.0003326181322336197, "loss_iou": 0.3984375, "loss_num": 0.0283203125, "loss_xval": 0.9375, "num_input_tokens_seen": 141991128, "step": 2534 }, { "epoch": 5.645879732739421, "grad_norm": 13.450477600097656, "learning_rate": 1e-06, "loss": 0.61, "num_input_tokens_seen": 142045436, "step": 2535 }, { "epoch": 5.645879732739421, "loss": 0.5746713280677795, "loss_ce": 0.00020842923549935222, "loss_iou": 0.2470703125, "loss_num": 0.015869140625, "loss_xval": 0.57421875, "num_input_tokens_seen": 142045436, "step": 2535 }, { "epoch": 5.648106904231626, "grad_norm": 14.741578102111816, "learning_rate": 1e-06, "loss": 0.5904, "num_input_tokens_seen": 142102496, "step": 2536 }, { "epoch": 5.648106904231626, "loss": 0.4626786708831787, "loss_ce": 0.0005204671761021018, "loss_iou": 0.197265625, "loss_num": 0.01361083984375, "loss_xval": 0.462890625, "num_input_tokens_seen": 142102496, "step": 2536 }, { "epoch": 5.650334075723831, "grad_norm": 17.967416763305664, "learning_rate": 1e-06, "loss": 0.6889, "num_input_tokens_seen": 142160328, "step": 2537 }, { "epoch": 5.650334075723831, "loss": 0.5751903057098389, "loss_ce": 0.00017807658878155053, "loss_iou": 0.2265625, "loss_num": 0.0242919921875, "loss_xval": 0.57421875, "num_input_tokens_seen": 142160328, "step": 2537 }, { "epoch": 5.652561247216036, "grad_norm": 19.637807846069336, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 142215832, "step": 2538 }, { "epoch": 5.652561247216036, "loss": 0.5415201187133789, "loss_ce": 0.0002603091998025775, "loss_iou": 0.25, "loss_num": 0.0081787109375, "loss_xval": 0.54296875, "num_input_tokens_seen": 142215832, "step": 2538 }, { "epoch": 5.6547884187082404, "grad_norm": 27.54079246520996, "learning_rate": 1e-06, "loss": 0.8718, "num_input_tokens_seen": 142272748, "step": 2539 }, { "epoch": 5.6547884187082404, "loss": 0.9330868721008301, "loss_ce": 0.00022556885960511863, "loss_iou": 0.380859375, "loss_num": 0.0341796875, "loss_xval": 0.93359375, "num_input_tokens_seen": 142272748, "step": 2539 }, { "epoch": 5.657015590200445, "grad_norm": 23.226085662841797, "learning_rate": 1e-06, "loss": 0.6973, "num_input_tokens_seen": 142329648, "step": 2540 }, { "epoch": 5.657015590200445, "loss": 0.5698352456092834, "loss_ce": 0.00025519938208162785, "loss_iou": 0.232421875, "loss_num": 0.0208740234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 142329648, "step": 2540 }, { "epoch": 5.65924276169265, "grad_norm": 28.16909408569336, "learning_rate": 1e-06, "loss": 0.6034, "num_input_tokens_seen": 142385652, "step": 2541 }, { "epoch": 5.65924276169265, "loss": 0.7484890222549438, "loss_ce": 0.00019804939802270383, "loss_iou": 0.3125, "loss_num": 0.0247802734375, "loss_xval": 0.75, "num_input_tokens_seen": 142385652, "step": 2541 }, { "epoch": 5.661469933184855, "grad_norm": 20.69179916381836, "learning_rate": 1e-06, "loss": 0.6642, "num_input_tokens_seen": 142439416, "step": 2542 }, { "epoch": 5.661469933184855, "loss": 0.7014844417572021, "loss_ce": 0.0001904680102597922, "loss_iou": 0.3046875, "loss_num": 0.018798828125, "loss_xval": 0.703125, "num_input_tokens_seen": 142439416, "step": 2542 }, { "epoch": 5.66369710467706, "grad_norm": 22.986743927001953, "learning_rate": 1e-06, "loss": 0.6988, "num_input_tokens_seen": 142494088, "step": 2543 }, { "epoch": 5.66369710467706, "loss": 0.6153843402862549, "loss_ce": 0.0003941247705370188, "loss_iou": 0.27734375, "loss_num": 0.01214599609375, "loss_xval": 0.61328125, "num_input_tokens_seen": 142494088, "step": 2543 }, { "epoch": 5.665924276169265, "grad_norm": 15.41435432434082, "learning_rate": 1e-06, "loss": 0.584, "num_input_tokens_seen": 142548856, "step": 2544 }, { "epoch": 5.665924276169265, "loss": 0.4866850674152374, "loss_ce": 0.0002348774141864851, "loss_iou": 0.21875, "loss_num": 0.00994873046875, "loss_xval": 0.486328125, "num_input_tokens_seen": 142548856, "step": 2544 }, { "epoch": 5.66815144766147, "grad_norm": 19.401214599609375, "learning_rate": 1e-06, "loss": 0.803, "num_input_tokens_seen": 142605360, "step": 2545 }, { "epoch": 5.66815144766147, "loss": 0.8005963563919067, "loss_ce": 0.0003033963148482144, "loss_iou": 0.345703125, "loss_num": 0.021728515625, "loss_xval": 0.80078125, "num_input_tokens_seen": 142605360, "step": 2545 }, { "epoch": 5.6703786191536745, "grad_norm": 15.095193862915039, "learning_rate": 1e-06, "loss": 0.7953, "num_input_tokens_seen": 142660940, "step": 2546 }, { "epoch": 5.6703786191536745, "loss": 0.6447733640670776, "loss_ce": 0.00024208366812672466, "loss_iou": 0.265625, "loss_num": 0.022705078125, "loss_xval": 0.64453125, "num_input_tokens_seen": 142660940, "step": 2546 }, { "epoch": 5.67260579064588, "grad_norm": 17.20547103881836, "learning_rate": 1e-06, "loss": 0.6511, "num_input_tokens_seen": 142717892, "step": 2547 }, { "epoch": 5.67260579064588, "loss": 0.6638010740280151, "loss_ce": 0.0002268501848448068, "loss_iou": 0.267578125, "loss_num": 0.0257568359375, "loss_xval": 0.6640625, "num_input_tokens_seen": 142717892, "step": 2547 }, { "epoch": 5.674832962138085, "grad_norm": 26.371519088745117, "learning_rate": 1e-06, "loss": 0.7449, "num_input_tokens_seen": 142769660, "step": 2548 }, { "epoch": 5.674832962138085, "loss": 0.6620455980300903, "loss_ce": 0.0003024160396307707, "loss_iou": 0.267578125, "loss_num": 0.025390625, "loss_xval": 0.66015625, "num_input_tokens_seen": 142769660, "step": 2548 }, { "epoch": 5.67706013363029, "grad_norm": 27.11166763305664, "learning_rate": 1e-06, "loss": 0.9627, "num_input_tokens_seen": 142825764, "step": 2549 }, { "epoch": 5.67706013363029, "loss": 1.015166997909546, "loss_ce": 0.00027431544731371105, "loss_iou": 0.3984375, "loss_num": 0.043701171875, "loss_xval": 1.015625, "num_input_tokens_seen": 142825764, "step": 2549 }, { "epoch": 5.679287305122495, "grad_norm": 14.629166603088379, "learning_rate": 1e-06, "loss": 0.6238, "num_input_tokens_seen": 142880500, "step": 2550 }, { "epoch": 5.679287305122495, "loss": 0.4876967668533325, "loss_ce": 0.00039207623922266066, "loss_iou": 0.2109375, "loss_num": 0.01318359375, "loss_xval": 0.48828125, "num_input_tokens_seen": 142880500, "step": 2550 }, { "epoch": 5.6815144766147, "grad_norm": 41.370121002197266, "learning_rate": 1e-06, "loss": 0.7918, "num_input_tokens_seen": 142932608, "step": 2551 }, { "epoch": 5.6815144766147, "loss": 0.9631957411766052, "loss_ce": 0.0004271506331861019, "loss_iou": 0.3984375, "loss_num": 0.033203125, "loss_xval": 0.9609375, "num_input_tokens_seen": 142932608, "step": 2551 }, { "epoch": 5.6837416481069045, "grad_norm": 17.59296417236328, "learning_rate": 1e-06, "loss": 0.6625, "num_input_tokens_seen": 142988896, "step": 2552 }, { "epoch": 5.6837416481069045, "loss": 0.51399827003479, "loss_ce": 0.0003264106926508248, "loss_iou": 0.2333984375, "loss_num": 0.00933837890625, "loss_xval": 0.515625, "num_input_tokens_seen": 142988896, "step": 2552 }, { "epoch": 5.685968819599109, "grad_norm": 28.058643341064453, "learning_rate": 1e-06, "loss": 0.7143, "num_input_tokens_seen": 143043040, "step": 2553 }, { "epoch": 5.685968819599109, "loss": 0.6291226744651794, "loss_ce": 0.00021646139794029295, "loss_iou": 0.2890625, "loss_num": 0.0098876953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 143043040, "step": 2553 }, { "epoch": 5.688195991091314, "grad_norm": 25.84493064880371, "learning_rate": 1e-06, "loss": 0.6757, "num_input_tokens_seen": 143096936, "step": 2554 }, { "epoch": 5.688195991091314, "loss": 0.8105074167251587, "loss_ce": 0.0002046898298431188, "loss_iou": 0.33203125, "loss_num": 0.0291748046875, "loss_xval": 0.80859375, "num_input_tokens_seen": 143096936, "step": 2554 }, { "epoch": 5.690423162583519, "grad_norm": 21.50450897216797, "learning_rate": 1e-06, "loss": 0.5824, "num_input_tokens_seen": 143155644, "step": 2555 }, { "epoch": 5.690423162583519, "loss": 0.5691958665847778, "loss_ce": 0.00022610818268731236, "loss_iou": 0.2578125, "loss_num": 0.01043701171875, "loss_xval": 0.5703125, "num_input_tokens_seen": 143155644, "step": 2555 }, { "epoch": 5.692650334075724, "grad_norm": 18.279918670654297, "learning_rate": 1e-06, "loss": 0.6493, "num_input_tokens_seen": 143213008, "step": 2556 }, { "epoch": 5.692650334075724, "loss": 0.828661322593689, "loss_ce": 0.0002921561535913497, "loss_iou": 0.373046875, "loss_num": 0.0162353515625, "loss_xval": 0.828125, "num_input_tokens_seen": 143213008, "step": 2556 }, { "epoch": 5.694877505567929, "grad_norm": 16.526641845703125, "learning_rate": 1e-06, "loss": 0.6586, "num_input_tokens_seen": 143269428, "step": 2557 }, { "epoch": 5.694877505567929, "loss": 0.6889902353286743, "loss_ce": 0.00026959230308420956, "loss_iou": 0.2890625, "loss_num": 0.02197265625, "loss_xval": 0.6875, "num_input_tokens_seen": 143269428, "step": 2557 }, { "epoch": 5.697104677060134, "grad_norm": 16.533336639404297, "learning_rate": 1e-06, "loss": 0.5711, "num_input_tokens_seen": 143326208, "step": 2558 }, { "epoch": 5.697104677060134, "loss": 0.5453148484230042, "loss_ce": 0.0002708985994104296, "loss_iou": 0.23828125, "loss_num": 0.013671875, "loss_xval": 0.546875, "num_input_tokens_seen": 143326208, "step": 2558 }, { "epoch": 5.6993318485523385, "grad_norm": 85.07674407958984, "learning_rate": 1e-06, "loss": 0.7295, "num_input_tokens_seen": 143383116, "step": 2559 }, { "epoch": 5.6993318485523385, "loss": 0.5694315433502197, "loss_ce": 0.00021770322928205132, "loss_iou": 0.2236328125, "loss_num": 0.024169921875, "loss_xval": 0.5703125, "num_input_tokens_seen": 143383116, "step": 2559 }, { "epoch": 5.701559020044543, "grad_norm": 23.484725952148438, "learning_rate": 1e-06, "loss": 0.6961, "num_input_tokens_seen": 143437420, "step": 2560 }, { "epoch": 5.701559020044543, "loss": 0.736332893371582, "loss_ce": 0.0002489334437996149, "loss_iou": 0.28515625, "loss_num": 0.033203125, "loss_xval": 0.734375, "num_input_tokens_seen": 143437420, "step": 2560 }, { "epoch": 5.703786191536748, "grad_norm": 18.436580657958984, "learning_rate": 1e-06, "loss": 0.7144, "num_input_tokens_seen": 143493204, "step": 2561 }, { "epoch": 5.703786191536748, "loss": 0.8088289499282837, "loss_ce": 0.0002351756556890905, "loss_iou": 0.3671875, "loss_num": 0.01513671875, "loss_xval": 0.80859375, "num_input_tokens_seen": 143493204, "step": 2561 }, { "epoch": 5.706013363028953, "grad_norm": 406.9714660644531, "learning_rate": 1e-06, "loss": 0.8129, "num_input_tokens_seen": 143548960, "step": 2562 }, { "epoch": 5.706013363028953, "loss": 0.7016435861587524, "loss_ce": 0.00022760604042559862, "loss_iou": 0.318359375, "loss_num": 0.01287841796875, "loss_xval": 0.703125, "num_input_tokens_seen": 143548960, "step": 2562 }, { "epoch": 5.708240534521158, "grad_norm": 20.201797485351562, "learning_rate": 1e-06, "loss": 0.7235, "num_input_tokens_seen": 143606860, "step": 2563 }, { "epoch": 5.708240534521158, "loss": 0.6854332685470581, "loss_ce": 0.0003746763104572892, "loss_iou": 0.3125, "loss_num": 0.01190185546875, "loss_xval": 0.68359375, "num_input_tokens_seen": 143606860, "step": 2563 }, { "epoch": 5.710467706013363, "grad_norm": 17.661972045898438, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 143661512, "step": 2564 }, { "epoch": 5.710467706013363, "loss": 0.589299201965332, "loss_ce": 0.00018784166604746133, "loss_iou": 0.2578125, "loss_num": 0.01434326171875, "loss_xval": 0.58984375, "num_input_tokens_seen": 143661512, "step": 2564 }, { "epoch": 5.712694877505568, "grad_norm": 30.762678146362305, "learning_rate": 1e-06, "loss": 0.8426, "num_input_tokens_seen": 143719200, "step": 2565 }, { "epoch": 5.712694877505568, "loss": 0.8480957746505737, "loss_ce": 0.00019540796347428113, "loss_iou": 0.345703125, "loss_num": 0.03125, "loss_xval": 0.84765625, "num_input_tokens_seen": 143719200, "step": 2565 }, { "epoch": 5.714922048997773, "grad_norm": 18.01441764831543, "learning_rate": 1e-06, "loss": 0.7997, "num_input_tokens_seen": 143772696, "step": 2566 }, { "epoch": 5.714922048997773, "loss": 0.9778006076812744, "loss_ce": 0.00020047195721417665, "loss_iou": 0.400390625, "loss_num": 0.03515625, "loss_xval": 0.9765625, "num_input_tokens_seen": 143772696, "step": 2566 }, { "epoch": 5.717149220489977, "grad_norm": 19.236597061157227, "learning_rate": 1e-06, "loss": 0.6094, "num_input_tokens_seen": 143828632, "step": 2567 }, { "epoch": 5.717149220489977, "loss": 0.7114354372024536, "loss_ce": 0.0002537810942158103, "loss_iou": 0.30859375, "loss_num": 0.018798828125, "loss_xval": 0.7109375, "num_input_tokens_seen": 143828632, "step": 2567 }, { "epoch": 5.719376391982182, "grad_norm": 16.775983810424805, "learning_rate": 1e-06, "loss": 0.5711, "num_input_tokens_seen": 143882672, "step": 2568 }, { "epoch": 5.719376391982182, "loss": 0.6285956501960754, "loss_ce": 0.00017768185352906585, "loss_iou": 0.2734375, "loss_num": 0.0159912109375, "loss_xval": 0.62890625, "num_input_tokens_seen": 143882672, "step": 2568 }, { "epoch": 5.721603563474387, "grad_norm": 15.795881271362305, "learning_rate": 1e-06, "loss": 0.7436, "num_input_tokens_seen": 143938316, "step": 2569 }, { "epoch": 5.721603563474387, "loss": 0.8578826189041138, "loss_ce": 0.0002165931509807706, "loss_iou": 0.392578125, "loss_num": 0.0147705078125, "loss_xval": 0.859375, "num_input_tokens_seen": 143938316, "step": 2569 }, { "epoch": 5.723830734966592, "grad_norm": 21.437118530273438, "learning_rate": 1e-06, "loss": 0.7572, "num_input_tokens_seen": 143995348, "step": 2570 }, { "epoch": 5.723830734966592, "loss": 0.7881063222885132, "loss_ce": 0.00026450445875525475, "loss_iou": 0.345703125, "loss_num": 0.019287109375, "loss_xval": 0.7890625, "num_input_tokens_seen": 143995348, "step": 2570 }, { "epoch": 5.726057906458797, "grad_norm": 16.0108585357666, "learning_rate": 1e-06, "loss": 0.7053, "num_input_tokens_seen": 144052660, "step": 2571 }, { "epoch": 5.726057906458797, "loss": 0.58664470911026, "loss_ce": 0.00021891661162953824, "loss_iou": 0.251953125, "loss_num": 0.0164794921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 144052660, "step": 2571 }, { "epoch": 5.728285077951003, "grad_norm": 26.33615493774414, "learning_rate": 1e-06, "loss": 0.6633, "num_input_tokens_seen": 144105148, "step": 2572 }, { "epoch": 5.728285077951003, "loss": 0.4833102822303772, "loss_ce": 0.00021702511003240943, "loss_iou": 0.205078125, "loss_num": 0.0146484375, "loss_xval": 0.482421875, "num_input_tokens_seen": 144105148, "step": 2572 }, { "epoch": 5.7305122494432075, "grad_norm": 21.177268981933594, "learning_rate": 1e-06, "loss": 0.9853, "num_input_tokens_seen": 144161172, "step": 2573 }, { "epoch": 5.7305122494432075, "loss": 1.158959150314331, "loss_ce": 0.0007559259538538754, "loss_iou": 0.48046875, "loss_num": 0.039306640625, "loss_xval": 1.15625, "num_input_tokens_seen": 144161172, "step": 2573 }, { "epoch": 5.732739420935412, "grad_norm": 25.209182739257812, "learning_rate": 1e-06, "loss": 0.6585, "num_input_tokens_seen": 144215804, "step": 2574 }, { "epoch": 5.732739420935412, "loss": 0.6748742461204529, "loss_ce": 0.00031366333132609725, "loss_iou": 0.291015625, "loss_num": 0.0185546875, "loss_xval": 0.67578125, "num_input_tokens_seen": 144215804, "step": 2574 }, { "epoch": 5.734966592427617, "grad_norm": 16.51605796813965, "learning_rate": 1e-06, "loss": 0.6987, "num_input_tokens_seen": 144273436, "step": 2575 }, { "epoch": 5.734966592427617, "loss": 0.564425528049469, "loss_ce": 0.00021653338626492769, "loss_iou": 0.248046875, "loss_num": 0.0135498046875, "loss_xval": 0.5625, "num_input_tokens_seen": 144273436, "step": 2575 }, { "epoch": 5.737193763919822, "grad_norm": 21.38966941833496, "learning_rate": 1e-06, "loss": 0.7136, "num_input_tokens_seen": 144330508, "step": 2576 }, { "epoch": 5.737193763919822, "loss": 0.7851696610450745, "loss_ce": 0.00025755877140909433, "loss_iou": 0.3125, "loss_num": 0.0322265625, "loss_xval": 0.78515625, "num_input_tokens_seen": 144330508, "step": 2576 }, { "epoch": 5.739420935412027, "grad_norm": 21.6599178314209, "learning_rate": 1e-06, "loss": 0.7422, "num_input_tokens_seen": 144387136, "step": 2577 }, { "epoch": 5.739420935412027, "loss": 0.8027091026306152, "loss_ce": 0.00021889799972996116, "loss_iou": 0.326171875, "loss_num": 0.0303955078125, "loss_xval": 0.80078125, "num_input_tokens_seen": 144387136, "step": 2577 }, { "epoch": 5.741648106904232, "grad_norm": 21.33940887451172, "learning_rate": 1e-06, "loss": 0.7533, "num_input_tokens_seen": 144441500, "step": 2578 }, { "epoch": 5.741648106904232, "loss": 0.6317015290260315, "loss_ce": 0.0002318147598998621, "loss_iou": 0.28515625, "loss_num": 0.012451171875, "loss_xval": 0.6328125, "num_input_tokens_seen": 144441500, "step": 2578 }, { "epoch": 5.743875278396437, "grad_norm": 18.354366302490234, "learning_rate": 1e-06, "loss": 0.6682, "num_input_tokens_seen": 144499348, "step": 2579 }, { "epoch": 5.743875278396437, "loss": 0.7804338335990906, "loss_ce": 0.0002824625698849559, "loss_iou": 0.28125, "loss_num": 0.043701171875, "loss_xval": 0.78125, "num_input_tokens_seen": 144499348, "step": 2579 }, { "epoch": 5.7461024498886415, "grad_norm": 20.35249900817871, "learning_rate": 1e-06, "loss": 0.8315, "num_input_tokens_seen": 144553584, "step": 2580 }, { "epoch": 5.7461024498886415, "loss": 0.8381065130233765, "loss_ce": 0.00021590120741166174, "loss_iou": 0.365234375, "loss_num": 0.0211181640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 144553584, "step": 2580 }, { "epoch": 5.748329621380846, "grad_norm": 16.173368453979492, "learning_rate": 1e-06, "loss": 0.4855, "num_input_tokens_seen": 144610324, "step": 2581 }, { "epoch": 5.748329621380846, "loss": 0.5163002610206604, "loss_ce": 0.00018697154882829636, "loss_iou": 0.2353515625, "loss_num": 0.0091552734375, "loss_xval": 0.515625, "num_input_tokens_seen": 144610324, "step": 2581 }, { "epoch": 5.750556792873051, "grad_norm": 31.1335506439209, "learning_rate": 1e-06, "loss": 0.7519, "num_input_tokens_seen": 144665984, "step": 2582 }, { "epoch": 5.750556792873051, "loss": 0.5313022136688232, "loss_ce": 0.0002963669830933213, "loss_iou": 0.2353515625, "loss_num": 0.0118408203125, "loss_xval": 0.53125, "num_input_tokens_seen": 144665984, "step": 2582 }, { "epoch": 5.752783964365256, "grad_norm": 24.663740158081055, "learning_rate": 1e-06, "loss": 0.5619, "num_input_tokens_seen": 144722060, "step": 2583 }, { "epoch": 5.752783964365256, "loss": 0.6767467260360718, "loss_ce": 0.00023307063383981586, "loss_iou": 0.30078125, "loss_num": 0.015380859375, "loss_xval": 0.67578125, "num_input_tokens_seen": 144722060, "step": 2583 }, { "epoch": 5.755011135857461, "grad_norm": 13.527139663696289, "learning_rate": 1e-06, "loss": 0.5107, "num_input_tokens_seen": 144777528, "step": 2584 }, { "epoch": 5.755011135857461, "loss": 0.5506859421730042, "loss_ce": 0.0002709058462642133, "loss_iou": 0.2373046875, "loss_num": 0.01519775390625, "loss_xval": 0.55078125, "num_input_tokens_seen": 144777528, "step": 2584 }, { "epoch": 5.757238307349666, "grad_norm": 19.172088623046875, "learning_rate": 1e-06, "loss": 0.7847, "num_input_tokens_seen": 144833756, "step": 2585 }, { "epoch": 5.757238307349666, "loss": 0.7893670797348022, "loss_ce": 0.0003045589546673, "loss_iou": 0.330078125, "loss_num": 0.025634765625, "loss_xval": 0.7890625, "num_input_tokens_seen": 144833756, "step": 2585 }, { "epoch": 5.759465478841871, "grad_norm": 17.034088134765625, "learning_rate": 1e-06, "loss": 0.6934, "num_input_tokens_seen": 144892408, "step": 2586 }, { "epoch": 5.759465478841871, "loss": 0.6261861324310303, "loss_ce": 0.00020951575424987823, "loss_iou": 0.26953125, "loss_num": 0.0174560546875, "loss_xval": 0.625, "num_input_tokens_seen": 144892408, "step": 2586 }, { "epoch": 5.7616926503340755, "grad_norm": 22.121641159057617, "learning_rate": 1e-06, "loss": 0.7483, "num_input_tokens_seen": 144946344, "step": 2587 }, { "epoch": 5.7616926503340755, "loss": 0.8284502029418945, "loss_ce": 0.00032524295966140926, "loss_iou": 0.34765625, "loss_num": 0.026611328125, "loss_xval": 0.828125, "num_input_tokens_seen": 144946344, "step": 2587 }, { "epoch": 5.76391982182628, "grad_norm": 18.808324813842773, "learning_rate": 1e-06, "loss": 0.706, "num_input_tokens_seen": 145002944, "step": 2588 }, { "epoch": 5.76391982182628, "loss": 0.5951967239379883, "loss_ce": 0.00022597931092604995, "loss_iou": 0.2578125, "loss_num": 0.0157470703125, "loss_xval": 0.59375, "num_input_tokens_seen": 145002944, "step": 2588 }, { "epoch": 5.766146993318485, "grad_norm": 17.422155380249023, "learning_rate": 1e-06, "loss": 0.689, "num_input_tokens_seen": 145056836, "step": 2589 }, { "epoch": 5.766146993318485, "loss": 0.7689008712768555, "loss_ce": 0.00022416308638639748, "loss_iou": 0.310546875, "loss_num": 0.029541015625, "loss_xval": 0.76953125, "num_input_tokens_seen": 145056836, "step": 2589 }, { "epoch": 5.76837416481069, "grad_norm": 19.195316314697266, "learning_rate": 1e-06, "loss": 0.7372, "num_input_tokens_seen": 145113532, "step": 2590 }, { "epoch": 5.76837416481069, "loss": 0.8400857448577881, "loss_ce": 0.0002420624950900674, "loss_iou": 0.34375, "loss_num": 0.0303955078125, "loss_xval": 0.83984375, "num_input_tokens_seen": 145113532, "step": 2590 }, { "epoch": 5.770601336302895, "grad_norm": 18.72977638244629, "learning_rate": 1e-06, "loss": 0.7385, "num_input_tokens_seen": 145169580, "step": 2591 }, { "epoch": 5.770601336302895, "loss": 0.9092113375663757, "loss_ce": 0.0002757691836450249, "loss_iou": 0.361328125, "loss_num": 0.037841796875, "loss_xval": 0.91015625, "num_input_tokens_seen": 145169580, "step": 2591 }, { "epoch": 5.772828507795101, "grad_norm": 26.984228134155273, "learning_rate": 1e-06, "loss": 0.7739, "num_input_tokens_seen": 145225448, "step": 2592 }, { "epoch": 5.772828507795101, "loss": 0.6938083171844482, "loss_ce": 0.0004489576967898756, "loss_iou": 0.2890625, "loss_num": 0.023193359375, "loss_xval": 0.6953125, "num_input_tokens_seen": 145225448, "step": 2592 }, { "epoch": 5.775055679287306, "grad_norm": 26.926746368408203, "learning_rate": 1e-06, "loss": 0.8225, "num_input_tokens_seen": 145282560, "step": 2593 }, { "epoch": 5.775055679287306, "loss": 0.8718444108963013, "loss_ce": 0.00026242341846227646, "loss_iou": 0.37109375, "loss_num": 0.0262451171875, "loss_xval": 0.87109375, "num_input_tokens_seen": 145282560, "step": 2593 }, { "epoch": 5.77728285077951, "grad_norm": 17.646190643310547, "learning_rate": 1e-06, "loss": 0.7529, "num_input_tokens_seen": 145339244, "step": 2594 }, { "epoch": 5.77728285077951, "loss": 0.878664493560791, "loss_ce": 0.00024651282001286745, "loss_iou": 0.34375, "loss_num": 0.037841796875, "loss_xval": 0.87890625, "num_input_tokens_seen": 145339244, "step": 2594 }, { "epoch": 5.779510022271715, "grad_norm": 18.98468780517578, "learning_rate": 1e-06, "loss": 0.6249, "num_input_tokens_seen": 145394856, "step": 2595 }, { "epoch": 5.779510022271715, "loss": 0.6960242390632629, "loss_ce": 0.00022345452453009784, "loss_iou": 0.318359375, "loss_num": 0.01214599609375, "loss_xval": 0.6953125, "num_input_tokens_seen": 145394856, "step": 2595 }, { "epoch": 5.78173719376392, "grad_norm": 31.605703353881836, "learning_rate": 1e-06, "loss": 0.6959, "num_input_tokens_seen": 145450920, "step": 2596 }, { "epoch": 5.78173719376392, "loss": 0.6217830181121826, "loss_ce": 0.00020095528452657163, "loss_iou": 0.263671875, "loss_num": 0.0191650390625, "loss_xval": 0.62109375, "num_input_tokens_seen": 145450920, "step": 2596 }, { "epoch": 5.783964365256125, "grad_norm": 17.69574546813965, "learning_rate": 1e-06, "loss": 0.6234, "num_input_tokens_seen": 145504552, "step": 2597 }, { "epoch": 5.783964365256125, "loss": 0.8066386580467224, "loss_ce": 0.0002421344688627869, "loss_iou": 0.328125, "loss_num": 0.02978515625, "loss_xval": 0.8046875, "num_input_tokens_seen": 145504552, "step": 2597 }, { "epoch": 5.78619153674833, "grad_norm": 31.138301849365234, "learning_rate": 1e-06, "loss": 0.7631, "num_input_tokens_seen": 145559244, "step": 2598 }, { "epoch": 5.78619153674833, "loss": 0.7720120549201965, "loss_ce": 0.00028353132074698806, "loss_iou": 0.3515625, "loss_num": 0.01336669921875, "loss_xval": 0.7734375, "num_input_tokens_seen": 145559244, "step": 2598 }, { "epoch": 5.788418708240535, "grad_norm": 22.075687408447266, "learning_rate": 1e-06, "loss": 0.7088, "num_input_tokens_seen": 145617544, "step": 2599 }, { "epoch": 5.788418708240535, "loss": 0.8822649717330933, "loss_ce": 0.00042899814434349537, "loss_iou": 0.392578125, "loss_num": 0.0196533203125, "loss_xval": 0.8828125, "num_input_tokens_seen": 145617544, "step": 2599 }, { "epoch": 5.79064587973274, "grad_norm": 22.63301658630371, "learning_rate": 1e-06, "loss": 0.7705, "num_input_tokens_seen": 145675480, "step": 2600 }, { "epoch": 5.79064587973274, "loss": 0.7863603234291077, "loss_ce": 0.0002275097358506173, "loss_iou": 0.33203125, "loss_num": 0.024169921875, "loss_xval": 0.78515625, "num_input_tokens_seen": 145675480, "step": 2600 }, { "epoch": 5.7928730512249444, "grad_norm": 23.069360733032227, "learning_rate": 1e-06, "loss": 0.7148, "num_input_tokens_seen": 145726312, "step": 2601 }, { "epoch": 5.7928730512249444, "loss": 0.8112291097640991, "loss_ce": 0.00019390250963624567, "loss_iou": 0.349609375, "loss_num": 0.0224609375, "loss_xval": 0.8125, "num_input_tokens_seen": 145726312, "step": 2601 }, { "epoch": 5.795100222717149, "grad_norm": 27.435791015625, "learning_rate": 1e-06, "loss": 0.7378, "num_input_tokens_seen": 145783560, "step": 2602 }, { "epoch": 5.795100222717149, "loss": 0.7958614826202393, "loss_ce": 0.00020716458675451577, "loss_iou": 0.35546875, "loss_num": 0.01708984375, "loss_xval": 0.796875, "num_input_tokens_seen": 145783560, "step": 2602 }, { "epoch": 5.797327394209354, "grad_norm": 24.120868682861328, "learning_rate": 1e-06, "loss": 0.7342, "num_input_tokens_seen": 145836256, "step": 2603 }, { "epoch": 5.797327394209354, "loss": 0.8412899971008301, "loss_ce": 0.00028652820037677884, "loss_iou": 0.33203125, "loss_num": 0.035400390625, "loss_xval": 0.83984375, "num_input_tokens_seen": 145836256, "step": 2603 }, { "epoch": 5.799554565701559, "grad_norm": 28.57222557067871, "learning_rate": 1e-06, "loss": 0.7984, "num_input_tokens_seen": 145892380, "step": 2604 }, { "epoch": 5.799554565701559, "loss": 0.7485337853431702, "loss_ce": 0.00024279108038172126, "loss_iou": 0.318359375, "loss_num": 0.0228271484375, "loss_xval": 0.75, "num_input_tokens_seen": 145892380, "step": 2604 }, { "epoch": 5.801781737193764, "grad_norm": 30.485336303710938, "learning_rate": 1e-06, "loss": 0.7688, "num_input_tokens_seen": 145948016, "step": 2605 }, { "epoch": 5.801781737193764, "loss": 0.7458430528640747, "loss_ce": 0.0002376111369812861, "loss_iou": 0.3125, "loss_num": 0.02392578125, "loss_xval": 0.74609375, "num_input_tokens_seen": 145948016, "step": 2605 }, { "epoch": 5.804008908685969, "grad_norm": 18.45177459716797, "learning_rate": 1e-06, "loss": 0.8566, "num_input_tokens_seen": 146005492, "step": 2606 }, { "epoch": 5.804008908685969, "loss": 0.9877591133117676, "loss_ce": 0.00021025189198553562, "loss_iou": 0.40625, "loss_num": 0.034912109375, "loss_xval": 0.98828125, "num_input_tokens_seen": 146005492, "step": 2606 }, { "epoch": 5.806236080178174, "grad_norm": 23.423572540283203, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 146062328, "step": 2607 }, { "epoch": 5.806236080178174, "loss": 0.6337399482727051, "loss_ce": 0.00019499435438774526, "loss_iou": 0.28125, "loss_num": 0.013671875, "loss_xval": 0.6328125, "num_input_tokens_seen": 146062328, "step": 2607 }, { "epoch": 5.8084632516703785, "grad_norm": 23.9626407623291, "learning_rate": 1e-06, "loss": 0.8475, "num_input_tokens_seen": 146116388, "step": 2608 }, { "epoch": 5.8084632516703785, "loss": 0.6824861764907837, "loss_ce": 0.00023519776004832238, "loss_iou": 0.2470703125, "loss_num": 0.037841796875, "loss_xval": 0.68359375, "num_input_tokens_seen": 146116388, "step": 2608 }, { "epoch": 5.810690423162583, "grad_norm": 19.422828674316406, "learning_rate": 1e-06, "loss": 0.7589, "num_input_tokens_seen": 146172864, "step": 2609 }, { "epoch": 5.810690423162583, "loss": 0.7359454035758972, "loss_ce": 0.0002276489103678614, "loss_iou": 0.330078125, "loss_num": 0.0152587890625, "loss_xval": 0.734375, "num_input_tokens_seen": 146172864, "step": 2609 }, { "epoch": 5.812917594654788, "grad_norm": 26.152000427246094, "learning_rate": 1e-06, "loss": 0.6478, "num_input_tokens_seen": 146230640, "step": 2610 }, { "epoch": 5.812917594654788, "loss": 0.7595158815383911, "loss_ce": 0.00023854889150243253, "loss_iou": 0.310546875, "loss_num": 0.0279541015625, "loss_xval": 0.7578125, "num_input_tokens_seen": 146230640, "step": 2610 }, { "epoch": 5.815144766146993, "grad_norm": 40.59634780883789, "learning_rate": 1e-06, "loss": 0.5484, "num_input_tokens_seen": 146287508, "step": 2611 }, { "epoch": 5.815144766146993, "loss": 0.6367042660713196, "loss_ce": 0.00022962281946092844, "loss_iou": 0.259765625, "loss_num": 0.023193359375, "loss_xval": 0.63671875, "num_input_tokens_seen": 146287508, "step": 2611 }, { "epoch": 5.817371937639198, "grad_norm": 16.996164321899414, "learning_rate": 1e-06, "loss": 0.8022, "num_input_tokens_seen": 146343108, "step": 2612 }, { "epoch": 5.817371937639198, "loss": 0.7761521339416504, "loss_ce": 0.00027319122455082834, "loss_iou": 0.310546875, "loss_num": 0.03125, "loss_xval": 0.77734375, "num_input_tokens_seen": 146343108, "step": 2612 }, { "epoch": 5.819599109131403, "grad_norm": 14.195480346679688, "learning_rate": 1e-06, "loss": 0.5662, "num_input_tokens_seen": 146401944, "step": 2613 }, { "epoch": 5.819599109131403, "loss": 0.6162877082824707, "loss_ce": 0.00032092921901494265, "loss_iou": 0.2421875, "loss_num": 0.0262451171875, "loss_xval": 0.6171875, "num_input_tokens_seen": 146401944, "step": 2613 }, { "epoch": 5.821826280623608, "grad_norm": 13.408287048339844, "learning_rate": 1e-06, "loss": 0.676, "num_input_tokens_seen": 146459796, "step": 2614 }, { "epoch": 5.821826280623608, "loss": 0.6257205009460449, "loss_ce": 0.00023219409922603518, "loss_iou": 0.26953125, "loss_num": 0.0174560546875, "loss_xval": 0.625, "num_input_tokens_seen": 146459796, "step": 2614 }, { "epoch": 5.8240534521158125, "grad_norm": 37.40284729003906, "learning_rate": 1e-06, "loss": 0.6556, "num_input_tokens_seen": 146515484, "step": 2615 }, { "epoch": 5.8240534521158125, "loss": 0.6754716634750366, "loss_ce": 0.00017868283612187952, "loss_iou": 0.27734375, "loss_num": 0.023681640625, "loss_xval": 0.67578125, "num_input_tokens_seen": 146515484, "step": 2615 }, { "epoch": 5.826280623608017, "grad_norm": 22.057910919189453, "learning_rate": 1e-06, "loss": 0.7252, "num_input_tokens_seen": 146570828, "step": 2616 }, { "epoch": 5.826280623608017, "loss": 0.7309498190879822, "loss_ce": 0.00023692671675235033, "loss_iou": 0.296875, "loss_num": 0.02734375, "loss_xval": 0.73046875, "num_input_tokens_seen": 146570828, "step": 2616 }, { "epoch": 5.828507795100223, "grad_norm": 63.05039596557617, "learning_rate": 1e-06, "loss": 0.577, "num_input_tokens_seen": 146627412, "step": 2617 }, { "epoch": 5.828507795100223, "loss": 0.5437142848968506, "loss_ce": 0.0002571970981080085, "loss_iou": 0.23828125, "loss_num": 0.01336669921875, "loss_xval": 0.54296875, "num_input_tokens_seen": 146627412, "step": 2617 }, { "epoch": 5.830734966592428, "grad_norm": 29.350282669067383, "learning_rate": 1e-06, "loss": 0.6785, "num_input_tokens_seen": 146684332, "step": 2618 }, { "epoch": 5.830734966592428, "loss": 0.5683233141899109, "loss_ce": 0.00020807163673453033, "loss_iou": 0.236328125, "loss_num": 0.01904296875, "loss_xval": 0.56640625, "num_input_tokens_seen": 146684332, "step": 2618 }, { "epoch": 5.832962138084633, "grad_norm": 23.34769058227539, "learning_rate": 1e-06, "loss": 0.6697, "num_input_tokens_seen": 146737280, "step": 2619 }, { "epoch": 5.832962138084633, "loss": 0.6157218217849731, "loss_ce": 0.0004874598525930196, "loss_iou": 0.271484375, "loss_num": 0.01446533203125, "loss_xval": 0.6171875, "num_input_tokens_seen": 146737280, "step": 2619 }, { "epoch": 5.835189309576838, "grad_norm": 22.184803009033203, "learning_rate": 1e-06, "loss": 0.6025, "num_input_tokens_seen": 146794400, "step": 2620 }, { "epoch": 5.835189309576838, "loss": 0.6838958263397217, "loss_ce": 0.0003020889707840979, "loss_iou": 0.3125, "loss_num": 0.0115966796875, "loss_xval": 0.68359375, "num_input_tokens_seen": 146794400, "step": 2620 }, { "epoch": 5.8374164810690425, "grad_norm": 16.25792694091797, "learning_rate": 1e-06, "loss": 0.71, "num_input_tokens_seen": 146849192, "step": 2621 }, { "epoch": 5.8374164810690425, "loss": 0.6701442003250122, "loss_ce": 0.00022233014169614762, "loss_iou": 0.25390625, "loss_num": 0.032470703125, "loss_xval": 0.671875, "num_input_tokens_seen": 146849192, "step": 2621 }, { "epoch": 5.839643652561247, "grad_norm": 17.911888122558594, "learning_rate": 1e-06, "loss": 0.6373, "num_input_tokens_seen": 146906416, "step": 2622 }, { "epoch": 5.839643652561247, "loss": 0.5712316632270813, "loss_ce": 0.00018675101455301046, "loss_iou": 0.2421875, "loss_num": 0.0172119140625, "loss_xval": 0.5703125, "num_input_tokens_seen": 146906416, "step": 2622 }, { "epoch": 5.841870824053452, "grad_norm": 15.35204029083252, "learning_rate": 1e-06, "loss": 0.6543, "num_input_tokens_seen": 146963888, "step": 2623 }, { "epoch": 5.841870824053452, "loss": 0.6473047733306885, "loss_ce": 0.00021000676497351378, "loss_iou": 0.265625, "loss_num": 0.0233154296875, "loss_xval": 0.6484375, "num_input_tokens_seen": 146963888, "step": 2623 }, { "epoch": 5.844097995545657, "grad_norm": 19.907550811767578, "learning_rate": 1e-06, "loss": 0.8551, "num_input_tokens_seen": 147019984, "step": 2624 }, { "epoch": 5.844097995545657, "loss": 0.6151929497718811, "loss_ce": 0.0002027209848165512, "loss_iou": 0.26953125, "loss_num": 0.01483154296875, "loss_xval": 0.61328125, "num_input_tokens_seen": 147019984, "step": 2624 }, { "epoch": 5.846325167037862, "grad_norm": 35.5843391418457, "learning_rate": 1e-06, "loss": 0.8085, "num_input_tokens_seen": 147074124, "step": 2625 }, { "epoch": 5.846325167037862, "loss": 0.6340126991271973, "loss_ce": 0.00022365737822838128, "loss_iou": 0.283203125, "loss_num": 0.0135498046875, "loss_xval": 0.6328125, "num_input_tokens_seen": 147074124, "step": 2625 }, { "epoch": 5.848552338530067, "grad_norm": 22.104284286499023, "learning_rate": 1e-06, "loss": 0.7518, "num_input_tokens_seen": 147129964, "step": 2626 }, { "epoch": 5.848552338530067, "loss": 0.7092376947402954, "loss_ce": 0.0002533411025069654, "loss_iou": 0.310546875, "loss_num": 0.017578125, "loss_xval": 0.7109375, "num_input_tokens_seen": 147129964, "step": 2626 }, { "epoch": 5.850779510022272, "grad_norm": 18.39105987548828, "learning_rate": 1e-06, "loss": 0.6966, "num_input_tokens_seen": 147184592, "step": 2627 }, { "epoch": 5.850779510022272, "loss": 0.661689043045044, "loss_ce": 0.0003120756009593606, "loss_iou": 0.27734375, "loss_num": 0.021728515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 147184592, "step": 2627 }, { "epoch": 5.853006681514477, "grad_norm": 22.63771629333496, "learning_rate": 1e-06, "loss": 0.9064, "num_input_tokens_seen": 147241272, "step": 2628 }, { "epoch": 5.853006681514477, "loss": 0.5712363719940186, "loss_ce": 0.00019146442355122417, "loss_iou": 0.251953125, "loss_num": 0.0135498046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 147241272, "step": 2628 }, { "epoch": 5.855233853006681, "grad_norm": 21.009822845458984, "learning_rate": 1e-06, "loss": 0.8111, "num_input_tokens_seen": 147294076, "step": 2629 }, { "epoch": 5.855233853006681, "loss": 0.9777696132659912, "loss_ce": 0.00023052690085023642, "loss_iou": 0.443359375, "loss_num": 0.0179443359375, "loss_xval": 0.9765625, "num_input_tokens_seen": 147294076, "step": 2629 }, { "epoch": 5.857461024498886, "grad_norm": 19.79799461364746, "learning_rate": 1e-06, "loss": 0.7446, "num_input_tokens_seen": 147349408, "step": 2630 }, { "epoch": 5.857461024498886, "loss": 0.6237329244613647, "loss_ce": 0.00019774649990722537, "loss_iou": 0.263671875, "loss_num": 0.01904296875, "loss_xval": 0.625, "num_input_tokens_seen": 147349408, "step": 2630 }, { "epoch": 5.859688195991091, "grad_norm": 22.230148315429688, "learning_rate": 1e-06, "loss": 0.7763, "num_input_tokens_seen": 147405948, "step": 2631 }, { "epoch": 5.859688195991091, "loss": 0.8681101202964783, "loss_ce": 0.0003122506313957274, "loss_iou": 0.37890625, "loss_num": 0.021728515625, "loss_xval": 0.8671875, "num_input_tokens_seen": 147405948, "step": 2631 }, { "epoch": 5.861915367483296, "grad_norm": 16.93956756591797, "learning_rate": 1e-06, "loss": 0.7856, "num_input_tokens_seen": 147460328, "step": 2632 }, { "epoch": 5.861915367483296, "loss": 0.8456612825393677, "loss_ce": 0.00020235308329574764, "loss_iou": 0.34375, "loss_num": 0.031494140625, "loss_xval": 0.84375, "num_input_tokens_seen": 147460328, "step": 2632 }, { "epoch": 5.864142538975501, "grad_norm": 28.430946350097656, "learning_rate": 1e-06, "loss": 0.7788, "num_input_tokens_seen": 147515700, "step": 2633 }, { "epoch": 5.864142538975501, "loss": 0.8373937606811523, "loss_ce": 0.00023549118486698717, "loss_iou": 0.369140625, "loss_num": 0.0198974609375, "loss_xval": 0.8359375, "num_input_tokens_seen": 147515700, "step": 2633 }, { "epoch": 5.866369710467706, "grad_norm": 111.71736145019531, "learning_rate": 1e-06, "loss": 0.9358, "num_input_tokens_seen": 147570320, "step": 2634 }, { "epoch": 5.866369710467706, "loss": 0.9798734188079834, "loss_ce": 0.0003812336944974959, "loss_iou": 0.400390625, "loss_num": 0.035888671875, "loss_xval": 0.98046875, "num_input_tokens_seen": 147570320, "step": 2634 }, { "epoch": 5.868596881959911, "grad_norm": 20.809370040893555, "learning_rate": 1e-06, "loss": 0.7524, "num_input_tokens_seen": 147625424, "step": 2635 }, { "epoch": 5.868596881959911, "loss": 0.6215413808822632, "loss_ce": 0.00020347216923255473, "loss_iou": 0.255859375, "loss_num": 0.021728515625, "loss_xval": 0.62109375, "num_input_tokens_seen": 147625424, "step": 2635 }, { "epoch": 5.870824053452115, "grad_norm": 23.472732543945312, "learning_rate": 1e-06, "loss": 0.896, "num_input_tokens_seen": 147682576, "step": 2636 }, { "epoch": 5.870824053452115, "loss": 0.7243614196777344, "loss_ce": 0.00024032902729231864, "loss_iou": 0.322265625, "loss_num": 0.0159912109375, "loss_xval": 0.72265625, "num_input_tokens_seen": 147682576, "step": 2636 }, { "epoch": 5.873051224944321, "grad_norm": 19.540515899658203, "learning_rate": 1e-06, "loss": 0.8665, "num_input_tokens_seen": 147738480, "step": 2637 }, { "epoch": 5.873051224944321, "loss": 0.94027179479599, "loss_ce": 0.0003304004785604775, "loss_iou": 0.33984375, "loss_num": 0.0517578125, "loss_xval": 0.94140625, "num_input_tokens_seen": 147738480, "step": 2637 }, { "epoch": 5.875278396436526, "grad_norm": 18.206157684326172, "learning_rate": 1e-06, "loss": 0.9815, "num_input_tokens_seen": 147795268, "step": 2638 }, { "epoch": 5.875278396436526, "loss": 0.8137099146842957, "loss_ce": 0.00023330387193709612, "loss_iou": 0.328125, "loss_num": 0.03125, "loss_xval": 0.8125, "num_input_tokens_seen": 147795268, "step": 2638 }, { "epoch": 5.877505567928731, "grad_norm": 36.962310791015625, "learning_rate": 1e-06, "loss": 0.8831, "num_input_tokens_seen": 147851048, "step": 2639 }, { "epoch": 5.877505567928731, "loss": 1.2098288536071777, "loss_ce": 0.0003562095225788653, "loss_iou": 0.5234375, "loss_num": 0.03271484375, "loss_xval": 1.2109375, "num_input_tokens_seen": 147851048, "step": 2639 }, { "epoch": 5.879732739420936, "grad_norm": 20.242557525634766, "learning_rate": 1e-06, "loss": 0.6815, "num_input_tokens_seen": 147908012, "step": 2640 }, { "epoch": 5.879732739420936, "loss": 0.6303223371505737, "loss_ce": 0.0001953527971636504, "loss_iou": 0.27734375, "loss_num": 0.01495361328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 147908012, "step": 2640 }, { "epoch": 5.881959910913141, "grad_norm": 34.177433013916016, "learning_rate": 1e-06, "loss": 0.7303, "num_input_tokens_seen": 147960696, "step": 2641 }, { "epoch": 5.881959910913141, "loss": 0.9157909154891968, "loss_ce": 0.000263590132817626, "loss_iou": 0.39453125, "loss_num": 0.0255126953125, "loss_xval": 0.9140625, "num_input_tokens_seen": 147960696, "step": 2641 }, { "epoch": 5.8841870824053455, "grad_norm": 19.002622604370117, "learning_rate": 1e-06, "loss": 0.6177, "num_input_tokens_seen": 148016204, "step": 2642 }, { "epoch": 5.8841870824053455, "loss": 0.528780460357666, "loss_ce": 0.0002160129661206156, "loss_iou": 0.2265625, "loss_num": 0.0150146484375, "loss_xval": 0.52734375, "num_input_tokens_seen": 148016204, "step": 2642 }, { "epoch": 5.88641425389755, "grad_norm": 20.485605239868164, "learning_rate": 1e-06, "loss": 0.7832, "num_input_tokens_seen": 148072492, "step": 2643 }, { "epoch": 5.88641425389755, "loss": 1.044980764389038, "loss_ce": 0.0015236493200063705, "loss_iou": 0.455078125, "loss_num": 0.0269775390625, "loss_xval": 1.046875, "num_input_tokens_seen": 148072492, "step": 2643 }, { "epoch": 5.888641425389755, "grad_norm": 25.896760940551758, "learning_rate": 1e-06, "loss": 0.6733, "num_input_tokens_seen": 148124560, "step": 2644 }, { "epoch": 5.888641425389755, "loss": 0.5408051013946533, "loss_ce": 0.00021672301227226853, "loss_iou": 0.248046875, "loss_num": 0.00909423828125, "loss_xval": 0.5390625, "num_input_tokens_seen": 148124560, "step": 2644 }, { "epoch": 5.89086859688196, "grad_norm": 15.1829252243042, "learning_rate": 1e-06, "loss": 0.7851, "num_input_tokens_seen": 148182164, "step": 2645 }, { "epoch": 5.89086859688196, "loss": 0.6976854801177979, "loss_ce": 0.000297736085485667, "loss_iou": 0.287109375, "loss_num": 0.0242919921875, "loss_xval": 0.69921875, "num_input_tokens_seen": 148182164, "step": 2645 }, { "epoch": 5.893095768374165, "grad_norm": 20.109758377075195, "learning_rate": 1e-06, "loss": 0.4663, "num_input_tokens_seen": 148237212, "step": 2646 }, { "epoch": 5.893095768374165, "loss": 0.403061181306839, "loss_ce": 0.00016811591922305524, "loss_iou": 0.1708984375, "loss_num": 0.0120849609375, "loss_xval": 0.40234375, "num_input_tokens_seen": 148237212, "step": 2646 }, { "epoch": 5.89532293986637, "grad_norm": 22.928607940673828, "learning_rate": 1e-06, "loss": 0.7238, "num_input_tokens_seen": 148294320, "step": 2647 }, { "epoch": 5.89532293986637, "loss": 0.7370385527610779, "loss_ce": 0.00022214508499018848, "loss_iou": 0.3203125, "loss_num": 0.01953125, "loss_xval": 0.73828125, "num_input_tokens_seen": 148294320, "step": 2647 }, { "epoch": 5.897550111358575, "grad_norm": 18.266637802124023, "learning_rate": 1e-06, "loss": 0.7101, "num_input_tokens_seen": 148350012, "step": 2648 }, { "epoch": 5.897550111358575, "loss": 0.841547429561615, "loss_ce": 0.00023882737150415778, "loss_iou": 0.357421875, "loss_num": 0.0250244140625, "loss_xval": 0.83984375, "num_input_tokens_seen": 148350012, "step": 2648 }, { "epoch": 5.8997772828507795, "grad_norm": 16.207386016845703, "learning_rate": 1e-06, "loss": 0.7109, "num_input_tokens_seen": 148405388, "step": 2649 }, { "epoch": 5.8997772828507795, "loss": 0.6293390989303589, "loss_ce": 0.00018869154155254364, "loss_iou": 0.275390625, "loss_num": 0.0159912109375, "loss_xval": 0.62890625, "num_input_tokens_seen": 148405388, "step": 2649 }, { "epoch": 5.902004454342984, "grad_norm": 16.40260124206543, "learning_rate": 1e-06, "loss": 0.4936, "num_input_tokens_seen": 148462940, "step": 2650 }, { "epoch": 5.902004454342984, "loss": 0.45450687408447266, "loss_ce": 0.00016120026702992618, "loss_iou": 0.1953125, "loss_num": 0.01287841796875, "loss_xval": 0.455078125, "num_input_tokens_seen": 148462940, "step": 2650 }, { "epoch": 5.904231625835189, "grad_norm": 22.90458869934082, "learning_rate": 1e-06, "loss": 0.7208, "num_input_tokens_seen": 148518860, "step": 2651 }, { "epoch": 5.904231625835189, "loss": 0.7937043309211731, "loss_ce": 0.00024726998526602983, "loss_iou": 0.34375, "loss_num": 0.020751953125, "loss_xval": 0.79296875, "num_input_tokens_seen": 148518860, "step": 2651 }, { "epoch": 5.906458797327394, "grad_norm": 15.67951488494873, "learning_rate": 1e-06, "loss": 0.478, "num_input_tokens_seen": 148577372, "step": 2652 }, { "epoch": 5.906458797327394, "loss": 0.41467005014419556, "loss_ce": 0.0002413251786492765, "loss_iou": 0.1962890625, "loss_num": 0.00433349609375, "loss_xval": 0.4140625, "num_input_tokens_seen": 148577372, "step": 2652 }, { "epoch": 5.908685968819599, "grad_norm": 23.35599708557129, "learning_rate": 1e-06, "loss": 0.698, "num_input_tokens_seen": 148633988, "step": 2653 }, { "epoch": 5.908685968819599, "loss": 0.7189466953277588, "loss_ce": 0.0001966924173757434, "loss_iou": 0.306640625, "loss_num": 0.0211181640625, "loss_xval": 0.71875, "num_input_tokens_seen": 148633988, "step": 2653 }, { "epoch": 5.910913140311804, "grad_norm": 22.419492721557617, "learning_rate": 1e-06, "loss": 0.8223, "num_input_tokens_seen": 148686904, "step": 2654 }, { "epoch": 5.910913140311804, "loss": 0.5142409205436707, "loss_ce": 0.0003248959837947041, "loss_iou": 0.2138671875, "loss_num": 0.01708984375, "loss_xval": 0.515625, "num_input_tokens_seen": 148686904, "step": 2654 }, { "epoch": 5.913140311804009, "grad_norm": 22.227079391479492, "learning_rate": 1e-06, "loss": 0.5734, "num_input_tokens_seen": 148741860, "step": 2655 }, { "epoch": 5.913140311804009, "loss": 0.6701195240020752, "loss_ce": 0.00019767896446865052, "loss_iou": 0.2578125, "loss_num": 0.0308837890625, "loss_xval": 0.671875, "num_input_tokens_seen": 148741860, "step": 2655 }, { "epoch": 5.9153674832962135, "grad_norm": 24.240558624267578, "learning_rate": 1e-06, "loss": 0.7875, "num_input_tokens_seen": 148798228, "step": 2656 }, { "epoch": 5.9153674832962135, "loss": 0.7565721869468689, "loss_ce": 0.0002245493233203888, "loss_iou": 0.3203125, "loss_num": 0.0228271484375, "loss_xval": 0.7578125, "num_input_tokens_seen": 148798228, "step": 2656 }, { "epoch": 5.917594654788418, "grad_norm": 21.239978790283203, "learning_rate": 1e-06, "loss": 0.8062, "num_input_tokens_seen": 148852728, "step": 2657 }, { "epoch": 5.917594654788418, "loss": 0.9770278930664062, "loss_ce": 0.00022124522365629673, "loss_iou": 0.40234375, "loss_num": 0.034423828125, "loss_xval": 0.9765625, "num_input_tokens_seen": 148852728, "step": 2657 }, { "epoch": 5.919821826280623, "grad_norm": 21.198122024536133, "learning_rate": 1e-06, "loss": 0.5775, "num_input_tokens_seen": 148910144, "step": 2658 }, { "epoch": 5.919821826280623, "loss": 0.5937119126319885, "loss_ce": 0.00020606812904588878, "loss_iou": 0.25, "loss_num": 0.0186767578125, "loss_xval": 0.59375, "num_input_tokens_seen": 148910144, "step": 2658 }, { "epoch": 5.922048997772828, "grad_norm": 21.191734313964844, "learning_rate": 1e-06, "loss": 0.6275, "num_input_tokens_seen": 148965064, "step": 2659 }, { "epoch": 5.922048997772828, "loss": 0.5399864912033081, "loss_ce": 0.0001916133624035865, "loss_iou": 0.244140625, "loss_num": 0.01031494140625, "loss_xval": 0.5390625, "num_input_tokens_seen": 148965064, "step": 2659 }, { "epoch": 5.924276169265033, "grad_norm": 22.334716796875, "learning_rate": 1e-06, "loss": 0.881, "num_input_tokens_seen": 149019216, "step": 2660 }, { "epoch": 5.924276169265033, "loss": 0.7107348442077637, "loss_ce": 0.00022451579570770264, "loss_iou": 0.298828125, "loss_num": 0.0228271484375, "loss_xval": 0.7109375, "num_input_tokens_seen": 149019216, "step": 2660 }, { "epoch": 5.926503340757238, "grad_norm": 26.160076141357422, "learning_rate": 1e-06, "loss": 0.6856, "num_input_tokens_seen": 149076832, "step": 2661 }, { "epoch": 5.926503340757238, "loss": 0.7112481594085693, "loss_ce": 0.0001885854871943593, "loss_iou": 0.294921875, "loss_num": 0.0245361328125, "loss_xval": 0.7109375, "num_input_tokens_seen": 149076832, "step": 2661 }, { "epoch": 5.928730512249444, "grad_norm": 35.00899887084961, "learning_rate": 1e-06, "loss": 0.7615, "num_input_tokens_seen": 149133992, "step": 2662 }, { "epoch": 5.928730512249444, "loss": 0.8671593070030212, "loss_ce": 0.00021591814584098756, "loss_iou": 0.396484375, "loss_num": 0.01483154296875, "loss_xval": 0.8671875, "num_input_tokens_seen": 149133992, "step": 2662 }, { "epoch": 5.9309576837416484, "grad_norm": 23.221233367919922, "learning_rate": 1e-06, "loss": 0.6566, "num_input_tokens_seen": 149190436, "step": 2663 }, { "epoch": 5.9309576837416484, "loss": 0.43981656432151794, "loss_ce": 0.0002413657057331875, "loss_iou": 0.1943359375, "loss_num": 0.010009765625, "loss_xval": 0.439453125, "num_input_tokens_seen": 149190436, "step": 2663 }, { "epoch": 5.933184855233853, "grad_norm": 19.708267211914062, "learning_rate": 1e-06, "loss": 0.7044, "num_input_tokens_seen": 149247672, "step": 2664 }, { "epoch": 5.933184855233853, "loss": 0.8448212742805481, "loss_ce": 0.00021680007921531796, "loss_iou": 0.375, "loss_num": 0.018798828125, "loss_xval": 0.84375, "num_input_tokens_seen": 149247672, "step": 2664 }, { "epoch": 5.935412026726058, "grad_norm": 15.226134300231934, "learning_rate": 1e-06, "loss": 0.5869, "num_input_tokens_seen": 149305572, "step": 2665 }, { "epoch": 5.935412026726058, "loss": 0.5407141447067261, "loss_ce": 0.00018680887296795845, "loss_iou": 0.2333984375, "loss_num": 0.0146484375, "loss_xval": 0.5390625, "num_input_tokens_seen": 149305572, "step": 2665 }, { "epoch": 5.937639198218263, "grad_norm": 18.039369583129883, "learning_rate": 1e-06, "loss": 0.6593, "num_input_tokens_seen": 149363100, "step": 2666 }, { "epoch": 5.937639198218263, "loss": 0.4833727180957794, "loss_ce": 0.00021840460249222815, "loss_iou": 0.21875, "loss_num": 0.0091552734375, "loss_xval": 0.482421875, "num_input_tokens_seen": 149363100, "step": 2666 }, { "epoch": 5.939866369710468, "grad_norm": 14.021678924560547, "learning_rate": 1e-06, "loss": 0.9587, "num_input_tokens_seen": 149418140, "step": 2667 }, { "epoch": 5.939866369710468, "loss": 0.6920410990715027, "loss_ce": 0.0002686180523596704, "loss_iou": 0.306640625, "loss_num": 0.0159912109375, "loss_xval": 0.69140625, "num_input_tokens_seen": 149418140, "step": 2667 }, { "epoch": 5.942093541202673, "grad_norm": 17.604162216186523, "learning_rate": 1e-06, "loss": 0.5922, "num_input_tokens_seen": 149474312, "step": 2668 }, { "epoch": 5.942093541202673, "loss": 0.5115793347358704, "loss_ce": 0.0002268127864226699, "loss_iou": 0.2353515625, "loss_num": 0.00811767578125, "loss_xval": 0.51171875, "num_input_tokens_seen": 149474312, "step": 2668 }, { "epoch": 5.944320712694878, "grad_norm": 23.896390914916992, "learning_rate": 1e-06, "loss": 0.7063, "num_input_tokens_seen": 149530356, "step": 2669 }, { "epoch": 5.944320712694878, "loss": 0.5715218186378479, "loss_ce": 0.00023274502018466592, "loss_iou": 0.2490234375, "loss_num": 0.0145263671875, "loss_xval": 0.5703125, "num_input_tokens_seen": 149530356, "step": 2669 }, { "epoch": 5.9465478841870825, "grad_norm": 20.610628128051758, "learning_rate": 1e-06, "loss": 0.721, "num_input_tokens_seen": 149587276, "step": 2670 }, { "epoch": 5.9465478841870825, "loss": 0.7370386123657227, "loss_ce": 0.00022224214626476169, "loss_iou": 0.328125, "loss_num": 0.0162353515625, "loss_xval": 0.73828125, "num_input_tokens_seen": 149587276, "step": 2670 }, { "epoch": 5.948775055679287, "grad_norm": 18.218326568603516, "learning_rate": 1e-06, "loss": 0.8538, "num_input_tokens_seen": 149643160, "step": 2671 }, { "epoch": 5.948775055679287, "loss": 1.0529261827468872, "loss_ce": 0.00043590826680883765, "loss_iou": 0.40625, "loss_num": 0.0478515625, "loss_xval": 1.0546875, "num_input_tokens_seen": 149643160, "step": 2671 }, { "epoch": 5.951002227171492, "grad_norm": 20.644004821777344, "learning_rate": 1e-06, "loss": 0.7755, "num_input_tokens_seen": 149697316, "step": 2672 }, { "epoch": 5.951002227171492, "loss": 0.9467422962188721, "loss_ce": 0.0003311632899567485, "loss_iou": 0.357421875, "loss_num": 0.04638671875, "loss_xval": 0.9453125, "num_input_tokens_seen": 149697316, "step": 2672 }, { "epoch": 5.953229398663697, "grad_norm": 15.27214527130127, "learning_rate": 1e-06, "loss": 0.5603, "num_input_tokens_seen": 149754696, "step": 2673 }, { "epoch": 5.953229398663697, "loss": 0.5694996118545532, "loss_ce": 0.00040784955490380526, "loss_iou": 0.23828125, "loss_num": 0.0186767578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 149754696, "step": 2673 }, { "epoch": 5.955456570155902, "grad_norm": 18.75067138671875, "learning_rate": 1e-06, "loss": 0.7863, "num_input_tokens_seen": 149809432, "step": 2674 }, { "epoch": 5.955456570155902, "loss": 0.8103808760643005, "loss_ce": 0.00032227032352238894, "loss_iou": 0.365234375, "loss_num": 0.01556396484375, "loss_xval": 0.80859375, "num_input_tokens_seen": 149809432, "step": 2674 }, { "epoch": 5.957683741648107, "grad_norm": 25.840362548828125, "learning_rate": 1e-06, "loss": 0.8198, "num_input_tokens_seen": 149865132, "step": 2675 }, { "epoch": 5.957683741648107, "loss": 0.85858553647995, "loss_ce": 0.00018709682626649737, "loss_iou": 0.384765625, "loss_num": 0.0177001953125, "loss_xval": 0.859375, "num_input_tokens_seen": 149865132, "step": 2675 }, { "epoch": 5.959910913140312, "grad_norm": 15.168061256408691, "learning_rate": 1e-06, "loss": 0.5927, "num_input_tokens_seen": 149921252, "step": 2676 }, { "epoch": 5.959910913140312, "loss": 0.5525012016296387, "loss_ce": 0.00025514926528558135, "loss_iou": 0.2412109375, "loss_num": 0.01385498046875, "loss_xval": 0.55078125, "num_input_tokens_seen": 149921252, "step": 2676 }, { "epoch": 5.9621380846325165, "grad_norm": 359.7155456542969, "learning_rate": 1e-06, "loss": 0.8264, "num_input_tokens_seen": 149975912, "step": 2677 }, { "epoch": 5.9621380846325165, "loss": 0.7250722646713257, "loss_ce": 0.00021878087136428803, "loss_iou": 0.30859375, "loss_num": 0.0218505859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 149975912, "step": 2677 }, { "epoch": 5.964365256124721, "grad_norm": 20.11498260498047, "learning_rate": 1e-06, "loss": 0.6048, "num_input_tokens_seen": 150032272, "step": 2678 }, { "epoch": 5.964365256124721, "loss": 0.6044542789459229, "loss_ce": 0.00020618733833543956, "loss_iou": 0.255859375, "loss_num": 0.018310546875, "loss_xval": 0.60546875, "num_input_tokens_seen": 150032272, "step": 2678 }, { "epoch": 5.966592427616926, "grad_norm": 40.01982116699219, "learning_rate": 1e-06, "loss": 0.6705, "num_input_tokens_seen": 150089664, "step": 2679 }, { "epoch": 5.966592427616926, "loss": 0.48896247148513794, "loss_ce": 0.00019291002536192536, "loss_iou": 0.1884765625, "loss_num": 0.0223388671875, "loss_xval": 0.48828125, "num_input_tokens_seen": 150089664, "step": 2679 }, { "epoch": 5.968819599109131, "grad_norm": 13.833683013916016, "learning_rate": 1e-06, "loss": 0.5939, "num_input_tokens_seen": 150145520, "step": 2680 }, { "epoch": 5.968819599109131, "loss": 0.5350244641304016, "loss_ce": 0.000356461969204247, "loss_iou": 0.236328125, "loss_num": 0.0125732421875, "loss_xval": 0.53515625, "num_input_tokens_seen": 150145520, "step": 2680 }, { "epoch": 5.971046770601336, "grad_norm": 28.182466506958008, "learning_rate": 1e-06, "loss": 0.9304, "num_input_tokens_seen": 150198876, "step": 2681 }, { "epoch": 5.971046770601336, "loss": 0.5284985899925232, "loss_ce": 0.00017829591524787247, "loss_iou": 0.220703125, "loss_num": 0.017333984375, "loss_xval": 0.52734375, "num_input_tokens_seen": 150198876, "step": 2681 }, { "epoch": 5.973273942093542, "grad_norm": 15.853994369506836, "learning_rate": 1e-06, "loss": 0.6288, "num_input_tokens_seen": 150255024, "step": 2682 }, { "epoch": 5.973273942093542, "loss": 0.5141396522521973, "loss_ce": 0.0004677815013565123, "loss_iou": 0.1982421875, "loss_num": 0.023681640625, "loss_xval": 0.515625, "num_input_tokens_seen": 150255024, "step": 2682 }, { "epoch": 5.9755011135857465, "grad_norm": 19.383615493774414, "learning_rate": 1e-06, "loss": 0.9422, "num_input_tokens_seen": 150309836, "step": 2683 }, { "epoch": 5.9755011135857465, "loss": 0.8364056944847107, "loss_ce": 0.00022408382210414857, "loss_iou": 0.33203125, "loss_num": 0.03466796875, "loss_xval": 0.8359375, "num_input_tokens_seen": 150309836, "step": 2683 }, { "epoch": 5.977728285077951, "grad_norm": 23.231369018554688, "learning_rate": 1e-06, "loss": 0.7372, "num_input_tokens_seen": 150365848, "step": 2684 }, { "epoch": 5.977728285077951, "loss": 0.8447408676147461, "loss_ce": 0.0002584208268672228, "loss_iou": 0.34765625, "loss_num": 0.02978515625, "loss_xval": 0.84375, "num_input_tokens_seen": 150365848, "step": 2684 }, { "epoch": 5.979955456570156, "grad_norm": 22.7137393951416, "learning_rate": 1e-06, "loss": 0.6089, "num_input_tokens_seen": 150423628, "step": 2685 }, { "epoch": 5.979955456570156, "loss": 0.5462290048599243, "loss_ce": 0.00020853537716902792, "loss_iou": 0.23828125, "loss_num": 0.0137939453125, "loss_xval": 0.546875, "num_input_tokens_seen": 150423628, "step": 2685 }, { "epoch": 5.982182628062361, "grad_norm": 24.549579620361328, "learning_rate": 1e-06, "loss": 0.7922, "num_input_tokens_seen": 150478840, "step": 2686 }, { "epoch": 5.982182628062361, "loss": 0.8291885256767273, "loss_ce": 0.00020902017422486097, "loss_iou": 0.373046875, "loss_num": 0.0166015625, "loss_xval": 0.828125, "num_input_tokens_seen": 150478840, "step": 2686 }, { "epoch": 5.984409799554566, "grad_norm": 26.245058059692383, "learning_rate": 1e-06, "loss": 0.6099, "num_input_tokens_seen": 150534692, "step": 2687 }, { "epoch": 5.984409799554566, "loss": 0.49533137679100037, "loss_ce": 0.00021419850236270577, "loss_iou": 0.224609375, "loss_num": 0.00909423828125, "loss_xval": 0.49609375, "num_input_tokens_seen": 150534692, "step": 2687 }, { "epoch": 5.986636971046771, "grad_norm": 24.997560501098633, "learning_rate": 1e-06, "loss": 0.5429, "num_input_tokens_seen": 150591508, "step": 2688 }, { "epoch": 5.986636971046771, "loss": 0.5927173495292664, "loss_ce": 0.00031011985265649855, "loss_iou": 0.267578125, "loss_num": 0.01177978515625, "loss_xval": 0.59375, "num_input_tokens_seen": 150591508, "step": 2688 }, { "epoch": 5.988864142538976, "grad_norm": 16.52888298034668, "learning_rate": 1e-06, "loss": 0.7054, "num_input_tokens_seen": 150648216, "step": 2689 }, { "epoch": 5.988864142538976, "loss": 0.44299080967903137, "loss_ce": 0.00022651677136309445, "loss_iou": 0.171875, "loss_num": 0.0198974609375, "loss_xval": 0.443359375, "num_input_tokens_seen": 150648216, "step": 2689 }, { "epoch": 5.991091314031181, "grad_norm": 19.582372665405273, "learning_rate": 1e-06, "loss": 0.901, "num_input_tokens_seen": 150705220, "step": 2690 }, { "epoch": 5.991091314031181, "loss": 0.9914488196372986, "loss_ce": 0.00023790652630850673, "loss_iou": 0.43359375, "loss_num": 0.0244140625, "loss_xval": 0.9921875, "num_input_tokens_seen": 150705220, "step": 2690 }, { "epoch": 5.993318485523385, "grad_norm": 19.67958641052246, "learning_rate": 1e-06, "loss": 0.7656, "num_input_tokens_seen": 150759608, "step": 2691 }, { "epoch": 5.993318485523385, "loss": 0.586168110370636, "loss_ce": 0.00023060785315465182, "loss_iou": 0.2431640625, "loss_num": 0.0198974609375, "loss_xval": 0.5859375, "num_input_tokens_seen": 150759608, "step": 2691 }, { "epoch": 5.99554565701559, "grad_norm": 15.20195198059082, "learning_rate": 1e-06, "loss": 0.6715, "num_input_tokens_seen": 150816908, "step": 2692 }, { "epoch": 5.99554565701559, "loss": 0.6716079115867615, "loss_ce": 0.00022117490880191326, "loss_iou": 0.28515625, "loss_num": 0.020263671875, "loss_xval": 0.671875, "num_input_tokens_seen": 150816908, "step": 2692 }, { "epoch": 5.997772828507795, "grad_norm": 25.80984878540039, "learning_rate": 1e-06, "loss": 0.8853, "num_input_tokens_seen": 150872492, "step": 2693 }, { "epoch": 5.997772828507795, "loss": 1.047957181930542, "loss_ce": 0.00034974192385561764, "loss_iou": 0.43359375, "loss_num": 0.0361328125, "loss_xval": 1.046875, "num_input_tokens_seen": 150872492, "step": 2693 }, { "epoch": 6.0, "grad_norm": 18.474491119384766, "learning_rate": 1e-06, "loss": 0.8475, "num_input_tokens_seen": 150929208, "step": 2694 }, { "epoch": 6.0, "loss": 1.0649967193603516, "loss_ce": 0.0002994451788254082, "loss_iou": 0.4453125, "loss_num": 0.034912109375, "loss_xval": 1.0625, "num_input_tokens_seen": 150929208, "step": 2694 }, { "epoch": 6.002227171492205, "grad_norm": 25.486400604248047, "learning_rate": 1e-06, "loss": 0.7665, "num_input_tokens_seen": 150983692, "step": 2695 }, { "epoch": 6.002227171492205, "loss": 0.7017991542816162, "loss_ce": 0.0002610695082694292, "loss_iou": 0.283203125, "loss_num": 0.0269775390625, "loss_xval": 0.703125, "num_input_tokens_seen": 150983692, "step": 2695 }, { "epoch": 6.00445434298441, "grad_norm": 15.804261207580566, "learning_rate": 1e-06, "loss": 0.621, "num_input_tokens_seen": 151040928, "step": 2696 }, { "epoch": 6.00445434298441, "loss": 0.6565641164779663, "loss_ce": 0.000192045554285869, "loss_iou": 0.28125, "loss_num": 0.0185546875, "loss_xval": 0.65625, "num_input_tokens_seen": 151040928, "step": 2696 }, { "epoch": 6.006681514476615, "grad_norm": 18.53908348083496, "learning_rate": 1e-06, "loss": 0.7812, "num_input_tokens_seen": 151095388, "step": 2697 }, { "epoch": 6.006681514476615, "loss": 0.8815810084342957, "loss_ce": 0.00023334722209256142, "loss_iou": 0.369140625, "loss_num": 0.02880859375, "loss_xval": 0.8828125, "num_input_tokens_seen": 151095388, "step": 2697 }, { "epoch": 6.008908685968819, "grad_norm": 23.245758056640625, "learning_rate": 1e-06, "loss": 0.7773, "num_input_tokens_seen": 151148968, "step": 2698 }, { "epoch": 6.008908685968819, "loss": 0.8959696292877197, "loss_ce": 0.00021769374143332243, "loss_iou": 0.35546875, "loss_num": 0.03759765625, "loss_xval": 0.89453125, "num_input_tokens_seen": 151148968, "step": 2698 }, { "epoch": 6.011135857461024, "grad_norm": 13.859575271606445, "learning_rate": 1e-06, "loss": 0.54, "num_input_tokens_seen": 151205420, "step": 2699 }, { "epoch": 6.011135857461024, "loss": 0.5023019313812256, "loss_ce": 0.00022672602790407836, "loss_iou": 0.21484375, "loss_num": 0.01434326171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 151205420, "step": 2699 }, { "epoch": 6.013363028953229, "grad_norm": 17.814104080200195, "learning_rate": 1e-06, "loss": 0.5899, "num_input_tokens_seen": 151262892, "step": 2700 }, { "epoch": 6.013363028953229, "loss": 0.6286102533340454, "loss_ce": 0.0001923007657751441, "loss_iou": 0.255859375, "loss_num": 0.02294921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 151262892, "step": 2700 }, { "epoch": 6.015590200445434, "grad_norm": 20.40790557861328, "learning_rate": 1e-06, "loss": 0.7099, "num_input_tokens_seen": 151316164, "step": 2701 }, { "epoch": 6.015590200445434, "loss": 0.8200287818908691, "loss_ce": 0.00020452812896110117, "loss_iou": 0.34765625, "loss_num": 0.025146484375, "loss_xval": 0.8203125, "num_input_tokens_seen": 151316164, "step": 2701 }, { "epoch": 6.017817371937639, "grad_norm": 19.481016159057617, "learning_rate": 1e-06, "loss": 0.6958, "num_input_tokens_seen": 151371160, "step": 2702 }, { "epoch": 6.017817371937639, "loss": 0.6471884250640869, "loss_ce": 0.00021577253937721252, "loss_iou": 0.25, "loss_num": 0.0296630859375, "loss_xval": 0.6484375, "num_input_tokens_seen": 151371160, "step": 2702 }, { "epoch": 6.020044543429844, "grad_norm": 32.87615203857422, "learning_rate": 1e-06, "loss": 0.7168, "num_input_tokens_seen": 151428440, "step": 2703 }, { "epoch": 6.020044543429844, "loss": 0.737989604473114, "loss_ce": 0.00019663787679746747, "loss_iou": 0.330078125, "loss_num": 0.015625, "loss_xval": 0.73828125, "num_input_tokens_seen": 151428440, "step": 2703 }, { "epoch": 6.022271714922049, "grad_norm": 11.81971263885498, "learning_rate": 1e-06, "loss": 0.6217, "num_input_tokens_seen": 151483012, "step": 2704 }, { "epoch": 6.022271714922049, "loss": 0.6774269342422485, "loss_ce": 0.00018085945339407772, "loss_iou": 0.291015625, "loss_num": 0.0189208984375, "loss_xval": 0.67578125, "num_input_tokens_seen": 151483012, "step": 2704 }, { "epoch": 6.0244988864142535, "grad_norm": 30.31116485595703, "learning_rate": 1e-06, "loss": 0.6544, "num_input_tokens_seen": 151538432, "step": 2705 }, { "epoch": 6.0244988864142535, "loss": 0.8250452280044556, "loss_ce": 0.00033821084070950747, "loss_iou": 0.341796875, "loss_num": 0.0281982421875, "loss_xval": 0.82421875, "num_input_tokens_seen": 151538432, "step": 2705 }, { "epoch": 6.026726057906459, "grad_norm": 18.400726318359375, "learning_rate": 1e-06, "loss": 0.5954, "num_input_tokens_seen": 151594204, "step": 2706 }, { "epoch": 6.026726057906459, "loss": 0.5231574177742004, "loss_ce": 0.00020822283113375306, "loss_iou": 0.21875, "loss_num": 0.016845703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 151594204, "step": 2706 }, { "epoch": 6.028953229398664, "grad_norm": 15.375958442687988, "learning_rate": 1e-06, "loss": 0.7347, "num_input_tokens_seen": 151651444, "step": 2707 }, { "epoch": 6.028953229398664, "loss": 0.5390880107879639, "loss_ce": 0.00026969151804223657, "loss_iou": 0.2333984375, "loss_num": 0.0146484375, "loss_xval": 0.5390625, "num_input_tokens_seen": 151651444, "step": 2707 }, { "epoch": 6.031180400890869, "grad_norm": 20.725507736206055, "learning_rate": 1e-06, "loss": 0.546, "num_input_tokens_seen": 151708908, "step": 2708 }, { "epoch": 6.031180400890869, "loss": 0.49738240242004395, "loss_ce": 0.0009224280365742743, "loss_iou": 0.203125, "loss_num": 0.01806640625, "loss_xval": 0.49609375, "num_input_tokens_seen": 151708908, "step": 2708 }, { "epoch": 6.033407572383074, "grad_norm": 21.394176483154297, "learning_rate": 1e-06, "loss": 0.4111, "num_input_tokens_seen": 151765488, "step": 2709 }, { "epoch": 6.033407572383074, "loss": 0.4496401846408844, "loss_ce": 0.0001773049880284816, "loss_iou": 0.1826171875, "loss_num": 0.0167236328125, "loss_xval": 0.44921875, "num_input_tokens_seen": 151765488, "step": 2709 }, { "epoch": 6.035634743875279, "grad_norm": 26.451292037963867, "learning_rate": 1e-06, "loss": 0.5001, "num_input_tokens_seen": 151820864, "step": 2710 }, { "epoch": 6.035634743875279, "loss": 0.41302353143692017, "loss_ce": 0.0001817169541027397, "loss_iou": 0.177734375, "loss_num": 0.0115966796875, "loss_xval": 0.412109375, "num_input_tokens_seen": 151820864, "step": 2710 }, { "epoch": 6.0378619153674835, "grad_norm": 18.717567443847656, "learning_rate": 1e-06, "loss": 0.6072, "num_input_tokens_seen": 151879976, "step": 2711 }, { "epoch": 6.0378619153674835, "loss": 0.7275397181510925, "loss_ce": 0.00024480142747052014, "loss_iou": 0.31640625, "loss_num": 0.0191650390625, "loss_xval": 0.7265625, "num_input_tokens_seen": 151879976, "step": 2711 }, { "epoch": 6.040089086859688, "grad_norm": 15.812241554260254, "learning_rate": 1e-06, "loss": 0.6669, "num_input_tokens_seen": 151938396, "step": 2712 }, { "epoch": 6.040089086859688, "loss": 0.5586850047111511, "loss_ce": 0.00021330438903532922, "loss_iou": 0.2275390625, "loss_num": 0.020751953125, "loss_xval": 0.55859375, "num_input_tokens_seen": 151938396, "step": 2712 }, { "epoch": 6.042316258351893, "grad_norm": 28.703325271606445, "learning_rate": 1e-06, "loss": 1.0115, "num_input_tokens_seen": 151992472, "step": 2713 }, { "epoch": 6.042316258351893, "loss": 0.8724164366722107, "loss_ce": 0.0003461412852630019, "loss_iou": 0.384765625, "loss_num": 0.020263671875, "loss_xval": 0.87109375, "num_input_tokens_seen": 151992472, "step": 2713 }, { "epoch": 6.044543429844098, "grad_norm": 21.311323165893555, "learning_rate": 1e-06, "loss": 0.9992, "num_input_tokens_seen": 152046528, "step": 2714 }, { "epoch": 6.044543429844098, "loss": 0.9513915777206421, "loss_ce": 0.00021972534887026995, "loss_iou": 0.408203125, "loss_num": 0.0269775390625, "loss_xval": 0.953125, "num_input_tokens_seen": 152046528, "step": 2714 }, { "epoch": 6.046770601336303, "grad_norm": 16.139442443847656, "learning_rate": 1e-06, "loss": 0.5864, "num_input_tokens_seen": 152103856, "step": 2715 }, { "epoch": 6.046770601336303, "loss": 0.7112746238708496, "loss_ce": 0.00021506489429157227, "loss_iou": 0.306640625, "loss_num": 0.01953125, "loss_xval": 0.7109375, "num_input_tokens_seen": 152103856, "step": 2715 }, { "epoch": 6.048997772828508, "grad_norm": 18.766185760498047, "learning_rate": 1e-06, "loss": 0.6731, "num_input_tokens_seen": 152161252, "step": 2716 }, { "epoch": 6.048997772828508, "loss": 0.6426092386245728, "loss_ce": 0.0002752981963567436, "loss_iou": 0.283203125, "loss_num": 0.01483154296875, "loss_xval": 0.640625, "num_input_tokens_seen": 152161252, "step": 2716 }, { "epoch": 6.051224944320713, "grad_norm": 20.689594268798828, "learning_rate": 1e-06, "loss": 0.6403, "num_input_tokens_seen": 152219268, "step": 2717 }, { "epoch": 6.051224944320713, "loss": 0.6734442114830017, "loss_ce": 0.00022642976546194404, "loss_iou": 0.291015625, "loss_num": 0.0181884765625, "loss_xval": 0.671875, "num_input_tokens_seen": 152219268, "step": 2717 }, { "epoch": 6.0534521158129175, "grad_norm": 20.202369689941406, "learning_rate": 1e-06, "loss": 0.9098, "num_input_tokens_seen": 152275040, "step": 2718 }, { "epoch": 6.0534521158129175, "loss": 1.088090181350708, "loss_ce": 0.00019952133879996836, "loss_iou": 0.462890625, "loss_num": 0.032470703125, "loss_xval": 1.0859375, "num_input_tokens_seen": 152275040, "step": 2718 }, { "epoch": 6.055679287305122, "grad_norm": 24.603801727294922, "learning_rate": 1e-06, "loss": 0.5698, "num_input_tokens_seen": 152329728, "step": 2719 }, { "epoch": 6.055679287305122, "loss": 0.7060960531234741, "loss_ce": 0.0002855417551472783, "loss_iou": 0.3125, "loss_num": 0.0164794921875, "loss_xval": 0.70703125, "num_input_tokens_seen": 152329728, "step": 2719 }, { "epoch": 6.057906458797327, "grad_norm": 17.9193172454834, "learning_rate": 1e-06, "loss": 0.528, "num_input_tokens_seen": 152388868, "step": 2720 }, { "epoch": 6.057906458797327, "loss": 0.4527170658111572, "loss_ce": 0.00020240643061697483, "loss_iou": 0.17578125, "loss_num": 0.02001953125, "loss_xval": 0.453125, "num_input_tokens_seen": 152388868, "step": 2720 }, { "epoch": 6.060133630289532, "grad_norm": 16.661762237548828, "learning_rate": 1e-06, "loss": 0.6847, "num_input_tokens_seen": 152442380, "step": 2721 }, { "epoch": 6.060133630289532, "loss": 0.6767435073852539, "loss_ce": 0.0007180861430242658, "loss_iou": 0.263671875, "loss_num": 0.030029296875, "loss_xval": 0.67578125, "num_input_tokens_seen": 152442380, "step": 2721 }, { "epoch": 6.062360801781737, "grad_norm": 16.014999389648438, "learning_rate": 1e-06, "loss": 0.5119, "num_input_tokens_seen": 152499432, "step": 2722 }, { "epoch": 6.062360801781737, "loss": 0.574461817741394, "loss_ce": 0.000243054106249474, "loss_iou": 0.2431640625, "loss_num": 0.017822265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 152499432, "step": 2722 }, { "epoch": 6.064587973273942, "grad_norm": 39.888118743896484, "learning_rate": 1e-06, "loss": 0.8712, "num_input_tokens_seen": 152551644, "step": 2723 }, { "epoch": 6.064587973273942, "loss": 0.9099514484405518, "loss_ce": 0.0002834274200722575, "loss_iou": 0.38671875, "loss_num": 0.0277099609375, "loss_xval": 0.91015625, "num_input_tokens_seen": 152551644, "step": 2723 }, { "epoch": 6.066815144766147, "grad_norm": 16.595260620117188, "learning_rate": 1e-06, "loss": 0.6684, "num_input_tokens_seen": 152608960, "step": 2724 }, { "epoch": 6.066815144766147, "loss": 0.7639758586883545, "loss_ce": 0.0001819009194150567, "loss_iou": 0.283203125, "loss_num": 0.03955078125, "loss_xval": 0.765625, "num_input_tokens_seen": 152608960, "step": 2724 }, { "epoch": 6.0690423162583516, "grad_norm": 15.64448070526123, "learning_rate": 1e-06, "loss": 0.6383, "num_input_tokens_seen": 152661612, "step": 2725 }, { "epoch": 6.0690423162583516, "loss": 0.5945534706115723, "loss_ce": 0.0003151525743305683, "loss_iou": 0.234375, "loss_num": 0.02490234375, "loss_xval": 0.59375, "num_input_tokens_seen": 152661612, "step": 2725 }, { "epoch": 6.071269487750556, "grad_norm": 14.511089324951172, "learning_rate": 1e-06, "loss": 0.7527, "num_input_tokens_seen": 152717140, "step": 2726 }, { "epoch": 6.071269487750556, "loss": 0.7612234354019165, "loss_ce": 0.00023707791115157306, "loss_iou": 0.337890625, "loss_num": 0.0166015625, "loss_xval": 0.76171875, "num_input_tokens_seen": 152717140, "step": 2726 }, { "epoch": 6.073496659242761, "grad_norm": 23.56868553161621, "learning_rate": 1e-06, "loss": 0.69, "num_input_tokens_seen": 152772512, "step": 2727 }, { "epoch": 6.073496659242761, "loss": 0.8910683393478394, "loss_ce": 0.00019922200590372086, "loss_iou": 0.373046875, "loss_num": 0.029296875, "loss_xval": 0.890625, "num_input_tokens_seen": 152772512, "step": 2727 }, { "epoch": 6.075723830734967, "grad_norm": 15.447635650634766, "learning_rate": 1e-06, "loss": 0.5218, "num_input_tokens_seen": 152828880, "step": 2728 }, { "epoch": 6.075723830734967, "loss": 0.5673261284828186, "loss_ce": 0.00018746175919659436, "loss_iou": 0.21875, "loss_num": 0.0257568359375, "loss_xval": 0.56640625, "num_input_tokens_seen": 152828880, "step": 2728 }, { "epoch": 6.077951002227172, "grad_norm": 24.025293350219727, "learning_rate": 1e-06, "loss": 0.862, "num_input_tokens_seen": 152884388, "step": 2729 }, { "epoch": 6.077951002227172, "loss": 0.7360300421714783, "loss_ce": 0.00019018063903786242, "loss_iou": 0.32421875, "loss_num": 0.01708984375, "loss_xval": 0.734375, "num_input_tokens_seen": 152884388, "step": 2729 }, { "epoch": 6.080178173719377, "grad_norm": 16.803165435791016, "learning_rate": 1e-06, "loss": 0.8949, "num_input_tokens_seen": 152942756, "step": 2730 }, { "epoch": 6.080178173719377, "loss": 0.9147862195968628, "loss_ce": 0.00023543770657852292, "loss_iou": 0.349609375, "loss_num": 0.042724609375, "loss_xval": 0.9140625, "num_input_tokens_seen": 152942756, "step": 2730 }, { "epoch": 6.082405345211582, "grad_norm": 18.61233139038086, "learning_rate": 1e-06, "loss": 0.6134, "num_input_tokens_seen": 152998352, "step": 2731 }, { "epoch": 6.082405345211582, "loss": 0.46365272998809814, "loss_ce": 0.0004569324664771557, "loss_iou": 0.2041015625, "loss_num": 0.0108642578125, "loss_xval": 0.462890625, "num_input_tokens_seen": 152998352, "step": 2731 }, { "epoch": 6.0846325167037865, "grad_norm": 22.512348175048828, "learning_rate": 1e-06, "loss": 0.8523, "num_input_tokens_seen": 153052908, "step": 2732 }, { "epoch": 6.0846325167037865, "loss": 0.7741538882255554, "loss_ce": 0.00022813121904619038, "loss_iou": 0.310546875, "loss_num": 0.03076171875, "loss_xval": 0.7734375, "num_input_tokens_seen": 153052908, "step": 2732 }, { "epoch": 6.086859688195991, "grad_norm": 36.26032257080078, "learning_rate": 1e-06, "loss": 0.8099, "num_input_tokens_seen": 153107940, "step": 2733 }, { "epoch": 6.086859688195991, "loss": 0.8505562543869019, "loss_ce": 0.00021446403115987778, "loss_iou": 0.39453125, "loss_num": 0.0123291015625, "loss_xval": 0.8515625, "num_input_tokens_seen": 153107940, "step": 2733 }, { "epoch": 6.089086859688196, "grad_norm": 23.493366241455078, "learning_rate": 1e-06, "loss": 0.7301, "num_input_tokens_seen": 153166016, "step": 2734 }, { "epoch": 6.089086859688196, "loss": 0.6998932361602783, "loss_ce": 0.00018625493976287544, "loss_iou": 0.298828125, "loss_num": 0.020263671875, "loss_xval": 0.69921875, "num_input_tokens_seen": 153166016, "step": 2734 }, { "epoch": 6.091314031180401, "grad_norm": 14.597694396972656, "learning_rate": 1e-06, "loss": 0.6806, "num_input_tokens_seen": 153222732, "step": 2735 }, { "epoch": 6.091314031180401, "loss": 0.44916197657585144, "loss_ce": 0.00018737564096227288, "loss_iou": 0.1962890625, "loss_num": 0.01123046875, "loss_xval": 0.44921875, "num_input_tokens_seen": 153222732, "step": 2735 }, { "epoch": 6.093541202672606, "grad_norm": 22.396705627441406, "learning_rate": 1e-06, "loss": 0.7905, "num_input_tokens_seen": 153279676, "step": 2736 }, { "epoch": 6.093541202672606, "loss": 0.8422399163246155, "loss_ce": 0.00019887213420588523, "loss_iou": 0.361328125, "loss_num": 0.02392578125, "loss_xval": 0.84375, "num_input_tokens_seen": 153279676, "step": 2736 }, { "epoch": 6.095768374164811, "grad_norm": 125.70928192138672, "learning_rate": 1e-06, "loss": 0.6704, "num_input_tokens_seen": 153335272, "step": 2737 }, { "epoch": 6.095768374164811, "loss": 0.6769283413887024, "loss_ce": 0.0001705446484265849, "loss_iou": 0.298828125, "loss_num": 0.01611328125, "loss_xval": 0.67578125, "num_input_tokens_seen": 153335272, "step": 2737 }, { "epoch": 6.097995545657016, "grad_norm": 16.32554817199707, "learning_rate": 1e-06, "loss": 0.6954, "num_input_tokens_seen": 153390464, "step": 2738 }, { "epoch": 6.097995545657016, "loss": 0.9421560764312744, "loss_ce": 0.0002615359262563288, "loss_iou": 0.419921875, "loss_num": 0.0206298828125, "loss_xval": 0.94140625, "num_input_tokens_seen": 153390464, "step": 2738 }, { "epoch": 6.1002227171492205, "grad_norm": 28.368751525878906, "learning_rate": 1e-06, "loss": 0.806, "num_input_tokens_seen": 153448492, "step": 2739 }, { "epoch": 6.1002227171492205, "loss": 0.846657395362854, "loss_ce": 0.00022182743123266846, "loss_iou": 0.357421875, "loss_num": 0.0264892578125, "loss_xval": 0.84765625, "num_input_tokens_seen": 153448492, "step": 2739 }, { "epoch": 6.102449888641425, "grad_norm": 16.504480361938477, "learning_rate": 1e-06, "loss": 0.651, "num_input_tokens_seen": 153502140, "step": 2740 }, { "epoch": 6.102449888641425, "loss": 0.6750026345252991, "loss_ce": 0.0001979665830731392, "loss_iou": 0.298828125, "loss_num": 0.01531982421875, "loss_xval": 0.67578125, "num_input_tokens_seen": 153502140, "step": 2740 }, { "epoch": 6.10467706013363, "grad_norm": 17.08942413330078, "learning_rate": 1e-06, "loss": 0.5972, "num_input_tokens_seen": 153558592, "step": 2741 }, { "epoch": 6.10467706013363, "loss": 0.7375233769416809, "loss_ce": 0.00021869146439712495, "loss_iou": 0.29296875, "loss_num": 0.030029296875, "loss_xval": 0.73828125, "num_input_tokens_seen": 153558592, "step": 2741 }, { "epoch": 6.106904231625835, "grad_norm": 39.28758239746094, "learning_rate": 1e-06, "loss": 0.6328, "num_input_tokens_seen": 153615344, "step": 2742 }, { "epoch": 6.106904231625835, "loss": 0.6711956858634949, "loss_ce": 0.00029724877094849944, "loss_iou": 0.287109375, "loss_num": 0.0194091796875, "loss_xval": 0.671875, "num_input_tokens_seen": 153615344, "step": 2742 }, { "epoch": 6.10913140311804, "grad_norm": 25.463825225830078, "learning_rate": 1e-06, "loss": 0.6359, "num_input_tokens_seen": 153673836, "step": 2743 }, { "epoch": 6.10913140311804, "loss": 0.6788397431373596, "loss_ce": 0.0002203606127295643, "loss_iou": 0.3046875, "loss_num": 0.01385498046875, "loss_xval": 0.6796875, "num_input_tokens_seen": 153673836, "step": 2743 }, { "epoch": 6.111358574610245, "grad_norm": 20.883134841918945, "learning_rate": 1e-06, "loss": 0.7075, "num_input_tokens_seen": 153728812, "step": 2744 }, { "epoch": 6.111358574610245, "loss": 0.9753950834274292, "loss_ce": 0.00029741463367827237, "loss_iou": 0.43359375, "loss_num": 0.021728515625, "loss_xval": 0.9765625, "num_input_tokens_seen": 153728812, "step": 2744 }, { "epoch": 6.11358574610245, "grad_norm": 24.50876808166504, "learning_rate": 1e-06, "loss": 0.8218, "num_input_tokens_seen": 153784252, "step": 2745 }, { "epoch": 6.11358574610245, "loss": 0.9287269115447998, "loss_ce": 0.0009925166377797723, "loss_iou": 0.3671875, "loss_num": 0.038818359375, "loss_xval": 0.9296875, "num_input_tokens_seen": 153784252, "step": 2745 }, { "epoch": 6.1158129175946545, "grad_norm": 22.58624839782715, "learning_rate": 1e-06, "loss": 0.6866, "num_input_tokens_seen": 153839176, "step": 2746 }, { "epoch": 6.1158129175946545, "loss": 0.7910230755805969, "loss_ce": 0.0002516076201573014, "loss_iou": 0.33203125, "loss_num": 0.0247802734375, "loss_xval": 0.7890625, "num_input_tokens_seen": 153839176, "step": 2746 }, { "epoch": 6.118040089086859, "grad_norm": 12.76305103302002, "learning_rate": 1e-06, "loss": 0.6154, "num_input_tokens_seen": 153897360, "step": 2747 }, { "epoch": 6.118040089086859, "loss": 0.6281359791755676, "loss_ce": 0.0002062909334199503, "loss_iou": 0.291015625, "loss_num": 0.0089111328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 153897360, "step": 2747 }, { "epoch": 6.120267260579064, "grad_norm": 17.98402214050293, "learning_rate": 1e-06, "loss": 0.6908, "num_input_tokens_seen": 153951844, "step": 2748 }, { "epoch": 6.120267260579064, "loss": 0.8605327606201172, "loss_ce": 0.0001811749825719744, "loss_iou": 0.34765625, "loss_num": 0.033203125, "loss_xval": 0.859375, "num_input_tokens_seen": 153951844, "step": 2748 }, { "epoch": 6.122494432071269, "grad_norm": 19.222530364990234, "learning_rate": 1e-06, "loss": 0.7594, "num_input_tokens_seen": 154006004, "step": 2749 }, { "epoch": 6.122494432071269, "loss": 0.7193889617919922, "loss_ce": 0.0003948350786231458, "loss_iou": 0.3046875, "loss_num": 0.02197265625, "loss_xval": 0.71875, "num_input_tokens_seen": 154006004, "step": 2749 }, { "epoch": 6.124721603563474, "grad_norm": 16.589632034301758, "learning_rate": 1e-06, "loss": 0.7099, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "eval_seeclick_web_CIoU": 0.5717557966709137, "eval_seeclick_web_GIoU": 0.5666035413742065, "eval_seeclick_web_IoU": 0.5889425873756409, "eval_seeclick_web_MAE_all": 0.016641407273709774, "eval_seeclick_web_MAE_h": 0.009463720256462693, "eval_seeclick_web_MAE_w": 0.0170047702267766, "eval_seeclick_web_MAE_x_boxes": 0.009009606204926968, "eval_seeclick_web_MAE_y_boxes": 0.022332632914185524, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9315496683120728, "eval_seeclick_web_loss_ce": 0.0002911543124355376, "eval_seeclick_web_loss_iou": 0.423828125, "eval_seeclick_web_loss_num": 0.0130767822265625, "eval_seeclick_web_loss_xval": 0.912841796875, "eval_seeclick_web_runtime": 26.4518, "eval_seeclick_web_samples_per_second": 1.89, "eval_seeclick_web_steps_per_second": 0.076, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "eval_icons_CIoU": 0.2957966476678848, "eval_icons_GIoU": 0.3207407593727112, "eval_icons_IoU": 0.3739467114210129, "eval_icons_MAE_all": 0.06642757169902325, "eval_icons_MAE_h": 0.03895352780818939, "eval_icons_MAE_w": 0.07084468938410282, "eval_icons_MAE_x_boxes": 0.06239369884133339, "eval_icons_MAE_y_boxes": 0.03859401401132345, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7069156169891357, "eval_icons_loss_ce": 0.00033613041159696877, "eval_icons_loss_iou": 0.6629638671875, "eval_icons_loss_num": 0.06306648254394531, "eval_icons_loss_xval": 1.64111328125, "eval_icons_runtime": 25.4698, "eval_icons_samples_per_second": 1.963, "eval_icons_steps_per_second": 0.079, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "eval_screenspot_CIoU": 0.3402452568213145, "eval_screenspot_GIoU": 0.3564106822013855, "eval_screenspot_IoU": 0.4236031075318654, "eval_screenspot_MAE_all": 0.06354892750581105, "eval_screenspot_MAE_h": 0.03823430463671684, "eval_screenspot_MAE_w": 0.07551725829641025, "eval_screenspot_MAE_x_boxes": 0.07483576859037082, "eval_screenspot_MAE_y_boxes": 0.046273874429365, "eval_screenspot_inside_bbox": 0.659583330154419, "eval_screenspot_loss": 1.6718511581420898, "eval_screenspot_loss_ce": 0.00034005365644892055, "eval_screenspot_loss_iou": 0.6853841145833334, "eval_screenspot_loss_num": 0.07482655843098958, "eval_screenspot_loss_xval": 1.7449544270833333, "eval_screenspot_runtime": 44.3938, "eval_screenspot_samples_per_second": 2.005, "eval_screenspot_steps_per_second": 0.068, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "eval_compot_CIoU": 0.3488970696926117, "eval_compot_GIoU": 0.363851934671402, "eval_compot_IoU": 0.40621405839920044, "eval_compot_MAE_all": 0.017889784649014473, "eval_compot_MAE_h": 0.008526601362973452, "eval_compot_MAE_w": 0.021996816620230675, "eval_compot_MAE_x_boxes": 0.02964367438107729, "eval_compot_MAE_y_boxes": 0.006707766558974981, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.3785548210144043, "eval_compot_loss_ce": 0.0002755048044491559, "eval_compot_loss_iou": 0.6339111328125, "eval_compot_loss_num": 0.016744613647460938, "eval_compot_loss_xval": 1.35205078125, "eval_compot_runtime": 24.1215, "eval_compot_samples_per_second": 2.073, "eval_compot_steps_per_second": 0.083, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "eval_custom_ui_val_CIoU": 0.4712728477186627, "eval_custom_ui_val_GIoU": 0.48705925544102985, "eval_custom_ui_val_IoU": 0.5247346328364478, "eval_custom_ui_val_MAE_all": 0.03072897769096825, "eval_custom_ui_val_MAE_h": 0.017728664524232347, "eval_custom_ui_val_MAE_w": 0.03841559050811662, "eval_custom_ui_val_MAE_x_boxes": 0.03411081122855345, "eval_custom_ui_val_MAE_y_boxes": 0.015722092292788956, "eval_custom_ui_val_inside_bbox": 0.7353395091162788, "eval_custom_ui_val_loss": 1.1931304931640625, "eval_custom_ui_val_loss_ce": 0.0003192785694005175, "eval_custom_ui_val_loss_iou": 0.5072292751736112, "eval_custom_ui_val_loss_num": 0.02840868631998698, "eval_custom_ui_val_loss_xval": 1.1561957465277777, "eval_custom_ui_val_runtime": 72.27, "eval_custom_ui_val_samples_per_second": 3.667, "eval_custom_ui_val_steps_per_second": 0.125, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.124721603563474, "loss": 0.908734917640686, "loss_ce": 0.0002876613289117813, "loss_iou": 0.39453125, "loss_num": 0.024169921875, "loss_xval": 0.91015625, "num_input_tokens_seen": 154061564, "step": 2750 }, { "epoch": 6.12694877505568, "grad_norm": 21.767990112304688, "learning_rate": 1e-06, "loss": 0.7365, "num_input_tokens_seen": 154117696, "step": 2751 }, { "epoch": 6.12694877505568, "loss": 0.7305013537406921, "loss_ce": 0.0002767470432445407, "loss_iou": 0.283203125, "loss_num": 0.032958984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 154117696, "step": 2751 }, { "epoch": 6.129175946547885, "grad_norm": 26.993640899658203, "learning_rate": 1e-06, "loss": 0.8128, "num_input_tokens_seen": 154173524, "step": 2752 }, { "epoch": 6.129175946547885, "loss": 0.9162663221359253, "loss_ce": 0.0002507681492716074, "loss_iou": 0.3828125, "loss_num": 0.0296630859375, "loss_xval": 0.9140625, "num_input_tokens_seen": 154173524, "step": 2752 }, { "epoch": 6.131403118040089, "grad_norm": 18.245227813720703, "learning_rate": 1e-06, "loss": 0.5699, "num_input_tokens_seen": 154231616, "step": 2753 }, { "epoch": 6.131403118040089, "loss": 0.5690479278564453, "loss_ce": 0.00020031025633215904, "loss_iou": 0.251953125, "loss_num": 0.0126953125, "loss_xval": 0.5703125, "num_input_tokens_seen": 154231616, "step": 2753 }, { "epoch": 6.133630289532294, "grad_norm": 15.791337013244629, "learning_rate": 1e-06, "loss": 0.7701, "num_input_tokens_seen": 154288064, "step": 2754 }, { "epoch": 6.133630289532294, "loss": 0.6845183968544006, "loss_ce": 0.00019223052368033677, "loss_iou": 0.291015625, "loss_num": 0.0206298828125, "loss_xval": 0.68359375, "num_input_tokens_seen": 154288064, "step": 2754 }, { "epoch": 6.135857461024499, "grad_norm": 18.88831901550293, "learning_rate": 1e-06, "loss": 0.5883, "num_input_tokens_seen": 154340732, "step": 2755 }, { "epoch": 6.135857461024499, "loss": 0.5457676649093628, "loss_ce": 0.00023538943787571043, "loss_iou": 0.25390625, "loss_num": 0.00787353515625, "loss_xval": 0.546875, "num_input_tokens_seen": 154340732, "step": 2755 }, { "epoch": 6.138084632516704, "grad_norm": 15.97834300994873, "learning_rate": 1e-06, "loss": 0.5491, "num_input_tokens_seen": 154392456, "step": 2756 }, { "epoch": 6.138084632516704, "loss": 0.5473060011863708, "loss_ce": 0.00018683119560591877, "loss_iou": 0.24609375, "loss_num": 0.0107421875, "loss_xval": 0.546875, "num_input_tokens_seen": 154392456, "step": 2756 }, { "epoch": 6.140311804008909, "grad_norm": 13.842836380004883, "learning_rate": 1e-06, "loss": 0.6017, "num_input_tokens_seen": 154448276, "step": 2757 }, { "epoch": 6.140311804008909, "loss": 0.6372648477554321, "loss_ce": 0.00017988680338021368, "loss_iou": 0.271484375, "loss_num": 0.018798828125, "loss_xval": 0.63671875, "num_input_tokens_seen": 154448276, "step": 2757 }, { "epoch": 6.142538975501114, "grad_norm": 20.629444122314453, "learning_rate": 1e-06, "loss": 0.6205, "num_input_tokens_seen": 154507936, "step": 2758 }, { "epoch": 6.142538975501114, "loss": 0.643352210521698, "loss_ce": 0.00028582755476236343, "loss_iou": 0.25390625, "loss_num": 0.0274658203125, "loss_xval": 0.64453125, "num_input_tokens_seen": 154507936, "step": 2758 }, { "epoch": 6.144766146993319, "grad_norm": 26.82574462890625, "learning_rate": 1e-06, "loss": 0.7818, "num_input_tokens_seen": 154564448, "step": 2759 }, { "epoch": 6.144766146993319, "loss": 0.9775708913803101, "loss_ce": 0.0002760253846645355, "loss_iou": 0.427734375, "loss_num": 0.025146484375, "loss_xval": 0.9765625, "num_input_tokens_seen": 154564448, "step": 2759 }, { "epoch": 6.146993318485523, "grad_norm": 21.474916458129883, "learning_rate": 1e-06, "loss": 0.8228, "num_input_tokens_seen": 154621152, "step": 2760 }, { "epoch": 6.146993318485523, "loss": 0.8825445175170898, "loss_ce": 0.00022031497792340815, "loss_iou": 0.33203125, "loss_num": 0.044189453125, "loss_xval": 0.8828125, "num_input_tokens_seen": 154621152, "step": 2760 }, { "epoch": 6.149220489977728, "grad_norm": 21.773693084716797, "learning_rate": 1e-06, "loss": 0.9334, "num_input_tokens_seen": 154676312, "step": 2761 }, { "epoch": 6.149220489977728, "loss": 0.9210202693939209, "loss_ce": 0.00024393736384809017, "loss_iou": 0.384765625, "loss_num": 0.0299072265625, "loss_xval": 0.921875, "num_input_tokens_seen": 154676312, "step": 2761 }, { "epoch": 6.151447661469933, "grad_norm": 18.154090881347656, "learning_rate": 1e-06, "loss": 0.563, "num_input_tokens_seen": 154732704, "step": 2762 }, { "epoch": 6.151447661469933, "loss": 0.3975493311882019, "loss_ce": 0.00033253489527851343, "loss_iou": 0.158203125, "loss_num": 0.0159912109375, "loss_xval": 0.396484375, "num_input_tokens_seen": 154732704, "step": 2762 }, { "epoch": 6.153674832962138, "grad_norm": 20.072853088378906, "learning_rate": 1e-06, "loss": 0.5174, "num_input_tokens_seen": 154790408, "step": 2763 }, { "epoch": 6.153674832962138, "loss": 0.5634435415267944, "loss_ce": 0.00021113727416377515, "loss_iou": 0.2470703125, "loss_num": 0.01397705078125, "loss_xval": 0.5625, "num_input_tokens_seen": 154790408, "step": 2763 }, { "epoch": 6.155902004454343, "grad_norm": 16.74744987487793, "learning_rate": 1e-06, "loss": 0.729, "num_input_tokens_seen": 154847920, "step": 2764 }, { "epoch": 6.155902004454343, "loss": 0.8036819696426392, "loss_ce": 0.00021515009575523436, "loss_iou": 0.34375, "loss_num": 0.023193359375, "loss_xval": 0.8046875, "num_input_tokens_seen": 154847920, "step": 2764 }, { "epoch": 6.158129175946548, "grad_norm": 31.824892044067383, "learning_rate": 1e-06, "loss": 0.8517, "num_input_tokens_seen": 154904104, "step": 2765 }, { "epoch": 6.158129175946548, "loss": 0.874335527420044, "loss_ce": 0.00018998724408447742, "loss_iou": 0.38671875, "loss_num": 0.02001953125, "loss_xval": 0.875, "num_input_tokens_seen": 154904104, "step": 2765 }, { "epoch": 6.160356347438753, "grad_norm": 18.871536254882812, "learning_rate": 1e-06, "loss": 0.6084, "num_input_tokens_seen": 154960796, "step": 2766 }, { "epoch": 6.160356347438753, "loss": 0.6629579067230225, "loss_ce": 0.000360234291292727, "loss_iou": 0.265625, "loss_num": 0.0264892578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 154960796, "step": 2766 }, { "epoch": 6.1625835189309575, "grad_norm": 18.299470901489258, "learning_rate": 1e-06, "loss": 0.5538, "num_input_tokens_seen": 155013768, "step": 2767 }, { "epoch": 6.1625835189309575, "loss": 0.6405810117721558, "loss_ce": 0.00020016740018036216, "loss_iou": 0.28515625, "loss_num": 0.01409912109375, "loss_xval": 0.640625, "num_input_tokens_seen": 155013768, "step": 2767 }, { "epoch": 6.164810690423162, "grad_norm": 13.490962982177734, "learning_rate": 1e-06, "loss": 0.8451, "num_input_tokens_seen": 155069988, "step": 2768 }, { "epoch": 6.164810690423162, "loss": 0.865939199924469, "loss_ce": 0.00021657557226717472, "loss_iou": 0.36328125, "loss_num": 0.027587890625, "loss_xval": 0.8671875, "num_input_tokens_seen": 155069988, "step": 2768 }, { "epoch": 6.167037861915367, "grad_norm": 15.8577241897583, "learning_rate": 1e-06, "loss": 0.6934, "num_input_tokens_seen": 155125976, "step": 2769 }, { "epoch": 6.167037861915367, "loss": 0.7875275611877441, "loss_ce": 0.00017407389532309026, "loss_iou": 0.3359375, "loss_num": 0.023193359375, "loss_xval": 0.7890625, "num_input_tokens_seen": 155125976, "step": 2769 }, { "epoch": 6.169265033407572, "grad_norm": 15.64920711517334, "learning_rate": 1e-06, "loss": 0.584, "num_input_tokens_seen": 155181288, "step": 2770 }, { "epoch": 6.169265033407572, "loss": 0.6932950615882874, "loss_ce": 0.0004239326517563313, "loss_iou": 0.291015625, "loss_num": 0.0224609375, "loss_xval": 0.69140625, "num_input_tokens_seen": 155181288, "step": 2770 }, { "epoch": 6.171492204899777, "grad_norm": 25.973970413208008, "learning_rate": 1e-06, "loss": 0.5113, "num_input_tokens_seen": 155238888, "step": 2771 }, { "epoch": 6.171492204899777, "loss": 0.5262500047683716, "loss_ce": 0.0002490488113835454, "loss_iou": 0.228515625, "loss_num": 0.01409912109375, "loss_xval": 0.52734375, "num_input_tokens_seen": 155238888, "step": 2771 }, { "epoch": 6.173719376391982, "grad_norm": 23.871335983276367, "learning_rate": 1e-06, "loss": 0.8139, "num_input_tokens_seen": 155294792, "step": 2772 }, { "epoch": 6.173719376391982, "loss": 0.7459384202957153, "loss_ce": 0.0004549958393909037, "loss_iou": 0.3125, "loss_num": 0.0238037109375, "loss_xval": 0.74609375, "num_input_tokens_seen": 155294792, "step": 2772 }, { "epoch": 6.1759465478841875, "grad_norm": 24.223966598510742, "learning_rate": 1e-06, "loss": 0.8061, "num_input_tokens_seen": 155350512, "step": 2773 }, { "epoch": 6.1759465478841875, "loss": 1.045411229133606, "loss_ce": 0.00024516499252058566, "loss_iou": 0.462890625, "loss_num": 0.0235595703125, "loss_xval": 1.046875, "num_input_tokens_seen": 155350512, "step": 2773 }, { "epoch": 6.178173719376392, "grad_norm": 15.486029624938965, "learning_rate": 1e-06, "loss": 0.6705, "num_input_tokens_seen": 155404252, "step": 2774 }, { "epoch": 6.178173719376392, "loss": 0.8749101758003235, "loss_ce": 0.0001542975369375199, "loss_iou": 0.333984375, "loss_num": 0.041748046875, "loss_xval": 0.875, "num_input_tokens_seen": 155404252, "step": 2774 }, { "epoch": 6.180400890868597, "grad_norm": 20.75381851196289, "learning_rate": 1e-06, "loss": 0.6957, "num_input_tokens_seen": 155459700, "step": 2775 }, { "epoch": 6.180400890868597, "loss": 0.5211961269378662, "loss_ce": 0.00020005203259643167, "loss_iou": 0.2373046875, "loss_num": 0.00921630859375, "loss_xval": 0.51953125, "num_input_tokens_seen": 155459700, "step": 2775 }, { "epoch": 6.182628062360802, "grad_norm": 19.334497451782227, "learning_rate": 1e-06, "loss": 0.6821, "num_input_tokens_seen": 155517152, "step": 2776 }, { "epoch": 6.182628062360802, "loss": 0.7436808347702026, "loss_ce": 0.0002726712264120579, "loss_iou": 0.314453125, "loss_num": 0.02294921875, "loss_xval": 0.7421875, "num_input_tokens_seen": 155517152, "step": 2776 }, { "epoch": 6.184855233853007, "grad_norm": 13.525506019592285, "learning_rate": 1e-06, "loss": 0.5351, "num_input_tokens_seen": 155574720, "step": 2777 }, { "epoch": 6.184855233853007, "loss": 0.42132747173309326, "loss_ce": 0.0001848653773777187, "loss_iou": 0.1845703125, "loss_num": 0.01055908203125, "loss_xval": 0.421875, "num_input_tokens_seen": 155574720, "step": 2777 }, { "epoch": 6.187082405345212, "grad_norm": 13.232222557067871, "learning_rate": 1e-06, "loss": 0.5728, "num_input_tokens_seen": 155631268, "step": 2778 }, { "epoch": 6.187082405345212, "loss": 0.5074321627616882, "loss_ce": 0.00023002157104201615, "loss_iou": 0.2021484375, "loss_num": 0.020751953125, "loss_xval": 0.5078125, "num_input_tokens_seen": 155631268, "step": 2778 }, { "epoch": 6.189309576837417, "grad_norm": 31.53081703186035, "learning_rate": 1e-06, "loss": 0.6913, "num_input_tokens_seen": 155688784, "step": 2779 }, { "epoch": 6.189309576837417, "loss": 0.9023219347000122, "loss_ce": 0.0002223325427621603, "loss_iou": 0.345703125, "loss_num": 0.0419921875, "loss_xval": 0.90234375, "num_input_tokens_seen": 155688784, "step": 2779 }, { "epoch": 6.1915367483296215, "grad_norm": 20.483375549316406, "learning_rate": 1e-06, "loss": 0.6798, "num_input_tokens_seen": 155744636, "step": 2780 }, { "epoch": 6.1915367483296215, "loss": 0.7736812233924866, "loss_ce": 0.00024372681218665093, "loss_iou": 0.34765625, "loss_num": 0.0159912109375, "loss_xval": 0.7734375, "num_input_tokens_seen": 155744636, "step": 2780 }, { "epoch": 6.193763919821826, "grad_norm": 13.428009986877441, "learning_rate": 1e-06, "loss": 0.5512, "num_input_tokens_seen": 155803476, "step": 2781 }, { "epoch": 6.193763919821826, "loss": 0.5040408372879028, "loss_ce": 0.0002566420880611986, "loss_iou": 0.2216796875, "loss_num": 0.01202392578125, "loss_xval": 0.50390625, "num_input_tokens_seen": 155803476, "step": 2781 }, { "epoch": 6.195991091314031, "grad_norm": 23.016326904296875, "learning_rate": 1e-06, "loss": 0.6668, "num_input_tokens_seen": 155858004, "step": 2782 }, { "epoch": 6.195991091314031, "loss": 0.6006724834442139, "loss_ce": 0.0002086429885821417, "loss_iou": 0.26953125, "loss_num": 0.01202392578125, "loss_xval": 0.6015625, "num_input_tokens_seen": 155858004, "step": 2782 }, { "epoch": 6.198218262806236, "grad_norm": 18.91625213623047, "learning_rate": 1e-06, "loss": 0.5554, "num_input_tokens_seen": 155908832, "step": 2783 }, { "epoch": 6.198218262806236, "loss": 0.4532051086425781, "loss_ce": 0.00020219493308104575, "loss_iou": 0.1884765625, "loss_num": 0.0152587890625, "loss_xval": 0.453125, "num_input_tokens_seen": 155908832, "step": 2783 }, { "epoch": 6.200445434298441, "grad_norm": 21.05170440673828, "learning_rate": 1e-06, "loss": 0.5745, "num_input_tokens_seen": 155965312, "step": 2784 }, { "epoch": 6.200445434298441, "loss": 0.6314213275909424, "loss_ce": 0.00019577116472646594, "loss_iou": 0.279296875, "loss_num": 0.01416015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 155965312, "step": 2784 }, { "epoch": 6.202672605790646, "grad_norm": 20.375423431396484, "learning_rate": 1e-06, "loss": 0.6475, "num_input_tokens_seen": 156018284, "step": 2785 }, { "epoch": 6.202672605790646, "loss": 0.7216565608978271, "loss_ce": 0.0002210296515841037, "loss_iou": 0.2890625, "loss_num": 0.0284423828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 156018284, "step": 2785 }, { "epoch": 6.204899777282851, "grad_norm": 16.35026741027832, "learning_rate": 1e-06, "loss": 0.7937, "num_input_tokens_seen": 156075156, "step": 2786 }, { "epoch": 6.204899777282851, "loss": 0.8708457946777344, "loss_ce": 0.00024033612862695009, "loss_iou": 0.37890625, "loss_num": 0.0228271484375, "loss_xval": 0.87109375, "num_input_tokens_seen": 156075156, "step": 2786 }, { "epoch": 6.2071269487750556, "grad_norm": 23.919958114624023, "learning_rate": 1e-06, "loss": 0.5475, "num_input_tokens_seen": 156130308, "step": 2787 }, { "epoch": 6.2071269487750556, "loss": 0.5468391180038452, "loss_ce": 0.00020827000844292343, "loss_iou": 0.2021484375, "loss_num": 0.0284423828125, "loss_xval": 0.546875, "num_input_tokens_seen": 156130308, "step": 2787 }, { "epoch": 6.20935412026726, "grad_norm": 14.023747444152832, "learning_rate": 1e-06, "loss": 0.5289, "num_input_tokens_seen": 156185484, "step": 2788 }, { "epoch": 6.20935412026726, "loss": 0.6045721769332886, "loss_ce": 0.00020206648332532495, "loss_iou": 0.25390625, "loss_num": 0.0191650390625, "loss_xval": 0.60546875, "num_input_tokens_seen": 156185484, "step": 2788 }, { "epoch": 6.211581291759465, "grad_norm": 33.71453094482422, "learning_rate": 1e-06, "loss": 0.7773, "num_input_tokens_seen": 156240220, "step": 2789 }, { "epoch": 6.211581291759465, "loss": 0.7553761601448059, "loss_ce": 0.0002492480562068522, "loss_iou": 0.318359375, "loss_num": 0.0240478515625, "loss_xval": 0.75390625, "num_input_tokens_seen": 156240220, "step": 2789 }, { "epoch": 6.21380846325167, "grad_norm": 15.885749816894531, "learning_rate": 1e-06, "loss": 0.7896, "num_input_tokens_seen": 156294752, "step": 2790 }, { "epoch": 6.21380846325167, "loss": 0.7173956632614136, "loss_ce": 0.00035462420783005655, "loss_iou": 0.326171875, "loss_num": 0.01300048828125, "loss_xval": 0.71875, "num_input_tokens_seen": 156294752, "step": 2790 }, { "epoch": 6.216035634743875, "grad_norm": 23.221410751342773, "learning_rate": 1e-06, "loss": 0.5949, "num_input_tokens_seen": 156350172, "step": 2791 }, { "epoch": 6.216035634743875, "loss": 0.4679461717605591, "loss_ce": 0.00017272785771638155, "loss_iou": 0.1923828125, "loss_num": 0.0166015625, "loss_xval": 0.46875, "num_input_tokens_seen": 156350172, "step": 2791 }, { "epoch": 6.21826280623608, "grad_norm": 18.390562057495117, "learning_rate": 1e-06, "loss": 0.7924, "num_input_tokens_seen": 156405748, "step": 2792 }, { "epoch": 6.21826280623608, "loss": 0.5683201551437378, "loss_ce": 0.0002049248432740569, "loss_iou": 0.251953125, "loss_num": 0.012451171875, "loss_xval": 0.56640625, "num_input_tokens_seen": 156405748, "step": 2792 }, { "epoch": 6.220489977728285, "grad_norm": 21.410785675048828, "learning_rate": 1e-06, "loss": 0.6411, "num_input_tokens_seen": 156458592, "step": 2793 }, { "epoch": 6.220489977728285, "loss": 0.49654620885849, "loss_ce": 0.0002083319704979658, "loss_iou": 0.21484375, "loss_num": 0.01348876953125, "loss_xval": 0.49609375, "num_input_tokens_seen": 156458592, "step": 2793 }, { "epoch": 6.22271714922049, "grad_norm": 16.62869644165039, "learning_rate": 1e-06, "loss": 0.6773, "num_input_tokens_seen": 156515956, "step": 2794 }, { "epoch": 6.22271714922049, "loss": 0.6638964414596558, "loss_ce": 0.00020011054584756494, "loss_iou": 0.2734375, "loss_num": 0.0233154296875, "loss_xval": 0.6640625, "num_input_tokens_seen": 156515956, "step": 2794 }, { "epoch": 6.224944320712694, "grad_norm": 17.955623626708984, "learning_rate": 1e-06, "loss": 0.5419, "num_input_tokens_seen": 156571336, "step": 2795 }, { "epoch": 6.224944320712694, "loss": 0.5947801470756531, "loss_ce": 0.00017566801398061216, "loss_iou": 0.25, "loss_num": 0.01904296875, "loss_xval": 0.59375, "num_input_tokens_seen": 156571336, "step": 2795 }, { "epoch": 6.2271714922049, "grad_norm": 16.663782119750977, "learning_rate": 1e-06, "loss": 0.5893, "num_input_tokens_seen": 156627776, "step": 2796 }, { "epoch": 6.2271714922049, "loss": 0.7995222806930542, "loss_ce": 0.00020588196639437228, "loss_iou": 0.30859375, "loss_num": 0.03662109375, "loss_xval": 0.80078125, "num_input_tokens_seen": 156627776, "step": 2796 }, { "epoch": 6.229398663697105, "grad_norm": 16.558795928955078, "learning_rate": 1e-06, "loss": 0.7245, "num_input_tokens_seen": 156682772, "step": 2797 }, { "epoch": 6.229398663697105, "loss": 0.7288368344306946, "loss_ce": 0.0003212342271581292, "loss_iou": 0.306640625, "loss_num": 0.022705078125, "loss_xval": 0.7265625, "num_input_tokens_seen": 156682772, "step": 2797 }, { "epoch": 6.23162583518931, "grad_norm": 56.9061164855957, "learning_rate": 1e-06, "loss": 0.6461, "num_input_tokens_seen": 156739268, "step": 2798 }, { "epoch": 6.23162583518931, "loss": 0.5878466367721558, "loss_ce": 0.00020016153575852513, "loss_iou": 0.265625, "loss_num": 0.0108642578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 156739268, "step": 2798 }, { "epoch": 6.233853006681515, "grad_norm": 17.540464401245117, "learning_rate": 1e-06, "loss": 0.8046, "num_input_tokens_seen": 156797304, "step": 2799 }, { "epoch": 6.233853006681515, "loss": 0.8160011768341064, "loss_ce": 0.0002052470954367891, "loss_iou": 0.357421875, "loss_num": 0.0201416015625, "loss_xval": 0.81640625, "num_input_tokens_seen": 156797304, "step": 2799 }, { "epoch": 6.23608017817372, "grad_norm": 23.366252899169922, "learning_rate": 1e-06, "loss": 0.7489, "num_input_tokens_seen": 156852524, "step": 2800 }, { "epoch": 6.23608017817372, "loss": 0.7832313776016235, "loss_ce": 0.0002724075166042894, "loss_iou": 0.298828125, "loss_num": 0.037109375, "loss_xval": 0.78125, "num_input_tokens_seen": 156852524, "step": 2800 }, { "epoch": 6.2383073496659245, "grad_norm": 18.745590209960938, "learning_rate": 1e-06, "loss": 0.602, "num_input_tokens_seen": 156909388, "step": 2801 }, { "epoch": 6.2383073496659245, "loss": 0.377541720867157, "loss_ce": 0.0002223837363999337, "loss_iou": 0.16015625, "loss_num": 0.01141357421875, "loss_xval": 0.376953125, "num_input_tokens_seen": 156909388, "step": 2801 }, { "epoch": 6.240534521158129, "grad_norm": 16.821367263793945, "learning_rate": 1e-06, "loss": 0.6813, "num_input_tokens_seen": 156965248, "step": 2802 }, { "epoch": 6.240534521158129, "loss": 0.6255514621734619, "loss_ce": 0.00018525280756875873, "loss_iou": 0.2490234375, "loss_num": 0.025634765625, "loss_xval": 0.625, "num_input_tokens_seen": 156965248, "step": 2802 }, { "epoch": 6.242761692650334, "grad_norm": 17.60284996032715, "learning_rate": 1e-06, "loss": 0.6629, "num_input_tokens_seen": 157021544, "step": 2803 }, { "epoch": 6.242761692650334, "loss": 0.7033724784851074, "loss_ce": 0.00024746524286456406, "loss_iou": 0.310546875, "loss_num": 0.0166015625, "loss_xval": 0.703125, "num_input_tokens_seen": 157021544, "step": 2803 }, { "epoch": 6.244988864142539, "grad_norm": 18.265342712402344, "learning_rate": 1e-06, "loss": 0.5529, "num_input_tokens_seen": 157077008, "step": 2804 }, { "epoch": 6.244988864142539, "loss": 0.495932936668396, "loss_ce": 0.0002053846837952733, "loss_iou": 0.1982421875, "loss_num": 0.019775390625, "loss_xval": 0.49609375, "num_input_tokens_seen": 157077008, "step": 2804 }, { "epoch": 6.247216035634744, "grad_norm": 36.90097427368164, "learning_rate": 1e-06, "loss": 0.6764, "num_input_tokens_seen": 157131556, "step": 2805 }, { "epoch": 6.247216035634744, "loss": 0.6967709064483643, "loss_ce": 0.00023769214749336243, "loss_iou": 0.306640625, "loss_num": 0.0164794921875, "loss_xval": 0.6953125, "num_input_tokens_seen": 157131556, "step": 2805 }, { "epoch": 6.249443207126949, "grad_norm": 20.952041625976562, "learning_rate": 1e-06, "loss": 0.6065, "num_input_tokens_seen": 157187128, "step": 2806 }, { "epoch": 6.249443207126949, "loss": 0.496565043926239, "loss_ce": 0.00022713570797350258, "loss_iou": 0.208984375, "loss_num": 0.0157470703125, "loss_xval": 0.49609375, "num_input_tokens_seen": 157187128, "step": 2806 }, { "epoch": 6.251670378619154, "grad_norm": 27.254335403442383, "learning_rate": 1e-06, "loss": 0.7668, "num_input_tokens_seen": 157242080, "step": 2807 }, { "epoch": 6.251670378619154, "loss": 0.721343457698822, "loss_ce": 0.0002741218195296824, "loss_iou": 0.28125, "loss_num": 0.03173828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 157242080, "step": 2807 }, { "epoch": 6.2538975501113585, "grad_norm": 19.683401107788086, "learning_rate": 1e-06, "loss": 0.4727, "num_input_tokens_seen": 157299408, "step": 2808 }, { "epoch": 6.2538975501113585, "loss": 0.5055980682373047, "loss_ce": 0.00022694582003168762, "loss_iou": 0.22265625, "loss_num": 0.01202392578125, "loss_xval": 0.50390625, "num_input_tokens_seen": 157299408, "step": 2808 }, { "epoch": 6.256124721603563, "grad_norm": 16.38343620300293, "learning_rate": 1e-06, "loss": 0.6176, "num_input_tokens_seen": 157354524, "step": 2809 }, { "epoch": 6.256124721603563, "loss": 0.5820066332817078, "loss_ce": 0.0002195206907344982, "loss_iou": 0.232421875, "loss_num": 0.0234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 157354524, "step": 2809 }, { "epoch": 6.258351893095768, "grad_norm": 23.633302688598633, "learning_rate": 1e-06, "loss": 0.7708, "num_input_tokens_seen": 157409136, "step": 2810 }, { "epoch": 6.258351893095768, "loss": 1.0537309646606445, "loss_ce": 0.00026410428108647466, "loss_iou": 0.46875, "loss_num": 0.0225830078125, "loss_xval": 1.0546875, "num_input_tokens_seen": 157409136, "step": 2810 }, { "epoch": 6.260579064587973, "grad_norm": 17.914230346679688, "learning_rate": 1e-06, "loss": 0.8414, "num_input_tokens_seen": 157463620, "step": 2811 }, { "epoch": 6.260579064587973, "loss": 1.0271397829055786, "loss_ce": 0.00028427952202036977, "loss_iou": 0.45703125, "loss_num": 0.022216796875, "loss_xval": 1.0234375, "num_input_tokens_seen": 157463620, "step": 2811 }, { "epoch": 6.262806236080178, "grad_norm": 20.162099838256836, "learning_rate": 1e-06, "loss": 0.6692, "num_input_tokens_seen": 157518172, "step": 2812 }, { "epoch": 6.262806236080178, "loss": 0.709923267364502, "loss_ce": 0.0004505990073084831, "loss_iou": 0.30078125, "loss_num": 0.0213623046875, "loss_xval": 0.7109375, "num_input_tokens_seen": 157518172, "step": 2812 }, { "epoch": 6.265033407572383, "grad_norm": 20.278295516967773, "learning_rate": 1e-06, "loss": 0.6405, "num_input_tokens_seen": 157573484, "step": 2813 }, { "epoch": 6.265033407572383, "loss": 0.7189103364944458, "loss_ce": 0.0001603504060767591, "loss_iou": 0.2890625, "loss_num": 0.028076171875, "loss_xval": 0.71875, "num_input_tokens_seen": 157573484, "step": 2813 }, { "epoch": 6.267260579064588, "grad_norm": 19.324562072753906, "learning_rate": 1e-06, "loss": 0.6074, "num_input_tokens_seen": 157627908, "step": 2814 }, { "epoch": 6.267260579064588, "loss": 0.7765587568283081, "loss_ce": 0.0002525679301470518, "loss_iou": 0.330078125, "loss_num": 0.0234375, "loss_xval": 0.77734375, "num_input_tokens_seen": 157627908, "step": 2814 }, { "epoch": 6.2694877505567925, "grad_norm": 15.711827278137207, "learning_rate": 1e-06, "loss": 0.676, "num_input_tokens_seen": 157685372, "step": 2815 }, { "epoch": 6.2694877505567925, "loss": 0.6160228252410889, "loss_ce": 0.0003001945442520082, "loss_iou": 0.259765625, "loss_num": 0.0189208984375, "loss_xval": 0.6171875, "num_input_tokens_seen": 157685372, "step": 2815 }, { "epoch": 6.271714922048997, "grad_norm": 24.523929595947266, "learning_rate": 1e-06, "loss": 0.7507, "num_input_tokens_seen": 157740784, "step": 2816 }, { "epoch": 6.271714922048997, "loss": 0.6751787662506104, "loss_ce": 0.0003741044201888144, "loss_iou": 0.28125, "loss_num": 0.0225830078125, "loss_xval": 0.67578125, "num_input_tokens_seen": 157740784, "step": 2816 }, { "epoch": 6.273942093541203, "grad_norm": 17.52358627319336, "learning_rate": 1e-06, "loss": 0.627, "num_input_tokens_seen": 157797720, "step": 2817 }, { "epoch": 6.273942093541203, "loss": 0.6270310878753662, "loss_ce": 0.00032214989187195897, "loss_iou": 0.263671875, "loss_num": 0.02001953125, "loss_xval": 0.625, "num_input_tokens_seen": 157797720, "step": 2817 }, { "epoch": 6.276169265033408, "grad_norm": 15.597443580627441, "learning_rate": 1e-06, "loss": 0.511, "num_input_tokens_seen": 157855748, "step": 2818 }, { "epoch": 6.276169265033408, "loss": 0.45937642455101013, "loss_ce": 0.0005141303990967572, "loss_iou": 0.1962890625, "loss_num": 0.01336669921875, "loss_xval": 0.458984375, "num_input_tokens_seen": 157855748, "step": 2818 }, { "epoch": 6.278396436525613, "grad_norm": 20.574974060058594, "learning_rate": 1e-06, "loss": 0.8684, "num_input_tokens_seen": 157906192, "step": 2819 }, { "epoch": 6.278396436525613, "loss": 0.9832384586334229, "loss_ce": 0.0003282871039118618, "loss_iou": 0.4140625, "loss_num": 0.0311279296875, "loss_xval": 0.984375, "num_input_tokens_seen": 157906192, "step": 2819 }, { "epoch": 6.280623608017818, "grad_norm": 16.417438507080078, "learning_rate": 1e-06, "loss": 0.878, "num_input_tokens_seen": 157960736, "step": 2820 }, { "epoch": 6.280623608017818, "loss": 1.0158679485321045, "loss_ce": 0.0002430274907965213, "loss_iou": 0.416015625, "loss_num": 0.036865234375, "loss_xval": 1.015625, "num_input_tokens_seen": 157960736, "step": 2820 }, { "epoch": 6.282850779510023, "grad_norm": 21.210569381713867, "learning_rate": 1e-06, "loss": 0.724, "num_input_tokens_seen": 158016856, "step": 2821 }, { "epoch": 6.282850779510023, "loss": 0.6245869398117065, "loss_ce": 0.0001973453618120402, "loss_iou": 0.267578125, "loss_num": 0.018310546875, "loss_xval": 0.625, "num_input_tokens_seen": 158016856, "step": 2821 }, { "epoch": 6.285077951002227, "grad_norm": 18.306804656982422, "learning_rate": 1e-06, "loss": 0.7915, "num_input_tokens_seen": 158071804, "step": 2822 }, { "epoch": 6.285077951002227, "loss": 0.865458071231842, "loss_ce": 0.0002236901200376451, "loss_iou": 0.361328125, "loss_num": 0.0284423828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 158071804, "step": 2822 }, { "epoch": 6.287305122494432, "grad_norm": 18.746957778930664, "learning_rate": 1e-06, "loss": 0.8188, "num_input_tokens_seen": 158126148, "step": 2823 }, { "epoch": 6.287305122494432, "loss": 0.8055477142333984, "loss_ce": 0.0003718675870914012, "loss_iou": 0.359375, "loss_num": 0.016845703125, "loss_xval": 0.8046875, "num_input_tokens_seen": 158126148, "step": 2823 }, { "epoch": 6.289532293986637, "grad_norm": 20.189741134643555, "learning_rate": 1e-06, "loss": 0.7432, "num_input_tokens_seen": 158181076, "step": 2824 }, { "epoch": 6.289532293986637, "loss": 0.8805779218673706, "loss_ce": 0.00020679559384007007, "loss_iou": 0.375, "loss_num": 0.026123046875, "loss_xval": 0.87890625, "num_input_tokens_seen": 158181076, "step": 2824 }, { "epoch": 6.291759465478842, "grad_norm": 29.9582576751709, "learning_rate": 1e-06, "loss": 0.8211, "num_input_tokens_seen": 158236796, "step": 2825 }, { "epoch": 6.291759465478842, "loss": 0.7246193289756775, "loss_ce": 0.0002540839195717126, "loss_iou": 0.310546875, "loss_num": 0.0206298828125, "loss_xval": 0.72265625, "num_input_tokens_seen": 158236796, "step": 2825 }, { "epoch": 6.293986636971047, "grad_norm": 24.43955421447754, "learning_rate": 1e-06, "loss": 0.6298, "num_input_tokens_seen": 158292708, "step": 2826 }, { "epoch": 6.293986636971047, "loss": 0.560718297958374, "loss_ce": 0.00017140517593361437, "loss_iou": 0.251953125, "loss_num": 0.01123046875, "loss_xval": 0.5625, "num_input_tokens_seen": 158292708, "step": 2826 }, { "epoch": 6.296213808463252, "grad_norm": 15.927586555480957, "learning_rate": 1e-06, "loss": 0.7068, "num_input_tokens_seen": 158347268, "step": 2827 }, { "epoch": 6.296213808463252, "loss": 0.6577511429786682, "loss_ce": 0.0002804349351208657, "loss_iou": 0.291015625, "loss_num": 0.01519775390625, "loss_xval": 0.65625, "num_input_tokens_seen": 158347268, "step": 2827 }, { "epoch": 6.298440979955457, "grad_norm": 17.409822463989258, "learning_rate": 1e-06, "loss": 0.6776, "num_input_tokens_seen": 158402892, "step": 2828 }, { "epoch": 6.298440979955457, "loss": 0.6074726581573486, "loss_ce": 0.00029491446912288666, "loss_iou": 0.26171875, "loss_num": 0.0167236328125, "loss_xval": 0.60546875, "num_input_tokens_seen": 158402892, "step": 2828 }, { "epoch": 6.3006681514476615, "grad_norm": 27.617082595825195, "learning_rate": 1e-06, "loss": 0.604, "num_input_tokens_seen": 158457584, "step": 2829 }, { "epoch": 6.3006681514476615, "loss": 0.6342703104019165, "loss_ce": 0.00023712051915936172, "loss_iou": 0.267578125, "loss_num": 0.0194091796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 158457584, "step": 2829 }, { "epoch": 6.302895322939866, "grad_norm": 15.794069290161133, "learning_rate": 1e-06, "loss": 0.5568, "num_input_tokens_seen": 158514100, "step": 2830 }, { "epoch": 6.302895322939866, "loss": 0.591174840927124, "loss_ce": 0.0003545153886079788, "loss_iou": 0.255859375, "loss_num": 0.015625, "loss_xval": 0.58984375, "num_input_tokens_seen": 158514100, "step": 2830 }, { "epoch": 6.305122494432071, "grad_norm": 19.552928924560547, "learning_rate": 1e-06, "loss": 0.7125, "num_input_tokens_seen": 158571860, "step": 2831 }, { "epoch": 6.305122494432071, "loss": 0.5463184118270874, "loss_ce": 0.00017578111146576703, "loss_iou": 0.2392578125, "loss_num": 0.013671875, "loss_xval": 0.546875, "num_input_tokens_seen": 158571860, "step": 2831 }, { "epoch": 6.307349665924276, "grad_norm": 31.347753524780273, "learning_rate": 1e-06, "loss": 0.5719, "num_input_tokens_seen": 158627292, "step": 2832 }, { "epoch": 6.307349665924276, "loss": 0.46991389989852905, "loss_ce": 0.00018733731121756136, "loss_iou": 0.189453125, "loss_num": 0.0181884765625, "loss_xval": 0.46875, "num_input_tokens_seen": 158627292, "step": 2832 }, { "epoch": 6.309576837416481, "grad_norm": 19.185462951660156, "learning_rate": 1e-06, "loss": 0.6727, "num_input_tokens_seen": 158682192, "step": 2833 }, { "epoch": 6.309576837416481, "loss": 0.6861748695373535, "loss_ce": 0.0002617766731418669, "loss_iou": 0.275390625, "loss_num": 0.02685546875, "loss_xval": 0.6875, "num_input_tokens_seen": 158682192, "step": 2833 }, { "epoch": 6.311804008908686, "grad_norm": 23.759424209594727, "learning_rate": 1e-06, "loss": 0.5722, "num_input_tokens_seen": 158739748, "step": 2834 }, { "epoch": 6.311804008908686, "loss": 0.6177768707275391, "loss_ce": 0.0002231545513495803, "loss_iou": 0.263671875, "loss_num": 0.01806640625, "loss_xval": 0.6171875, "num_input_tokens_seen": 158739748, "step": 2834 }, { "epoch": 6.314031180400891, "grad_norm": 26.93903350830078, "learning_rate": 1e-06, "loss": 0.7831, "num_input_tokens_seen": 158792348, "step": 2835 }, { "epoch": 6.314031180400891, "loss": 1.0019464492797852, "loss_ce": 0.0004816856817342341, "loss_iou": 0.42578125, "loss_num": 0.02978515625, "loss_xval": 1.0, "num_input_tokens_seen": 158792348, "step": 2835 }, { "epoch": 6.3162583518930955, "grad_norm": 55.32765197753906, "learning_rate": 1e-06, "loss": 0.7881, "num_input_tokens_seen": 158850492, "step": 2836 }, { "epoch": 6.3162583518930955, "loss": 0.7746649384498596, "loss_ce": 0.00025085339439101517, "loss_iou": 0.302734375, "loss_num": 0.033935546875, "loss_xval": 0.7734375, "num_input_tokens_seen": 158850492, "step": 2836 }, { "epoch": 6.3184855233853, "grad_norm": 18.002099990844727, "learning_rate": 1e-06, "loss": 0.5655, "num_input_tokens_seen": 158907516, "step": 2837 }, { "epoch": 6.3184855233853, "loss": 0.5236201882362366, "loss_ce": 0.00018271096632815897, "loss_iou": 0.19140625, "loss_num": 0.0283203125, "loss_xval": 0.5234375, "num_input_tokens_seen": 158907516, "step": 2837 }, { "epoch": 6.320712694877505, "grad_norm": 20.92586326599121, "learning_rate": 1e-06, "loss": 0.6947, "num_input_tokens_seen": 158962112, "step": 2838 }, { "epoch": 6.320712694877505, "loss": 0.7641561031341553, "loss_ce": 0.0002400341909378767, "loss_iou": 0.328125, "loss_num": 0.0213623046875, "loss_xval": 0.765625, "num_input_tokens_seen": 158962112, "step": 2838 }, { "epoch": 6.32293986636971, "grad_norm": 24.1262264251709, "learning_rate": 1e-06, "loss": 0.8267, "num_input_tokens_seen": 159017948, "step": 2839 }, { "epoch": 6.32293986636971, "loss": 0.6984615325927734, "loss_ce": 0.0002193648397224024, "loss_iou": 0.283203125, "loss_num": 0.026611328125, "loss_xval": 0.69921875, "num_input_tokens_seen": 159017948, "step": 2839 }, { "epoch": 6.325167037861915, "grad_norm": 14.128987312316895, "learning_rate": 1e-06, "loss": 0.7434, "num_input_tokens_seen": 159072920, "step": 2840 }, { "epoch": 6.325167037861915, "loss": 0.782150387763977, "loss_ce": 0.00029003899544477463, "loss_iou": 0.326171875, "loss_num": 0.0257568359375, "loss_xval": 0.78125, "num_input_tokens_seen": 159072920, "step": 2840 }, { "epoch": 6.327394209354121, "grad_norm": 15.3905029296875, "learning_rate": 1e-06, "loss": 0.5419, "num_input_tokens_seen": 159130120, "step": 2841 }, { "epoch": 6.327394209354121, "loss": 0.3986847996711731, "loss_ce": 0.00024727691197767854, "loss_iou": 0.169921875, "loss_num": 0.01165771484375, "loss_xval": 0.3984375, "num_input_tokens_seen": 159130120, "step": 2841 }, { "epoch": 6.3296213808463255, "grad_norm": 22.618736267089844, "learning_rate": 1e-06, "loss": 0.7229, "num_input_tokens_seen": 159185880, "step": 2842 }, { "epoch": 6.3296213808463255, "loss": 0.5595074892044067, "loss_ce": 0.00018136684957426041, "loss_iou": 0.240234375, "loss_num": 0.015869140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 159185880, "step": 2842 }, { "epoch": 6.33184855233853, "grad_norm": 26.56134605407715, "learning_rate": 1e-06, "loss": 0.5805, "num_input_tokens_seen": 159243236, "step": 2843 }, { "epoch": 6.33184855233853, "loss": 0.5183169841766357, "loss_ce": 0.0002505767624825239, "loss_iou": 0.2255859375, "loss_num": 0.01348876953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 159243236, "step": 2843 }, { "epoch": 6.334075723830735, "grad_norm": 18.414081573486328, "learning_rate": 1e-06, "loss": 0.833, "num_input_tokens_seen": 159300220, "step": 2844 }, { "epoch": 6.334075723830735, "loss": 0.8649712800979614, "loss_ce": 0.00022521875507663935, "loss_iou": 0.37109375, "loss_num": 0.024658203125, "loss_xval": 0.86328125, "num_input_tokens_seen": 159300220, "step": 2844 }, { "epoch": 6.33630289532294, "grad_norm": 55.4049072265625, "learning_rate": 1e-06, "loss": 0.5908, "num_input_tokens_seen": 159358836, "step": 2845 }, { "epoch": 6.33630289532294, "loss": 0.5839510560035706, "loss_ce": 0.0002108067856170237, "loss_iou": 0.24609375, "loss_num": 0.0181884765625, "loss_xval": 0.58203125, "num_input_tokens_seen": 159358836, "step": 2845 }, { "epoch": 6.338530066815145, "grad_norm": 15.77271556854248, "learning_rate": 1e-06, "loss": 0.6155, "num_input_tokens_seen": 159416428, "step": 2846 }, { "epoch": 6.338530066815145, "loss": 0.5473378896713257, "loss_ce": 0.00021875005040783435, "loss_iou": 0.2451171875, "loss_num": 0.0115966796875, "loss_xval": 0.546875, "num_input_tokens_seen": 159416428, "step": 2846 }, { "epoch": 6.34075723830735, "grad_norm": 17.767343521118164, "learning_rate": 1e-06, "loss": 0.4326, "num_input_tokens_seen": 159474868, "step": 2847 }, { "epoch": 6.34075723830735, "loss": 0.420124888420105, "loss_ce": 0.00020299111201893538, "loss_iou": 0.1865234375, "loss_num": 0.00921630859375, "loss_xval": 0.419921875, "num_input_tokens_seen": 159474868, "step": 2847 }, { "epoch": 6.342984409799555, "grad_norm": 17.833757400512695, "learning_rate": 1e-06, "loss": 0.6052, "num_input_tokens_seen": 159530900, "step": 2848 }, { "epoch": 6.342984409799555, "loss": 0.6891921758651733, "loss_ce": 0.0002273364079883322, "loss_iou": 0.306640625, "loss_num": 0.01519775390625, "loss_xval": 0.6875, "num_input_tokens_seen": 159530900, "step": 2848 }, { "epoch": 6.3452115812917596, "grad_norm": 36.907676696777344, "learning_rate": 1e-06, "loss": 0.9243, "num_input_tokens_seen": 159587904, "step": 2849 }, { "epoch": 6.3452115812917596, "loss": 0.7456858158111572, "loss_ce": 0.00020244505139999092, "loss_iou": 0.31640625, "loss_num": 0.022705078125, "loss_xval": 0.74609375, "num_input_tokens_seen": 159587904, "step": 2849 }, { "epoch": 6.347438752783964, "grad_norm": 25.969263076782227, "learning_rate": 1e-06, "loss": 0.671, "num_input_tokens_seen": 159643948, "step": 2850 }, { "epoch": 6.347438752783964, "loss": 0.7656386494636536, "loss_ce": 0.000257776933722198, "loss_iou": 0.330078125, "loss_num": 0.021240234375, "loss_xval": 0.765625, "num_input_tokens_seen": 159643948, "step": 2850 }, { "epoch": 6.349665924276169, "grad_norm": 16.245384216308594, "learning_rate": 1e-06, "loss": 0.6789, "num_input_tokens_seen": 159701440, "step": 2851 }, { "epoch": 6.349665924276169, "loss": 0.6860156655311584, "loss_ce": 0.00034670589957386255, "loss_iou": 0.27734375, "loss_num": 0.026611328125, "loss_xval": 0.6875, "num_input_tokens_seen": 159701440, "step": 2851 }, { "epoch": 6.351893095768374, "grad_norm": 23.11394691467285, "learning_rate": 1e-06, "loss": 0.6914, "num_input_tokens_seen": 159759676, "step": 2852 }, { "epoch": 6.351893095768374, "loss": 0.4833386540412903, "loss_ce": 0.00018433517834637314, "loss_iou": 0.2197265625, "loss_num": 0.0089111328125, "loss_xval": 0.482421875, "num_input_tokens_seen": 159759676, "step": 2852 }, { "epoch": 6.354120267260579, "grad_norm": 46.09978103637695, "learning_rate": 1e-06, "loss": 0.8392, "num_input_tokens_seen": 159817828, "step": 2853 }, { "epoch": 6.354120267260579, "loss": 0.5963437557220459, "loss_ce": 0.00039652473060414195, "loss_iou": 0.2333984375, "loss_num": 0.02587890625, "loss_xval": 0.59765625, "num_input_tokens_seen": 159817828, "step": 2853 }, { "epoch": 6.356347438752784, "grad_norm": 25.764183044433594, "learning_rate": 1e-06, "loss": 0.8404, "num_input_tokens_seen": 159871560, "step": 2854 }, { "epoch": 6.356347438752784, "loss": 0.6220239400863647, "loss_ce": 0.00019775178225245327, "loss_iou": 0.25, "loss_num": 0.02392578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 159871560, "step": 2854 }, { "epoch": 6.358574610244989, "grad_norm": 19.76352882385254, "learning_rate": 1e-06, "loss": 0.575, "num_input_tokens_seen": 159923780, "step": 2855 }, { "epoch": 6.358574610244989, "loss": 0.5392453074455261, "loss_ce": 0.00018279827781952918, "loss_iou": 0.2265625, "loss_num": 0.01708984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 159923780, "step": 2855 }, { "epoch": 6.360801781737194, "grad_norm": 19.6981201171875, "learning_rate": 1e-06, "loss": 0.7724, "num_input_tokens_seen": 159977408, "step": 2856 }, { "epoch": 6.360801781737194, "loss": 0.7918369770050049, "loss_ce": 0.00021093878604006022, "loss_iou": 0.33203125, "loss_num": 0.025390625, "loss_xval": 0.79296875, "num_input_tokens_seen": 159977408, "step": 2856 }, { "epoch": 6.363028953229398, "grad_norm": 19.517127990722656, "learning_rate": 1e-06, "loss": 0.4262, "num_input_tokens_seen": 160035044, "step": 2857 }, { "epoch": 6.363028953229398, "loss": 0.45344364643096924, "loss_ce": 0.00019658930250443518, "loss_iou": 0.2001953125, "loss_num": 0.0107421875, "loss_xval": 0.453125, "num_input_tokens_seen": 160035044, "step": 2857 }, { "epoch": 6.365256124721603, "grad_norm": 27.655757904052734, "learning_rate": 1e-06, "loss": 0.6266, "num_input_tokens_seen": 160091256, "step": 2858 }, { "epoch": 6.365256124721603, "loss": 0.45751261711120605, "loss_ce": 0.00023723256890662014, "loss_iou": 0.1787109375, "loss_num": 0.02001953125, "loss_xval": 0.45703125, "num_input_tokens_seen": 160091256, "step": 2858 }, { "epoch": 6.367483296213808, "grad_norm": 13.697670936584473, "learning_rate": 1e-06, "loss": 0.6436, "num_input_tokens_seen": 160146776, "step": 2859 }, { "epoch": 6.367483296213808, "loss": 0.7326347827911377, "loss_ce": 0.00021289548021741211, "loss_iou": 0.306640625, "loss_num": 0.02392578125, "loss_xval": 0.734375, "num_input_tokens_seen": 160146776, "step": 2859 }, { "epoch": 6.369710467706013, "grad_norm": 20.36834144592285, "learning_rate": 1e-06, "loss": 0.7276, "num_input_tokens_seen": 160202160, "step": 2860 }, { "epoch": 6.369710467706013, "loss": 0.8042212128639221, "loss_ce": 0.0002660886093508452, "loss_iou": 0.357421875, "loss_num": 0.0179443359375, "loss_xval": 0.8046875, "num_input_tokens_seen": 160202160, "step": 2860 }, { "epoch": 6.371937639198218, "grad_norm": 28.106962203979492, "learning_rate": 1e-06, "loss": 0.6299, "num_input_tokens_seen": 160255840, "step": 2861 }, { "epoch": 6.371937639198218, "loss": 0.5891226530075073, "loss_ce": 0.00025547394761815667, "loss_iou": 0.2734375, "loss_num": 0.00823974609375, "loss_xval": 0.58984375, "num_input_tokens_seen": 160255840, "step": 2861 }, { "epoch": 6.374164810690424, "grad_norm": 17.660959243774414, "learning_rate": 1e-06, "loss": 0.6536, "num_input_tokens_seen": 160312148, "step": 2862 }, { "epoch": 6.374164810690424, "loss": 0.6094207763671875, "loss_ce": 0.000167851394508034, "loss_iou": 0.263671875, "loss_num": 0.0166015625, "loss_xval": 0.609375, "num_input_tokens_seen": 160312148, "step": 2862 }, { "epoch": 6.3763919821826285, "grad_norm": 17.073429107666016, "learning_rate": 1e-06, "loss": 0.7522, "num_input_tokens_seen": 160368156, "step": 2863 }, { "epoch": 6.3763919821826285, "loss": 0.7373796105384827, "loss_ce": 0.00019699129916261882, "loss_iou": 0.33203125, "loss_num": 0.01458740234375, "loss_xval": 0.73828125, "num_input_tokens_seen": 160368156, "step": 2863 }, { "epoch": 6.378619153674833, "grad_norm": 92.50753021240234, "learning_rate": 1e-06, "loss": 0.7973, "num_input_tokens_seen": 160426052, "step": 2864 }, { "epoch": 6.378619153674833, "loss": 0.7172311544418335, "loss_ce": 0.00019013139535672963, "loss_iou": 0.318359375, "loss_num": 0.01611328125, "loss_xval": 0.71875, "num_input_tokens_seen": 160426052, "step": 2864 }, { "epoch": 6.380846325167038, "grad_norm": 18.557222366333008, "learning_rate": 1e-06, "loss": 0.6129, "num_input_tokens_seen": 160482740, "step": 2865 }, { "epoch": 6.380846325167038, "loss": 0.49754250049591064, "loss_ce": 0.00022808092762716115, "loss_iou": 0.2373046875, "loss_num": 0.004638671875, "loss_xval": 0.498046875, "num_input_tokens_seen": 160482740, "step": 2865 }, { "epoch": 6.383073496659243, "grad_norm": 19.247840881347656, "learning_rate": 1e-06, "loss": 0.8082, "num_input_tokens_seen": 160538876, "step": 2866 }, { "epoch": 6.383073496659243, "loss": 0.753126859664917, "loss_ce": 0.00019718983094207942, "loss_iou": 0.33984375, "loss_num": 0.01483154296875, "loss_xval": 0.75390625, "num_input_tokens_seen": 160538876, "step": 2866 }, { "epoch": 6.385300668151448, "grad_norm": 19.878429412841797, "learning_rate": 1e-06, "loss": 0.5645, "num_input_tokens_seen": 160592400, "step": 2867 }, { "epoch": 6.385300668151448, "loss": 0.44196611642837524, "loss_ce": 0.0001936689077410847, "loss_iou": 0.189453125, "loss_num": 0.0128173828125, "loss_xval": 0.44140625, "num_input_tokens_seen": 160592400, "step": 2867 }, { "epoch": 6.387527839643653, "grad_norm": 23.526199340820312, "learning_rate": 1e-06, "loss": 0.7953, "num_input_tokens_seen": 160648336, "step": 2868 }, { "epoch": 6.387527839643653, "loss": 0.9042606353759766, "loss_ce": 0.00032996918889693916, "loss_iou": 0.39453125, "loss_num": 0.02294921875, "loss_xval": 0.90234375, "num_input_tokens_seen": 160648336, "step": 2868 }, { "epoch": 6.389755011135858, "grad_norm": 24.269256591796875, "learning_rate": 1e-06, "loss": 0.6684, "num_input_tokens_seen": 160706796, "step": 2869 }, { "epoch": 6.389755011135858, "loss": 0.7116117477416992, "loss_ce": 0.00030804408015683293, "loss_iou": 0.283203125, "loss_num": 0.0286865234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 160706796, "step": 2869 }, { "epoch": 6.3919821826280625, "grad_norm": 31.342178344726562, "learning_rate": 1e-06, "loss": 0.647, "num_input_tokens_seen": 160762660, "step": 2870 }, { "epoch": 6.3919821826280625, "loss": 0.4261830449104309, "loss_ce": 0.00021868752082809806, "loss_iou": 0.189453125, "loss_num": 0.00946044921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 160762660, "step": 2870 }, { "epoch": 6.394209354120267, "grad_norm": 16.151552200317383, "learning_rate": 1e-06, "loss": 0.5474, "num_input_tokens_seen": 160818852, "step": 2871 }, { "epoch": 6.394209354120267, "loss": 0.5044623017311096, "loss_ce": 0.0001898359478218481, "loss_iou": 0.2265625, "loss_num": 0.01007080078125, "loss_xval": 0.50390625, "num_input_tokens_seen": 160818852, "step": 2871 }, { "epoch": 6.396436525612472, "grad_norm": 15.111863136291504, "learning_rate": 1e-06, "loss": 0.6786, "num_input_tokens_seen": 160875212, "step": 2872 }, { "epoch": 6.396436525612472, "loss": 0.6591340899467468, "loss_ce": 0.00019854362471960485, "loss_iou": 0.28125, "loss_num": 0.0189208984375, "loss_xval": 0.66015625, "num_input_tokens_seen": 160875212, "step": 2872 }, { "epoch": 6.398663697104677, "grad_norm": 20.276626586914062, "learning_rate": 1e-06, "loss": 0.5893, "num_input_tokens_seen": 160933880, "step": 2873 }, { "epoch": 6.398663697104677, "loss": 0.4171835482120514, "loss_ce": 0.00019134554895572364, "loss_iou": 0.1865234375, "loss_num": 0.00872802734375, "loss_xval": 0.41796875, "num_input_tokens_seen": 160933880, "step": 2873 }, { "epoch": 6.400890868596882, "grad_norm": 33.857666015625, "learning_rate": 1e-06, "loss": 0.6731, "num_input_tokens_seen": 160988996, "step": 2874 }, { "epoch": 6.400890868596882, "loss": 0.8410516977310181, "loss_ce": 0.00023136789968702942, "loss_iou": 0.357421875, "loss_num": 0.0252685546875, "loss_xval": 0.83984375, "num_input_tokens_seen": 160988996, "step": 2874 }, { "epoch": 6.403118040089087, "grad_norm": 12.741671562194824, "learning_rate": 1e-06, "loss": 0.7232, "num_input_tokens_seen": 161043496, "step": 2875 }, { "epoch": 6.403118040089087, "loss": 0.6495035886764526, "loss_ce": 0.00021157064475119114, "loss_iou": 0.28125, "loss_num": 0.0172119140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 161043496, "step": 2875 }, { "epoch": 6.405345211581292, "grad_norm": 18.550369262695312, "learning_rate": 1e-06, "loss": 0.6694, "num_input_tokens_seen": 161099512, "step": 2876 }, { "epoch": 6.405345211581292, "loss": 0.7444691061973572, "loss_ce": 0.00032850331626832485, "loss_iou": 0.296875, "loss_num": 0.030029296875, "loss_xval": 0.7421875, "num_input_tokens_seen": 161099512, "step": 2876 }, { "epoch": 6.4075723830734965, "grad_norm": 23.05417823791504, "learning_rate": 1e-06, "loss": 0.4981, "num_input_tokens_seen": 161155008, "step": 2877 }, { "epoch": 6.4075723830734965, "loss": 0.6640802621841431, "loss_ce": 0.00020084393327124417, "loss_iou": 0.3046875, "loss_num": 0.0107421875, "loss_xval": 0.6640625, "num_input_tokens_seen": 161155008, "step": 2877 }, { "epoch": 6.409799554565701, "grad_norm": 16.260465621948242, "learning_rate": 1e-06, "loss": 0.8791, "num_input_tokens_seen": 161211428, "step": 2878 }, { "epoch": 6.409799554565701, "loss": 0.9614474773406982, "loss_ce": 0.0005100243142805994, "loss_iou": 0.40234375, "loss_num": 0.031005859375, "loss_xval": 0.9609375, "num_input_tokens_seen": 161211428, "step": 2878 }, { "epoch": 6.412026726057906, "grad_norm": 25.339269638061523, "learning_rate": 1e-06, "loss": 0.6356, "num_input_tokens_seen": 161268840, "step": 2879 }, { "epoch": 6.412026726057906, "loss": 0.5295264720916748, "loss_ce": 0.00022961897775530815, "loss_iou": 0.2353515625, "loss_num": 0.01165771484375, "loss_xval": 0.53125, "num_input_tokens_seen": 161268840, "step": 2879 }, { "epoch": 6.414253897550111, "grad_norm": 17.69098663330078, "learning_rate": 1e-06, "loss": 0.7186, "num_input_tokens_seen": 161326592, "step": 2880 }, { "epoch": 6.414253897550111, "loss": 0.6872239708900452, "loss_ce": 0.00021221938368398696, "loss_iou": 0.3046875, "loss_num": 0.015380859375, "loss_xval": 0.6875, "num_input_tokens_seen": 161326592, "step": 2880 }, { "epoch": 6.416481069042316, "grad_norm": 20.64066505432129, "learning_rate": 1e-06, "loss": 0.6329, "num_input_tokens_seen": 161379560, "step": 2881 }, { "epoch": 6.416481069042316, "loss": 0.8198229074478149, "loss_ce": 0.00024278149066958576, "loss_iou": 0.36328125, "loss_num": 0.0184326171875, "loss_xval": 0.8203125, "num_input_tokens_seen": 161379560, "step": 2881 }, { "epoch": 6.418708240534521, "grad_norm": 16.438508987426758, "learning_rate": 1e-06, "loss": 0.6852, "num_input_tokens_seen": 161436220, "step": 2882 }, { "epoch": 6.418708240534521, "loss": 0.7768357992172241, "loss_ce": 0.0002244812058052048, "loss_iou": 0.298828125, "loss_num": 0.0361328125, "loss_xval": 0.77734375, "num_input_tokens_seen": 161436220, "step": 2882 }, { "epoch": 6.420935412026726, "grad_norm": 20.693113327026367, "learning_rate": 1e-06, "loss": 0.6301, "num_input_tokens_seen": 161489760, "step": 2883 }, { "epoch": 6.420935412026726, "loss": 0.6213924884796143, "loss_ce": 0.00029870617436245084, "loss_iou": 0.271484375, "loss_num": 0.01531982421875, "loss_xval": 0.62109375, "num_input_tokens_seen": 161489760, "step": 2883 }, { "epoch": 6.4231625835189305, "grad_norm": 89.39657592773438, "learning_rate": 1e-06, "loss": 0.8208, "num_input_tokens_seen": 161546596, "step": 2884 }, { "epoch": 6.4231625835189305, "loss": 0.8657987117767334, "loss_ce": 0.00032019149512052536, "loss_iou": 0.361328125, "loss_num": 0.028564453125, "loss_xval": 0.8671875, "num_input_tokens_seen": 161546596, "step": 2884 }, { "epoch": 6.425389755011135, "grad_norm": 19.063907623291016, "learning_rate": 1e-06, "loss": 0.5801, "num_input_tokens_seen": 161600504, "step": 2885 }, { "epoch": 6.425389755011135, "loss": 0.5580852627754211, "loss_ce": 0.00019339279970154166, "loss_iou": 0.2578125, "loss_num": 0.00860595703125, "loss_xval": 0.55859375, "num_input_tokens_seen": 161600504, "step": 2885 }, { "epoch": 6.427616926503341, "grad_norm": 21.565977096557617, "learning_rate": 1e-06, "loss": 0.6976, "num_input_tokens_seen": 161659560, "step": 2886 }, { "epoch": 6.427616926503341, "loss": 0.8639631271362305, "loss_ce": 0.0004377923032734543, "loss_iou": 0.400390625, "loss_num": 0.01287841796875, "loss_xval": 0.86328125, "num_input_tokens_seen": 161659560, "step": 2886 }, { "epoch": 6.429844097995546, "grad_norm": 17.81540870666504, "learning_rate": 1e-06, "loss": 0.4801, "num_input_tokens_seen": 161717364, "step": 2887 }, { "epoch": 6.429844097995546, "loss": 0.49559223651885986, "loss_ce": 0.00023093473282642663, "loss_iou": 0.2216796875, "loss_num": 0.01043701171875, "loss_xval": 0.49609375, "num_input_tokens_seen": 161717364, "step": 2887 }, { "epoch": 6.432071269487751, "grad_norm": 46.8631591796875, "learning_rate": 1e-06, "loss": 0.7548, "num_input_tokens_seen": 161772008, "step": 2888 }, { "epoch": 6.432071269487751, "loss": 0.8915398716926575, "loss_ce": 0.0004266298783477396, "loss_iou": 0.353515625, "loss_num": 0.03662109375, "loss_xval": 0.890625, "num_input_tokens_seen": 161772008, "step": 2888 }, { "epoch": 6.434298440979956, "grad_norm": 17.266054153442383, "learning_rate": 1e-06, "loss": 0.5134, "num_input_tokens_seen": 161831844, "step": 2889 }, { "epoch": 6.434298440979956, "loss": 0.5959360003471375, "loss_ce": 0.00023290744866244495, "loss_iou": 0.25, "loss_num": 0.0189208984375, "loss_xval": 0.59375, "num_input_tokens_seen": 161831844, "step": 2889 }, { "epoch": 6.436525612472161, "grad_norm": 14.699010848999023, "learning_rate": 1e-06, "loss": 0.7692, "num_input_tokens_seen": 161889280, "step": 2890 }, { "epoch": 6.436525612472161, "loss": 0.817835807800293, "loss_ce": 0.0004529977450147271, "loss_iou": 0.330078125, "loss_num": 0.031494140625, "loss_xval": 0.81640625, "num_input_tokens_seen": 161889280, "step": 2890 }, { "epoch": 6.4387527839643655, "grad_norm": 15.680800437927246, "learning_rate": 1e-06, "loss": 0.6009, "num_input_tokens_seen": 161944376, "step": 2891 }, { "epoch": 6.4387527839643655, "loss": 0.6331249475479126, "loss_ce": 0.0003124309587292373, "loss_iou": 0.27734375, "loss_num": 0.0159912109375, "loss_xval": 0.6328125, "num_input_tokens_seen": 161944376, "step": 2891 }, { "epoch": 6.44097995545657, "grad_norm": 15.222108840942383, "learning_rate": 1e-06, "loss": 0.7805, "num_input_tokens_seen": 162001860, "step": 2892 }, { "epoch": 6.44097995545657, "loss": 0.8359357714653015, "loss_ce": 0.00024238668265752494, "loss_iou": 0.345703125, "loss_num": 0.028564453125, "loss_xval": 0.8359375, "num_input_tokens_seen": 162001860, "step": 2892 }, { "epoch": 6.443207126948775, "grad_norm": 44.775211334228516, "learning_rate": 1e-06, "loss": 0.7518, "num_input_tokens_seen": 162057500, "step": 2893 }, { "epoch": 6.443207126948775, "loss": 0.6410810351371765, "loss_ce": 0.0007001558551564813, "loss_iou": 0.287109375, "loss_num": 0.01312255859375, "loss_xval": 0.640625, "num_input_tokens_seen": 162057500, "step": 2893 }, { "epoch": 6.44543429844098, "grad_norm": 16.15131378173828, "learning_rate": 1e-06, "loss": 0.7321, "num_input_tokens_seen": 162112232, "step": 2894 }, { "epoch": 6.44543429844098, "loss": 0.7305101156234741, "loss_ce": 0.0002855030761566013, "loss_iou": 0.32421875, "loss_num": 0.0164794921875, "loss_xval": 0.73046875, "num_input_tokens_seen": 162112232, "step": 2894 }, { "epoch": 6.447661469933185, "grad_norm": 62.584171295166016, "learning_rate": 1e-06, "loss": 0.6187, "num_input_tokens_seen": 162166996, "step": 2895 }, { "epoch": 6.447661469933185, "loss": 0.5436509847640991, "loss_ce": 0.00019392551621422172, "loss_iou": 0.2431640625, "loss_num": 0.011474609375, "loss_xval": 0.54296875, "num_input_tokens_seen": 162166996, "step": 2895 }, { "epoch": 6.44988864142539, "grad_norm": 25.94312858581543, "learning_rate": 1e-06, "loss": 0.8496, "num_input_tokens_seen": 162221552, "step": 2896 }, { "epoch": 6.44988864142539, "loss": 0.8812910914421082, "loss_ce": 0.00018758632359094918, "loss_iou": 0.37890625, "loss_num": 0.0247802734375, "loss_xval": 0.8828125, "num_input_tokens_seen": 162221552, "step": 2896 }, { "epoch": 6.452115812917595, "grad_norm": 17.239500045776367, "learning_rate": 1e-06, "loss": 0.6339, "num_input_tokens_seen": 162278460, "step": 2897 }, { "epoch": 6.452115812917595, "loss": 0.5580621957778931, "loss_ce": 0.00020086884615011513, "loss_iou": 0.220703125, "loss_num": 0.0233154296875, "loss_xval": 0.55859375, "num_input_tokens_seen": 162278460, "step": 2897 }, { "epoch": 6.4543429844097995, "grad_norm": 30.1453800201416, "learning_rate": 1e-06, "loss": 0.6447, "num_input_tokens_seen": 162335104, "step": 2898 }, { "epoch": 6.4543429844097995, "loss": 0.529179036617279, "loss_ce": 0.00037046882789582014, "loss_iou": 0.244140625, "loss_num": 0.00799560546875, "loss_xval": 0.52734375, "num_input_tokens_seen": 162335104, "step": 2898 }, { "epoch": 6.456570155902004, "grad_norm": 14.278246879577637, "learning_rate": 1e-06, "loss": 0.8011, "num_input_tokens_seen": 162390036, "step": 2899 }, { "epoch": 6.456570155902004, "loss": 0.6543991565704346, "loss_ce": 0.00022433955746237189, "loss_iou": 0.255859375, "loss_num": 0.0283203125, "loss_xval": 0.65234375, "num_input_tokens_seen": 162390036, "step": 2899 }, { "epoch": 6.458797327394209, "grad_norm": 27.82345962524414, "learning_rate": 1e-06, "loss": 0.5877, "num_input_tokens_seen": 162446176, "step": 2900 }, { "epoch": 6.458797327394209, "loss": 0.5277654528617859, "loss_ce": 0.0001775633281795308, "loss_iou": 0.2353515625, "loss_num": 0.01129150390625, "loss_xval": 0.52734375, "num_input_tokens_seen": 162446176, "step": 2900 }, { "epoch": 6.461024498886414, "grad_norm": 16.007659912109375, "learning_rate": 1e-06, "loss": 0.7475, "num_input_tokens_seen": 162502660, "step": 2901 }, { "epoch": 6.461024498886414, "loss": 0.9330655932426453, "loss_ce": 0.00020428383140824735, "loss_iou": 0.349609375, "loss_num": 0.046630859375, "loss_xval": 0.93359375, "num_input_tokens_seen": 162502660, "step": 2901 }, { "epoch": 6.463251670378619, "grad_norm": 21.829668045043945, "learning_rate": 1e-06, "loss": 0.6332, "num_input_tokens_seen": 162559356, "step": 2902 }, { "epoch": 6.463251670378619, "loss": 0.623560905456543, "loss_ce": 0.0002699050819501281, "loss_iou": 0.23828125, "loss_num": 0.0294189453125, "loss_xval": 0.625, "num_input_tokens_seen": 162559356, "step": 2902 }, { "epoch": 6.465478841870824, "grad_norm": 16.68356704711914, "learning_rate": 1e-06, "loss": 0.6161, "num_input_tokens_seen": 162617696, "step": 2903 }, { "epoch": 6.465478841870824, "loss": 0.5705079436302185, "loss_ce": 0.00019540925859473646, "loss_iou": 0.240234375, "loss_num": 0.01806640625, "loss_xval": 0.5703125, "num_input_tokens_seen": 162617696, "step": 2903 }, { "epoch": 6.467706013363029, "grad_norm": 19.659059524536133, "learning_rate": 1e-06, "loss": 0.6401, "num_input_tokens_seen": 162674516, "step": 2904 }, { "epoch": 6.467706013363029, "loss": 0.7844167351722717, "loss_ce": 0.00023700644669588655, "loss_iou": 0.33203125, "loss_num": 0.0238037109375, "loss_xval": 0.78515625, "num_input_tokens_seen": 162674516, "step": 2904 }, { "epoch": 6.4699331848552335, "grad_norm": 21.870929718017578, "learning_rate": 1e-06, "loss": 0.6373, "num_input_tokens_seen": 162731232, "step": 2905 }, { "epoch": 6.4699331848552335, "loss": 0.6285732984542847, "loss_ce": 0.00039952859515324235, "loss_iou": 0.28515625, "loss_num": 0.01171875, "loss_xval": 0.62890625, "num_input_tokens_seen": 162731232, "step": 2905 }, { "epoch": 6.472160356347438, "grad_norm": 22.746726989746094, "learning_rate": 1e-06, "loss": 0.5828, "num_input_tokens_seen": 162786624, "step": 2906 }, { "epoch": 6.472160356347438, "loss": 0.6897099018096924, "loss_ce": 0.0005009524757042527, "loss_iou": 0.30078125, "loss_num": 0.017822265625, "loss_xval": 0.6875, "num_input_tokens_seen": 162786624, "step": 2906 }, { "epoch": 6.474387527839644, "grad_norm": 22.810585021972656, "learning_rate": 1e-06, "loss": 0.858, "num_input_tokens_seen": 162844052, "step": 2907 }, { "epoch": 6.474387527839644, "loss": 0.7649703025817871, "loss_ce": 0.00032186286989599466, "loss_iou": 0.34375, "loss_num": 0.01513671875, "loss_xval": 0.765625, "num_input_tokens_seen": 162844052, "step": 2907 }, { "epoch": 6.476614699331849, "grad_norm": 14.657524108886719, "learning_rate": 1e-06, "loss": 0.7491, "num_input_tokens_seen": 162903604, "step": 2908 }, { "epoch": 6.476614699331849, "loss": 0.6694676280021667, "loss_ce": 0.00027815584326162934, "loss_iou": 0.255859375, "loss_num": 0.03173828125, "loss_xval": 0.66796875, "num_input_tokens_seen": 162903604, "step": 2908 }, { "epoch": 6.478841870824054, "grad_norm": 24.23907470703125, "learning_rate": 1e-06, "loss": 0.6508, "num_input_tokens_seen": 162959780, "step": 2909 }, { "epoch": 6.478841870824054, "loss": 0.7415847778320312, "loss_ce": 0.0001907538971863687, "loss_iou": 0.326171875, "loss_num": 0.01806640625, "loss_xval": 0.7421875, "num_input_tokens_seen": 162959780, "step": 2909 }, { "epoch": 6.481069042316259, "grad_norm": 20.807043075561523, "learning_rate": 1e-06, "loss": 0.5773, "num_input_tokens_seen": 163016212, "step": 2910 }, { "epoch": 6.481069042316259, "loss": 0.7409608364105225, "loss_ce": 0.00048227497609332204, "loss_iou": 0.3203125, "loss_num": 0.0196533203125, "loss_xval": 0.7421875, "num_input_tokens_seen": 163016212, "step": 2910 }, { "epoch": 6.4832962138084635, "grad_norm": 22.38340187072754, "learning_rate": 1e-06, "loss": 0.69, "num_input_tokens_seen": 163071812, "step": 2911 }, { "epoch": 6.4832962138084635, "loss": 0.6240810751914978, "loss_ce": 0.00030178556335158646, "loss_iou": 0.26953125, "loss_num": 0.017333984375, "loss_xval": 0.625, "num_input_tokens_seen": 163071812, "step": 2911 }, { "epoch": 6.485523385300668, "grad_norm": 17.558237075805664, "learning_rate": 1e-06, "loss": 0.942, "num_input_tokens_seen": 163127468, "step": 2912 }, { "epoch": 6.485523385300668, "loss": 1.0024060010910034, "loss_ce": 0.00020868651336058974, "loss_iou": 0.443359375, "loss_num": 0.02294921875, "loss_xval": 1.0, "num_input_tokens_seen": 163127468, "step": 2912 }, { "epoch": 6.487750556792873, "grad_norm": 20.10137367248535, "learning_rate": 1e-06, "loss": 0.6397, "num_input_tokens_seen": 163182160, "step": 2913 }, { "epoch": 6.487750556792873, "loss": 0.37796449661254883, "loss_ce": 0.0002789545978885144, "loss_iou": 0.173828125, "loss_num": 0.00579833984375, "loss_xval": 0.376953125, "num_input_tokens_seen": 163182160, "step": 2913 }, { "epoch": 6.489977728285078, "grad_norm": 59.727386474609375, "learning_rate": 1e-06, "loss": 0.7155, "num_input_tokens_seen": 163238980, "step": 2914 }, { "epoch": 6.489977728285078, "loss": 0.6282615661621094, "loss_ce": 0.00020979228429496288, "loss_iou": 0.271484375, "loss_num": 0.01708984375, "loss_xval": 0.62890625, "num_input_tokens_seen": 163238980, "step": 2914 }, { "epoch": 6.492204899777283, "grad_norm": 25.564571380615234, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 163293456, "step": 2915 }, { "epoch": 6.492204899777283, "loss": 0.5602612495422363, "loss_ce": 0.00020264273916836828, "loss_iou": 0.263671875, "loss_num": 0.006744384765625, "loss_xval": 0.55859375, "num_input_tokens_seen": 163293456, "step": 2915 }, { "epoch": 6.494432071269488, "grad_norm": 19.56485939025879, "learning_rate": 1e-06, "loss": 0.6112, "num_input_tokens_seen": 163347456, "step": 2916 }, { "epoch": 6.494432071269488, "loss": 0.6698689460754395, "loss_ce": 0.00028277470846660435, "loss_iou": 0.251953125, "loss_num": 0.032958984375, "loss_xval": 0.66796875, "num_input_tokens_seen": 163347456, "step": 2916 }, { "epoch": 6.496659242761693, "grad_norm": 16.429033279418945, "learning_rate": 1e-06, "loss": 0.6937, "num_input_tokens_seen": 163405948, "step": 2917 }, { "epoch": 6.496659242761693, "loss": 0.42375442385673523, "loss_ce": 0.00017044274136424065, "loss_iou": 0.17578125, "loss_num": 0.014404296875, "loss_xval": 0.423828125, "num_input_tokens_seen": 163405948, "step": 2917 }, { "epoch": 6.498886414253898, "grad_norm": 18.85441780090332, "learning_rate": 1e-06, "loss": 0.7792, "num_input_tokens_seen": 163461656, "step": 2918 }, { "epoch": 6.498886414253898, "loss": 0.5861859321594238, "loss_ce": 0.00024842872517183423, "loss_iou": 0.26171875, "loss_num": 0.01263427734375, "loss_xval": 0.5859375, "num_input_tokens_seen": 163461656, "step": 2918 }, { "epoch": 6.501113585746102, "grad_norm": 23.43665885925293, "learning_rate": 1e-06, "loss": 0.5503, "num_input_tokens_seen": 163519632, "step": 2919 }, { "epoch": 6.501113585746102, "loss": 0.4104507565498352, "loss_ce": 0.00017243438924197108, "loss_iou": 0.1845703125, "loss_num": 0.0081787109375, "loss_xval": 0.41015625, "num_input_tokens_seen": 163519632, "step": 2919 }, { "epoch": 6.503340757238307, "grad_norm": 28.950546264648438, "learning_rate": 1e-06, "loss": 0.741, "num_input_tokens_seen": 163577312, "step": 2920 }, { "epoch": 6.503340757238307, "loss": 0.7677791714668274, "loss_ce": 0.0002010307798627764, "loss_iou": 0.330078125, "loss_num": 0.0216064453125, "loss_xval": 0.765625, "num_input_tokens_seen": 163577312, "step": 2920 }, { "epoch": 6.505567928730512, "grad_norm": 24.427560806274414, "learning_rate": 1e-06, "loss": 0.5989, "num_input_tokens_seen": 163632820, "step": 2921 }, { "epoch": 6.505567928730512, "loss": 0.7374780178070068, "loss_ce": 0.00017326742818113416, "loss_iou": 0.296875, "loss_num": 0.0281982421875, "loss_xval": 0.73828125, "num_input_tokens_seen": 163632820, "step": 2921 }, { "epoch": 6.507795100222717, "grad_norm": 18.299259185791016, "learning_rate": 1e-06, "loss": 0.676, "num_input_tokens_seen": 163689372, "step": 2922 }, { "epoch": 6.507795100222717, "loss": 0.6376951932907104, "loss_ce": 0.00024403775751125067, "loss_iou": 0.298828125, "loss_num": 0.00830078125, "loss_xval": 0.63671875, "num_input_tokens_seen": 163689372, "step": 2922 }, { "epoch": 6.510022271714922, "grad_norm": 31.762420654296875, "learning_rate": 1e-06, "loss": 0.8069, "num_input_tokens_seen": 163745852, "step": 2923 }, { "epoch": 6.510022271714922, "loss": 1.0176315307617188, "loss_ce": 0.00029755313880741596, "loss_iou": 0.4140625, "loss_num": 0.037353515625, "loss_xval": 1.015625, "num_input_tokens_seen": 163745852, "step": 2923 }, { "epoch": 6.512249443207127, "grad_norm": 25.43143653869629, "learning_rate": 1e-06, "loss": 1.0766, "num_input_tokens_seen": 163801732, "step": 2924 }, { "epoch": 6.512249443207127, "loss": 1.3304567337036133, "loss_ce": 0.00025651464238762856, "loss_iou": 0.53125, "loss_num": 0.0537109375, "loss_xval": 1.328125, "num_input_tokens_seen": 163801732, "step": 2924 }, { "epoch": 6.514476614699332, "grad_norm": 15.741415977478027, "learning_rate": 1e-06, "loss": 0.6474, "num_input_tokens_seen": 163859180, "step": 2925 }, { "epoch": 6.514476614699332, "loss": 0.6122488975524902, "loss_ce": 0.00018836073286365718, "loss_iou": 0.279296875, "loss_num": 0.0106201171875, "loss_xval": 0.61328125, "num_input_tokens_seen": 163859180, "step": 2925 }, { "epoch": 6.5167037861915365, "grad_norm": 20.761812210083008, "learning_rate": 1e-06, "loss": 0.8235, "num_input_tokens_seen": 163916244, "step": 2926 }, { "epoch": 6.5167037861915365, "loss": 0.5054827332496643, "loss_ce": 0.00023371227143798023, "loss_iou": 0.2216796875, "loss_num": 0.01226806640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 163916244, "step": 2926 }, { "epoch": 6.518930957683741, "grad_norm": 61.213077545166016, "learning_rate": 1e-06, "loss": 0.9006, "num_input_tokens_seen": 163970816, "step": 2927 }, { "epoch": 6.518930957683741, "loss": 0.9136002659797668, "loss_ce": 0.0003922681207768619, "loss_iou": 0.3984375, "loss_num": 0.023681640625, "loss_xval": 0.9140625, "num_input_tokens_seen": 163970816, "step": 2927 }, { "epoch": 6.521158129175946, "grad_norm": 21.650136947631836, "learning_rate": 1e-06, "loss": 0.7797, "num_input_tokens_seen": 164028208, "step": 2928 }, { "epoch": 6.521158129175946, "loss": 0.8101509213447571, "loss_ce": 0.00021437015675473958, "loss_iou": 0.337890625, "loss_num": 0.0269775390625, "loss_xval": 0.80859375, "num_input_tokens_seen": 164028208, "step": 2928 }, { "epoch": 6.523385300668151, "grad_norm": 20.427627563476562, "learning_rate": 1e-06, "loss": 0.8737, "num_input_tokens_seen": 164083608, "step": 2929 }, { "epoch": 6.523385300668151, "loss": 0.9638548493385315, "loss_ce": 0.00023182647419162095, "loss_iou": 0.43359375, "loss_num": 0.01953125, "loss_xval": 0.96484375, "num_input_tokens_seen": 164083608, "step": 2929 }, { "epoch": 6.525612472160356, "grad_norm": 17.497631072998047, "learning_rate": 1e-06, "loss": 0.7885, "num_input_tokens_seen": 164138104, "step": 2930 }, { "epoch": 6.525612472160356, "loss": 0.9440581798553467, "loss_ce": 0.00021053646923974156, "loss_iou": 0.3515625, "loss_num": 0.04833984375, "loss_xval": 0.9453125, "num_input_tokens_seen": 164138104, "step": 2930 }, { "epoch": 6.527839643652561, "grad_norm": 26.011911392211914, "learning_rate": 1e-06, "loss": 0.7072, "num_input_tokens_seen": 164194992, "step": 2931 }, { "epoch": 6.527839643652561, "loss": 0.5633350610733032, "loss_ce": 0.00022474557044915855, "loss_iou": 0.251953125, "loss_num": 0.0118408203125, "loss_xval": 0.5625, "num_input_tokens_seen": 164194992, "step": 2931 }, { "epoch": 6.5300668151447665, "grad_norm": 14.881753921508789, "learning_rate": 1e-06, "loss": 0.6276, "num_input_tokens_seen": 164252436, "step": 2932 }, { "epoch": 6.5300668151447665, "loss": 0.7083165645599365, "loss_ce": 0.0005529068876057863, "loss_iou": 0.2890625, "loss_num": 0.02587890625, "loss_xval": 0.70703125, "num_input_tokens_seen": 164252436, "step": 2932 }, { "epoch": 6.532293986636971, "grad_norm": 22.067697525024414, "learning_rate": 1e-06, "loss": 0.6472, "num_input_tokens_seen": 164309220, "step": 2933 }, { "epoch": 6.532293986636971, "loss": 0.6984990835189819, "loss_ce": 0.0002568979107309133, "loss_iou": 0.3125, "loss_num": 0.01416015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 164309220, "step": 2933 }, { "epoch": 6.534521158129176, "grad_norm": 18.131603240966797, "learning_rate": 1e-06, "loss": 0.6463, "num_input_tokens_seen": 164366052, "step": 2934 }, { "epoch": 6.534521158129176, "loss": 0.6132932901382446, "loss_ce": 0.00025620122323744, "loss_iou": 0.228515625, "loss_num": 0.031005859375, "loss_xval": 0.61328125, "num_input_tokens_seen": 164366052, "step": 2934 }, { "epoch": 6.536748329621381, "grad_norm": 28.302825927734375, "learning_rate": 1e-06, "loss": 0.5853, "num_input_tokens_seen": 164417760, "step": 2935 }, { "epoch": 6.536748329621381, "loss": 0.7578645944595337, "loss_ce": 0.0002961784484796226, "loss_iou": 0.318359375, "loss_num": 0.024169921875, "loss_xval": 0.7578125, "num_input_tokens_seen": 164417760, "step": 2935 }, { "epoch": 6.538975501113586, "grad_norm": 18.224735260009766, "learning_rate": 1e-06, "loss": 0.6522, "num_input_tokens_seen": 164472700, "step": 2936 }, { "epoch": 6.538975501113586, "loss": 0.6816818118095398, "loss_ce": 0.000285336805973202, "loss_iou": 0.29296875, "loss_num": 0.018798828125, "loss_xval": 0.6796875, "num_input_tokens_seen": 164472700, "step": 2936 }, { "epoch": 6.541202672605791, "grad_norm": 21.562236785888672, "learning_rate": 1e-06, "loss": 0.6994, "num_input_tokens_seen": 164528444, "step": 2937 }, { "epoch": 6.541202672605791, "loss": 0.641094446182251, "loss_ce": 0.00022529340640176088, "loss_iou": 0.296875, "loss_num": 0.00982666015625, "loss_xval": 0.640625, "num_input_tokens_seen": 164528444, "step": 2937 }, { "epoch": 6.543429844097996, "grad_norm": 44.151615142822266, "learning_rate": 1e-06, "loss": 0.7169, "num_input_tokens_seen": 164583964, "step": 2938 }, { "epoch": 6.543429844097996, "loss": 0.6977666616439819, "loss_ce": 0.00025689357426017523, "loss_iou": 0.31640625, "loss_num": 0.01287841796875, "loss_xval": 0.69921875, "num_input_tokens_seen": 164583964, "step": 2938 }, { "epoch": 6.5456570155902005, "grad_norm": 27.166345596313477, "learning_rate": 1e-06, "loss": 0.6231, "num_input_tokens_seen": 164638820, "step": 2939 }, { "epoch": 6.5456570155902005, "loss": 0.6275501251220703, "loss_ce": 0.0002307354734512046, "loss_iou": 0.251953125, "loss_num": 0.0242919921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 164638820, "step": 2939 }, { "epoch": 6.547884187082405, "grad_norm": 15.208316802978516, "learning_rate": 1e-06, "loss": 0.4751, "num_input_tokens_seen": 164693132, "step": 2940 }, { "epoch": 6.547884187082405, "loss": 0.3183910846710205, "loss_ce": 0.00018428399926051497, "loss_iou": 0.1455078125, "loss_num": 0.005584716796875, "loss_xval": 0.318359375, "num_input_tokens_seen": 164693132, "step": 2940 }, { "epoch": 6.55011135857461, "grad_norm": 32.17951965332031, "learning_rate": 1e-06, "loss": 0.7, "num_input_tokens_seen": 164749436, "step": 2941 }, { "epoch": 6.55011135857461, "loss": 0.7634174823760986, "loss_ce": 0.0004780220042448491, "loss_iou": 0.33203125, "loss_num": 0.02001953125, "loss_xval": 0.76171875, "num_input_tokens_seen": 164749436, "step": 2941 }, { "epoch": 6.552338530066815, "grad_norm": 21.82436752319336, "learning_rate": 1e-06, "loss": 0.6542, "num_input_tokens_seen": 164805000, "step": 2942 }, { "epoch": 6.552338530066815, "loss": 0.6028134822845459, "loss_ce": 0.0002744359662756324, "loss_iou": 0.25390625, "loss_num": 0.018798828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 164805000, "step": 2942 }, { "epoch": 6.55456570155902, "grad_norm": 20.991344451904297, "learning_rate": 1e-06, "loss": 0.7364, "num_input_tokens_seen": 164861344, "step": 2943 }, { "epoch": 6.55456570155902, "loss": 0.7187168002128601, "loss_ce": 0.00021093602117616683, "loss_iou": 0.26953125, "loss_num": 0.03564453125, "loss_xval": 0.71875, "num_input_tokens_seen": 164861344, "step": 2943 }, { "epoch": 6.556792873051225, "grad_norm": 21.933055877685547, "learning_rate": 1e-06, "loss": 0.4837, "num_input_tokens_seen": 164919076, "step": 2944 }, { "epoch": 6.556792873051225, "loss": 0.5052962899208069, "loss_ce": 0.00016931179561652243, "loss_iou": 0.2099609375, "loss_num": 0.0167236328125, "loss_xval": 0.50390625, "num_input_tokens_seen": 164919076, "step": 2944 }, { "epoch": 6.55902004454343, "grad_norm": 19.509490966796875, "learning_rate": 1e-06, "loss": 0.9163, "num_input_tokens_seen": 164972212, "step": 2945 }, { "epoch": 6.55902004454343, "loss": 0.810613751411438, "loss_ce": 0.00025000711320899427, "loss_iou": 0.33984375, "loss_num": 0.026123046875, "loss_xval": 0.80859375, "num_input_tokens_seen": 164972212, "step": 2945 }, { "epoch": 6.5612472160356345, "grad_norm": 33.0357780456543, "learning_rate": 1e-06, "loss": 0.6847, "num_input_tokens_seen": 165028004, "step": 2946 }, { "epoch": 6.5612472160356345, "loss": 0.7473153471946716, "loss_ce": 0.0002450351894367486, "loss_iou": 0.31640625, "loss_num": 0.0223388671875, "loss_xval": 0.74609375, "num_input_tokens_seen": 165028004, "step": 2946 }, { "epoch": 6.563474387527839, "grad_norm": 19.9017391204834, "learning_rate": 1e-06, "loss": 0.6142, "num_input_tokens_seen": 165083812, "step": 2947 }, { "epoch": 6.563474387527839, "loss": 0.6369014382362366, "loss_ce": 0.0001826911757234484, "loss_iou": 0.28515625, "loss_num": 0.0130615234375, "loss_xval": 0.63671875, "num_input_tokens_seen": 165083812, "step": 2947 }, { "epoch": 6.565701559020044, "grad_norm": 30.455806732177734, "learning_rate": 1e-06, "loss": 0.601, "num_input_tokens_seen": 165139816, "step": 2948 }, { "epoch": 6.565701559020044, "loss": 0.6039369106292725, "loss_ce": 0.00023821514332666993, "loss_iou": 0.2333984375, "loss_num": 0.027587890625, "loss_xval": 0.60546875, "num_input_tokens_seen": 165139816, "step": 2948 }, { "epoch": 6.567928730512249, "grad_norm": 20.041481018066406, "learning_rate": 1e-06, "loss": 0.62, "num_input_tokens_seen": 165195512, "step": 2949 }, { "epoch": 6.567928730512249, "loss": 0.6124828457832336, "loss_ce": 0.00017815582395996898, "loss_iou": 0.2490234375, "loss_num": 0.0228271484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 165195512, "step": 2949 }, { "epoch": 6.570155902004454, "grad_norm": 17.582809448242188, "learning_rate": 1e-06, "loss": 0.5694, "num_input_tokens_seen": 165251664, "step": 2950 }, { "epoch": 6.570155902004454, "loss": 0.6081294417381287, "loss_ce": 0.00021927471971139312, "loss_iou": 0.263671875, "loss_num": 0.015869140625, "loss_xval": 0.609375, "num_input_tokens_seen": 165251664, "step": 2950 }, { "epoch": 6.57238307349666, "grad_norm": 18.174318313598633, "learning_rate": 1e-06, "loss": 0.4721, "num_input_tokens_seen": 165307188, "step": 2951 }, { "epoch": 6.57238307349666, "loss": 0.49299192428588867, "loss_ce": 0.00016358881839551032, "loss_iou": 0.2099609375, "loss_num": 0.01434326171875, "loss_xval": 0.4921875, "num_input_tokens_seen": 165307188, "step": 2951 }, { "epoch": 6.574610244988865, "grad_norm": 21.09537696838379, "learning_rate": 1e-06, "loss": 0.6628, "num_input_tokens_seen": 165362820, "step": 2952 }, { "epoch": 6.574610244988865, "loss": 0.7518377304077148, "loss_ce": 0.0002508063626009971, "loss_iou": 0.326171875, "loss_num": 0.02001953125, "loss_xval": 0.75, "num_input_tokens_seen": 165362820, "step": 2952 }, { "epoch": 6.5768374164810695, "grad_norm": 30.387908935546875, "learning_rate": 1e-06, "loss": 0.5611, "num_input_tokens_seen": 165419408, "step": 2953 }, { "epoch": 6.5768374164810695, "loss": 0.5768758058547974, "loss_ce": 0.00021561238099820912, "loss_iou": 0.2578125, "loss_num": 0.01251220703125, "loss_xval": 0.578125, "num_input_tokens_seen": 165419408, "step": 2953 }, { "epoch": 6.579064587973274, "grad_norm": 20.413816452026367, "learning_rate": 1e-06, "loss": 0.7898, "num_input_tokens_seen": 165472128, "step": 2954 }, { "epoch": 6.579064587973274, "loss": 0.7371137142181396, "loss_ce": 0.0001751929521560669, "loss_iou": 0.287109375, "loss_num": 0.03271484375, "loss_xval": 0.73828125, "num_input_tokens_seen": 165472128, "step": 2954 }, { "epoch": 6.581291759465479, "grad_norm": 20.204029083251953, "learning_rate": 1e-06, "loss": 0.5606, "num_input_tokens_seen": 165529144, "step": 2955 }, { "epoch": 6.581291759465479, "loss": 0.6010243892669678, "loss_ce": 0.00019429507665336132, "loss_iou": 0.28125, "loss_num": 0.00787353515625, "loss_xval": 0.6015625, "num_input_tokens_seen": 165529144, "step": 2955 }, { "epoch": 6.583518930957684, "grad_norm": 17.47038459777832, "learning_rate": 1e-06, "loss": 0.6566, "num_input_tokens_seen": 165588264, "step": 2956 }, { "epoch": 6.583518930957684, "loss": 0.7859352827072144, "loss_ce": 0.0002907089365180582, "loss_iou": 0.333984375, "loss_num": 0.023681640625, "loss_xval": 0.78515625, "num_input_tokens_seen": 165588264, "step": 2956 }, { "epoch": 6.585746102449889, "grad_norm": 16.517253875732422, "learning_rate": 1e-06, "loss": 0.547, "num_input_tokens_seen": 165644784, "step": 2957 }, { "epoch": 6.585746102449889, "loss": 0.6574386358261108, "loss_ce": 0.00021205886150710285, "loss_iou": 0.287109375, "loss_num": 0.016357421875, "loss_xval": 0.65625, "num_input_tokens_seen": 165644784, "step": 2957 }, { "epoch": 6.587973273942094, "grad_norm": 18.46898078918457, "learning_rate": 1e-06, "loss": 0.6594, "num_input_tokens_seen": 165701372, "step": 2958 }, { "epoch": 6.587973273942094, "loss": 0.38792580366134644, "loss_ce": 0.0003525797219481319, "loss_iou": 0.162109375, "loss_num": 0.01251220703125, "loss_xval": 0.38671875, "num_input_tokens_seen": 165701372, "step": 2958 }, { "epoch": 6.590200445434299, "grad_norm": 28.243539810180664, "learning_rate": 1e-06, "loss": 0.7292, "num_input_tokens_seen": 165756960, "step": 2959 }, { "epoch": 6.590200445434299, "loss": 0.9738781452178955, "loss_ce": 0.0002453392662573606, "loss_iou": 0.42578125, "loss_num": 0.0244140625, "loss_xval": 0.97265625, "num_input_tokens_seen": 165756960, "step": 2959 }, { "epoch": 6.5924276169265035, "grad_norm": 15.872271537780762, "learning_rate": 1e-06, "loss": 0.5439, "num_input_tokens_seen": 165813860, "step": 2960 }, { "epoch": 6.5924276169265035, "loss": 0.624319314956665, "loss_ce": 0.00029592958162538707, "loss_iou": 0.26171875, "loss_num": 0.0201416015625, "loss_xval": 0.625, "num_input_tokens_seen": 165813860, "step": 2960 }, { "epoch": 6.594654788418708, "grad_norm": 109.15684509277344, "learning_rate": 1e-06, "loss": 0.7413, "num_input_tokens_seen": 165871416, "step": 2961 }, { "epoch": 6.594654788418708, "loss": 0.9980896711349487, "loss_ce": 0.00016483690706081688, "loss_iou": 0.44140625, "loss_num": 0.023193359375, "loss_xval": 0.99609375, "num_input_tokens_seen": 165871416, "step": 2961 }, { "epoch": 6.596881959910913, "grad_norm": 16.21949577331543, "learning_rate": 1e-06, "loss": 0.9368, "num_input_tokens_seen": 165925904, "step": 2962 }, { "epoch": 6.596881959910913, "loss": 0.8550323247909546, "loss_ce": 0.00017394759925082326, "loss_iou": 0.33984375, "loss_num": 0.034912109375, "loss_xval": 0.85546875, "num_input_tokens_seen": 165925904, "step": 2962 }, { "epoch": 6.599109131403118, "grad_norm": 31.012943267822266, "learning_rate": 1e-06, "loss": 0.6417, "num_input_tokens_seen": 165983760, "step": 2963 }, { "epoch": 6.599109131403118, "loss": 0.6024689674377441, "loss_ce": 0.000662269361782819, "loss_iou": 0.25390625, "loss_num": 0.018798828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 165983760, "step": 2963 }, { "epoch": 6.601336302895323, "grad_norm": 20.181808471679688, "learning_rate": 1e-06, "loss": 0.6337, "num_input_tokens_seen": 166040296, "step": 2964 }, { "epoch": 6.601336302895323, "loss": 0.724341094493866, "loss_ce": 0.0003420562425162643, "loss_iou": 0.298828125, "loss_num": 0.0252685546875, "loss_xval": 0.72265625, "num_input_tokens_seen": 166040296, "step": 2964 }, { "epoch": 6.603563474387528, "grad_norm": 28.568126678466797, "learning_rate": 1e-06, "loss": 0.7788, "num_input_tokens_seen": 166092104, "step": 2965 }, { "epoch": 6.603563474387528, "loss": 0.7236001491546631, "loss_ce": 0.00021149440726730973, "loss_iou": 0.275390625, "loss_num": 0.034912109375, "loss_xval": 0.72265625, "num_input_tokens_seen": 166092104, "step": 2965 }, { "epoch": 6.605790645879733, "grad_norm": 14.282336235046387, "learning_rate": 1e-06, "loss": 0.7572, "num_input_tokens_seen": 166147648, "step": 2966 }, { "epoch": 6.605790645879733, "loss": 0.9260650873184204, "loss_ce": 0.0002839115622919053, "loss_iou": 0.365234375, "loss_num": 0.0390625, "loss_xval": 0.92578125, "num_input_tokens_seen": 166147648, "step": 2966 }, { "epoch": 6.6080178173719375, "grad_norm": 18.43797492980957, "learning_rate": 1e-06, "loss": 0.8469, "num_input_tokens_seen": 166205440, "step": 2967 }, { "epoch": 6.6080178173719375, "loss": 1.090362548828125, "loss_ce": 0.0002746962709352374, "loss_iou": 0.4453125, "loss_num": 0.03955078125, "loss_xval": 1.09375, "num_input_tokens_seen": 166205440, "step": 2967 }, { "epoch": 6.610244988864142, "grad_norm": 20.853721618652344, "learning_rate": 1e-06, "loss": 0.7179, "num_input_tokens_seen": 166262912, "step": 2968 }, { "epoch": 6.610244988864142, "loss": 0.7870877981185913, "loss_ce": 0.00022257049567997456, "loss_iou": 0.34375, "loss_num": 0.020263671875, "loss_xval": 0.78515625, "num_input_tokens_seen": 166262912, "step": 2968 }, { "epoch": 6.612472160356347, "grad_norm": 119.40055847167969, "learning_rate": 1e-06, "loss": 0.6213, "num_input_tokens_seen": 166320972, "step": 2969 }, { "epoch": 6.612472160356347, "loss": 0.6227630376815796, "loss_ce": 0.00020442584354896098, "loss_iou": 0.275390625, "loss_num": 0.01434326171875, "loss_xval": 0.62109375, "num_input_tokens_seen": 166320972, "step": 2969 }, { "epoch": 6.614699331848552, "grad_norm": 21.16520881652832, "learning_rate": 1e-06, "loss": 0.7879, "num_input_tokens_seen": 166377852, "step": 2970 }, { "epoch": 6.614699331848552, "loss": 0.6298392415046692, "loss_ce": 0.0003226206754334271, "loss_iou": 0.27734375, "loss_num": 0.01470947265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 166377852, "step": 2970 }, { "epoch": 6.616926503340757, "grad_norm": 24.545520782470703, "learning_rate": 1e-06, "loss": 0.761, "num_input_tokens_seen": 166434064, "step": 2971 }, { "epoch": 6.616926503340757, "loss": 0.798297643661499, "loss_ce": 0.00020199231221340597, "loss_iou": 0.345703125, "loss_num": 0.021240234375, "loss_xval": 0.796875, "num_input_tokens_seen": 166434064, "step": 2971 }, { "epoch": 6.619153674832962, "grad_norm": 14.242857933044434, "learning_rate": 1e-06, "loss": 0.7568, "num_input_tokens_seen": 166486632, "step": 2972 }, { "epoch": 6.619153674832962, "loss": 0.8938003778457642, "loss_ce": 0.00024563493207097054, "loss_iou": 0.35546875, "loss_num": 0.036376953125, "loss_xval": 0.89453125, "num_input_tokens_seen": 166486632, "step": 2972 }, { "epoch": 6.621380846325167, "grad_norm": 13.78774642944336, "learning_rate": 1e-06, "loss": 0.5507, "num_input_tokens_seen": 166543024, "step": 2973 }, { "epoch": 6.621380846325167, "loss": 0.5280297994613647, "loss_ce": 0.00019778512069024146, "loss_iou": 0.2314453125, "loss_num": 0.01312255859375, "loss_xval": 0.52734375, "num_input_tokens_seen": 166543024, "step": 2973 }, { "epoch": 6.6236080178173715, "grad_norm": 16.257577896118164, "learning_rate": 1e-06, "loss": 0.5946, "num_input_tokens_seen": 166601376, "step": 2974 }, { "epoch": 6.6236080178173715, "loss": 0.705248236656189, "loss_ce": 0.00017008130089379847, "loss_iou": 0.30859375, "loss_num": 0.0179443359375, "loss_xval": 0.703125, "num_input_tokens_seen": 166601376, "step": 2974 }, { "epoch": 6.625835189309576, "grad_norm": 19.095983505249023, "learning_rate": 1e-06, "loss": 0.7031, "num_input_tokens_seen": 166654948, "step": 2975 }, { "epoch": 6.625835189309576, "loss": 0.48734956979751587, "loss_ce": 0.00016694515943527222, "loss_iou": 0.201171875, "loss_num": 0.0169677734375, "loss_xval": 0.486328125, "num_input_tokens_seen": 166654948, "step": 2975 }, { "epoch": 6.628062360801781, "grad_norm": 21.924911499023438, "learning_rate": 1e-06, "loss": 0.6498, "num_input_tokens_seen": 166711656, "step": 2976 }, { "epoch": 6.628062360801781, "loss": 0.5872310996055603, "loss_ce": 0.0001949925208464265, "loss_iou": 0.25, "loss_num": 0.0174560546875, "loss_xval": 0.5859375, "num_input_tokens_seen": 166711656, "step": 2976 }, { "epoch": 6.630289532293987, "grad_norm": 20.422719955444336, "learning_rate": 1e-06, "loss": 0.736, "num_input_tokens_seen": 166770256, "step": 2977 }, { "epoch": 6.630289532293987, "loss": 0.9298211932182312, "loss_ce": 0.0002557823609095067, "loss_iou": 0.3515625, "loss_num": 0.045654296875, "loss_xval": 0.9296875, "num_input_tokens_seen": 166770256, "step": 2977 }, { "epoch": 6.632516703786192, "grad_norm": 18.542539596557617, "learning_rate": 1e-06, "loss": 0.6461, "num_input_tokens_seen": 166826808, "step": 2978 }, { "epoch": 6.632516703786192, "loss": 0.5421923398971558, "loss_ce": 0.00020013400353491306, "loss_iou": 0.22265625, "loss_num": 0.0191650390625, "loss_xval": 0.54296875, "num_input_tokens_seen": 166826808, "step": 2978 }, { "epoch": 6.634743875278397, "grad_norm": 21.711483001708984, "learning_rate": 1e-06, "loss": 0.833, "num_input_tokens_seen": 166881468, "step": 2979 }, { "epoch": 6.634743875278397, "loss": 0.8562052249908447, "loss_ce": 0.000248211930738762, "loss_iou": 0.33984375, "loss_num": 0.034912109375, "loss_xval": 0.85546875, "num_input_tokens_seen": 166881468, "step": 2979 }, { "epoch": 6.636971046770602, "grad_norm": 19.191953659057617, "learning_rate": 1e-06, "loss": 0.5379, "num_input_tokens_seen": 166938348, "step": 2980 }, { "epoch": 6.636971046770602, "loss": 0.47580990195274353, "loss_ce": 0.0002239710884168744, "loss_iou": 0.216796875, "loss_num": 0.00836181640625, "loss_xval": 0.4765625, "num_input_tokens_seen": 166938348, "step": 2980 }, { "epoch": 6.639198218262806, "grad_norm": 19.030431747436523, "learning_rate": 1e-06, "loss": 0.5368, "num_input_tokens_seen": 166994888, "step": 2981 }, { "epoch": 6.639198218262806, "loss": 0.5439237952232361, "loss_ce": 0.00022264779545366764, "loss_iou": 0.220703125, "loss_num": 0.0205078125, "loss_xval": 0.54296875, "num_input_tokens_seen": 166994888, "step": 2981 }, { "epoch": 6.641425389755011, "grad_norm": 17.75498390197754, "learning_rate": 1e-06, "loss": 0.5462, "num_input_tokens_seen": 167050384, "step": 2982 }, { "epoch": 6.641425389755011, "loss": 0.4909123182296753, "loss_ce": 0.00018965097842738032, "loss_iou": 0.2158203125, "loss_num": 0.0118408203125, "loss_xval": 0.490234375, "num_input_tokens_seen": 167050384, "step": 2982 }, { "epoch": 6.643652561247216, "grad_norm": 15.332694053649902, "learning_rate": 1e-06, "loss": 0.4491, "num_input_tokens_seen": 167108932, "step": 2983 }, { "epoch": 6.643652561247216, "loss": 0.5531620979309082, "loss_ce": 0.00018356410146225244, "loss_iou": 0.2490234375, "loss_num": 0.01116943359375, "loss_xval": 0.5546875, "num_input_tokens_seen": 167108932, "step": 2983 }, { "epoch": 6.645879732739421, "grad_norm": 26.91922378540039, "learning_rate": 1e-06, "loss": 0.6703, "num_input_tokens_seen": 167167308, "step": 2984 }, { "epoch": 6.645879732739421, "loss": 0.5900508165359497, "loss_ce": 0.0002071046328637749, "loss_iou": 0.2490234375, "loss_num": 0.0181884765625, "loss_xval": 0.58984375, "num_input_tokens_seen": 167167308, "step": 2984 }, { "epoch": 6.648106904231626, "grad_norm": 15.314505577087402, "learning_rate": 1e-06, "loss": 0.6925, "num_input_tokens_seen": 167223976, "step": 2985 }, { "epoch": 6.648106904231626, "loss": 0.6426599621772766, "loss_ce": 0.00032597355311736465, "loss_iou": 0.28515625, "loss_num": 0.0147705078125, "loss_xval": 0.640625, "num_input_tokens_seen": 167223976, "step": 2985 }, { "epoch": 6.650334075723831, "grad_norm": 20.561725616455078, "learning_rate": 1e-06, "loss": 0.7403, "num_input_tokens_seen": 167279468, "step": 2986 }, { "epoch": 6.650334075723831, "loss": 0.6292366981506348, "loss_ce": 0.00020838108321186155, "loss_iou": 0.26953125, "loss_num": 0.0177001953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 167279468, "step": 2986 }, { "epoch": 6.652561247216036, "grad_norm": 22.052919387817383, "learning_rate": 1e-06, "loss": 0.6358, "num_input_tokens_seen": 167338024, "step": 2987 }, { "epoch": 6.652561247216036, "loss": 0.6393663287162781, "loss_ce": 0.00032825471134856343, "loss_iou": 0.27734375, "loss_num": 0.0172119140625, "loss_xval": 0.640625, "num_input_tokens_seen": 167338024, "step": 2987 }, { "epoch": 6.6547884187082404, "grad_norm": 25.92134666442871, "learning_rate": 1e-06, "loss": 0.9487, "num_input_tokens_seen": 167394980, "step": 2988 }, { "epoch": 6.6547884187082404, "loss": 0.8820292949676514, "loss_ce": 0.00019336529658176005, "loss_iou": 0.376953125, "loss_num": 0.0255126953125, "loss_xval": 0.8828125, "num_input_tokens_seen": 167394980, "step": 2988 }, { "epoch": 6.657015590200445, "grad_norm": 18.905071258544922, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 167451864, "step": 2989 }, { "epoch": 6.657015590200445, "loss": 0.5608692169189453, "loss_ce": 0.00032231814111582935, "loss_iou": 0.251953125, "loss_num": 0.01153564453125, "loss_xval": 0.5625, "num_input_tokens_seen": 167451864, "step": 2989 }, { "epoch": 6.65924276169265, "grad_norm": 18.516260147094727, "learning_rate": 1e-06, "loss": 0.6818, "num_input_tokens_seen": 167507760, "step": 2990 }, { "epoch": 6.65924276169265, "loss": 0.5940439701080322, "loss_ce": 0.0002939357655122876, "loss_iou": 0.259765625, "loss_num": 0.01519775390625, "loss_xval": 0.59375, "num_input_tokens_seen": 167507760, "step": 2990 }, { "epoch": 6.661469933184855, "grad_norm": 16.960506439208984, "learning_rate": 1e-06, "loss": 0.6908, "num_input_tokens_seen": 167560344, "step": 2991 }, { "epoch": 6.661469933184855, "loss": 0.620684802532196, "loss_ce": 0.00020142148423474282, "loss_iou": 0.263671875, "loss_num": 0.018798828125, "loss_xval": 0.62109375, "num_input_tokens_seen": 167560344, "step": 2991 }, { "epoch": 6.66369710467706, "grad_norm": 24.14487648010254, "learning_rate": 1e-06, "loss": 0.5732, "num_input_tokens_seen": 167618800, "step": 2992 }, { "epoch": 6.66369710467706, "loss": 0.6208098530769348, "loss_ce": 0.0003264534752815962, "loss_iou": 0.279296875, "loss_num": 0.01226806640625, "loss_xval": 0.62109375, "num_input_tokens_seen": 167618800, "step": 2992 }, { "epoch": 6.665924276169265, "grad_norm": 16.674419403076172, "learning_rate": 1e-06, "loss": 0.4944, "num_input_tokens_seen": 167674316, "step": 2993 }, { "epoch": 6.665924276169265, "loss": 0.5599268674850464, "loss_ce": 0.0002344850217923522, "loss_iou": 0.2255859375, "loss_num": 0.0216064453125, "loss_xval": 0.55859375, "num_input_tokens_seen": 167674316, "step": 2993 }, { "epoch": 6.66815144766147, "grad_norm": 14.498659133911133, "learning_rate": 1e-06, "loss": 0.8444, "num_input_tokens_seen": 167730912, "step": 2994 }, { "epoch": 6.66815144766147, "loss": 0.7760642766952515, "loss_ce": 0.0009178139735013247, "loss_iou": 0.31640625, "loss_num": 0.0283203125, "loss_xval": 0.7734375, "num_input_tokens_seen": 167730912, "step": 2994 }, { "epoch": 6.6703786191536745, "grad_norm": 25.62771987915039, "learning_rate": 1e-06, "loss": 0.7389, "num_input_tokens_seen": 167786696, "step": 2995 }, { "epoch": 6.6703786191536745, "loss": 0.6130115985870361, "loss_ce": 0.00021857366664335132, "loss_iou": 0.27734375, "loss_num": 0.01190185546875, "loss_xval": 0.61328125, "num_input_tokens_seen": 167786696, "step": 2995 }, { "epoch": 6.67260579064588, "grad_norm": 84.09690856933594, "learning_rate": 1e-06, "loss": 0.7173, "num_input_tokens_seen": 167843364, "step": 2996 }, { "epoch": 6.67260579064588, "loss": 0.5746430158615112, "loss_ce": 0.00018016056856140494, "loss_iou": 0.2578125, "loss_num": 0.011962890625, "loss_xval": 0.57421875, "num_input_tokens_seen": 167843364, "step": 2996 }, { "epoch": 6.674832962138085, "grad_norm": 25.761995315551758, "learning_rate": 1e-06, "loss": 0.7212, "num_input_tokens_seen": 167900084, "step": 2997 }, { "epoch": 6.674832962138085, "loss": 0.848706066608429, "loss_ce": 0.0003174202283844352, "loss_iou": 0.3671875, "loss_num": 0.0230712890625, "loss_xval": 0.84765625, "num_input_tokens_seen": 167900084, "step": 2997 }, { "epoch": 6.67706013363029, "grad_norm": 20.495059967041016, "learning_rate": 1e-06, "loss": 0.6009, "num_input_tokens_seen": 167955952, "step": 2998 }, { "epoch": 6.67706013363029, "loss": 0.6451590061187744, "loss_ce": 0.0002615168341435492, "loss_iou": 0.2890625, "loss_num": 0.0135498046875, "loss_xval": 0.64453125, "num_input_tokens_seen": 167955952, "step": 2998 }, { "epoch": 6.679287305122495, "grad_norm": 18.190731048583984, "learning_rate": 1e-06, "loss": 0.5682, "num_input_tokens_seen": 168011540, "step": 2999 }, { "epoch": 6.679287305122495, "loss": 0.5880811810493469, "loss_ce": 0.0001905930694192648, "loss_iou": 0.25390625, "loss_num": 0.0157470703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 168011540, "step": 2999 }, { "epoch": 6.6815144766147, "grad_norm": 15.931896209716797, "learning_rate": 1e-06, "loss": 0.556, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "eval_seeclick_web_CIoU": 0.5745645761489868, "eval_seeclick_web_GIoU": 0.5703821778297424, "eval_seeclick_web_IoU": 0.5911202728748322, "eval_seeclick_web_MAE_all": 0.016741125378757715, "eval_seeclick_web_MAE_h": 0.009390837512910366, "eval_seeclick_web_MAE_w": 0.01750754565000534, "eval_seeclick_web_MAE_x_boxes": 0.009469148702919483, "eval_seeclick_web_MAE_y_boxes": 0.022113264771178365, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9277820587158203, "eval_seeclick_web_loss_ce": 0.00027503210003487766, "eval_seeclick_web_loss_iou": 0.421875, "eval_seeclick_web_loss_num": 0.013193130493164062, "eval_seeclick_web_loss_xval": 0.909423828125, "eval_seeclick_web_runtime": 22.6295, "eval_seeclick_web_samples_per_second": 2.21, "eval_seeclick_web_steps_per_second": 0.088, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "eval_icons_CIoU": 0.28626811504364014, "eval_icons_GIoU": 0.31125396490097046, "eval_icons_IoU": 0.36675940454006195, "eval_icons_MAE_all": 0.06613602861762047, "eval_icons_MAE_h": 0.04001020174473524, "eval_icons_MAE_w": 0.07005784474313259, "eval_icons_MAE_x_boxes": 0.06103345938026905, "eval_icons_MAE_y_boxes": 0.039336864836513996, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7287678718566895, "eval_icons_loss_ce": 0.00033969079959206283, "eval_icons_loss_iou": 0.669921875, "eval_icons_loss_num": 0.06263542175292969, "eval_icons_loss_xval": 1.65380859375, "eval_icons_runtime": 20.8368, "eval_icons_samples_per_second": 2.4, "eval_icons_steps_per_second": 0.096, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "eval_screenspot_CIoU": 0.34714128573735553, "eval_screenspot_GIoU": 0.36292509237925213, "eval_screenspot_IoU": 0.42989224195480347, "eval_screenspot_MAE_all": 0.06254567454258601, "eval_screenspot_MAE_h": 0.03752323302129904, "eval_screenspot_MAE_w": 0.07452885061502457, "eval_screenspot_MAE_x_boxes": 0.07285770525534947, "eval_screenspot_MAE_y_boxes": 0.04644376132637262, "eval_screenspot_inside_bbox": 0.659583330154419, "eval_screenspot_loss": 1.6588377952575684, "eval_screenspot_loss_ce": 0.00031722194398753345, "eval_screenspot_loss_iou": 0.6805826822916666, "eval_screenspot_loss_num": 0.07418060302734375, "eval_screenspot_loss_xval": 1.7322591145833333, "eval_screenspot_runtime": 32.6151, "eval_screenspot_samples_per_second": 2.729, "eval_screenspot_steps_per_second": 0.092, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "eval_compot_CIoU": 0.3531677573919296, "eval_compot_GIoU": 0.3627708703279495, "eval_compot_IoU": 0.4095195233821869, "eval_compot_MAE_all": 0.01802563015371561, "eval_compot_MAE_h": 0.008967408444732428, "eval_compot_MAE_w": 0.022056237794458866, "eval_compot_MAE_x_boxes": 0.02982906997203827, "eval_compot_MAE_y_boxes": 0.006724046776071191, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.3883819580078125, "eval_compot_loss_ce": 0.0002577164559625089, "eval_compot_loss_iou": 0.633544921875, "eval_compot_loss_num": 0.016773223876953125, "eval_compot_loss_xval": 1.350830078125, "eval_compot_runtime": 20.0639, "eval_compot_samples_per_second": 2.492, "eval_compot_steps_per_second": 0.1, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "eval_custom_ui_val_CIoU": 0.46863051421112484, "eval_custom_ui_val_GIoU": 0.4837728473875258, "eval_custom_ui_val_IoU": 0.5239554312494066, "eval_custom_ui_val_MAE_all": 0.030405950939489737, "eval_custom_ui_val_MAE_h": 0.01713582917323543, "eval_custom_ui_val_MAE_w": 0.038816668921046786, "eval_custom_ui_val_MAE_x_boxes": 0.034813721767730184, "eval_custom_ui_val_MAE_y_boxes": 0.015315383543363877, "eval_custom_ui_val_inside_bbox": 0.7422839535607232, "eval_custom_ui_val_loss": 1.1846333742141724, "eval_custom_ui_val_loss_ce": 0.00029205658291983936, "eval_custom_ui_val_loss_iou": 0.5049913194444444, "eval_custom_ui_val_loss_num": 0.02808909946017795, "eval_custom_ui_val_loss_xval": 1.1504720052083333, "eval_custom_ui_val_runtime": 56.2868, "eval_custom_ui_val_samples_per_second": 4.708, "eval_custom_ui_val_steps_per_second": 0.16, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6815144766147, "loss": 0.9355728030204773, "loss_ce": 0.00027009687619283795, "loss_iou": 0.40625, "loss_num": 0.0244140625, "loss_xval": 0.93359375, "num_input_tokens_seen": 168068960, "step": 3000 }, { "epoch": 6.6837416481069045, "grad_norm": 13.867417335510254, "learning_rate": 1e-06, "loss": 0.778, "num_input_tokens_seen": 168123220, "step": 3001 }, { "epoch": 6.6837416481069045, "loss": 0.8318126201629639, "loss_ce": 0.00026967888697981834, "loss_iou": 0.345703125, "loss_num": 0.0283203125, "loss_xval": 0.83203125, "num_input_tokens_seen": 168123220, "step": 3001 }, { "epoch": 6.685968819599109, "grad_norm": 23.010339736938477, "learning_rate": 1e-06, "loss": 0.8531, "num_input_tokens_seen": 168177788, "step": 3002 }, { "epoch": 6.685968819599109, "loss": 0.9224424958229065, "loss_ce": 0.00032336413278244436, "loss_iou": 0.390625, "loss_num": 0.028076171875, "loss_xval": 0.921875, "num_input_tokens_seen": 168177788, "step": 3002 }, { "epoch": 6.688195991091314, "grad_norm": 17.75686264038086, "learning_rate": 1e-06, "loss": 0.6219, "num_input_tokens_seen": 168232444, "step": 3003 }, { "epoch": 6.688195991091314, "loss": 0.38138991594314575, "loss_ce": 0.00016430205141659826, "loss_iou": 0.166015625, "loss_num": 0.00994873046875, "loss_xval": 0.380859375, "num_input_tokens_seen": 168232444, "step": 3003 }, { "epoch": 6.690423162583519, "grad_norm": 25.14574432373047, "learning_rate": 1e-06, "loss": 0.6382, "num_input_tokens_seen": 168285212, "step": 3004 }, { "epoch": 6.690423162583519, "loss": 0.7175732851028442, "loss_ce": 0.000288100796751678, "loss_iou": 0.2890625, "loss_num": 0.028076171875, "loss_xval": 0.71875, "num_input_tokens_seen": 168285212, "step": 3004 }, { "epoch": 6.692650334075724, "grad_norm": 15.96047592163086, "learning_rate": 1e-06, "loss": 0.8216, "num_input_tokens_seen": 168341780, "step": 3005 }, { "epoch": 6.692650334075724, "loss": 1.042626142501831, "loss_ce": 0.0002676715375855565, "loss_iou": 0.408203125, "loss_num": 0.045166015625, "loss_xval": 1.0390625, "num_input_tokens_seen": 168341780, "step": 3005 }, { "epoch": 6.694877505567929, "grad_norm": 14.52697467803955, "learning_rate": 1e-06, "loss": 0.4355, "num_input_tokens_seen": 168397592, "step": 3006 }, { "epoch": 6.694877505567929, "loss": 0.4221663177013397, "loss_ce": 0.00023030643933452666, "loss_iou": 0.17578125, "loss_num": 0.0140380859375, "loss_xval": 0.421875, "num_input_tokens_seen": 168397592, "step": 3006 }, { "epoch": 6.697104677060134, "grad_norm": 17.11386489868164, "learning_rate": 1e-06, "loss": 0.7575, "num_input_tokens_seen": 168453048, "step": 3007 }, { "epoch": 6.697104677060134, "loss": 0.9401570558547974, "loss_ce": 0.0002156144182663411, "loss_iou": 0.408203125, "loss_num": 0.0247802734375, "loss_xval": 0.94140625, "num_input_tokens_seen": 168453048, "step": 3007 }, { "epoch": 6.6993318485523385, "grad_norm": 23.60494041442871, "learning_rate": 1e-06, "loss": 0.6955, "num_input_tokens_seen": 168508760, "step": 3008 }, { "epoch": 6.6993318485523385, "loss": 0.49571555852890015, "loss_ce": 0.00023218031856231391, "loss_iou": 0.2314453125, "loss_num": 0.006805419921875, "loss_xval": 0.49609375, "num_input_tokens_seen": 168508760, "step": 3008 }, { "epoch": 6.701559020044543, "grad_norm": 14.749323844909668, "learning_rate": 1e-06, "loss": 0.6877, "num_input_tokens_seen": 168565604, "step": 3009 }, { "epoch": 6.701559020044543, "loss": 0.6759235858917236, "loss_ce": 0.0003864543105009943, "loss_iou": 0.271484375, "loss_num": 0.0267333984375, "loss_xval": 0.67578125, "num_input_tokens_seen": 168565604, "step": 3009 }, { "epoch": 6.703786191536748, "grad_norm": 24.45879554748535, "learning_rate": 1e-06, "loss": 0.6726, "num_input_tokens_seen": 168623400, "step": 3010 }, { "epoch": 6.703786191536748, "loss": 0.5331798791885376, "loss_ce": 0.00022087327670305967, "loss_iou": 0.23828125, "loss_num": 0.01153564453125, "loss_xval": 0.53125, "num_input_tokens_seen": 168623400, "step": 3010 }, { "epoch": 6.706013363028953, "grad_norm": 22.486263275146484, "learning_rate": 1e-06, "loss": 0.5951, "num_input_tokens_seen": 168678412, "step": 3011 }, { "epoch": 6.706013363028953, "loss": 0.6491339206695557, "loss_ce": 0.0002081873535644263, "loss_iou": 0.259765625, "loss_num": 0.0263671875, "loss_xval": 0.6484375, "num_input_tokens_seen": 168678412, "step": 3011 }, { "epoch": 6.708240534521158, "grad_norm": 12.50680923461914, "learning_rate": 1e-06, "loss": 0.721, "num_input_tokens_seen": 168734048, "step": 3012 }, { "epoch": 6.708240534521158, "loss": 0.7475176453590393, "loss_ce": 0.00020322672207839787, "loss_iou": 0.296875, "loss_num": 0.0306396484375, "loss_xval": 0.74609375, "num_input_tokens_seen": 168734048, "step": 3012 }, { "epoch": 6.710467706013363, "grad_norm": 28.51459503173828, "learning_rate": 1e-06, "loss": 0.5842, "num_input_tokens_seen": 168790568, "step": 3013 }, { "epoch": 6.710467706013363, "loss": 0.6308811902999878, "loss_ce": 0.000265932030742988, "loss_iou": 0.275390625, "loss_num": 0.0159912109375, "loss_xval": 0.62890625, "num_input_tokens_seen": 168790568, "step": 3013 }, { "epoch": 6.712694877505568, "grad_norm": 18.395198822021484, "learning_rate": 1e-06, "loss": 0.8135, "num_input_tokens_seen": 168844228, "step": 3014 }, { "epoch": 6.712694877505568, "loss": 0.7819979190826416, "loss_ce": 0.00025961300707422197, "loss_iou": 0.333984375, "loss_num": 0.0230712890625, "loss_xval": 0.78125, "num_input_tokens_seen": 168844228, "step": 3014 }, { "epoch": 6.714922048997773, "grad_norm": 23.87611198425293, "learning_rate": 1e-06, "loss": 0.7382, "num_input_tokens_seen": 168899728, "step": 3015 }, { "epoch": 6.714922048997773, "loss": 0.7516616582870483, "loss_ce": 0.00019676925148814917, "loss_iou": 0.326171875, "loss_num": 0.0198974609375, "loss_xval": 0.75, "num_input_tokens_seen": 168899728, "step": 3015 }, { "epoch": 6.717149220489977, "grad_norm": 20.738792419433594, "learning_rate": 1e-06, "loss": 0.786, "num_input_tokens_seen": 168956592, "step": 3016 }, { "epoch": 6.717149220489977, "loss": 0.8078007102012634, "loss_ce": 0.00018351592007093132, "loss_iou": 0.306640625, "loss_num": 0.0390625, "loss_xval": 0.80859375, "num_input_tokens_seen": 168956592, "step": 3016 }, { "epoch": 6.719376391982182, "grad_norm": 19.863662719726562, "learning_rate": 1e-06, "loss": 0.7038, "num_input_tokens_seen": 169012588, "step": 3017 }, { "epoch": 6.719376391982182, "loss": 0.5791813135147095, "loss_ce": 0.00020178730483166873, "loss_iou": 0.24609375, "loss_num": 0.017333984375, "loss_xval": 0.578125, "num_input_tokens_seen": 169012588, "step": 3017 }, { "epoch": 6.721603563474387, "grad_norm": 18.61119842529297, "learning_rate": 1e-06, "loss": 0.6261, "num_input_tokens_seen": 169067608, "step": 3018 }, { "epoch": 6.721603563474387, "loss": 0.6960831880569458, "loss_ce": 0.0002824185648933053, "loss_iou": 0.31640625, "loss_num": 0.01263427734375, "loss_xval": 0.6953125, "num_input_tokens_seen": 169067608, "step": 3018 }, { "epoch": 6.723830734966592, "grad_norm": 19.781635284423828, "learning_rate": 1e-06, "loss": 0.6468, "num_input_tokens_seen": 169122988, "step": 3019 }, { "epoch": 6.723830734966592, "loss": 0.7083573341369629, "loss_ce": 0.0002274075523018837, "loss_iou": 0.26953125, "loss_num": 0.033447265625, "loss_xval": 0.70703125, "num_input_tokens_seen": 169122988, "step": 3019 }, { "epoch": 6.726057906458797, "grad_norm": 17.748201370239258, "learning_rate": 1e-06, "loss": 0.5458, "num_input_tokens_seen": 169178700, "step": 3020 }, { "epoch": 6.726057906458797, "loss": 0.5851503610610962, "loss_ce": 0.00018942491442430764, "loss_iou": 0.25390625, "loss_num": 0.01513671875, "loss_xval": 0.5859375, "num_input_tokens_seen": 169178700, "step": 3020 }, { "epoch": 6.728285077951003, "grad_norm": 14.647541999816895, "learning_rate": 1e-06, "loss": 0.4199, "num_input_tokens_seen": 169234748, "step": 3021 }, { "epoch": 6.728285077951003, "loss": 0.4449549615383148, "loss_ce": 0.00019176513887941837, "loss_iou": 0.17578125, "loss_num": 0.0185546875, "loss_xval": 0.4453125, "num_input_tokens_seen": 169234748, "step": 3021 }, { "epoch": 6.7305122494432075, "grad_norm": 22.735586166381836, "learning_rate": 1e-06, "loss": 0.8144, "num_input_tokens_seen": 169291708, "step": 3022 }, { "epoch": 6.7305122494432075, "loss": 0.8748047351837158, "loss_ce": 0.0002930166956502944, "loss_iou": 0.328125, "loss_num": 0.0439453125, "loss_xval": 0.875, "num_input_tokens_seen": 169291708, "step": 3022 }, { "epoch": 6.732739420935412, "grad_norm": 10.555468559265137, "learning_rate": 1e-06, "loss": 0.8329, "num_input_tokens_seen": 169346188, "step": 3023 }, { "epoch": 6.732739420935412, "loss": 0.5163635611534119, "loss_ce": 0.00018923793686553836, "loss_iou": 0.1982421875, "loss_num": 0.0240478515625, "loss_xval": 0.515625, "num_input_tokens_seen": 169346188, "step": 3023 }, { "epoch": 6.734966592427617, "grad_norm": 25.756729125976562, "learning_rate": 1e-06, "loss": 0.7476, "num_input_tokens_seen": 169401748, "step": 3024 }, { "epoch": 6.734966592427617, "loss": 0.6750069856643677, "loss_ce": 0.00020230711379554123, "loss_iou": 0.291015625, "loss_num": 0.0186767578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 169401748, "step": 3024 }, { "epoch": 6.737193763919822, "grad_norm": 16.90264129638672, "learning_rate": 1e-06, "loss": 0.5714, "num_input_tokens_seen": 169456820, "step": 3025 }, { "epoch": 6.737193763919822, "loss": 0.4832545220851898, "loss_ce": 0.0004664373118430376, "loss_iou": 0.2041015625, "loss_num": 0.01483154296875, "loss_xval": 0.482421875, "num_input_tokens_seen": 169456820, "step": 3025 }, { "epoch": 6.739420935412027, "grad_norm": 19.030052185058594, "learning_rate": 1e-06, "loss": 0.8233, "num_input_tokens_seen": 169512288, "step": 3026 }, { "epoch": 6.739420935412027, "loss": 0.8015903234481812, "loss_ce": 0.00032082191319204867, "loss_iou": 0.3515625, "loss_num": 0.0194091796875, "loss_xval": 0.80078125, "num_input_tokens_seen": 169512288, "step": 3026 }, { "epoch": 6.741648106904232, "grad_norm": 22.391340255737305, "learning_rate": 1e-06, "loss": 0.5371, "num_input_tokens_seen": 169568768, "step": 3027 }, { "epoch": 6.741648106904232, "loss": 0.39635348320007324, "loss_ce": 0.00017430493608117104, "loss_iou": 0.154296875, "loss_num": 0.017578125, "loss_xval": 0.396484375, "num_input_tokens_seen": 169568768, "step": 3027 }, { "epoch": 6.743875278396437, "grad_norm": 23.041179656982422, "learning_rate": 1e-06, "loss": 0.6764, "num_input_tokens_seen": 169623344, "step": 3028 }, { "epoch": 6.743875278396437, "loss": 0.6840122938156128, "loss_ce": 0.00017437028873246163, "loss_iou": 0.296875, "loss_num": 0.0185546875, "loss_xval": 0.68359375, "num_input_tokens_seen": 169623344, "step": 3028 }, { "epoch": 6.7461024498886415, "grad_norm": 22.090497970581055, "learning_rate": 1e-06, "loss": 0.6771, "num_input_tokens_seen": 169677776, "step": 3029 }, { "epoch": 6.7461024498886415, "loss": 0.7656627297401428, "loss_ce": 0.00015980687749106437, "loss_iou": 0.33203125, "loss_num": 0.020263671875, "loss_xval": 0.765625, "num_input_tokens_seen": 169677776, "step": 3029 }, { "epoch": 6.748329621380846, "grad_norm": 16.185291290283203, "learning_rate": 1e-06, "loss": 0.6104, "num_input_tokens_seen": 169734036, "step": 3030 }, { "epoch": 6.748329621380846, "loss": 0.643264651298523, "loss_ce": 0.00019819998124148697, "loss_iou": 0.279296875, "loss_num": 0.01708984375, "loss_xval": 0.64453125, "num_input_tokens_seen": 169734036, "step": 3030 }, { "epoch": 6.750556792873051, "grad_norm": 19.104719161987305, "learning_rate": 1e-06, "loss": 0.5948, "num_input_tokens_seen": 169788020, "step": 3031 }, { "epoch": 6.750556792873051, "loss": 0.5878357887268066, "loss_ce": 0.00018932024249807, "loss_iou": 0.251953125, "loss_num": 0.0164794921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 169788020, "step": 3031 }, { "epoch": 6.752783964365256, "grad_norm": 23.02263641357422, "learning_rate": 1e-06, "loss": 0.629, "num_input_tokens_seen": 169844864, "step": 3032 }, { "epoch": 6.752783964365256, "loss": 0.5922929048538208, "loss_ce": 0.0002518455730751157, "loss_iou": 0.2373046875, "loss_num": 0.0235595703125, "loss_xval": 0.59375, "num_input_tokens_seen": 169844864, "step": 3032 }, { "epoch": 6.755011135857461, "grad_norm": 21.84469985961914, "learning_rate": 1e-06, "loss": 0.6563, "num_input_tokens_seen": 169901280, "step": 3033 }, { "epoch": 6.755011135857461, "loss": 0.6547205448150635, "loss_ce": 0.0001795107964426279, "loss_iou": 0.263671875, "loss_num": 0.025146484375, "loss_xval": 0.65625, "num_input_tokens_seen": 169901280, "step": 3033 }, { "epoch": 6.757238307349666, "grad_norm": 24.893165588378906, "learning_rate": 1e-06, "loss": 0.8051, "num_input_tokens_seen": 169955720, "step": 3034 }, { "epoch": 6.757238307349666, "loss": 0.9663440585136414, "loss_ce": 0.00027958687860518694, "loss_iou": 0.4140625, "loss_num": 0.0274658203125, "loss_xval": 0.96484375, "num_input_tokens_seen": 169955720, "step": 3034 }, { "epoch": 6.759465478841871, "grad_norm": 29.10569953918457, "learning_rate": 1e-06, "loss": 0.6784, "num_input_tokens_seen": 170011236, "step": 3035 }, { "epoch": 6.759465478841871, "loss": 0.6698566675186157, "loss_ce": 0.00017896070494316518, "loss_iou": 0.287109375, "loss_num": 0.0191650390625, "loss_xval": 0.66796875, "num_input_tokens_seen": 170011236, "step": 3035 }, { "epoch": 6.7616926503340755, "grad_norm": 22.953475952148438, "learning_rate": 1e-06, "loss": 1.0044, "num_input_tokens_seen": 170068108, "step": 3036 }, { "epoch": 6.7616926503340755, "loss": 1.014899492263794, "loss_ce": 0.0002510766498744488, "loss_iou": 0.416015625, "loss_num": 0.0361328125, "loss_xval": 1.015625, "num_input_tokens_seen": 170068108, "step": 3036 }, { "epoch": 6.76391982182628, "grad_norm": 18.038188934326172, "learning_rate": 1e-06, "loss": 0.6545, "num_input_tokens_seen": 170127512, "step": 3037 }, { "epoch": 6.76391982182628, "loss": 0.6023421883583069, "loss_ce": 0.00029138405807316303, "loss_iou": 0.271484375, "loss_num": 0.0118408203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 170127512, "step": 3037 }, { "epoch": 6.766146993318485, "grad_norm": 20.037540435791016, "learning_rate": 1e-06, "loss": 0.5158, "num_input_tokens_seen": 170181356, "step": 3038 }, { "epoch": 6.766146993318485, "loss": 0.5402380228042603, "loss_ce": 0.00019893058924935758, "loss_iou": 0.2294921875, "loss_num": 0.0162353515625, "loss_xval": 0.5390625, "num_input_tokens_seen": 170181356, "step": 3038 }, { "epoch": 6.76837416481069, "grad_norm": 16.7264347076416, "learning_rate": 1e-06, "loss": 0.9252, "num_input_tokens_seen": 170237844, "step": 3039 }, { "epoch": 6.76837416481069, "loss": 1.0988414287567139, "loss_ce": 0.00020856756600551307, "loss_iou": 0.46875, "loss_num": 0.032470703125, "loss_xval": 1.1015625, "num_input_tokens_seen": 170237844, "step": 3039 }, { "epoch": 6.770601336302895, "grad_norm": 14.285926818847656, "learning_rate": 1e-06, "loss": 0.5733, "num_input_tokens_seen": 170291264, "step": 3040 }, { "epoch": 6.770601336302895, "loss": 0.44891494512557983, "loss_ce": 0.00018448734772391617, "loss_iou": 0.1826171875, "loss_num": 0.0167236328125, "loss_xval": 0.44921875, "num_input_tokens_seen": 170291264, "step": 3040 }, { "epoch": 6.772828507795101, "grad_norm": 29.201885223388672, "learning_rate": 1e-06, "loss": 0.5023, "num_input_tokens_seen": 170348048, "step": 3041 }, { "epoch": 6.772828507795101, "loss": 0.4634665846824646, "loss_ce": 0.000209752848604694, "loss_iou": 0.2021484375, "loss_num": 0.01190185546875, "loss_xval": 0.462890625, "num_input_tokens_seen": 170348048, "step": 3041 }, { "epoch": 6.775055679287306, "grad_norm": 41.29672622680664, "learning_rate": 1e-06, "loss": 0.7947, "num_input_tokens_seen": 170402500, "step": 3042 }, { "epoch": 6.775055679287306, "loss": 0.8640490770339966, "loss_ce": 0.00027952369418926537, "loss_iou": 0.37109375, "loss_num": 0.02392578125, "loss_xval": 0.86328125, "num_input_tokens_seen": 170402500, "step": 3042 }, { "epoch": 6.77728285077951, "grad_norm": 31.665985107421875, "learning_rate": 1e-06, "loss": 0.5856, "num_input_tokens_seen": 170458260, "step": 3043 }, { "epoch": 6.77728285077951, "loss": 0.45639604330062866, "loss_ce": 0.00021927341003902256, "loss_iou": 0.18359375, "loss_num": 0.0177001953125, "loss_xval": 0.45703125, "num_input_tokens_seen": 170458260, "step": 3043 }, { "epoch": 6.779510022271715, "grad_norm": 43.43659591674805, "learning_rate": 1e-06, "loss": 0.7794, "num_input_tokens_seen": 170514684, "step": 3044 }, { "epoch": 6.779510022271715, "loss": 0.8676573038101196, "loss_ce": 0.0009580720216035843, "loss_iou": 0.365234375, "loss_num": 0.02734375, "loss_xval": 0.8671875, "num_input_tokens_seen": 170514684, "step": 3044 }, { "epoch": 6.78173719376392, "grad_norm": 24.82050323486328, "learning_rate": 1e-06, "loss": 0.531, "num_input_tokens_seen": 170569476, "step": 3045 }, { "epoch": 6.78173719376392, "loss": 0.5165289640426636, "loss_ce": 0.00017151227802969515, "loss_iou": 0.2255859375, "loss_num": 0.01318359375, "loss_xval": 0.515625, "num_input_tokens_seen": 170569476, "step": 3045 }, { "epoch": 6.783964365256125, "grad_norm": 19.520343780517578, "learning_rate": 1e-06, "loss": 0.7418, "num_input_tokens_seen": 170622728, "step": 3046 }, { "epoch": 6.783964365256125, "loss": 0.9240363836288452, "loss_ce": 0.00026935621281154454, "loss_iou": 0.380859375, "loss_num": 0.032470703125, "loss_xval": 0.921875, "num_input_tokens_seen": 170622728, "step": 3046 }, { "epoch": 6.78619153674833, "grad_norm": 18.211183547973633, "learning_rate": 1e-06, "loss": 0.6673, "num_input_tokens_seen": 170679272, "step": 3047 }, { "epoch": 6.78619153674833, "loss": 0.7375867962837219, "loss_ce": 0.000282141612842679, "loss_iou": 0.328125, "loss_num": 0.0162353515625, "loss_xval": 0.73828125, "num_input_tokens_seen": 170679272, "step": 3047 }, { "epoch": 6.788418708240535, "grad_norm": 21.0445499420166, "learning_rate": 1e-06, "loss": 0.5899, "num_input_tokens_seen": 170733732, "step": 3048 }, { "epoch": 6.788418708240535, "loss": 0.5784125924110413, "loss_ce": 0.00016554353351239115, "loss_iou": 0.248046875, "loss_num": 0.016357421875, "loss_xval": 0.578125, "num_input_tokens_seen": 170733732, "step": 3048 }, { "epoch": 6.79064587973274, "grad_norm": 25.12030601501465, "learning_rate": 1e-06, "loss": 0.7525, "num_input_tokens_seen": 170790864, "step": 3049 }, { "epoch": 6.79064587973274, "loss": 0.8492100238800049, "loss_ce": 0.0011875506024807692, "loss_iou": 0.349609375, "loss_num": 0.0299072265625, "loss_xval": 0.84765625, "num_input_tokens_seen": 170790864, "step": 3049 }, { "epoch": 6.7928730512249444, "grad_norm": 14.534224510192871, "learning_rate": 1e-06, "loss": 0.5735, "num_input_tokens_seen": 170846928, "step": 3050 }, { "epoch": 6.7928730512249444, "loss": 0.6872592568397522, "loss_ce": 0.00024754402693361044, "loss_iou": 0.30078125, "loss_num": 0.01708984375, "loss_xval": 0.6875, "num_input_tokens_seen": 170846928, "step": 3050 }, { "epoch": 6.795100222717149, "grad_norm": 32.43819808959961, "learning_rate": 1e-06, "loss": 0.7584, "num_input_tokens_seen": 170901412, "step": 3051 }, { "epoch": 6.795100222717149, "loss": 0.47796866297721863, "loss_ce": 0.00018544365593697876, "loss_iou": 0.2138671875, "loss_num": 0.00994873046875, "loss_xval": 0.478515625, "num_input_tokens_seen": 170901412, "step": 3051 }, { "epoch": 6.797327394209354, "grad_norm": 26.212574005126953, "learning_rate": 1e-06, "loss": 0.7598, "num_input_tokens_seen": 170955608, "step": 3052 }, { "epoch": 6.797327394209354, "loss": 0.8884332776069641, "loss_ce": 0.0002496936358511448, "loss_iou": 0.39453125, "loss_num": 0.019775390625, "loss_xval": 0.88671875, "num_input_tokens_seen": 170955608, "step": 3052 }, { "epoch": 6.799554565701559, "grad_norm": 18.317108154296875, "learning_rate": 1e-06, "loss": 0.5841, "num_input_tokens_seen": 171010808, "step": 3053 }, { "epoch": 6.799554565701559, "loss": 0.6397278308868408, "loss_ce": 0.00020147013128735125, "loss_iou": 0.29296875, "loss_num": 0.0111083984375, "loss_xval": 0.640625, "num_input_tokens_seen": 171010808, "step": 3053 }, { "epoch": 6.801781737193764, "grad_norm": 17.737529754638672, "learning_rate": 1e-06, "loss": 0.8411, "num_input_tokens_seen": 171066148, "step": 3054 }, { "epoch": 6.801781737193764, "loss": 0.8929228186607361, "loss_ce": 0.0003446534974500537, "loss_iou": 0.375, "loss_num": 0.0284423828125, "loss_xval": 0.890625, "num_input_tokens_seen": 171066148, "step": 3054 }, { "epoch": 6.804008908685969, "grad_norm": 18.264446258544922, "learning_rate": 1e-06, "loss": 0.8505, "num_input_tokens_seen": 171121628, "step": 3055 }, { "epoch": 6.804008908685969, "loss": 0.6899810433387756, "loss_ce": 0.00028378370916470885, "loss_iou": 0.296875, "loss_num": 0.019287109375, "loss_xval": 0.69140625, "num_input_tokens_seen": 171121628, "step": 3055 }, { "epoch": 6.806236080178174, "grad_norm": 16.88157844543457, "learning_rate": 1e-06, "loss": 0.5521, "num_input_tokens_seen": 171177668, "step": 3056 }, { "epoch": 6.806236080178174, "loss": 0.5917320251464844, "loss_ce": 0.00017928854504134506, "loss_iou": 0.263671875, "loss_num": 0.0130615234375, "loss_xval": 0.58984375, "num_input_tokens_seen": 171177668, "step": 3056 }, { "epoch": 6.8084632516703785, "grad_norm": 19.890636444091797, "learning_rate": 1e-06, "loss": 0.7256, "num_input_tokens_seen": 171234924, "step": 3057 }, { "epoch": 6.8084632516703785, "loss": 0.9203733205795288, "loss_ce": 0.0002072805364150554, "loss_iou": 0.35546875, "loss_num": 0.0419921875, "loss_xval": 0.921875, "num_input_tokens_seen": 171234924, "step": 3057 }, { "epoch": 6.810690423162583, "grad_norm": 24.549684524536133, "learning_rate": 1e-06, "loss": 0.6066, "num_input_tokens_seen": 171289624, "step": 3058 }, { "epoch": 6.810690423162583, "loss": 0.6476849317550659, "loss_ce": 0.00022400161833502352, "loss_iou": 0.28125, "loss_num": 0.0167236328125, "loss_xval": 0.6484375, "num_input_tokens_seen": 171289624, "step": 3058 }, { "epoch": 6.812917594654788, "grad_norm": 29.96609878540039, "learning_rate": 1e-06, "loss": 0.544, "num_input_tokens_seen": 171345904, "step": 3059 }, { "epoch": 6.812917594654788, "loss": 0.37130942940711975, "loss_ce": 0.00021568889496847987, "loss_iou": 0.150390625, "loss_num": 0.0137939453125, "loss_xval": 0.37109375, "num_input_tokens_seen": 171345904, "step": 3059 }, { "epoch": 6.815144766146993, "grad_norm": 17.452735900878906, "learning_rate": 1e-06, "loss": 0.6359, "num_input_tokens_seen": 171403776, "step": 3060 }, { "epoch": 6.815144766146993, "loss": 0.5905120372772217, "loss_ce": 0.00018000410636886954, "loss_iou": 0.24609375, "loss_num": 0.019775390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 171403776, "step": 3060 }, { "epoch": 6.817371937639198, "grad_norm": 24.2515926361084, "learning_rate": 1e-06, "loss": 0.6321, "num_input_tokens_seen": 171459812, "step": 3061 }, { "epoch": 6.817371937639198, "loss": 0.6198359131813049, "loss_ce": 0.00020700221648439765, "loss_iou": 0.27734375, "loss_num": 0.01318359375, "loss_xval": 0.62109375, "num_input_tokens_seen": 171459812, "step": 3061 }, { "epoch": 6.819599109131403, "grad_norm": 26.85552215576172, "learning_rate": 1e-06, "loss": 0.583, "num_input_tokens_seen": 171517492, "step": 3062 }, { "epoch": 6.819599109131403, "loss": 0.5180150270462036, "loss_ce": 0.0001927339908434078, "loss_iou": 0.1943359375, "loss_num": 0.02587890625, "loss_xval": 0.51953125, "num_input_tokens_seen": 171517492, "step": 3062 }, { "epoch": 6.821826280623608, "grad_norm": 17.88697624206543, "learning_rate": 1e-06, "loss": 0.587, "num_input_tokens_seen": 171572508, "step": 3063 }, { "epoch": 6.821826280623608, "loss": 0.6976819634437561, "loss_ce": 0.00017221369489561766, "loss_iou": 0.294921875, "loss_num": 0.021728515625, "loss_xval": 0.69921875, "num_input_tokens_seen": 171572508, "step": 3063 }, { "epoch": 6.8240534521158125, "grad_norm": 18.11661148071289, "learning_rate": 1e-06, "loss": 0.5901, "num_input_tokens_seen": 171630664, "step": 3064 }, { "epoch": 6.8240534521158125, "loss": 0.6340024471282959, "loss_ce": 0.00021340540843084455, "loss_iou": 0.2734375, "loss_num": 0.017578125, "loss_xval": 0.6328125, "num_input_tokens_seen": 171630664, "step": 3064 }, { "epoch": 6.826280623608017, "grad_norm": 15.912714004516602, "learning_rate": 1e-06, "loss": 0.7199, "num_input_tokens_seen": 171688464, "step": 3065 }, { "epoch": 6.826280623608017, "loss": 0.7723828554153442, "loss_ce": 0.0002881219261325896, "loss_iou": 0.34765625, "loss_num": 0.015625, "loss_xval": 0.7734375, "num_input_tokens_seen": 171688464, "step": 3065 }, { "epoch": 6.828507795100223, "grad_norm": 104.20018768310547, "learning_rate": 1e-06, "loss": 0.829, "num_input_tokens_seen": 171743324, "step": 3066 }, { "epoch": 6.828507795100223, "loss": 1.210741400718689, "loss_ce": 0.0007805360364727676, "loss_iou": 0.51953125, "loss_num": 0.033935546875, "loss_xval": 1.2109375, "num_input_tokens_seen": 171743324, "step": 3066 }, { "epoch": 6.830734966592428, "grad_norm": 16.49806785583496, "learning_rate": 1e-06, "loss": 0.6798, "num_input_tokens_seen": 171799732, "step": 3067 }, { "epoch": 6.830734966592428, "loss": 0.5802951455116272, "loss_ce": 0.00021701655350625515, "loss_iou": 0.2294921875, "loss_num": 0.024169921875, "loss_xval": 0.578125, "num_input_tokens_seen": 171799732, "step": 3067 }, { "epoch": 6.832962138084633, "grad_norm": 20.636322021484375, "learning_rate": 1e-06, "loss": 0.6688, "num_input_tokens_seen": 171855072, "step": 3068 }, { "epoch": 6.832962138084633, "loss": 0.7048916816711426, "loss_ce": 0.00017975937225855887, "loss_iou": 0.310546875, "loss_num": 0.016357421875, "loss_xval": 0.703125, "num_input_tokens_seen": 171855072, "step": 3068 }, { "epoch": 6.835189309576838, "grad_norm": 14.782151222229004, "learning_rate": 1e-06, "loss": 0.7034, "num_input_tokens_seen": 171912412, "step": 3069 }, { "epoch": 6.835189309576838, "loss": 0.6367747187614441, "loss_ce": 0.0015207845717668533, "loss_iou": 0.2734375, "loss_num": 0.0177001953125, "loss_xval": 0.63671875, "num_input_tokens_seen": 171912412, "step": 3069 }, { "epoch": 6.8374164810690425, "grad_norm": 22.69580078125, "learning_rate": 1e-06, "loss": 0.6691, "num_input_tokens_seen": 171967604, "step": 3070 }, { "epoch": 6.8374164810690425, "loss": 0.6948800086975098, "loss_ce": 0.00017786939861252904, "loss_iou": 0.318359375, "loss_num": 0.01171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 171967604, "step": 3070 }, { "epoch": 6.839643652561247, "grad_norm": 16.151691436767578, "learning_rate": 1e-06, "loss": 0.7264, "num_input_tokens_seen": 172024480, "step": 3071 }, { "epoch": 6.839643652561247, "loss": 0.7126206159591675, "loss_ce": 0.0002182956231990829, "loss_iou": 0.31640625, "loss_num": 0.0157470703125, "loss_xval": 0.7109375, "num_input_tokens_seen": 172024480, "step": 3071 }, { "epoch": 6.841870824053452, "grad_norm": 23.6954402923584, "learning_rate": 1e-06, "loss": 0.5101, "num_input_tokens_seen": 172081368, "step": 3072 }, { "epoch": 6.841870824053452, "loss": 0.41563284397125244, "loss_ce": 0.00034961808705702424, "loss_iou": 0.171875, "loss_num": 0.014404296875, "loss_xval": 0.416015625, "num_input_tokens_seen": 172081368, "step": 3072 }, { "epoch": 6.844097995545657, "grad_norm": 14.069080352783203, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 172134880, "step": 3073 }, { "epoch": 6.844097995545657, "loss": 0.4463517367839813, "loss_ce": 0.00018475353135727346, "loss_iou": 0.18359375, "loss_num": 0.0159912109375, "loss_xval": 0.4453125, "num_input_tokens_seen": 172134880, "step": 3073 }, { "epoch": 6.846325167037862, "grad_norm": 16.129291534423828, "learning_rate": 1e-06, "loss": 0.7115, "num_input_tokens_seen": 172191948, "step": 3074 }, { "epoch": 6.846325167037862, "loss": 1.0690743923187256, "loss_ce": 0.00022675658692605793, "loss_iou": 0.486328125, "loss_num": 0.0194091796875, "loss_xval": 1.0703125, "num_input_tokens_seen": 172191948, "step": 3074 }, { "epoch": 6.848552338530067, "grad_norm": 15.745563507080078, "learning_rate": 1e-06, "loss": 0.4251, "num_input_tokens_seen": 172250304, "step": 3075 }, { "epoch": 6.848552338530067, "loss": 0.40329307317733765, "loss_ce": 0.0002169150102417916, "loss_iou": 0.1806640625, "loss_num": 0.00848388671875, "loss_xval": 0.40234375, "num_input_tokens_seen": 172250304, "step": 3075 }, { "epoch": 6.850779510022272, "grad_norm": 23.94293975830078, "learning_rate": 1e-06, "loss": 0.7518, "num_input_tokens_seen": 172304700, "step": 3076 }, { "epoch": 6.850779510022272, "loss": 0.6821050643920898, "loss_ce": 0.00022032001288607717, "loss_iou": 0.2890625, "loss_num": 0.0205078125, "loss_xval": 0.68359375, "num_input_tokens_seen": 172304700, "step": 3076 }, { "epoch": 6.853006681514477, "grad_norm": 22.777400970458984, "learning_rate": 1e-06, "loss": 0.6088, "num_input_tokens_seen": 172360340, "step": 3077 }, { "epoch": 6.853006681514477, "loss": 0.36834925413131714, "loss_ce": 0.00030725146643817425, "loss_iou": 0.1416015625, "loss_num": 0.016845703125, "loss_xval": 0.3671875, "num_input_tokens_seen": 172360340, "step": 3077 }, { "epoch": 6.855233853006681, "grad_norm": 23.746376037597656, "learning_rate": 1e-06, "loss": 0.592, "num_input_tokens_seen": 172418912, "step": 3078 }, { "epoch": 6.855233853006681, "loss": 0.6163959503173828, "loss_ce": 0.0001850596017902717, "loss_iou": 0.265625, "loss_num": 0.0166015625, "loss_xval": 0.6171875, "num_input_tokens_seen": 172418912, "step": 3078 }, { "epoch": 6.857461024498886, "grad_norm": 35.40282440185547, "learning_rate": 1e-06, "loss": 0.8302, "num_input_tokens_seen": 172477360, "step": 3079 }, { "epoch": 6.857461024498886, "loss": 0.8708090782165527, "loss_ce": 0.00020358533947728574, "loss_iou": 0.3828125, "loss_num": 0.020751953125, "loss_xval": 0.87109375, "num_input_tokens_seen": 172477360, "step": 3079 }, { "epoch": 6.859688195991091, "grad_norm": 15.791609764099121, "learning_rate": 1e-06, "loss": 0.5824, "num_input_tokens_seen": 172533360, "step": 3080 }, { "epoch": 6.859688195991091, "loss": 0.8971776962280273, "loss_ce": 0.00020497874356806278, "loss_iou": 0.388671875, "loss_num": 0.0244140625, "loss_xval": 0.8984375, "num_input_tokens_seen": 172533360, "step": 3080 }, { "epoch": 6.861915367483296, "grad_norm": 16.064542770385742, "learning_rate": 1e-06, "loss": 0.6309, "num_input_tokens_seen": 172586820, "step": 3081 }, { "epoch": 6.861915367483296, "loss": 0.5737239122390747, "loss_ce": 0.0002375697804382071, "loss_iou": 0.267578125, "loss_num": 0.007598876953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 172586820, "step": 3081 }, { "epoch": 6.864142538975501, "grad_norm": 23.688007354736328, "learning_rate": 1e-06, "loss": 0.7019, "num_input_tokens_seen": 172644792, "step": 3082 }, { "epoch": 6.864142538975501, "loss": 0.7206803560256958, "loss_ce": 0.00022136216284707189, "loss_iou": 0.322265625, "loss_num": 0.01519775390625, "loss_xval": 0.71875, "num_input_tokens_seen": 172644792, "step": 3082 }, { "epoch": 6.866369710467706, "grad_norm": 16.832897186279297, "learning_rate": 1e-06, "loss": 0.4801, "num_input_tokens_seen": 172703448, "step": 3083 }, { "epoch": 6.866369710467706, "loss": 0.4925019443035126, "loss_ce": 0.00019236501248087734, "loss_iou": 0.2158203125, "loss_num": 0.01202392578125, "loss_xval": 0.4921875, "num_input_tokens_seen": 172703448, "step": 3083 }, { "epoch": 6.868596881959911, "grad_norm": 19.060598373413086, "learning_rate": 1e-06, "loss": 0.6689, "num_input_tokens_seen": 172759888, "step": 3084 }, { "epoch": 6.868596881959911, "loss": 0.6370642185211182, "loss_ce": 0.0003454496618360281, "loss_iou": 0.275390625, "loss_num": 0.01708984375, "loss_xval": 0.63671875, "num_input_tokens_seen": 172759888, "step": 3084 }, { "epoch": 6.870824053452115, "grad_norm": 16.259754180908203, "learning_rate": 1e-06, "loss": 0.4995, "num_input_tokens_seen": 172815816, "step": 3085 }, { "epoch": 6.870824053452115, "loss": 0.529482364654541, "loss_ce": 0.00018545490456745028, "loss_iou": 0.228515625, "loss_num": 0.01434326171875, "loss_xval": 0.53125, "num_input_tokens_seen": 172815816, "step": 3085 }, { "epoch": 6.873051224944321, "grad_norm": 21.378643035888672, "learning_rate": 1e-06, "loss": 0.6019, "num_input_tokens_seen": 172871560, "step": 3086 }, { "epoch": 6.873051224944321, "loss": 0.6235019564628601, "loss_ce": 0.00021095495321787894, "loss_iou": 0.255859375, "loss_num": 0.02294921875, "loss_xval": 0.625, "num_input_tokens_seen": 172871560, "step": 3086 }, { "epoch": 6.875278396436526, "grad_norm": 36.62589645385742, "learning_rate": 1e-06, "loss": 0.7479, "num_input_tokens_seen": 172926412, "step": 3087 }, { "epoch": 6.875278396436526, "loss": 0.9482687711715698, "loss_ce": 0.00027069164207205176, "loss_iou": 0.412109375, "loss_num": 0.024658203125, "loss_xval": 0.94921875, "num_input_tokens_seen": 172926412, "step": 3087 }, { "epoch": 6.877505567928731, "grad_norm": 21.01188087463379, "learning_rate": 1e-06, "loss": 0.548, "num_input_tokens_seen": 172984452, "step": 3088 }, { "epoch": 6.877505567928731, "loss": 0.5002177357673645, "loss_ce": 0.00021776201901957393, "loss_iou": 0.2216796875, "loss_num": 0.01129150390625, "loss_xval": 0.5, "num_input_tokens_seen": 172984452, "step": 3088 }, { "epoch": 6.879732739420936, "grad_norm": 16.86750030517578, "learning_rate": 1e-06, "loss": 0.6602, "num_input_tokens_seen": 173040592, "step": 3089 }, { "epoch": 6.879732739420936, "loss": 0.757439911365509, "loss_ce": 0.0003598659241106361, "loss_iou": 0.314453125, "loss_num": 0.02587890625, "loss_xval": 0.7578125, "num_input_tokens_seen": 173040592, "step": 3089 }, { "epoch": 6.881959910913141, "grad_norm": 21.569618225097656, "learning_rate": 1e-06, "loss": 0.7411, "num_input_tokens_seen": 173097352, "step": 3090 }, { "epoch": 6.881959910913141, "loss": 0.8854343891143799, "loss_ce": 0.0001805470819817856, "loss_iou": 0.376953125, "loss_num": 0.0263671875, "loss_xval": 0.88671875, "num_input_tokens_seen": 173097352, "step": 3090 }, { "epoch": 6.8841870824053455, "grad_norm": 49.518394470214844, "learning_rate": 1e-06, "loss": 0.5467, "num_input_tokens_seen": 173156100, "step": 3091 }, { "epoch": 6.8841870824053455, "loss": 0.465660035610199, "loss_ce": 0.00020591478096321225, "loss_iou": 0.2041015625, "loss_num": 0.01153564453125, "loss_xval": 0.46484375, "num_input_tokens_seen": 173156100, "step": 3091 }, { "epoch": 6.88641425389755, "grad_norm": 75.19503784179688, "learning_rate": 1e-06, "loss": 0.7018, "num_input_tokens_seen": 173212776, "step": 3092 }, { "epoch": 6.88641425389755, "loss": 0.8085219860076904, "loss_ce": 0.00029449607245624065, "loss_iou": 0.34375, "loss_num": 0.024169921875, "loss_xval": 0.80859375, "num_input_tokens_seen": 173212776, "step": 3092 }, { "epoch": 6.888641425389755, "grad_norm": 18.865760803222656, "learning_rate": 1e-06, "loss": 0.7596, "num_input_tokens_seen": 173267624, "step": 3093 }, { "epoch": 6.888641425389755, "loss": 0.8561751246452332, "loss_ce": 0.00021812312479596585, "loss_iou": 0.3671875, "loss_num": 0.024658203125, "loss_xval": 0.85546875, "num_input_tokens_seen": 173267624, "step": 3093 }, { "epoch": 6.89086859688196, "grad_norm": 26.241153717041016, "learning_rate": 1e-06, "loss": 0.6993, "num_input_tokens_seen": 173324276, "step": 3094 }, { "epoch": 6.89086859688196, "loss": 0.7165099382400513, "loss_ce": 0.00020136788953095675, "loss_iou": 0.294921875, "loss_num": 0.025634765625, "loss_xval": 0.71484375, "num_input_tokens_seen": 173324276, "step": 3094 }, { "epoch": 6.893095768374165, "grad_norm": 17.569229125976562, "learning_rate": 1e-06, "loss": 0.5308, "num_input_tokens_seen": 173382708, "step": 3095 }, { "epoch": 6.893095768374165, "loss": 0.4684421718120575, "loss_ce": 0.0001804671046556905, "loss_iou": 0.1953125, "loss_num": 0.015625, "loss_xval": 0.46875, "num_input_tokens_seen": 173382708, "step": 3095 }, { "epoch": 6.89532293986637, "grad_norm": 13.874095916748047, "learning_rate": 1e-06, "loss": 0.7751, "num_input_tokens_seen": 173438884, "step": 3096 }, { "epoch": 6.89532293986637, "loss": 0.9164137840270996, "loss_ce": 0.00021500332513824105, "loss_iou": 0.345703125, "loss_num": 0.045166015625, "loss_xval": 0.91796875, "num_input_tokens_seen": 173438884, "step": 3096 }, { "epoch": 6.897550111358575, "grad_norm": 19.509395599365234, "learning_rate": 1e-06, "loss": 0.523, "num_input_tokens_seen": 173493924, "step": 3097 }, { "epoch": 6.897550111358575, "loss": 0.6444209814071655, "loss_ce": 0.000255966791883111, "loss_iou": 0.28125, "loss_num": 0.0167236328125, "loss_xval": 0.64453125, "num_input_tokens_seen": 173493924, "step": 3097 }, { "epoch": 6.8997772828507795, "grad_norm": 19.37684440612793, "learning_rate": 1e-06, "loss": 0.6321, "num_input_tokens_seen": 173549456, "step": 3098 }, { "epoch": 6.8997772828507795, "loss": 0.6488611698150635, "loss_ce": 0.00017949687025975436, "loss_iou": 0.271484375, "loss_num": 0.021484375, "loss_xval": 0.6484375, "num_input_tokens_seen": 173549456, "step": 3098 }, { "epoch": 6.902004454342984, "grad_norm": 17.55590057373047, "learning_rate": 1e-06, "loss": 0.5922, "num_input_tokens_seen": 173608184, "step": 3099 }, { "epoch": 6.902004454342984, "loss": 0.43987372517585754, "loss_ce": 0.00017645125626586378, "loss_iou": 0.19140625, "loss_num": 0.01141357421875, "loss_xval": 0.439453125, "num_input_tokens_seen": 173608184, "step": 3099 }, { "epoch": 6.904231625835189, "grad_norm": 17.88399887084961, "learning_rate": 1e-06, "loss": 0.5861, "num_input_tokens_seen": 173663616, "step": 3100 }, { "epoch": 6.904231625835189, "loss": 0.5784924030303955, "loss_ce": 0.00018430282943882048, "loss_iou": 0.2470703125, "loss_num": 0.016845703125, "loss_xval": 0.578125, "num_input_tokens_seen": 173663616, "step": 3100 }, { "epoch": 6.906458797327394, "grad_norm": 16.59260368347168, "learning_rate": 1e-06, "loss": 0.5021, "num_input_tokens_seen": 173716408, "step": 3101 }, { "epoch": 6.906458797327394, "loss": 0.5853925943374634, "loss_ce": 0.0001875399611890316, "loss_iou": 0.2392578125, "loss_num": 0.0213623046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 173716408, "step": 3101 }, { "epoch": 6.908685968819599, "grad_norm": 17.521724700927734, "learning_rate": 1e-06, "loss": 0.6656, "num_input_tokens_seen": 173772800, "step": 3102 }, { "epoch": 6.908685968819599, "loss": 0.6075997948646545, "loss_ce": 0.0001779411395546049, "loss_iou": 0.2392578125, "loss_num": 0.025634765625, "loss_xval": 0.609375, "num_input_tokens_seen": 173772800, "step": 3102 }, { "epoch": 6.910913140311804, "grad_norm": 28.117921829223633, "learning_rate": 1e-06, "loss": 0.6282, "num_input_tokens_seen": 173829652, "step": 3103 }, { "epoch": 6.910913140311804, "loss": 0.5315419435501099, "loss_ce": 0.00016988006245810539, "loss_iou": 0.2294921875, "loss_num": 0.01446533203125, "loss_xval": 0.53125, "num_input_tokens_seen": 173829652, "step": 3103 }, { "epoch": 6.913140311804009, "grad_norm": 22.19889259338379, "learning_rate": 1e-06, "loss": 0.7273, "num_input_tokens_seen": 173884456, "step": 3104 }, { "epoch": 6.913140311804009, "loss": 0.7211758494377136, "loss_ce": 0.0002286143571836874, "loss_iou": 0.30859375, "loss_num": 0.0208740234375, "loss_xval": 0.72265625, "num_input_tokens_seen": 173884456, "step": 3104 }, { "epoch": 6.9153674832962135, "grad_norm": 11.665279388427734, "learning_rate": 1e-06, "loss": 0.62, "num_input_tokens_seen": 173939820, "step": 3105 }, { "epoch": 6.9153674832962135, "loss": 0.4306148886680603, "loss_ce": 0.0001950013975147158, "loss_iou": 0.1943359375, "loss_num": 0.00836181640625, "loss_xval": 0.4296875, "num_input_tokens_seen": 173939820, "step": 3105 }, { "epoch": 6.917594654788418, "grad_norm": 20.158309936523438, "learning_rate": 1e-06, "loss": 0.5542, "num_input_tokens_seen": 173995772, "step": 3106 }, { "epoch": 6.917594654788418, "loss": 0.44402259588241577, "loss_ce": 0.00017491859034635127, "loss_iou": 0.1796875, "loss_num": 0.0169677734375, "loss_xval": 0.443359375, "num_input_tokens_seen": 173995772, "step": 3106 }, { "epoch": 6.919821826280623, "grad_norm": 18.162250518798828, "learning_rate": 1e-06, "loss": 0.482, "num_input_tokens_seen": 174053676, "step": 3107 }, { "epoch": 6.919821826280623, "loss": 0.42890018224716187, "loss_ce": 0.0001892539585242048, "loss_iou": 0.17578125, "loss_num": 0.015380859375, "loss_xval": 0.4296875, "num_input_tokens_seen": 174053676, "step": 3107 }, { "epoch": 6.922048997772828, "grad_norm": 48.48846435546875, "learning_rate": 1e-06, "loss": 0.7151, "num_input_tokens_seen": 174112208, "step": 3108 }, { "epoch": 6.922048997772828, "loss": 0.827580451965332, "loss_ce": 0.00018786173313856125, "loss_iou": 0.373046875, "loss_num": 0.0164794921875, "loss_xval": 0.828125, "num_input_tokens_seen": 174112208, "step": 3108 }, { "epoch": 6.924276169265033, "grad_norm": 25.14827537536621, "learning_rate": 1e-06, "loss": 0.7384, "num_input_tokens_seen": 174168272, "step": 3109 }, { "epoch": 6.924276169265033, "loss": 0.7882997393608093, "loss_ce": 0.00021379378449637443, "loss_iou": 0.35546875, "loss_num": 0.0157470703125, "loss_xval": 0.7890625, "num_input_tokens_seen": 174168272, "step": 3109 }, { "epoch": 6.926503340757238, "grad_norm": 18.243032455444336, "learning_rate": 1e-06, "loss": 0.7986, "num_input_tokens_seen": 174226660, "step": 3110 }, { "epoch": 6.926503340757238, "loss": 0.8874292969703674, "loss_ce": 0.00022228219313547015, "loss_iou": 0.359375, "loss_num": 0.033447265625, "loss_xval": 0.88671875, "num_input_tokens_seen": 174226660, "step": 3110 }, { "epoch": 6.928730512249444, "grad_norm": 14.311541557312012, "learning_rate": 1e-06, "loss": 0.6963, "num_input_tokens_seen": 174279336, "step": 3111 }, { "epoch": 6.928730512249444, "loss": 0.7939878702163696, "loss_ce": 0.00028668707818724215, "loss_iou": 0.318359375, "loss_num": 0.031494140625, "loss_xval": 0.79296875, "num_input_tokens_seen": 174279336, "step": 3111 }, { "epoch": 6.9309576837416484, "grad_norm": 20.134523391723633, "learning_rate": 1e-06, "loss": 0.7007, "num_input_tokens_seen": 174335124, "step": 3112 }, { "epoch": 6.9309576837416484, "loss": 0.7140600681304932, "loss_ce": 0.00019287687609903514, "loss_iou": 0.302734375, "loss_num": 0.021728515625, "loss_xval": 0.71484375, "num_input_tokens_seen": 174335124, "step": 3112 }, { "epoch": 6.933184855233853, "grad_norm": 18.099767684936523, "learning_rate": 1e-06, "loss": 0.8233, "num_input_tokens_seen": 174391556, "step": 3113 }, { "epoch": 6.933184855233853, "loss": 0.7465390563011169, "loss_ce": 0.00020114146173000336, "loss_iou": 0.32421875, "loss_num": 0.02001953125, "loss_xval": 0.74609375, "num_input_tokens_seen": 174391556, "step": 3113 }, { "epoch": 6.935412026726058, "grad_norm": 22.79920768737793, "learning_rate": 1e-06, "loss": 0.6987, "num_input_tokens_seen": 174446136, "step": 3114 }, { "epoch": 6.935412026726058, "loss": 0.6254494190216064, "loss_ce": 0.00020533311180770397, "loss_iou": 0.2490234375, "loss_num": 0.0252685546875, "loss_xval": 0.625, "num_input_tokens_seen": 174446136, "step": 3114 }, { "epoch": 6.937639198218263, "grad_norm": 25.27695655822754, "learning_rate": 1e-06, "loss": 0.6646, "num_input_tokens_seen": 174500684, "step": 3115 }, { "epoch": 6.937639198218263, "loss": 0.7533596754074097, "loss_ce": 0.000185904442332685, "loss_iou": 0.328125, "loss_num": 0.0191650390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 174500684, "step": 3115 }, { "epoch": 6.939866369710468, "grad_norm": 15.809171676635742, "learning_rate": 1e-06, "loss": 0.4947, "num_input_tokens_seen": 174554360, "step": 3116 }, { "epoch": 6.939866369710468, "loss": 0.6411548852920532, "loss_ce": 0.0002857603249140084, "loss_iou": 0.2734375, "loss_num": 0.0184326171875, "loss_xval": 0.640625, "num_input_tokens_seen": 174554360, "step": 3116 }, { "epoch": 6.942093541202673, "grad_norm": 21.57350730895996, "learning_rate": 1e-06, "loss": 0.5634, "num_input_tokens_seen": 174608692, "step": 3117 }, { "epoch": 6.942093541202673, "loss": 0.550343930721283, "loss_ce": 0.00017302096239291131, "loss_iou": 0.2421875, "loss_num": 0.0130615234375, "loss_xval": 0.55078125, "num_input_tokens_seen": 174608692, "step": 3117 }, { "epoch": 6.944320712694878, "grad_norm": 16.204471588134766, "learning_rate": 1e-06, "loss": 0.7834, "num_input_tokens_seen": 174665252, "step": 3118 }, { "epoch": 6.944320712694878, "loss": 0.9167299270629883, "loss_ce": 0.0008363361121155322, "loss_iou": 0.41015625, "loss_num": 0.0189208984375, "loss_xval": 0.9140625, "num_input_tokens_seen": 174665252, "step": 3118 }, { "epoch": 6.9465478841870825, "grad_norm": 17.026905059814453, "learning_rate": 1e-06, "loss": 0.5704, "num_input_tokens_seen": 174719864, "step": 3119 }, { "epoch": 6.9465478841870825, "loss": 0.657658576965332, "loss_ce": 0.0001878339098766446, "loss_iou": 0.28125, "loss_num": 0.0191650390625, "loss_xval": 0.65625, "num_input_tokens_seen": 174719864, "step": 3119 }, { "epoch": 6.948775055679287, "grad_norm": 22.539409637451172, "learning_rate": 1e-06, "loss": 0.75, "num_input_tokens_seen": 174773804, "step": 3120 }, { "epoch": 6.948775055679287, "loss": 0.5996890068054199, "loss_ce": 0.00020167973707430065, "loss_iou": 0.26171875, "loss_num": 0.014892578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 174773804, "step": 3120 }, { "epoch": 6.951002227171492, "grad_norm": 22.28173828125, "learning_rate": 1e-06, "loss": 0.7117, "num_input_tokens_seen": 174828396, "step": 3121 }, { "epoch": 6.951002227171492, "loss": 0.5873724818229675, "loss_ce": 0.00021425656450446695, "loss_iou": 0.25, "loss_num": 0.017333984375, "loss_xval": 0.5859375, "num_input_tokens_seen": 174828396, "step": 3121 }, { "epoch": 6.953229398663697, "grad_norm": 54.56577682495117, "learning_rate": 1e-06, "loss": 0.4691, "num_input_tokens_seen": 174884540, "step": 3122 }, { "epoch": 6.953229398663697, "loss": 0.5313279628753662, "loss_ce": 0.0003220998914912343, "loss_iou": 0.2333984375, "loss_num": 0.0128173828125, "loss_xval": 0.53125, "num_input_tokens_seen": 174884540, "step": 3122 }, { "epoch": 6.955456570155902, "grad_norm": 33.31269454956055, "learning_rate": 1e-06, "loss": 0.8895, "num_input_tokens_seen": 174939284, "step": 3123 }, { "epoch": 6.955456570155902, "loss": 0.7173866033554077, "loss_ce": 0.0003456372069194913, "loss_iou": 0.2734375, "loss_num": 0.03369140625, "loss_xval": 0.71875, "num_input_tokens_seen": 174939284, "step": 3123 }, { "epoch": 6.957683741648107, "grad_norm": 25.145395278930664, "learning_rate": 1e-06, "loss": 0.5721, "num_input_tokens_seen": 174994868, "step": 3124 }, { "epoch": 6.957683741648107, "loss": 0.6920799612998962, "loss_ce": 0.00018543089390732348, "loss_iou": 0.27734375, "loss_num": 0.0277099609375, "loss_xval": 0.69140625, "num_input_tokens_seen": 174994868, "step": 3124 }, { "epoch": 6.959910913140312, "grad_norm": 31.5352840423584, "learning_rate": 1e-06, "loss": 0.6128, "num_input_tokens_seen": 175050648, "step": 3125 }, { "epoch": 6.959910913140312, "loss": 0.5849337577819824, "loss_ce": 0.00021696032490581274, "loss_iou": 0.26171875, "loss_num": 0.0123291015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 175050648, "step": 3125 }, { "epoch": 6.9621380846325165, "grad_norm": 28.711498260498047, "learning_rate": 1e-06, "loss": 0.7421, "num_input_tokens_seen": 175109124, "step": 3126 }, { "epoch": 6.9621380846325165, "loss": 0.5786285400390625, "loss_ce": 0.0002593994140625, "loss_iou": 0.25390625, "loss_num": 0.0142822265625, "loss_xval": 0.578125, "num_input_tokens_seen": 175109124, "step": 3126 }, { "epoch": 6.964365256124721, "grad_norm": 81.01925659179688, "learning_rate": 1e-06, "loss": 0.6552, "num_input_tokens_seen": 175165652, "step": 3127 }, { "epoch": 6.964365256124721, "loss": 0.4998403489589691, "loss_ce": 0.00020655704429373145, "loss_iou": 0.212890625, "loss_num": 0.01483154296875, "loss_xval": 0.5, "num_input_tokens_seen": 175165652, "step": 3127 }, { "epoch": 6.966592427616926, "grad_norm": 20.51897621154785, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 175223100, "step": 3128 }, { "epoch": 6.966592427616926, "loss": 0.5199558734893799, "loss_ce": 0.00018052573432214558, "loss_iou": 0.240234375, "loss_num": 0.0081787109375, "loss_xval": 0.51953125, "num_input_tokens_seen": 175223100, "step": 3128 }, { "epoch": 6.968819599109131, "grad_norm": 13.959333419799805, "learning_rate": 1e-06, "loss": 0.5609, "num_input_tokens_seen": 175280568, "step": 3129 }, { "epoch": 6.968819599109131, "loss": 0.7189445495605469, "loss_ce": 0.0001945712574524805, "loss_iou": 0.333984375, "loss_num": 0.010009765625, "loss_xval": 0.71875, "num_input_tokens_seen": 175280568, "step": 3129 }, { "epoch": 6.971046770601336, "grad_norm": 22.97583770751953, "learning_rate": 1e-06, "loss": 1.0313, "num_input_tokens_seen": 175337324, "step": 3130 }, { "epoch": 6.971046770601336, "loss": 0.8275806903839111, "loss_ce": 0.00018814984650816768, "loss_iou": 0.359375, "loss_num": 0.0213623046875, "loss_xval": 0.828125, "num_input_tokens_seen": 175337324, "step": 3130 }, { "epoch": 6.973273942093542, "grad_norm": 34.06120681762695, "learning_rate": 1e-06, "loss": 0.8319, "num_input_tokens_seen": 175392008, "step": 3131 }, { "epoch": 6.973273942093542, "loss": 0.9467490315437317, "loss_ce": 0.00021585801732726395, "loss_iou": 0.412109375, "loss_num": 0.024658203125, "loss_xval": 0.9453125, "num_input_tokens_seen": 175392008, "step": 3131 }, { "epoch": 6.9755011135857465, "grad_norm": 16.659931182861328, "learning_rate": 1e-06, "loss": 0.5561, "num_input_tokens_seen": 175450048, "step": 3132 }, { "epoch": 6.9755011135857465, "loss": 0.5903693437576294, "loss_ce": 0.0002204178017564118, "loss_iou": 0.255859375, "loss_num": 0.0159912109375, "loss_xval": 0.58984375, "num_input_tokens_seen": 175450048, "step": 3132 }, { "epoch": 6.977728285077951, "grad_norm": 31.275850296020508, "learning_rate": 1e-06, "loss": 0.7978, "num_input_tokens_seen": 175504996, "step": 3133 }, { "epoch": 6.977728285077951, "loss": 1.0408833026885986, "loss_ce": 0.0002338237245567143, "loss_iou": 0.419921875, "loss_num": 0.039794921875, "loss_xval": 1.0390625, "num_input_tokens_seen": 175504996, "step": 3133 }, { "epoch": 6.979955456570156, "grad_norm": 19.884502410888672, "learning_rate": 1e-06, "loss": 0.5947, "num_input_tokens_seen": 175562712, "step": 3134 }, { "epoch": 6.979955456570156, "loss": 0.4893288016319275, "loss_ce": 0.00019303697627037764, "loss_iou": 0.2314453125, "loss_num": 0.00555419921875, "loss_xval": 0.48828125, "num_input_tokens_seen": 175562712, "step": 3134 }, { "epoch": 6.982182628062361, "grad_norm": 13.180671691894531, "learning_rate": 1e-06, "loss": 0.7, "num_input_tokens_seen": 175620280, "step": 3135 }, { "epoch": 6.982182628062361, "loss": 0.8063207864761353, "loss_ce": 0.0004125875420868397, "loss_iou": 0.35546875, "loss_num": 0.01904296875, "loss_xval": 0.8046875, "num_input_tokens_seen": 175620280, "step": 3135 }, { "epoch": 6.984409799554566, "grad_norm": 21.996858596801758, "learning_rate": 1e-06, "loss": 0.5783, "num_input_tokens_seen": 175678996, "step": 3136 }, { "epoch": 6.984409799554566, "loss": 0.7316303253173828, "loss_ce": 0.00018497416749596596, "loss_iou": 0.302734375, "loss_num": 0.025146484375, "loss_xval": 0.73046875, "num_input_tokens_seen": 175678996, "step": 3136 }, { "epoch": 6.986636971046771, "grad_norm": 19.231781005859375, "learning_rate": 1e-06, "loss": 0.8218, "num_input_tokens_seen": 175733860, "step": 3137 }, { "epoch": 6.986636971046771, "loss": 0.6332485675811768, "loss_ce": 0.00019193578918930143, "loss_iou": 0.25, "loss_num": 0.0262451171875, "loss_xval": 0.6328125, "num_input_tokens_seen": 175733860, "step": 3137 }, { "epoch": 6.988864142538976, "grad_norm": 11.28582763671875, "learning_rate": 1e-06, "loss": 0.7356, "num_input_tokens_seen": 175792092, "step": 3138 }, { "epoch": 6.988864142538976, "loss": 0.7703654766082764, "loss_ce": 0.00022392123355530202, "loss_iou": 0.3046875, "loss_num": 0.032470703125, "loss_xval": 0.76953125, "num_input_tokens_seen": 175792092, "step": 3138 }, { "epoch": 6.991091314031181, "grad_norm": 18.971759796142578, "learning_rate": 1e-06, "loss": 0.6312, "num_input_tokens_seen": 175848452, "step": 3139 }, { "epoch": 6.991091314031181, "loss": 0.7722006440162659, "loss_ce": 0.00016694323858246207, "loss_iou": 0.3046875, "loss_num": 0.0322265625, "loss_xval": 0.7734375, "num_input_tokens_seen": 175848452, "step": 3139 }, { "epoch": 6.993318485523385, "grad_norm": 16.247879028320312, "learning_rate": 1e-06, "loss": 0.5357, "num_input_tokens_seen": 175906920, "step": 3140 }, { "epoch": 6.993318485523385, "loss": 0.5915122628211975, "loss_ce": 0.00020364229567348957, "loss_iou": 0.25390625, "loss_num": 0.0162353515625, "loss_xval": 0.58984375, "num_input_tokens_seen": 175906920, "step": 3140 }, { "epoch": 6.99554565701559, "grad_norm": 14.442233085632324, "learning_rate": 1e-06, "loss": 0.4958, "num_input_tokens_seen": 175965616, "step": 3141 }, { "epoch": 6.99554565701559, "loss": 0.5104349851608276, "loss_ce": 0.00018110190285369754, "loss_iou": 0.2373046875, "loss_num": 0.0069580078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 175965616, "step": 3141 }, { "epoch": 6.997772828507795, "grad_norm": 13.822257995605469, "learning_rate": 1e-06, "loss": 0.9366, "num_input_tokens_seen": 176025056, "step": 3142 }, { "epoch": 6.997772828507795, "loss": 0.7665646076202393, "loss_ce": 0.00045135943219065666, "loss_iou": 0.3359375, "loss_num": 0.0185546875, "loss_xval": 0.765625, "num_input_tokens_seen": 176025056, "step": 3142 }, { "epoch": 7.0, "grad_norm": 20.180612564086914, "learning_rate": 1e-06, "loss": 0.7645, "num_input_tokens_seen": 176083108, "step": 3143 }, { "epoch": 7.0, "loss": 0.6660090088844299, "loss_ce": 0.00023752517881803215, "loss_iou": 0.291015625, "loss_num": 0.0167236328125, "loss_xval": 0.6640625, "num_input_tokens_seen": 176083108, "step": 3143 }, { "epoch": 7.002227171492205, "grad_norm": 15.68157958984375, "learning_rate": 1e-06, "loss": 0.6653, "num_input_tokens_seen": 176139524, "step": 3144 }, { "epoch": 7.002227171492205, "loss": 0.6556283831596375, "loss_ce": 0.00023284553026314825, "loss_iou": 0.287109375, "loss_num": 0.0164794921875, "loss_xval": 0.65625, "num_input_tokens_seen": 176139524, "step": 3144 }, { "epoch": 7.00445434298441, "grad_norm": 31.179643630981445, "learning_rate": 1e-06, "loss": 0.5867, "num_input_tokens_seen": 176194724, "step": 3145 }, { "epoch": 7.00445434298441, "loss": 0.49978122115135193, "loss_ce": 0.0002695125003810972, "loss_iou": 0.23046875, "loss_num": 0.00750732421875, "loss_xval": 0.5, "num_input_tokens_seen": 176194724, "step": 3145 }, { "epoch": 7.006681514476615, "grad_norm": 32.13369369506836, "learning_rate": 1e-06, "loss": 0.7949, "num_input_tokens_seen": 176250820, "step": 3146 }, { "epoch": 7.006681514476615, "loss": 0.7021375298500061, "loss_ce": 0.00023322663037106395, "loss_iou": 0.302734375, "loss_num": 0.019287109375, "loss_xval": 0.703125, "num_input_tokens_seen": 176250820, "step": 3146 }, { "epoch": 7.008908685968819, "grad_norm": 18.130083084106445, "learning_rate": 1e-06, "loss": 0.755, "num_input_tokens_seen": 176306224, "step": 3147 }, { "epoch": 7.008908685968819, "loss": 0.9072054624557495, "loss_ce": 0.00022300847922451794, "loss_iou": 0.365234375, "loss_num": 0.03564453125, "loss_xval": 0.90625, "num_input_tokens_seen": 176306224, "step": 3147 }, { "epoch": 7.011135857461024, "grad_norm": 24.085601806640625, "learning_rate": 1e-06, "loss": 0.5465, "num_input_tokens_seen": 176362740, "step": 3148 }, { "epoch": 7.011135857461024, "loss": 0.5722109079360962, "loss_ce": 0.00018940077279694378, "loss_iou": 0.25390625, "loss_num": 0.01275634765625, "loss_xval": 0.5703125, "num_input_tokens_seen": 176362740, "step": 3148 }, { "epoch": 7.013363028953229, "grad_norm": 16.56767463684082, "learning_rate": 1e-06, "loss": 0.6452, "num_input_tokens_seen": 176420184, "step": 3149 }, { "epoch": 7.013363028953229, "loss": 0.7920135259628296, "loss_ce": 0.00026545586297288537, "loss_iou": 0.322265625, "loss_num": 0.029052734375, "loss_xval": 0.79296875, "num_input_tokens_seen": 176420184, "step": 3149 }, { "epoch": 7.015590200445434, "grad_norm": 21.7204532623291, "learning_rate": 1e-06, "loss": 0.7989, "num_input_tokens_seen": 176475432, "step": 3150 }, { "epoch": 7.015590200445434, "loss": 0.5816566944122314, "loss_ce": 0.0003578995238058269, "loss_iou": 0.251953125, "loss_num": 0.01507568359375, "loss_xval": 0.58203125, "num_input_tokens_seen": 176475432, "step": 3150 }, { "epoch": 7.017817371937639, "grad_norm": 18.18883514404297, "learning_rate": 1e-06, "loss": 0.8199, "num_input_tokens_seen": 176532060, "step": 3151 }, { "epoch": 7.017817371937639, "loss": 0.8285827040672302, "loss_ce": 0.0002135752292815596, "loss_iou": 0.349609375, "loss_num": 0.02587890625, "loss_xval": 0.828125, "num_input_tokens_seen": 176532060, "step": 3151 }, { "epoch": 7.020044543429844, "grad_norm": 15.747713088989258, "learning_rate": 1e-06, "loss": 0.8188, "num_input_tokens_seen": 176589504, "step": 3152 }, { "epoch": 7.020044543429844, "loss": 0.8652061223983765, "loss_ce": 0.00021594867575913668, "loss_iou": 0.375, "loss_num": 0.0228271484375, "loss_xval": 0.86328125, "num_input_tokens_seen": 176589504, "step": 3152 }, { "epoch": 7.022271714922049, "grad_norm": 22.65471649169922, "learning_rate": 1e-06, "loss": 0.6007, "num_input_tokens_seen": 176647648, "step": 3153 }, { "epoch": 7.022271714922049, "loss": 0.7232741117477417, "loss_ce": 0.0003737136139534414, "loss_iou": 0.3203125, "loss_num": 0.0167236328125, "loss_xval": 0.72265625, "num_input_tokens_seen": 176647648, "step": 3153 }, { "epoch": 7.0244988864142535, "grad_norm": 16.48569107055664, "learning_rate": 1e-06, "loss": 0.7161, "num_input_tokens_seen": 176704740, "step": 3154 }, { "epoch": 7.0244988864142535, "loss": 0.6486464738845825, "loss_ce": 0.00045317187323234975, "loss_iou": 0.265625, "loss_num": 0.0235595703125, "loss_xval": 0.6484375, "num_input_tokens_seen": 176704740, "step": 3154 }, { "epoch": 7.026726057906459, "grad_norm": 26.504276275634766, "learning_rate": 1e-06, "loss": 0.6456, "num_input_tokens_seen": 176761396, "step": 3155 }, { "epoch": 7.026726057906459, "loss": 0.7807806730270386, "loss_ce": 0.00026307476218789816, "loss_iou": 0.296875, "loss_num": 0.037353515625, "loss_xval": 0.78125, "num_input_tokens_seen": 176761396, "step": 3155 }, { "epoch": 7.028953229398664, "grad_norm": 18.16960334777832, "learning_rate": 1e-06, "loss": 0.4767, "num_input_tokens_seen": 176816940, "step": 3156 }, { "epoch": 7.028953229398664, "loss": 0.40581727027893066, "loss_ce": 0.00017763671348802745, "loss_iou": 0.177734375, "loss_num": 0.01019287109375, "loss_xval": 0.40625, "num_input_tokens_seen": 176816940, "step": 3156 }, { "epoch": 7.031180400890869, "grad_norm": 16.1363468170166, "learning_rate": 1e-06, "loss": 0.5441, "num_input_tokens_seen": 176873404, "step": 3157 }, { "epoch": 7.031180400890869, "loss": 0.65606689453125, "loss_ce": 0.00018308302969671786, "loss_iou": 0.25390625, "loss_num": 0.0299072265625, "loss_xval": 0.65625, "num_input_tokens_seen": 176873404, "step": 3157 }, { "epoch": 7.033407572383074, "grad_norm": 21.781497955322266, "learning_rate": 1e-06, "loss": 0.4695, "num_input_tokens_seen": 176931848, "step": 3158 }, { "epoch": 7.033407572383074, "loss": 0.3050188720226288, "loss_ce": 0.00020930425671394914, "loss_iou": 0.1376953125, "loss_num": 0.00604248046875, "loss_xval": 0.3046875, "num_input_tokens_seen": 176931848, "step": 3158 }, { "epoch": 7.035634743875279, "grad_norm": 23.22254753112793, "learning_rate": 1e-06, "loss": 0.7001, "num_input_tokens_seen": 176988340, "step": 3159 }, { "epoch": 7.035634743875279, "loss": 0.7503657341003418, "loss_ce": 0.0020747713278979063, "loss_iou": 0.33203125, "loss_num": 0.01708984375, "loss_xval": 0.75, "num_input_tokens_seen": 176988340, "step": 3159 }, { "epoch": 7.0378619153674835, "grad_norm": 15.618306159973145, "learning_rate": 1e-06, "loss": 0.5591, "num_input_tokens_seen": 177043904, "step": 3160 }, { "epoch": 7.0378619153674835, "loss": 0.41194963455200195, "loss_ce": 0.0009388765902258456, "loss_iou": 0.189453125, "loss_num": 0.006500244140625, "loss_xval": 0.41015625, "num_input_tokens_seen": 177043904, "step": 3160 }, { "epoch": 7.040089086859688, "grad_norm": 17.742382049560547, "learning_rate": 1e-06, "loss": 0.5555, "num_input_tokens_seen": 177098532, "step": 3161 }, { "epoch": 7.040089086859688, "loss": 0.557831883430481, "loss_ce": 0.00021466660837177187, "loss_iou": 0.265625, "loss_num": 0.0050048828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 177098532, "step": 3161 }, { "epoch": 7.042316258351893, "grad_norm": 22.0341854095459, "learning_rate": 1e-06, "loss": 0.7129, "num_input_tokens_seen": 177154748, "step": 3162 }, { "epoch": 7.042316258351893, "loss": 0.8048324584960938, "loss_ce": 0.0002669950481504202, "loss_iou": 0.353515625, "loss_num": 0.01904296875, "loss_xval": 0.8046875, "num_input_tokens_seen": 177154748, "step": 3162 }, { "epoch": 7.044543429844098, "grad_norm": 19.422056198120117, "learning_rate": 1e-06, "loss": 0.5373, "num_input_tokens_seen": 177208936, "step": 3163 }, { "epoch": 7.044543429844098, "loss": 0.5412275791168213, "loss_ce": 0.0031416614074259996, "loss_iou": 0.2490234375, "loss_num": 0.00799560546875, "loss_xval": 0.5390625, "num_input_tokens_seen": 177208936, "step": 3163 }, { "epoch": 7.046770601336303, "grad_norm": 16.3929443359375, "learning_rate": 1e-06, "loss": 0.819, "num_input_tokens_seen": 177262612, "step": 3164 }, { "epoch": 7.046770601336303, "loss": 0.8707385063171387, "loss_ce": 0.00025512618594802916, "loss_iou": 0.369140625, "loss_num": 0.0262451171875, "loss_xval": 0.87109375, "num_input_tokens_seen": 177262612, "step": 3164 }, { "epoch": 7.048997772828508, "grad_norm": 28.08599281311035, "learning_rate": 1e-06, "loss": 0.7073, "num_input_tokens_seen": 177319504, "step": 3165 }, { "epoch": 7.048997772828508, "loss": 0.7279913425445557, "loss_ce": 0.00020817822951357812, "loss_iou": 0.3125, "loss_num": 0.020751953125, "loss_xval": 0.7265625, "num_input_tokens_seen": 177319504, "step": 3165 }, { "epoch": 7.051224944320713, "grad_norm": 18.15179443359375, "learning_rate": 1e-06, "loss": 0.6731, "num_input_tokens_seen": 177375208, "step": 3166 }, { "epoch": 7.051224944320713, "loss": 0.7211670279502869, "loss_ce": 0.00021975839626975358, "loss_iou": 0.32421875, "loss_num": 0.01434326171875, "loss_xval": 0.72265625, "num_input_tokens_seen": 177375208, "step": 3166 }, { "epoch": 7.0534521158129175, "grad_norm": 24.333948135375977, "learning_rate": 1e-06, "loss": 0.7331, "num_input_tokens_seen": 177432468, "step": 3167 }, { "epoch": 7.0534521158129175, "loss": 0.6586510539054871, "loss_ce": 0.00020380858040880412, "loss_iou": 0.26953125, "loss_num": 0.02392578125, "loss_xval": 0.66015625, "num_input_tokens_seen": 177432468, "step": 3167 }, { "epoch": 7.055679287305122, "grad_norm": 24.297325134277344, "learning_rate": 1e-06, "loss": 0.7255, "num_input_tokens_seen": 177487920, "step": 3168 }, { "epoch": 7.055679287305122, "loss": 0.5983210206031799, "loss_ce": 0.0001764908665791154, "loss_iou": 0.2578125, "loss_num": 0.0166015625, "loss_xval": 0.59765625, "num_input_tokens_seen": 177487920, "step": 3168 }, { "epoch": 7.057906458797327, "grad_norm": 24.517797470092773, "learning_rate": 1e-06, "loss": 0.7016, "num_input_tokens_seen": 177543680, "step": 3169 }, { "epoch": 7.057906458797327, "loss": 0.697583019733429, "loss_ce": 0.00019531394354999065, "loss_iou": 0.296875, "loss_num": 0.0206298828125, "loss_xval": 0.69921875, "num_input_tokens_seen": 177543680, "step": 3169 }, { "epoch": 7.060133630289532, "grad_norm": 15.318553924560547, "learning_rate": 1e-06, "loss": 0.6435, "num_input_tokens_seen": 177600092, "step": 3170 }, { "epoch": 7.060133630289532, "loss": 0.6004433035850525, "loss_ce": 0.0002235766005469486, "loss_iou": 0.26953125, "loss_num": 0.012451171875, "loss_xval": 0.6015625, "num_input_tokens_seen": 177600092, "step": 3170 }, { "epoch": 7.062360801781737, "grad_norm": 26.361791610717773, "learning_rate": 1e-06, "loss": 0.6517, "num_input_tokens_seen": 177652892, "step": 3171 }, { "epoch": 7.062360801781737, "loss": 0.7172492146492004, "loss_ce": 0.00020820970530621707, "loss_iou": 0.30859375, "loss_num": 0.0203857421875, "loss_xval": 0.71875, "num_input_tokens_seen": 177652892, "step": 3171 }, { "epoch": 7.064587973273942, "grad_norm": 18.68074607849121, "learning_rate": 1e-06, "loss": 0.6691, "num_input_tokens_seen": 177705568, "step": 3172 }, { "epoch": 7.064587973273942, "loss": 0.696946382522583, "loss_ce": 0.0001690261415205896, "loss_iou": 0.30078125, "loss_num": 0.018798828125, "loss_xval": 0.6953125, "num_input_tokens_seen": 177705568, "step": 3172 }, { "epoch": 7.066815144766147, "grad_norm": 28.12506866455078, "learning_rate": 1e-06, "loss": 0.7585, "num_input_tokens_seen": 177759380, "step": 3173 }, { "epoch": 7.066815144766147, "loss": 0.8229764699935913, "loss_ce": 0.0002226082724519074, "loss_iou": 0.341796875, "loss_num": 0.0281982421875, "loss_xval": 0.82421875, "num_input_tokens_seen": 177759380, "step": 3173 }, { "epoch": 7.0690423162583516, "grad_norm": 25.993946075439453, "learning_rate": 1e-06, "loss": 0.6376, "num_input_tokens_seen": 177814872, "step": 3174 }, { "epoch": 7.0690423162583516, "loss": 0.8440980911254883, "loss_ce": 0.0015688535058870912, "loss_iou": 0.341796875, "loss_num": 0.031494140625, "loss_xval": 0.84375, "num_input_tokens_seen": 177814872, "step": 3174 }, { "epoch": 7.071269487750556, "grad_norm": 30.0826416015625, "learning_rate": 1e-06, "loss": 0.656, "num_input_tokens_seen": 177871144, "step": 3175 }, { "epoch": 7.071269487750556, "loss": 0.46419858932495117, "loss_ce": 0.00020933072664774954, "loss_iou": 0.197265625, "loss_num": 0.0140380859375, "loss_xval": 0.46484375, "num_input_tokens_seen": 177871144, "step": 3175 }, { "epoch": 7.073496659242761, "grad_norm": 22.121746063232422, "learning_rate": 1e-06, "loss": 0.6732, "num_input_tokens_seen": 177924664, "step": 3176 }, { "epoch": 7.073496659242761, "loss": 0.5211669206619263, "loss_ce": 0.00017085002036765218, "loss_iou": 0.232421875, "loss_num": 0.0111083984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 177924664, "step": 3176 }, { "epoch": 7.075723830734967, "grad_norm": 52.88250732421875, "learning_rate": 1e-06, "loss": 0.5979, "num_input_tokens_seen": 177977040, "step": 3177 }, { "epoch": 7.075723830734967, "loss": 0.6259975433349609, "loss_ce": 0.00026510769384913146, "loss_iou": 0.25, "loss_num": 0.02490234375, "loss_xval": 0.625, "num_input_tokens_seen": 177977040, "step": 3177 }, { "epoch": 7.077951002227172, "grad_norm": 23.79888916015625, "learning_rate": 1e-06, "loss": 0.8311, "num_input_tokens_seen": 178030552, "step": 3178 }, { "epoch": 7.077951002227172, "loss": 0.6989488005638123, "loss_ce": 0.0002183011529268697, "loss_iou": 0.298828125, "loss_num": 0.0201416015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 178030552, "step": 3178 }, { "epoch": 7.080178173719377, "grad_norm": 18.478776931762695, "learning_rate": 1e-06, "loss": 0.6773, "num_input_tokens_seen": 178089092, "step": 3179 }, { "epoch": 7.080178173719377, "loss": 0.6097142696380615, "loss_ce": 0.0004613480414263904, "loss_iou": 0.275390625, "loss_num": 0.01141357421875, "loss_xval": 0.609375, "num_input_tokens_seen": 178089092, "step": 3179 }, { "epoch": 7.082405345211582, "grad_norm": 37.656944274902344, "learning_rate": 1e-06, "loss": 0.7103, "num_input_tokens_seen": 178139908, "step": 3180 }, { "epoch": 7.082405345211582, "loss": 0.7475607395172119, "loss_ce": 0.001344920601695776, "loss_iou": 0.333984375, "loss_num": 0.015380859375, "loss_xval": 0.74609375, "num_input_tokens_seen": 178139908, "step": 3180 }, { "epoch": 7.0846325167037865, "grad_norm": 21.057231903076172, "learning_rate": 1e-06, "loss": 0.7319, "num_input_tokens_seen": 178196864, "step": 3181 }, { "epoch": 7.0846325167037865, "loss": 0.6234728097915649, "loss_ce": 0.00018174458818975836, "loss_iou": 0.25390625, "loss_num": 0.0228271484375, "loss_xval": 0.625, "num_input_tokens_seen": 178196864, "step": 3181 }, { "epoch": 7.086859688195991, "grad_norm": 13.602341651916504, "learning_rate": 1e-06, "loss": 0.4496, "num_input_tokens_seen": 178254848, "step": 3182 }, { "epoch": 7.086859688195991, "loss": 0.40568333864212036, "loss_ce": 0.00016574125038459897, "loss_iou": 0.17578125, "loss_num": 0.01080322265625, "loss_xval": 0.40625, "num_input_tokens_seen": 178254848, "step": 3182 }, { "epoch": 7.089086859688196, "grad_norm": 19.736835479736328, "learning_rate": 1e-06, "loss": 0.6679, "num_input_tokens_seen": 178312356, "step": 3183 }, { "epoch": 7.089086859688196, "loss": 0.8520088195800781, "loss_ce": 0.00020223407773301005, "loss_iou": 0.373046875, "loss_num": 0.0213623046875, "loss_xval": 0.8515625, "num_input_tokens_seen": 178312356, "step": 3183 }, { "epoch": 7.091314031180401, "grad_norm": 18.998903274536133, "learning_rate": 1e-06, "loss": 0.577, "num_input_tokens_seen": 178369760, "step": 3184 }, { "epoch": 7.091314031180401, "loss": 0.6121712327003479, "loss_ce": 0.0004158708034083247, "loss_iou": 0.232421875, "loss_num": 0.0294189453125, "loss_xval": 0.61328125, "num_input_tokens_seen": 178369760, "step": 3184 }, { "epoch": 7.093541202672606, "grad_norm": 17.410633087158203, "learning_rate": 1e-06, "loss": 0.5794, "num_input_tokens_seen": 178424408, "step": 3185 }, { "epoch": 7.093541202672606, "loss": 0.6892697215080261, "loss_ce": 0.00030487452750094235, "loss_iou": 0.294921875, "loss_num": 0.0196533203125, "loss_xval": 0.6875, "num_input_tokens_seen": 178424408, "step": 3185 }, { "epoch": 7.095768374164811, "grad_norm": 25.50342559814453, "learning_rate": 1e-06, "loss": 0.752, "num_input_tokens_seen": 178478552, "step": 3186 }, { "epoch": 7.095768374164811, "loss": 0.6170066595077515, "loss_ce": 0.00018539902521297336, "loss_iou": 0.267578125, "loss_num": 0.0164794921875, "loss_xval": 0.6171875, "num_input_tokens_seen": 178478552, "step": 3186 }, { "epoch": 7.097995545657016, "grad_norm": 23.802183151245117, "learning_rate": 1e-06, "loss": 0.6589, "num_input_tokens_seen": 178533624, "step": 3187 }, { "epoch": 7.097995545657016, "loss": 0.632292628288269, "loss_ce": 0.0002125354076270014, "loss_iou": 0.271484375, "loss_num": 0.017578125, "loss_xval": 0.6328125, "num_input_tokens_seen": 178533624, "step": 3187 }, { "epoch": 7.1002227171492205, "grad_norm": 22.64967155456543, "learning_rate": 1e-06, "loss": 0.4275, "num_input_tokens_seen": 178590396, "step": 3188 }, { "epoch": 7.1002227171492205, "loss": 0.49148237705230713, "loss_ce": 0.00014936855586711317, "loss_iou": 0.22265625, "loss_num": 0.00933837890625, "loss_xval": 0.4921875, "num_input_tokens_seen": 178590396, "step": 3188 }, { "epoch": 7.102449888641425, "grad_norm": 17.186386108398438, "learning_rate": 1e-06, "loss": 0.6466, "num_input_tokens_seen": 178648316, "step": 3189 }, { "epoch": 7.102449888641425, "loss": 0.7294230461120605, "loss_ce": 0.00017502682749181986, "loss_iou": 0.333984375, "loss_num": 0.0125732421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 178648316, "step": 3189 }, { "epoch": 7.10467706013363, "grad_norm": 14.459593772888184, "learning_rate": 1e-06, "loss": 0.7198, "num_input_tokens_seen": 178705096, "step": 3190 }, { "epoch": 7.10467706013363, "loss": 0.62459796667099, "loss_ce": 0.00020834297174587846, "loss_iou": 0.265625, "loss_num": 0.018798828125, "loss_xval": 0.625, "num_input_tokens_seen": 178705096, "step": 3190 }, { "epoch": 7.106904231625835, "grad_norm": 19.029233932495117, "learning_rate": 1e-06, "loss": 0.7772, "num_input_tokens_seen": 178761896, "step": 3191 }, { "epoch": 7.106904231625835, "loss": 0.5232353210449219, "loss_ce": 0.00016402616165578365, "loss_iou": 0.23046875, "loss_num": 0.0125732421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 178761896, "step": 3191 }, { "epoch": 7.10913140311804, "grad_norm": 27.418760299682617, "learning_rate": 1e-06, "loss": 0.7062, "num_input_tokens_seen": 178816456, "step": 3192 }, { "epoch": 7.10913140311804, "loss": 0.9633630514144897, "loss_ce": 0.0004724356404040009, "loss_iou": 0.390625, "loss_num": 0.036376953125, "loss_xval": 0.9609375, "num_input_tokens_seen": 178816456, "step": 3192 }, { "epoch": 7.111358574610245, "grad_norm": 14.343728065490723, "learning_rate": 1e-06, "loss": 0.4866, "num_input_tokens_seen": 178872960, "step": 3193 }, { "epoch": 7.111358574610245, "loss": 0.5601356029510498, "loss_ce": 0.0001990435121115297, "loss_iou": 0.2080078125, "loss_num": 0.0286865234375, "loss_xval": 0.55859375, "num_input_tokens_seen": 178872960, "step": 3193 }, { "epoch": 7.11358574610245, "grad_norm": 32.39339828491211, "learning_rate": 1e-06, "loss": 0.6818, "num_input_tokens_seen": 178927728, "step": 3194 }, { "epoch": 7.11358574610245, "loss": 0.7690376043319702, "loss_ce": 0.00023876590421423316, "loss_iou": 0.296875, "loss_num": 0.034912109375, "loss_xval": 0.76953125, "num_input_tokens_seen": 178927728, "step": 3194 }, { "epoch": 7.1158129175946545, "grad_norm": 21.278362274169922, "learning_rate": 1e-06, "loss": 0.6444, "num_input_tokens_seen": 178984676, "step": 3195 }, { "epoch": 7.1158129175946545, "loss": 0.5341231822967529, "loss_ce": 0.0001876165042631328, "loss_iou": 0.2216796875, "loss_num": 0.0181884765625, "loss_xval": 0.53515625, "num_input_tokens_seen": 178984676, "step": 3195 }, { "epoch": 7.118040089086859, "grad_norm": 30.706249237060547, "learning_rate": 1e-06, "loss": 0.7029, "num_input_tokens_seen": 179040412, "step": 3196 }, { "epoch": 7.118040089086859, "loss": 0.627882719039917, "loss_ce": 0.00019718779367394745, "loss_iou": 0.291015625, "loss_num": 0.00909423828125, "loss_xval": 0.62890625, "num_input_tokens_seen": 179040412, "step": 3196 }, { "epoch": 7.120267260579064, "grad_norm": 14.424131393432617, "learning_rate": 1e-06, "loss": 0.4814, "num_input_tokens_seen": 179096912, "step": 3197 }, { "epoch": 7.120267260579064, "loss": 0.46601158380508423, "loss_ce": 0.0001913084415718913, "loss_iou": 0.2099609375, "loss_num": 0.0093994140625, "loss_xval": 0.46484375, "num_input_tokens_seen": 179096912, "step": 3197 }, { "epoch": 7.122494432071269, "grad_norm": 17.124177932739258, "learning_rate": 1e-06, "loss": 0.6929, "num_input_tokens_seen": 179153884, "step": 3198 }, { "epoch": 7.122494432071269, "loss": 0.7391812801361084, "loss_ce": 0.00016765895998105407, "loss_iou": 0.3203125, "loss_num": 0.01953125, "loss_xval": 0.73828125, "num_input_tokens_seen": 179153884, "step": 3198 }, { "epoch": 7.124721603563474, "grad_norm": 71.17576599121094, "learning_rate": 1e-06, "loss": 0.7984, "num_input_tokens_seen": 179208364, "step": 3199 }, { "epoch": 7.124721603563474, "loss": 0.8881849646568298, "loss_ce": 0.0002455237554386258, "loss_iou": 0.357421875, "loss_num": 0.03466796875, "loss_xval": 0.88671875, "num_input_tokens_seen": 179208364, "step": 3199 }, { "epoch": 7.12694877505568, "grad_norm": 16.409549713134766, "learning_rate": 1e-06, "loss": 0.5439, "num_input_tokens_seen": 179262820, "step": 3200 }, { "epoch": 7.12694877505568, "loss": 0.5343835353851318, "loss_ce": 0.00020382186630740762, "loss_iou": 0.232421875, "loss_num": 0.01385498046875, "loss_xval": 0.53515625, "num_input_tokens_seen": 179262820, "step": 3200 }, { "epoch": 7.129175946547885, "grad_norm": 19.769577026367188, "learning_rate": 1e-06, "loss": 0.8334, "num_input_tokens_seen": 179320280, "step": 3201 }, { "epoch": 7.129175946547885, "loss": 0.5122010111808777, "loss_ce": 0.00017708051018416882, "loss_iou": 0.224609375, "loss_num": 0.0128173828125, "loss_xval": 0.51171875, "num_input_tokens_seen": 179320280, "step": 3201 }, { "epoch": 7.131403118040089, "grad_norm": 15.5679292678833, "learning_rate": 1e-06, "loss": 0.5519, "num_input_tokens_seen": 179377732, "step": 3202 }, { "epoch": 7.131403118040089, "loss": 0.5272064805030823, "loss_ce": 0.00022892479319125414, "loss_iou": 0.240234375, "loss_num": 0.00921630859375, "loss_xval": 0.52734375, "num_input_tokens_seen": 179377732, "step": 3202 }, { "epoch": 7.133630289532294, "grad_norm": 20.086763381958008, "learning_rate": 1e-06, "loss": 0.7019, "num_input_tokens_seen": 179433624, "step": 3203 }, { "epoch": 7.133630289532294, "loss": 0.6670153141021729, "loss_ce": 0.0002672579721547663, "loss_iou": 0.2890625, "loss_num": 0.01806640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 179433624, "step": 3203 }, { "epoch": 7.135857461024499, "grad_norm": 23.608802795410156, "learning_rate": 1e-06, "loss": 0.511, "num_input_tokens_seen": 179491448, "step": 3204 }, { "epoch": 7.135857461024499, "loss": 0.4480966031551361, "loss_ce": 0.00022062801872380078, "loss_iou": 0.1953125, "loss_num": 0.01141357421875, "loss_xval": 0.447265625, "num_input_tokens_seen": 179491448, "step": 3204 }, { "epoch": 7.138084632516704, "grad_norm": 15.496456146240234, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 179550108, "step": 3205 }, { "epoch": 7.138084632516704, "loss": 0.369751513004303, "loss_ce": 0.00036674662260338664, "loss_iou": 0.1572265625, "loss_num": 0.01104736328125, "loss_xval": 0.369140625, "num_input_tokens_seen": 179550108, "step": 3205 }, { "epoch": 7.140311804008909, "grad_norm": 16.712627410888672, "learning_rate": 1e-06, "loss": 0.6253, "num_input_tokens_seen": 179609256, "step": 3206 }, { "epoch": 7.140311804008909, "loss": 0.561935305595398, "loss_ce": 0.00016772476374171674, "loss_iou": 0.2353515625, "loss_num": 0.0181884765625, "loss_xval": 0.5625, "num_input_tokens_seen": 179609256, "step": 3206 }, { "epoch": 7.142538975501114, "grad_norm": 18.983253479003906, "learning_rate": 1e-06, "loss": 0.8887, "num_input_tokens_seen": 179664724, "step": 3207 }, { "epoch": 7.142538975501114, "loss": 1.041954755783081, "loss_ce": 0.00020668681827373803, "loss_iou": 0.421875, "loss_num": 0.039306640625, "loss_xval": 1.0390625, "num_input_tokens_seen": 179664724, "step": 3207 }, { "epoch": 7.144766146993319, "grad_norm": 31.973432540893555, "learning_rate": 1e-06, "loss": 0.6015, "num_input_tokens_seen": 179723472, "step": 3208 }, { "epoch": 7.144766146993319, "loss": 0.5833202600479126, "loss_ce": 0.00019040628103539348, "loss_iou": 0.267578125, "loss_num": 0.0098876953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 179723472, "step": 3208 }, { "epoch": 7.146993318485523, "grad_norm": 17.996723175048828, "learning_rate": 1e-06, "loss": 0.5506, "num_input_tokens_seen": 179781296, "step": 3209 }, { "epoch": 7.146993318485523, "loss": 0.613503098487854, "loss_ce": 0.00022186974820215255, "loss_iou": 0.259765625, "loss_num": 0.0185546875, "loss_xval": 0.61328125, "num_input_tokens_seen": 179781296, "step": 3209 }, { "epoch": 7.149220489977728, "grad_norm": 12.357635498046875, "learning_rate": 1e-06, "loss": 0.5798, "num_input_tokens_seen": 179836852, "step": 3210 }, { "epoch": 7.149220489977728, "loss": 0.6682331562042236, "loss_ce": 0.0002034001227002591, "loss_iou": 0.2373046875, "loss_num": 0.03857421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 179836852, "step": 3210 }, { "epoch": 7.151447661469933, "grad_norm": 16.912948608398438, "learning_rate": 1e-06, "loss": 0.6662, "num_input_tokens_seen": 179892884, "step": 3211 }, { "epoch": 7.151447661469933, "loss": 0.8070770502090454, "loss_ce": 0.00019233419152442366, "loss_iou": 0.337890625, "loss_num": 0.0263671875, "loss_xval": 0.80859375, "num_input_tokens_seen": 179892884, "step": 3211 }, { "epoch": 7.153674832962138, "grad_norm": 17.714216232299805, "learning_rate": 1e-06, "loss": 0.4681, "num_input_tokens_seen": 179949768, "step": 3212 }, { "epoch": 7.153674832962138, "loss": 0.4969853460788727, "loss_ce": 0.00015915744006633759, "loss_iou": 0.205078125, "loss_num": 0.01708984375, "loss_xval": 0.49609375, "num_input_tokens_seen": 179949768, "step": 3212 }, { "epoch": 7.155902004454343, "grad_norm": 26.61876106262207, "learning_rate": 1e-06, "loss": 0.8391, "num_input_tokens_seen": 180005168, "step": 3213 }, { "epoch": 7.155902004454343, "loss": 0.7213802337646484, "loss_ce": 0.00018884365272242576, "loss_iou": 0.31640625, "loss_num": 0.0177001953125, "loss_xval": 0.72265625, "num_input_tokens_seen": 180005168, "step": 3213 }, { "epoch": 7.158129175946548, "grad_norm": 15.515600204467773, "learning_rate": 1e-06, "loss": 0.6922, "num_input_tokens_seen": 180061648, "step": 3214 }, { "epoch": 7.158129175946548, "loss": 0.6254113912582397, "loss_ce": 0.0001672459620749578, "loss_iou": 0.26171875, "loss_num": 0.02001953125, "loss_xval": 0.625, "num_input_tokens_seen": 180061648, "step": 3214 }, { "epoch": 7.160356347438753, "grad_norm": 25.33653450012207, "learning_rate": 1e-06, "loss": 0.6162, "num_input_tokens_seen": 180118080, "step": 3215 }, { "epoch": 7.160356347438753, "loss": 0.5609911680221558, "loss_ce": 0.00020018592476844788, "loss_iou": 0.2431640625, "loss_num": 0.0147705078125, "loss_xval": 0.5625, "num_input_tokens_seen": 180118080, "step": 3215 }, { "epoch": 7.1625835189309575, "grad_norm": 16.110671997070312, "learning_rate": 1e-06, "loss": 0.6583, "num_input_tokens_seen": 180173604, "step": 3216 }, { "epoch": 7.1625835189309575, "loss": 0.7682472467422485, "loss_ce": 0.00018078596622217447, "loss_iou": 0.328125, "loss_num": 0.0223388671875, "loss_xval": 0.76953125, "num_input_tokens_seen": 180173604, "step": 3216 }, { "epoch": 7.164810690423162, "grad_norm": 20.151470184326172, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 180230020, "step": 3217 }, { "epoch": 7.164810690423162, "loss": 0.5044952630996704, "loss_ce": 0.0003448851057328284, "loss_iou": 0.2265625, "loss_num": 0.01007080078125, "loss_xval": 0.50390625, "num_input_tokens_seen": 180230020, "step": 3217 }, { "epoch": 7.167037861915367, "grad_norm": 13.990897178649902, "learning_rate": 1e-06, "loss": 0.8846, "num_input_tokens_seen": 180283952, "step": 3218 }, { "epoch": 7.167037861915367, "loss": 0.638586699962616, "loss_ce": 0.0002810558071359992, "loss_iou": 0.283203125, "loss_num": 0.0142822265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 180283952, "step": 3218 }, { "epoch": 7.169265033407572, "grad_norm": 17.823301315307617, "learning_rate": 1e-06, "loss": 0.8448, "num_input_tokens_seen": 180340276, "step": 3219 }, { "epoch": 7.169265033407572, "loss": 0.7374616861343384, "loss_ce": 0.00015702965902164578, "loss_iou": 0.310546875, "loss_num": 0.0235595703125, "loss_xval": 0.73828125, "num_input_tokens_seen": 180340276, "step": 3219 }, { "epoch": 7.171492204899777, "grad_norm": 21.112295150756836, "learning_rate": 1e-06, "loss": 0.6465, "num_input_tokens_seen": 180397884, "step": 3220 }, { "epoch": 7.171492204899777, "loss": 0.4930199384689331, "loss_ce": 0.00022211679606698453, "loss_iou": 0.2197265625, "loss_num": 0.01043701171875, "loss_xval": 0.4921875, "num_input_tokens_seen": 180397884, "step": 3220 }, { "epoch": 7.173719376391982, "grad_norm": 14.755711555480957, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 180452508, "step": 3221 }, { "epoch": 7.173719376391982, "loss": 0.5091297030448914, "loss_ce": 0.00021859334083274007, "loss_iou": 0.2138671875, "loss_num": 0.0164794921875, "loss_xval": 0.5078125, "num_input_tokens_seen": 180452508, "step": 3221 }, { "epoch": 7.1759465478841875, "grad_norm": 17.953371047973633, "learning_rate": 1e-06, "loss": 0.5174, "num_input_tokens_seen": 180510496, "step": 3222 }, { "epoch": 7.1759465478841875, "loss": 0.6082879304885864, "loss_ce": 0.0004388437664601952, "loss_iou": 0.25390625, "loss_num": 0.0203857421875, "loss_xval": 0.609375, "num_input_tokens_seen": 180510496, "step": 3222 }, { "epoch": 7.178173719376392, "grad_norm": 27.752344131469727, "learning_rate": 1e-06, "loss": 0.6884, "num_input_tokens_seen": 180567600, "step": 3223 }, { "epoch": 7.178173719376392, "loss": 0.8676443696022034, "loss_ce": 0.00021275154722388834, "loss_iou": 0.35546875, "loss_num": 0.03173828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 180567600, "step": 3223 }, { "epoch": 7.180400890868597, "grad_norm": 20.237045288085938, "learning_rate": 1e-06, "loss": 0.7489, "num_input_tokens_seen": 180622160, "step": 3224 }, { "epoch": 7.180400890868597, "loss": 0.7350777387619019, "loss_ce": 0.0002144520840374753, "loss_iou": 0.318359375, "loss_num": 0.01953125, "loss_xval": 0.734375, "num_input_tokens_seen": 180622160, "step": 3224 }, { "epoch": 7.182628062360802, "grad_norm": 17.729013442993164, "learning_rate": 1e-06, "loss": 0.6288, "num_input_tokens_seen": 180678492, "step": 3225 }, { "epoch": 7.182628062360802, "loss": 0.7737811803817749, "loss_ce": 0.00034367607440799475, "loss_iou": 0.3515625, "loss_num": 0.01458740234375, "loss_xval": 0.7734375, "num_input_tokens_seen": 180678492, "step": 3225 }, { "epoch": 7.184855233853007, "grad_norm": 17.447938919067383, "learning_rate": 1e-06, "loss": 0.8197, "num_input_tokens_seen": 180734308, "step": 3226 }, { "epoch": 7.184855233853007, "loss": 0.6977049708366394, "loss_ce": 0.00019519682973623276, "loss_iou": 0.298828125, "loss_num": 0.02001953125, "loss_xval": 0.69921875, "num_input_tokens_seen": 180734308, "step": 3226 }, { "epoch": 7.187082405345212, "grad_norm": 17.61049461364746, "learning_rate": 1e-06, "loss": 0.7649, "num_input_tokens_seen": 180789952, "step": 3227 }, { "epoch": 7.187082405345212, "loss": 0.6152649521827698, "loss_ce": 0.00027470148052088916, "loss_iou": 0.255859375, "loss_num": 0.0206298828125, "loss_xval": 0.61328125, "num_input_tokens_seen": 180789952, "step": 3227 }, { "epoch": 7.189309576837417, "grad_norm": 17.236371994018555, "learning_rate": 1e-06, "loss": 0.6585, "num_input_tokens_seen": 180844252, "step": 3228 }, { "epoch": 7.189309576837417, "loss": 0.5438805818557739, "loss_ce": 0.00017939825193025172, "loss_iou": 0.25, "loss_num": 0.00823974609375, "loss_xval": 0.54296875, "num_input_tokens_seen": 180844252, "step": 3228 }, { "epoch": 7.1915367483296215, "grad_norm": 15.305548667907715, "learning_rate": 1e-06, "loss": 0.543, "num_input_tokens_seen": 180898788, "step": 3229 }, { "epoch": 7.1915367483296215, "loss": 0.5575687885284424, "loss_ce": 0.00019571834127418697, "loss_iou": 0.2421875, "loss_num": 0.0146484375, "loss_xval": 0.55859375, "num_input_tokens_seen": 180898788, "step": 3229 }, { "epoch": 7.193763919821826, "grad_norm": 36.117584228515625, "learning_rate": 1e-06, "loss": 0.8034, "num_input_tokens_seen": 180955360, "step": 3230 }, { "epoch": 7.193763919821826, "loss": 0.7606761455535889, "loss_ce": 0.00017807254334911704, "loss_iou": 0.31640625, "loss_num": 0.0257568359375, "loss_xval": 0.76171875, "num_input_tokens_seen": 180955360, "step": 3230 }, { "epoch": 7.195991091314031, "grad_norm": 26.037805557250977, "learning_rate": 1e-06, "loss": 0.5336, "num_input_tokens_seen": 181010364, "step": 3231 }, { "epoch": 7.195991091314031, "loss": 0.4867013990879059, "loss_ce": 0.0001901786308735609, "loss_iou": 0.2119140625, "loss_num": 0.012451171875, "loss_xval": 0.486328125, "num_input_tokens_seen": 181010364, "step": 3231 }, { "epoch": 7.198218262806236, "grad_norm": 20.942747116088867, "learning_rate": 1e-06, "loss": 0.6008, "num_input_tokens_seen": 181065840, "step": 3232 }, { "epoch": 7.198218262806236, "loss": 0.5934904217720032, "loss_ce": 0.00022868883388582617, "loss_iou": 0.25390625, "loss_num": 0.01708984375, "loss_xval": 0.59375, "num_input_tokens_seen": 181065840, "step": 3232 }, { "epoch": 7.200445434298441, "grad_norm": 14.775635719299316, "learning_rate": 1e-06, "loss": 0.6616, "num_input_tokens_seen": 181122380, "step": 3233 }, { "epoch": 7.200445434298441, "loss": 0.5939116477966309, "loss_ce": 0.0001616643276065588, "loss_iou": 0.267578125, "loss_num": 0.0115966796875, "loss_xval": 0.59375, "num_input_tokens_seen": 181122380, "step": 3233 }, { "epoch": 7.202672605790646, "grad_norm": 16.27145004272461, "learning_rate": 1e-06, "loss": 0.84, "num_input_tokens_seen": 181179864, "step": 3234 }, { "epoch": 7.202672605790646, "loss": 0.9337013959884644, "loss_ce": 0.0008400785736739635, "loss_iou": 0.38671875, "loss_num": 0.03173828125, "loss_xval": 0.93359375, "num_input_tokens_seen": 181179864, "step": 3234 }, { "epoch": 7.204899777282851, "grad_norm": 29.473827362060547, "learning_rate": 1e-06, "loss": 0.7444, "num_input_tokens_seen": 181233212, "step": 3235 }, { "epoch": 7.204899777282851, "loss": 0.8354470729827881, "loss_ce": 0.0004861447378061712, "loss_iou": 0.3671875, "loss_num": 0.020263671875, "loss_xval": 0.8359375, "num_input_tokens_seen": 181233212, "step": 3235 }, { "epoch": 7.2071269487750556, "grad_norm": 22.71228790283203, "learning_rate": 1e-06, "loss": 0.6487, "num_input_tokens_seen": 181288260, "step": 3236 }, { "epoch": 7.2071269487750556, "loss": 0.7758283615112305, "loss_ce": 0.00019357707060407847, "loss_iou": 0.32421875, "loss_num": 0.025146484375, "loss_xval": 0.77734375, "num_input_tokens_seen": 181288260, "step": 3236 }, { "epoch": 7.20935412026726, "grad_norm": 16.81915283203125, "learning_rate": 1e-06, "loss": 0.7454, "num_input_tokens_seen": 181343576, "step": 3237 }, { "epoch": 7.20935412026726, "loss": 0.8085384368896484, "loss_ce": 0.00031090632546693087, "loss_iou": 0.357421875, "loss_num": 0.018310546875, "loss_xval": 0.80859375, "num_input_tokens_seen": 181343576, "step": 3237 }, { "epoch": 7.211581291759465, "grad_norm": 15.39242172241211, "learning_rate": 1e-06, "loss": 0.5735, "num_input_tokens_seen": 181400228, "step": 3238 }, { "epoch": 7.211581291759465, "loss": 0.6538634300231934, "loss_ce": 0.0002989704080391675, "loss_iou": 0.279296875, "loss_num": 0.018798828125, "loss_xval": 0.65234375, "num_input_tokens_seen": 181400228, "step": 3238 }, { "epoch": 7.21380846325167, "grad_norm": 59.97997283935547, "learning_rate": 1e-06, "loss": 0.6083, "num_input_tokens_seen": 181458112, "step": 3239 }, { "epoch": 7.21380846325167, "loss": 0.7236316204071045, "loss_ce": 0.0002429374580970034, "loss_iou": 0.318359375, "loss_num": 0.0172119140625, "loss_xval": 0.72265625, "num_input_tokens_seen": 181458112, "step": 3239 }, { "epoch": 7.216035634743875, "grad_norm": 13.519067764282227, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 181514996, "step": 3240 }, { "epoch": 7.216035634743875, "loss": 0.36852580308914185, "loss_ce": 0.00017863643006421626, "loss_iou": 0.1572265625, "loss_num": 0.01080322265625, "loss_xval": 0.369140625, "num_input_tokens_seen": 181514996, "step": 3240 }, { "epoch": 7.21826280623608, "grad_norm": 26.277721405029297, "learning_rate": 1e-06, "loss": 0.4887, "num_input_tokens_seen": 181571304, "step": 3241 }, { "epoch": 7.21826280623608, "loss": 0.5072898268699646, "loss_ce": 0.0006980198086239398, "loss_iou": 0.2265625, "loss_num": 0.01055908203125, "loss_xval": 0.5078125, "num_input_tokens_seen": 181571304, "step": 3241 }, { "epoch": 7.220489977728285, "grad_norm": 146.0947265625, "learning_rate": 1e-06, "loss": 0.62, "num_input_tokens_seen": 181626812, "step": 3242 }, { "epoch": 7.220489977728285, "loss": 0.640555202960968, "loss_ce": 0.000174342465470545, "loss_iou": 0.294921875, "loss_num": 0.010009765625, "loss_xval": 0.640625, "num_input_tokens_seen": 181626812, "step": 3242 }, { "epoch": 7.22271714922049, "grad_norm": 26.887720108032227, "learning_rate": 1e-06, "loss": 0.8177, "num_input_tokens_seen": 181681800, "step": 3243 }, { "epoch": 7.22271714922049, "loss": 0.6879905462265015, "loss_ce": 0.0004905810346826911, "loss_iou": 0.296875, "loss_num": 0.018798828125, "loss_xval": 0.6875, "num_input_tokens_seen": 181681800, "step": 3243 }, { "epoch": 7.224944320712694, "grad_norm": 17.908056259155273, "learning_rate": 1e-06, "loss": 0.6145, "num_input_tokens_seen": 181736656, "step": 3244 }, { "epoch": 7.224944320712694, "loss": 0.5160667896270752, "loss_ce": 0.00019765175238717347, "loss_iou": 0.2314453125, "loss_num": 0.0107421875, "loss_xval": 0.515625, "num_input_tokens_seen": 181736656, "step": 3244 }, { "epoch": 7.2271714922049, "grad_norm": 13.982433319091797, "learning_rate": 1e-06, "loss": 0.5749, "num_input_tokens_seen": 181794728, "step": 3245 }, { "epoch": 7.2271714922049, "loss": 0.4950827658176422, "loss_ce": 0.0002097318647429347, "loss_iou": 0.1953125, "loss_num": 0.02099609375, "loss_xval": 0.494140625, "num_input_tokens_seen": 181794728, "step": 3245 }, { "epoch": 7.229398663697105, "grad_norm": 19.076622009277344, "learning_rate": 1e-06, "loss": 0.5997, "num_input_tokens_seen": 181850576, "step": 3246 }, { "epoch": 7.229398663697105, "loss": 0.5747532844543457, "loss_ce": 0.00029037147760391235, "loss_iou": 0.23046875, "loss_num": 0.0225830078125, "loss_xval": 0.57421875, "num_input_tokens_seen": 181850576, "step": 3246 }, { "epoch": 7.23162583518931, "grad_norm": 36.7861442565918, "learning_rate": 1e-06, "loss": 0.8128, "num_input_tokens_seen": 181905156, "step": 3247 }, { "epoch": 7.23162583518931, "loss": 0.9604984521865845, "loss_ce": 0.0010258200345560908, "loss_iou": 0.40234375, "loss_num": 0.03076171875, "loss_xval": 0.9609375, "num_input_tokens_seen": 181905156, "step": 3247 }, { "epoch": 7.233853006681515, "grad_norm": 28.286663055419922, "learning_rate": 1e-06, "loss": 0.7149, "num_input_tokens_seen": 181961164, "step": 3248 }, { "epoch": 7.233853006681515, "loss": 0.7203893065452576, "loss_ce": 0.0001744742039591074, "loss_iou": 0.33203125, "loss_num": 0.01141357421875, "loss_xval": 0.71875, "num_input_tokens_seen": 181961164, "step": 3248 }, { "epoch": 7.23608017817372, "grad_norm": 18.691064834594727, "learning_rate": 1e-06, "loss": 0.6303, "num_input_tokens_seen": 182020592, "step": 3249 }, { "epoch": 7.23608017817372, "loss": 0.6998783946037292, "loss_ce": 0.00017135526286438107, "loss_iou": 0.296875, "loss_num": 0.0211181640625, "loss_xval": 0.69921875, "num_input_tokens_seen": 182020592, "step": 3249 }, { "epoch": 7.2383073496659245, "grad_norm": 15.494166374206543, "learning_rate": 1e-06, "loss": 0.6059, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "eval_seeclick_web_CIoU": 0.5736072361469269, "eval_seeclick_web_GIoU": 0.5713272094726562, "eval_seeclick_web_IoU": 0.5900914669036865, "eval_seeclick_web_MAE_all": 0.01693468587473035, "eval_seeclick_web_MAE_h": 0.009493312099948525, "eval_seeclick_web_MAE_w": 0.017465373501181602, "eval_seeclick_web_MAE_x_boxes": 0.009315244387835264, "eval_seeclick_web_MAE_y_boxes": 0.022717589512467384, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9355779886245728, "eval_seeclick_web_loss_ce": 0.00025881038163788617, "eval_seeclick_web_loss_iou": 0.4263916015625, "eval_seeclick_web_loss_num": 0.013437271118164062, "eval_seeclick_web_loss_xval": 0.920166015625, "eval_seeclick_web_runtime": 22.493, "eval_seeclick_web_samples_per_second": 2.223, "eval_seeclick_web_steps_per_second": 0.089, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "eval_icons_CIoU": 0.2808741182088852, "eval_icons_GIoU": 0.3052249103784561, "eval_icons_IoU": 0.3607981503009796, "eval_icons_MAE_all": 0.06516656465828419, "eval_icons_MAE_h": 0.03764822334051132, "eval_icons_MAE_w": 0.07068785838782787, "eval_icons_MAE_x_boxes": 0.06141933798789978, "eval_icons_MAE_y_boxes": 0.03828867059201002, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7471874952316284, "eval_icons_loss_ce": 0.0003105820360360667, "eval_icons_loss_iou": 0.67919921875, "eval_icons_loss_num": 0.06205558776855469, "eval_icons_loss_xval": 1.66943359375, "eval_icons_runtime": 21.7009, "eval_icons_samples_per_second": 2.304, "eval_icons_steps_per_second": 0.092, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "eval_screenspot_CIoU": 0.342861811319987, "eval_screenspot_GIoU": 0.3614034950733185, "eval_screenspot_IoU": 0.42649880051612854, "eval_screenspot_MAE_all": 0.06335503856341045, "eval_screenspot_MAE_h": 0.03755492903292179, "eval_screenspot_MAE_w": 0.07218488802512486, "eval_screenspot_MAE_x_boxes": 0.07481345720589161, "eval_screenspot_MAE_y_boxes": 0.04668992726753155, "eval_screenspot_inside_bbox": 0.6729166706403097, "eval_screenspot_loss": 1.6553105115890503, "eval_screenspot_loss_ce": 0.00030076557110684615, "eval_screenspot_loss_iou": 0.6756998697916666, "eval_screenspot_loss_num": 0.07468668619791667, "eval_screenspot_loss_xval": 1.7252604166666667, "eval_screenspot_runtime": 36.4126, "eval_screenspot_samples_per_second": 2.444, "eval_screenspot_steps_per_second": 0.082, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "eval_compot_CIoU": 0.35138705372810364, "eval_compot_GIoU": 0.3623329848051071, "eval_compot_IoU": 0.4089464396238327, "eval_compot_MAE_all": 0.01784850051626563, "eval_compot_MAE_h": 0.009071170818060637, "eval_compot_MAE_w": 0.02207251265645027, "eval_compot_MAE_x_boxes": 0.029478789307177067, "eval_compot_MAE_y_boxes": 0.0062866308726370335, "eval_compot_inside_bbox": 0.6458333432674408, "eval_compot_loss": 1.3766494989395142, "eval_compot_loss_ce": 0.00024336049682460725, "eval_compot_loss_iou": 0.6270751953125, "eval_compot_loss_num": 0.0166168212890625, "eval_compot_loss_xval": 1.3369140625, "eval_compot_runtime": 22.3641, "eval_compot_samples_per_second": 2.236, "eval_compot_steps_per_second": 0.089, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "eval_custom_ui_val_CIoU": 0.46513410409291583, "eval_custom_ui_val_GIoU": 0.47927956614229417, "eval_custom_ui_val_IoU": 0.5227026873164706, "eval_custom_ui_val_MAE_all": 0.03009833147128423, "eval_custom_ui_val_MAE_h": 0.017143823982526857, "eval_custom_ui_val_MAE_w": 0.038110896220637694, "eval_custom_ui_val_MAE_x_boxes": 0.03446255738122596, "eval_custom_ui_val_MAE_y_boxes": 0.015643620294415288, "eval_custom_ui_val_inside_bbox": 0.7527006202273898, "eval_custom_ui_val_loss": 1.198036789894104, "eval_custom_ui_val_loss_ce": 0.000274412335582181, "eval_custom_ui_val_loss_iou": 0.5103217230902778, "eval_custom_ui_val_loss_num": 0.028072569105360243, "eval_custom_ui_val_loss_xval": 1.1607801649305556, "eval_custom_ui_val_runtime": 60.3528, "eval_custom_ui_val_samples_per_second": 4.391, "eval_custom_ui_val_steps_per_second": 0.149, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.2383073496659245, "loss": 0.9084522724151611, "loss_ce": 0.00024910393403843045, "loss_iou": 0.396484375, "loss_num": 0.023193359375, "loss_xval": 0.90625, "num_input_tokens_seen": 182075240, "step": 3250 }, { "epoch": 7.240534521158129, "grad_norm": 24.0264892578125, "learning_rate": 1e-06, "loss": 0.7723, "num_input_tokens_seen": 182134612, "step": 3251 }, { "epoch": 7.240534521158129, "loss": 0.7384548187255859, "loss_ce": 0.00017352268332615495, "loss_iou": 0.291015625, "loss_num": 0.03125, "loss_xval": 0.73828125, "num_input_tokens_seen": 182134612, "step": 3251 }, { "epoch": 7.242761692650334, "grad_norm": 17.387958526611328, "learning_rate": 1e-06, "loss": 0.4481, "num_input_tokens_seen": 182192124, "step": 3252 }, { "epoch": 7.242761692650334, "loss": 0.40355396270751953, "loss_ce": 0.004872324876487255, "loss_iou": 0.171875, "loss_num": 0.01104736328125, "loss_xval": 0.3984375, "num_input_tokens_seen": 182192124, "step": 3252 }, { "epoch": 7.244988864142539, "grad_norm": 26.076507568359375, "learning_rate": 1e-06, "loss": 0.6101, "num_input_tokens_seen": 182248140, "step": 3253 }, { "epoch": 7.244988864142539, "loss": 0.5168513059616089, "loss_ce": 0.0002497411041986197, "loss_iou": 0.220703125, "loss_num": 0.01513671875, "loss_xval": 0.515625, "num_input_tokens_seen": 182248140, "step": 3253 }, { "epoch": 7.247216035634744, "grad_norm": 15.936118125915527, "learning_rate": 1e-06, "loss": 0.6972, "num_input_tokens_seen": 182302560, "step": 3254 }, { "epoch": 7.247216035634744, "loss": 0.6615881323814392, "loss_ce": 0.00027220824267715216, "loss_iou": 0.251953125, "loss_num": 0.031982421875, "loss_xval": 0.66015625, "num_input_tokens_seen": 182302560, "step": 3254 }, { "epoch": 7.249443207126949, "grad_norm": 21.23505210876465, "learning_rate": 1e-06, "loss": 0.6678, "num_input_tokens_seen": 182359212, "step": 3255 }, { "epoch": 7.249443207126949, "loss": 0.6819499135017395, "loss_ce": 0.00024824903812259436, "loss_iou": 0.263671875, "loss_num": 0.030517578125, "loss_xval": 0.68359375, "num_input_tokens_seen": 182359212, "step": 3255 }, { "epoch": 7.251670378619154, "grad_norm": 14.325440406799316, "learning_rate": 1e-06, "loss": 0.6858, "num_input_tokens_seen": 182412908, "step": 3256 }, { "epoch": 7.251670378619154, "loss": 0.6170239448547363, "loss_ce": 0.0008129666093736887, "loss_iou": 0.251953125, "loss_num": 0.022705078125, "loss_xval": 0.6171875, "num_input_tokens_seen": 182412908, "step": 3256 }, { "epoch": 7.2538975501113585, "grad_norm": 16.26470184326172, "learning_rate": 1e-06, "loss": 0.9041, "num_input_tokens_seen": 182471924, "step": 3257 }, { "epoch": 7.2538975501113585, "loss": 0.7431246042251587, "loss_ce": 0.00044888071715831757, "loss_iou": 0.310546875, "loss_num": 0.0240478515625, "loss_xval": 0.7421875, "num_input_tokens_seen": 182471924, "step": 3257 }, { "epoch": 7.256124721603563, "grad_norm": 18.07449722290039, "learning_rate": 1e-06, "loss": 0.5345, "num_input_tokens_seen": 182526908, "step": 3258 }, { "epoch": 7.256124721603563, "loss": 0.549538791179657, "loss_ce": 0.00022235384676605463, "loss_iou": 0.2421875, "loss_num": 0.012939453125, "loss_xval": 0.55078125, "num_input_tokens_seen": 182526908, "step": 3258 }, { "epoch": 7.258351893095768, "grad_norm": 15.7253999710083, "learning_rate": 1e-06, "loss": 0.6923, "num_input_tokens_seen": 182583592, "step": 3259 }, { "epoch": 7.258351893095768, "loss": 0.9013206958770752, "loss_ce": 0.001052145496942103, "loss_iou": 0.4140625, "loss_num": 0.01446533203125, "loss_xval": 0.8984375, "num_input_tokens_seen": 182583592, "step": 3259 }, { "epoch": 7.260579064587973, "grad_norm": 20.088714599609375, "learning_rate": 1e-06, "loss": 0.535, "num_input_tokens_seen": 182641252, "step": 3260 }, { "epoch": 7.260579064587973, "loss": 0.5077439546585083, "loss_ce": 0.00017562185530550778, "loss_iou": 0.228515625, "loss_num": 0.01019287109375, "loss_xval": 0.5078125, "num_input_tokens_seen": 182641252, "step": 3260 }, { "epoch": 7.262806236080178, "grad_norm": 15.09283447265625, "learning_rate": 1e-06, "loss": 0.5227, "num_input_tokens_seen": 182697120, "step": 3261 }, { "epoch": 7.262806236080178, "loss": 0.43932655453681946, "loss_ce": 0.00017860504158306867, "loss_iou": 0.1826171875, "loss_num": 0.01470947265625, "loss_xval": 0.439453125, "num_input_tokens_seen": 182697120, "step": 3261 }, { "epoch": 7.265033407572383, "grad_norm": 19.676998138427734, "learning_rate": 1e-06, "loss": 0.6918, "num_input_tokens_seen": 182752892, "step": 3262 }, { "epoch": 7.265033407572383, "loss": 0.630585789680481, "loss_ce": 0.00021469607600010931, "loss_iou": 0.275390625, "loss_num": 0.016357421875, "loss_xval": 0.62890625, "num_input_tokens_seen": 182752892, "step": 3262 }, { "epoch": 7.267260579064588, "grad_norm": 110.68122863769531, "learning_rate": 1e-06, "loss": 0.7325, "num_input_tokens_seen": 182810764, "step": 3263 }, { "epoch": 7.267260579064588, "loss": 0.7132278084754944, "loss_ce": 0.0002150905056623742, "loss_iou": 0.310546875, "loss_num": 0.01806640625, "loss_xval": 0.71484375, "num_input_tokens_seen": 182810764, "step": 3263 }, { "epoch": 7.2694877505567925, "grad_norm": 23.634044647216797, "learning_rate": 1e-06, "loss": 0.7038, "num_input_tokens_seen": 182868244, "step": 3264 }, { "epoch": 7.2694877505567925, "loss": 0.6935637593269348, "loss_ce": 0.002157476032152772, "loss_iou": 0.28515625, "loss_num": 0.0244140625, "loss_xval": 0.69140625, "num_input_tokens_seen": 182868244, "step": 3264 }, { "epoch": 7.271714922048997, "grad_norm": 23.025278091430664, "learning_rate": 1e-06, "loss": 0.5485, "num_input_tokens_seen": 182924824, "step": 3265 }, { "epoch": 7.271714922048997, "loss": 0.5591219067573547, "loss_ce": 0.00028401942108757794, "loss_iou": 0.2578125, "loss_num": 0.00860595703125, "loss_xval": 0.55859375, "num_input_tokens_seen": 182924824, "step": 3265 }, { "epoch": 7.273942093541203, "grad_norm": 15.47992992401123, "learning_rate": 1e-06, "loss": 0.6323, "num_input_tokens_seen": 182979672, "step": 3266 }, { "epoch": 7.273942093541203, "loss": 0.7570323944091797, "loss_ce": 0.00019647592853289098, "loss_iou": 0.31640625, "loss_num": 0.0252685546875, "loss_xval": 0.7578125, "num_input_tokens_seen": 182979672, "step": 3266 }, { "epoch": 7.276169265033408, "grad_norm": 19.16633415222168, "learning_rate": 1e-06, "loss": 0.6991, "num_input_tokens_seen": 183035308, "step": 3267 }, { "epoch": 7.276169265033408, "loss": 0.8601139783859253, "loss_ce": 0.00037270993925631046, "loss_iou": 0.34765625, "loss_num": 0.03271484375, "loss_xval": 0.859375, "num_input_tokens_seen": 183035308, "step": 3267 }, { "epoch": 7.278396436525613, "grad_norm": 17.217559814453125, "learning_rate": 1e-06, "loss": 0.8387, "num_input_tokens_seen": 183093172, "step": 3268 }, { "epoch": 7.278396436525613, "loss": 0.6842477321624756, "loss_ce": 0.00016567618877161294, "loss_iou": 0.2734375, "loss_num": 0.0274658203125, "loss_xval": 0.68359375, "num_input_tokens_seen": 183093172, "step": 3268 }, { "epoch": 7.280623608017818, "grad_norm": 16.485475540161133, "learning_rate": 1e-06, "loss": 0.6404, "num_input_tokens_seen": 183148780, "step": 3269 }, { "epoch": 7.280623608017818, "loss": 0.7775435447692871, "loss_ce": 0.00019982852973043919, "loss_iou": 0.328125, "loss_num": 0.0240478515625, "loss_xval": 0.77734375, "num_input_tokens_seen": 183148780, "step": 3269 }, { "epoch": 7.282850779510023, "grad_norm": 14.587860107421875, "learning_rate": 1e-06, "loss": 0.6758, "num_input_tokens_seen": 183205300, "step": 3270 }, { "epoch": 7.282850779510023, "loss": 0.5569312572479248, "loss_ce": 0.0001685761963017285, "loss_iou": 0.2197265625, "loss_num": 0.0233154296875, "loss_xval": 0.55859375, "num_input_tokens_seen": 183205300, "step": 3270 }, { "epoch": 7.285077951002227, "grad_norm": 20.71013832092285, "learning_rate": 1e-06, "loss": 0.4886, "num_input_tokens_seen": 183261900, "step": 3271 }, { "epoch": 7.285077951002227, "loss": 0.45970138907432556, "loss_ce": 0.00022872036788612604, "loss_iou": 0.1806640625, "loss_num": 0.01953125, "loss_xval": 0.458984375, "num_input_tokens_seen": 183261900, "step": 3271 }, { "epoch": 7.287305122494432, "grad_norm": 15.918169975280762, "learning_rate": 1e-06, "loss": 0.8508, "num_input_tokens_seen": 183317060, "step": 3272 }, { "epoch": 7.287305122494432, "loss": 0.6296719312667847, "loss_ce": 0.00015531133976764977, "loss_iou": 0.26171875, "loss_num": 0.0213623046875, "loss_xval": 0.62890625, "num_input_tokens_seen": 183317060, "step": 3272 }, { "epoch": 7.289532293986637, "grad_norm": 13.940874099731445, "learning_rate": 1e-06, "loss": 0.6049, "num_input_tokens_seen": 183372896, "step": 3273 }, { "epoch": 7.289532293986637, "loss": 0.6454752087593079, "loss_ce": 0.00021151437249500304, "loss_iou": 0.263671875, "loss_num": 0.0233154296875, "loss_xval": 0.64453125, "num_input_tokens_seen": 183372896, "step": 3273 }, { "epoch": 7.291759465478842, "grad_norm": 13.392332077026367, "learning_rate": 1e-06, "loss": 0.5429, "num_input_tokens_seen": 183431176, "step": 3274 }, { "epoch": 7.291759465478842, "loss": 0.5099714398384094, "loss_ce": 0.00020580792624969035, "loss_iou": 0.2314453125, "loss_num": 0.0093994140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 183431176, "step": 3274 }, { "epoch": 7.293986636971047, "grad_norm": 31.767141342163086, "learning_rate": 1e-06, "loss": 0.6066, "num_input_tokens_seen": 183485436, "step": 3275 }, { "epoch": 7.293986636971047, "loss": 0.7388736605644226, "loss_ce": 0.00022620504023507237, "loss_iou": 0.287109375, "loss_num": 0.03271484375, "loss_xval": 0.73828125, "num_input_tokens_seen": 183485436, "step": 3275 }, { "epoch": 7.296213808463252, "grad_norm": 27.66456413269043, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 183542084, "step": 3276 }, { "epoch": 7.296213808463252, "loss": 0.4608932137489319, "loss_ce": 0.00019985750259365886, "loss_iou": 0.203125, "loss_num": 0.010986328125, "loss_xval": 0.4609375, "num_input_tokens_seen": 183542084, "step": 3276 }, { "epoch": 7.298440979955457, "grad_norm": 23.609827041625977, "learning_rate": 1e-06, "loss": 0.7595, "num_input_tokens_seen": 183597960, "step": 3277 }, { "epoch": 7.298440979955457, "loss": 0.683929443359375, "loss_ce": 0.0003357165842317045, "loss_iou": 0.3046875, "loss_num": 0.01446533203125, "loss_xval": 0.68359375, "num_input_tokens_seen": 183597960, "step": 3277 }, { "epoch": 7.3006681514476615, "grad_norm": 19.84480094909668, "learning_rate": 1e-06, "loss": 0.5777, "num_input_tokens_seen": 183655488, "step": 3278 }, { "epoch": 7.3006681514476615, "loss": 0.7001070976257324, "loss_ce": 0.00015596086450386792, "loss_iou": 0.3046875, "loss_num": 0.017822265625, "loss_xval": 0.69921875, "num_input_tokens_seen": 183655488, "step": 3278 }, { "epoch": 7.302895322939866, "grad_norm": 20.375669479370117, "learning_rate": 1e-06, "loss": 0.6575, "num_input_tokens_seen": 183712908, "step": 3279 }, { "epoch": 7.302895322939866, "loss": 0.7356512546539307, "loss_ce": 0.00029969203751534224, "loss_iou": 0.30859375, "loss_num": 0.0235595703125, "loss_xval": 0.734375, "num_input_tokens_seen": 183712908, "step": 3279 }, { "epoch": 7.305122494432071, "grad_norm": 24.645275115966797, "learning_rate": 1e-06, "loss": 0.6105, "num_input_tokens_seen": 183770608, "step": 3280 }, { "epoch": 7.305122494432071, "loss": 0.6635428667068481, "loss_ce": 0.0002127783081959933, "loss_iou": 0.30078125, "loss_num": 0.01239013671875, "loss_xval": 0.6640625, "num_input_tokens_seen": 183770608, "step": 3280 }, { "epoch": 7.307349665924276, "grad_norm": 14.512449264526367, "learning_rate": 1e-06, "loss": 0.7044, "num_input_tokens_seen": 183825576, "step": 3281 }, { "epoch": 7.307349665924276, "loss": 0.9417954087257385, "loss_ce": 0.00038921748637221754, "loss_iou": 0.384765625, "loss_num": 0.0341796875, "loss_xval": 0.94140625, "num_input_tokens_seen": 183825576, "step": 3281 }, { "epoch": 7.309576837416481, "grad_norm": 19.870107650756836, "learning_rate": 1e-06, "loss": 0.6247, "num_input_tokens_seen": 183881588, "step": 3282 }, { "epoch": 7.309576837416481, "loss": 0.8251048922538757, "loss_ce": 0.00015370306209661067, "loss_iou": 0.345703125, "loss_num": 0.026611328125, "loss_xval": 0.82421875, "num_input_tokens_seen": 183881588, "step": 3282 }, { "epoch": 7.311804008908686, "grad_norm": 55.68107604980469, "learning_rate": 1e-06, "loss": 0.5833, "num_input_tokens_seen": 183940448, "step": 3283 }, { "epoch": 7.311804008908686, "loss": 0.564154326915741, "loss_ce": 0.00018950334924738854, "loss_iou": 0.244140625, "loss_num": 0.0150146484375, "loss_xval": 0.5625, "num_input_tokens_seen": 183940448, "step": 3283 }, { "epoch": 7.314031180400891, "grad_norm": 22.890865325927734, "learning_rate": 1e-06, "loss": 0.5107, "num_input_tokens_seen": 183997440, "step": 3284 }, { "epoch": 7.314031180400891, "loss": 0.40723344683647156, "loss_ce": 0.000251030403887853, "loss_iou": 0.185546875, "loss_num": 0.00732421875, "loss_xval": 0.40625, "num_input_tokens_seen": 183997440, "step": 3284 }, { "epoch": 7.3162583518930955, "grad_norm": 38.493743896484375, "learning_rate": 1e-06, "loss": 0.7763, "num_input_tokens_seen": 184054300, "step": 3285 }, { "epoch": 7.3162583518930955, "loss": 0.7445439100265503, "loss_ce": 0.00015913081006146967, "loss_iou": 0.330078125, "loss_num": 0.01708984375, "loss_xval": 0.74609375, "num_input_tokens_seen": 184054300, "step": 3285 }, { "epoch": 7.3184855233853, "grad_norm": 12.925439834594727, "learning_rate": 1e-06, "loss": 0.4989, "num_input_tokens_seen": 184109764, "step": 3286 }, { "epoch": 7.3184855233853, "loss": 0.5402973294258118, "loss_ce": 0.00025825732154771686, "loss_iou": 0.240234375, "loss_num": 0.01171875, "loss_xval": 0.5390625, "num_input_tokens_seen": 184109764, "step": 3286 }, { "epoch": 7.320712694877505, "grad_norm": 14.311683654785156, "learning_rate": 1e-06, "loss": 0.7607, "num_input_tokens_seen": 184166476, "step": 3287 }, { "epoch": 7.320712694877505, "loss": 0.5723039507865906, "loss_ce": 0.0004045310488436371, "loss_iou": 0.255859375, "loss_num": 0.01214599609375, "loss_xval": 0.5703125, "num_input_tokens_seen": 184166476, "step": 3287 }, { "epoch": 7.32293986636971, "grad_norm": 21.740203857421875, "learning_rate": 1e-06, "loss": 0.7618, "num_input_tokens_seen": 184220904, "step": 3288 }, { "epoch": 7.32293986636971, "loss": 0.7254713773727417, "loss_ce": 0.00031269137980416417, "loss_iou": 0.283203125, "loss_num": 0.031982421875, "loss_xval": 0.7265625, "num_input_tokens_seen": 184220904, "step": 3288 }, { "epoch": 7.325167037861915, "grad_norm": 20.65580177307129, "learning_rate": 1e-06, "loss": 0.7546, "num_input_tokens_seen": 184277784, "step": 3289 }, { "epoch": 7.325167037861915, "loss": 0.6745328903198242, "loss_ce": 0.0002165070327464491, "loss_iou": 0.271484375, "loss_num": 0.0262451171875, "loss_xval": 0.67578125, "num_input_tokens_seen": 184277784, "step": 3289 }, { "epoch": 7.327394209354121, "grad_norm": 22.88839340209961, "learning_rate": 1e-06, "loss": 0.6154, "num_input_tokens_seen": 184334484, "step": 3290 }, { "epoch": 7.327394209354121, "loss": 0.6290842294692993, "loss_ce": 0.0004221061826683581, "loss_iou": 0.287109375, "loss_num": 0.01129150390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 184334484, "step": 3290 }, { "epoch": 7.3296213808463255, "grad_norm": 19.395837783813477, "learning_rate": 1e-06, "loss": 0.8562, "num_input_tokens_seen": 184389964, "step": 3291 }, { "epoch": 7.3296213808463255, "loss": 0.7204995155334473, "loss_ce": 0.00028470673714764416, "loss_iou": 0.30078125, "loss_num": 0.023681640625, "loss_xval": 0.71875, "num_input_tokens_seen": 184389964, "step": 3291 }, { "epoch": 7.33184855233853, "grad_norm": 18.469728469848633, "learning_rate": 1e-06, "loss": 0.6447, "num_input_tokens_seen": 184441476, "step": 3292 }, { "epoch": 7.33184855233853, "loss": 0.6571934819221497, "loss_ce": 0.0002110537316184491, "loss_iou": 0.2890625, "loss_num": 0.0162353515625, "loss_xval": 0.65625, "num_input_tokens_seen": 184441476, "step": 3292 }, { "epoch": 7.334075723830735, "grad_norm": 21.28434944152832, "learning_rate": 1e-06, "loss": 0.637, "num_input_tokens_seen": 184499576, "step": 3293 }, { "epoch": 7.334075723830735, "loss": 0.7633267045021057, "loss_ce": 0.00026520711253397167, "loss_iou": 0.294921875, "loss_num": 0.034912109375, "loss_xval": 0.76171875, "num_input_tokens_seen": 184499576, "step": 3293 }, { "epoch": 7.33630289532294, "grad_norm": 14.56684398651123, "learning_rate": 1e-06, "loss": 0.7462, "num_input_tokens_seen": 184557016, "step": 3294 }, { "epoch": 7.33630289532294, "loss": 0.6941665410995483, "loss_ce": 0.0001968404685612768, "loss_iou": 0.30859375, "loss_num": 0.01519775390625, "loss_xval": 0.6953125, "num_input_tokens_seen": 184557016, "step": 3294 }, { "epoch": 7.338530066815145, "grad_norm": 33.30550003051758, "learning_rate": 1e-06, "loss": 0.7178, "num_input_tokens_seen": 184612596, "step": 3295 }, { "epoch": 7.338530066815145, "loss": 0.8899809122085571, "loss_ce": 0.00033249915577471256, "loss_iou": 0.349609375, "loss_num": 0.037841796875, "loss_xval": 0.890625, "num_input_tokens_seen": 184612596, "step": 3295 }, { "epoch": 7.34075723830735, "grad_norm": 77.08248138427734, "learning_rate": 1e-06, "loss": 0.5869, "num_input_tokens_seen": 184669968, "step": 3296 }, { "epoch": 7.34075723830735, "loss": 0.4362117350101471, "loss_ce": 0.0004207201418466866, "loss_iou": 0.189453125, "loss_num": 0.01123046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 184669968, "step": 3296 }, { "epoch": 7.342984409799555, "grad_norm": 17.855411529541016, "learning_rate": 1e-06, "loss": 0.606, "num_input_tokens_seen": 184727100, "step": 3297 }, { "epoch": 7.342984409799555, "loss": 0.5758390426635742, "loss_ce": 0.00015543993504252285, "loss_iou": 0.2353515625, "loss_num": 0.0208740234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 184727100, "step": 3297 }, { "epoch": 7.3452115812917596, "grad_norm": 18.587966918945312, "learning_rate": 1e-06, "loss": 0.5917, "num_input_tokens_seen": 184781776, "step": 3298 }, { "epoch": 7.3452115812917596, "loss": 0.4892728626728058, "loss_ce": 0.0003202102379873395, "loss_iou": 0.2109375, "loss_num": 0.01318359375, "loss_xval": 0.48828125, "num_input_tokens_seen": 184781776, "step": 3298 }, { "epoch": 7.347438752783964, "grad_norm": 45.11115646362305, "learning_rate": 1e-06, "loss": 0.5559, "num_input_tokens_seen": 184833932, "step": 3299 }, { "epoch": 7.347438752783964, "loss": 0.6328990459442139, "loss_ce": 0.00020860567747149616, "loss_iou": 0.271484375, "loss_num": 0.017822265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 184833932, "step": 3299 }, { "epoch": 7.349665924276169, "grad_norm": 22.717544555664062, "learning_rate": 1e-06, "loss": 0.5208, "num_input_tokens_seen": 184888688, "step": 3300 }, { "epoch": 7.349665924276169, "loss": 0.46978557109832764, "loss_ce": 0.00018108604126609862, "loss_iou": 0.2099609375, "loss_num": 0.00994873046875, "loss_xval": 0.46875, "num_input_tokens_seen": 184888688, "step": 3300 }, { "epoch": 7.351893095768374, "grad_norm": 20.01310920715332, "learning_rate": 1e-06, "loss": 0.5374, "num_input_tokens_seen": 184943680, "step": 3301 }, { "epoch": 7.351893095768374, "loss": 0.451114296913147, "loss_ce": 0.00018655930762179196, "loss_iou": 0.1767578125, "loss_num": 0.01953125, "loss_xval": 0.451171875, "num_input_tokens_seen": 184943680, "step": 3301 }, { "epoch": 7.354120267260579, "grad_norm": 36.787132263183594, "learning_rate": 1e-06, "loss": 0.8746, "num_input_tokens_seen": 184996220, "step": 3302 }, { "epoch": 7.354120267260579, "loss": 0.8494809865951538, "loss_ce": 0.0003598902840167284, "loss_iou": 0.33984375, "loss_num": 0.033447265625, "loss_xval": 0.84765625, "num_input_tokens_seen": 184996220, "step": 3302 }, { "epoch": 7.356347438752784, "grad_norm": 22.735063552856445, "learning_rate": 1e-06, "loss": 0.7577, "num_input_tokens_seen": 185047952, "step": 3303 }, { "epoch": 7.356347438752784, "loss": 0.5736503601074219, "loss_ce": 0.00016400867025367916, "loss_iou": 0.255859375, "loss_num": 0.01214599609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 185047952, "step": 3303 }, { "epoch": 7.358574610244989, "grad_norm": 15.140356063842773, "learning_rate": 1e-06, "loss": 0.6265, "num_input_tokens_seen": 185104504, "step": 3304 }, { "epoch": 7.358574610244989, "loss": 0.8113583922386169, "loss_ce": 0.00020119798136875033, "loss_iou": 0.34375, "loss_num": 0.0247802734375, "loss_xval": 0.8125, "num_input_tokens_seen": 185104504, "step": 3304 }, { "epoch": 7.360801781737194, "grad_norm": 25.078630447387695, "learning_rate": 1e-06, "loss": 0.7108, "num_input_tokens_seen": 185162056, "step": 3305 }, { "epoch": 7.360801781737194, "loss": 0.7138417959213257, "loss_ce": 0.00021878939878661186, "loss_iou": 0.3125, "loss_num": 0.017822265625, "loss_xval": 0.71484375, "num_input_tokens_seen": 185162056, "step": 3305 }, { "epoch": 7.363028953229398, "grad_norm": 21.57823371887207, "learning_rate": 1e-06, "loss": 0.7628, "num_input_tokens_seen": 185218544, "step": 3306 }, { "epoch": 7.363028953229398, "loss": 1.0595169067382812, "loss_ce": 0.00019062710634898394, "loss_iou": 0.466796875, "loss_num": 0.0252685546875, "loss_xval": 1.0625, "num_input_tokens_seen": 185218544, "step": 3306 }, { "epoch": 7.365256124721603, "grad_norm": 17.32356071472168, "learning_rate": 1e-06, "loss": 0.574, "num_input_tokens_seen": 185276144, "step": 3307 }, { "epoch": 7.365256124721603, "loss": 0.49967584013938904, "loss_ce": 0.00016412066179327667, "loss_iou": 0.21484375, "loss_num": 0.01397705078125, "loss_xval": 0.5, "num_input_tokens_seen": 185276144, "step": 3307 }, { "epoch": 7.367483296213808, "grad_norm": 17.607749938964844, "learning_rate": 1e-06, "loss": 0.6742, "num_input_tokens_seen": 185333240, "step": 3308 }, { "epoch": 7.367483296213808, "loss": 0.6454328298568726, "loss_ce": 0.00016912329010665417, "loss_iou": 0.27734375, "loss_num": 0.01806640625, "loss_xval": 0.64453125, "num_input_tokens_seen": 185333240, "step": 3308 }, { "epoch": 7.369710467706013, "grad_norm": 16.388914108276367, "learning_rate": 1e-06, "loss": 0.5042, "num_input_tokens_seen": 185390156, "step": 3309 }, { "epoch": 7.369710467706013, "loss": 0.4991820454597473, "loss_ce": 0.00021965075575280935, "loss_iou": 0.2275390625, "loss_num": 0.009033203125, "loss_xval": 0.498046875, "num_input_tokens_seen": 185390156, "step": 3309 }, { "epoch": 7.371937639198218, "grad_norm": 18.82445526123047, "learning_rate": 1e-06, "loss": 0.6255, "num_input_tokens_seen": 185443792, "step": 3310 }, { "epoch": 7.371937639198218, "loss": 0.8694823980331421, "loss_ce": 0.00034184064134024084, "loss_iou": 0.3671875, "loss_num": 0.02685546875, "loss_xval": 0.8671875, "num_input_tokens_seen": 185443792, "step": 3310 }, { "epoch": 7.374164810690424, "grad_norm": 23.051231384277344, "learning_rate": 1e-06, "loss": 0.5239, "num_input_tokens_seen": 185500112, "step": 3311 }, { "epoch": 7.374164810690424, "loss": 0.6354948282241821, "loss_ce": 0.00024095734988804907, "loss_iou": 0.2734375, "loss_num": 0.01806640625, "loss_xval": 0.63671875, "num_input_tokens_seen": 185500112, "step": 3311 }, { "epoch": 7.3763919821826285, "grad_norm": 12.503715515136719, "learning_rate": 1e-06, "loss": 0.6206, "num_input_tokens_seen": 185556836, "step": 3312 }, { "epoch": 7.3763919821826285, "loss": 0.6298484206199646, "loss_ce": 0.00020972295897081494, "loss_iou": 0.279296875, "loss_num": 0.01434326171875, "loss_xval": 0.62890625, "num_input_tokens_seen": 185556836, "step": 3312 }, { "epoch": 7.378619153674833, "grad_norm": 39.04258728027344, "learning_rate": 1e-06, "loss": 0.7489, "num_input_tokens_seen": 185611472, "step": 3313 }, { "epoch": 7.378619153674833, "loss": 0.6686286330223083, "loss_ce": 0.000171576626598835, "loss_iou": 0.279296875, "loss_num": 0.0216064453125, "loss_xval": 0.66796875, "num_input_tokens_seen": 185611472, "step": 3313 }, { "epoch": 7.380846325167038, "grad_norm": 20.53398323059082, "learning_rate": 1e-06, "loss": 0.5664, "num_input_tokens_seen": 185664932, "step": 3314 }, { "epoch": 7.380846325167038, "loss": 0.5182798504829407, "loss_ce": 0.00021345698041841388, "loss_iou": 0.2265625, "loss_num": 0.01275634765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 185664932, "step": 3314 }, { "epoch": 7.383073496659243, "grad_norm": 17.228731155395508, "learning_rate": 1e-06, "loss": 0.7039, "num_input_tokens_seen": 185719144, "step": 3315 }, { "epoch": 7.383073496659243, "loss": 0.6190841197967529, "loss_ce": 0.00018761484534479678, "loss_iou": 0.2890625, "loss_num": 0.00836181640625, "loss_xval": 0.6171875, "num_input_tokens_seen": 185719144, "step": 3315 }, { "epoch": 7.385300668151448, "grad_norm": 25.301191329956055, "learning_rate": 1e-06, "loss": 0.6395, "num_input_tokens_seen": 185773492, "step": 3316 }, { "epoch": 7.385300668151448, "loss": 0.483335018157959, "loss_ce": 0.00018073133833240718, "loss_iou": 0.1982421875, "loss_num": 0.017578125, "loss_xval": 0.482421875, "num_input_tokens_seen": 185773492, "step": 3316 }, { "epoch": 7.387527839643653, "grad_norm": 21.02423095703125, "learning_rate": 1e-06, "loss": 0.4614, "num_input_tokens_seen": 185830376, "step": 3317 }, { "epoch": 7.387527839643653, "loss": 0.5402176380157471, "loss_ce": 0.00017858328646980226, "loss_iou": 0.2421875, "loss_num": 0.010986328125, "loss_xval": 0.5390625, "num_input_tokens_seen": 185830376, "step": 3317 }, { "epoch": 7.389755011135858, "grad_norm": 17.576339721679688, "learning_rate": 1e-06, "loss": 0.607, "num_input_tokens_seen": 185884160, "step": 3318 }, { "epoch": 7.389755011135858, "loss": 0.7509297728538513, "loss_ce": 0.00019736467220354825, "loss_iou": 0.33203125, "loss_num": 0.0172119140625, "loss_xval": 0.75, "num_input_tokens_seen": 185884160, "step": 3318 }, { "epoch": 7.3919821826280625, "grad_norm": 26.350475311279297, "learning_rate": 1e-06, "loss": 0.5901, "num_input_tokens_seen": 185939524, "step": 3319 }, { "epoch": 7.3919821826280625, "loss": 0.5638967156410217, "loss_ce": 0.0001760525774443522, "loss_iou": 0.2373046875, "loss_num": 0.017822265625, "loss_xval": 0.5625, "num_input_tokens_seen": 185939524, "step": 3319 }, { "epoch": 7.394209354120267, "grad_norm": 16.871248245239258, "learning_rate": 1e-06, "loss": 0.6936, "num_input_tokens_seen": 185996464, "step": 3320 }, { "epoch": 7.394209354120267, "loss": 0.6505089998245239, "loss_ce": 0.0007286987965926528, "loss_iou": 0.28125, "loss_num": 0.01708984375, "loss_xval": 0.6484375, "num_input_tokens_seen": 185996464, "step": 3320 }, { "epoch": 7.396436525612472, "grad_norm": 28.474199295043945, "learning_rate": 1e-06, "loss": 0.5359, "num_input_tokens_seen": 186051964, "step": 3321 }, { "epoch": 7.396436525612472, "loss": 0.5825638771057129, "loss_ce": 0.00016640947433188558, "loss_iou": 0.25390625, "loss_num": 0.0146484375, "loss_xval": 0.58203125, "num_input_tokens_seen": 186051964, "step": 3321 }, { "epoch": 7.398663697104677, "grad_norm": 15.175790786743164, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 186106580, "step": 3322 }, { "epoch": 7.398663697104677, "loss": 0.4596654176712036, "loss_ce": 0.00019277595856692642, "loss_iou": 0.203125, "loss_num": 0.0106201171875, "loss_xval": 0.458984375, "num_input_tokens_seen": 186106580, "step": 3322 }, { "epoch": 7.400890868596882, "grad_norm": 20.897014617919922, "learning_rate": 1e-06, "loss": 0.6625, "num_input_tokens_seen": 186163020, "step": 3323 }, { "epoch": 7.400890868596882, "loss": 0.5817232131958008, "loss_ce": 0.00018024235032498837, "loss_iou": 0.265625, "loss_num": 0.009765625, "loss_xval": 0.58203125, "num_input_tokens_seen": 186163020, "step": 3323 }, { "epoch": 7.403118040089087, "grad_norm": 17.83477210998535, "learning_rate": 1e-06, "loss": 0.5627, "num_input_tokens_seen": 186218364, "step": 3324 }, { "epoch": 7.403118040089087, "loss": 0.580369234085083, "loss_ce": 0.0002911181654781103, "loss_iou": 0.2294921875, "loss_num": 0.0242919921875, "loss_xval": 0.578125, "num_input_tokens_seen": 186218364, "step": 3324 }, { "epoch": 7.405345211581292, "grad_norm": 18.244247436523438, "learning_rate": 1e-06, "loss": 0.7525, "num_input_tokens_seen": 186271824, "step": 3325 }, { "epoch": 7.405345211581292, "loss": 0.5565927028656006, "loss_ce": 0.00019619996601250023, "loss_iou": 0.25, "loss_num": 0.01092529296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 186271824, "step": 3325 }, { "epoch": 7.4075723830734965, "grad_norm": 25.529314041137695, "learning_rate": 1e-06, "loss": 0.7204, "num_input_tokens_seen": 186326604, "step": 3326 }, { "epoch": 7.4075723830734965, "loss": 0.7397571802139282, "loss_ce": 0.0002552796504460275, "loss_iou": 0.32421875, "loss_num": 0.0181884765625, "loss_xval": 0.73828125, "num_input_tokens_seen": 186326604, "step": 3326 }, { "epoch": 7.409799554565701, "grad_norm": 15.39941692352295, "learning_rate": 1e-06, "loss": 0.562, "num_input_tokens_seen": 186383040, "step": 3327 }, { "epoch": 7.409799554565701, "loss": 0.6352710723876953, "loss_ce": 0.00026131048798561096, "loss_iou": 0.27734375, "loss_num": 0.016357421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 186383040, "step": 3327 }, { "epoch": 7.412026726057906, "grad_norm": 14.803224563598633, "learning_rate": 1e-06, "loss": 0.641, "num_input_tokens_seen": 186435624, "step": 3328 }, { "epoch": 7.412026726057906, "loss": 0.512509286403656, "loss_ce": 0.0005464061396196485, "loss_iou": 0.2080078125, "loss_num": 0.0191650390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 186435624, "step": 3328 }, { "epoch": 7.414253897550111, "grad_norm": 19.84122085571289, "learning_rate": 1e-06, "loss": 0.6622, "num_input_tokens_seen": 186492116, "step": 3329 }, { "epoch": 7.414253897550111, "loss": 0.579806923866272, "loss_ce": 0.00021710841974709183, "loss_iou": 0.2490234375, "loss_num": 0.0162353515625, "loss_xval": 0.578125, "num_input_tokens_seen": 186492116, "step": 3329 }, { "epoch": 7.416481069042316, "grad_norm": 18.60784149169922, "learning_rate": 1e-06, "loss": 0.7456, "num_input_tokens_seen": 186549676, "step": 3330 }, { "epoch": 7.416481069042316, "loss": 0.7443963885307312, "loss_ce": 0.0002557601546868682, "loss_iou": 0.302734375, "loss_num": 0.0274658203125, "loss_xval": 0.7421875, "num_input_tokens_seen": 186549676, "step": 3330 }, { "epoch": 7.418708240534521, "grad_norm": 21.594797134399414, "learning_rate": 1e-06, "loss": 0.7149, "num_input_tokens_seen": 186604212, "step": 3331 }, { "epoch": 7.418708240534521, "loss": 0.7223727107048035, "loss_ce": 0.00020475327619351447, "loss_iou": 0.314453125, "loss_num": 0.0189208984375, "loss_xval": 0.72265625, "num_input_tokens_seen": 186604212, "step": 3331 }, { "epoch": 7.420935412026726, "grad_norm": 20.443817138671875, "learning_rate": 1e-06, "loss": 0.6048, "num_input_tokens_seen": 186658836, "step": 3332 }, { "epoch": 7.420935412026726, "loss": 0.5924441814422607, "loss_ce": 0.000281118496786803, "loss_iou": 0.24609375, "loss_num": 0.0198974609375, "loss_xval": 0.59375, "num_input_tokens_seen": 186658836, "step": 3332 }, { "epoch": 7.4231625835189305, "grad_norm": 22.519840240478516, "learning_rate": 1e-06, "loss": 0.4982, "num_input_tokens_seen": 186716272, "step": 3333 }, { "epoch": 7.4231625835189305, "loss": 0.5731725096702576, "loss_ce": 0.0002965159364975989, "loss_iou": 0.263671875, "loss_num": 0.00927734375, "loss_xval": 0.57421875, "num_input_tokens_seen": 186716272, "step": 3333 }, { "epoch": 7.425389755011135, "grad_norm": 16.776288986206055, "learning_rate": 1e-06, "loss": 0.5875, "num_input_tokens_seen": 186769964, "step": 3334 }, { "epoch": 7.425389755011135, "loss": 0.5091996192932129, "loss_ce": 0.0001664264709688723, "loss_iou": 0.205078125, "loss_num": 0.019775390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 186769964, "step": 3334 }, { "epoch": 7.427616926503341, "grad_norm": 19.484222412109375, "learning_rate": 1e-06, "loss": 0.5844, "num_input_tokens_seen": 186826344, "step": 3335 }, { "epoch": 7.427616926503341, "loss": 0.6198055744171143, "loss_ce": 0.0001766337372828275, "loss_iou": 0.259765625, "loss_num": 0.0205078125, "loss_xval": 0.62109375, "num_input_tokens_seen": 186826344, "step": 3335 }, { "epoch": 7.429844097995546, "grad_norm": 17.908937454223633, "learning_rate": 1e-06, "loss": 0.624, "num_input_tokens_seen": 186881588, "step": 3336 }, { "epoch": 7.429844097995546, "loss": 0.6411352753639221, "loss_ce": 0.0002661352336872369, "loss_iou": 0.279296875, "loss_num": 0.0164794921875, "loss_xval": 0.640625, "num_input_tokens_seen": 186881588, "step": 3336 }, { "epoch": 7.432071269487751, "grad_norm": 24.69992446899414, "learning_rate": 1e-06, "loss": 0.5701, "num_input_tokens_seen": 186939068, "step": 3337 }, { "epoch": 7.432071269487751, "loss": 0.5966298580169678, "loss_ce": 0.00019431284454185516, "loss_iou": 0.2392578125, "loss_num": 0.0235595703125, "loss_xval": 0.59765625, "num_input_tokens_seen": 186939068, "step": 3337 }, { "epoch": 7.434298440979956, "grad_norm": 14.345844268798828, "learning_rate": 1e-06, "loss": 0.4817, "num_input_tokens_seen": 186996924, "step": 3338 }, { "epoch": 7.434298440979956, "loss": 0.6793899536132812, "loss_ce": 0.00019069564586970955, "loss_iou": 0.296875, "loss_num": 0.0169677734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 186996924, "step": 3338 }, { "epoch": 7.436525612472161, "grad_norm": 14.285273551940918, "learning_rate": 1e-06, "loss": 0.5295, "num_input_tokens_seen": 187053760, "step": 3339 }, { "epoch": 7.436525612472161, "loss": 0.7244080305099487, "loss_ce": 0.0001648097822908312, "loss_iou": 0.3125, "loss_num": 0.0201416015625, "loss_xval": 0.72265625, "num_input_tokens_seen": 187053760, "step": 3339 }, { "epoch": 7.4387527839643655, "grad_norm": 16.611595153808594, "learning_rate": 1e-06, "loss": 0.8745, "num_input_tokens_seen": 187108896, "step": 3340 }, { "epoch": 7.4387527839643655, "loss": 1.1862220764160156, "loss_ce": 0.0001869781408458948, "loss_iou": 0.498046875, "loss_num": 0.037353515625, "loss_xval": 1.1875, "num_input_tokens_seen": 187108896, "step": 3340 }, { "epoch": 7.44097995545657, "grad_norm": 22.56696891784668, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 187166280, "step": 3341 }, { "epoch": 7.44097995545657, "loss": 0.7133273482322693, "loss_ce": 0.00019260836415924132, "loss_iou": 0.298828125, "loss_num": 0.022705078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 187166280, "step": 3341 }, { "epoch": 7.443207126948775, "grad_norm": 31.73601531982422, "learning_rate": 1e-06, "loss": 0.7095, "num_input_tokens_seen": 187223724, "step": 3342 }, { "epoch": 7.443207126948775, "loss": 0.5013738870620728, "loss_ce": 0.00027527124620974064, "loss_iou": 0.2109375, "loss_num": 0.0157470703125, "loss_xval": 0.5, "num_input_tokens_seen": 187223724, "step": 3342 }, { "epoch": 7.44543429844098, "grad_norm": 15.37706184387207, "learning_rate": 1e-06, "loss": 0.457, "num_input_tokens_seen": 187281536, "step": 3343 }, { "epoch": 7.44543429844098, "loss": 0.43623799085617065, "loss_ce": 0.00020283354388084263, "loss_iou": 0.185546875, "loss_num": 0.01287841796875, "loss_xval": 0.435546875, "num_input_tokens_seen": 187281536, "step": 3343 }, { "epoch": 7.447661469933185, "grad_norm": 14.091643333435059, "learning_rate": 1e-06, "loss": 0.7446, "num_input_tokens_seen": 187339076, "step": 3344 }, { "epoch": 7.447661469933185, "loss": 0.8033976554870605, "loss_ce": 0.00026650671497918665, "loss_iou": 0.318359375, "loss_num": 0.033203125, "loss_xval": 0.8046875, "num_input_tokens_seen": 187339076, "step": 3344 }, { "epoch": 7.44988864142539, "grad_norm": 17.009387969970703, "learning_rate": 1e-06, "loss": 0.6225, "num_input_tokens_seen": 187395496, "step": 3345 }, { "epoch": 7.44988864142539, "loss": 0.7989903688430786, "loss_ce": 0.001260860008187592, "loss_iou": 0.28125, "loss_num": 0.046875, "loss_xval": 0.796875, "num_input_tokens_seen": 187395496, "step": 3345 }, { "epoch": 7.452115812917595, "grad_norm": 16.3013916015625, "learning_rate": 1e-06, "loss": 0.7418, "num_input_tokens_seen": 187452228, "step": 3346 }, { "epoch": 7.452115812917595, "loss": 0.7919469475746155, "loss_ce": 0.0001989098673220724, "loss_iou": 0.3359375, "loss_num": 0.024169921875, "loss_xval": 0.79296875, "num_input_tokens_seen": 187452228, "step": 3346 }, { "epoch": 7.4543429844097995, "grad_norm": 25.37590217590332, "learning_rate": 1e-06, "loss": 0.6981, "num_input_tokens_seen": 187507640, "step": 3347 }, { "epoch": 7.4543429844097995, "loss": 0.5329313278198242, "loss_ce": 0.0007047686958685517, "loss_iou": 0.2314453125, "loss_num": 0.013916015625, "loss_xval": 0.53125, "num_input_tokens_seen": 187507640, "step": 3347 }, { "epoch": 7.456570155902004, "grad_norm": 15.719435691833496, "learning_rate": 1e-06, "loss": 0.6663, "num_input_tokens_seen": 187564908, "step": 3348 }, { "epoch": 7.456570155902004, "loss": 0.7233518362045288, "loss_ce": 0.00020732844131998718, "loss_iou": 0.30078125, "loss_num": 0.0240478515625, "loss_xval": 0.72265625, "num_input_tokens_seen": 187564908, "step": 3348 }, { "epoch": 7.458797327394209, "grad_norm": 14.450872421264648, "learning_rate": 1e-06, "loss": 0.6857, "num_input_tokens_seen": 187616496, "step": 3349 }, { "epoch": 7.458797327394209, "loss": 0.5809314250946045, "loss_ce": 0.0006091895047575235, "loss_iou": 0.25390625, "loss_num": 0.0147705078125, "loss_xval": 0.58203125, "num_input_tokens_seen": 187616496, "step": 3349 }, { "epoch": 7.461024498886414, "grad_norm": 16.598936080932617, "learning_rate": 1e-06, "loss": 0.6052, "num_input_tokens_seen": 187670232, "step": 3350 }, { "epoch": 7.461024498886414, "loss": 0.479515016078949, "loss_ce": 0.00017542547720950097, "loss_iou": 0.20703125, "loss_num": 0.0130615234375, "loss_xval": 0.478515625, "num_input_tokens_seen": 187670232, "step": 3350 }, { "epoch": 7.463251670378619, "grad_norm": 29.393518447875977, "learning_rate": 1e-06, "loss": 0.6938, "num_input_tokens_seen": 187724992, "step": 3351 }, { "epoch": 7.463251670378619, "loss": 0.8341479897499084, "loss_ce": 0.00040772499050945044, "loss_iou": 0.375, "loss_num": 0.0167236328125, "loss_xval": 0.83203125, "num_input_tokens_seen": 187724992, "step": 3351 }, { "epoch": 7.465478841870824, "grad_norm": 17.579273223876953, "learning_rate": 1e-06, "loss": 0.6167, "num_input_tokens_seen": 187782612, "step": 3352 }, { "epoch": 7.465478841870824, "loss": 0.7424448132514954, "loss_ce": 0.0002573099918663502, "loss_iou": 0.31640625, "loss_num": 0.021728515625, "loss_xval": 0.7421875, "num_input_tokens_seen": 187782612, "step": 3352 }, { "epoch": 7.467706013363029, "grad_norm": 20.376893997192383, "learning_rate": 1e-06, "loss": 0.6322, "num_input_tokens_seen": 187838060, "step": 3353 }, { "epoch": 7.467706013363029, "loss": 0.6124618649482727, "loss_ce": 0.00040132386493496597, "loss_iou": 0.2578125, "loss_num": 0.019287109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 187838060, "step": 3353 }, { "epoch": 7.4699331848552335, "grad_norm": 27.1313533782959, "learning_rate": 1e-06, "loss": 0.8042, "num_input_tokens_seen": 187891800, "step": 3354 }, { "epoch": 7.4699331848552335, "loss": 0.8244494795799255, "loss_ce": 0.00023073022020980716, "loss_iou": 0.3828125, "loss_num": 0.01165771484375, "loss_xval": 0.82421875, "num_input_tokens_seen": 187891800, "step": 3354 }, { "epoch": 7.472160356347438, "grad_norm": 17.113248825073242, "learning_rate": 1e-06, "loss": 0.5967, "num_input_tokens_seen": 187949628, "step": 3355 }, { "epoch": 7.472160356347438, "loss": 0.414481520652771, "loss_ce": 0.00017491632024757564, "loss_iou": 0.1767578125, "loss_num": 0.0123291015625, "loss_xval": 0.4140625, "num_input_tokens_seen": 187949628, "step": 3355 }, { "epoch": 7.474387527839644, "grad_norm": 34.95979309082031, "learning_rate": 1e-06, "loss": 0.7369, "num_input_tokens_seen": 188006312, "step": 3356 }, { "epoch": 7.474387527839644, "loss": 0.7557699680328369, "loss_ce": 0.00030729081481695175, "loss_iou": 0.298828125, "loss_num": 0.031982421875, "loss_xval": 0.75390625, "num_input_tokens_seen": 188006312, "step": 3356 }, { "epoch": 7.476614699331849, "grad_norm": 18.19429588317871, "learning_rate": 1e-06, "loss": 0.7208, "num_input_tokens_seen": 188062748, "step": 3357 }, { "epoch": 7.476614699331849, "loss": 0.5873679518699646, "loss_ce": 0.00027081643929705024, "loss_iou": 0.25, "loss_num": 0.01708984375, "loss_xval": 0.5859375, "num_input_tokens_seen": 188062748, "step": 3357 }, { "epoch": 7.478841870824054, "grad_norm": 25.23208236694336, "learning_rate": 1e-06, "loss": 0.462, "num_input_tokens_seen": 188118952, "step": 3358 }, { "epoch": 7.478841870824054, "loss": 0.6015306711196899, "loss_ce": 0.00021232501603662968, "loss_iou": 0.26171875, "loss_num": 0.0159912109375, "loss_xval": 0.6015625, "num_input_tokens_seen": 188118952, "step": 3358 }, { "epoch": 7.481069042316259, "grad_norm": 13.118025779724121, "learning_rate": 1e-06, "loss": 0.7661, "num_input_tokens_seen": 188172408, "step": 3359 }, { "epoch": 7.481069042316259, "loss": 0.7937259674072266, "loss_ce": 0.00026893243193626404, "loss_iou": 0.33984375, "loss_num": 0.0224609375, "loss_xval": 0.79296875, "num_input_tokens_seen": 188172408, "step": 3359 }, { "epoch": 7.4832962138084635, "grad_norm": 21.09063148498535, "learning_rate": 1e-06, "loss": 0.655, "num_input_tokens_seen": 188228848, "step": 3360 }, { "epoch": 7.4832962138084635, "loss": 0.7106525897979736, "loss_ce": 0.000203342831810005, "loss_iou": 0.271484375, "loss_num": 0.03369140625, "loss_xval": 0.7109375, "num_input_tokens_seen": 188228848, "step": 3360 }, { "epoch": 7.485523385300668, "grad_norm": 19.18509292602539, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 188284528, "step": 3361 }, { "epoch": 7.485523385300668, "loss": 0.8408026695251465, "loss_ce": 0.00022645562421530485, "loss_iou": 0.359375, "loss_num": 0.024658203125, "loss_xval": 0.83984375, "num_input_tokens_seen": 188284528, "step": 3361 }, { "epoch": 7.487750556792873, "grad_norm": 21.143802642822266, "learning_rate": 1e-06, "loss": 0.6618, "num_input_tokens_seen": 188340712, "step": 3362 }, { "epoch": 7.487750556792873, "loss": 0.7464203238487244, "loss_ce": 0.00020453488104976714, "loss_iou": 0.33203125, "loss_num": 0.016357421875, "loss_xval": 0.74609375, "num_input_tokens_seen": 188340712, "step": 3362 }, { "epoch": 7.489977728285078, "grad_norm": 25.710002899169922, "learning_rate": 1e-06, "loss": 0.7553, "num_input_tokens_seen": 188395496, "step": 3363 }, { "epoch": 7.489977728285078, "loss": 0.898524820804596, "loss_ce": 0.00033146512578241527, "loss_iou": 0.373046875, "loss_num": 0.030517578125, "loss_xval": 0.8984375, "num_input_tokens_seen": 188395496, "step": 3363 }, { "epoch": 7.492204899777283, "grad_norm": 18.37621307373047, "learning_rate": 1e-06, "loss": 0.6617, "num_input_tokens_seen": 188451292, "step": 3364 }, { "epoch": 7.492204899777283, "loss": 0.6165107488632202, "loss_ce": 0.00029981633997522295, "loss_iou": 0.25, "loss_num": 0.0234375, "loss_xval": 0.6171875, "num_input_tokens_seen": 188451292, "step": 3364 }, { "epoch": 7.494432071269488, "grad_norm": 23.23649787902832, "learning_rate": 1e-06, "loss": 0.723, "num_input_tokens_seen": 188510116, "step": 3365 }, { "epoch": 7.494432071269488, "loss": 0.5230201482772827, "loss_ce": 0.00019301672000437975, "loss_iou": 0.232421875, "loss_num": 0.0115966796875, "loss_xval": 0.5234375, "num_input_tokens_seen": 188510116, "step": 3365 }, { "epoch": 7.496659242761693, "grad_norm": 19.028898239135742, "learning_rate": 1e-06, "loss": 0.6373, "num_input_tokens_seen": 188566168, "step": 3366 }, { "epoch": 7.496659242761693, "loss": 0.5705129504203796, "loss_ce": 0.00020043338008690625, "loss_iou": 0.255859375, "loss_num": 0.01165771484375, "loss_xval": 0.5703125, "num_input_tokens_seen": 188566168, "step": 3366 }, { "epoch": 7.498886414253898, "grad_norm": 60.06413650512695, "learning_rate": 1e-06, "loss": 0.6584, "num_input_tokens_seen": 188622556, "step": 3367 }, { "epoch": 7.498886414253898, "loss": 0.5773051977157593, "loss_ce": 0.00021782593103125691, "loss_iou": 0.248046875, "loss_num": 0.01611328125, "loss_xval": 0.578125, "num_input_tokens_seen": 188622556, "step": 3367 }, { "epoch": 7.501113585746102, "grad_norm": 29.536270141601562, "learning_rate": 1e-06, "loss": 0.67, "num_input_tokens_seen": 188675804, "step": 3368 }, { "epoch": 7.501113585746102, "loss": 0.7318795919418335, "loss_ce": 0.0001901389186969027, "loss_iou": 0.326171875, "loss_num": 0.015869140625, "loss_xval": 0.73046875, "num_input_tokens_seen": 188675804, "step": 3368 }, { "epoch": 7.503340757238307, "grad_norm": 19.667911529541016, "learning_rate": 1e-06, "loss": 0.7901, "num_input_tokens_seen": 188729532, "step": 3369 }, { "epoch": 7.503340757238307, "loss": 0.6141993999481201, "loss_ce": 0.00018573581473901868, "loss_iou": 0.275390625, "loss_num": 0.0123291015625, "loss_xval": 0.61328125, "num_input_tokens_seen": 188729532, "step": 3369 }, { "epoch": 7.505567928730512, "grad_norm": 19.440563201904297, "learning_rate": 1e-06, "loss": 0.6208, "num_input_tokens_seen": 188784192, "step": 3370 }, { "epoch": 7.505567928730512, "loss": 0.608165979385376, "loss_ce": 0.0002558160631451756, "loss_iou": 0.259765625, "loss_num": 0.0177001953125, "loss_xval": 0.609375, "num_input_tokens_seen": 188784192, "step": 3370 }, { "epoch": 7.507795100222717, "grad_norm": 20.731998443603516, "learning_rate": 1e-06, "loss": 0.6593, "num_input_tokens_seen": 188838624, "step": 3371 }, { "epoch": 7.507795100222717, "loss": 0.6856630444526672, "loss_ce": 0.00023825542302802205, "loss_iou": 0.322265625, "loss_num": 0.008056640625, "loss_xval": 0.68359375, "num_input_tokens_seen": 188838624, "step": 3371 }, { "epoch": 7.510022271714922, "grad_norm": 21.571271896362305, "learning_rate": 1e-06, "loss": 0.6617, "num_input_tokens_seen": 188896352, "step": 3372 }, { "epoch": 7.510022271714922, "loss": 0.7389297485351562, "loss_ce": 0.0001602266274858266, "loss_iou": 0.333984375, "loss_num": 0.01397705078125, "loss_xval": 0.73828125, "num_input_tokens_seen": 188896352, "step": 3372 }, { "epoch": 7.512249443207127, "grad_norm": 26.354827880859375, "learning_rate": 1e-06, "loss": 0.5974, "num_input_tokens_seen": 188953156, "step": 3373 }, { "epoch": 7.512249443207127, "loss": 0.5054886341094971, "loss_ce": 0.00023960010730661452, "loss_iou": 0.228515625, "loss_num": 0.00946044921875, "loss_xval": 0.50390625, "num_input_tokens_seen": 188953156, "step": 3373 }, { "epoch": 7.514476614699332, "grad_norm": 33.86724853515625, "learning_rate": 1e-06, "loss": 0.529, "num_input_tokens_seen": 189007540, "step": 3374 }, { "epoch": 7.514476614699332, "loss": 0.34347233176231384, "loss_ce": 0.0002106213360093534, "loss_iou": 0.1474609375, "loss_num": 0.0096435546875, "loss_xval": 0.34375, "num_input_tokens_seen": 189007540, "step": 3374 }, { "epoch": 7.5167037861915365, "grad_norm": 23.846332550048828, "learning_rate": 1e-06, "loss": 0.8868, "num_input_tokens_seen": 189063288, "step": 3375 }, { "epoch": 7.5167037861915365, "loss": 1.2485597133636475, "loss_ce": 0.00026864674873650074, "loss_iou": 0.51953125, "loss_num": 0.041748046875, "loss_xval": 1.25, "num_input_tokens_seen": 189063288, "step": 3375 }, { "epoch": 7.518930957683741, "grad_norm": 34.920448303222656, "learning_rate": 1e-06, "loss": 0.619, "num_input_tokens_seen": 189121948, "step": 3376 }, { "epoch": 7.518930957683741, "loss": 0.4434422254562378, "loss_ce": 0.0002049226895906031, "loss_iou": 0.1640625, "loss_num": 0.0233154296875, "loss_xval": 0.443359375, "num_input_tokens_seen": 189121948, "step": 3376 }, { "epoch": 7.521158129175946, "grad_norm": 45.45869445800781, "learning_rate": 1e-06, "loss": 0.6966, "num_input_tokens_seen": 189178620, "step": 3377 }, { "epoch": 7.521158129175946, "loss": 0.9557803869247437, "loss_ce": 0.00021394254872575402, "loss_iou": 0.376953125, "loss_num": 0.04052734375, "loss_xval": 0.95703125, "num_input_tokens_seen": 189178620, "step": 3377 }, { "epoch": 7.523385300668151, "grad_norm": 21.550317764282227, "learning_rate": 1e-06, "loss": 0.5571, "num_input_tokens_seen": 189234636, "step": 3378 }, { "epoch": 7.523385300668151, "loss": 0.6044266819953918, "loss_ce": 0.00017864728579297662, "loss_iou": 0.279296875, "loss_num": 0.00946044921875, "loss_xval": 0.60546875, "num_input_tokens_seen": 189234636, "step": 3378 }, { "epoch": 7.525612472160356, "grad_norm": 17.31205177307129, "learning_rate": 1e-06, "loss": 0.5324, "num_input_tokens_seen": 189293248, "step": 3379 }, { "epoch": 7.525612472160356, "loss": 0.5417789220809937, "loss_ce": 0.00027502982993610203, "loss_iou": 0.2275390625, "loss_num": 0.017333984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 189293248, "step": 3379 }, { "epoch": 7.527839643652561, "grad_norm": 18.366722106933594, "learning_rate": 1e-06, "loss": 0.6955, "num_input_tokens_seen": 189351236, "step": 3380 }, { "epoch": 7.527839643652561, "loss": 0.6718355417251587, "loss_ce": 0.00020462839165702462, "loss_iou": 0.30859375, "loss_num": 0.01055908203125, "loss_xval": 0.671875, "num_input_tokens_seen": 189351236, "step": 3380 }, { "epoch": 7.5300668151447665, "grad_norm": 28.49734878540039, "learning_rate": 1e-06, "loss": 0.6942, "num_input_tokens_seen": 189408204, "step": 3381 }, { "epoch": 7.5300668151447665, "loss": 0.6963520050048828, "loss_ce": 0.00018498743884265423, "loss_iou": 0.306640625, "loss_num": 0.0162353515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 189408204, "step": 3381 }, { "epoch": 7.532293986636971, "grad_norm": 21.700590133666992, "learning_rate": 1e-06, "loss": 0.4877, "num_input_tokens_seen": 189466496, "step": 3382 }, { "epoch": 7.532293986636971, "loss": 0.6716306209564209, "loss_ce": 0.00024389507598243654, "loss_iou": 0.28125, "loss_num": 0.02197265625, "loss_xval": 0.671875, "num_input_tokens_seen": 189466496, "step": 3382 }, { "epoch": 7.534521158129176, "grad_norm": 18.741180419921875, "learning_rate": 1e-06, "loss": 0.6188, "num_input_tokens_seen": 189524108, "step": 3383 }, { "epoch": 7.534521158129176, "loss": 0.4249844551086426, "loss_ce": 0.0003018215938936919, "loss_iou": 0.185546875, "loss_num": 0.0108642578125, "loss_xval": 0.423828125, "num_input_tokens_seen": 189524108, "step": 3383 }, { "epoch": 7.536748329621381, "grad_norm": 18.38628578186035, "learning_rate": 1e-06, "loss": 0.5341, "num_input_tokens_seen": 189580564, "step": 3384 }, { "epoch": 7.536748329621381, "loss": 0.6349685192108154, "loss_ce": 0.00020285190839786083, "loss_iou": 0.263671875, "loss_num": 0.0211181640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 189580564, "step": 3384 }, { "epoch": 7.538975501113586, "grad_norm": 17.587261199951172, "learning_rate": 1e-06, "loss": 0.5419, "num_input_tokens_seen": 189633448, "step": 3385 }, { "epoch": 7.538975501113586, "loss": 0.590072512626648, "loss_ce": 0.00022877063020132482, "loss_iou": 0.2353515625, "loss_num": 0.023681640625, "loss_xval": 0.58984375, "num_input_tokens_seen": 189633448, "step": 3385 }, { "epoch": 7.541202672605791, "grad_norm": 23.872756958007812, "learning_rate": 1e-06, "loss": 0.5987, "num_input_tokens_seen": 189690868, "step": 3386 }, { "epoch": 7.541202672605791, "loss": 0.568671464920044, "loss_ce": 0.00019003619672730565, "loss_iou": 0.24609375, "loss_num": 0.01544189453125, "loss_xval": 0.5703125, "num_input_tokens_seen": 189690868, "step": 3386 }, { "epoch": 7.543429844097996, "grad_norm": 20.22979164123535, "learning_rate": 1e-06, "loss": 0.585, "num_input_tokens_seen": 189746736, "step": 3387 }, { "epoch": 7.543429844097996, "loss": 0.5246177911758423, "loss_ce": 0.00020370190031826496, "loss_iou": 0.2177734375, "loss_num": 0.0174560546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 189746736, "step": 3387 }, { "epoch": 7.5456570155902005, "grad_norm": 18.41916275024414, "learning_rate": 1e-06, "loss": 0.6426, "num_input_tokens_seen": 189803012, "step": 3388 }, { "epoch": 7.5456570155902005, "loss": 0.621086597442627, "loss_ce": 0.0002370403817621991, "loss_iou": 0.28125, "loss_num": 0.01202392578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 189803012, "step": 3388 }, { "epoch": 7.547884187082405, "grad_norm": 189.67298889160156, "learning_rate": 1e-06, "loss": 0.5411, "num_input_tokens_seen": 189859212, "step": 3389 }, { "epoch": 7.547884187082405, "loss": 0.5852431654930115, "loss_ce": 0.0002822045935317874, "loss_iou": 0.251953125, "loss_num": 0.01611328125, "loss_xval": 0.5859375, "num_input_tokens_seen": 189859212, "step": 3389 }, { "epoch": 7.55011135857461, "grad_norm": 15.547959327697754, "learning_rate": 1e-06, "loss": 0.8417, "num_input_tokens_seen": 189916504, "step": 3390 }, { "epoch": 7.55011135857461, "loss": 0.5157856941223145, "loss_ce": 0.00022171747696120292, "loss_iou": 0.2138671875, "loss_num": 0.0174560546875, "loss_xval": 0.515625, "num_input_tokens_seen": 189916504, "step": 3390 }, { "epoch": 7.552338530066815, "grad_norm": 26.860252380371094, "learning_rate": 1e-06, "loss": 0.7319, "num_input_tokens_seen": 189973312, "step": 3391 }, { "epoch": 7.552338530066815, "loss": 0.7284436225891113, "loss_ce": 0.00017210739315487444, "loss_iou": 0.28515625, "loss_num": 0.031494140625, "loss_xval": 0.7265625, "num_input_tokens_seen": 189973312, "step": 3391 }, { "epoch": 7.55456570155902, "grad_norm": 19.536853790283203, "learning_rate": 1e-06, "loss": 0.4387, "num_input_tokens_seen": 190030136, "step": 3392 }, { "epoch": 7.55456570155902, "loss": 0.4148935079574585, "loss_ce": 0.00022065457596909255, "loss_iou": 0.173828125, "loss_num": 0.013427734375, "loss_xval": 0.4140625, "num_input_tokens_seen": 190030136, "step": 3392 }, { "epoch": 7.556792873051225, "grad_norm": 20.840858459472656, "learning_rate": 1e-06, "loss": 0.6976, "num_input_tokens_seen": 190086148, "step": 3393 }, { "epoch": 7.556792873051225, "loss": 0.8776832222938538, "loss_ce": 0.00024186470545828342, "loss_iou": 0.369140625, "loss_num": 0.02783203125, "loss_xval": 0.87890625, "num_input_tokens_seen": 190086148, "step": 3393 }, { "epoch": 7.55902004454343, "grad_norm": 16.58432960510254, "learning_rate": 1e-06, "loss": 0.736, "num_input_tokens_seen": 190141764, "step": 3394 }, { "epoch": 7.55902004454343, "loss": 0.7458630204200745, "loss_ce": 0.0002575473044998944, "loss_iou": 0.291015625, "loss_num": 0.032470703125, "loss_xval": 0.74609375, "num_input_tokens_seen": 190141764, "step": 3394 }, { "epoch": 7.5612472160356345, "grad_norm": 15.23847770690918, "learning_rate": 1e-06, "loss": 0.7859, "num_input_tokens_seen": 190197356, "step": 3395 }, { "epoch": 7.5612472160356345, "loss": 0.8410194516181946, "loss_ce": 0.00019913521828129888, "loss_iou": 0.33984375, "loss_num": 0.032470703125, "loss_xval": 0.83984375, "num_input_tokens_seen": 190197356, "step": 3395 }, { "epoch": 7.563474387527839, "grad_norm": 17.79561424255371, "learning_rate": 1e-06, "loss": 0.5632, "num_input_tokens_seen": 190256636, "step": 3396 }, { "epoch": 7.563474387527839, "loss": 0.458886057138443, "loss_ce": 0.00026789220282807946, "loss_iou": 0.2158203125, "loss_num": 0.0054931640625, "loss_xval": 0.458984375, "num_input_tokens_seen": 190256636, "step": 3396 }, { "epoch": 7.565701559020044, "grad_norm": 15.726015090942383, "learning_rate": 1e-06, "loss": 0.4185, "num_input_tokens_seen": 190311620, "step": 3397 }, { "epoch": 7.565701559020044, "loss": 0.4230450987815857, "loss_ce": 0.0010480263736099005, "loss_iou": 0.1787109375, "loss_num": 0.0128173828125, "loss_xval": 0.421875, "num_input_tokens_seen": 190311620, "step": 3397 }, { "epoch": 7.567928730512249, "grad_norm": 25.931352615356445, "learning_rate": 1e-06, "loss": 0.7059, "num_input_tokens_seen": 190366792, "step": 3398 }, { "epoch": 7.567928730512249, "loss": 0.7654964923858643, "loss_ce": 0.0003598046605475247, "loss_iou": 0.337890625, "loss_num": 0.017822265625, "loss_xval": 0.765625, "num_input_tokens_seen": 190366792, "step": 3398 }, { "epoch": 7.570155902004454, "grad_norm": 21.16999053955078, "learning_rate": 1e-06, "loss": 0.7098, "num_input_tokens_seen": 190422324, "step": 3399 }, { "epoch": 7.570155902004454, "loss": 0.48839089274406433, "loss_ce": 0.0001706905895844102, "loss_iou": 0.212890625, "loss_num": 0.01214599609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 190422324, "step": 3399 }, { "epoch": 7.57238307349666, "grad_norm": 23.648658752441406, "learning_rate": 1e-06, "loss": 0.4927, "num_input_tokens_seen": 190480076, "step": 3400 }, { "epoch": 7.57238307349666, "loss": 0.5361147522926331, "loss_ce": 0.00016503711231052876, "loss_iou": 0.2236328125, "loss_num": 0.0179443359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 190480076, "step": 3400 }, { "epoch": 7.574610244988865, "grad_norm": 15.166297912597656, "learning_rate": 1e-06, "loss": 0.5387, "num_input_tokens_seen": 190535360, "step": 3401 }, { "epoch": 7.574610244988865, "loss": 0.6972156763076782, "loss_ce": 0.0001941586670000106, "loss_iou": 0.294921875, "loss_num": 0.021240234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 190535360, "step": 3401 }, { "epoch": 7.5768374164810695, "grad_norm": 21.948759078979492, "learning_rate": 1e-06, "loss": 0.6897, "num_input_tokens_seen": 190589160, "step": 3402 }, { "epoch": 7.5768374164810695, "loss": 0.5853911638259888, "loss_ce": 0.0001860598858911544, "loss_iou": 0.26171875, "loss_num": 0.0126953125, "loss_xval": 0.5859375, "num_input_tokens_seen": 190589160, "step": 3402 }, { "epoch": 7.579064587973274, "grad_norm": 36.27766799926758, "learning_rate": 1e-06, "loss": 0.9359, "num_input_tokens_seen": 190644804, "step": 3403 }, { "epoch": 7.579064587973274, "loss": 0.9691004753112793, "loss_ce": 0.00022840322344563901, "loss_iou": 0.41015625, "loss_num": 0.030029296875, "loss_xval": 0.96875, "num_input_tokens_seen": 190644804, "step": 3403 }, { "epoch": 7.581291759465479, "grad_norm": 27.834016799926758, "learning_rate": 1e-06, "loss": 0.6663, "num_input_tokens_seen": 190702760, "step": 3404 }, { "epoch": 7.581291759465479, "loss": 0.7742569446563721, "loss_ce": 0.000209053119760938, "loss_iou": 0.345703125, "loss_num": 0.0167236328125, "loss_xval": 0.7734375, "num_input_tokens_seen": 190702760, "step": 3404 }, { "epoch": 7.583518930957684, "grad_norm": 18.967069625854492, "learning_rate": 1e-06, "loss": 0.5449, "num_input_tokens_seen": 190758396, "step": 3405 }, { "epoch": 7.583518930957684, "loss": 0.6806260943412781, "loss_ce": 0.0002061867417069152, "loss_iou": 0.296875, "loss_num": 0.0174560546875, "loss_xval": 0.6796875, "num_input_tokens_seen": 190758396, "step": 3405 }, { "epoch": 7.585746102449889, "grad_norm": 22.21535873413086, "learning_rate": 1e-06, "loss": 0.668, "num_input_tokens_seen": 190810616, "step": 3406 }, { "epoch": 7.585746102449889, "loss": 0.7401872873306274, "loss_ce": 0.0001970837765838951, "loss_iou": 0.32421875, "loss_num": 0.01806640625, "loss_xval": 0.73828125, "num_input_tokens_seen": 190810616, "step": 3406 }, { "epoch": 7.587973273942094, "grad_norm": 16.877315521240234, "learning_rate": 1e-06, "loss": 0.5691, "num_input_tokens_seen": 190866152, "step": 3407 }, { "epoch": 7.587973273942094, "loss": 0.545766294002533, "loss_ce": 0.00023407851404044777, "loss_iou": 0.232421875, "loss_num": 0.01611328125, "loss_xval": 0.546875, "num_input_tokens_seen": 190866152, "step": 3407 }, { "epoch": 7.590200445434299, "grad_norm": 29.44357681274414, "learning_rate": 1e-06, "loss": 0.5684, "num_input_tokens_seen": 190924720, "step": 3408 }, { "epoch": 7.590200445434299, "loss": 0.6174308061599731, "loss_ce": 0.0002433276386000216, "loss_iou": 0.25390625, "loss_num": 0.022216796875, "loss_xval": 0.6171875, "num_input_tokens_seen": 190924720, "step": 3408 }, { "epoch": 7.5924276169265035, "grad_norm": 22.013757705688477, "learning_rate": 1e-06, "loss": 0.6058, "num_input_tokens_seen": 190982220, "step": 3409 }, { "epoch": 7.5924276169265035, "loss": 0.6500968337059021, "loss_ce": 0.0001944488030858338, "loss_iou": 0.28515625, "loss_num": 0.015869140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 190982220, "step": 3409 }, { "epoch": 7.594654788418708, "grad_norm": 19.142648696899414, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 191039340, "step": 3410 }, { "epoch": 7.594654788418708, "loss": 0.35581400990486145, "loss_ce": 0.0001621594128664583, "loss_iou": 0.1640625, "loss_num": 0.00531005859375, "loss_xval": 0.35546875, "num_input_tokens_seen": 191039340, "step": 3410 }, { "epoch": 7.596881959910913, "grad_norm": 19.273056030273438, "learning_rate": 1e-06, "loss": 0.6247, "num_input_tokens_seen": 191097160, "step": 3411 }, { "epoch": 7.596881959910913, "loss": 0.5918649435043335, "loss_ce": 0.00019010651158168912, "loss_iou": 0.2490234375, "loss_num": 0.0186767578125, "loss_xval": 0.58984375, "num_input_tokens_seen": 191097160, "step": 3411 }, { "epoch": 7.599109131403118, "grad_norm": 12.594167709350586, "learning_rate": 1e-06, "loss": 0.5376, "num_input_tokens_seen": 191153944, "step": 3412 }, { "epoch": 7.599109131403118, "loss": 0.5286160707473755, "loss_ce": 0.00017369385750498623, "loss_iou": 0.2314453125, "loss_num": 0.01287841796875, "loss_xval": 0.52734375, "num_input_tokens_seen": 191153944, "step": 3412 }, { "epoch": 7.601336302895323, "grad_norm": 21.295249938964844, "learning_rate": 1e-06, "loss": 0.7192, "num_input_tokens_seen": 191210128, "step": 3413 }, { "epoch": 7.601336302895323, "loss": 0.8668521642684937, "loss_ce": 0.00015292070747818798, "loss_iou": 0.3359375, "loss_num": 0.038330078125, "loss_xval": 0.8671875, "num_input_tokens_seen": 191210128, "step": 3413 }, { "epoch": 7.603563474387528, "grad_norm": 20.785724639892578, "learning_rate": 1e-06, "loss": 0.7145, "num_input_tokens_seen": 191267092, "step": 3414 }, { "epoch": 7.603563474387528, "loss": 0.6779365539550781, "loss_ce": 0.0002021369436988607, "loss_iou": 0.27734375, "loss_num": 0.0247802734375, "loss_xval": 0.6796875, "num_input_tokens_seen": 191267092, "step": 3414 }, { "epoch": 7.605790645879733, "grad_norm": 15.27004337310791, "learning_rate": 1e-06, "loss": 0.6446, "num_input_tokens_seen": 191323100, "step": 3415 }, { "epoch": 7.605790645879733, "loss": 0.7496963739395142, "loss_ce": 0.00018462681327946484, "loss_iou": 0.328125, "loss_num": 0.018310546875, "loss_xval": 0.75, "num_input_tokens_seen": 191323100, "step": 3415 }, { "epoch": 7.6080178173719375, "grad_norm": 34.315914154052734, "learning_rate": 1e-06, "loss": 0.5219, "num_input_tokens_seen": 191379484, "step": 3416 }, { "epoch": 7.6080178173719375, "loss": 0.558527410030365, "loss_ce": 0.00017779026529751718, "loss_iou": 0.25, "loss_num": 0.01190185546875, "loss_xval": 0.55859375, "num_input_tokens_seen": 191379484, "step": 3416 }, { "epoch": 7.610244988864142, "grad_norm": 16.876907348632812, "learning_rate": 1e-06, "loss": 0.875, "num_input_tokens_seen": 191434760, "step": 3417 }, { "epoch": 7.610244988864142, "loss": 1.0747464895248413, "loss_ce": 0.00028365751495584846, "loss_iou": 0.47265625, "loss_num": 0.02587890625, "loss_xval": 1.078125, "num_input_tokens_seen": 191434760, "step": 3417 }, { "epoch": 7.612472160356347, "grad_norm": 13.883415222167969, "learning_rate": 1e-06, "loss": 0.7571, "num_input_tokens_seen": 191492628, "step": 3418 }, { "epoch": 7.612472160356347, "loss": 0.7818785309791565, "loss_ce": 0.00026232993695884943, "loss_iou": 0.28515625, "loss_num": 0.04248046875, "loss_xval": 0.78125, "num_input_tokens_seen": 191492628, "step": 3418 }, { "epoch": 7.614699331848552, "grad_norm": 12.954174995422363, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 191549316, "step": 3419 }, { "epoch": 7.614699331848552, "loss": 0.5483337640762329, "loss_ce": 0.00023806520039215684, "loss_iou": 0.2236328125, "loss_num": 0.02001953125, "loss_xval": 0.546875, "num_input_tokens_seen": 191549316, "step": 3419 }, { "epoch": 7.616926503340757, "grad_norm": 14.275961875915527, "learning_rate": 1e-06, "loss": 0.7559, "num_input_tokens_seen": 191605056, "step": 3420 }, { "epoch": 7.616926503340757, "loss": 0.5231179594993591, "loss_ce": 0.0001687241019681096, "loss_iou": 0.23046875, "loss_num": 0.01239013671875, "loss_xval": 0.5234375, "num_input_tokens_seen": 191605056, "step": 3420 }, { "epoch": 7.619153674832962, "grad_norm": 18.700197219848633, "learning_rate": 1e-06, "loss": 0.6053, "num_input_tokens_seen": 191662236, "step": 3421 }, { "epoch": 7.619153674832962, "loss": 0.7925351858139038, "loss_ce": 0.00029885018011555076, "loss_iou": 0.341796875, "loss_num": 0.0218505859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 191662236, "step": 3421 }, { "epoch": 7.621380846325167, "grad_norm": 24.45431900024414, "learning_rate": 1e-06, "loss": 0.6113, "num_input_tokens_seen": 191720748, "step": 3422 }, { "epoch": 7.621380846325167, "loss": 0.6171329617500305, "loss_ce": 0.00018957394058816135, "loss_iou": 0.267578125, "loss_num": 0.0162353515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 191720748, "step": 3422 }, { "epoch": 7.6236080178173715, "grad_norm": 20.672542572021484, "learning_rate": 1e-06, "loss": 0.8138, "num_input_tokens_seen": 191774632, "step": 3423 }, { "epoch": 7.6236080178173715, "loss": 0.8238697052001953, "loss_ce": 0.00013917218893766403, "loss_iou": 0.361328125, "loss_num": 0.0205078125, "loss_xval": 0.82421875, "num_input_tokens_seen": 191774632, "step": 3423 }, { "epoch": 7.625835189309576, "grad_norm": 23.883867263793945, "learning_rate": 1e-06, "loss": 0.7723, "num_input_tokens_seen": 191831068, "step": 3424 }, { "epoch": 7.625835189309576, "loss": 0.6435463428497314, "loss_ce": 0.0002357645716983825, "loss_iou": 0.283203125, "loss_num": 0.01495361328125, "loss_xval": 0.64453125, "num_input_tokens_seen": 191831068, "step": 3424 }, { "epoch": 7.628062360801781, "grad_norm": 17.740406036376953, "learning_rate": 1e-06, "loss": 0.4917, "num_input_tokens_seen": 191888888, "step": 3425 }, { "epoch": 7.628062360801781, "loss": 0.5690467953681946, "loss_ce": 0.00019914188305847347, "loss_iou": 0.2412109375, "loss_num": 0.0174560546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 191888888, "step": 3425 }, { "epoch": 7.630289532293987, "grad_norm": 34.307125091552734, "learning_rate": 1e-06, "loss": 0.518, "num_input_tokens_seen": 191944488, "step": 3426 }, { "epoch": 7.630289532293987, "loss": 0.5533403158187866, "loss_ce": 0.00036179396556690335, "loss_iou": 0.20703125, "loss_num": 0.0277099609375, "loss_xval": 0.5546875, "num_input_tokens_seen": 191944488, "step": 3426 }, { "epoch": 7.632516703786192, "grad_norm": 15.701261520385742, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 191999632, "step": 3427 }, { "epoch": 7.632516703786192, "loss": 0.6593526601791382, "loss_ce": 0.0004170782049186528, "loss_iou": 0.28125, "loss_num": 0.019287109375, "loss_xval": 0.66015625, "num_input_tokens_seen": 191999632, "step": 3427 }, { "epoch": 7.634743875278397, "grad_norm": 20.72135353088379, "learning_rate": 1e-06, "loss": 0.6254, "num_input_tokens_seen": 192055488, "step": 3428 }, { "epoch": 7.634743875278397, "loss": 0.6992897987365723, "loss_ce": 0.00019309617346152663, "loss_iou": 0.302734375, "loss_num": 0.0185546875, "loss_xval": 0.69921875, "num_input_tokens_seen": 192055488, "step": 3428 }, { "epoch": 7.636971046770602, "grad_norm": 30.77797508239746, "learning_rate": 1e-06, "loss": 0.5652, "num_input_tokens_seen": 192112044, "step": 3429 }, { "epoch": 7.636971046770602, "loss": 0.5464463233947754, "loss_ce": 0.00018164291395805776, "loss_iou": 0.2470703125, "loss_num": 0.0106201171875, "loss_xval": 0.546875, "num_input_tokens_seen": 192112044, "step": 3429 }, { "epoch": 7.639198218262806, "grad_norm": 17.127363204956055, "learning_rate": 1e-06, "loss": 0.942, "num_input_tokens_seen": 192165832, "step": 3430 }, { "epoch": 7.639198218262806, "loss": 0.7089158892631531, "loss_ce": 0.00017566155293025076, "loss_iou": 0.294921875, "loss_num": 0.0240478515625, "loss_xval": 0.70703125, "num_input_tokens_seen": 192165832, "step": 3430 }, { "epoch": 7.641425389755011, "grad_norm": 19.867774963378906, "learning_rate": 1e-06, "loss": 0.5599, "num_input_tokens_seen": 192219304, "step": 3431 }, { "epoch": 7.641425389755011, "loss": 0.4574645757675171, "loss_ce": 0.0001891769061330706, "loss_iou": 0.2119140625, "loss_num": 0.0067138671875, "loss_xval": 0.45703125, "num_input_tokens_seen": 192219304, "step": 3431 }, { "epoch": 7.643652561247216, "grad_norm": 13.734804153442383, "learning_rate": 1e-06, "loss": 0.6357, "num_input_tokens_seen": 192276704, "step": 3432 }, { "epoch": 7.643652561247216, "loss": 0.6115978956222534, "loss_ce": 0.0002697639574762434, "loss_iou": 0.2451171875, "loss_num": 0.0245361328125, "loss_xval": 0.609375, "num_input_tokens_seen": 192276704, "step": 3432 }, { "epoch": 7.645879732739421, "grad_norm": 18.749895095825195, "learning_rate": 1e-06, "loss": 0.7049, "num_input_tokens_seen": 192335292, "step": 3433 }, { "epoch": 7.645879732739421, "loss": 0.6146724820137024, "loss_ce": 0.00017052568728104234, "loss_iou": 0.26171875, "loss_num": 0.0185546875, "loss_xval": 0.61328125, "num_input_tokens_seen": 192335292, "step": 3433 }, { "epoch": 7.648106904231626, "grad_norm": 17.647464752197266, "learning_rate": 1e-06, "loss": 0.5601, "num_input_tokens_seen": 192391092, "step": 3434 }, { "epoch": 7.648106904231626, "loss": 0.519446611404419, "loss_ce": 0.00015951888053677976, "loss_iou": 0.228515625, "loss_num": 0.0123291015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 192391092, "step": 3434 }, { "epoch": 7.650334075723831, "grad_norm": 17.00111961364746, "learning_rate": 1e-06, "loss": 0.5881, "num_input_tokens_seen": 192447236, "step": 3435 }, { "epoch": 7.650334075723831, "loss": 0.4986279606819153, "loss_ce": 0.00033695262391120195, "loss_iou": 0.1923828125, "loss_num": 0.0225830078125, "loss_xval": 0.498046875, "num_input_tokens_seen": 192447236, "step": 3435 }, { "epoch": 7.652561247216036, "grad_norm": 16.545228958129883, "learning_rate": 1e-06, "loss": 0.7619, "num_input_tokens_seen": 192502120, "step": 3436 }, { "epoch": 7.652561247216036, "loss": 0.8621032238006592, "loss_ce": 0.0002867898438125849, "loss_iou": 0.353515625, "loss_num": 0.0306396484375, "loss_xval": 0.86328125, "num_input_tokens_seen": 192502120, "step": 3436 }, { "epoch": 7.6547884187082404, "grad_norm": 26.11361312866211, "learning_rate": 1e-06, "loss": 0.7085, "num_input_tokens_seen": 192556024, "step": 3437 }, { "epoch": 7.6547884187082404, "loss": 0.9188729524612427, "loss_ce": 0.00017173260857816786, "loss_iou": 0.375, "loss_num": 0.0341796875, "loss_xval": 0.91796875, "num_input_tokens_seen": 192556024, "step": 3437 }, { "epoch": 7.657015590200445, "grad_norm": 14.627326011657715, "learning_rate": 1e-06, "loss": 0.4922, "num_input_tokens_seen": 192614920, "step": 3438 }, { "epoch": 7.657015590200445, "loss": 0.47406378388404846, "loss_ce": 0.0007971928571350873, "loss_iou": 0.212890625, "loss_num": 0.0093994140625, "loss_xval": 0.47265625, "num_input_tokens_seen": 192614920, "step": 3438 }, { "epoch": 7.65924276169265, "grad_norm": 22.492679595947266, "learning_rate": 1e-06, "loss": 0.502, "num_input_tokens_seen": 192670472, "step": 3439 }, { "epoch": 7.65924276169265, "loss": 0.49001675844192505, "loss_ce": 0.000270665914285928, "loss_iou": 0.220703125, "loss_num": 0.00982666015625, "loss_xval": 0.490234375, "num_input_tokens_seen": 192670472, "step": 3439 }, { "epoch": 7.661469933184855, "grad_norm": 25.153066635131836, "learning_rate": 1e-06, "loss": 0.6003, "num_input_tokens_seen": 192724972, "step": 3440 }, { "epoch": 7.661469933184855, "loss": 0.6613759994506836, "loss_ce": 0.0002431726170470938, "loss_iou": 0.2890625, "loss_num": 0.0169677734375, "loss_xval": 0.66015625, "num_input_tokens_seen": 192724972, "step": 3440 }, { "epoch": 7.66369710467706, "grad_norm": 23.217100143432617, "learning_rate": 1e-06, "loss": 0.4673, "num_input_tokens_seen": 192780272, "step": 3441 }, { "epoch": 7.66369710467706, "loss": 0.49449318647384644, "loss_ce": 0.00023047745344229043, "loss_iou": 0.21484375, "loss_num": 0.0128173828125, "loss_xval": 0.494140625, "num_input_tokens_seen": 192780272, "step": 3441 }, { "epoch": 7.665924276169265, "grad_norm": 19.738222122192383, "learning_rate": 1e-06, "loss": 0.5292, "num_input_tokens_seen": 192834056, "step": 3442 }, { "epoch": 7.665924276169265, "loss": 0.4528142213821411, "loss_ce": 0.0001774845877662301, "loss_iou": 0.193359375, "loss_num": 0.01318359375, "loss_xval": 0.453125, "num_input_tokens_seen": 192834056, "step": 3442 }, { "epoch": 7.66815144766147, "grad_norm": 17.538352966308594, "learning_rate": 1e-06, "loss": 0.5892, "num_input_tokens_seen": 192887184, "step": 3443 }, { "epoch": 7.66815144766147, "loss": 0.5604883432388306, "loss_ce": 0.00018561651813797653, "loss_iou": 0.2451171875, "loss_num": 0.01416015625, "loss_xval": 0.55859375, "num_input_tokens_seen": 192887184, "step": 3443 }, { "epoch": 7.6703786191536745, "grad_norm": 20.0687198638916, "learning_rate": 1e-06, "loss": 0.6592, "num_input_tokens_seen": 192944640, "step": 3444 }, { "epoch": 7.6703786191536745, "loss": 0.5089391469955444, "loss_ce": 0.00027216560556553304, "loss_iou": 0.228515625, "loss_num": 0.01043701171875, "loss_xval": 0.5078125, "num_input_tokens_seen": 192944640, "step": 3444 }, { "epoch": 7.67260579064588, "grad_norm": 15.960465431213379, "learning_rate": 1e-06, "loss": 0.5891, "num_input_tokens_seen": 193000164, "step": 3445 }, { "epoch": 7.67260579064588, "loss": 0.6576277017593384, "loss_ce": 0.00040116519085131586, "loss_iou": 0.267578125, "loss_num": 0.024169921875, "loss_xval": 0.65625, "num_input_tokens_seen": 193000164, "step": 3445 }, { "epoch": 7.674832962138085, "grad_norm": 18.63033103942871, "learning_rate": 1e-06, "loss": 0.623, "num_input_tokens_seen": 193053128, "step": 3446 }, { "epoch": 7.674832962138085, "loss": 0.5889174342155457, "loss_ce": 0.00017231784295290709, "loss_iou": 0.25, "loss_num": 0.017822265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 193053128, "step": 3446 }, { "epoch": 7.67706013363029, "grad_norm": 14.499951362609863, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 193108676, "step": 3447 }, { "epoch": 7.67706013363029, "loss": 0.657649576663971, "loss_ce": 0.000178877409780398, "loss_iou": 0.3046875, "loss_num": 0.009765625, "loss_xval": 0.65625, "num_input_tokens_seen": 193108676, "step": 3447 }, { "epoch": 7.679287305122495, "grad_norm": 18.459136962890625, "learning_rate": 1e-06, "loss": 0.5688, "num_input_tokens_seen": 193166308, "step": 3448 }, { "epoch": 7.679287305122495, "loss": 0.7646270990371704, "loss_ce": 0.00022284439182840288, "loss_iou": 0.34375, "loss_num": 0.015625, "loss_xval": 0.765625, "num_input_tokens_seen": 193166308, "step": 3448 }, { "epoch": 7.6815144766147, "grad_norm": 35.68458938598633, "learning_rate": 1e-06, "loss": 0.5712, "num_input_tokens_seen": 193222788, "step": 3449 }, { "epoch": 7.6815144766147, "loss": 0.48069441318511963, "loss_ce": 0.0001646471064304933, "loss_iou": 0.212890625, "loss_num": 0.01104736328125, "loss_xval": 0.48046875, "num_input_tokens_seen": 193222788, "step": 3449 }, { "epoch": 7.6837416481069045, "grad_norm": 23.960975646972656, "learning_rate": 1e-06, "loss": 0.7771, "num_input_tokens_seen": 193273416, "step": 3450 }, { "epoch": 7.6837416481069045, "loss": 0.5310087203979492, "loss_ce": 0.0002469586324878037, "loss_iou": 0.2255859375, "loss_num": 0.015869140625, "loss_xval": 0.53125, "num_input_tokens_seen": 193273416, "step": 3450 }, { "epoch": 7.685968819599109, "grad_norm": 21.065771102905273, "learning_rate": 1e-06, "loss": 0.6134, "num_input_tokens_seen": 193327328, "step": 3451 }, { "epoch": 7.685968819599109, "loss": 0.8937476873397827, "loss_ce": 0.0001929743157234043, "loss_iou": 0.365234375, "loss_num": 0.032470703125, "loss_xval": 0.89453125, "num_input_tokens_seen": 193327328, "step": 3451 }, { "epoch": 7.688195991091314, "grad_norm": 16.559165954589844, "learning_rate": 1e-06, "loss": 0.5826, "num_input_tokens_seen": 193384244, "step": 3452 }, { "epoch": 7.688195991091314, "loss": 0.6634224653244019, "loss_ce": 0.0002144278259947896, "loss_iou": 0.302734375, "loss_num": 0.01177978515625, "loss_xval": 0.6640625, "num_input_tokens_seen": 193384244, "step": 3452 }, { "epoch": 7.690423162583519, "grad_norm": 16.13030433654785, "learning_rate": 1e-06, "loss": 0.6468, "num_input_tokens_seen": 193439784, "step": 3453 }, { "epoch": 7.690423162583519, "loss": 0.6554673910140991, "loss_ce": 0.0001939490030054003, "loss_iou": 0.283203125, "loss_num": 0.0179443359375, "loss_xval": 0.65625, "num_input_tokens_seen": 193439784, "step": 3453 }, { "epoch": 7.692650334075724, "grad_norm": 17.711097717285156, "learning_rate": 1e-06, "loss": 0.6227, "num_input_tokens_seen": 193493800, "step": 3454 }, { "epoch": 7.692650334075724, "loss": 0.6291646957397461, "loss_ce": 0.0002584658795967698, "loss_iou": 0.25390625, "loss_num": 0.0244140625, "loss_xval": 0.62890625, "num_input_tokens_seen": 193493800, "step": 3454 }, { "epoch": 7.694877505567929, "grad_norm": 16.89303207397461, "learning_rate": 1e-06, "loss": 0.5846, "num_input_tokens_seen": 193546256, "step": 3455 }, { "epoch": 7.694877505567929, "loss": 0.550576388835907, "loss_ce": 0.00016140022489707917, "loss_iou": 0.2060546875, "loss_num": 0.027587890625, "loss_xval": 0.55078125, "num_input_tokens_seen": 193546256, "step": 3455 }, { "epoch": 7.697104677060134, "grad_norm": 34.59210968017578, "learning_rate": 1e-06, "loss": 0.6684, "num_input_tokens_seen": 193602852, "step": 3456 }, { "epoch": 7.697104677060134, "loss": 0.5592398643493652, "loss_ce": 0.000157813512487337, "loss_iou": 0.263671875, "loss_num": 0.00653076171875, "loss_xval": 0.55859375, "num_input_tokens_seen": 193602852, "step": 3456 }, { "epoch": 7.6993318485523385, "grad_norm": 20.46678352355957, "learning_rate": 1e-06, "loss": 0.5734, "num_input_tokens_seen": 193661456, "step": 3457 }, { "epoch": 7.6993318485523385, "loss": 0.5596364140510559, "loss_ce": 0.00018814706709235907, "loss_iou": 0.2421875, "loss_num": 0.01513671875, "loss_xval": 0.55859375, "num_input_tokens_seen": 193661456, "step": 3457 }, { "epoch": 7.701559020044543, "grad_norm": 14.73276424407959, "learning_rate": 1e-06, "loss": 0.6287, "num_input_tokens_seen": 193720104, "step": 3458 }, { "epoch": 7.701559020044543, "loss": 0.5451168417930603, "loss_ce": 0.00019496420281939209, "loss_iou": 0.25, "loss_num": 0.00860595703125, "loss_xval": 0.546875, "num_input_tokens_seen": 193720104, "step": 3458 }, { "epoch": 7.703786191536748, "grad_norm": 14.44981575012207, "learning_rate": 1e-06, "loss": 0.3805, "num_input_tokens_seen": 193776696, "step": 3459 }, { "epoch": 7.703786191536748, "loss": 0.3983895778656006, "loss_ce": 0.00019621921819634736, "loss_iou": 0.177734375, "loss_num": 0.00830078125, "loss_xval": 0.3984375, "num_input_tokens_seen": 193776696, "step": 3459 }, { "epoch": 7.706013363028953, "grad_norm": 19.433345794677734, "learning_rate": 1e-06, "loss": 0.4632, "num_input_tokens_seen": 193834220, "step": 3460 }, { "epoch": 7.706013363028953, "loss": 0.4960872530937195, "loss_ce": 0.00023767323000356555, "loss_iou": 0.185546875, "loss_num": 0.0247802734375, "loss_xval": 0.49609375, "num_input_tokens_seen": 193834220, "step": 3460 }, { "epoch": 7.708240534521158, "grad_norm": 55.87489318847656, "learning_rate": 1e-06, "loss": 0.5689, "num_input_tokens_seen": 193888720, "step": 3461 }, { "epoch": 7.708240534521158, "loss": 0.6605603098869324, "loss_ce": 0.0004040383209940046, "loss_iou": 0.2890625, "loss_num": 0.0162353515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 193888720, "step": 3461 }, { "epoch": 7.710467706013363, "grad_norm": 28.30753517150879, "learning_rate": 1e-06, "loss": 0.7928, "num_input_tokens_seen": 193943548, "step": 3462 }, { "epoch": 7.710467706013363, "loss": 0.6849916577339172, "loss_ce": 0.00017721363110467792, "loss_iou": 0.287109375, "loss_num": 0.02197265625, "loss_xval": 0.68359375, "num_input_tokens_seen": 193943548, "step": 3462 }, { "epoch": 7.712694877505568, "grad_norm": 23.885019302368164, "learning_rate": 1e-06, "loss": 0.7603, "num_input_tokens_seen": 194000004, "step": 3463 }, { "epoch": 7.712694877505568, "loss": 0.6882323026657104, "loss_ce": 0.00024399746325798333, "loss_iou": 0.27734375, "loss_num": 0.0262451171875, "loss_xval": 0.6875, "num_input_tokens_seen": 194000004, "step": 3463 }, { "epoch": 7.714922048997773, "grad_norm": 14.701726913452148, "learning_rate": 1e-06, "loss": 0.5217, "num_input_tokens_seen": 194056588, "step": 3464 }, { "epoch": 7.714922048997773, "loss": 0.5858080387115479, "loss_ce": 0.00020622329611796886, "loss_iou": 0.240234375, "loss_num": 0.0208740234375, "loss_xval": 0.5859375, "num_input_tokens_seen": 194056588, "step": 3464 }, { "epoch": 7.717149220489977, "grad_norm": 16.439701080322266, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 194111836, "step": 3465 }, { "epoch": 7.717149220489977, "loss": 0.5037325620651245, "loss_ce": 0.0001925197138916701, "loss_iou": 0.21484375, "loss_num": 0.0146484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 194111836, "step": 3465 }, { "epoch": 7.719376391982182, "grad_norm": 21.691720962524414, "learning_rate": 1e-06, "loss": 0.628, "num_input_tokens_seen": 194168228, "step": 3466 }, { "epoch": 7.719376391982182, "loss": 0.7247686982154846, "loss_ce": 0.00015930971130728722, "loss_iou": 0.306640625, "loss_num": 0.0218505859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 194168228, "step": 3466 }, { "epoch": 7.721603563474387, "grad_norm": 13.828973770141602, "learning_rate": 1e-06, "loss": 0.5437, "num_input_tokens_seen": 194223784, "step": 3467 }, { "epoch": 7.721603563474387, "loss": 0.5051822662353516, "loss_ce": 0.00017740536713972688, "loss_iou": 0.2333984375, "loss_num": 0.007659912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 194223784, "step": 3467 }, { "epoch": 7.723830734966592, "grad_norm": 12.506599426269531, "learning_rate": 1e-06, "loss": 0.5575, "num_input_tokens_seen": 194280580, "step": 3468 }, { "epoch": 7.723830734966592, "loss": 0.5337764620780945, "loss_ce": 0.00032918865326792, "loss_iou": 0.2119140625, "loss_num": 0.0218505859375, "loss_xval": 0.53515625, "num_input_tokens_seen": 194280580, "step": 3468 }, { "epoch": 7.726057906458797, "grad_norm": 52.32142639160156, "learning_rate": 1e-06, "loss": 0.6348, "num_input_tokens_seen": 194336084, "step": 3469 }, { "epoch": 7.726057906458797, "loss": 0.6026872992515564, "loss_ce": 0.0001482292718719691, "loss_iou": 0.27734375, "loss_num": 0.0098876953125, "loss_xval": 0.6015625, "num_input_tokens_seen": 194336084, "step": 3469 }, { "epoch": 7.728285077951003, "grad_norm": 26.636455535888672, "learning_rate": 1e-06, "loss": 0.5941, "num_input_tokens_seen": 194389144, "step": 3470 }, { "epoch": 7.728285077951003, "loss": 0.5668909549713135, "loss_ce": 0.00024056396796368062, "loss_iou": 0.234375, "loss_num": 0.0196533203125, "loss_xval": 0.56640625, "num_input_tokens_seen": 194389144, "step": 3470 }, { "epoch": 7.7305122494432075, "grad_norm": 25.504764556884766, "learning_rate": 1e-06, "loss": 0.6929, "num_input_tokens_seen": 194446564, "step": 3471 }, { "epoch": 7.7305122494432075, "loss": 0.7231018543243408, "loss_ce": 0.00020141596905887127, "loss_iou": 0.31640625, "loss_num": 0.0181884765625, "loss_xval": 0.72265625, "num_input_tokens_seen": 194446564, "step": 3471 }, { "epoch": 7.732739420935412, "grad_norm": 18.57636070251465, "learning_rate": 1e-06, "loss": 0.7205, "num_input_tokens_seen": 194501216, "step": 3472 }, { "epoch": 7.732739420935412, "loss": 0.757434606552124, "loss_ce": 0.00017141405260190368, "loss_iou": 0.326171875, "loss_num": 0.021240234375, "loss_xval": 0.7578125, "num_input_tokens_seen": 194501216, "step": 3472 }, { "epoch": 7.734966592427617, "grad_norm": 23.02729034423828, "learning_rate": 1e-06, "loss": 0.6108, "num_input_tokens_seen": 194558088, "step": 3473 }, { "epoch": 7.734966592427617, "loss": 0.591259241104126, "loss_ce": 0.0001947856944752857, "loss_iou": 0.248046875, "loss_num": 0.0189208984375, "loss_xval": 0.58984375, "num_input_tokens_seen": 194558088, "step": 3473 }, { "epoch": 7.737193763919822, "grad_norm": 16.5152587890625, "learning_rate": 1e-06, "loss": 0.591, "num_input_tokens_seen": 194616640, "step": 3474 }, { "epoch": 7.737193763919822, "loss": 0.42105597257614136, "loss_ce": 0.00015753699699416757, "loss_iou": 0.189453125, "loss_num": 0.00830078125, "loss_xval": 0.421875, "num_input_tokens_seen": 194616640, "step": 3474 }, { "epoch": 7.739420935412027, "grad_norm": 20.325769424438477, "learning_rate": 1e-06, "loss": 0.8947, "num_input_tokens_seen": 194674240, "step": 3475 }, { "epoch": 7.739420935412027, "loss": 1.23799729347229, "loss_ce": 0.0003264172119088471, "loss_iou": 0.478515625, "loss_num": 0.055908203125, "loss_xval": 1.234375, "num_input_tokens_seen": 194674240, "step": 3475 }, { "epoch": 7.741648106904232, "grad_norm": 21.625873565673828, "learning_rate": 1e-06, "loss": 0.5786, "num_input_tokens_seen": 194729012, "step": 3476 }, { "epoch": 7.741648106904232, "loss": 0.391615629196167, "loss_ce": 0.000136150760226883, "loss_iou": 0.16796875, "loss_num": 0.01104736328125, "loss_xval": 0.390625, "num_input_tokens_seen": 194729012, "step": 3476 }, { "epoch": 7.743875278396437, "grad_norm": 14.393267631530762, "learning_rate": 1e-06, "loss": 0.6558, "num_input_tokens_seen": 194786480, "step": 3477 }, { "epoch": 7.743875278396437, "loss": 0.5332167148590088, "loss_ce": 0.0001356468565063551, "loss_iou": 0.220703125, "loss_num": 0.0181884765625, "loss_xval": 0.53125, "num_input_tokens_seen": 194786480, "step": 3477 }, { "epoch": 7.7461024498886415, "grad_norm": 33.26962661743164, "learning_rate": 1e-06, "loss": 0.9039, "num_input_tokens_seen": 194842640, "step": 3478 }, { "epoch": 7.7461024498886415, "loss": 0.8585567474365234, "loss_ce": 0.00015828575124032795, "loss_iou": 0.37109375, "loss_num": 0.0234375, "loss_xval": 0.859375, "num_input_tokens_seen": 194842640, "step": 3478 }, { "epoch": 7.748329621380846, "grad_norm": 20.943241119384766, "learning_rate": 1e-06, "loss": 0.5986, "num_input_tokens_seen": 194900196, "step": 3479 }, { "epoch": 7.748329621380846, "loss": 0.5145847797393799, "loss_ce": 0.00018048740457743406, "loss_iou": 0.232421875, "loss_num": 0.01007080078125, "loss_xval": 0.515625, "num_input_tokens_seen": 194900196, "step": 3479 }, { "epoch": 7.750556792873051, "grad_norm": 27.85867691040039, "learning_rate": 1e-06, "loss": 0.4849, "num_input_tokens_seen": 194956328, "step": 3480 }, { "epoch": 7.750556792873051, "loss": 0.5256341695785522, "loss_ce": 0.0007318383431993425, "loss_iou": 0.2421875, "loss_num": 0.00799560546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 194956328, "step": 3480 }, { "epoch": 7.752783964365256, "grad_norm": 23.426788330078125, "learning_rate": 1e-06, "loss": 0.8095, "num_input_tokens_seen": 195011508, "step": 3481 }, { "epoch": 7.752783964365256, "loss": 0.9079303741455078, "loss_ce": 0.00021555817511398345, "loss_iou": 0.3828125, "loss_num": 0.02880859375, "loss_xval": 0.90625, "num_input_tokens_seen": 195011508, "step": 3481 }, { "epoch": 7.755011135857461, "grad_norm": 26.646705627441406, "learning_rate": 1e-06, "loss": 0.6848, "num_input_tokens_seen": 195066276, "step": 3482 }, { "epoch": 7.755011135857461, "loss": 0.5551425218582153, "loss_ce": 0.00021085041225887835, "loss_iou": 0.2314453125, "loss_num": 0.0185546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 195066276, "step": 3482 }, { "epoch": 7.757238307349666, "grad_norm": 21.877819061279297, "learning_rate": 1e-06, "loss": 0.75, "num_input_tokens_seen": 195121820, "step": 3483 }, { "epoch": 7.757238307349666, "loss": 0.87198805809021, "loss_ce": 0.00016191550821531564, "loss_iou": 0.37890625, "loss_num": 0.0224609375, "loss_xval": 0.87109375, "num_input_tokens_seen": 195121820, "step": 3483 }, { "epoch": 7.759465478841871, "grad_norm": 17.727195739746094, "learning_rate": 1e-06, "loss": 0.7847, "num_input_tokens_seen": 195179468, "step": 3484 }, { "epoch": 7.759465478841871, "loss": 0.8190126419067383, "loss_ce": 0.00016502838116139174, "loss_iou": 0.353515625, "loss_num": 0.0224609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 195179468, "step": 3484 }, { "epoch": 7.7616926503340755, "grad_norm": 18.596248626708984, "learning_rate": 1e-06, "loss": 0.4576, "num_input_tokens_seen": 195233616, "step": 3485 }, { "epoch": 7.7616926503340755, "loss": 0.4784582555294037, "loss_ce": 0.0001867622195277363, "loss_iou": 0.203125, "loss_num": 0.01446533203125, "loss_xval": 0.478515625, "num_input_tokens_seen": 195233616, "step": 3485 }, { "epoch": 7.76391982182628, "grad_norm": 21.53468894958496, "learning_rate": 1e-06, "loss": 0.5638, "num_input_tokens_seen": 195289464, "step": 3486 }, { "epoch": 7.76391982182628, "loss": 0.6328760981559753, "loss_ce": 0.00018564131460152566, "loss_iou": 0.265625, "loss_num": 0.0198974609375, "loss_xval": 0.6328125, "num_input_tokens_seen": 195289464, "step": 3486 }, { "epoch": 7.766146993318485, "grad_norm": 17.920970916748047, "learning_rate": 1e-06, "loss": 0.8169, "num_input_tokens_seen": 195344536, "step": 3487 }, { "epoch": 7.766146993318485, "loss": 0.45805519819259644, "loss_ce": 0.00016945481183938682, "loss_iou": 0.2080078125, "loss_num": 0.0084228515625, "loss_xval": 0.45703125, "num_input_tokens_seen": 195344536, "step": 3487 }, { "epoch": 7.76837416481069, "grad_norm": 17.782272338867188, "learning_rate": 1e-06, "loss": 0.5641, "num_input_tokens_seen": 195399244, "step": 3488 }, { "epoch": 7.76837416481069, "loss": 0.5128250122070312, "loss_ce": 0.00025179749354720116, "loss_iou": 0.2255859375, "loss_num": 0.01214599609375, "loss_xval": 0.51171875, "num_input_tokens_seen": 195399244, "step": 3488 }, { "epoch": 7.770601336302895, "grad_norm": 28.824478149414062, "learning_rate": 1e-06, "loss": 0.4858, "num_input_tokens_seen": 195455096, "step": 3489 }, { "epoch": 7.770601336302895, "loss": 0.5841382741928101, "loss_ce": 0.0001538892393000424, "loss_iou": 0.259765625, "loss_num": 0.0125732421875, "loss_xval": 0.5859375, "num_input_tokens_seen": 195455096, "step": 3489 }, { "epoch": 7.772828507795101, "grad_norm": 16.21584129333496, "learning_rate": 1e-06, "loss": 0.6751, "num_input_tokens_seen": 195507752, "step": 3490 }, { "epoch": 7.772828507795101, "loss": 0.7475310564041138, "loss_ce": 0.0002166137855965644, "loss_iou": 0.322265625, "loss_num": 0.0206298828125, "loss_xval": 0.74609375, "num_input_tokens_seen": 195507752, "step": 3490 }, { "epoch": 7.775055679287306, "grad_norm": 16.735774993896484, "learning_rate": 1e-06, "loss": 0.5796, "num_input_tokens_seen": 195565384, "step": 3491 }, { "epoch": 7.775055679287306, "loss": 0.6273143291473389, "loss_ce": 0.00023911299649626017, "loss_iou": 0.25390625, "loss_num": 0.02392578125, "loss_xval": 0.62890625, "num_input_tokens_seen": 195565384, "step": 3491 }, { "epoch": 7.77728285077951, "grad_norm": 17.522539138793945, "learning_rate": 1e-06, "loss": 0.7027, "num_input_tokens_seen": 195619800, "step": 3492 }, { "epoch": 7.77728285077951, "loss": 0.7621063590049744, "loss_ce": 0.00014349556295201182, "loss_iou": 0.322265625, "loss_num": 0.0235595703125, "loss_xval": 0.76171875, "num_input_tokens_seen": 195619800, "step": 3492 }, { "epoch": 7.779510022271715, "grad_norm": 26.973304748535156, "learning_rate": 1e-06, "loss": 0.9673, "num_input_tokens_seen": 195673328, "step": 3493 }, { "epoch": 7.779510022271715, "loss": 1.0272767543792725, "loss_ce": 0.0006654445314779878, "loss_iou": 0.435546875, "loss_num": 0.03125, "loss_xval": 1.0234375, "num_input_tokens_seen": 195673328, "step": 3493 }, { "epoch": 7.78173719376392, "grad_norm": 19.321470260620117, "learning_rate": 1e-06, "loss": 0.6188, "num_input_tokens_seen": 195728884, "step": 3494 }, { "epoch": 7.78173719376392, "loss": 0.5602940320968628, "loss_ce": 0.0002354723692405969, "loss_iou": 0.2265625, "loss_num": 0.0213623046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 195728884, "step": 3494 }, { "epoch": 7.783964365256125, "grad_norm": 19.93840789794922, "learning_rate": 1e-06, "loss": 0.5698, "num_input_tokens_seen": 195783412, "step": 3495 }, { "epoch": 7.783964365256125, "loss": 0.5731680393218994, "loss_ce": 0.00016997376224026084, "loss_iou": 0.224609375, "loss_num": 0.02490234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 195783412, "step": 3495 }, { "epoch": 7.78619153674833, "grad_norm": 23.796375274658203, "learning_rate": 1e-06, "loss": 0.6692, "num_input_tokens_seen": 195838768, "step": 3496 }, { "epoch": 7.78619153674833, "loss": 0.717685341835022, "loss_ce": 0.00015606911620125175, "loss_iou": 0.3125, "loss_num": 0.01806640625, "loss_xval": 0.71875, "num_input_tokens_seen": 195838768, "step": 3496 }, { "epoch": 7.788418708240535, "grad_norm": 15.419715881347656, "learning_rate": 1e-06, "loss": 0.6021, "num_input_tokens_seen": 195894208, "step": 3497 }, { "epoch": 7.788418708240535, "loss": 0.7013325691223145, "loss_ce": 0.00016067746037151664, "loss_iou": 0.310546875, "loss_num": 0.0159912109375, "loss_xval": 0.703125, "num_input_tokens_seen": 195894208, "step": 3497 }, { "epoch": 7.79064587973274, "grad_norm": 21.454118728637695, "learning_rate": 1e-06, "loss": 0.6689, "num_input_tokens_seen": 195949980, "step": 3498 }, { "epoch": 7.79064587973274, "loss": 0.6081476211547852, "loss_ce": 0.00023741269251331687, "loss_iou": 0.2470703125, "loss_num": 0.02294921875, "loss_xval": 0.609375, "num_input_tokens_seen": 195949980, "step": 3498 }, { "epoch": 7.7928730512249444, "grad_norm": 21.61153793334961, "learning_rate": 1e-06, "loss": 0.7103, "num_input_tokens_seen": 196005100, "step": 3499 }, { "epoch": 7.7928730512249444, "loss": 0.5294746160507202, "loss_ce": 0.0012153343996033072, "loss_iou": 0.2353515625, "loss_num": 0.0113525390625, "loss_xval": 0.52734375, "num_input_tokens_seen": 196005100, "step": 3499 }, { "epoch": 7.795100222717149, "grad_norm": 15.124077796936035, "learning_rate": 1e-06, "loss": 0.7598, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "eval_seeclick_web_CIoU": 0.5788059234619141, "eval_seeclick_web_GIoU": 0.5771978497505188, "eval_seeclick_web_IoU": 0.5963517725467682, "eval_seeclick_web_MAE_all": 0.01642331574112177, "eval_seeclick_web_MAE_h": 0.008296339772641659, "eval_seeclick_web_MAE_w": 0.01685692649334669, "eval_seeclick_web_MAE_x_boxes": 0.009794581681489944, "eval_seeclick_web_MAE_y_boxes": 0.02204720745794475, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9286947846412659, "eval_seeclick_web_loss_ce": 0.0002503915602574125, "eval_seeclick_web_loss_iou": 0.4237060546875, "eval_seeclick_web_loss_num": 0.013225555419921875, "eval_seeclick_web_loss_xval": 0.913330078125, "eval_seeclick_web_runtime": 20.0349, "eval_seeclick_web_samples_per_second": 2.496, "eval_seeclick_web_steps_per_second": 0.1, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "eval_icons_CIoU": 0.3098849505186081, "eval_icons_GIoU": 0.3192085027694702, "eval_icons_IoU": 0.3817008137702942, "eval_icons_MAE_all": 0.0535985603928566, "eval_icons_MAE_h": 0.038005659356713295, "eval_icons_MAE_w": 0.04143812507390976, "eval_icons_MAE_x_boxes": 0.05823635682463646, "eval_icons_MAE_y_boxes": 0.037908594124019146, "eval_icons_inside_bbox": 0.6493055522441864, "eval_icons_loss": 1.6437243223190308, "eval_icons_loss_ce": 0.00029484537662938237, "eval_icons_loss_iou": 0.649658203125, "eval_icons_loss_num": 0.04685401916503906, "eval_icons_loss_xval": 1.533203125, "eval_icons_runtime": 21.1399, "eval_icons_samples_per_second": 2.365, "eval_icons_steps_per_second": 0.095, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "eval_screenspot_CIoU": 0.34179479877154034, "eval_screenspot_GIoU": 0.35942623019218445, "eval_screenspot_IoU": 0.4252944588661194, "eval_screenspot_MAE_all": 0.06343474984169006, "eval_screenspot_MAE_h": 0.037477921694517136, "eval_screenspot_MAE_w": 0.07201713944474857, "eval_screenspot_MAE_x_boxes": 0.07516818679869175, "eval_screenspot_MAE_y_boxes": 0.046938162917892136, "eval_screenspot_inside_bbox": 0.6966666579246521, "eval_screenspot_loss": 1.6630264520645142, "eval_screenspot_loss_ce": 0.0002933433085369567, "eval_screenspot_loss_iou": 0.6790364583333334, "eval_screenspot_loss_num": 0.07456461588541667, "eval_screenspot_loss_xval": 1.7312825520833333, "eval_screenspot_runtime": 33.7448, "eval_screenspot_samples_per_second": 2.637, "eval_screenspot_steps_per_second": 0.089, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "eval_compot_CIoU": 0.3524170517921448, "eval_compot_GIoU": 0.3639347553253174, "eval_compot_IoU": 0.40965285897254944, "eval_compot_MAE_all": 0.017891014460474253, "eval_compot_MAE_h": 0.00863239774480462, "eval_compot_MAE_w": 0.022040129639208317, "eval_compot_MAE_x_boxes": 0.029601704329252243, "eval_compot_MAE_y_boxes": 0.00674438988789916, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3697645664215088, "eval_compot_loss_ce": 0.00023254087136592716, "eval_compot_loss_iou": 0.62646484375, "eval_compot_loss_num": 0.016546249389648438, "eval_compot_loss_xval": 1.335205078125, "eval_compot_runtime": 20.9161, "eval_compot_samples_per_second": 2.391, "eval_compot_steps_per_second": 0.096, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "eval_custom_ui_val_CIoU": 0.4692361321714189, "eval_custom_ui_val_GIoU": 0.48255549867947894, "eval_custom_ui_val_IoU": 0.5274171696768867, "eval_custom_ui_val_MAE_all": 0.029507537372410297, "eval_custom_ui_val_MAE_h": 0.016804457098866504, "eval_custom_ui_val_MAE_w": 0.03681204499055942, "eval_custom_ui_val_MAE_x_boxes": 0.03406558599736956, "eval_custom_ui_val_MAE_y_boxes": 0.015829176952441532, "eval_custom_ui_val_inside_bbox": 0.7457561757829454, "eval_custom_ui_val_loss": 1.1831005811691284, "eval_custom_ui_val_loss_ce": 0.0002701325170669912, "eval_custom_ui_val_loss_iou": 0.5026584201388888, "eval_custom_ui_val_loss_num": 0.027642779880099826, "eval_custom_ui_val_loss_xval": 1.1439887152777777, "eval_custom_ui_val_runtime": 59.4212, "eval_custom_ui_val_samples_per_second": 4.46, "eval_custom_ui_val_steps_per_second": 0.151, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.795100222717149, "loss": 0.8771896958351135, "loss_ce": 0.0002365570981055498, "loss_iou": 0.3828125, "loss_num": 0.0220947265625, "loss_xval": 0.875, "num_input_tokens_seen": 196060760, "step": 3500 }, { "epoch": 7.797327394209354, "grad_norm": 20.44601058959961, "learning_rate": 1e-06, "loss": 0.7573, "num_input_tokens_seen": 196116308, "step": 3501 }, { "epoch": 7.797327394209354, "loss": 0.790895402431488, "loss_ce": 0.0003680819063447416, "loss_iou": 0.306640625, "loss_num": 0.035400390625, "loss_xval": 0.7890625, "num_input_tokens_seen": 196116308, "step": 3501 }, { "epoch": 7.799554565701559, "grad_norm": 19.4777774810791, "learning_rate": 1e-06, "loss": 0.6974, "num_input_tokens_seen": 196172864, "step": 3502 }, { "epoch": 7.799554565701559, "loss": 0.7237052917480469, "loss_ce": 0.00019457802409306169, "loss_iou": 0.3125, "loss_num": 0.0196533203125, "loss_xval": 0.72265625, "num_input_tokens_seen": 196172864, "step": 3502 }, { "epoch": 7.801781737193764, "grad_norm": 18.440637588500977, "learning_rate": 1e-06, "loss": 0.5529, "num_input_tokens_seen": 196229132, "step": 3503 }, { "epoch": 7.801781737193764, "loss": 0.5999140739440918, "loss_ce": 0.0005488179158419371, "loss_iou": 0.2734375, "loss_num": 0.0107421875, "loss_xval": 0.59765625, "num_input_tokens_seen": 196229132, "step": 3503 }, { "epoch": 7.804008908685969, "grad_norm": 24.30689811706543, "learning_rate": 1e-06, "loss": 0.4758, "num_input_tokens_seen": 196287452, "step": 3504 }, { "epoch": 7.804008908685969, "loss": 0.4462117552757263, "loss_ce": 0.00016680760018061846, "loss_iou": 0.1962890625, "loss_num": 0.0106201171875, "loss_xval": 0.4453125, "num_input_tokens_seen": 196287452, "step": 3504 }, { "epoch": 7.806236080178174, "grad_norm": 21.83576011657715, "learning_rate": 1e-06, "loss": 0.7566, "num_input_tokens_seen": 196340952, "step": 3505 }, { "epoch": 7.806236080178174, "loss": 0.8626251220703125, "loss_ce": 0.0003204582317266613, "loss_iou": 0.38671875, "loss_num": 0.018310546875, "loss_xval": 0.86328125, "num_input_tokens_seen": 196340952, "step": 3505 }, { "epoch": 7.8084632516703785, "grad_norm": 41.28773880004883, "learning_rate": 1e-06, "loss": 0.5113, "num_input_tokens_seen": 196397792, "step": 3506 }, { "epoch": 7.8084632516703785, "loss": 0.5087121725082397, "loss_ce": 0.0001672878279350698, "loss_iou": 0.216796875, "loss_num": 0.01519775390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 196397792, "step": 3506 }, { "epoch": 7.810690423162583, "grad_norm": 14.272880554199219, "learning_rate": 1e-06, "loss": 0.505, "num_input_tokens_seen": 196455836, "step": 3507 }, { "epoch": 7.810690423162583, "loss": 0.4430527091026306, "loss_ce": 0.0001816436561057344, "loss_iou": 0.19140625, "loss_num": 0.0120849609375, "loss_xval": 0.443359375, "num_input_tokens_seen": 196455836, "step": 3507 }, { "epoch": 7.812917594654788, "grad_norm": 16.481369018554688, "learning_rate": 1e-06, "loss": 0.6081, "num_input_tokens_seen": 196513684, "step": 3508 }, { "epoch": 7.812917594654788, "loss": 0.6896613836288452, "loss_ce": 0.00020824806415475905, "loss_iou": 0.3046875, "loss_num": 0.01611328125, "loss_xval": 0.6875, "num_input_tokens_seen": 196513684, "step": 3508 }, { "epoch": 7.815144766146993, "grad_norm": 16.66682243347168, "learning_rate": 1e-06, "loss": 0.561, "num_input_tokens_seen": 196566484, "step": 3509 }, { "epoch": 7.815144766146993, "loss": 0.47554415464401245, "loss_ce": 0.00020236926502548158, "loss_iou": 0.1865234375, "loss_num": 0.0206298828125, "loss_xval": 0.474609375, "num_input_tokens_seen": 196566484, "step": 3509 }, { "epoch": 7.817371937639198, "grad_norm": 31.580455780029297, "learning_rate": 1e-06, "loss": 0.584, "num_input_tokens_seen": 196621400, "step": 3510 }, { "epoch": 7.817371937639198, "loss": 0.6484927535057068, "loss_ce": 0.00017732605920173228, "loss_iou": 0.275390625, "loss_num": 0.019775390625, "loss_xval": 0.6484375, "num_input_tokens_seen": 196621400, "step": 3510 }, { "epoch": 7.819599109131403, "grad_norm": 14.395856857299805, "learning_rate": 1e-06, "loss": 0.5939, "num_input_tokens_seen": 196678068, "step": 3511 }, { "epoch": 7.819599109131403, "loss": 0.5976732969284058, "loss_ce": 0.0001391283149132505, "loss_iou": 0.248046875, "loss_num": 0.020263671875, "loss_xval": 0.59765625, "num_input_tokens_seen": 196678068, "step": 3511 }, { "epoch": 7.821826280623608, "grad_norm": 22.546255111694336, "learning_rate": 1e-06, "loss": 0.5801, "num_input_tokens_seen": 196733972, "step": 3512 }, { "epoch": 7.821826280623608, "loss": 0.467131644487381, "loss_ce": 0.00015164985961746424, "loss_iou": 0.212890625, "loss_num": 0.00811767578125, "loss_xval": 0.466796875, "num_input_tokens_seen": 196733972, "step": 3512 }, { "epoch": 7.8240534521158125, "grad_norm": 16.682117462158203, "learning_rate": 1e-06, "loss": 0.5618, "num_input_tokens_seen": 196790868, "step": 3513 }, { "epoch": 7.8240534521158125, "loss": 0.6464253664016724, "loss_ce": 0.00018512400856707245, "loss_iou": 0.267578125, "loss_num": 0.02197265625, "loss_xval": 0.64453125, "num_input_tokens_seen": 196790868, "step": 3513 }, { "epoch": 7.826280623608017, "grad_norm": 18.157453536987305, "learning_rate": 1e-06, "loss": 0.6781, "num_input_tokens_seen": 196847364, "step": 3514 }, { "epoch": 7.826280623608017, "loss": 0.6460493803024292, "loss_ce": 0.0001753381366143003, "loss_iou": 0.279296875, "loss_num": 0.017578125, "loss_xval": 0.64453125, "num_input_tokens_seen": 196847364, "step": 3514 }, { "epoch": 7.828507795100223, "grad_norm": 20.77778434753418, "learning_rate": 1e-06, "loss": 0.743, "num_input_tokens_seen": 196904288, "step": 3515 }, { "epoch": 7.828507795100223, "loss": 0.7536814212799072, "loss_ce": 0.00026343436911702156, "loss_iou": 0.318359375, "loss_num": 0.0235595703125, "loss_xval": 0.75390625, "num_input_tokens_seen": 196904288, "step": 3515 }, { "epoch": 7.830734966592428, "grad_norm": 15.515265464782715, "learning_rate": 1e-06, "loss": 0.4926, "num_input_tokens_seen": 196961928, "step": 3516 }, { "epoch": 7.830734966592428, "loss": 0.4176163673400879, "loss_ce": 0.00013586709974333644, "loss_iou": 0.1689453125, "loss_num": 0.01611328125, "loss_xval": 0.41796875, "num_input_tokens_seen": 196961928, "step": 3516 }, { "epoch": 7.832962138084633, "grad_norm": 23.900028228759766, "learning_rate": 1e-06, "loss": 0.4868, "num_input_tokens_seen": 197017236, "step": 3517 }, { "epoch": 7.832962138084633, "loss": 0.49435505270957947, "loss_ce": 0.00015339165111072361, "loss_iou": 0.2275390625, "loss_num": 0.0079345703125, "loss_xval": 0.494140625, "num_input_tokens_seen": 197017236, "step": 3517 }, { "epoch": 7.835189309576838, "grad_norm": 15.74838638305664, "learning_rate": 1e-06, "loss": 0.7655, "num_input_tokens_seen": 197073028, "step": 3518 }, { "epoch": 7.835189309576838, "loss": 1.0873568058013916, "loss_ce": 0.00019860133761540055, "loss_iou": 0.443359375, "loss_num": 0.04052734375, "loss_xval": 1.0859375, "num_input_tokens_seen": 197073028, "step": 3518 }, { "epoch": 7.8374164810690425, "grad_norm": 25.064544677734375, "learning_rate": 1e-06, "loss": 0.552, "num_input_tokens_seen": 197130308, "step": 3519 }, { "epoch": 7.8374164810690425, "loss": 0.512490451335907, "loss_ce": 0.0001613302156329155, "loss_iou": 0.2158203125, "loss_num": 0.01611328125, "loss_xval": 0.51171875, "num_input_tokens_seen": 197130308, "step": 3519 }, { "epoch": 7.839643652561247, "grad_norm": 20.62727928161621, "learning_rate": 1e-06, "loss": 0.4757, "num_input_tokens_seen": 197186840, "step": 3520 }, { "epoch": 7.839643652561247, "loss": 0.6267750263214111, "loss_ce": 0.000188069258001633, "loss_iou": 0.2734375, "loss_num": 0.015625, "loss_xval": 0.625, "num_input_tokens_seen": 197186840, "step": 3520 }, { "epoch": 7.841870824053452, "grad_norm": 15.101804733276367, "learning_rate": 1e-06, "loss": 0.6523, "num_input_tokens_seen": 197244324, "step": 3521 }, { "epoch": 7.841870824053452, "loss": 0.8312093019485474, "loss_ce": 0.0002156283298972994, "loss_iou": 0.357421875, "loss_num": 0.0233154296875, "loss_xval": 0.83203125, "num_input_tokens_seen": 197244324, "step": 3521 }, { "epoch": 7.844097995545657, "grad_norm": 27.03868865966797, "learning_rate": 1e-06, "loss": 0.52, "num_input_tokens_seen": 197299300, "step": 3522 }, { "epoch": 7.844097995545657, "loss": 0.5687766075134277, "loss_ce": 0.0002951917704194784, "loss_iou": 0.244140625, "loss_num": 0.015869140625, "loss_xval": 0.5703125, "num_input_tokens_seen": 197299300, "step": 3522 }, { "epoch": 7.846325167037862, "grad_norm": 51.59347915649414, "learning_rate": 1e-06, "loss": 0.6587, "num_input_tokens_seen": 197355036, "step": 3523 }, { "epoch": 7.846325167037862, "loss": 0.6813713908195496, "loss_ce": 0.0002190661762142554, "loss_iou": 0.298828125, "loss_num": 0.0166015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 197355036, "step": 3523 }, { "epoch": 7.848552338530067, "grad_norm": 14.566356658935547, "learning_rate": 1e-06, "loss": 0.5917, "num_input_tokens_seen": 197410556, "step": 3524 }, { "epoch": 7.848552338530067, "loss": 0.5617358684539795, "loss_ce": 0.0002124165475834161, "loss_iou": 0.2255859375, "loss_num": 0.0220947265625, "loss_xval": 0.5625, "num_input_tokens_seen": 197410556, "step": 3524 }, { "epoch": 7.850779510022272, "grad_norm": 17.55548858642578, "learning_rate": 1e-06, "loss": 0.5818, "num_input_tokens_seen": 197468284, "step": 3525 }, { "epoch": 7.850779510022272, "loss": 0.6032076478004456, "loss_ce": 0.0001802719198167324, "loss_iou": 0.26953125, "loss_num": 0.01300048828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 197468284, "step": 3525 }, { "epoch": 7.853006681514477, "grad_norm": 48.917110443115234, "learning_rate": 1e-06, "loss": 0.7346, "num_input_tokens_seen": 197525216, "step": 3526 }, { "epoch": 7.853006681514477, "loss": 0.9218271970748901, "loss_ce": 0.0001963673421414569, "loss_iou": 0.375, "loss_num": 0.033935546875, "loss_xval": 0.921875, "num_input_tokens_seen": 197525216, "step": 3526 }, { "epoch": 7.855233853006681, "grad_norm": 32.04833984375, "learning_rate": 1e-06, "loss": 0.5597, "num_input_tokens_seen": 197580472, "step": 3527 }, { "epoch": 7.855233853006681, "loss": 0.5170796513557434, "loss_ce": 0.00023395352764055133, "loss_iou": 0.2265625, "loss_num": 0.012451171875, "loss_xval": 0.515625, "num_input_tokens_seen": 197580472, "step": 3527 }, { "epoch": 7.857461024498886, "grad_norm": 20.859832763671875, "learning_rate": 1e-06, "loss": 0.6578, "num_input_tokens_seen": 197636904, "step": 3528 }, { "epoch": 7.857461024498886, "loss": 0.8837323188781738, "loss_ce": 0.00018741836538538337, "loss_iou": 0.34765625, "loss_num": 0.03759765625, "loss_xval": 0.8828125, "num_input_tokens_seen": 197636904, "step": 3528 }, { "epoch": 7.859688195991091, "grad_norm": 20.60695457458496, "learning_rate": 1e-06, "loss": 0.7797, "num_input_tokens_seen": 197689184, "step": 3529 }, { "epoch": 7.859688195991091, "loss": 0.9426758289337158, "loss_ce": 0.0003540791803970933, "loss_iou": 0.39453125, "loss_num": 0.0303955078125, "loss_xval": 0.94140625, "num_input_tokens_seen": 197689184, "step": 3529 }, { "epoch": 7.861915367483296, "grad_norm": 23.411840438842773, "learning_rate": 1e-06, "loss": 0.7374, "num_input_tokens_seen": 197745056, "step": 3530 }, { "epoch": 7.861915367483296, "loss": 0.8417353630065918, "loss_ce": 0.00018263611127622426, "loss_iou": 0.337890625, "loss_num": 0.032958984375, "loss_xval": 0.83984375, "num_input_tokens_seen": 197745056, "step": 3530 }, { "epoch": 7.864142538975501, "grad_norm": 14.93423843383789, "learning_rate": 1e-06, "loss": 0.4174, "num_input_tokens_seen": 197801836, "step": 3531 }, { "epoch": 7.864142538975501, "loss": 0.4481639862060547, "loss_ce": 0.00016593134205322713, "loss_iou": 0.171875, "loss_num": 0.0208740234375, "loss_xval": 0.447265625, "num_input_tokens_seen": 197801836, "step": 3531 }, { "epoch": 7.866369710467706, "grad_norm": 19.369924545288086, "learning_rate": 1e-06, "loss": 0.6988, "num_input_tokens_seen": 197858596, "step": 3532 }, { "epoch": 7.866369710467706, "loss": 0.987027108669281, "loss_ce": 0.0002106587344314903, "loss_iou": 0.412109375, "loss_num": 0.032470703125, "loss_xval": 0.98828125, "num_input_tokens_seen": 197858596, "step": 3532 }, { "epoch": 7.868596881959911, "grad_norm": 37.832054138183594, "learning_rate": 1e-06, "loss": 0.7192, "num_input_tokens_seen": 197915224, "step": 3533 }, { "epoch": 7.868596881959911, "loss": 0.7180832028388977, "loss_ce": 0.000309744878904894, "loss_iou": 0.30078125, "loss_num": 0.023681640625, "loss_xval": 0.71875, "num_input_tokens_seen": 197915224, "step": 3533 }, { "epoch": 7.870824053452115, "grad_norm": 13.525866508483887, "learning_rate": 1e-06, "loss": 0.4544, "num_input_tokens_seen": 197971236, "step": 3534 }, { "epoch": 7.870824053452115, "loss": 0.5098298788070679, "loss_ce": 0.0001863364304881543, "loss_iou": 0.2158203125, "loss_num": 0.015380859375, "loss_xval": 0.5078125, "num_input_tokens_seen": 197971236, "step": 3534 }, { "epoch": 7.873051224944321, "grad_norm": 20.633840560913086, "learning_rate": 1e-06, "loss": 0.6103, "num_input_tokens_seen": 198029292, "step": 3535 }, { "epoch": 7.873051224944321, "loss": 0.7734463810920715, "loss_ce": 0.0002530375204514712, "loss_iou": 0.361328125, "loss_num": 0.0103759765625, "loss_xval": 0.7734375, "num_input_tokens_seen": 198029292, "step": 3535 }, { "epoch": 7.875278396436526, "grad_norm": 169.28097534179688, "learning_rate": 1e-06, "loss": 0.6063, "num_input_tokens_seen": 198083980, "step": 3536 }, { "epoch": 7.875278396436526, "loss": 0.52978515625, "loss_ce": 0.0004882930079475045, "loss_iou": 0.2294921875, "loss_num": 0.0140380859375, "loss_xval": 0.53125, "num_input_tokens_seen": 198083980, "step": 3536 }, { "epoch": 7.877505567928731, "grad_norm": 20.492658615112305, "learning_rate": 1e-06, "loss": 0.5758, "num_input_tokens_seen": 198139948, "step": 3537 }, { "epoch": 7.877505567928731, "loss": 0.6573839783668518, "loss_ce": 0.00015742500545457006, "loss_iou": 0.271484375, "loss_num": 0.0230712890625, "loss_xval": 0.65625, "num_input_tokens_seen": 198139948, "step": 3537 }, { "epoch": 7.879732739420936, "grad_norm": 27.89311981201172, "learning_rate": 1e-06, "loss": 0.8226, "num_input_tokens_seen": 198198180, "step": 3538 }, { "epoch": 7.879732739420936, "loss": 0.9203274250030518, "loss_ce": 0.00016145262634381652, "loss_iou": 0.40625, "loss_num": 0.021728515625, "loss_xval": 0.921875, "num_input_tokens_seen": 198198180, "step": 3538 }, { "epoch": 7.881959910913141, "grad_norm": 21.136817932128906, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 198253928, "step": 3539 }, { "epoch": 7.881959910913141, "loss": 0.5626224279403687, "loss_ce": 0.0003665737749543041, "loss_iou": 0.248046875, "loss_num": 0.0133056640625, "loss_xval": 0.5625, "num_input_tokens_seen": 198253928, "step": 3539 }, { "epoch": 7.8841870824053455, "grad_norm": 28.12201499938965, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 198309852, "step": 3540 }, { "epoch": 7.8841870824053455, "loss": 0.5494855642318726, "loss_ce": 0.00016917182074394077, "loss_iou": 0.22265625, "loss_num": 0.0206298828125, "loss_xval": 0.55078125, "num_input_tokens_seen": 198309852, "step": 3540 }, { "epoch": 7.88641425389755, "grad_norm": 47.74600601196289, "learning_rate": 1e-06, "loss": 0.5923, "num_input_tokens_seen": 198365504, "step": 3541 }, { "epoch": 7.88641425389755, "loss": 0.6581393480300903, "loss_ce": 0.00042452660272829235, "loss_iou": 0.267578125, "loss_num": 0.0240478515625, "loss_xval": 0.65625, "num_input_tokens_seen": 198365504, "step": 3541 }, { "epoch": 7.888641425389755, "grad_norm": 11.964970588684082, "learning_rate": 1e-06, "loss": 0.4837, "num_input_tokens_seen": 198420992, "step": 3542 }, { "epoch": 7.888641425389755, "loss": 0.46798640489578247, "loss_ce": 0.00021299449144862592, "loss_iou": 0.2021484375, "loss_num": 0.01263427734375, "loss_xval": 0.46875, "num_input_tokens_seen": 198420992, "step": 3542 }, { "epoch": 7.89086859688196, "grad_norm": 14.015028953552246, "learning_rate": 1e-06, "loss": 0.5561, "num_input_tokens_seen": 198478588, "step": 3543 }, { "epoch": 7.89086859688196, "loss": 0.5441027879714966, "loss_ce": 0.0001574681227793917, "loss_iou": 0.234375, "loss_num": 0.01495361328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 198478588, "step": 3543 }, { "epoch": 7.893095768374165, "grad_norm": 13.727971076965332, "learning_rate": 1e-06, "loss": 0.7564, "num_input_tokens_seen": 198532012, "step": 3544 }, { "epoch": 7.893095768374165, "loss": 0.9394014477729797, "loss_ce": 0.0001924369134940207, "loss_iou": 0.3671875, "loss_num": 0.040771484375, "loss_xval": 0.9375, "num_input_tokens_seen": 198532012, "step": 3544 }, { "epoch": 7.89532293986637, "grad_norm": 24.72553253173828, "learning_rate": 1e-06, "loss": 0.7666, "num_input_tokens_seen": 198587972, "step": 3545 }, { "epoch": 7.89532293986637, "loss": 0.6909983158111572, "loss_ce": 0.0003245328553020954, "loss_iou": 0.2470703125, "loss_num": 0.0390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 198587972, "step": 3545 }, { "epoch": 7.897550111358575, "grad_norm": 16.695913314819336, "learning_rate": 1e-06, "loss": 0.6233, "num_input_tokens_seen": 198644912, "step": 3546 }, { "epoch": 7.897550111358575, "loss": 0.5989300608634949, "loss_ce": 0.00017520345863886178, "loss_iou": 0.2412109375, "loss_num": 0.023193359375, "loss_xval": 0.59765625, "num_input_tokens_seen": 198644912, "step": 3546 }, { "epoch": 7.8997772828507795, "grad_norm": 23.642194747924805, "learning_rate": 1e-06, "loss": 0.8229, "num_input_tokens_seen": 198701832, "step": 3547 }, { "epoch": 7.8997772828507795, "loss": 0.7591318488121033, "loss_ce": 0.0008310843259096146, "loss_iou": 0.310546875, "loss_num": 0.0274658203125, "loss_xval": 0.7578125, "num_input_tokens_seen": 198701832, "step": 3547 }, { "epoch": 7.902004454342984, "grad_norm": 17.38911247253418, "learning_rate": 1e-06, "loss": 0.4997, "num_input_tokens_seen": 198759072, "step": 3548 }, { "epoch": 7.902004454342984, "loss": 0.4457376301288605, "loss_ce": 0.004087251145392656, "loss_iou": 0.1796875, "loss_num": 0.016357421875, "loss_xval": 0.44140625, "num_input_tokens_seen": 198759072, "step": 3548 }, { "epoch": 7.904231625835189, "grad_norm": 15.332737922668457, "learning_rate": 1e-06, "loss": 0.9554, "num_input_tokens_seen": 198815520, "step": 3549 }, { "epoch": 7.904231625835189, "loss": 0.7602176666259766, "loss_ce": 0.00020788329129572958, "loss_iou": 0.322265625, "loss_num": 0.0235595703125, "loss_xval": 0.76171875, "num_input_tokens_seen": 198815520, "step": 3549 }, { "epoch": 7.906458797327394, "grad_norm": 17.26343536376953, "learning_rate": 1e-06, "loss": 0.6921, "num_input_tokens_seen": 198871008, "step": 3550 }, { "epoch": 7.906458797327394, "loss": 0.7331159710884094, "loss_ce": 0.00020580444834195077, "loss_iou": 0.30078125, "loss_num": 0.026123046875, "loss_xval": 0.734375, "num_input_tokens_seen": 198871008, "step": 3550 }, { "epoch": 7.908685968819599, "grad_norm": 18.0777587890625, "learning_rate": 1e-06, "loss": 0.9367, "num_input_tokens_seen": 198927764, "step": 3551 }, { "epoch": 7.908685968819599, "loss": 0.9931344985961914, "loss_ce": 0.0002145655162166804, "loss_iou": 0.4140625, "loss_num": 0.032958984375, "loss_xval": 0.9921875, "num_input_tokens_seen": 198927764, "step": 3551 }, { "epoch": 7.910913140311804, "grad_norm": 15.072328567504883, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 198983208, "step": 3552 }, { "epoch": 7.910913140311804, "loss": 0.3726545572280884, "loss_ce": 0.00021803012350574136, "loss_iou": 0.1689453125, "loss_num": 0.00689697265625, "loss_xval": 0.373046875, "num_input_tokens_seen": 198983208, "step": 3552 }, { "epoch": 7.913140311804009, "grad_norm": 23.05716896057129, "learning_rate": 1e-06, "loss": 0.541, "num_input_tokens_seen": 199038544, "step": 3553 }, { "epoch": 7.913140311804009, "loss": 0.4792807102203369, "loss_ce": 0.000154730340000242, "loss_iou": 0.2138671875, "loss_num": 0.0101318359375, "loss_xval": 0.478515625, "num_input_tokens_seen": 199038544, "step": 3553 }, { "epoch": 7.9153674832962135, "grad_norm": 14.205634117126465, "learning_rate": 1e-06, "loss": 0.511, "num_input_tokens_seen": 199093300, "step": 3554 }, { "epoch": 7.9153674832962135, "loss": 0.5586905479431152, "loss_ce": 0.00021891451615374535, "loss_iou": 0.21875, "loss_num": 0.0242919921875, "loss_xval": 0.55859375, "num_input_tokens_seen": 199093300, "step": 3554 }, { "epoch": 7.917594654788418, "grad_norm": 20.449451446533203, "learning_rate": 1e-06, "loss": 0.529, "num_input_tokens_seen": 199151908, "step": 3555 }, { "epoch": 7.917594654788418, "loss": 0.3755338788032532, "loss_ce": 0.0001676726678851992, "loss_iou": 0.1689453125, "loss_num": 0.00750732421875, "loss_xval": 0.375, "num_input_tokens_seen": 199151908, "step": 3555 }, { "epoch": 7.919821826280623, "grad_norm": 24.3996639251709, "learning_rate": 1e-06, "loss": 0.775, "num_input_tokens_seen": 199204824, "step": 3556 }, { "epoch": 7.919821826280623, "loss": 0.6280686855316162, "loss_ce": 0.00016949971904978156, "loss_iou": 0.265625, "loss_num": 0.01953125, "loss_xval": 0.62890625, "num_input_tokens_seen": 199204824, "step": 3556 }, { "epoch": 7.922048997772828, "grad_norm": 16.553987503051758, "learning_rate": 1e-06, "loss": 0.7373, "num_input_tokens_seen": 199259004, "step": 3557 }, { "epoch": 7.922048997772828, "loss": 0.8635649681091309, "loss_ce": 0.00016163403051905334, "loss_iou": 0.3671875, "loss_num": 0.0255126953125, "loss_xval": 0.86328125, "num_input_tokens_seen": 199259004, "step": 3557 }, { "epoch": 7.924276169265033, "grad_norm": 20.715532302856445, "learning_rate": 1e-06, "loss": 0.5043, "num_input_tokens_seen": 199314500, "step": 3558 }, { "epoch": 7.924276169265033, "loss": 0.5213112235069275, "loss_ce": 0.00019308787886984646, "loss_iou": 0.234375, "loss_num": 0.01043701171875, "loss_xval": 0.51953125, "num_input_tokens_seen": 199314500, "step": 3558 }, { "epoch": 7.926503340757238, "grad_norm": 33.767356872558594, "learning_rate": 1e-06, "loss": 0.4129, "num_input_tokens_seen": 199372168, "step": 3559 }, { "epoch": 7.926503340757238, "loss": 0.31914597749710083, "loss_ce": 0.0001762679312378168, "loss_iou": 0.1484375, "loss_num": 0.0042724609375, "loss_xval": 0.318359375, "num_input_tokens_seen": 199372168, "step": 3559 }, { "epoch": 7.928730512249444, "grad_norm": 23.694744110107422, "learning_rate": 1e-06, "loss": 0.713, "num_input_tokens_seen": 199425588, "step": 3560 }, { "epoch": 7.928730512249444, "loss": 0.7644591331481934, "loss_ce": 0.00042102765291929245, "loss_iou": 0.302734375, "loss_num": 0.031494140625, "loss_xval": 0.765625, "num_input_tokens_seen": 199425588, "step": 3560 }, { "epoch": 7.9309576837416484, "grad_norm": 22.84273338317871, "learning_rate": 1e-06, "loss": 0.6157, "num_input_tokens_seen": 199483636, "step": 3561 }, { "epoch": 7.9309576837416484, "loss": 0.646425724029541, "loss_ce": 0.00018549279775470495, "loss_iou": 0.275390625, "loss_num": 0.01904296875, "loss_xval": 0.64453125, "num_input_tokens_seen": 199483636, "step": 3561 }, { "epoch": 7.933184855233853, "grad_norm": 16.44258689880371, "learning_rate": 1e-06, "loss": 0.507, "num_input_tokens_seen": 199538272, "step": 3562 }, { "epoch": 7.933184855233853, "loss": 0.5787392854690552, "loss_ce": 0.00024805517750792205, "loss_iou": 0.2578125, "loss_num": 0.0125732421875, "loss_xval": 0.578125, "num_input_tokens_seen": 199538272, "step": 3562 }, { "epoch": 7.935412026726058, "grad_norm": 20.36518669128418, "learning_rate": 1e-06, "loss": 0.6295, "num_input_tokens_seen": 199596328, "step": 3563 }, { "epoch": 7.935412026726058, "loss": 0.641238272190094, "loss_ce": 0.00036911331699229777, "loss_iou": 0.296875, "loss_num": 0.009521484375, "loss_xval": 0.640625, "num_input_tokens_seen": 199596328, "step": 3563 }, { "epoch": 7.937639198218263, "grad_norm": 49.14147186279297, "learning_rate": 1e-06, "loss": 0.6839, "num_input_tokens_seen": 199653252, "step": 3564 }, { "epoch": 7.937639198218263, "loss": 0.6464834213256836, "loss_ce": 0.00024316800408996642, "loss_iou": 0.30078125, "loss_num": 0.00927734375, "loss_xval": 0.64453125, "num_input_tokens_seen": 199653252, "step": 3564 }, { "epoch": 7.939866369710468, "grad_norm": 16.839582443237305, "learning_rate": 1e-06, "loss": 0.6352, "num_input_tokens_seen": 199708264, "step": 3565 }, { "epoch": 7.939866369710468, "loss": 0.49529188871383667, "loss_ce": 0.0001746913476381451, "loss_iou": 0.212890625, "loss_num": 0.01397705078125, "loss_xval": 0.49609375, "num_input_tokens_seen": 199708264, "step": 3565 }, { "epoch": 7.942093541202673, "grad_norm": 16.844385147094727, "learning_rate": 1e-06, "loss": 0.6126, "num_input_tokens_seen": 199762920, "step": 3566 }, { "epoch": 7.942093541202673, "loss": 0.6229934096336365, "loss_ce": 0.00019066702225245535, "loss_iou": 0.263671875, "loss_num": 0.0194091796875, "loss_xval": 0.62109375, "num_input_tokens_seen": 199762920, "step": 3566 }, { "epoch": 7.944320712694878, "grad_norm": 23.8248233795166, "learning_rate": 1e-06, "loss": 0.6024, "num_input_tokens_seen": 199820520, "step": 3567 }, { "epoch": 7.944320712694878, "loss": 0.6216274499893188, "loss_ce": 0.00016751314979046583, "loss_iou": 0.26953125, "loss_num": 0.0164794921875, "loss_xval": 0.62109375, "num_input_tokens_seen": 199820520, "step": 3567 }, { "epoch": 7.9465478841870825, "grad_norm": 13.384178161621094, "learning_rate": 1e-06, "loss": 0.6889, "num_input_tokens_seen": 199874968, "step": 3568 }, { "epoch": 7.9465478841870825, "loss": 0.6415170431137085, "loss_ce": 0.00022065843222662807, "loss_iou": 0.259765625, "loss_num": 0.0242919921875, "loss_xval": 0.640625, "num_input_tokens_seen": 199874968, "step": 3568 }, { "epoch": 7.948775055679287, "grad_norm": 17.003244400024414, "learning_rate": 1e-06, "loss": 0.7046, "num_input_tokens_seen": 199931032, "step": 3569 }, { "epoch": 7.948775055679287, "loss": 0.7286696434020996, "loss_ce": 0.00015403382712975144, "loss_iou": 0.326171875, "loss_num": 0.0150146484375, "loss_xval": 0.7265625, "num_input_tokens_seen": 199931032, "step": 3569 }, { "epoch": 7.951002227171492, "grad_norm": 18.784358978271484, "learning_rate": 1e-06, "loss": 0.6467, "num_input_tokens_seen": 199986240, "step": 3570 }, { "epoch": 7.951002227171492, "loss": 0.711881160736084, "loss_ce": 0.00021116853167768568, "loss_iou": 0.3203125, "loss_num": 0.01458740234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 199986240, "step": 3570 }, { "epoch": 7.953229398663697, "grad_norm": 25.438982009887695, "learning_rate": 1e-06, "loss": 0.6809, "num_input_tokens_seen": 200041048, "step": 3571 }, { "epoch": 7.953229398663697, "loss": 0.8664193153381348, "loss_ce": 0.00020840237266384065, "loss_iou": 0.34375, "loss_num": 0.03564453125, "loss_xval": 0.8671875, "num_input_tokens_seen": 200041048, "step": 3571 }, { "epoch": 7.955456570155902, "grad_norm": 18.567123413085938, "learning_rate": 1e-06, "loss": 0.6805, "num_input_tokens_seen": 200098136, "step": 3572 }, { "epoch": 7.955456570155902, "loss": 0.8434309363365173, "loss_ce": 0.0001692144141998142, "loss_iou": 0.35546875, "loss_num": 0.02685546875, "loss_xval": 0.84375, "num_input_tokens_seen": 200098136, "step": 3572 }, { "epoch": 7.957683741648107, "grad_norm": 271.4332275390625, "learning_rate": 1e-06, "loss": 0.6131, "num_input_tokens_seen": 200152360, "step": 3573 }, { "epoch": 7.957683741648107, "loss": 0.7838395833969116, "loss_ce": 0.00014815322356298566, "loss_iou": 0.3359375, "loss_num": 0.0225830078125, "loss_xval": 0.78515625, "num_input_tokens_seen": 200152360, "step": 3573 }, { "epoch": 7.959910913140312, "grad_norm": 18.194717407226562, "learning_rate": 1e-06, "loss": 0.5837, "num_input_tokens_seen": 200208800, "step": 3574 }, { "epoch": 7.959910913140312, "loss": 0.3856315314769745, "loss_ce": 0.00013348979700822383, "loss_iou": 0.15625, "loss_num": 0.0146484375, "loss_xval": 0.384765625, "num_input_tokens_seen": 200208800, "step": 3574 }, { "epoch": 7.9621380846325165, "grad_norm": 23.228425979614258, "learning_rate": 1e-06, "loss": 0.8644, "num_input_tokens_seen": 200265476, "step": 3575 }, { "epoch": 7.9621380846325165, "loss": 0.6910591125488281, "loss_ce": 0.0001411863195244223, "loss_iou": 0.263671875, "loss_num": 0.032958984375, "loss_xval": 0.69140625, "num_input_tokens_seen": 200265476, "step": 3575 }, { "epoch": 7.964365256124721, "grad_norm": 28.046964645385742, "learning_rate": 1e-06, "loss": 0.6866, "num_input_tokens_seen": 200322152, "step": 3576 }, { "epoch": 7.964365256124721, "loss": 0.7025761604309082, "loss_ce": 0.00018362130504101515, "loss_iou": 0.294921875, "loss_num": 0.0224609375, "loss_xval": 0.703125, "num_input_tokens_seen": 200322152, "step": 3576 }, { "epoch": 7.966592427616926, "grad_norm": 23.809234619140625, "learning_rate": 1e-06, "loss": 0.5926, "num_input_tokens_seen": 200375664, "step": 3577 }, { "epoch": 7.966592427616926, "loss": 0.6842349767684937, "loss_ce": 0.00015298397920560092, "loss_iou": 0.31640625, "loss_num": 0.010498046875, "loss_xval": 0.68359375, "num_input_tokens_seen": 200375664, "step": 3577 }, { "epoch": 7.968819599109131, "grad_norm": 35.44462966918945, "learning_rate": 1e-06, "loss": 0.6293, "num_input_tokens_seen": 200430944, "step": 3578 }, { "epoch": 7.968819599109131, "loss": 0.6284739375114441, "loss_ce": 0.0001780486200004816, "loss_iou": 0.28515625, "loss_num": 0.01129150390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 200430944, "step": 3578 }, { "epoch": 7.971046770601336, "grad_norm": 22.342145919799805, "learning_rate": 1e-06, "loss": 0.5531, "num_input_tokens_seen": 200486764, "step": 3579 }, { "epoch": 7.971046770601336, "loss": 0.5724341869354248, "loss_ce": 0.0001685347524471581, "loss_iou": 0.259765625, "loss_num": 0.01055908203125, "loss_xval": 0.5703125, "num_input_tokens_seen": 200486764, "step": 3579 }, { "epoch": 7.973273942093542, "grad_norm": 18.398874282836914, "learning_rate": 1e-06, "loss": 0.641, "num_input_tokens_seen": 200544420, "step": 3580 }, { "epoch": 7.973273942093542, "loss": 0.6851996779441833, "loss_ce": 0.0001410768600180745, "loss_iou": 0.28515625, "loss_num": 0.0228271484375, "loss_xval": 0.68359375, "num_input_tokens_seen": 200544420, "step": 3580 }, { "epoch": 7.9755011135857465, "grad_norm": 12.79372787475586, "learning_rate": 1e-06, "loss": 0.3946, "num_input_tokens_seen": 200601156, "step": 3581 }, { "epoch": 7.9755011135857465, "loss": 0.40876662731170654, "loss_ce": 0.0010518098715692759, "loss_iou": 0.177734375, "loss_num": 0.01043701171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 200601156, "step": 3581 }, { "epoch": 7.977728285077951, "grad_norm": 15.637197494506836, "learning_rate": 1e-06, "loss": 0.8963, "num_input_tokens_seen": 200657656, "step": 3582 }, { "epoch": 7.977728285077951, "loss": 1.1127707958221436, "loss_ce": 0.0002219329762738198, "loss_iou": 0.4453125, "loss_num": 0.043701171875, "loss_xval": 1.109375, "num_input_tokens_seen": 200657656, "step": 3582 }, { "epoch": 7.979955456570156, "grad_norm": 19.720239639282227, "learning_rate": 1e-06, "loss": 0.7492, "num_input_tokens_seen": 200713320, "step": 3583 }, { "epoch": 7.979955456570156, "loss": 0.5831568241119385, "loss_ce": 0.000149025785503909, "loss_iou": 0.251953125, "loss_num": 0.0155029296875, "loss_xval": 0.58203125, "num_input_tokens_seen": 200713320, "step": 3583 }, { "epoch": 7.982182628062361, "grad_norm": 13.927251815795898, "learning_rate": 1e-06, "loss": 0.371, "num_input_tokens_seen": 200768308, "step": 3584 }, { "epoch": 7.982182628062361, "loss": 0.3358955383300781, "loss_ce": 0.00044634263031184673, "loss_iou": 0.1416015625, "loss_num": 0.01055908203125, "loss_xval": 0.3359375, "num_input_tokens_seen": 200768308, "step": 3584 }, { "epoch": 7.984409799554566, "grad_norm": 19.705570220947266, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 200821804, "step": 3585 }, { "epoch": 7.984409799554566, "loss": 0.4889172911643982, "loss_ce": 0.00020879982912447304, "loss_iou": 0.212890625, "loss_num": 0.01275634765625, "loss_xval": 0.48828125, "num_input_tokens_seen": 200821804, "step": 3585 }, { "epoch": 7.986636971046771, "grad_norm": 125.53588104248047, "learning_rate": 1e-06, "loss": 0.778, "num_input_tokens_seen": 200880704, "step": 3586 }, { "epoch": 7.986636971046771, "loss": 0.7231355905532837, "loss_ce": 0.00023522632545791566, "loss_iou": 0.296875, "loss_num": 0.0257568359375, "loss_xval": 0.72265625, "num_input_tokens_seen": 200880704, "step": 3586 }, { "epoch": 7.988864142538976, "grad_norm": 15.870854377746582, "learning_rate": 1e-06, "loss": 0.7217, "num_input_tokens_seen": 200937412, "step": 3587 }, { "epoch": 7.988864142538976, "loss": 0.3839578628540039, "loss_ce": 0.00022984019597060978, "loss_iou": 0.1318359375, "loss_num": 0.02392578125, "loss_xval": 0.3828125, "num_input_tokens_seen": 200937412, "step": 3587 }, { "epoch": 7.991091314031181, "grad_norm": 17.978586196899414, "learning_rate": 1e-06, "loss": 0.495, "num_input_tokens_seen": 200992028, "step": 3588 }, { "epoch": 7.991091314031181, "loss": 0.49199825525283813, "loss_ce": 0.0001769806258380413, "loss_iou": 0.2138671875, "loss_num": 0.0126953125, "loss_xval": 0.4921875, "num_input_tokens_seen": 200992028, "step": 3588 }, { "epoch": 7.993318485523385, "grad_norm": 18.851573944091797, "learning_rate": 1e-06, "loss": 0.6107, "num_input_tokens_seen": 201049456, "step": 3589 }, { "epoch": 7.993318485523385, "loss": 0.6161330938339233, "loss_ce": 0.00016627684817649424, "loss_iou": 0.26171875, "loss_num": 0.01904296875, "loss_xval": 0.6171875, "num_input_tokens_seen": 201049456, "step": 3589 }, { "epoch": 7.99554565701559, "grad_norm": 24.63906478881836, "learning_rate": 1e-06, "loss": 0.6317, "num_input_tokens_seen": 201104932, "step": 3590 }, { "epoch": 7.99554565701559, "loss": 0.6456716060638428, "loss_ce": 0.00016378730651922524, "loss_iou": 0.298828125, "loss_num": 0.00946044921875, "loss_xval": 0.64453125, "num_input_tokens_seen": 201104932, "step": 3590 }, { "epoch": 7.997772828507795, "grad_norm": 19.594945907592773, "learning_rate": 1e-06, "loss": 0.6995, "num_input_tokens_seen": 201161408, "step": 3591 }, { "epoch": 7.997772828507795, "loss": 0.6261618137359619, "loss_ce": 0.00018528125656303018, "loss_iou": 0.2734375, "loss_num": 0.015869140625, "loss_xval": 0.625, "num_input_tokens_seen": 201161408, "step": 3591 }, { "epoch": 8.0, "grad_norm": 13.603879928588867, "learning_rate": 1e-06, "loss": 0.6194, "num_input_tokens_seen": 201216936, "step": 3592 }, { "epoch": 8.0, "loss": 0.7476929426193237, "loss_ce": 0.00013434665743261576, "loss_iou": 0.33203125, "loss_num": 0.0172119140625, "loss_xval": 0.74609375, "num_input_tokens_seen": 201216936, "step": 3592 }, { "epoch": 8.002227171492205, "grad_norm": 18.30463409423828, "learning_rate": 1e-06, "loss": 0.7265, "num_input_tokens_seen": 201274140, "step": 3593 }, { "epoch": 8.002227171492205, "loss": 0.8002690076828003, "loss_ce": 0.00022022916527930647, "loss_iou": 0.353515625, "loss_num": 0.0185546875, "loss_xval": 0.80078125, "num_input_tokens_seen": 201274140, "step": 3593 }, { "epoch": 8.00445434298441, "grad_norm": 21.524507522583008, "learning_rate": 1e-06, "loss": 0.7233, "num_input_tokens_seen": 201330520, "step": 3594 }, { "epoch": 8.00445434298441, "loss": 0.7201758027076721, "loss_ce": 0.00020510726608335972, "loss_iou": 0.333984375, "loss_num": 0.01055908203125, "loss_xval": 0.71875, "num_input_tokens_seen": 201330520, "step": 3594 }, { "epoch": 8.006681514476615, "grad_norm": 23.734619140625, "learning_rate": 1e-06, "loss": 0.8149, "num_input_tokens_seen": 201386452, "step": 3595 }, { "epoch": 8.006681514476615, "loss": 0.6124993562698364, "loss_ce": 0.00019466172670945525, "loss_iou": 0.2734375, "loss_num": 0.01312255859375, "loss_xval": 0.61328125, "num_input_tokens_seen": 201386452, "step": 3595 }, { "epoch": 8.00890868596882, "grad_norm": 20.260971069335938, "learning_rate": 1e-06, "loss": 0.8065, "num_input_tokens_seen": 201444920, "step": 3596 }, { "epoch": 8.00890868596882, "loss": 0.48429691791534424, "loss_ce": 0.00016606590361334383, "loss_iou": 0.20703125, "loss_num": 0.01397705078125, "loss_xval": 0.484375, "num_input_tokens_seen": 201444920, "step": 3596 }, { "epoch": 8.011135857461024, "grad_norm": 13.375497817993164, "learning_rate": 1e-06, "loss": 0.4723, "num_input_tokens_seen": 201504192, "step": 3597 }, { "epoch": 8.011135857461024, "loss": 0.42642343044281006, "loss_ce": 0.00015389773761853576, "loss_iou": 0.1943359375, "loss_num": 0.0074462890625, "loss_xval": 0.42578125, "num_input_tokens_seen": 201504192, "step": 3597 }, { "epoch": 8.01336302895323, "grad_norm": 13.759163856506348, "learning_rate": 1e-06, "loss": 0.4227, "num_input_tokens_seen": 201560760, "step": 3598 }, { "epoch": 8.01336302895323, "loss": 0.5462967157363892, "loss_ce": 0.0002762216317933053, "loss_iou": 0.2431640625, "loss_num": 0.01190185546875, "loss_xval": 0.546875, "num_input_tokens_seen": 201560760, "step": 3598 }, { "epoch": 8.015590200445434, "grad_norm": 13.41096305847168, "learning_rate": 1e-06, "loss": 0.6107, "num_input_tokens_seen": 201614240, "step": 3599 }, { "epoch": 8.015590200445434, "loss": 0.5510572791099548, "loss_ce": 0.0001539345394121483, "loss_iou": 0.2470703125, "loss_num": 0.01123046875, "loss_xval": 0.55078125, "num_input_tokens_seen": 201614240, "step": 3599 }, { "epoch": 8.017817371937639, "grad_norm": 23.751489639282227, "learning_rate": 1e-06, "loss": 0.6214, "num_input_tokens_seen": 201672716, "step": 3600 }, { "epoch": 8.017817371937639, "loss": 0.7004799842834473, "loss_ce": 0.00040674611227586865, "loss_iou": 0.314453125, "loss_num": 0.0142822265625, "loss_xval": 0.69921875, "num_input_tokens_seen": 201672716, "step": 3600 }, { "epoch": 8.020044543429844, "grad_norm": 14.586628913879395, "learning_rate": 1e-06, "loss": 0.7327, "num_input_tokens_seen": 201729168, "step": 3601 }, { "epoch": 8.020044543429844, "loss": 0.636683464050293, "loss_ce": 0.00020884581317659467, "loss_iou": 0.263671875, "loss_num": 0.021728515625, "loss_xval": 0.63671875, "num_input_tokens_seen": 201729168, "step": 3601 }, { "epoch": 8.022271714922049, "grad_norm": 19.21128273010254, "learning_rate": 1e-06, "loss": 0.5544, "num_input_tokens_seen": 201784708, "step": 3602 }, { "epoch": 8.022271714922049, "loss": 0.4761182367801666, "loss_ce": 0.00016609346494078636, "loss_iou": 0.2001953125, "loss_num": 0.0152587890625, "loss_xval": 0.4765625, "num_input_tokens_seen": 201784708, "step": 3602 }, { "epoch": 8.024498886414253, "grad_norm": 16.912395477294922, "learning_rate": 1e-06, "loss": 0.671, "num_input_tokens_seen": 201843236, "step": 3603 }, { "epoch": 8.024498886414253, "loss": 0.7302785515785217, "loss_ce": 0.00017601058061700314, "loss_iou": 0.3046875, "loss_num": 0.023681640625, "loss_xval": 0.73046875, "num_input_tokens_seen": 201843236, "step": 3603 }, { "epoch": 8.026726057906458, "grad_norm": 18.214143753051758, "learning_rate": 1e-06, "loss": 0.6397, "num_input_tokens_seen": 201897720, "step": 3604 }, { "epoch": 8.026726057906458, "loss": 0.41655105352401733, "loss_ce": 0.00016921485075727105, "loss_iou": 0.19140625, "loss_num": 0.006988525390625, "loss_xval": 0.416015625, "num_input_tokens_seen": 201897720, "step": 3604 }, { "epoch": 8.028953229398663, "grad_norm": 18.966310501098633, "learning_rate": 1e-06, "loss": 0.6243, "num_input_tokens_seen": 201953360, "step": 3605 }, { "epoch": 8.028953229398663, "loss": 0.5837928056716919, "loss_ce": 0.00017461951938457787, "loss_iou": 0.232421875, "loss_num": 0.0238037109375, "loss_xval": 0.58203125, "num_input_tokens_seen": 201953360, "step": 3605 }, { "epoch": 8.031180400890868, "grad_norm": 18.62992286682129, "learning_rate": 1e-06, "loss": 0.6118, "num_input_tokens_seen": 202008704, "step": 3606 }, { "epoch": 8.031180400890868, "loss": 0.5211237668991089, "loss_ce": 0.0003718439256772399, "loss_iou": 0.216796875, "loss_num": 0.0177001953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 202008704, "step": 3606 }, { "epoch": 8.033407572383073, "grad_norm": 24.204988479614258, "learning_rate": 1e-06, "loss": 0.5671, "num_input_tokens_seen": 202068704, "step": 3607 }, { "epoch": 8.033407572383073, "loss": 0.4836837947368622, "loss_ce": 0.0005294845905154943, "loss_iou": 0.1923828125, "loss_num": 0.01953125, "loss_xval": 0.482421875, "num_input_tokens_seen": 202068704, "step": 3607 }, { "epoch": 8.035634743875278, "grad_norm": 13.87259292602539, "learning_rate": 1e-06, "loss": 0.6244, "num_input_tokens_seen": 202123772, "step": 3608 }, { "epoch": 8.035634743875278, "loss": 0.5231030583381653, "loss_ce": 0.0001537989010103047, "loss_iou": 0.220703125, "loss_num": 0.01611328125, "loss_xval": 0.5234375, "num_input_tokens_seen": 202123772, "step": 3608 }, { "epoch": 8.037861915367483, "grad_norm": 23.374591827392578, "learning_rate": 1e-06, "loss": 0.7716, "num_input_tokens_seen": 202179184, "step": 3609 }, { "epoch": 8.037861915367483, "loss": 0.7146378755569458, "loss_ce": 0.00016029010294005275, "loss_iou": 0.310546875, "loss_num": 0.0185546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 202179184, "step": 3609 }, { "epoch": 8.040089086859687, "grad_norm": 18.180028915405273, "learning_rate": 1e-06, "loss": 0.6477, "num_input_tokens_seen": 202234064, "step": 3610 }, { "epoch": 8.040089086859687, "loss": 0.8393857479095459, "loss_ce": 0.0001523745886515826, "loss_iou": 0.359375, "loss_num": 0.024169921875, "loss_xval": 0.83984375, "num_input_tokens_seen": 202234064, "step": 3610 }, { "epoch": 8.042316258351892, "grad_norm": 22.855134963989258, "learning_rate": 1e-06, "loss": 0.565, "num_input_tokens_seen": 202290848, "step": 3611 }, { "epoch": 8.042316258351892, "loss": 0.4523416757583618, "loss_ce": 0.0001932748273247853, "loss_iou": 0.193359375, "loss_num": 0.01300048828125, "loss_xval": 0.453125, "num_input_tokens_seen": 202290848, "step": 3611 }, { "epoch": 8.044543429844097, "grad_norm": 22.91286849975586, "learning_rate": 1e-06, "loss": 0.7699, "num_input_tokens_seen": 202349456, "step": 3612 }, { "epoch": 8.044543429844097, "loss": 0.7767907381057739, "loss_ce": 0.0001794241979951039, "loss_iou": 0.322265625, "loss_num": 0.0267333984375, "loss_xval": 0.77734375, "num_input_tokens_seen": 202349456, "step": 3612 }, { "epoch": 8.046770601336302, "grad_norm": 22.00181770324707, "learning_rate": 1e-06, "loss": 0.5741, "num_input_tokens_seen": 202406208, "step": 3613 }, { "epoch": 8.046770601336302, "loss": 0.6100356578826904, "loss_ce": 0.00017238240980077535, "loss_iou": 0.2451171875, "loss_num": 0.0238037109375, "loss_xval": 0.609375, "num_input_tokens_seen": 202406208, "step": 3613 }, { "epoch": 8.048997772828507, "grad_norm": 23.352352142333984, "learning_rate": 1e-06, "loss": 0.4346, "num_input_tokens_seen": 202461324, "step": 3614 }, { "epoch": 8.048997772828507, "loss": 0.4557119607925415, "loss_ce": 0.00014553280198015273, "loss_iou": 0.1845703125, "loss_num": 0.0172119140625, "loss_xval": 0.455078125, "num_input_tokens_seen": 202461324, "step": 3614 }, { "epoch": 8.051224944320714, "grad_norm": 24.288408279418945, "learning_rate": 1e-06, "loss": 0.5669, "num_input_tokens_seen": 202520016, "step": 3615 }, { "epoch": 8.051224944320714, "loss": 0.8215472102165222, "loss_ce": 0.0006243445095606148, "loss_iou": 0.33203125, "loss_num": 0.0311279296875, "loss_xval": 0.8203125, "num_input_tokens_seen": 202520016, "step": 3615 }, { "epoch": 8.053452115812918, "grad_norm": 18.481889724731445, "learning_rate": 1e-06, "loss": 0.44, "num_input_tokens_seen": 202576916, "step": 3616 }, { "epoch": 8.053452115812918, "loss": 0.38955333828926086, "loss_ce": 0.0001490543072577566, "loss_iou": 0.1708984375, "loss_num": 0.0096435546875, "loss_xval": 0.388671875, "num_input_tokens_seen": 202576916, "step": 3616 }, { "epoch": 8.055679287305123, "grad_norm": 23.070777893066406, "learning_rate": 1e-06, "loss": 0.6434, "num_input_tokens_seen": 202633128, "step": 3617 }, { "epoch": 8.055679287305123, "loss": 0.6288352608680725, "loss_ce": 0.00017316042794845998, "loss_iou": 0.267578125, "loss_num": 0.0186767578125, "loss_xval": 0.62890625, "num_input_tokens_seen": 202633128, "step": 3617 }, { "epoch": 8.057906458797328, "grad_norm": 21.540128707885742, "learning_rate": 1e-06, "loss": 0.5371, "num_input_tokens_seen": 202686364, "step": 3618 }, { "epoch": 8.057906458797328, "loss": 0.5090727210044861, "loss_ce": 0.0001615592191228643, "loss_iou": 0.224609375, "loss_num": 0.01177978515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 202686364, "step": 3618 }, { "epoch": 8.060133630289533, "grad_norm": 29.778921127319336, "learning_rate": 1e-06, "loss": 0.8185, "num_input_tokens_seen": 202740904, "step": 3619 }, { "epoch": 8.060133630289533, "loss": 0.6977142691612244, "loss_ce": 0.000204495110665448, "loss_iou": 0.30078125, "loss_num": 0.0194091796875, "loss_xval": 0.69921875, "num_input_tokens_seen": 202740904, "step": 3619 }, { "epoch": 8.062360801781738, "grad_norm": 15.424278259277344, "learning_rate": 1e-06, "loss": 0.5548, "num_input_tokens_seen": 202796944, "step": 3620 }, { "epoch": 8.062360801781738, "loss": 0.6742639541625977, "loss_ce": 0.00019166519632562995, "loss_iou": 0.279296875, "loss_num": 0.023193359375, "loss_xval": 0.67578125, "num_input_tokens_seen": 202796944, "step": 3620 }, { "epoch": 8.064587973273943, "grad_norm": 15.881271362304688, "learning_rate": 1e-06, "loss": 0.6079, "num_input_tokens_seen": 202852488, "step": 3621 }, { "epoch": 8.064587973273943, "loss": 0.6368354558944702, "loss_ce": 0.00017772393766790628, "loss_iou": 0.29296875, "loss_num": 0.01031494140625, "loss_xval": 0.63671875, "num_input_tokens_seen": 202852488, "step": 3621 }, { "epoch": 8.066815144766148, "grad_norm": 20.29325294494629, "learning_rate": 1e-06, "loss": 0.7275, "num_input_tokens_seen": 202905252, "step": 3622 }, { "epoch": 8.066815144766148, "loss": 0.5565529465675354, "loss_ce": 0.00015647773398086429, "loss_iou": 0.234375, "loss_num": 0.0174560546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 202905252, "step": 3622 }, { "epoch": 8.069042316258352, "grad_norm": 18.05845069885254, "learning_rate": 1e-06, "loss": 0.5679, "num_input_tokens_seen": 202962516, "step": 3623 }, { "epoch": 8.069042316258352, "loss": 0.5240879058837891, "loss_ce": 0.0001620947732590139, "loss_iou": 0.2314453125, "loss_num": 0.01239013671875, "loss_xval": 0.5234375, "num_input_tokens_seen": 202962516, "step": 3623 }, { "epoch": 8.071269487750557, "grad_norm": 16.5575008392334, "learning_rate": 1e-06, "loss": 0.3642, "num_input_tokens_seen": 203018252, "step": 3624 }, { "epoch": 8.071269487750557, "loss": 0.23819105327129364, "loss_ce": 0.00015395878290291876, "loss_iou": 0.10693359375, "loss_num": 0.00482177734375, "loss_xval": 0.23828125, "num_input_tokens_seen": 203018252, "step": 3624 }, { "epoch": 8.073496659242762, "grad_norm": 14.785902976989746, "learning_rate": 1e-06, "loss": 0.7054, "num_input_tokens_seen": 203075692, "step": 3625 }, { "epoch": 8.073496659242762, "loss": 0.8658159971237183, "loss_ce": 0.0003374355146661401, "loss_iou": 0.34765625, "loss_num": 0.0341796875, "loss_xval": 0.8671875, "num_input_tokens_seen": 203075692, "step": 3625 }, { "epoch": 8.075723830734967, "grad_norm": 30.066858291625977, "learning_rate": 1e-06, "loss": 0.6224, "num_input_tokens_seen": 203132688, "step": 3626 }, { "epoch": 8.075723830734967, "loss": 0.5541576743125916, "loss_ce": 0.00020257396681699902, "loss_iou": 0.21875, "loss_num": 0.0233154296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 203132688, "step": 3626 }, { "epoch": 8.077951002227172, "grad_norm": 13.862289428710938, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 203189732, "step": 3627 }, { "epoch": 8.077951002227172, "loss": 0.6278433203697205, "loss_ce": 0.00015777646331116557, "loss_iou": 0.2578125, "loss_num": 0.0220947265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 203189732, "step": 3627 }, { "epoch": 8.080178173719377, "grad_norm": 14.706859588623047, "learning_rate": 1e-06, "loss": 0.5169, "num_input_tokens_seen": 203247688, "step": 3628 }, { "epoch": 8.080178173719377, "loss": 0.48699110746383667, "loss_ce": 0.00017472410399932414, "loss_iou": 0.220703125, "loss_num": 0.00909423828125, "loss_xval": 0.486328125, "num_input_tokens_seen": 203247688, "step": 3628 }, { "epoch": 8.082405345211582, "grad_norm": 15.710479736328125, "learning_rate": 1e-06, "loss": 0.661, "num_input_tokens_seen": 203304544, "step": 3629 }, { "epoch": 8.082405345211582, "loss": 0.6681832075119019, "loss_ce": 0.00021445300080813468, "loss_iou": 0.271484375, "loss_num": 0.0247802734375, "loss_xval": 0.66796875, "num_input_tokens_seen": 203304544, "step": 3629 }, { "epoch": 8.084632516703786, "grad_norm": 64.82049560546875, "learning_rate": 1e-06, "loss": 0.6353, "num_input_tokens_seen": 203358916, "step": 3630 }, { "epoch": 8.084632516703786, "loss": 0.7306484580039978, "loss_ce": 0.00024074350949376822, "loss_iou": 0.314453125, "loss_num": 0.0203857421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 203358916, "step": 3630 }, { "epoch": 8.086859688195991, "grad_norm": 17.783905029296875, "learning_rate": 1e-06, "loss": 0.558, "num_input_tokens_seen": 203414864, "step": 3631 }, { "epoch": 8.086859688195991, "loss": 0.6720456480979919, "loss_ce": 0.00017064949497580528, "loss_iou": 0.298828125, "loss_num": 0.01458740234375, "loss_xval": 0.671875, "num_input_tokens_seen": 203414864, "step": 3631 }, { "epoch": 8.089086859688196, "grad_norm": 13.846236228942871, "learning_rate": 1e-06, "loss": 0.5735, "num_input_tokens_seen": 203469972, "step": 3632 }, { "epoch": 8.089086859688196, "loss": 0.61847984790802, "loss_ce": 0.00019372851238586009, "loss_iou": 0.27734375, "loss_num": 0.01312255859375, "loss_xval": 0.6171875, "num_input_tokens_seen": 203469972, "step": 3632 }, { "epoch": 8.091314031180401, "grad_norm": 49.66973876953125, "learning_rate": 1e-06, "loss": 0.7761, "num_input_tokens_seen": 203528200, "step": 3633 }, { "epoch": 8.091314031180401, "loss": 0.869318962097168, "loss_ce": 0.00017833446327131242, "loss_iou": 0.365234375, "loss_num": 0.0274658203125, "loss_xval": 0.8671875, "num_input_tokens_seen": 203528200, "step": 3633 }, { "epoch": 8.093541202672606, "grad_norm": 23.811777114868164, "learning_rate": 1e-06, "loss": 0.4735, "num_input_tokens_seen": 203584436, "step": 3634 }, { "epoch": 8.093541202672606, "loss": 0.4863722026348114, "loss_ce": 0.00016612093895673752, "loss_iou": 0.21875, "loss_num": 0.00982666015625, "loss_xval": 0.486328125, "num_input_tokens_seen": 203584436, "step": 3634 }, { "epoch": 8.09576837416481, "grad_norm": 15.80212116241455, "learning_rate": 1e-06, "loss": 0.5608, "num_input_tokens_seen": 203641428, "step": 3635 }, { "epoch": 8.09576837416481, "loss": 0.48964816331863403, "loss_ce": 0.00014622273738496006, "loss_iou": 0.2099609375, "loss_num": 0.0140380859375, "loss_xval": 0.490234375, "num_input_tokens_seen": 203641428, "step": 3635 }, { "epoch": 8.097995545657016, "grad_norm": 24.546077728271484, "learning_rate": 1e-06, "loss": 0.6726, "num_input_tokens_seen": 203693980, "step": 3636 }, { "epoch": 8.097995545657016, "loss": 0.569033682346344, "loss_ce": 0.0001860179763752967, "loss_iou": 0.2578125, "loss_num": 0.01104736328125, "loss_xval": 0.5703125, "num_input_tokens_seen": 203693980, "step": 3636 }, { "epoch": 8.10022271714922, "grad_norm": 45.66901397705078, "learning_rate": 1e-06, "loss": 0.8868, "num_input_tokens_seen": 203747768, "step": 3637 }, { "epoch": 8.10022271714922, "loss": 0.6677889227867126, "loss_ce": 0.00018637420726008713, "loss_iou": 0.291015625, "loss_num": 0.016845703125, "loss_xval": 0.66796875, "num_input_tokens_seen": 203747768, "step": 3637 }, { "epoch": 8.102449888641425, "grad_norm": 25.512130737304688, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 203805264, "step": 3638 }, { "epoch": 8.102449888641425, "loss": 0.47341108322143555, "loss_ce": 0.00014449466834776103, "loss_iou": 0.201171875, "loss_num": 0.01422119140625, "loss_xval": 0.47265625, "num_input_tokens_seen": 203805264, "step": 3638 }, { "epoch": 8.10467706013363, "grad_norm": 16.746042251586914, "learning_rate": 1e-06, "loss": 0.5147, "num_input_tokens_seen": 203862852, "step": 3639 }, { "epoch": 8.10467706013363, "loss": 0.5026716589927673, "loss_ce": 0.0003523434279486537, "loss_iou": 0.2060546875, "loss_num": 0.0179443359375, "loss_xval": 0.50390625, "num_input_tokens_seen": 203862852, "step": 3639 }, { "epoch": 8.106904231625835, "grad_norm": 14.684697151184082, "learning_rate": 1e-06, "loss": 0.6076, "num_input_tokens_seen": 203920432, "step": 3640 }, { "epoch": 8.106904231625835, "loss": 0.7979079484939575, "loss_ce": 0.00017849741561803967, "loss_iou": 0.30859375, "loss_num": 0.035888671875, "loss_xval": 0.796875, "num_input_tokens_seen": 203920432, "step": 3640 }, { "epoch": 8.10913140311804, "grad_norm": 29.783206939697266, "learning_rate": 1e-06, "loss": 0.6487, "num_input_tokens_seen": 203977000, "step": 3641 }, { "epoch": 8.10913140311804, "loss": 0.7514079809188843, "loss_ce": 0.00043139857007190585, "loss_iou": 0.34375, "loss_num": 0.012451171875, "loss_xval": 0.75, "num_input_tokens_seen": 203977000, "step": 3641 }, { "epoch": 8.111358574610245, "grad_norm": 17.605884552001953, "learning_rate": 1e-06, "loss": 0.5144, "num_input_tokens_seen": 204034360, "step": 3642 }, { "epoch": 8.111358574610245, "loss": 0.6749836206436157, "loss_ce": 0.00017892984033096582, "loss_iou": 0.30859375, "loss_num": 0.01153564453125, "loss_xval": 0.67578125, "num_input_tokens_seen": 204034360, "step": 3642 }, { "epoch": 8.11358574610245, "grad_norm": 17.724246978759766, "learning_rate": 1e-06, "loss": 0.6901, "num_input_tokens_seen": 204093164, "step": 3643 }, { "epoch": 8.11358574610245, "loss": 0.8570829033851624, "loss_ce": 0.00033243943471461535, "loss_iou": 0.365234375, "loss_num": 0.0250244140625, "loss_xval": 0.85546875, "num_input_tokens_seen": 204093164, "step": 3643 }, { "epoch": 8.115812917594655, "grad_norm": 23.620380401611328, "learning_rate": 1e-06, "loss": 0.6023, "num_input_tokens_seen": 204147504, "step": 3644 }, { "epoch": 8.115812917594655, "loss": 0.5590322017669678, "loss_ce": 0.0001943043462233618, "loss_iou": 0.25, "loss_num": 0.0113525390625, "loss_xval": 0.55859375, "num_input_tokens_seen": 204147504, "step": 3644 }, { "epoch": 8.11804008908686, "grad_norm": 20.3369140625, "learning_rate": 1e-06, "loss": 0.5411, "num_input_tokens_seen": 204204044, "step": 3645 }, { "epoch": 8.11804008908686, "loss": 0.5622153878211975, "loss_ce": 0.00020367707475088537, "loss_iou": 0.22265625, "loss_num": 0.0233154296875, "loss_xval": 0.5625, "num_input_tokens_seen": 204204044, "step": 3645 }, { "epoch": 8.120267260579064, "grad_norm": 20.101959228515625, "learning_rate": 1e-06, "loss": 0.5644, "num_input_tokens_seen": 204262612, "step": 3646 }, { "epoch": 8.120267260579064, "loss": 0.5232473611831665, "loss_ce": 0.0001760848390404135, "loss_iou": 0.228515625, "loss_num": 0.01318359375, "loss_xval": 0.5234375, "num_input_tokens_seen": 204262612, "step": 3646 }, { "epoch": 8.122494432071269, "grad_norm": 40.90657043457031, "learning_rate": 1e-06, "loss": 0.6132, "num_input_tokens_seen": 204318668, "step": 3647 }, { "epoch": 8.122494432071269, "loss": 0.5699893832206726, "loss_ce": 0.00016517913900315762, "loss_iou": 0.2294921875, "loss_num": 0.0220947265625, "loss_xval": 0.5703125, "num_input_tokens_seen": 204318668, "step": 3647 }, { "epoch": 8.124721603563474, "grad_norm": 22.63620948791504, "learning_rate": 1e-06, "loss": 0.5056, "num_input_tokens_seen": 204376132, "step": 3648 }, { "epoch": 8.124721603563474, "loss": 0.478777140378952, "loss_ce": 0.00013943444355390966, "loss_iou": 0.2158203125, "loss_num": 0.00921630859375, "loss_xval": 0.478515625, "num_input_tokens_seen": 204376132, "step": 3648 }, { "epoch": 8.126948775055679, "grad_norm": 13.594147682189941, "learning_rate": 1e-06, "loss": 0.4997, "num_input_tokens_seen": 204435292, "step": 3649 }, { "epoch": 8.126948775055679, "loss": 0.5463213920593262, "loss_ce": 0.00017879356164485216, "loss_iou": 0.2578125, "loss_num": 0.006378173828125, "loss_xval": 0.546875, "num_input_tokens_seen": 204435292, "step": 3649 }, { "epoch": 8.129175946547884, "grad_norm": 22.09722137451172, "learning_rate": 1e-06, "loss": 0.6183, "num_input_tokens_seen": 204491100, "step": 3650 }, { "epoch": 8.129175946547884, "loss": 0.5094276666641235, "loss_ce": 0.00015031076327431947, "loss_iou": 0.208984375, "loss_num": 0.0181884765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 204491100, "step": 3650 }, { "epoch": 8.131403118040089, "grad_norm": 19.736661911010742, "learning_rate": 1e-06, "loss": 0.6618, "num_input_tokens_seen": 204546644, "step": 3651 }, { "epoch": 8.131403118040089, "loss": 0.7262549996376038, "loss_ce": 0.00018080734298564494, "loss_iou": 0.298828125, "loss_num": 0.0260009765625, "loss_xval": 0.7265625, "num_input_tokens_seen": 204546644, "step": 3651 }, { "epoch": 8.133630289532293, "grad_norm": 46.30784606933594, "learning_rate": 1e-06, "loss": 0.78, "num_input_tokens_seen": 204601144, "step": 3652 }, { "epoch": 8.133630289532293, "loss": 0.8595629930496216, "loss_ce": 0.00018801141413860023, "loss_iou": 0.333984375, "loss_num": 0.0380859375, "loss_xval": 0.859375, "num_input_tokens_seen": 204601144, "step": 3652 }, { "epoch": 8.135857461024498, "grad_norm": 19.52750587463379, "learning_rate": 1e-06, "loss": 0.7199, "num_input_tokens_seen": 204657164, "step": 3653 }, { "epoch": 8.135857461024498, "loss": 0.9219685196876526, "loss_ce": 0.0002156377595383674, "loss_iou": 0.38671875, "loss_num": 0.029296875, "loss_xval": 0.921875, "num_input_tokens_seen": 204657164, "step": 3653 }, { "epoch": 8.138084632516703, "grad_norm": 16.912254333496094, "learning_rate": 1e-06, "loss": 0.7982, "num_input_tokens_seen": 204714952, "step": 3654 }, { "epoch": 8.138084632516703, "loss": 0.9020793437957764, "loss_ce": 0.00022387836361303926, "loss_iou": 0.3828125, "loss_num": 0.0272216796875, "loss_xval": 0.90234375, "num_input_tokens_seen": 204714952, "step": 3654 }, { "epoch": 8.140311804008908, "grad_norm": 16.71441650390625, "learning_rate": 1e-06, "loss": 0.4875, "num_input_tokens_seen": 204768916, "step": 3655 }, { "epoch": 8.140311804008908, "loss": 0.5125894546508789, "loss_ce": 0.00013829523231834173, "loss_iou": 0.19921875, "loss_num": 0.0228271484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 204768916, "step": 3655 }, { "epoch": 8.142538975501113, "grad_norm": 20.34796714782715, "learning_rate": 1e-06, "loss": 0.6757, "num_input_tokens_seen": 204825592, "step": 3656 }, { "epoch": 8.142538975501113, "loss": 0.7248254418373108, "loss_ce": 0.00021602565539069474, "loss_iou": 0.28515625, "loss_num": 0.031005859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 204825592, "step": 3656 }, { "epoch": 8.144766146993318, "grad_norm": 29.74313735961914, "learning_rate": 1e-06, "loss": 0.7219, "num_input_tokens_seen": 204878420, "step": 3657 }, { "epoch": 8.144766146993318, "loss": 0.5412226319313049, "loss_ce": 0.0002070144983008504, "loss_iou": 0.2392578125, "loss_num": 0.01251220703125, "loss_xval": 0.5390625, "num_input_tokens_seen": 204878420, "step": 3657 }, { "epoch": 8.146993318485523, "grad_norm": 17.01011085510254, "learning_rate": 1e-06, "loss": 0.8195, "num_input_tokens_seen": 204933092, "step": 3658 }, { "epoch": 8.146993318485523, "loss": 0.9253840446472168, "loss_ce": 0.0003352175117470324, "loss_iou": 0.38671875, "loss_num": 0.030029296875, "loss_xval": 0.92578125, "num_input_tokens_seen": 204933092, "step": 3658 }, { "epoch": 8.14922048997773, "grad_norm": 44.65693664550781, "learning_rate": 1e-06, "loss": 0.7511, "num_input_tokens_seen": 204988252, "step": 3659 }, { "epoch": 8.14922048997773, "loss": 0.5185383558273315, "loss_ce": 0.00016674870857968926, "loss_iou": 0.234375, "loss_num": 0.0098876953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 204988252, "step": 3659 }, { "epoch": 8.151447661469934, "grad_norm": 12.71816635131836, "learning_rate": 1e-06, "loss": 0.4983, "num_input_tokens_seen": 205044848, "step": 3660 }, { "epoch": 8.151447661469934, "loss": 0.5829252600669861, "loss_ce": 0.0003446881892159581, "loss_iou": 0.2578125, "loss_num": 0.0130615234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 205044848, "step": 3660 }, { "epoch": 8.153674832962139, "grad_norm": 16.42379379272461, "learning_rate": 1e-06, "loss": 0.6397, "num_input_tokens_seen": 205101712, "step": 3661 }, { "epoch": 8.153674832962139, "loss": 0.8078476190567017, "loss_ce": 0.00023041786334943026, "loss_iou": 0.34375, "loss_num": 0.0240478515625, "loss_xval": 0.80859375, "num_input_tokens_seen": 205101712, "step": 3661 }, { "epoch": 8.155902004454344, "grad_norm": 17.691226959228516, "learning_rate": 1e-06, "loss": 0.5595, "num_input_tokens_seen": 205158256, "step": 3662 }, { "epoch": 8.155902004454344, "loss": 0.5641229152679443, "loss_ce": 0.00015803641872480512, "loss_iou": 0.2578125, "loss_num": 0.0093994140625, "loss_xval": 0.5625, "num_input_tokens_seen": 205158256, "step": 3662 }, { "epoch": 8.158129175946549, "grad_norm": 27.215301513671875, "learning_rate": 1e-06, "loss": 0.738, "num_input_tokens_seen": 205215004, "step": 3663 }, { "epoch": 8.158129175946549, "loss": 0.6344413757324219, "loss_ce": 0.0002250707766506821, "loss_iou": 0.24609375, "loss_num": 0.028564453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 205215004, "step": 3663 }, { "epoch": 8.160356347438753, "grad_norm": 27.16131591796875, "learning_rate": 1e-06, "loss": 0.5939, "num_input_tokens_seen": 205269476, "step": 3664 }, { "epoch": 8.160356347438753, "loss": 0.729198694229126, "loss_ce": 0.00019480480113998055, "loss_iou": 0.306640625, "loss_num": 0.022705078125, "loss_xval": 0.73046875, "num_input_tokens_seen": 205269476, "step": 3664 }, { "epoch": 8.162583518930958, "grad_norm": 14.612595558166504, "learning_rate": 1e-06, "loss": 0.7695, "num_input_tokens_seen": 205321936, "step": 3665 }, { "epoch": 8.162583518930958, "loss": 0.7426295280456543, "loss_ce": 0.00019791701924987137, "loss_iou": 0.3125, "loss_num": 0.02392578125, "loss_xval": 0.7421875, "num_input_tokens_seen": 205321936, "step": 3665 }, { "epoch": 8.164810690423163, "grad_norm": 23.188566207885742, "learning_rate": 1e-06, "loss": 0.9123, "num_input_tokens_seen": 205373464, "step": 3666 }, { "epoch": 8.164810690423163, "loss": 1.0600320100784302, "loss_ce": 0.0003396637039259076, "loss_iou": 0.455078125, "loss_num": 0.0301513671875, "loss_xval": 1.0625, "num_input_tokens_seen": 205373464, "step": 3666 }, { "epoch": 8.167037861915368, "grad_norm": 17.917818069458008, "learning_rate": 1e-06, "loss": 0.6325, "num_input_tokens_seen": 205424324, "step": 3667 }, { "epoch": 8.167037861915368, "loss": 0.43619126081466675, "loss_ce": 0.00015609552792739123, "loss_iou": 0.189453125, "loss_num": 0.011474609375, "loss_xval": 0.435546875, "num_input_tokens_seen": 205424324, "step": 3667 }, { "epoch": 8.169265033407573, "grad_norm": 22.07512092590332, "learning_rate": 1e-06, "loss": 1.011, "num_input_tokens_seen": 205479044, "step": 3668 }, { "epoch": 8.169265033407573, "loss": 1.0271341800689697, "loss_ce": 0.00027870899066329, "loss_iou": 0.3984375, "loss_num": 0.0458984375, "loss_xval": 1.0234375, "num_input_tokens_seen": 205479044, "step": 3668 }, { "epoch": 8.171492204899778, "grad_norm": 47.829078674316406, "learning_rate": 1e-06, "loss": 0.4878, "num_input_tokens_seen": 205533932, "step": 3669 }, { "epoch": 8.171492204899778, "loss": 0.40675288438796997, "loss_ce": 0.0001671954378252849, "loss_iou": 0.18359375, "loss_num": 0.007720947265625, "loss_xval": 0.40625, "num_input_tokens_seen": 205533932, "step": 3669 }, { "epoch": 8.173719376391983, "grad_norm": 18.006383895874023, "learning_rate": 1e-06, "loss": 0.4668, "num_input_tokens_seen": 205589416, "step": 3670 }, { "epoch": 8.173719376391983, "loss": 0.3985961675643921, "loss_ce": 0.00015864020679146051, "loss_iou": 0.17578125, "loss_num": 0.00921630859375, "loss_xval": 0.3984375, "num_input_tokens_seen": 205589416, "step": 3670 }, { "epoch": 8.175946547884188, "grad_norm": 27.27997589111328, "learning_rate": 1e-06, "loss": 0.5638, "num_input_tokens_seen": 205643196, "step": 3671 }, { "epoch": 8.175946547884188, "loss": 0.5613173246383667, "loss_ce": 0.00016013637650758028, "loss_iou": 0.2216796875, "loss_num": 0.0234375, "loss_xval": 0.5625, "num_input_tokens_seen": 205643196, "step": 3671 }, { "epoch": 8.178173719376392, "grad_norm": 19.659027099609375, "learning_rate": 1e-06, "loss": 0.7405, "num_input_tokens_seen": 205698748, "step": 3672 }, { "epoch": 8.178173719376392, "loss": 0.9643779397010803, "loss_ce": 0.0002666152431629598, "loss_iou": 0.369140625, "loss_num": 0.044921875, "loss_xval": 0.96484375, "num_input_tokens_seen": 205698748, "step": 3672 }, { "epoch": 8.180400890868597, "grad_norm": 25.99626922607422, "learning_rate": 1e-06, "loss": 0.6958, "num_input_tokens_seen": 205753176, "step": 3673 }, { "epoch": 8.180400890868597, "loss": 0.4478331208229065, "loss_ce": 0.00020127877360209823, "loss_iou": 0.203125, "loss_num": 0.0084228515625, "loss_xval": 0.447265625, "num_input_tokens_seen": 205753176, "step": 3673 }, { "epoch": 8.182628062360802, "grad_norm": 15.863686561584473, "learning_rate": 1e-06, "loss": 0.5438, "num_input_tokens_seen": 205807732, "step": 3674 }, { "epoch": 8.182628062360802, "loss": 0.5248715877532959, "loss_ce": 0.00015238260675687343, "loss_iou": 0.2353515625, "loss_num": 0.0107421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 205807732, "step": 3674 }, { "epoch": 8.184855233853007, "grad_norm": 21.04659652709961, "learning_rate": 1e-06, "loss": 0.6401, "num_input_tokens_seen": 205860244, "step": 3675 }, { "epoch": 8.184855233853007, "loss": 0.5119701623916626, "loss_ce": 0.0001293848908971995, "loss_iou": 0.2177734375, "loss_num": 0.01519775390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 205860244, "step": 3675 }, { "epoch": 8.187082405345212, "grad_norm": 15.499350547790527, "learning_rate": 1e-06, "loss": 0.6712, "num_input_tokens_seen": 205918972, "step": 3676 }, { "epoch": 8.187082405345212, "loss": 0.7695894241333008, "loss_ce": 0.00018028570048045367, "loss_iou": 0.302734375, "loss_num": 0.033203125, "loss_xval": 0.76953125, "num_input_tokens_seen": 205918972, "step": 3676 }, { "epoch": 8.189309576837417, "grad_norm": 19.32471466064453, "learning_rate": 1e-06, "loss": 0.592, "num_input_tokens_seen": 205975060, "step": 3677 }, { "epoch": 8.189309576837417, "loss": 0.6343322992324829, "loss_ce": 0.00017700789612717927, "loss_iou": 0.283203125, "loss_num": 0.01336669921875, "loss_xval": 0.6328125, "num_input_tokens_seen": 205975060, "step": 3677 }, { "epoch": 8.191536748329622, "grad_norm": 18.84845542907715, "learning_rate": 1e-06, "loss": 0.69, "num_input_tokens_seen": 206030576, "step": 3678 }, { "epoch": 8.191536748329622, "loss": 0.8888704776763916, "loss_ce": 0.00019854953279718757, "loss_iou": 0.375, "loss_num": 0.02734375, "loss_xval": 0.890625, "num_input_tokens_seen": 206030576, "step": 3678 }, { "epoch": 8.193763919821826, "grad_norm": 16.970340728759766, "learning_rate": 1e-06, "loss": 0.6626, "num_input_tokens_seen": 206087296, "step": 3679 }, { "epoch": 8.193763919821826, "loss": 0.6373680830001831, "loss_ce": 0.0002831476158462465, "loss_iou": 0.28125, "loss_num": 0.01470947265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 206087296, "step": 3679 }, { "epoch": 8.195991091314031, "grad_norm": 31.894102096557617, "learning_rate": 1e-06, "loss": 0.5481, "num_input_tokens_seen": 206141220, "step": 3680 }, { "epoch": 8.195991091314031, "loss": 0.3562151789665222, "loss_ce": 0.0001360624737571925, "loss_iou": 0.16015625, "loss_num": 0.007049560546875, "loss_xval": 0.35546875, "num_input_tokens_seen": 206141220, "step": 3680 }, { "epoch": 8.198218262806236, "grad_norm": 15.175886154174805, "learning_rate": 1e-06, "loss": 0.4565, "num_input_tokens_seen": 206199092, "step": 3681 }, { "epoch": 8.198218262806236, "loss": 0.38468533754348755, "loss_ce": 0.00022489193361252546, "loss_iou": 0.1669921875, "loss_num": 0.01019287109375, "loss_xval": 0.384765625, "num_input_tokens_seen": 206199092, "step": 3681 }, { "epoch": 8.200445434298441, "grad_norm": 14.856992721557617, "learning_rate": 1e-06, "loss": 0.5933, "num_input_tokens_seen": 206255596, "step": 3682 }, { "epoch": 8.200445434298441, "loss": 0.6677369475364685, "loss_ce": 0.0002565070753917098, "loss_iou": 0.28125, "loss_num": 0.0211181640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 206255596, "step": 3682 }, { "epoch": 8.202672605790646, "grad_norm": 26.978513717651367, "learning_rate": 1e-06, "loss": 0.6554, "num_input_tokens_seen": 206310404, "step": 3683 }, { "epoch": 8.202672605790646, "loss": 0.6696029901504517, "loss_ce": 0.0004135652561672032, "loss_iou": 0.28125, "loss_num": 0.021240234375, "loss_xval": 0.66796875, "num_input_tokens_seen": 206310404, "step": 3683 }, { "epoch": 8.20489977728285, "grad_norm": 15.110715866088867, "learning_rate": 1e-06, "loss": 0.5443, "num_input_tokens_seen": 206364188, "step": 3684 }, { "epoch": 8.20489977728285, "loss": 0.4510454833507538, "loss_ce": 0.00017877297068480402, "loss_iou": 0.1943359375, "loss_num": 0.01251220703125, "loss_xval": 0.451171875, "num_input_tokens_seen": 206364188, "step": 3684 }, { "epoch": 8.207126948775056, "grad_norm": 18.641998291015625, "learning_rate": 1e-06, "loss": 0.563, "num_input_tokens_seen": 206419384, "step": 3685 }, { "epoch": 8.207126948775056, "loss": 0.5365896821022034, "loss_ce": 0.00021273433230817318, "loss_iou": 0.2197265625, "loss_num": 0.01953125, "loss_xval": 0.53515625, "num_input_tokens_seen": 206419384, "step": 3685 }, { "epoch": 8.20935412026726, "grad_norm": 16.16904640197754, "learning_rate": 1e-06, "loss": 0.6996, "num_input_tokens_seen": 206474860, "step": 3686 }, { "epoch": 8.20935412026726, "loss": 0.9597395062446594, "loss_ce": 0.00026684050681069493, "loss_iou": 0.369140625, "loss_num": 0.0439453125, "loss_xval": 0.9609375, "num_input_tokens_seen": 206474860, "step": 3686 }, { "epoch": 8.211581291759465, "grad_norm": 14.339240074157715, "learning_rate": 1e-06, "loss": 0.5036, "num_input_tokens_seen": 206532756, "step": 3687 }, { "epoch": 8.211581291759465, "loss": 0.5385290384292603, "loss_ce": 0.0001989899465115741, "loss_iou": 0.2392578125, "loss_num": 0.01202392578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 206532756, "step": 3687 }, { "epoch": 8.21380846325167, "grad_norm": 16.02851104736328, "learning_rate": 1e-06, "loss": 0.7048, "num_input_tokens_seen": 206588744, "step": 3688 }, { "epoch": 8.21380846325167, "loss": 0.8307337164878845, "loss_ce": 0.000167329068062827, "loss_iou": 0.36328125, "loss_num": 0.0205078125, "loss_xval": 0.83203125, "num_input_tokens_seen": 206588744, "step": 3688 }, { "epoch": 8.216035634743875, "grad_norm": 20.067358016967773, "learning_rate": 1e-06, "loss": 0.5148, "num_input_tokens_seen": 206645832, "step": 3689 }, { "epoch": 8.216035634743875, "loss": 0.4788054823875427, "loss_ce": 0.0001677550608292222, "loss_iou": 0.2001953125, "loss_num": 0.01544189453125, "loss_xval": 0.478515625, "num_input_tokens_seen": 206645832, "step": 3689 }, { "epoch": 8.21826280623608, "grad_norm": 13.61365795135498, "learning_rate": 1e-06, "loss": 0.4008, "num_input_tokens_seen": 206701388, "step": 3690 }, { "epoch": 8.21826280623608, "loss": 0.34245729446411133, "loss_ce": 0.00017215096158906817, "loss_iou": 0.1513671875, "loss_num": 0.007781982421875, "loss_xval": 0.341796875, "num_input_tokens_seen": 206701388, "step": 3690 }, { "epoch": 8.220489977728285, "grad_norm": 16.062326431274414, "learning_rate": 1e-06, "loss": 0.6755, "num_input_tokens_seen": 206756396, "step": 3691 }, { "epoch": 8.220489977728285, "loss": 0.7819403409957886, "loss_ce": 0.00020207473426125944, "loss_iou": 0.337890625, "loss_num": 0.02099609375, "loss_xval": 0.78125, "num_input_tokens_seen": 206756396, "step": 3691 }, { "epoch": 8.22271714922049, "grad_norm": 16.20405387878418, "learning_rate": 1e-06, "loss": 0.8364, "num_input_tokens_seen": 206813452, "step": 3692 }, { "epoch": 8.22271714922049, "loss": 0.7428736686706543, "loss_ce": 0.00019789818907156587, "loss_iou": 0.34765625, "loss_num": 0.00885009765625, "loss_xval": 0.7421875, "num_input_tokens_seen": 206813452, "step": 3692 }, { "epoch": 8.224944320712694, "grad_norm": 15.899574279785156, "learning_rate": 1e-06, "loss": 0.5448, "num_input_tokens_seen": 206871936, "step": 3693 }, { "epoch": 8.224944320712694, "loss": 0.4710107445716858, "loss_ce": 0.00018556615395937115, "loss_iou": 0.2080078125, "loss_num": 0.010986328125, "loss_xval": 0.470703125, "num_input_tokens_seen": 206871936, "step": 3693 }, { "epoch": 8.2271714922049, "grad_norm": 21.231388092041016, "learning_rate": 1e-06, "loss": 0.6426, "num_input_tokens_seen": 206927444, "step": 3694 }, { "epoch": 8.2271714922049, "loss": 0.717461347579956, "loss_ce": 0.00017620844300836325, "loss_iou": 0.291015625, "loss_num": 0.02685546875, "loss_xval": 0.71875, "num_input_tokens_seen": 206927444, "step": 3694 }, { "epoch": 8.229398663697104, "grad_norm": 14.745061874389648, "learning_rate": 1e-06, "loss": 0.4019, "num_input_tokens_seen": 206982152, "step": 3695 }, { "epoch": 8.229398663697104, "loss": 0.32421159744262695, "loss_ce": 0.00014541992277372628, "loss_iou": 0.1279296875, "loss_num": 0.01361083984375, "loss_xval": 0.32421875, "num_input_tokens_seen": 206982152, "step": 3695 }, { "epoch": 8.231625835189309, "grad_norm": 16.86565589904785, "learning_rate": 1e-06, "loss": 0.577, "num_input_tokens_seen": 207036748, "step": 3696 }, { "epoch": 8.231625835189309, "loss": 0.5805333852767944, "loss_ce": 0.00015002592408563942, "loss_iou": 0.23828125, "loss_num": 0.020751953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 207036748, "step": 3696 }, { "epoch": 8.233853006681514, "grad_norm": 16.574954986572266, "learning_rate": 1e-06, "loss": 0.751, "num_input_tokens_seen": 207090424, "step": 3697 }, { "epoch": 8.233853006681514, "loss": 0.7723691463470459, "loss_ce": 0.0001523814134998247, "loss_iou": 0.33984375, "loss_num": 0.018798828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 207090424, "step": 3697 }, { "epoch": 8.236080178173719, "grad_norm": 32.75465774536133, "learning_rate": 1e-06, "loss": 0.4883, "num_input_tokens_seen": 207146916, "step": 3698 }, { "epoch": 8.236080178173719, "loss": 0.5894994735717773, "loss_ce": 0.0001440244377590716, "loss_iou": 0.263671875, "loss_num": 0.0126953125, "loss_xval": 0.58984375, "num_input_tokens_seen": 207146916, "step": 3698 }, { "epoch": 8.238307349665924, "grad_norm": 19.782665252685547, "learning_rate": 1e-06, "loss": 1.013, "num_input_tokens_seen": 207202692, "step": 3699 }, { "epoch": 8.238307349665924, "loss": 1.0240821838378906, "loss_ce": 0.00015636572788935155, "loss_iou": 0.384765625, "loss_num": 0.050537109375, "loss_xval": 1.0234375, "num_input_tokens_seen": 207202692, "step": 3699 }, { "epoch": 8.240534521158128, "grad_norm": 25.966341018676758, "learning_rate": 1e-06, "loss": 0.6403, "num_input_tokens_seen": 207253188, "step": 3700 }, { "epoch": 8.240534521158128, "loss": 0.602780818939209, "loss_ce": 0.00024172097619157284, "loss_iou": 0.232421875, "loss_num": 0.027587890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 207253188, "step": 3700 }, { "epoch": 8.242761692650333, "grad_norm": 19.4698543548584, "learning_rate": 1e-06, "loss": 0.565, "num_input_tokens_seen": 207307820, "step": 3701 }, { "epoch": 8.242761692650333, "loss": 0.6106410026550293, "loss_ce": 0.00016741504077799618, "loss_iou": 0.2734375, "loss_num": 0.0128173828125, "loss_xval": 0.609375, "num_input_tokens_seen": 207307820, "step": 3701 }, { "epoch": 8.244988864142538, "grad_norm": 15.045053482055664, "learning_rate": 1e-06, "loss": 0.4617, "num_input_tokens_seen": 207363340, "step": 3702 }, { "epoch": 8.244988864142538, "loss": 0.40211230516433716, "loss_ce": 0.00013477080210577697, "loss_iou": 0.1767578125, "loss_num": 0.0096435546875, "loss_xval": 0.40234375, "num_input_tokens_seen": 207363340, "step": 3702 }, { "epoch": 8.247216035634743, "grad_norm": 15.657466888427734, "learning_rate": 1e-06, "loss": 0.721, "num_input_tokens_seen": 207416744, "step": 3703 }, { "epoch": 8.247216035634743, "loss": 0.7376276254653931, "loss_ce": 0.00026186800096184015, "loss_iou": 0.298828125, "loss_num": 0.0281982421875, "loss_xval": 0.73828125, "num_input_tokens_seen": 207416744, "step": 3703 }, { "epoch": 8.249443207126948, "grad_norm": 16.417505264282227, "learning_rate": 1e-06, "loss": 0.4049, "num_input_tokens_seen": 207473136, "step": 3704 }, { "epoch": 8.249443207126948, "loss": 0.3186110258102417, "loss_ce": 0.00012959179002791643, "loss_iou": 0.1259765625, "loss_num": 0.01324462890625, "loss_xval": 0.318359375, "num_input_tokens_seen": 207473136, "step": 3704 }, { "epoch": 8.251670378619155, "grad_norm": 21.20426368713379, "learning_rate": 1e-06, "loss": 0.5388, "num_input_tokens_seen": 207528736, "step": 3705 }, { "epoch": 8.251670378619155, "loss": 0.6479160189628601, "loss_ce": 0.00014987270697019994, "loss_iou": 0.275390625, "loss_num": 0.01953125, "loss_xval": 0.6484375, "num_input_tokens_seen": 207528736, "step": 3705 }, { "epoch": 8.25389755011136, "grad_norm": 27.81067657470703, "learning_rate": 1e-06, "loss": 0.6099, "num_input_tokens_seen": 207579912, "step": 3706 }, { "epoch": 8.25389755011136, "loss": 0.6510103344917297, "loss_ce": 0.00019250249897595495, "loss_iou": 0.267578125, "loss_num": 0.023193359375, "loss_xval": 0.65234375, "num_input_tokens_seen": 207579912, "step": 3706 }, { "epoch": 8.256124721603564, "grad_norm": 24.32451629638672, "learning_rate": 1e-06, "loss": 0.6658, "num_input_tokens_seen": 207635324, "step": 3707 }, { "epoch": 8.256124721603564, "loss": 0.6035555601119995, "loss_ce": 0.00016204667917918414, "loss_iou": 0.25390625, "loss_num": 0.018798828125, "loss_xval": 0.6015625, "num_input_tokens_seen": 207635324, "step": 3707 }, { "epoch": 8.25835189309577, "grad_norm": 19.096254348754883, "learning_rate": 1e-06, "loss": 0.6809, "num_input_tokens_seen": 207691984, "step": 3708 }, { "epoch": 8.25835189309577, "loss": 0.6975338459014893, "loss_ce": 0.0002682044287212193, "loss_iou": 0.298828125, "loss_num": 0.0201416015625, "loss_xval": 0.6953125, "num_input_tokens_seen": 207691984, "step": 3708 }, { "epoch": 8.260579064587974, "grad_norm": 19.96952247619629, "learning_rate": 1e-06, "loss": 0.9264, "num_input_tokens_seen": 207742476, "step": 3709 }, { "epoch": 8.260579064587974, "loss": 0.9751963019371033, "loss_ce": 0.00022070904378779233, "loss_iou": 0.416015625, "loss_num": 0.0283203125, "loss_xval": 0.9765625, "num_input_tokens_seen": 207742476, "step": 3709 }, { "epoch": 8.262806236080179, "grad_norm": 15.585442543029785, "learning_rate": 1e-06, "loss": 0.4606, "num_input_tokens_seen": 207800484, "step": 3710 }, { "epoch": 8.262806236080179, "loss": 0.5031048655509949, "loss_ce": 0.00017520022811368108, "loss_iou": 0.2265625, "loss_num": 0.0098876953125, "loss_xval": 0.50390625, "num_input_tokens_seen": 207800484, "step": 3710 }, { "epoch": 8.265033407572384, "grad_norm": 36.97969055175781, "learning_rate": 1e-06, "loss": 0.9413, "num_input_tokens_seen": 207855020, "step": 3711 }, { "epoch": 8.265033407572384, "loss": 0.9603962898254395, "loss_ce": 0.00019123686070088297, "loss_iou": 0.373046875, "loss_num": 0.043212890625, "loss_xval": 0.9609375, "num_input_tokens_seen": 207855020, "step": 3711 }, { "epoch": 8.267260579064589, "grad_norm": 15.044690132141113, "learning_rate": 1e-06, "loss": 0.7137, "num_input_tokens_seen": 207911256, "step": 3712 }, { "epoch": 8.267260579064589, "loss": 0.6639913320541382, "loss_ce": 0.00017301499610766768, "loss_iou": 0.279296875, "loss_num": 0.0211181640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 207911256, "step": 3712 }, { "epoch": 8.269487750556793, "grad_norm": 19.051246643066406, "learning_rate": 1e-06, "loss": 0.6152, "num_input_tokens_seen": 207968744, "step": 3713 }, { "epoch": 8.269487750556793, "loss": 0.5926859378814697, "loss_ce": 0.00015662802616134286, "loss_iou": 0.259765625, "loss_num": 0.0145263671875, "loss_xval": 0.59375, "num_input_tokens_seen": 207968744, "step": 3713 }, { "epoch": 8.271714922048998, "grad_norm": 17.641752243041992, "learning_rate": 1e-06, "loss": 0.6043, "num_input_tokens_seen": 208024108, "step": 3714 }, { "epoch": 8.271714922048998, "loss": 0.5748984813690186, "loss_ce": 0.0001914296008180827, "loss_iou": 0.251953125, "loss_num": 0.01397705078125, "loss_xval": 0.57421875, "num_input_tokens_seen": 208024108, "step": 3714 }, { "epoch": 8.273942093541203, "grad_norm": 17.993663787841797, "learning_rate": 1e-06, "loss": 0.7099, "num_input_tokens_seen": 208075840, "step": 3715 }, { "epoch": 8.273942093541203, "loss": 0.6750204563140869, "loss_ce": 0.0004599187523126602, "loss_iou": 0.30078125, "loss_num": 0.014892578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 208075840, "step": 3715 }, { "epoch": 8.276169265033408, "grad_norm": 23.302898406982422, "learning_rate": 1e-06, "loss": 0.4742, "num_input_tokens_seen": 208132584, "step": 3716 }, { "epoch": 8.276169265033408, "loss": 0.5063949823379517, "loss_ce": 0.0001694063248578459, "loss_iou": 0.2275390625, "loss_num": 0.01031494140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 208132584, "step": 3716 }, { "epoch": 8.278396436525613, "grad_norm": 19.531972885131836, "learning_rate": 1e-06, "loss": 0.5614, "num_input_tokens_seen": 208189256, "step": 3717 }, { "epoch": 8.278396436525613, "loss": 0.46560224890708923, "loss_ce": 0.0001481281651649624, "loss_iou": 0.1953125, "loss_num": 0.014892578125, "loss_xval": 0.46484375, "num_input_tokens_seen": 208189256, "step": 3717 }, { "epoch": 8.280623608017818, "grad_norm": 23.255966186523438, "learning_rate": 1e-06, "loss": 0.6233, "num_input_tokens_seen": 208247268, "step": 3718 }, { "epoch": 8.280623608017818, "loss": 0.630403995513916, "loss_ce": 0.0002770504215732217, "loss_iou": 0.263671875, "loss_num": 0.020263671875, "loss_xval": 0.62890625, "num_input_tokens_seen": 208247268, "step": 3718 }, { "epoch": 8.282850779510023, "grad_norm": 31.8346004486084, "learning_rate": 1e-06, "loss": 0.4895, "num_input_tokens_seen": 208303824, "step": 3719 }, { "epoch": 8.282850779510023, "loss": 0.49452388286590576, "loss_ce": 0.00020015303744003177, "loss_iou": 0.212890625, "loss_num": 0.0140380859375, "loss_xval": 0.494140625, "num_input_tokens_seen": 208303824, "step": 3719 }, { "epoch": 8.285077951002227, "grad_norm": 16.450027465820312, "learning_rate": 1e-06, "loss": 0.8151, "num_input_tokens_seen": 208361756, "step": 3720 }, { "epoch": 8.285077951002227, "loss": 0.8614362478256226, "loss_ce": 0.00023015934857539833, "loss_iou": 0.361328125, "loss_num": 0.0274658203125, "loss_xval": 0.859375, "num_input_tokens_seen": 208361756, "step": 3720 }, { "epoch": 8.287305122494432, "grad_norm": 73.26863098144531, "learning_rate": 1e-06, "loss": 0.8532, "num_input_tokens_seen": 208418208, "step": 3721 }, { "epoch": 8.287305122494432, "loss": 1.0102782249450684, "loss_ce": 0.0002685172366909683, "loss_iou": 0.3515625, "loss_num": 0.0615234375, "loss_xval": 1.0078125, "num_input_tokens_seen": 208418208, "step": 3721 }, { "epoch": 8.289532293986637, "grad_norm": 14.410170555114746, "learning_rate": 1e-06, "loss": 0.5911, "num_input_tokens_seen": 208475988, "step": 3722 }, { "epoch": 8.289532293986637, "loss": 0.736660897731781, "loss_ce": 0.00033277933835051954, "loss_iou": 0.322265625, "loss_num": 0.018310546875, "loss_xval": 0.734375, "num_input_tokens_seen": 208475988, "step": 3722 }, { "epoch": 8.291759465478842, "grad_norm": 17.486080169677734, "learning_rate": 1e-06, "loss": 0.6389, "num_input_tokens_seen": 208532072, "step": 3723 }, { "epoch": 8.291759465478842, "loss": 0.7401440143585205, "loss_ce": 0.00015374486974906176, "loss_iou": 0.333984375, "loss_num": 0.01446533203125, "loss_xval": 0.73828125, "num_input_tokens_seen": 208532072, "step": 3723 }, { "epoch": 8.293986636971047, "grad_norm": 27.663610458374023, "learning_rate": 1e-06, "loss": 0.5406, "num_input_tokens_seen": 208587856, "step": 3724 }, { "epoch": 8.293986636971047, "loss": 0.46732282638549805, "loss_ce": 0.0001597225054865703, "loss_iou": 0.2021484375, "loss_num": 0.0125732421875, "loss_xval": 0.466796875, "num_input_tokens_seen": 208587856, "step": 3724 }, { "epoch": 8.296213808463252, "grad_norm": 23.040565490722656, "learning_rate": 1e-06, "loss": 0.7325, "num_input_tokens_seen": 208640652, "step": 3725 }, { "epoch": 8.296213808463252, "loss": 0.9635574817657471, "loss_ce": 0.00017856716294772923, "loss_iou": 0.42578125, "loss_num": 0.0220947265625, "loss_xval": 0.96484375, "num_input_tokens_seen": 208640652, "step": 3725 }, { "epoch": 8.298440979955457, "grad_norm": 17.724363327026367, "learning_rate": 1e-06, "loss": 0.4894, "num_input_tokens_seen": 208694956, "step": 3726 }, { "epoch": 8.298440979955457, "loss": 0.42843109369277954, "loss_ce": 0.0002084198349621147, "loss_iou": 0.189453125, "loss_num": 0.00970458984375, "loss_xval": 0.427734375, "num_input_tokens_seen": 208694956, "step": 3726 }, { "epoch": 8.300668151447661, "grad_norm": 21.658283233642578, "learning_rate": 1e-06, "loss": 0.5328, "num_input_tokens_seen": 208749444, "step": 3727 }, { "epoch": 8.300668151447661, "loss": 0.5249356627464294, "loss_ce": 0.0001553678303025663, "loss_iou": 0.2138671875, "loss_num": 0.019287109375, "loss_xval": 0.5234375, "num_input_tokens_seen": 208749444, "step": 3727 }, { "epoch": 8.302895322939866, "grad_norm": 25.13924789428711, "learning_rate": 1e-06, "loss": 0.5197, "num_input_tokens_seen": 208807968, "step": 3728 }, { "epoch": 8.302895322939866, "loss": 0.7015814781188965, "loss_ce": 0.00016545310791116208, "loss_iou": 0.3046875, "loss_num": 0.01806640625, "loss_xval": 0.703125, "num_input_tokens_seen": 208807968, "step": 3728 }, { "epoch": 8.305122494432071, "grad_norm": 19.753189086914062, "learning_rate": 1e-06, "loss": 0.6141, "num_input_tokens_seen": 208865056, "step": 3729 }, { "epoch": 8.305122494432071, "loss": 0.7641258835792542, "loss_ce": 0.0002098674012813717, "loss_iou": 0.330078125, "loss_num": 0.0211181640625, "loss_xval": 0.765625, "num_input_tokens_seen": 208865056, "step": 3729 }, { "epoch": 8.307349665924276, "grad_norm": 18.014732360839844, "learning_rate": 1e-06, "loss": 0.5394, "num_input_tokens_seen": 208923552, "step": 3730 }, { "epoch": 8.307349665924276, "loss": 0.6470487713813782, "loss_ce": 0.0001981953828362748, "loss_iou": 0.28515625, "loss_num": 0.01507568359375, "loss_xval": 0.6484375, "num_input_tokens_seen": 208923552, "step": 3730 }, { "epoch": 8.309576837416481, "grad_norm": 35.129364013671875, "learning_rate": 1e-06, "loss": 0.6561, "num_input_tokens_seen": 208979572, "step": 3731 }, { "epoch": 8.309576837416481, "loss": 0.6719157695770264, "loss_ce": 0.0002849046722985804, "loss_iou": 0.30078125, "loss_num": 0.01373291015625, "loss_xval": 0.671875, "num_input_tokens_seen": 208979572, "step": 3731 }, { "epoch": 8.311804008908686, "grad_norm": 22.689146041870117, "learning_rate": 1e-06, "loss": 0.7743, "num_input_tokens_seen": 209034148, "step": 3732 }, { "epoch": 8.311804008908686, "loss": 0.7121527194976807, "loss_ce": 0.00023866846458986402, "loss_iou": 0.28125, "loss_num": 0.0302734375, "loss_xval": 0.7109375, "num_input_tokens_seen": 209034148, "step": 3732 }, { "epoch": 8.31403118040089, "grad_norm": 91.16738891601562, "learning_rate": 1e-06, "loss": 0.5123, "num_input_tokens_seen": 209090968, "step": 3733 }, { "epoch": 8.31403118040089, "loss": 0.5099412202835083, "loss_ce": 0.00017555063823238015, "loss_iou": 0.2294921875, "loss_num": 0.010009765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 209090968, "step": 3733 }, { "epoch": 8.316258351893095, "grad_norm": 19.895280838012695, "learning_rate": 1e-06, "loss": 0.6358, "num_input_tokens_seen": 209145588, "step": 3734 }, { "epoch": 8.316258351893095, "loss": 0.8830050826072693, "loss_ce": 0.00019260949920862913, "loss_iou": 0.369140625, "loss_num": 0.0294189453125, "loss_xval": 0.8828125, "num_input_tokens_seen": 209145588, "step": 3734 }, { "epoch": 8.3184855233853, "grad_norm": 28.7868709564209, "learning_rate": 1e-06, "loss": 0.6875, "num_input_tokens_seen": 209201476, "step": 3735 }, { "epoch": 8.3184855233853, "loss": 0.7821846008300781, "loss_ce": 0.00020213823881931603, "loss_iou": 0.3515625, "loss_num": 0.015380859375, "loss_xval": 0.78125, "num_input_tokens_seen": 209201476, "step": 3735 }, { "epoch": 8.320712694877505, "grad_norm": 21.692697525024414, "learning_rate": 1e-06, "loss": 0.5106, "num_input_tokens_seen": 209257012, "step": 3736 }, { "epoch": 8.320712694877505, "loss": 0.5338290929794312, "loss_ce": 0.0001377178414259106, "loss_iou": 0.2392578125, "loss_num": 0.01104736328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 209257012, "step": 3736 }, { "epoch": 8.32293986636971, "grad_norm": 24.94318199157715, "learning_rate": 1e-06, "loss": 0.8265, "num_input_tokens_seen": 209312432, "step": 3737 }, { "epoch": 8.32293986636971, "loss": 0.6608332395553589, "loss_ce": 0.0001887211692519486, "loss_iou": 0.2734375, "loss_num": 0.022705078125, "loss_xval": 0.66015625, "num_input_tokens_seen": 209312432, "step": 3737 }, { "epoch": 8.325167037861915, "grad_norm": 17.53243637084961, "learning_rate": 1e-06, "loss": 0.5902, "num_input_tokens_seen": 209368084, "step": 3738 }, { "epoch": 8.325167037861915, "loss": 0.7750497460365295, "loss_ce": 0.00014740778715349734, "loss_iou": 0.306640625, "loss_num": 0.032470703125, "loss_xval": 0.7734375, "num_input_tokens_seen": 209368084, "step": 3738 }, { "epoch": 8.32739420935412, "grad_norm": 14.224679946899414, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 209421340, "step": 3739 }, { "epoch": 8.32739420935412, "loss": 0.43520334362983704, "loss_ce": 0.00014473804912995547, "loss_iou": 0.1962890625, "loss_num": 0.0086669921875, "loss_xval": 0.435546875, "num_input_tokens_seen": 209421340, "step": 3739 }, { "epoch": 8.329621380846325, "grad_norm": 17.133176803588867, "learning_rate": 1e-06, "loss": 0.5827, "num_input_tokens_seen": 209478168, "step": 3740 }, { "epoch": 8.329621380846325, "loss": 0.5902957916259766, "loss_ce": 0.0002079373225569725, "loss_iou": 0.2431640625, "loss_num": 0.0206298828125, "loss_xval": 0.58984375, "num_input_tokens_seen": 209478168, "step": 3740 }, { "epoch": 8.33184855233853, "grad_norm": 23.81924057006836, "learning_rate": 1e-06, "loss": 0.5458, "num_input_tokens_seen": 209533220, "step": 3741 }, { "epoch": 8.33184855233853, "loss": 0.7213869094848633, "loss_ce": 0.00019548808631952852, "loss_iou": 0.322265625, "loss_num": 0.0155029296875, "loss_xval": 0.72265625, "num_input_tokens_seen": 209533220, "step": 3741 }, { "epoch": 8.334075723830734, "grad_norm": 16.98560333251953, "learning_rate": 1e-06, "loss": 0.4944, "num_input_tokens_seen": 209589144, "step": 3742 }, { "epoch": 8.334075723830734, "loss": 0.3810575604438782, "loss_ce": 0.00019817678548861295, "loss_iou": 0.1640625, "loss_num": 0.01055908203125, "loss_xval": 0.380859375, "num_input_tokens_seen": 209589144, "step": 3742 }, { "epoch": 8.33630289532294, "grad_norm": 18.857044219970703, "learning_rate": 1e-06, "loss": 0.4678, "num_input_tokens_seen": 209640328, "step": 3743 }, { "epoch": 8.33630289532294, "loss": 0.413556307554245, "loss_ce": 0.00016517053882125765, "loss_iou": 0.173828125, "loss_num": 0.01312255859375, "loss_xval": 0.4140625, "num_input_tokens_seen": 209640328, "step": 3743 }, { "epoch": 8.338530066815144, "grad_norm": 16.573524475097656, "learning_rate": 1e-06, "loss": 0.6259, "num_input_tokens_seen": 209696820, "step": 3744 }, { "epoch": 8.338530066815144, "loss": 0.6830825805664062, "loss_ce": 0.00022125753457657993, "loss_iou": 0.296875, "loss_num": 0.01806640625, "loss_xval": 0.68359375, "num_input_tokens_seen": 209696820, "step": 3744 }, { "epoch": 8.340757238307349, "grad_norm": 29.868751525878906, "learning_rate": 1e-06, "loss": 0.8029, "num_input_tokens_seen": 209750812, "step": 3745 }, { "epoch": 8.340757238307349, "loss": 0.7501974701881409, "loss_ce": 0.0001974825281649828, "loss_iou": 0.31640625, "loss_num": 0.0238037109375, "loss_xval": 0.75, "num_input_tokens_seen": 209750812, "step": 3745 }, { "epoch": 8.342984409799554, "grad_norm": 13.687642097473145, "learning_rate": 1e-06, "loss": 0.6552, "num_input_tokens_seen": 209805656, "step": 3746 }, { "epoch": 8.342984409799554, "loss": 0.8280588984489441, "loss_ce": 0.00017803689115680754, "loss_iou": 0.337890625, "loss_num": 0.0308837890625, "loss_xval": 0.828125, "num_input_tokens_seen": 209805656, "step": 3746 }, { "epoch": 8.345211581291759, "grad_norm": 40.525962829589844, "learning_rate": 1e-06, "loss": 0.7054, "num_input_tokens_seen": 209859280, "step": 3747 }, { "epoch": 8.345211581291759, "loss": 0.7600662708282471, "loss_ce": 0.00017863856919575483, "loss_iou": 0.3203125, "loss_num": 0.02392578125, "loss_xval": 0.76171875, "num_input_tokens_seen": 209859280, "step": 3747 }, { "epoch": 8.347438752783964, "grad_norm": 81.74958801269531, "learning_rate": 1e-06, "loss": 0.5614, "num_input_tokens_seen": 209917268, "step": 3748 }, { "epoch": 8.347438752783964, "loss": 0.37694764137268066, "loss_ce": 0.0002996893017552793, "loss_iou": 0.16015625, "loss_num": 0.01129150390625, "loss_xval": 0.376953125, "num_input_tokens_seen": 209917268, "step": 3748 }, { "epoch": 8.34966592427617, "grad_norm": 58.23908996582031, "learning_rate": 1e-06, "loss": 0.6929, "num_input_tokens_seen": 209973232, "step": 3749 }, { "epoch": 8.34966592427617, "loss": 0.5084662437438965, "loss_ce": 0.0001654803636483848, "loss_iou": 0.2294921875, "loss_num": 0.00994873046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 209973232, "step": 3749 }, { "epoch": 8.351893095768375, "grad_norm": 14.04624080657959, "learning_rate": 1e-06, "loss": 0.4782, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "eval_seeclick_web_CIoU": 0.5785337090492249, "eval_seeclick_web_GIoU": 0.5769274234771729, "eval_seeclick_web_IoU": 0.5960685312747955, "eval_seeclick_web_MAE_all": 0.01612033136188984, "eval_seeclick_web_MAE_h": 0.008123957552015781, "eval_seeclick_web_MAE_w": 0.016420952044427395, "eval_seeclick_web_MAE_x_boxes": 0.009630883112549782, "eval_seeclick_web_MAE_y_boxes": 0.021957224002107978, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9174290299415588, "eval_seeclick_web_loss_ce": 0.00023504487762693316, "eval_seeclick_web_loss_iou": 0.41943359375, "eval_seeclick_web_loss_num": 0.012887954711914062, "eval_seeclick_web_loss_xval": 0.90283203125, "eval_seeclick_web_runtime": 22.7475, "eval_seeclick_web_samples_per_second": 2.198, "eval_seeclick_web_steps_per_second": 0.088, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "eval_icons_CIoU": 0.2745140343904495, "eval_icons_GIoU": 0.2994941622018814, "eval_icons_IoU": 0.35809725522994995, "eval_icons_MAE_all": 0.06353667378425598, "eval_icons_MAE_h": 0.03699115989729762, "eval_icons_MAE_w": 0.06941121257841587, "eval_icons_MAE_x_boxes": 0.05955472029745579, "eval_icons_MAE_y_boxes": 0.038531024008989334, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.762790560722351, "eval_icons_loss_ce": 0.0002956779644591734, "eval_icons_loss_iou": 0.687744140625, "eval_icons_loss_num": 0.06077384948730469, "eval_icons_loss_xval": 1.6787109375, "eval_icons_runtime": 22.6059, "eval_icons_samples_per_second": 2.212, "eval_icons_steps_per_second": 0.088, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "eval_screenspot_CIoU": 0.34976951281229657, "eval_screenspot_GIoU": 0.36631672581036884, "eval_screenspot_IoU": 0.4275648792584737, "eval_screenspot_MAE_all": 0.05975629389286041, "eval_screenspot_MAE_h": 0.03780995992322763, "eval_screenspot_MAE_w": 0.06876554464300473, "eval_screenspot_MAE_x_boxes": 0.06643692528208096, "eval_screenspot_MAE_y_boxes": 0.04602641022453705, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6282535791397095, "eval_screenspot_loss_ce": 0.0002775423345156014, "eval_screenspot_loss_iou": 0.6735026041666666, "eval_screenspot_loss_num": 0.07061513264973958, "eval_screenspot_loss_xval": 1.7000325520833333, "eval_screenspot_runtime": 35.522, "eval_screenspot_samples_per_second": 2.505, "eval_screenspot_steps_per_second": 0.084, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "eval_compot_CIoU": 0.3505849689245224, "eval_compot_GIoU": 0.3601333200931549, "eval_compot_IoU": 0.40839655697345734, "eval_compot_MAE_all": 0.017904515843838453, "eval_compot_MAE_h": 0.009080663323402405, "eval_compot_MAE_w": 0.022023603320121765, "eval_compot_MAE_x_boxes": 0.0294346297159791, "eval_compot_MAE_y_boxes": 0.0067735526245087385, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3908497095108032, "eval_compot_loss_ce": 0.00022490947594633326, "eval_compot_loss_iou": 0.63720703125, "eval_compot_loss_num": 0.01686859130859375, "eval_compot_loss_xval": 1.358642578125, "eval_compot_runtime": 19.7321, "eval_compot_samples_per_second": 2.534, "eval_compot_steps_per_second": 0.101, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "eval_custom_ui_val_CIoU": 0.470332317882114, "eval_custom_ui_val_GIoU": 0.48397915727562374, "eval_custom_ui_val_IoU": 0.530869291888343, "eval_custom_ui_val_MAE_all": 0.0304783143930965, "eval_custom_ui_val_MAE_h": 0.015579992827648917, "eval_custom_ui_val_MAE_w": 0.04120294677300586, "eval_custom_ui_val_MAE_x_boxes": 0.039027334190905094, "eval_custom_ui_val_MAE_y_boxes": 0.015175239571059743, "eval_custom_ui_val_inside_bbox": 0.738811731338501, "eval_custom_ui_val_loss": 1.183510184288025, "eval_custom_ui_val_loss_ce": 0.00025910464238323685, "eval_custom_ui_val_loss_iou": 0.5034722222222222, "eval_custom_ui_val_loss_num": 0.02816009521484375, "eval_custom_ui_val_loss_xval": 1.1477593315972223, "eval_custom_ui_val_runtime": 64.4781, "eval_custom_ui_val_samples_per_second": 4.11, "eval_custom_ui_val_steps_per_second": 0.14, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.351893095768375, "loss": 0.9277232885360718, "loss_ce": 0.00023301383771467954, "loss_iou": 0.40234375, "loss_num": 0.024169921875, "loss_xval": 0.92578125, "num_input_tokens_seen": 210030984, "step": 3750 }, { "epoch": 8.35412026726058, "grad_norm": 20.638172149658203, "learning_rate": 1e-06, "loss": 0.6147, "num_input_tokens_seen": 210089604, "step": 3751 }, { "epoch": 8.35412026726058, "loss": 0.7052416205406189, "loss_ce": 0.00016353550017811358, "loss_iou": 0.291015625, "loss_num": 0.025146484375, "loss_xval": 0.703125, "num_input_tokens_seen": 210089604, "step": 3751 }, { "epoch": 8.356347438752785, "grad_norm": 17.781017303466797, "learning_rate": 1e-06, "loss": 0.5201, "num_input_tokens_seen": 210145624, "step": 3752 }, { "epoch": 8.356347438752785, "loss": 0.6434928178787231, "loss_ce": 0.00018225307576358318, "loss_iou": 0.265625, "loss_num": 0.02197265625, "loss_xval": 0.64453125, "num_input_tokens_seen": 210145624, "step": 3752 }, { "epoch": 8.35857461024499, "grad_norm": 88.13903045654297, "learning_rate": 1e-06, "loss": 0.6379, "num_input_tokens_seen": 210203036, "step": 3753 }, { "epoch": 8.35857461024499, "loss": 0.6185895204544067, "loss_ce": 0.00018133444245904684, "loss_iou": 0.28515625, "loss_num": 0.00958251953125, "loss_xval": 0.6171875, "num_input_tokens_seen": 210203036, "step": 3753 }, { "epoch": 8.360801781737194, "grad_norm": 15.603206634521484, "learning_rate": 1e-06, "loss": 0.738, "num_input_tokens_seen": 210259024, "step": 3754 }, { "epoch": 8.360801781737194, "loss": 0.7939836978912354, "loss_ce": 0.0006487197242677212, "loss_iou": 0.33984375, "loss_num": 0.02294921875, "loss_xval": 0.79296875, "num_input_tokens_seen": 210259024, "step": 3754 }, { "epoch": 8.3630289532294, "grad_norm": 21.302343368530273, "learning_rate": 1e-06, "loss": 0.7286, "num_input_tokens_seen": 210316388, "step": 3755 }, { "epoch": 8.3630289532294, "loss": 0.8514755964279175, "loss_ce": 0.00015728248399682343, "loss_iou": 0.34375, "loss_num": 0.03271484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 210316388, "step": 3755 }, { "epoch": 8.365256124721604, "grad_norm": 15.152788162231445, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 210373268, "step": 3756 }, { "epoch": 8.365256124721604, "loss": 0.6328523755073547, "loss_ce": 0.00016193960618693382, "loss_iou": 0.275390625, "loss_num": 0.0166015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 210373268, "step": 3756 }, { "epoch": 8.367483296213809, "grad_norm": 19.08024787902832, "learning_rate": 1e-06, "loss": 0.6706, "num_input_tokens_seen": 210426048, "step": 3757 }, { "epoch": 8.367483296213809, "loss": 0.7494222521781921, "loss_ce": 0.0001546498097013682, "loss_iou": 0.33984375, "loss_num": 0.013916015625, "loss_xval": 0.75, "num_input_tokens_seen": 210426048, "step": 3757 }, { "epoch": 8.369710467706014, "grad_norm": 19.812162399291992, "learning_rate": 1e-06, "loss": 0.6775, "num_input_tokens_seen": 210483032, "step": 3758 }, { "epoch": 8.369710467706014, "loss": 0.6561557650566101, "loss_ce": 0.0001498895580880344, "loss_iou": 0.287109375, "loss_num": 0.016357421875, "loss_xval": 0.65625, "num_input_tokens_seen": 210483032, "step": 3758 }, { "epoch": 8.371937639198219, "grad_norm": 22.67157554626465, "learning_rate": 1e-06, "loss": 0.7241, "num_input_tokens_seen": 210540188, "step": 3759 }, { "epoch": 8.371937639198219, "loss": 0.745628833770752, "loss_ce": 0.0002675401628948748, "loss_iou": 0.322265625, "loss_num": 0.0201416015625, "loss_xval": 0.74609375, "num_input_tokens_seen": 210540188, "step": 3759 }, { "epoch": 8.374164810690424, "grad_norm": 24.26649284362793, "learning_rate": 1e-06, "loss": 0.6399, "num_input_tokens_seen": 210597540, "step": 3760 }, { "epoch": 8.374164810690424, "loss": 0.5113849639892578, "loss_ce": 0.0001544796396046877, "loss_iou": 0.224609375, "loss_num": 0.0125732421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 210597540, "step": 3760 }, { "epoch": 8.376391982182628, "grad_norm": 18.92394256591797, "learning_rate": 1e-06, "loss": 0.6188, "num_input_tokens_seen": 210651116, "step": 3761 }, { "epoch": 8.376391982182628, "loss": 0.6872795224189758, "loss_ce": 0.0002677679876796901, "loss_iou": 0.279296875, "loss_num": 0.025634765625, "loss_xval": 0.6875, "num_input_tokens_seen": 210651116, "step": 3761 }, { "epoch": 8.378619153674833, "grad_norm": 20.54817771911621, "learning_rate": 1e-06, "loss": 0.5593, "num_input_tokens_seen": 210705180, "step": 3762 }, { "epoch": 8.378619153674833, "loss": 0.4156179428100586, "loss_ce": 0.00018215348245576024, "loss_iou": 0.1796875, "loss_num": 0.01104736328125, "loss_xval": 0.416015625, "num_input_tokens_seen": 210705180, "step": 3762 }, { "epoch": 8.380846325167038, "grad_norm": 17.212587356567383, "learning_rate": 1e-06, "loss": 0.4299, "num_input_tokens_seen": 210763276, "step": 3763 }, { "epoch": 8.380846325167038, "loss": 0.373112291097641, "loss_ce": 0.00018747567082755268, "loss_iou": 0.171875, "loss_num": 0.0059814453125, "loss_xval": 0.373046875, "num_input_tokens_seen": 210763276, "step": 3763 }, { "epoch": 8.383073496659243, "grad_norm": 19.85719871520996, "learning_rate": 1e-06, "loss": 0.5694, "num_input_tokens_seen": 210818156, "step": 3764 }, { "epoch": 8.383073496659243, "loss": 0.5267565250396729, "loss_ce": 0.00014518463285639882, "loss_iou": 0.2392578125, "loss_num": 0.00946044921875, "loss_xval": 0.52734375, "num_input_tokens_seen": 210818156, "step": 3764 }, { "epoch": 8.385300668151448, "grad_norm": 22.43527603149414, "learning_rate": 1e-06, "loss": 0.7907, "num_input_tokens_seen": 210873660, "step": 3765 }, { "epoch": 8.385300668151448, "loss": 0.8294920921325684, "loss_ce": 0.0001463915396016091, "loss_iou": 0.330078125, "loss_num": 0.033935546875, "loss_xval": 0.828125, "num_input_tokens_seen": 210873660, "step": 3765 }, { "epoch": 8.387527839643653, "grad_norm": 15.67119312286377, "learning_rate": 1e-06, "loss": 0.4889, "num_input_tokens_seen": 210928384, "step": 3766 }, { "epoch": 8.387527839643653, "loss": 0.5981723070144653, "loss_ce": 0.0002108315529767424, "loss_iou": 0.251953125, "loss_num": 0.018798828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 210928384, "step": 3766 }, { "epoch": 8.389755011135858, "grad_norm": 84.53565216064453, "learning_rate": 1e-06, "loss": 0.6099, "num_input_tokens_seen": 210983112, "step": 3767 }, { "epoch": 8.389755011135858, "loss": 0.5748885869979858, "loss_ce": 0.00018153036944568157, "loss_iou": 0.251953125, "loss_num": 0.01446533203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 210983112, "step": 3767 }, { "epoch": 8.391982182628063, "grad_norm": 21.295007705688477, "learning_rate": 1e-06, "loss": 0.5424, "num_input_tokens_seen": 211039244, "step": 3768 }, { "epoch": 8.391982182628063, "loss": 0.5405082702636719, "loss_ce": 0.0002250939724035561, "loss_iou": 0.23828125, "loss_num": 0.01275634765625, "loss_xval": 0.5390625, "num_input_tokens_seen": 211039244, "step": 3768 }, { "epoch": 8.394209354120267, "grad_norm": 28.580411911010742, "learning_rate": 1e-06, "loss": 0.4886, "num_input_tokens_seen": 211093216, "step": 3769 }, { "epoch": 8.394209354120267, "loss": 0.4398452043533325, "loss_ce": 0.0001479371276218444, "loss_iou": 0.1953125, "loss_num": 0.009765625, "loss_xval": 0.439453125, "num_input_tokens_seen": 211093216, "step": 3769 }, { "epoch": 8.396436525612472, "grad_norm": 23.4449462890625, "learning_rate": 1e-06, "loss": 0.6098, "num_input_tokens_seen": 211145896, "step": 3770 }, { "epoch": 8.396436525612472, "loss": 0.4624381363391876, "loss_ce": 0.0002799497451633215, "loss_iou": 0.201171875, "loss_num": 0.0120849609375, "loss_xval": 0.462890625, "num_input_tokens_seen": 211145896, "step": 3770 }, { "epoch": 8.398663697104677, "grad_norm": 21.414167404174805, "learning_rate": 1e-06, "loss": 0.5727, "num_input_tokens_seen": 211200436, "step": 3771 }, { "epoch": 8.398663697104677, "loss": 0.6622140407562256, "loss_ce": 0.00022672140039503574, "loss_iou": 0.287109375, "loss_num": 0.017333984375, "loss_xval": 0.66015625, "num_input_tokens_seen": 211200436, "step": 3771 }, { "epoch": 8.400890868596882, "grad_norm": 24.094797134399414, "learning_rate": 1e-06, "loss": 0.4669, "num_input_tokens_seen": 211253492, "step": 3772 }, { "epoch": 8.400890868596882, "loss": 0.3054664731025696, "loss_ce": 0.00016864070494193584, "loss_iou": 0.1337890625, "loss_num": 0.0076904296875, "loss_xval": 0.3046875, "num_input_tokens_seen": 211253492, "step": 3772 }, { "epoch": 8.403118040089087, "grad_norm": 16.602676391601562, "learning_rate": 1e-06, "loss": 0.6977, "num_input_tokens_seen": 211309996, "step": 3773 }, { "epoch": 8.403118040089087, "loss": 0.8234143853187561, "loss_ce": 0.00017219953588210046, "loss_iou": 0.3046875, "loss_num": 0.042724609375, "loss_xval": 0.82421875, "num_input_tokens_seen": 211309996, "step": 3773 }, { "epoch": 8.405345211581292, "grad_norm": 15.6812744140625, "learning_rate": 1e-06, "loss": 0.5358, "num_input_tokens_seen": 211365484, "step": 3774 }, { "epoch": 8.405345211581292, "loss": 0.5414432287216187, "loss_ce": 0.0001529736036900431, "loss_iou": 0.2333984375, "loss_num": 0.01495361328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 211365484, "step": 3774 }, { "epoch": 8.407572383073497, "grad_norm": 15.80637264251709, "learning_rate": 1e-06, "loss": 0.5815, "num_input_tokens_seen": 211421912, "step": 3775 }, { "epoch": 8.407572383073497, "loss": 0.4960188865661621, "loss_ce": 0.000169286533491686, "loss_iou": 0.212890625, "loss_num": 0.01409912109375, "loss_xval": 0.49609375, "num_input_tokens_seen": 211421912, "step": 3775 }, { "epoch": 8.409799554565701, "grad_norm": 22.33266830444336, "learning_rate": 1e-06, "loss": 0.7466, "num_input_tokens_seen": 211478792, "step": 3776 }, { "epoch": 8.409799554565701, "loss": 0.7512620091438293, "loss_ce": 0.00016335461987182498, "loss_iou": 0.302734375, "loss_num": 0.0286865234375, "loss_xval": 0.75, "num_input_tokens_seen": 211478792, "step": 3776 }, { "epoch": 8.412026726057906, "grad_norm": 18.270977020263672, "learning_rate": 1e-06, "loss": 0.6434, "num_input_tokens_seen": 211535448, "step": 3777 }, { "epoch": 8.412026726057906, "loss": 0.44083136320114136, "loss_ce": 0.00015753594925627112, "loss_iou": 0.1962890625, "loss_num": 0.00946044921875, "loss_xval": 0.44140625, "num_input_tokens_seen": 211535448, "step": 3777 }, { "epoch": 8.414253897550111, "grad_norm": 26.987186431884766, "learning_rate": 1e-06, "loss": 0.6894, "num_input_tokens_seen": 211590956, "step": 3778 }, { "epoch": 8.414253897550111, "loss": 0.9005333781242371, "loss_ce": 0.00014275358989834785, "loss_iou": 0.3671875, "loss_num": 0.032958984375, "loss_xval": 0.8984375, "num_input_tokens_seen": 211590956, "step": 3778 }, { "epoch": 8.416481069042316, "grad_norm": 36.464210510253906, "learning_rate": 1e-06, "loss": 0.5899, "num_input_tokens_seen": 211645452, "step": 3779 }, { "epoch": 8.416481069042316, "loss": 0.5824276804924011, "loss_ce": 0.00015230441931635141, "loss_iou": 0.251953125, "loss_num": 0.0157470703125, "loss_xval": 0.58203125, "num_input_tokens_seen": 211645452, "step": 3779 }, { "epoch": 8.41870824053452, "grad_norm": 18.14861297607422, "learning_rate": 1e-06, "loss": 0.6356, "num_input_tokens_seen": 211701036, "step": 3780 }, { "epoch": 8.41870824053452, "loss": 0.8778401613235474, "loss_ce": 0.0001545833656564355, "loss_iou": 0.353515625, "loss_num": 0.03369140625, "loss_xval": 0.87890625, "num_input_tokens_seen": 211701036, "step": 3780 }, { "epoch": 8.420935412026726, "grad_norm": 13.912912368774414, "learning_rate": 1e-06, "loss": 0.6765, "num_input_tokens_seen": 211757800, "step": 3781 }, { "epoch": 8.420935412026726, "loss": 0.7191988229751587, "loss_ce": 0.0002046736772172153, "loss_iou": 0.314453125, "loss_num": 0.0181884765625, "loss_xval": 0.71875, "num_input_tokens_seen": 211757800, "step": 3781 }, { "epoch": 8.42316258351893, "grad_norm": 28.029550552368164, "learning_rate": 1e-06, "loss": 0.5842, "num_input_tokens_seen": 211815608, "step": 3782 }, { "epoch": 8.42316258351893, "loss": 0.5234848260879517, "loss_ce": 0.00016937771579250693, "loss_iou": 0.220703125, "loss_num": 0.01611328125, "loss_xval": 0.5234375, "num_input_tokens_seen": 211815608, "step": 3782 }, { "epoch": 8.425389755011135, "grad_norm": 14.431218147277832, "learning_rate": 1e-06, "loss": 0.7867, "num_input_tokens_seen": 211872108, "step": 3783 }, { "epoch": 8.425389755011135, "loss": 0.6559756994247437, "loss_ce": 0.0002139731077477336, "loss_iou": 0.26171875, "loss_num": 0.0262451171875, "loss_xval": 0.65625, "num_input_tokens_seen": 211872108, "step": 3783 }, { "epoch": 8.42761692650334, "grad_norm": 35.63063430786133, "learning_rate": 1e-06, "loss": 0.7438, "num_input_tokens_seen": 211927844, "step": 3784 }, { "epoch": 8.42761692650334, "loss": 0.7180646657943726, "loss_ce": 0.0001691640354692936, "loss_iou": 0.330078125, "loss_num": 0.01165771484375, "loss_xval": 0.71875, "num_input_tokens_seen": 211927844, "step": 3784 }, { "epoch": 8.429844097995545, "grad_norm": 29.35708999633789, "learning_rate": 1e-06, "loss": 0.574, "num_input_tokens_seen": 211979040, "step": 3785 }, { "epoch": 8.429844097995545, "loss": 0.4887080192565918, "loss_ce": 0.0001826368534239009, "loss_iou": 0.2197265625, "loss_num": 0.00982666015625, "loss_xval": 0.48828125, "num_input_tokens_seen": 211979040, "step": 3785 }, { "epoch": 8.43207126948775, "grad_norm": 21.319759368896484, "learning_rate": 1e-06, "loss": 0.5513, "num_input_tokens_seen": 212035452, "step": 3786 }, { "epoch": 8.43207126948775, "loss": 0.363552451133728, "loss_ce": 0.00014914579514879733, "loss_iou": 0.15625, "loss_num": 0.0101318359375, "loss_xval": 0.36328125, "num_input_tokens_seen": 212035452, "step": 3786 }, { "epoch": 8.434298440979955, "grad_norm": 23.589679718017578, "learning_rate": 1e-06, "loss": 0.7261, "num_input_tokens_seen": 212093292, "step": 3787 }, { "epoch": 8.434298440979955, "loss": 0.6076536178588867, "loss_ce": 0.00023178444826044142, "loss_iou": 0.240234375, "loss_num": 0.025634765625, "loss_xval": 0.609375, "num_input_tokens_seen": 212093292, "step": 3787 }, { "epoch": 8.43652561247216, "grad_norm": 14.83303451538086, "learning_rate": 1e-06, "loss": 0.4061, "num_input_tokens_seen": 212150376, "step": 3788 }, { "epoch": 8.43652561247216, "loss": 0.35096991062164307, "loss_ce": 0.0001398474269080907, "loss_iou": 0.1474609375, "loss_num": 0.0111083984375, "loss_xval": 0.3515625, "num_input_tokens_seen": 212150376, "step": 3788 }, { "epoch": 8.438752783964365, "grad_norm": 20.81564712524414, "learning_rate": 1e-06, "loss": 0.6, "num_input_tokens_seen": 212202624, "step": 3789 }, { "epoch": 8.438752783964365, "loss": 0.5509481430053711, "loss_ce": 0.00016686462913639843, "loss_iou": 0.2451171875, "loss_num": 0.0120849609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 212202624, "step": 3789 }, { "epoch": 8.44097995545657, "grad_norm": 15.132460594177246, "learning_rate": 1e-06, "loss": 0.7095, "num_input_tokens_seen": 212258148, "step": 3790 }, { "epoch": 8.44097995545657, "loss": 0.7777798175811768, "loss_ce": 0.00019189229351468384, "loss_iou": 0.322265625, "loss_num": 0.0269775390625, "loss_xval": 0.77734375, "num_input_tokens_seen": 212258148, "step": 3790 }, { "epoch": 8.443207126948774, "grad_norm": 16.23349952697754, "learning_rate": 1e-06, "loss": 0.4949, "num_input_tokens_seen": 212315600, "step": 3791 }, { "epoch": 8.443207126948774, "loss": 0.6320212483406067, "loss_ce": 0.00018532070680521429, "loss_iou": 0.25390625, "loss_num": 0.02490234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 212315600, "step": 3791 }, { "epoch": 8.44543429844098, "grad_norm": 29.736553192138672, "learning_rate": 1e-06, "loss": 0.6874, "num_input_tokens_seen": 212371436, "step": 3792 }, { "epoch": 8.44543429844098, "loss": 0.7760206460952759, "loss_ce": 0.00014173590170685202, "loss_iou": 0.34765625, "loss_num": 0.0164794921875, "loss_xval": 0.77734375, "num_input_tokens_seen": 212371436, "step": 3792 }, { "epoch": 8.447661469933184, "grad_norm": 17.218870162963867, "learning_rate": 1e-06, "loss": 0.5741, "num_input_tokens_seen": 212427368, "step": 3793 }, { "epoch": 8.447661469933184, "loss": 0.7538229823112488, "loss_ce": 0.00016089789278339595, "loss_iou": 0.30859375, "loss_num": 0.0272216796875, "loss_xval": 0.75390625, "num_input_tokens_seen": 212427368, "step": 3793 }, { "epoch": 8.449888641425389, "grad_norm": 17.727853775024414, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 212482836, "step": 3794 }, { "epoch": 8.449888641425389, "loss": 0.5649484395980835, "loss_ce": 0.0004953413736075163, "loss_iou": 0.2353515625, "loss_num": 0.018798828125, "loss_xval": 0.5625, "num_input_tokens_seen": 212482836, "step": 3794 }, { "epoch": 8.452115812917596, "grad_norm": 21.763526916503906, "learning_rate": 1e-06, "loss": 0.518, "num_input_tokens_seen": 212537884, "step": 3795 }, { "epoch": 8.452115812917596, "loss": 0.43735483288764954, "loss_ce": 0.00022105529205873609, "loss_iou": 0.189453125, "loss_num": 0.01190185546875, "loss_xval": 0.4375, "num_input_tokens_seen": 212537884, "step": 3795 }, { "epoch": 8.4543429844098, "grad_norm": 29.629959106445312, "learning_rate": 1e-06, "loss": 0.5047, "num_input_tokens_seen": 212592316, "step": 3796 }, { "epoch": 8.4543429844098, "loss": 0.4535437524318695, "loss_ce": 0.00017461413517594337, "loss_iou": 0.1845703125, "loss_num": 0.0167236328125, "loss_xval": 0.453125, "num_input_tokens_seen": 212592316, "step": 3796 }, { "epoch": 8.456570155902005, "grad_norm": 33.515464782714844, "learning_rate": 1e-06, "loss": 0.7209, "num_input_tokens_seen": 212649160, "step": 3797 }, { "epoch": 8.456570155902005, "loss": 0.8188025951385498, "loss_ce": 0.0001990534656215459, "loss_iou": 0.357421875, "loss_num": 0.02099609375, "loss_xval": 0.8203125, "num_input_tokens_seen": 212649160, "step": 3797 }, { "epoch": 8.45879732739421, "grad_norm": 25.849811553955078, "learning_rate": 1e-06, "loss": 0.6433, "num_input_tokens_seen": 212705208, "step": 3798 }, { "epoch": 8.45879732739421, "loss": 0.7545800805091858, "loss_ce": 0.0001855643349699676, "loss_iou": 0.34375, "loss_num": 0.01324462890625, "loss_xval": 0.75390625, "num_input_tokens_seen": 212705208, "step": 3798 }, { "epoch": 8.461024498886415, "grad_norm": 27.304214477539062, "learning_rate": 1e-06, "loss": 0.6453, "num_input_tokens_seen": 212760512, "step": 3799 }, { "epoch": 8.461024498886415, "loss": 0.5861043334007263, "loss_ce": 0.00016680179396644235, "loss_iou": 0.240234375, "loss_num": 0.02099609375, "loss_xval": 0.5859375, "num_input_tokens_seen": 212760512, "step": 3799 }, { "epoch": 8.46325167037862, "grad_norm": 23.8879451751709, "learning_rate": 1e-06, "loss": 0.6665, "num_input_tokens_seen": 212817608, "step": 3800 }, { "epoch": 8.46325167037862, "loss": 0.6634845733642578, "loss_ce": 0.00015447995974682271, "loss_iou": 0.310546875, "loss_num": 0.00811767578125, "loss_xval": 0.6640625, "num_input_tokens_seen": 212817608, "step": 3800 }, { "epoch": 8.465478841870825, "grad_norm": 42.28864288330078, "learning_rate": 1e-06, "loss": 0.7315, "num_input_tokens_seen": 212873236, "step": 3801 }, { "epoch": 8.465478841870825, "loss": 0.7518316507339478, "loss_ce": 0.0003667787532322109, "loss_iou": 0.3203125, "loss_num": 0.0223388671875, "loss_xval": 0.75, "num_input_tokens_seen": 212873236, "step": 3801 }, { "epoch": 8.46770601336303, "grad_norm": 17.901504516601562, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 212929028, "step": 3802 }, { "epoch": 8.46770601336303, "loss": 0.6398534178733826, "loss_ce": 0.000204975571250543, "loss_iou": 0.26953125, "loss_num": 0.020263671875, "loss_xval": 0.640625, "num_input_tokens_seen": 212929028, "step": 3802 }, { "epoch": 8.469933184855234, "grad_norm": 12.724729537963867, "learning_rate": 1e-06, "loss": 0.726, "num_input_tokens_seen": 212985068, "step": 3803 }, { "epoch": 8.469933184855234, "loss": 0.5726535320281982, "loss_ce": 0.00014379943604581058, "loss_iou": 0.23046875, "loss_num": 0.0224609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 212985068, "step": 3803 }, { "epoch": 8.47216035634744, "grad_norm": 17.61178207397461, "learning_rate": 1e-06, "loss": 0.4873, "num_input_tokens_seen": 213042936, "step": 3804 }, { "epoch": 8.47216035634744, "loss": 0.37965720891952515, "loss_ce": 0.00014062756963539869, "loss_iou": 0.1708984375, "loss_num": 0.00750732421875, "loss_xval": 0.37890625, "num_input_tokens_seen": 213042936, "step": 3804 }, { "epoch": 8.474387527839644, "grad_norm": 17.672943115234375, "learning_rate": 1e-06, "loss": 0.5562, "num_input_tokens_seen": 213095496, "step": 3805 }, { "epoch": 8.474387527839644, "loss": 0.5798885822296143, "loss_ce": 0.00017669444787316024, "loss_iou": 0.251953125, "loss_num": 0.015380859375, "loss_xval": 0.578125, "num_input_tokens_seen": 213095496, "step": 3805 }, { "epoch": 8.476614699331849, "grad_norm": 15.858698844909668, "learning_rate": 1e-06, "loss": 0.5357, "num_input_tokens_seen": 213151276, "step": 3806 }, { "epoch": 8.476614699331849, "loss": 0.3039921820163727, "loss_ce": 0.00015917010023258626, "loss_iou": 0.126953125, "loss_num": 0.01007080078125, "loss_xval": 0.3046875, "num_input_tokens_seen": 213151276, "step": 3806 }, { "epoch": 8.478841870824054, "grad_norm": 19.456628799438477, "learning_rate": 1e-06, "loss": 0.5587, "num_input_tokens_seen": 213208404, "step": 3807 }, { "epoch": 8.478841870824054, "loss": 0.4867333769798279, "loss_ce": 0.0001611171173863113, "loss_iou": 0.208984375, "loss_num": 0.01385498046875, "loss_xval": 0.486328125, "num_input_tokens_seen": 213208404, "step": 3807 }, { "epoch": 8.481069042316259, "grad_norm": 16.330257415771484, "learning_rate": 1e-06, "loss": 0.592, "num_input_tokens_seen": 213266032, "step": 3808 }, { "epoch": 8.481069042316259, "loss": 0.8233369588851929, "loss_ce": 0.00015580856415908784, "loss_iou": 0.330078125, "loss_num": 0.033203125, "loss_xval": 0.82421875, "num_input_tokens_seen": 213266032, "step": 3808 }, { "epoch": 8.483296213808464, "grad_norm": 18.079784393310547, "learning_rate": 1e-06, "loss": 0.5542, "num_input_tokens_seen": 213321096, "step": 3809 }, { "epoch": 8.483296213808464, "loss": 0.5775792598724365, "loss_ce": 0.00018672105215955526, "loss_iou": 0.267578125, "loss_num": 0.00811767578125, "loss_xval": 0.578125, "num_input_tokens_seen": 213321096, "step": 3809 }, { "epoch": 8.485523385300668, "grad_norm": 14.717164039611816, "learning_rate": 1e-06, "loss": 0.5484, "num_input_tokens_seen": 213379256, "step": 3810 }, { "epoch": 8.485523385300668, "loss": 0.6559404134750366, "loss_ce": 0.00017873873002827168, "loss_iou": 0.2890625, "loss_num": 0.01531982421875, "loss_xval": 0.65625, "num_input_tokens_seen": 213379256, "step": 3810 }, { "epoch": 8.487750556792873, "grad_norm": 15.925959587097168, "learning_rate": 1e-06, "loss": 0.5738, "num_input_tokens_seen": 213436860, "step": 3811 }, { "epoch": 8.487750556792873, "loss": 0.6409112811088562, "loss_ce": 0.00016421903274022043, "loss_iou": 0.275390625, "loss_num": 0.01806640625, "loss_xval": 0.640625, "num_input_tokens_seen": 213436860, "step": 3811 }, { "epoch": 8.489977728285078, "grad_norm": 16.596834182739258, "learning_rate": 1e-06, "loss": 0.5186, "num_input_tokens_seen": 213494208, "step": 3812 }, { "epoch": 8.489977728285078, "loss": 0.4774528443813324, "loss_ce": 0.00015789938333909959, "loss_iou": 0.2109375, "loss_num": 0.0113525390625, "loss_xval": 0.4765625, "num_input_tokens_seen": 213494208, "step": 3812 }, { "epoch": 8.492204899777283, "grad_norm": 20.76615333557129, "learning_rate": 1e-06, "loss": 0.5608, "num_input_tokens_seen": 213550704, "step": 3813 }, { "epoch": 8.492204899777283, "loss": 0.6620367765426636, "loss_ce": 0.00017153860244434327, "loss_iou": 0.296875, "loss_num": 0.01361083984375, "loss_xval": 0.66015625, "num_input_tokens_seen": 213550704, "step": 3813 }, { "epoch": 8.494432071269488, "grad_norm": 35.46400451660156, "learning_rate": 1e-06, "loss": 0.4233, "num_input_tokens_seen": 213605856, "step": 3814 }, { "epoch": 8.494432071269488, "loss": 0.4266274869441986, "loss_ce": 0.00011381316289771348, "loss_iou": 0.1982421875, "loss_num": 0.00592041015625, "loss_xval": 0.42578125, "num_input_tokens_seen": 213605856, "step": 3814 }, { "epoch": 8.496659242761693, "grad_norm": 17.748003005981445, "learning_rate": 1e-06, "loss": 0.4427, "num_input_tokens_seen": 213661372, "step": 3815 }, { "epoch": 8.496659242761693, "loss": 0.5624146461486816, "loss_ce": 0.00015880668070167303, "loss_iou": 0.2216796875, "loss_num": 0.023681640625, "loss_xval": 0.5625, "num_input_tokens_seen": 213661372, "step": 3815 }, { "epoch": 8.498886414253898, "grad_norm": 17.674402236938477, "learning_rate": 1e-06, "loss": 0.5981, "num_input_tokens_seen": 213716564, "step": 3816 }, { "epoch": 8.498886414253898, "loss": 0.5442743897438049, "loss_ce": 0.00020702678011730313, "loss_iou": 0.2412109375, "loss_num": 0.0125732421875, "loss_xval": 0.54296875, "num_input_tokens_seen": 213716564, "step": 3816 }, { "epoch": 8.501113585746102, "grad_norm": 15.914905548095703, "learning_rate": 1e-06, "loss": 0.6234, "num_input_tokens_seen": 213775080, "step": 3817 }, { "epoch": 8.501113585746102, "loss": 0.6505662202835083, "loss_ce": 0.00017557844694238156, "loss_iou": 0.296875, "loss_num": 0.0111083984375, "loss_xval": 0.6484375, "num_input_tokens_seen": 213775080, "step": 3817 }, { "epoch": 8.503340757238307, "grad_norm": 13.035726547241211, "learning_rate": 1e-06, "loss": 0.4907, "num_input_tokens_seen": 213832808, "step": 3818 }, { "epoch": 8.503340757238307, "loss": 0.5575791001319885, "loss_ce": 0.00020606406906154007, "loss_iou": 0.2578125, "loss_num": 0.00848388671875, "loss_xval": 0.55859375, "num_input_tokens_seen": 213832808, "step": 3818 }, { "epoch": 8.505567928730512, "grad_norm": 21.998821258544922, "learning_rate": 1e-06, "loss": 0.6187, "num_input_tokens_seen": 213892096, "step": 3819 }, { "epoch": 8.505567928730512, "loss": 0.5462846755981445, "loss_ce": 0.0001420707703800872, "loss_iou": 0.2353515625, "loss_num": 0.01495361328125, "loss_xval": 0.546875, "num_input_tokens_seen": 213892096, "step": 3819 }, { "epoch": 8.507795100222717, "grad_norm": 38.21397399902344, "learning_rate": 1e-06, "loss": 0.6985, "num_input_tokens_seen": 213946800, "step": 3820 }, { "epoch": 8.507795100222717, "loss": 0.7257634401321411, "loss_ce": 0.00017748677055351436, "loss_iou": 0.326171875, "loss_num": 0.01470947265625, "loss_xval": 0.7265625, "num_input_tokens_seen": 213946800, "step": 3820 }, { "epoch": 8.510022271714922, "grad_norm": 24.83524513244629, "learning_rate": 1e-06, "loss": 0.5032, "num_input_tokens_seen": 214003200, "step": 3821 }, { "epoch": 8.510022271714922, "loss": 0.464314341545105, "loss_ce": 0.0004471480497159064, "loss_iou": 0.1953125, "loss_num": 0.014892578125, "loss_xval": 0.46484375, "num_input_tokens_seen": 214003200, "step": 3821 }, { "epoch": 8.512249443207127, "grad_norm": 18.419172286987305, "learning_rate": 1e-06, "loss": 0.6254, "num_input_tokens_seen": 214059832, "step": 3822 }, { "epoch": 8.512249443207127, "loss": 0.5757325887680054, "loss_ce": 0.00017112254863604903, "loss_iou": 0.255859375, "loss_num": 0.0125732421875, "loss_xval": 0.57421875, "num_input_tokens_seen": 214059832, "step": 3822 }, { "epoch": 8.514476614699332, "grad_norm": 23.88714599609375, "learning_rate": 1e-06, "loss": 0.7178, "num_input_tokens_seen": 214115268, "step": 3823 }, { "epoch": 8.514476614699332, "loss": 0.9213709235191345, "loss_ce": 0.00022837005963083357, "loss_iou": 0.365234375, "loss_num": 0.037841796875, "loss_xval": 0.921875, "num_input_tokens_seen": 214115268, "step": 3823 }, { "epoch": 8.516703786191536, "grad_norm": 16.07703399658203, "learning_rate": 1e-06, "loss": 0.774, "num_input_tokens_seen": 214173736, "step": 3824 }, { "epoch": 8.516703786191536, "loss": 0.5605735778808594, "loss_ce": 0.00014879286754876375, "loss_iou": 0.23828125, "loss_num": 0.0167236328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 214173736, "step": 3824 }, { "epoch": 8.518930957683741, "grad_norm": 17.80256462097168, "learning_rate": 1e-06, "loss": 0.5559, "num_input_tokens_seen": 214231872, "step": 3825 }, { "epoch": 8.518930957683741, "loss": 0.5602927207946777, "loss_ce": 0.00017310409748461097, "loss_iou": 0.251953125, "loss_num": 0.01141357421875, "loss_xval": 0.55859375, "num_input_tokens_seen": 214231872, "step": 3825 }, { "epoch": 8.521158129175946, "grad_norm": 15.230652809143066, "learning_rate": 1e-06, "loss": 0.5637, "num_input_tokens_seen": 214289616, "step": 3826 }, { "epoch": 8.521158129175946, "loss": 0.6788380146026611, "loss_ce": 0.0002491388004273176, "loss_iou": 0.28125, "loss_num": 0.0234375, "loss_xval": 0.6796875, "num_input_tokens_seen": 214289616, "step": 3826 }, { "epoch": 8.523385300668151, "grad_norm": 19.55664825439453, "learning_rate": 1e-06, "loss": 0.4918, "num_input_tokens_seen": 214347108, "step": 3827 }, { "epoch": 8.523385300668151, "loss": 0.4482664167881012, "loss_ce": 0.00014631151861976832, "loss_iou": 0.185546875, "loss_num": 0.01519775390625, "loss_xval": 0.447265625, "num_input_tokens_seen": 214347108, "step": 3827 }, { "epoch": 8.525612472160356, "grad_norm": 19.749784469604492, "learning_rate": 1e-06, "loss": 0.5827, "num_input_tokens_seen": 214399800, "step": 3828 }, { "epoch": 8.525612472160356, "loss": 0.7083353996276855, "loss_ce": 0.00020547436724882573, "loss_iou": 0.3125, "loss_num": 0.0166015625, "loss_xval": 0.70703125, "num_input_tokens_seen": 214399800, "step": 3828 }, { "epoch": 8.52783964365256, "grad_norm": 16.604900360107422, "learning_rate": 1e-06, "loss": 0.7405, "num_input_tokens_seen": 214454464, "step": 3829 }, { "epoch": 8.52783964365256, "loss": 0.6651185750961304, "loss_ce": 0.00020155691890977323, "loss_iou": 0.298828125, "loss_num": 0.01373291015625, "loss_xval": 0.6640625, "num_input_tokens_seen": 214454464, "step": 3829 }, { "epoch": 8.530066815144766, "grad_norm": 39.781227111816406, "learning_rate": 1e-06, "loss": 0.5991, "num_input_tokens_seen": 214511448, "step": 3830 }, { "epoch": 8.530066815144766, "loss": 0.589692234992981, "loss_ce": 0.0002146851911675185, "loss_iou": 0.24609375, "loss_num": 0.0191650390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 214511448, "step": 3830 }, { "epoch": 8.53229398663697, "grad_norm": 26.230976104736328, "learning_rate": 1e-06, "loss": 0.517, "num_input_tokens_seen": 214565856, "step": 3831 }, { "epoch": 8.53229398663697, "loss": 0.389570951461792, "loss_ce": 0.00016667514864820987, "loss_iou": 0.1767578125, "loss_num": 0.007354736328125, "loss_xval": 0.388671875, "num_input_tokens_seen": 214565856, "step": 3831 }, { "epoch": 8.534521158129175, "grad_norm": 10.636022567749023, "learning_rate": 1e-06, "loss": 0.4324, "num_input_tokens_seen": 214623292, "step": 3832 }, { "epoch": 8.534521158129175, "loss": 0.4710007309913635, "loss_ce": 0.00017552266945131123, "loss_iou": 0.2138671875, "loss_num": 0.00836181640625, "loss_xval": 0.470703125, "num_input_tokens_seen": 214623292, "step": 3832 }, { "epoch": 8.53674832962138, "grad_norm": 20.143753051757812, "learning_rate": 1e-06, "loss": 0.5645, "num_input_tokens_seen": 214679760, "step": 3833 }, { "epoch": 8.53674832962138, "loss": 0.5450610518455505, "loss_ce": 0.00013918953482061625, "loss_iou": 0.2294921875, "loss_num": 0.01708984375, "loss_xval": 0.546875, "num_input_tokens_seen": 214679760, "step": 3833 }, { "epoch": 8.538975501113585, "grad_norm": 29.548900604248047, "learning_rate": 1e-06, "loss": 0.6879, "num_input_tokens_seen": 214734852, "step": 3834 }, { "epoch": 8.538975501113585, "loss": 0.43624764680862427, "loss_ce": 0.00015146093210205436, "loss_iou": 0.193359375, "loss_num": 0.00994873046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 214734852, "step": 3834 }, { "epoch": 8.54120267260579, "grad_norm": 21.732810974121094, "learning_rate": 1e-06, "loss": 0.639, "num_input_tokens_seen": 214787916, "step": 3835 }, { "epoch": 8.54120267260579, "loss": 0.7973939180374146, "loss_ce": 0.0001527119311504066, "loss_iou": 0.3515625, "loss_num": 0.01904296875, "loss_xval": 0.796875, "num_input_tokens_seen": 214787916, "step": 3835 }, { "epoch": 8.543429844097995, "grad_norm": 10.797616958618164, "learning_rate": 1e-06, "loss": 0.4759, "num_input_tokens_seen": 214842424, "step": 3836 }, { "epoch": 8.543429844097995, "loss": 0.5132704377174377, "loss_ce": 0.00020891126769129187, "loss_iou": 0.197265625, "loss_num": 0.0235595703125, "loss_xval": 0.51171875, "num_input_tokens_seen": 214842424, "step": 3836 }, { "epoch": 8.5456570155902, "grad_norm": 21.894956588745117, "learning_rate": 1e-06, "loss": 0.5809, "num_input_tokens_seen": 214897440, "step": 3837 }, { "epoch": 8.5456570155902, "loss": 0.5928073525428772, "loss_ce": 0.0001559797237860039, "loss_iou": 0.255859375, "loss_num": 0.0162353515625, "loss_xval": 0.59375, "num_input_tokens_seen": 214897440, "step": 3837 }, { "epoch": 8.547884187082406, "grad_norm": 16.91735076904297, "learning_rate": 1e-06, "loss": 0.6461, "num_input_tokens_seen": 214951216, "step": 3838 }, { "epoch": 8.547884187082406, "loss": 0.7435756325721741, "loss_ce": 0.00016743276501074433, "loss_iou": 0.328125, "loss_num": 0.0174560546875, "loss_xval": 0.7421875, "num_input_tokens_seen": 214951216, "step": 3838 }, { "epoch": 8.550111358574611, "grad_norm": 16.6099796295166, "learning_rate": 1e-06, "loss": 0.7447, "num_input_tokens_seen": 215004404, "step": 3839 }, { "epoch": 8.550111358574611, "loss": 0.4547840356826782, "loss_ce": 0.00019417837029322982, "loss_iou": 0.2041015625, "loss_num": 0.0093994140625, "loss_xval": 0.455078125, "num_input_tokens_seen": 215004404, "step": 3839 }, { "epoch": 8.552338530066816, "grad_norm": 18.755542755126953, "learning_rate": 1e-06, "loss": 0.7003, "num_input_tokens_seen": 215061452, "step": 3840 }, { "epoch": 8.552338530066816, "loss": 0.7906391620635986, "loss_ce": 0.00023385511303786188, "loss_iou": 0.34375, "loss_num": 0.02099609375, "loss_xval": 0.7890625, "num_input_tokens_seen": 215061452, "step": 3840 }, { "epoch": 8.55456570155902, "grad_norm": 15.474801063537598, "learning_rate": 1e-06, "loss": 0.5902, "num_input_tokens_seen": 215119176, "step": 3841 }, { "epoch": 8.55456570155902, "loss": 0.6494476199150085, "loss_ce": 0.00015564775094389915, "loss_iou": 0.271484375, "loss_num": 0.0211181640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 215119176, "step": 3841 }, { "epoch": 8.556792873051226, "grad_norm": 22.1204891204834, "learning_rate": 1e-06, "loss": 0.5713, "num_input_tokens_seen": 215174944, "step": 3842 }, { "epoch": 8.556792873051226, "loss": 0.5475232601165771, "loss_ce": 0.00015993315901141614, "loss_iou": 0.2392578125, "loss_num": 0.01397705078125, "loss_xval": 0.546875, "num_input_tokens_seen": 215174944, "step": 3842 }, { "epoch": 8.55902004454343, "grad_norm": 13.736021995544434, "learning_rate": 1e-06, "loss": 0.5063, "num_input_tokens_seen": 215230864, "step": 3843 }, { "epoch": 8.55902004454343, "loss": 0.4863549768924713, "loss_ce": 0.00014891059254296124, "loss_iou": 0.2138671875, "loss_num": 0.01171875, "loss_xval": 0.486328125, "num_input_tokens_seen": 215230864, "step": 3843 }, { "epoch": 8.561247216035635, "grad_norm": 22.418909072875977, "learning_rate": 1e-06, "loss": 0.7348, "num_input_tokens_seen": 215289312, "step": 3844 }, { "epoch": 8.561247216035635, "loss": 0.7660500407218933, "loss_ce": 0.00018092009122483432, "loss_iou": 0.361328125, "loss_num": 0.00830078125, "loss_xval": 0.765625, "num_input_tokens_seen": 215289312, "step": 3844 }, { "epoch": 8.56347438752784, "grad_norm": 16.7957820892334, "learning_rate": 1e-06, "loss": 0.6157, "num_input_tokens_seen": 215346324, "step": 3845 }, { "epoch": 8.56347438752784, "loss": 0.4588056802749634, "loss_ce": 0.0001875289308372885, "loss_iou": 0.201171875, "loss_num": 0.01129150390625, "loss_xval": 0.458984375, "num_input_tokens_seen": 215346324, "step": 3845 }, { "epoch": 8.565701559020045, "grad_norm": 18.19455909729004, "learning_rate": 1e-06, "loss": 0.4958, "num_input_tokens_seen": 215402928, "step": 3846 }, { "epoch": 8.565701559020045, "loss": 0.4269152879714966, "loss_ce": 0.00015747133875265718, "loss_iou": 0.1884765625, "loss_num": 0.00994873046875, "loss_xval": 0.42578125, "num_input_tokens_seen": 215402928, "step": 3846 }, { "epoch": 8.56792873051225, "grad_norm": 21.939851760864258, "learning_rate": 1e-06, "loss": 0.649, "num_input_tokens_seen": 215458620, "step": 3847 }, { "epoch": 8.56792873051225, "loss": 0.5985796451568604, "loss_ce": 0.00019095309835392982, "loss_iou": 0.251953125, "loss_num": 0.018798828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 215458620, "step": 3847 }, { "epoch": 8.570155902004455, "grad_norm": 17.403221130371094, "learning_rate": 1e-06, "loss": 0.5387, "num_input_tokens_seen": 215515404, "step": 3848 }, { "epoch": 8.570155902004455, "loss": 0.6373581886291504, "loss_ce": 0.0001511847076471895, "loss_iou": 0.283203125, "loss_num": 0.0146484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 215515404, "step": 3848 }, { "epoch": 8.57238307349666, "grad_norm": 17.169862747192383, "learning_rate": 1e-06, "loss": 0.5251, "num_input_tokens_seen": 215571552, "step": 3849 }, { "epoch": 8.57238307349666, "loss": 0.4762542247772217, "loss_ce": 0.0001799672027118504, "loss_iou": 0.2119140625, "loss_num": 0.01043701171875, "loss_xval": 0.4765625, "num_input_tokens_seen": 215571552, "step": 3849 }, { "epoch": 8.574610244988865, "grad_norm": 15.021171569824219, "learning_rate": 1e-06, "loss": 0.737, "num_input_tokens_seen": 215627964, "step": 3850 }, { "epoch": 8.574610244988865, "loss": 0.8246517777442932, "loss_ce": 0.00018888208433054388, "loss_iou": 0.328125, "loss_num": 0.033203125, "loss_xval": 0.82421875, "num_input_tokens_seen": 215627964, "step": 3850 }, { "epoch": 8.57683741648107, "grad_norm": 25.07132339477539, "learning_rate": 1e-06, "loss": 0.6764, "num_input_tokens_seen": 215683576, "step": 3851 }, { "epoch": 8.57683741648107, "loss": 0.5619097352027893, "loss_ce": 0.00014213821850717068, "loss_iou": 0.25, "loss_num": 0.01202392578125, "loss_xval": 0.5625, "num_input_tokens_seen": 215683576, "step": 3851 }, { "epoch": 8.579064587973274, "grad_norm": 15.985095024108887, "learning_rate": 1e-06, "loss": 0.5649, "num_input_tokens_seen": 215740424, "step": 3852 }, { "epoch": 8.579064587973274, "loss": 0.5519498586654663, "loss_ce": 0.000802424328867346, "loss_iou": 0.2275390625, "loss_num": 0.0189208984375, "loss_xval": 0.55078125, "num_input_tokens_seen": 215740424, "step": 3852 }, { "epoch": 8.58129175946548, "grad_norm": 17.084421157836914, "learning_rate": 1e-06, "loss": 0.4632, "num_input_tokens_seen": 215797432, "step": 3853 }, { "epoch": 8.58129175946548, "loss": 0.45524704456329346, "loss_ce": 0.00016892084386199713, "loss_iou": 0.2060546875, "loss_num": 0.0087890625, "loss_xval": 0.455078125, "num_input_tokens_seen": 215797432, "step": 3853 }, { "epoch": 8.583518930957684, "grad_norm": 25.7215633392334, "learning_rate": 1e-06, "loss": 0.827, "num_input_tokens_seen": 215855860, "step": 3854 }, { "epoch": 8.583518930957684, "loss": 0.8243607878684998, "loss_ce": 0.0001420074113411829, "loss_iou": 0.322265625, "loss_num": 0.035888671875, "loss_xval": 0.82421875, "num_input_tokens_seen": 215855860, "step": 3854 }, { "epoch": 8.585746102449889, "grad_norm": 20.765113830566406, "learning_rate": 1e-06, "loss": 0.5694, "num_input_tokens_seen": 215913020, "step": 3855 }, { "epoch": 8.585746102449889, "loss": 0.4681831896305084, "loss_ce": 0.00016561683150939643, "loss_iou": 0.201171875, "loss_num": 0.01318359375, "loss_xval": 0.46875, "num_input_tokens_seen": 215913020, "step": 3855 }, { "epoch": 8.587973273942094, "grad_norm": 22.44451141357422, "learning_rate": 1e-06, "loss": 0.5467, "num_input_tokens_seen": 215968572, "step": 3856 }, { "epoch": 8.587973273942094, "loss": 0.5845038890838623, "loss_ce": 0.00015330014866776764, "loss_iou": 0.26171875, "loss_num": 0.01251220703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 215968572, "step": 3856 }, { "epoch": 8.590200445434299, "grad_norm": 20.8133487701416, "learning_rate": 1e-06, "loss": 0.5643, "num_input_tokens_seen": 216022120, "step": 3857 }, { "epoch": 8.590200445434299, "loss": 0.5692482590675354, "loss_ce": 0.00015648285625502467, "loss_iou": 0.248046875, "loss_num": 0.0145263671875, "loss_xval": 0.5703125, "num_input_tokens_seen": 216022120, "step": 3857 }, { "epoch": 8.592427616926503, "grad_norm": 15.802310943603516, "learning_rate": 1e-06, "loss": 0.7726, "num_input_tokens_seen": 216077540, "step": 3858 }, { "epoch": 8.592427616926503, "loss": 0.732232391834259, "loss_ce": 0.00017669174121692777, "loss_iou": 0.30859375, "loss_num": 0.0224609375, "loss_xval": 0.73046875, "num_input_tokens_seen": 216077540, "step": 3858 }, { "epoch": 8.594654788418708, "grad_norm": 21.7590389251709, "learning_rate": 1e-06, "loss": 0.4892, "num_input_tokens_seen": 216132424, "step": 3859 }, { "epoch": 8.594654788418708, "loss": 0.3683167099952698, "loss_ce": 0.0001526352425571531, "loss_iou": 0.1318359375, "loss_num": 0.0206298828125, "loss_xval": 0.3671875, "num_input_tokens_seen": 216132424, "step": 3859 }, { "epoch": 8.596881959910913, "grad_norm": 29.03900718688965, "learning_rate": 1e-06, "loss": 0.7531, "num_input_tokens_seen": 216185120, "step": 3860 }, { "epoch": 8.596881959910913, "loss": 0.7123351097106934, "loss_ce": 0.0001769287046045065, "loss_iou": 0.3203125, "loss_num": 0.01458740234375, "loss_xval": 0.7109375, "num_input_tokens_seen": 216185120, "step": 3860 }, { "epoch": 8.599109131403118, "grad_norm": 14.911413192749023, "learning_rate": 1e-06, "loss": 0.4934, "num_input_tokens_seen": 216239524, "step": 3861 }, { "epoch": 8.599109131403118, "loss": 0.5576772689819336, "loss_ce": 0.00018213198927696794, "loss_iou": 0.25390625, "loss_num": 0.00982666015625, "loss_xval": 0.55859375, "num_input_tokens_seen": 216239524, "step": 3861 }, { "epoch": 8.601336302895323, "grad_norm": 23.764856338500977, "learning_rate": 1e-06, "loss": 0.535, "num_input_tokens_seen": 216295068, "step": 3862 }, { "epoch": 8.601336302895323, "loss": 0.6164814233779907, "loss_ce": 0.0001483721862314269, "loss_iou": 0.27734375, "loss_num": 0.01263427734375, "loss_xval": 0.6171875, "num_input_tokens_seen": 216295068, "step": 3862 }, { "epoch": 8.603563474387528, "grad_norm": 43.42782211303711, "learning_rate": 1e-06, "loss": 0.8414, "num_input_tokens_seen": 216349744, "step": 3863 }, { "epoch": 8.603563474387528, "loss": 0.4933130145072937, "loss_ce": 0.00014895344793330878, "loss_iou": 0.224609375, "loss_num": 0.00872802734375, "loss_xval": 0.4921875, "num_input_tokens_seen": 216349744, "step": 3863 }, { "epoch": 8.605790645879733, "grad_norm": 20.517236709594727, "learning_rate": 1e-06, "loss": 0.6078, "num_input_tokens_seen": 216407720, "step": 3864 }, { "epoch": 8.605790645879733, "loss": 0.5921906232833862, "loss_ce": 0.00014958585961721838, "loss_iou": 0.26171875, "loss_num": 0.01373291015625, "loss_xval": 0.59375, "num_input_tokens_seen": 216407720, "step": 3864 }, { "epoch": 8.608017817371937, "grad_norm": 11.897843360900879, "learning_rate": 1e-06, "loss": 0.3836, "num_input_tokens_seen": 216466340, "step": 3865 }, { "epoch": 8.608017817371937, "loss": 0.3420777916908264, "loss_ce": 0.00015885230095591396, "loss_iou": 0.1552734375, "loss_num": 0.006317138671875, "loss_xval": 0.341796875, "num_input_tokens_seen": 216466340, "step": 3865 }, { "epoch": 8.610244988864142, "grad_norm": 20.564950942993164, "learning_rate": 1e-06, "loss": 0.6276, "num_input_tokens_seen": 216521392, "step": 3866 }, { "epoch": 8.610244988864142, "loss": 0.4767248034477234, "loss_ce": 0.0001622969430172816, "loss_iou": 0.2080078125, "loss_num": 0.01226806640625, "loss_xval": 0.4765625, "num_input_tokens_seen": 216521392, "step": 3866 }, { "epoch": 8.612472160356347, "grad_norm": 22.76060676574707, "learning_rate": 1e-06, "loss": 0.9585, "num_input_tokens_seen": 216572244, "step": 3867 }, { "epoch": 8.612472160356347, "loss": 1.402675747871399, "loss_ce": 0.00033200866892002523, "loss_iou": 0.5546875, "loss_num": 0.05810546875, "loss_xval": 1.40625, "num_input_tokens_seen": 216572244, "step": 3867 }, { "epoch": 8.614699331848552, "grad_norm": 17.371736526489258, "learning_rate": 1e-06, "loss": 0.5416, "num_input_tokens_seen": 216630272, "step": 3868 }, { "epoch": 8.614699331848552, "loss": 0.49856650829315186, "loss_ce": 0.000153428060002625, "loss_iou": 0.2216796875, "loss_num": 0.01092529296875, "loss_xval": 0.498046875, "num_input_tokens_seen": 216630272, "step": 3868 }, { "epoch": 8.616926503340757, "grad_norm": 18.579317092895508, "learning_rate": 1e-06, "loss": 0.5887, "num_input_tokens_seen": 216686540, "step": 3869 }, { "epoch": 8.616926503340757, "loss": 0.5541001558303833, "loss_ce": 0.00014507281593978405, "loss_iou": 0.2138671875, "loss_num": 0.0252685546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 216686540, "step": 3869 }, { "epoch": 8.619153674832962, "grad_norm": 17.833322525024414, "learning_rate": 1e-06, "loss": 0.5787, "num_input_tokens_seen": 216742544, "step": 3870 }, { "epoch": 8.619153674832962, "loss": 0.5232726335525513, "loss_ce": 0.00020131460041739047, "loss_iou": 0.23046875, "loss_num": 0.01263427734375, "loss_xval": 0.5234375, "num_input_tokens_seen": 216742544, "step": 3870 }, { "epoch": 8.621380846325167, "grad_norm": 16.937902450561523, "learning_rate": 1e-06, "loss": 0.6145, "num_input_tokens_seen": 216798684, "step": 3871 }, { "epoch": 8.621380846325167, "loss": 0.4764711260795593, "loss_ce": 0.000152770007844083, "loss_iou": 0.203125, "loss_num": 0.01397705078125, "loss_xval": 0.4765625, "num_input_tokens_seen": 216798684, "step": 3871 }, { "epoch": 8.623608017817372, "grad_norm": 15.312004089355469, "learning_rate": 1e-06, "loss": 0.5381, "num_input_tokens_seen": 216852448, "step": 3872 }, { "epoch": 8.623608017817372, "loss": 0.47357112169265747, "loss_ce": 0.00018245888350065798, "loss_iou": 0.2119140625, "loss_num": 0.00982666015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 216852448, "step": 3872 }, { "epoch": 8.625835189309576, "grad_norm": 19.796167373657227, "learning_rate": 1e-06, "loss": 0.5406, "num_input_tokens_seen": 216906968, "step": 3873 }, { "epoch": 8.625835189309576, "loss": 0.42670130729675293, "loss_ce": 0.00018764834385365248, "loss_iou": 0.19921875, "loss_num": 0.0057373046875, "loss_xval": 0.42578125, "num_input_tokens_seen": 216906968, "step": 3873 }, { "epoch": 8.628062360801781, "grad_norm": 17.857717514038086, "learning_rate": 1e-06, "loss": 0.8762, "num_input_tokens_seen": 216959064, "step": 3874 }, { "epoch": 8.628062360801781, "loss": 0.8671058416366577, "loss_ce": 0.00016246383893303573, "loss_iou": 0.40234375, "loss_num": 0.01300048828125, "loss_xval": 0.8671875, "num_input_tokens_seen": 216959064, "step": 3874 }, { "epoch": 8.630289532293986, "grad_norm": 26.22123908996582, "learning_rate": 1e-06, "loss": 0.571, "num_input_tokens_seen": 217016020, "step": 3875 }, { "epoch": 8.630289532293986, "loss": 0.6762186288833618, "loss_ce": 0.0001932468730956316, "loss_iou": 0.310546875, "loss_num": 0.0108642578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 217016020, "step": 3875 }, { "epoch": 8.632516703786191, "grad_norm": 16.814098358154297, "learning_rate": 1e-06, "loss": 0.5477, "num_input_tokens_seen": 217069532, "step": 3876 }, { "epoch": 8.632516703786191, "loss": 0.3917490839958191, "loss_ce": 0.00014753625146113336, "loss_iou": 0.1708984375, "loss_num": 0.00994873046875, "loss_xval": 0.390625, "num_input_tokens_seen": 217069532, "step": 3876 }, { "epoch": 8.634743875278396, "grad_norm": 14.337661743164062, "learning_rate": 1e-06, "loss": 0.4624, "num_input_tokens_seen": 217128076, "step": 3877 }, { "epoch": 8.634743875278396, "loss": 0.49962735176086426, "loss_ce": 0.00023766841331962496, "loss_iou": 0.2236328125, "loss_num": 0.0107421875, "loss_xval": 0.5, "num_input_tokens_seen": 217128076, "step": 3877 }, { "epoch": 8.6369710467706, "grad_norm": 19.950851440429688, "learning_rate": 1e-06, "loss": 0.4853, "num_input_tokens_seen": 217187092, "step": 3878 }, { "epoch": 8.6369710467706, "loss": 0.4780520796775818, "loss_ce": 0.0001620454277144745, "loss_iou": 0.197265625, "loss_num": 0.0167236328125, "loss_xval": 0.478515625, "num_input_tokens_seen": 217187092, "step": 3878 }, { "epoch": 8.639198218262806, "grad_norm": 16.780813217163086, "learning_rate": 1e-06, "loss": 0.6026, "num_input_tokens_seen": 217244100, "step": 3879 }, { "epoch": 8.639198218262806, "loss": 0.7703104019165039, "loss_ce": 0.00029087584698572755, "loss_iou": 0.314453125, "loss_num": 0.02783203125, "loss_xval": 0.76953125, "num_input_tokens_seen": 217244100, "step": 3879 }, { "epoch": 8.64142538975501, "grad_norm": 20.776565551757812, "learning_rate": 1e-06, "loss": 0.5547, "num_input_tokens_seen": 217300800, "step": 3880 }, { "epoch": 8.64142538975501, "loss": 0.5758423805236816, "loss_ce": 0.0001587883016327396, "loss_iou": 0.244140625, "loss_num": 0.017578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 217300800, "step": 3880 }, { "epoch": 8.643652561247215, "grad_norm": 16.412261962890625, "learning_rate": 1e-06, "loss": 0.5354, "num_input_tokens_seen": 217357076, "step": 3881 }, { "epoch": 8.643652561247215, "loss": 0.48521143198013306, "loss_ce": 0.00047024147352203727, "loss_iou": 0.2080078125, "loss_num": 0.013671875, "loss_xval": 0.484375, "num_input_tokens_seen": 217357076, "step": 3881 }, { "epoch": 8.64587973273942, "grad_norm": 40.7680778503418, "learning_rate": 1e-06, "loss": 0.7236, "num_input_tokens_seen": 217413124, "step": 3882 }, { "epoch": 8.64587973273942, "loss": 0.7792381644248962, "loss_ce": 0.00018543524492997676, "loss_iou": 0.341796875, "loss_num": 0.019287109375, "loss_xval": 0.77734375, "num_input_tokens_seen": 217413124, "step": 3882 }, { "epoch": 8.648106904231625, "grad_norm": 29.648727416992188, "learning_rate": 1e-06, "loss": 0.6271, "num_input_tokens_seen": 217468736, "step": 3883 }, { "epoch": 8.648106904231625, "loss": 0.6764873266220093, "loss_ce": 0.00027885870076715946, "loss_iou": 0.27734375, "loss_num": 0.024169921875, "loss_xval": 0.67578125, "num_input_tokens_seen": 217468736, "step": 3883 }, { "epoch": 8.65033407572383, "grad_norm": 16.55933380126953, "learning_rate": 1e-06, "loss": 0.6713, "num_input_tokens_seen": 217524312, "step": 3884 }, { "epoch": 8.65033407572383, "loss": 0.46768736839294434, "loss_ce": 0.00015806582814548165, "loss_iou": 0.201171875, "loss_num": 0.01300048828125, "loss_xval": 0.466796875, "num_input_tokens_seen": 217524312, "step": 3884 }, { "epoch": 8.652561247216035, "grad_norm": 24.027820587158203, "learning_rate": 1e-06, "loss": 0.6617, "num_input_tokens_seen": 217582960, "step": 3885 }, { "epoch": 8.652561247216035, "loss": 0.5760955810546875, "loss_ce": 0.00016780085570644587, "loss_iou": 0.251953125, "loss_num": 0.01446533203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 217582960, "step": 3885 }, { "epoch": 8.654788418708241, "grad_norm": 12.898533821105957, "learning_rate": 1e-06, "loss": 0.4816, "num_input_tokens_seen": 217640700, "step": 3886 }, { "epoch": 8.654788418708241, "loss": 0.39870375394821167, "loss_ce": 0.00014417324564419687, "loss_iou": 0.169921875, "loss_num": 0.01171875, "loss_xval": 0.3984375, "num_input_tokens_seen": 217640700, "step": 3886 }, { "epoch": 8.657015590200446, "grad_norm": 24.518152236938477, "learning_rate": 1e-06, "loss": 0.7044, "num_input_tokens_seen": 217698148, "step": 3887 }, { "epoch": 8.657015590200446, "loss": 0.8448777198791504, "loss_ce": 0.0006394129595719278, "loss_iou": 0.357421875, "loss_num": 0.0257568359375, "loss_xval": 0.84375, "num_input_tokens_seen": 217698148, "step": 3887 }, { "epoch": 8.659242761692651, "grad_norm": 20.064247131347656, "learning_rate": 1e-06, "loss": 0.5771, "num_input_tokens_seen": 217755788, "step": 3888 }, { "epoch": 8.659242761692651, "loss": 0.602916955947876, "loss_ce": 0.00013376196147873998, "loss_iou": 0.26171875, "loss_num": 0.01611328125, "loss_xval": 0.6015625, "num_input_tokens_seen": 217755788, "step": 3888 }, { "epoch": 8.661469933184856, "grad_norm": 25.421236038208008, "learning_rate": 1e-06, "loss": 0.4772, "num_input_tokens_seen": 217813576, "step": 3889 }, { "epoch": 8.661469933184856, "loss": 0.5371390581130981, "loss_ce": 0.00015175854787230492, "loss_iou": 0.2470703125, "loss_num": 0.0086669921875, "loss_xval": 0.53515625, "num_input_tokens_seen": 217813576, "step": 3889 }, { "epoch": 8.66369710467706, "grad_norm": 23.681997299194336, "learning_rate": 1e-06, "loss": 0.6563, "num_input_tokens_seen": 217868180, "step": 3890 }, { "epoch": 8.66369710467706, "loss": 0.8451223373413086, "loss_ce": 0.00015169865218922496, "loss_iou": 0.36328125, "loss_num": 0.0238037109375, "loss_xval": 0.84375, "num_input_tokens_seen": 217868180, "step": 3890 }, { "epoch": 8.665924276169266, "grad_norm": 19.213150024414062, "learning_rate": 1e-06, "loss": 0.7518, "num_input_tokens_seen": 217926136, "step": 3891 }, { "epoch": 8.665924276169266, "loss": 0.5428782105445862, "loss_ce": 0.00015362550038844347, "loss_iou": 0.23046875, "loss_num": 0.0166015625, "loss_xval": 0.54296875, "num_input_tokens_seen": 217926136, "step": 3891 }, { "epoch": 8.66815144766147, "grad_norm": 22.411832809448242, "learning_rate": 1e-06, "loss": 0.6165, "num_input_tokens_seen": 217983252, "step": 3892 }, { "epoch": 8.66815144766147, "loss": 0.7387982606887817, "loss_ce": 0.0001508084242232144, "loss_iou": 0.3203125, "loss_num": 0.01953125, "loss_xval": 0.73828125, "num_input_tokens_seen": 217983252, "step": 3892 }, { "epoch": 8.670378619153675, "grad_norm": 22.501161575317383, "learning_rate": 1e-06, "loss": 0.5534, "num_input_tokens_seen": 218039972, "step": 3893 }, { "epoch": 8.670378619153675, "loss": 0.3819955587387085, "loss_ce": 0.0002206652716267854, "loss_iou": 0.1591796875, "loss_num": 0.01275634765625, "loss_xval": 0.380859375, "num_input_tokens_seen": 218039972, "step": 3893 }, { "epoch": 8.67260579064588, "grad_norm": 27.5329532623291, "learning_rate": 1e-06, "loss": 0.6429, "num_input_tokens_seen": 218097220, "step": 3894 }, { "epoch": 8.67260579064588, "loss": 0.3702659010887146, "loss_ce": 0.0001486857217969373, "loss_iou": 0.1640625, "loss_num": 0.00860595703125, "loss_xval": 0.37109375, "num_input_tokens_seen": 218097220, "step": 3894 }, { "epoch": 8.674832962138085, "grad_norm": 13.485652923583984, "learning_rate": 1e-06, "loss": 0.432, "num_input_tokens_seen": 218154216, "step": 3895 }, { "epoch": 8.674832962138085, "loss": 0.40601831674575806, "loss_ce": 0.00013454348663799465, "loss_iou": 0.1640625, "loss_num": 0.0157470703125, "loss_xval": 0.40625, "num_input_tokens_seen": 218154216, "step": 3895 }, { "epoch": 8.67706013363029, "grad_norm": 26.522533416748047, "learning_rate": 1e-06, "loss": 0.5957, "num_input_tokens_seen": 218206464, "step": 3896 }, { "epoch": 8.67706013363029, "loss": 0.751849889755249, "loss_ce": 0.0001408641110174358, "loss_iou": 0.32421875, "loss_num": 0.0206298828125, "loss_xval": 0.75, "num_input_tokens_seen": 218206464, "step": 3896 }, { "epoch": 8.679287305122495, "grad_norm": 24.25346565246582, "learning_rate": 1e-06, "loss": 0.8235, "num_input_tokens_seen": 218263664, "step": 3897 }, { "epoch": 8.679287305122495, "loss": 0.8781342506408691, "loss_ce": 0.0002045307046500966, "loss_iou": 0.333984375, "loss_num": 0.041748046875, "loss_xval": 0.87890625, "num_input_tokens_seen": 218263664, "step": 3897 }, { "epoch": 8.6815144766147, "grad_norm": 17.56336212158203, "learning_rate": 1e-06, "loss": 0.7218, "num_input_tokens_seen": 218319228, "step": 3898 }, { "epoch": 8.6815144766147, "loss": 0.8000770807266235, "loss_ce": 0.0001502884115325287, "loss_iou": 0.361328125, "loss_num": 0.01507568359375, "loss_xval": 0.80078125, "num_input_tokens_seen": 218319228, "step": 3898 }, { "epoch": 8.683741648106905, "grad_norm": 21.46102523803711, "learning_rate": 1e-06, "loss": 0.5721, "num_input_tokens_seen": 218374208, "step": 3899 }, { "epoch": 8.683741648106905, "loss": 0.6461607813835144, "loss_ce": 0.00016468125977553427, "loss_iou": 0.287109375, "loss_num": 0.0146484375, "loss_xval": 0.64453125, "num_input_tokens_seen": 218374208, "step": 3899 }, { "epoch": 8.68596881959911, "grad_norm": 15.952383995056152, "learning_rate": 1e-06, "loss": 0.6203, "num_input_tokens_seen": 218432844, "step": 3900 }, { "epoch": 8.68596881959911, "loss": 0.5890233516693115, "loss_ce": 0.0001561893877806142, "loss_iou": 0.265625, "loss_num": 0.01165771484375, "loss_xval": 0.58984375, "num_input_tokens_seen": 218432844, "step": 3900 }, { "epoch": 8.688195991091314, "grad_norm": 20.294042587280273, "learning_rate": 1e-06, "loss": 0.5964, "num_input_tokens_seen": 218489864, "step": 3901 }, { "epoch": 8.688195991091314, "loss": 0.6337062120437622, "loss_ce": 0.00040536391315981746, "loss_iou": 0.28125, "loss_num": 0.013916015625, "loss_xval": 0.6328125, "num_input_tokens_seen": 218489864, "step": 3901 }, { "epoch": 8.690423162583519, "grad_norm": 14.53677749633789, "learning_rate": 1e-06, "loss": 0.6919, "num_input_tokens_seen": 218545668, "step": 3902 }, { "epoch": 8.690423162583519, "loss": 0.4630255401134491, "loss_ce": 0.00013489379489328712, "loss_iou": 0.2080078125, "loss_num": 0.0093994140625, "loss_xval": 0.462890625, "num_input_tokens_seen": 218545668, "step": 3902 }, { "epoch": 8.692650334075724, "grad_norm": 21.7016544342041, "learning_rate": 1e-06, "loss": 0.614, "num_input_tokens_seen": 218602996, "step": 3903 }, { "epoch": 8.692650334075724, "loss": 0.573477566242218, "loss_ce": 0.000479543989058584, "loss_iou": 0.2392578125, "loss_num": 0.0189208984375, "loss_xval": 0.57421875, "num_input_tokens_seen": 218602996, "step": 3903 }, { "epoch": 8.694877505567929, "grad_norm": 17.698266983032227, "learning_rate": 1e-06, "loss": 0.4877, "num_input_tokens_seen": 218659176, "step": 3904 }, { "epoch": 8.694877505567929, "loss": 0.47621312737464905, "loss_ce": 0.00013888333342038095, "loss_iou": 0.203125, "loss_num": 0.0137939453125, "loss_xval": 0.4765625, "num_input_tokens_seen": 218659176, "step": 3904 }, { "epoch": 8.697104677060134, "grad_norm": 21.86427116394043, "learning_rate": 1e-06, "loss": 0.5896, "num_input_tokens_seen": 218713336, "step": 3905 }, { "epoch": 8.697104677060134, "loss": 0.5707519054412842, "loss_ce": 0.0001952478487510234, "loss_iou": 0.259765625, "loss_num": 0.01007080078125, "loss_xval": 0.5703125, "num_input_tokens_seen": 218713336, "step": 3905 }, { "epoch": 8.699331848552339, "grad_norm": 21.313528060913086, "learning_rate": 1e-06, "loss": 0.5622, "num_input_tokens_seen": 218771932, "step": 3906 }, { "epoch": 8.699331848552339, "loss": 0.5350602865219116, "loss_ce": 0.00014820430078543723, "loss_iou": 0.23828125, "loss_num": 0.01177978515625, "loss_xval": 0.53515625, "num_input_tokens_seen": 218771932, "step": 3906 }, { "epoch": 8.701559020044543, "grad_norm": 25.534914016723633, "learning_rate": 1e-06, "loss": 0.4969, "num_input_tokens_seen": 218829644, "step": 3907 }, { "epoch": 8.701559020044543, "loss": 0.4723511338233948, "loss_ce": 0.00018313938926439732, "loss_iou": 0.21875, "loss_num": 0.00689697265625, "loss_xval": 0.47265625, "num_input_tokens_seen": 218829644, "step": 3907 }, { "epoch": 8.703786191536748, "grad_norm": 31.614242553710938, "learning_rate": 1e-06, "loss": 0.7103, "num_input_tokens_seen": 218884672, "step": 3908 }, { "epoch": 8.703786191536748, "loss": 0.6449835300445557, "loss_ce": 0.0002081613929476589, "loss_iou": 0.27734375, "loss_num": 0.0177001953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 218884672, "step": 3908 }, { "epoch": 8.706013363028953, "grad_norm": 15.506339073181152, "learning_rate": 1e-06, "loss": 0.5959, "num_input_tokens_seen": 218938064, "step": 3909 }, { "epoch": 8.706013363028953, "loss": 0.729173481464386, "loss_ce": 0.00016962323570623994, "loss_iou": 0.283203125, "loss_num": 0.031982421875, "loss_xval": 0.73046875, "num_input_tokens_seen": 218938064, "step": 3909 }, { "epoch": 8.708240534521158, "grad_norm": 23.147897720336914, "learning_rate": 1e-06, "loss": 0.4788, "num_input_tokens_seen": 218995120, "step": 3910 }, { "epoch": 8.708240534521158, "loss": 0.3938792645931244, "loss_ce": 0.0002025177382165566, "loss_iou": 0.1787109375, "loss_num": 0.007080078125, "loss_xval": 0.39453125, "num_input_tokens_seen": 218995120, "step": 3910 }, { "epoch": 8.710467706013363, "grad_norm": 23.020849227905273, "learning_rate": 1e-06, "loss": 0.6817, "num_input_tokens_seen": 219050528, "step": 3911 }, { "epoch": 8.710467706013363, "loss": 0.6399117708206177, "loss_ce": 0.00026331457775086164, "loss_iou": 0.255859375, "loss_num": 0.02587890625, "loss_xval": 0.640625, "num_input_tokens_seen": 219050528, "step": 3911 }, { "epoch": 8.712694877505568, "grad_norm": 16.904033660888672, "learning_rate": 1e-06, "loss": 0.5594, "num_input_tokens_seen": 219105692, "step": 3912 }, { "epoch": 8.712694877505568, "loss": 0.7058773636817932, "loss_ce": 0.00018892009393312037, "loss_iou": 0.3046875, "loss_num": 0.01904296875, "loss_xval": 0.70703125, "num_input_tokens_seen": 219105692, "step": 3912 }, { "epoch": 8.714922048997773, "grad_norm": 15.886432647705078, "learning_rate": 1e-06, "loss": 0.5245, "num_input_tokens_seen": 219160744, "step": 3913 }, { "epoch": 8.714922048997773, "loss": 0.6087524890899658, "loss_ce": 0.00023196196707431227, "loss_iou": 0.2578125, "loss_num": 0.01904296875, "loss_xval": 0.609375, "num_input_tokens_seen": 219160744, "step": 3913 }, { "epoch": 8.717149220489977, "grad_norm": 14.26981258392334, "learning_rate": 1e-06, "loss": 0.5525, "num_input_tokens_seen": 219217212, "step": 3914 }, { "epoch": 8.717149220489977, "loss": 0.7599831819534302, "loss_ce": 0.00021756268688477576, "loss_iou": 0.279296875, "loss_num": 0.040283203125, "loss_xval": 0.7578125, "num_input_tokens_seen": 219217212, "step": 3914 }, { "epoch": 8.719376391982182, "grad_norm": 15.31400203704834, "learning_rate": 1e-06, "loss": 0.6814, "num_input_tokens_seen": 219275172, "step": 3915 }, { "epoch": 8.719376391982182, "loss": 0.5874415040016174, "loss_ce": 0.00016121604130603373, "loss_iou": 0.236328125, "loss_num": 0.022705078125, "loss_xval": 0.5859375, "num_input_tokens_seen": 219275172, "step": 3915 }, { "epoch": 8.721603563474387, "grad_norm": 13.000571250915527, "learning_rate": 1e-06, "loss": 0.43, "num_input_tokens_seen": 219329864, "step": 3916 }, { "epoch": 8.721603563474387, "loss": 0.35176119208335876, "loss_ce": 0.00019871644326485693, "loss_iou": 0.158203125, "loss_num": 0.007232666015625, "loss_xval": 0.3515625, "num_input_tokens_seen": 219329864, "step": 3916 }, { "epoch": 8.723830734966592, "grad_norm": 24.805957794189453, "learning_rate": 1e-06, "loss": 0.6412, "num_input_tokens_seen": 219387476, "step": 3917 }, { "epoch": 8.723830734966592, "loss": 0.6872168183326721, "loss_ce": 0.00020506742293946445, "loss_iou": 0.26171875, "loss_num": 0.032958984375, "loss_xval": 0.6875, "num_input_tokens_seen": 219387476, "step": 3917 }, { "epoch": 8.726057906458797, "grad_norm": 17.83942413330078, "learning_rate": 1e-06, "loss": 0.6982, "num_input_tokens_seen": 219445780, "step": 3918 }, { "epoch": 8.726057906458797, "loss": 0.7072159051895142, "loss_ce": 0.00018466576875653118, "loss_iou": 0.29296875, "loss_num": 0.02392578125, "loss_xval": 0.70703125, "num_input_tokens_seen": 219445780, "step": 3918 }, { "epoch": 8.728285077951002, "grad_norm": 18.335899353027344, "learning_rate": 1e-06, "loss": 0.554, "num_input_tokens_seen": 219501992, "step": 3919 }, { "epoch": 8.728285077951002, "loss": 0.6365430355072021, "loss_ce": 0.0001904442033264786, "loss_iou": 0.279296875, "loss_num": 0.015380859375, "loss_xval": 0.63671875, "num_input_tokens_seen": 219501992, "step": 3919 }, { "epoch": 8.730512249443207, "grad_norm": 54.56409454345703, "learning_rate": 1e-06, "loss": 0.7071, "num_input_tokens_seen": 219557492, "step": 3920 }, { "epoch": 8.730512249443207, "loss": 0.7284532785415649, "loss_ce": 0.00018174288561567664, "loss_iou": 0.29296875, "loss_num": 0.0289306640625, "loss_xval": 0.7265625, "num_input_tokens_seen": 219557492, "step": 3920 }, { "epoch": 8.732739420935411, "grad_norm": 19.81359100341797, "learning_rate": 1e-06, "loss": 0.4295, "num_input_tokens_seen": 219612520, "step": 3921 }, { "epoch": 8.732739420935411, "loss": 0.35280388593673706, "loss_ce": 0.00014273943088483065, "loss_iou": 0.15234375, "loss_num": 0.009521484375, "loss_xval": 0.353515625, "num_input_tokens_seen": 219612520, "step": 3921 }, { "epoch": 8.734966592427616, "grad_norm": 20.431453704833984, "learning_rate": 1e-06, "loss": 0.6836, "num_input_tokens_seen": 219666936, "step": 3922 }, { "epoch": 8.734966592427616, "loss": 0.7043083906173706, "loss_ce": 0.00020681662135757506, "loss_iou": 0.29296875, "loss_num": 0.02392578125, "loss_xval": 0.703125, "num_input_tokens_seen": 219666936, "step": 3922 }, { "epoch": 8.737193763919821, "grad_norm": 74.53073120117188, "learning_rate": 1e-06, "loss": 0.6355, "num_input_tokens_seen": 219721856, "step": 3923 }, { "epoch": 8.737193763919821, "loss": 0.5503079891204834, "loss_ce": 0.00013712375948671252, "loss_iou": 0.2216796875, "loss_num": 0.0216064453125, "loss_xval": 0.55078125, "num_input_tokens_seen": 219721856, "step": 3923 }, { "epoch": 8.739420935412026, "grad_norm": 18.951021194458008, "learning_rate": 1e-06, "loss": 0.4582, "num_input_tokens_seen": 219777316, "step": 3924 }, { "epoch": 8.739420935412026, "loss": 0.4863456189632416, "loss_ce": 0.00026161997811868787, "loss_iou": 0.22265625, "loss_num": 0.00836181640625, "loss_xval": 0.486328125, "num_input_tokens_seen": 219777316, "step": 3924 }, { "epoch": 8.74164810690423, "grad_norm": 17.503719329833984, "learning_rate": 1e-06, "loss": 0.6413, "num_input_tokens_seen": 219834932, "step": 3925 }, { "epoch": 8.74164810690423, "loss": 0.6336848735809326, "loss_ce": 0.0001399153989041224, "loss_iou": 0.244140625, "loss_num": 0.0291748046875, "loss_xval": 0.6328125, "num_input_tokens_seen": 219834932, "step": 3925 }, { "epoch": 8.743875278396436, "grad_norm": 15.789238929748535, "learning_rate": 1e-06, "loss": 0.5391, "num_input_tokens_seen": 219891608, "step": 3926 }, { "epoch": 8.743875278396436, "loss": 0.559868574142456, "loss_ce": 0.00029824947705492377, "loss_iou": 0.236328125, "loss_num": 0.0172119140625, "loss_xval": 0.55859375, "num_input_tokens_seen": 219891608, "step": 3926 }, { "epoch": 8.74610244988864, "grad_norm": 22.618701934814453, "learning_rate": 1e-06, "loss": 0.6993, "num_input_tokens_seen": 219948044, "step": 3927 }, { "epoch": 8.74610244988864, "loss": 0.7226395606994629, "loss_ce": 0.00022747760522179306, "loss_iou": 0.3046875, "loss_num": 0.022216796875, "loss_xval": 0.72265625, "num_input_tokens_seen": 219948044, "step": 3927 }, { "epoch": 8.748329621380847, "grad_norm": 16.85779571533203, "learning_rate": 1e-06, "loss": 0.5143, "num_input_tokens_seen": 220002632, "step": 3928 }, { "epoch": 8.748329621380847, "loss": 0.6813238859176636, "loss_ce": 0.00017151121573988348, "loss_iou": 0.291015625, "loss_num": 0.0201416015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 220002632, "step": 3928 }, { "epoch": 8.750556792873052, "grad_norm": 22.382169723510742, "learning_rate": 1e-06, "loss": 0.6557, "num_input_tokens_seen": 220058412, "step": 3929 }, { "epoch": 8.750556792873052, "loss": 0.7504002451896667, "loss_ce": 0.00015609902038704604, "loss_iou": 0.3203125, "loss_num": 0.02197265625, "loss_xval": 0.75, "num_input_tokens_seen": 220058412, "step": 3929 }, { "epoch": 8.752783964365257, "grad_norm": 15.235098838806152, "learning_rate": 1e-06, "loss": 0.6484, "num_input_tokens_seen": 220114060, "step": 3930 }, { "epoch": 8.752783964365257, "loss": 0.7623621225357056, "loss_ce": 0.00015506052295677364, "loss_iou": 0.337890625, "loss_num": 0.017333984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 220114060, "step": 3930 }, { "epoch": 8.755011135857462, "grad_norm": 19.23377227783203, "learning_rate": 1e-06, "loss": 0.7264, "num_input_tokens_seen": 220169768, "step": 3931 }, { "epoch": 8.755011135857462, "loss": 0.7468116283416748, "loss_ce": 0.0007179292151704431, "loss_iou": 0.3203125, "loss_num": 0.0206298828125, "loss_xval": 0.74609375, "num_input_tokens_seen": 220169768, "step": 3931 }, { "epoch": 8.757238307349667, "grad_norm": 24.401901245117188, "learning_rate": 1e-06, "loss": 0.603, "num_input_tokens_seen": 220227336, "step": 3932 }, { "epoch": 8.757238307349667, "loss": 0.422521710395813, "loss_ce": 0.00015843103756196797, "loss_iou": 0.1865234375, "loss_num": 0.010009765625, "loss_xval": 0.421875, "num_input_tokens_seen": 220227336, "step": 3932 }, { "epoch": 8.759465478841872, "grad_norm": 24.561513900756836, "learning_rate": 1e-06, "loss": 0.5883, "num_input_tokens_seen": 220281280, "step": 3933 }, { "epoch": 8.759465478841872, "loss": 0.6444734930992126, "loss_ce": 0.00018640572670847178, "loss_iou": 0.291015625, "loss_num": 0.0126953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 220281280, "step": 3933 }, { "epoch": 8.761692650334076, "grad_norm": 22.442155838012695, "learning_rate": 1e-06, "loss": 0.5317, "num_input_tokens_seen": 220337648, "step": 3934 }, { "epoch": 8.761692650334076, "loss": 0.662487268447876, "loss_ce": 0.00013376369315665215, "loss_iou": 0.287109375, "loss_num": 0.017333984375, "loss_xval": 0.6640625, "num_input_tokens_seen": 220337648, "step": 3934 }, { "epoch": 8.763919821826281, "grad_norm": 17.563997268676758, "learning_rate": 1e-06, "loss": 0.578, "num_input_tokens_seen": 220395500, "step": 3935 }, { "epoch": 8.763919821826281, "loss": 0.7258895635604858, "loss_ce": 0.0007919379277154803, "loss_iou": 0.30078125, "loss_num": 0.0244140625, "loss_xval": 0.7265625, "num_input_tokens_seen": 220395500, "step": 3935 }, { "epoch": 8.766146993318486, "grad_norm": 21.255687713623047, "learning_rate": 1e-06, "loss": 0.6404, "num_input_tokens_seen": 220450940, "step": 3936 }, { "epoch": 8.766146993318486, "loss": 0.6532711386680603, "loss_ce": 0.00019493838772177696, "loss_iou": 0.26171875, "loss_num": 0.0257568359375, "loss_xval": 0.65234375, "num_input_tokens_seen": 220450940, "step": 3936 }, { "epoch": 8.768374164810691, "grad_norm": 14.73167610168457, "learning_rate": 1e-06, "loss": 0.605, "num_input_tokens_seen": 220505768, "step": 3937 }, { "epoch": 8.768374164810691, "loss": 0.4992978870868683, "loss_ce": 0.00015238078776746988, "loss_iou": 0.2197265625, "loss_num": 0.01214599609375, "loss_xval": 0.5, "num_input_tokens_seen": 220505768, "step": 3937 }, { "epoch": 8.770601336302896, "grad_norm": 20.76053237915039, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 220562928, "step": 3938 }, { "epoch": 8.770601336302896, "loss": 0.5216740369796753, "loss_ce": 0.00018964534683618695, "loss_iou": 0.2373046875, "loss_num": 0.0093994140625, "loss_xval": 0.5234375, "num_input_tokens_seen": 220562928, "step": 3938 }, { "epoch": 8.7728285077951, "grad_norm": 16.428634643554688, "learning_rate": 1e-06, "loss": 0.6054, "num_input_tokens_seen": 220619840, "step": 3939 }, { "epoch": 8.7728285077951, "loss": 0.6325225830078125, "loss_ce": 0.00019834056729450822, "loss_iou": 0.2578125, "loss_num": 0.023681640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 220619840, "step": 3939 }, { "epoch": 8.775055679287306, "grad_norm": 14.64647388458252, "learning_rate": 1e-06, "loss": 0.5719, "num_input_tokens_seen": 220674660, "step": 3940 }, { "epoch": 8.775055679287306, "loss": 0.6691886782646179, "loss_ce": 0.00024336397473234683, "loss_iou": 0.275390625, "loss_num": 0.023681640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 220674660, "step": 3940 }, { "epoch": 8.77728285077951, "grad_norm": 12.952407836914062, "learning_rate": 1e-06, "loss": 0.4744, "num_input_tokens_seen": 220733140, "step": 3941 }, { "epoch": 8.77728285077951, "loss": 0.6572137475013733, "loss_ce": 0.00023132127535063773, "loss_iou": 0.28125, "loss_num": 0.019287109375, "loss_xval": 0.65625, "num_input_tokens_seen": 220733140, "step": 3941 }, { "epoch": 8.779510022271715, "grad_norm": 18.830053329467773, "learning_rate": 1e-06, "loss": 0.6262, "num_input_tokens_seen": 220789288, "step": 3942 }, { "epoch": 8.779510022271715, "loss": 0.6996396780014038, "loss_ce": 0.0001767998473951593, "loss_iou": 0.279296875, "loss_num": 0.0283203125, "loss_xval": 0.69921875, "num_input_tokens_seen": 220789288, "step": 3942 }, { "epoch": 8.78173719376392, "grad_norm": 24.526212692260742, "learning_rate": 1e-06, "loss": 0.7996, "num_input_tokens_seen": 220844188, "step": 3943 }, { "epoch": 8.78173719376392, "loss": 0.6259015798568726, "loss_ce": 0.0001691749203018844, "loss_iou": 0.283203125, "loss_num": 0.01190185546875, "loss_xval": 0.625, "num_input_tokens_seen": 220844188, "step": 3943 }, { "epoch": 8.783964365256125, "grad_norm": 17.90644073486328, "learning_rate": 1e-06, "loss": 0.5722, "num_input_tokens_seen": 220901768, "step": 3944 }, { "epoch": 8.783964365256125, "loss": 0.5738731622695923, "loss_ce": 0.00014269730309024453, "loss_iou": 0.26171875, "loss_num": 0.0098876953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 220901768, "step": 3944 }, { "epoch": 8.78619153674833, "grad_norm": 40.18233871459961, "learning_rate": 1e-06, "loss": 0.7066, "num_input_tokens_seen": 220957076, "step": 3945 }, { "epoch": 8.78619153674833, "loss": 0.8366864919662476, "loss_ce": 0.0001997254294110462, "loss_iou": 0.359375, "loss_num": 0.0233154296875, "loss_xval": 0.8359375, "num_input_tokens_seen": 220957076, "step": 3945 }, { "epoch": 8.788418708240535, "grad_norm": 35.03602600097656, "learning_rate": 1e-06, "loss": 0.683, "num_input_tokens_seen": 221011744, "step": 3946 }, { "epoch": 8.788418708240535, "loss": 0.5396614074707031, "loss_ce": 0.00023272512771654874, "loss_iou": 0.224609375, "loss_num": 0.0179443359375, "loss_xval": 0.5390625, "num_input_tokens_seen": 221011744, "step": 3946 }, { "epoch": 8.79064587973274, "grad_norm": 19.676462173461914, "learning_rate": 1e-06, "loss": 0.6669, "num_input_tokens_seen": 221068528, "step": 3947 }, { "epoch": 8.79064587973274, "loss": 0.7179461717605591, "loss_ce": 0.00017279275925830007, "loss_iou": 0.291015625, "loss_num": 0.02685546875, "loss_xval": 0.71875, "num_input_tokens_seen": 221068528, "step": 3947 }, { "epoch": 8.792873051224944, "grad_norm": 26.63570785522461, "learning_rate": 1e-06, "loss": 0.5588, "num_input_tokens_seen": 221125112, "step": 3948 }, { "epoch": 8.792873051224944, "loss": 0.6211436986923218, "loss_ce": 0.0005382976960390806, "loss_iou": 0.259765625, "loss_num": 0.0201416015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 221125112, "step": 3948 }, { "epoch": 8.79510022271715, "grad_norm": 17.29355239868164, "learning_rate": 1e-06, "loss": 0.5911, "num_input_tokens_seen": 221181924, "step": 3949 }, { "epoch": 8.79510022271715, "loss": 0.6676521897315979, "loss_ce": 0.0001717579725664109, "loss_iou": 0.298828125, "loss_num": 0.01373291015625, "loss_xval": 0.66796875, "num_input_tokens_seen": 221181924, "step": 3949 }, { "epoch": 8.797327394209354, "grad_norm": 20.77406120300293, "learning_rate": 1e-06, "loss": 0.6492, "num_input_tokens_seen": 221239948, "step": 3950 }, { "epoch": 8.797327394209354, "loss": 0.7762324213981628, "loss_ce": 0.00035347635275684297, "loss_iou": 0.310546875, "loss_num": 0.0311279296875, "loss_xval": 0.77734375, "num_input_tokens_seen": 221239948, "step": 3950 }, { "epoch": 8.799554565701559, "grad_norm": 25.013608932495117, "learning_rate": 1e-06, "loss": 0.7604, "num_input_tokens_seen": 221296624, "step": 3951 }, { "epoch": 8.799554565701559, "loss": 0.6159054040908813, "loss_ce": 0.0001827623782446608, "loss_iou": 0.263671875, "loss_num": 0.017578125, "loss_xval": 0.6171875, "num_input_tokens_seen": 221296624, "step": 3951 }, { "epoch": 8.801781737193764, "grad_norm": 22.501018524169922, "learning_rate": 1e-06, "loss": 0.6662, "num_input_tokens_seen": 221352544, "step": 3952 }, { "epoch": 8.801781737193764, "loss": 0.8238818645477295, "loss_ce": 0.0001514081668574363, "loss_iou": 0.341796875, "loss_num": 0.0277099609375, "loss_xval": 0.82421875, "num_input_tokens_seen": 221352544, "step": 3952 }, { "epoch": 8.804008908685969, "grad_norm": 16.182146072387695, "learning_rate": 1e-06, "loss": 0.7461, "num_input_tokens_seen": 221409016, "step": 3953 }, { "epoch": 8.804008908685969, "loss": 1.03929603099823, "loss_ce": 0.00023350719129666686, "loss_iou": 0.41015625, "loss_num": 0.04345703125, "loss_xval": 1.0390625, "num_input_tokens_seen": 221409016, "step": 3953 }, { "epoch": 8.806236080178174, "grad_norm": 20.251689910888672, "learning_rate": 1e-06, "loss": 0.625, "num_input_tokens_seen": 221464600, "step": 3954 }, { "epoch": 8.806236080178174, "loss": 0.6593621969223022, "loss_ce": 0.00018251534493174404, "loss_iou": 0.296875, "loss_num": 0.01324462890625, "loss_xval": 0.66015625, "num_input_tokens_seen": 221464600, "step": 3954 }, { "epoch": 8.808463251670378, "grad_norm": 22.65058708190918, "learning_rate": 1e-06, "loss": 0.4196, "num_input_tokens_seen": 221519316, "step": 3955 }, { "epoch": 8.808463251670378, "loss": 0.5017726421356201, "loss_ce": 0.0001857746101450175, "loss_iou": 0.205078125, "loss_num": 0.01806640625, "loss_xval": 0.5, "num_input_tokens_seen": 221519316, "step": 3955 }, { "epoch": 8.810690423162583, "grad_norm": 16.896793365478516, "learning_rate": 1e-06, "loss": 0.6443, "num_input_tokens_seen": 221574936, "step": 3956 }, { "epoch": 8.810690423162583, "loss": 0.4596463441848755, "loss_ce": 0.00017369160195812583, "loss_iou": 0.2001953125, "loss_num": 0.0120849609375, "loss_xval": 0.458984375, "num_input_tokens_seen": 221574936, "step": 3956 }, { "epoch": 8.812917594654788, "grad_norm": 19.71438217163086, "learning_rate": 1e-06, "loss": 0.589, "num_input_tokens_seen": 221629572, "step": 3957 }, { "epoch": 8.812917594654788, "loss": 0.5652260780334473, "loss_ce": 0.00016256351955235004, "loss_iou": 0.2421875, "loss_num": 0.01611328125, "loss_xval": 0.56640625, "num_input_tokens_seen": 221629572, "step": 3957 }, { "epoch": 8.815144766146993, "grad_norm": 14.786736488342285, "learning_rate": 1e-06, "loss": 0.4319, "num_input_tokens_seen": 221687656, "step": 3958 }, { "epoch": 8.815144766146993, "loss": 0.45927944779396057, "loss_ce": 0.00017301308980677277, "loss_iou": 0.20703125, "loss_num": 0.00921630859375, "loss_xval": 0.458984375, "num_input_tokens_seen": 221687656, "step": 3958 }, { "epoch": 8.817371937639198, "grad_norm": 17.631956100463867, "learning_rate": 1e-06, "loss": 0.6813, "num_input_tokens_seen": 221745792, "step": 3959 }, { "epoch": 8.817371937639198, "loss": 0.5624639987945557, "loss_ce": 0.0002081410784740001, "loss_iou": 0.2431640625, "loss_num": 0.01544189453125, "loss_xval": 0.5625, "num_input_tokens_seen": 221745792, "step": 3959 }, { "epoch": 8.819599109131403, "grad_norm": 22.315784454345703, "learning_rate": 1e-06, "loss": 0.7343, "num_input_tokens_seen": 221800120, "step": 3960 }, { "epoch": 8.819599109131403, "loss": 0.8773585557937622, "loss_ce": 0.00016129494179040194, "loss_iou": 0.37890625, "loss_num": 0.0238037109375, "loss_xval": 0.87890625, "num_input_tokens_seen": 221800120, "step": 3960 }, { "epoch": 8.821826280623608, "grad_norm": 14.803606986999512, "learning_rate": 1e-06, "loss": 0.6183, "num_input_tokens_seen": 221856540, "step": 3961 }, { "epoch": 8.821826280623608, "loss": 0.5055595636367798, "loss_ce": 0.00018847928731702268, "loss_iou": 0.2236328125, "loss_num": 0.0115966796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 221856540, "step": 3961 }, { "epoch": 8.824053452115812, "grad_norm": 22.032180786132812, "learning_rate": 1e-06, "loss": 0.5228, "num_input_tokens_seen": 221908880, "step": 3962 }, { "epoch": 8.824053452115812, "loss": 0.551054835319519, "loss_ce": 0.00027354987105354667, "loss_iou": 0.24609375, "loss_num": 0.0115966796875, "loss_xval": 0.55078125, "num_input_tokens_seen": 221908880, "step": 3962 }, { "epoch": 8.826280623608017, "grad_norm": 64.0085678100586, "learning_rate": 1e-06, "loss": 0.8304, "num_input_tokens_seen": 221964260, "step": 3963 }, { "epoch": 8.826280623608017, "loss": 0.7420934438705444, "loss_ce": 0.00015010150673333555, "loss_iou": 0.314453125, "loss_num": 0.0225830078125, "loss_xval": 0.7421875, "num_input_tokens_seen": 221964260, "step": 3963 }, { "epoch": 8.828507795100222, "grad_norm": 16.832103729248047, "learning_rate": 1e-06, "loss": 0.8408, "num_input_tokens_seen": 222022236, "step": 3964 }, { "epoch": 8.828507795100222, "loss": 1.2050738334655762, "loss_ce": 0.00023982246057130396, "loss_iou": 0.466796875, "loss_num": 0.053955078125, "loss_xval": 1.203125, "num_input_tokens_seen": 222022236, "step": 3964 }, { "epoch": 8.830734966592427, "grad_norm": 20.819835662841797, "learning_rate": 1e-06, "loss": 0.7145, "num_input_tokens_seen": 222077780, "step": 3965 }, { "epoch": 8.830734966592427, "loss": 0.9205887317657471, "loss_ce": 0.00017864895926322788, "loss_iou": 0.37109375, "loss_num": 0.035400390625, "loss_xval": 0.921875, "num_input_tokens_seen": 222077780, "step": 3965 }, { "epoch": 8.832962138084632, "grad_norm": 18.16783332824707, "learning_rate": 1e-06, "loss": 0.4841, "num_input_tokens_seen": 222136840, "step": 3966 }, { "epoch": 8.832962138084632, "loss": 0.5124315023422241, "loss_ce": 0.00019395150593481958, "loss_iou": 0.2080078125, "loss_num": 0.01953125, "loss_xval": 0.51171875, "num_input_tokens_seen": 222136840, "step": 3966 }, { "epoch": 8.835189309576837, "grad_norm": 25.93811798095703, "learning_rate": 1e-06, "loss": 0.4811, "num_input_tokens_seen": 222191848, "step": 3967 }, { "epoch": 8.835189309576837, "loss": 0.5905265212059021, "loss_ce": 0.00019449519459158182, "loss_iou": 0.248046875, "loss_num": 0.018798828125, "loss_xval": 0.58984375, "num_input_tokens_seen": 222191848, "step": 3967 }, { "epoch": 8.837416481069042, "grad_norm": 26.04351043701172, "learning_rate": 1e-06, "loss": 0.4815, "num_input_tokens_seen": 222246416, "step": 3968 }, { "epoch": 8.837416481069042, "loss": 0.5717356204986572, "loss_ce": 0.00020242987375240773, "loss_iou": 0.251953125, "loss_num": 0.013671875, "loss_xval": 0.5703125, "num_input_tokens_seen": 222246416, "step": 3968 }, { "epoch": 8.839643652561247, "grad_norm": 32.86327362060547, "learning_rate": 1e-06, "loss": 0.6211, "num_input_tokens_seen": 222300948, "step": 3969 }, { "epoch": 8.839643652561247, "loss": 0.7585898637771606, "loss_ce": 0.000167002814123407, "loss_iou": 0.3203125, "loss_num": 0.023681640625, "loss_xval": 0.7578125, "num_input_tokens_seen": 222300948, "step": 3969 }, { "epoch": 8.841870824053451, "grad_norm": 29.39252471923828, "learning_rate": 1e-06, "loss": 0.7207, "num_input_tokens_seen": 222356800, "step": 3970 }, { "epoch": 8.841870824053451, "loss": 0.7579694390296936, "loss_ce": 0.00015694380272179842, "loss_iou": 0.310546875, "loss_num": 0.0277099609375, "loss_xval": 0.7578125, "num_input_tokens_seen": 222356800, "step": 3970 }, { "epoch": 8.844097995545656, "grad_norm": 47.64936828613281, "learning_rate": 1e-06, "loss": 0.7561, "num_input_tokens_seen": 222414448, "step": 3971 }, { "epoch": 8.844097995545656, "loss": 0.6730362772941589, "loss_ce": 0.00018474232638254762, "loss_iou": 0.2890625, "loss_num": 0.01904296875, "loss_xval": 0.671875, "num_input_tokens_seen": 222414448, "step": 3971 }, { "epoch": 8.846325167037861, "grad_norm": 19.512876510620117, "learning_rate": 1e-06, "loss": 0.7139, "num_input_tokens_seen": 222468120, "step": 3972 }, { "epoch": 8.846325167037861, "loss": 0.6079658269882202, "loss_ce": 0.0001777580037014559, "loss_iou": 0.263671875, "loss_num": 0.015869140625, "loss_xval": 0.609375, "num_input_tokens_seen": 222468120, "step": 3972 }, { "epoch": 8.848552338530066, "grad_norm": 17.036836624145508, "learning_rate": 1e-06, "loss": 0.6141, "num_input_tokens_seen": 222523584, "step": 3973 }, { "epoch": 8.848552338530066, "loss": 0.5712729692459106, "loss_ce": 0.00016703552682884037, "loss_iou": 0.23828125, "loss_num": 0.0189208984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 222523584, "step": 3973 }, { "epoch": 8.85077951002227, "grad_norm": 17.4189395904541, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 222581532, "step": 3974 }, { "epoch": 8.85077951002227, "loss": 0.42311620712280273, "loss_ce": 0.00014257154543884099, "loss_iou": 0.1904296875, "loss_num": 0.00860595703125, "loss_xval": 0.423828125, "num_input_tokens_seen": 222581532, "step": 3974 }, { "epoch": 8.853006681514476, "grad_norm": 40.079444885253906, "learning_rate": 1e-06, "loss": 0.5725, "num_input_tokens_seen": 222638036, "step": 3975 }, { "epoch": 8.853006681514476, "loss": 0.61489337682724, "loss_ce": 0.00014727030065841973, "loss_iou": 0.2734375, "loss_num": 0.0135498046875, "loss_xval": 0.61328125, "num_input_tokens_seen": 222638036, "step": 3975 }, { "epoch": 8.855233853006682, "grad_norm": 29.330018997192383, "learning_rate": 1e-06, "loss": 0.468, "num_input_tokens_seen": 222693340, "step": 3976 }, { "epoch": 8.855233853006682, "loss": 0.531175971031189, "loss_ce": 0.00017008304712362587, "loss_iou": 0.23828125, "loss_num": 0.01104736328125, "loss_xval": 0.53125, "num_input_tokens_seen": 222693340, "step": 3976 }, { "epoch": 8.857461024498887, "grad_norm": 11.950536727905273, "learning_rate": 1e-06, "loss": 0.4903, "num_input_tokens_seen": 222750828, "step": 3977 }, { "epoch": 8.857461024498887, "loss": 0.36172372102737427, "loss_ce": 0.00015144153439905494, "loss_iou": 0.1708984375, "loss_num": 0.00408935546875, "loss_xval": 0.361328125, "num_input_tokens_seen": 222750828, "step": 3977 }, { "epoch": 8.859688195991092, "grad_norm": 17.319873809814453, "learning_rate": 1e-06, "loss": 0.569, "num_input_tokens_seen": 222807080, "step": 3978 }, { "epoch": 8.859688195991092, "loss": 0.5596229434013367, "loss_ce": 0.00017472056788392365, "loss_iou": 0.212890625, "loss_num": 0.026611328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 222807080, "step": 3978 }, { "epoch": 8.861915367483297, "grad_norm": 25.0523624420166, "learning_rate": 1e-06, "loss": 0.6712, "num_input_tokens_seen": 222864000, "step": 3979 }, { "epoch": 8.861915367483297, "loss": 0.670589804649353, "loss_ce": 0.00017965443839784712, "loss_iou": 0.3046875, "loss_num": 0.01214599609375, "loss_xval": 0.671875, "num_input_tokens_seen": 222864000, "step": 3979 }, { "epoch": 8.864142538975502, "grad_norm": 18.790050506591797, "learning_rate": 1e-06, "loss": 0.4437, "num_input_tokens_seen": 222918468, "step": 3980 }, { "epoch": 8.864142538975502, "loss": 0.5265804529190063, "loss_ce": 0.00021330692106857896, "loss_iou": 0.2333984375, "loss_num": 0.01177978515625, "loss_xval": 0.52734375, "num_input_tokens_seen": 222918468, "step": 3980 }, { "epoch": 8.866369710467707, "grad_norm": 39.062259674072266, "learning_rate": 1e-06, "loss": 0.5382, "num_input_tokens_seen": 222971988, "step": 3981 }, { "epoch": 8.866369710467707, "loss": 0.4984826147556305, "loss_ce": 0.00019161765521857888, "loss_iou": 0.2177734375, "loss_num": 0.0126953125, "loss_xval": 0.498046875, "num_input_tokens_seen": 222971988, "step": 3981 }, { "epoch": 8.868596881959911, "grad_norm": 17.326452255249023, "learning_rate": 1e-06, "loss": 0.6576, "num_input_tokens_seen": 223027000, "step": 3982 }, { "epoch": 8.868596881959911, "loss": 0.6256543397903442, "loss_ce": 0.00041018627234734595, "loss_iou": 0.275390625, "loss_num": 0.01513671875, "loss_xval": 0.625, "num_input_tokens_seen": 223027000, "step": 3982 }, { "epoch": 8.870824053452116, "grad_norm": 16.484027862548828, "learning_rate": 1e-06, "loss": 0.7406, "num_input_tokens_seen": 223081684, "step": 3983 }, { "epoch": 8.870824053452116, "loss": 0.6387948989868164, "loss_ce": 0.0001230332418344915, "loss_iou": 0.275390625, "loss_num": 0.017578125, "loss_xval": 0.640625, "num_input_tokens_seen": 223081684, "step": 3983 }, { "epoch": 8.873051224944321, "grad_norm": 26.997020721435547, "learning_rate": 1e-06, "loss": 0.6737, "num_input_tokens_seen": 223135544, "step": 3984 }, { "epoch": 8.873051224944321, "loss": 0.745303213596344, "loss_ce": 0.00018600517068989575, "loss_iou": 0.310546875, "loss_num": 0.024658203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 223135544, "step": 3984 }, { "epoch": 8.875278396436526, "grad_norm": 16.463109970092773, "learning_rate": 1e-06, "loss": 0.6391, "num_input_tokens_seen": 223192264, "step": 3985 }, { "epoch": 8.875278396436526, "loss": 0.6673752069473267, "loss_ce": 0.00038302806206047535, "loss_iou": 0.283203125, "loss_num": 0.0205078125, "loss_xval": 0.66796875, "num_input_tokens_seen": 223192264, "step": 3985 }, { "epoch": 8.877505567928731, "grad_norm": 26.982946395874023, "learning_rate": 1e-06, "loss": 0.5375, "num_input_tokens_seen": 223247844, "step": 3986 }, { "epoch": 8.877505567928731, "loss": 0.5868437886238098, "loss_ce": 0.00041803409112617373, "loss_iou": 0.255859375, "loss_num": 0.0147705078125, "loss_xval": 0.5859375, "num_input_tokens_seen": 223247844, "step": 3986 }, { "epoch": 8.879732739420936, "grad_norm": 17.86455726623535, "learning_rate": 1e-06, "loss": 0.5355, "num_input_tokens_seen": 223304620, "step": 3987 }, { "epoch": 8.879732739420936, "loss": 0.46156710386276245, "loss_ce": 0.0001413182180840522, "loss_iou": 0.203125, "loss_num": 0.01080322265625, "loss_xval": 0.4609375, "num_input_tokens_seen": 223304620, "step": 3987 }, { "epoch": 8.88195991091314, "grad_norm": 19.752405166625977, "learning_rate": 1e-06, "loss": 0.3951, "num_input_tokens_seen": 223361520, "step": 3988 }, { "epoch": 8.88195991091314, "loss": 0.3074072003364563, "loss_ce": 0.0001561975514050573, "loss_iou": 0.140625, "loss_num": 0.0052490234375, "loss_xval": 0.306640625, "num_input_tokens_seen": 223361520, "step": 3988 }, { "epoch": 8.884187082405345, "grad_norm": 12.55148983001709, "learning_rate": 1e-06, "loss": 0.7263, "num_input_tokens_seen": 223419580, "step": 3989 }, { "epoch": 8.884187082405345, "loss": 1.0404853820800781, "loss_ce": 0.00020214373944327235, "loss_iou": 0.3984375, "loss_num": 0.048583984375, "loss_xval": 1.0390625, "num_input_tokens_seen": 223419580, "step": 3989 }, { "epoch": 8.88641425389755, "grad_norm": 19.007347106933594, "learning_rate": 1e-06, "loss": 0.5547, "num_input_tokens_seen": 223476520, "step": 3990 }, { "epoch": 8.88641425389755, "loss": 0.45726656913757324, "loss_ce": 0.00017428494174964726, "loss_iou": 0.1953125, "loss_num": 0.01312255859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 223476520, "step": 3990 }, { "epoch": 8.888641425389755, "grad_norm": 21.701292037963867, "learning_rate": 1e-06, "loss": 0.6151, "num_input_tokens_seen": 223532396, "step": 3991 }, { "epoch": 8.888641425389755, "loss": 0.6684781908988953, "loss_ce": 0.00014323292998597026, "loss_iou": 0.291015625, "loss_num": 0.0174560546875, "loss_xval": 0.66796875, "num_input_tokens_seen": 223532396, "step": 3991 }, { "epoch": 8.89086859688196, "grad_norm": 20.25054359436035, "learning_rate": 1e-06, "loss": 0.7418, "num_input_tokens_seen": 223588248, "step": 3992 }, { "epoch": 8.89086859688196, "loss": 0.6505246162414551, "loss_ce": 0.00013402706827037036, "loss_iou": 0.294921875, "loss_num": 0.011962890625, "loss_xval": 0.6484375, "num_input_tokens_seen": 223588248, "step": 3992 }, { "epoch": 8.893095768374165, "grad_norm": 39.547183990478516, "learning_rate": 1e-06, "loss": 0.6393, "num_input_tokens_seen": 223643372, "step": 3993 }, { "epoch": 8.893095768374165, "loss": 0.47359946370124817, "loss_ce": 0.0009432291844859719, "loss_iou": 0.2080078125, "loss_num": 0.0111083984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 223643372, "step": 3993 }, { "epoch": 8.89532293986637, "grad_norm": 15.449313163757324, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 223701152, "step": 3994 }, { "epoch": 8.89532293986637, "loss": 0.4065554440021515, "loss_ce": 0.00018337275832891464, "loss_iou": 0.17578125, "loss_num": 0.01092529296875, "loss_xval": 0.40625, "num_input_tokens_seen": 223701152, "step": 3994 }, { "epoch": 8.897550111358575, "grad_norm": 20.84811782836914, "learning_rate": 1e-06, "loss": 0.5546, "num_input_tokens_seen": 223754800, "step": 3995 }, { "epoch": 8.897550111358575, "loss": 0.5366966724395752, "loss_ce": 0.00013655968359671533, "loss_iou": 0.2275390625, "loss_num": 0.0162353515625, "loss_xval": 0.53515625, "num_input_tokens_seen": 223754800, "step": 3995 }, { "epoch": 8.89977728285078, "grad_norm": 22.598325729370117, "learning_rate": 1e-06, "loss": 0.4089, "num_input_tokens_seen": 223811716, "step": 3996 }, { "epoch": 8.89977728285078, "loss": 0.5239717364311218, "loss_ce": 0.00016802808386273682, "loss_iou": 0.2158203125, "loss_num": 0.0185546875, "loss_xval": 0.5234375, "num_input_tokens_seen": 223811716, "step": 3996 }, { "epoch": 8.902004454342984, "grad_norm": 16.426435470581055, "learning_rate": 1e-06, "loss": 0.5465, "num_input_tokens_seen": 223867872, "step": 3997 }, { "epoch": 8.902004454342984, "loss": 0.5754693746566772, "loss_ce": 0.00015196282765828073, "loss_iou": 0.24609375, "loss_num": 0.0164794921875, "loss_xval": 0.57421875, "num_input_tokens_seen": 223867872, "step": 3997 }, { "epoch": 8.90423162583519, "grad_norm": 35.01568603515625, "learning_rate": 1e-06, "loss": 0.8874, "num_input_tokens_seen": 223922524, "step": 3998 }, { "epoch": 8.90423162583519, "loss": 0.7457464337348938, "loss_ce": 0.00014096120139583945, "loss_iou": 0.326171875, "loss_num": 0.0185546875, "loss_xval": 0.74609375, "num_input_tokens_seen": 223922524, "step": 3998 }, { "epoch": 8.906458797327394, "grad_norm": 52.03001403808594, "learning_rate": 1e-06, "loss": 0.7269, "num_input_tokens_seen": 223977692, "step": 3999 }, { "epoch": 8.906458797327394, "loss": 0.9718111753463745, "loss_ce": 0.00013149988080840558, "loss_iou": 0.392578125, "loss_num": 0.036865234375, "loss_xval": 0.97265625, "num_input_tokens_seen": 223977692, "step": 3999 }, { "epoch": 8.908685968819599, "grad_norm": 23.651729583740234, "learning_rate": 1e-06, "loss": 0.6773, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "eval_seeclick_web_CIoU": 0.584255576133728, "eval_seeclick_web_GIoU": 0.5831271409988403, "eval_seeclick_web_IoU": 0.6019724309444427, "eval_seeclick_web_MAE_all": 0.0160904498770833, "eval_seeclick_web_MAE_h": 0.00791524676606059, "eval_seeclick_web_MAE_w": 0.016444522887468338, "eval_seeclick_web_MAE_x_boxes": 0.009520682971924543, "eval_seeclick_web_MAE_y_boxes": 0.022019447991624475, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9091385006904602, "eval_seeclick_web_loss_ce": 0.00022534980962518603, "eval_seeclick_web_loss_iou": 0.41326904296875, "eval_seeclick_web_loss_num": 0.012880325317382812, "eval_seeclick_web_loss_xval": 0.8909912109375, "eval_seeclick_web_runtime": 23.4695, "eval_seeclick_web_samples_per_second": 2.13, "eval_seeclick_web_steps_per_second": 0.085, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "eval_icons_CIoU": 0.280209019780159, "eval_icons_GIoU": 0.3042156249284744, "eval_icons_IoU": 0.3636113852262497, "eval_icons_MAE_all": 0.06306547299027443, "eval_icons_MAE_h": 0.03832197841256857, "eval_icons_MAE_w": 0.06806911528110504, "eval_icons_MAE_x_boxes": 0.05768297426402569, "eval_icons_MAE_y_boxes": 0.03917842544615269, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7296451330184937, "eval_icons_loss_ce": 0.00027250249695498496, "eval_icons_loss_iou": 0.6729736328125, "eval_icons_loss_num": 0.06075096130371094, "eval_icons_loss_xval": 1.648193359375, "eval_icons_runtime": 20.6404, "eval_icons_samples_per_second": 2.422, "eval_icons_steps_per_second": 0.097, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "eval_screenspot_CIoU": 0.344086229801178, "eval_screenspot_GIoU": 0.36166812976201373, "eval_screenspot_IoU": 0.42560062805811566, "eval_screenspot_MAE_all": 0.06235171233614286, "eval_screenspot_MAE_h": 0.038458424930771194, "eval_screenspot_MAE_w": 0.07053530837098758, "eval_screenspot_MAE_x_boxes": 0.07286067555348079, "eval_screenspot_MAE_y_boxes": 0.04639405757188797, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6449319124221802, "eval_screenspot_loss_ce": 0.0002812407910823822, "eval_screenspot_loss_iou": 0.6747233072916666, "eval_screenspot_loss_num": 0.07325236002604167, "eval_screenspot_loss_xval": 1.7154947916666667, "eval_screenspot_runtime": 37.0539, "eval_screenspot_samples_per_second": 2.402, "eval_screenspot_steps_per_second": 0.081, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "eval_compot_CIoU": 0.3489241451025009, "eval_compot_GIoU": 0.3593568354845047, "eval_compot_IoU": 0.4082530289888382, "eval_compot_MAE_all": 0.01762966625392437, "eval_compot_MAE_h": 0.009061001241207123, "eval_compot_MAE_w": 0.022062174044549465, "eval_compot_MAE_x_boxes": 0.028892694041132927, "eval_compot_MAE_y_boxes": 0.006829841528087854, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.407405138015747, "eval_compot_loss_ce": 0.00021665637177648023, "eval_compot_loss_iou": 0.6473388671875, "eval_compot_loss_num": 0.016569137573242188, "eval_compot_loss_xval": 1.377197265625, "eval_compot_runtime": 21.8205, "eval_compot_samples_per_second": 2.291, "eval_compot_steps_per_second": 0.092, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "eval_custom_ui_val_CIoU": 0.4698154992527432, "eval_custom_ui_val_GIoU": 0.483290907409456, "eval_custom_ui_val_IoU": 0.5295593506760068, "eval_custom_ui_val_MAE_all": 0.030179924021164577, "eval_custom_ui_val_MAE_h": 0.016545448245273695, "eval_custom_ui_val_MAE_w": 0.03918004294650422, "eval_custom_ui_val_MAE_x_boxes": 0.03695020234833161, "eval_custom_ui_val_MAE_y_boxes": 0.01497822223852078, "eval_custom_ui_val_inside_bbox": 0.7457561757829454, "eval_custom_ui_val_loss": 1.1855908632278442, "eval_custom_ui_val_loss_ce": 0.00024014294386789616, "eval_custom_ui_val_loss_iou": 0.5057237413194444, "eval_custom_ui_val_loss_num": 0.027684529622395832, "eval_custom_ui_val_loss_xval": 1.1498480902777777, "eval_custom_ui_val_runtime": 70.3473, "eval_custom_ui_val_samples_per_second": 3.767, "eval_custom_ui_val_steps_per_second": 0.128, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.908685968819599, "loss": 0.9238047003746033, "loss_ce": 0.00022073142463341355, "loss_iou": 0.400390625, "loss_num": 0.0245361328125, "loss_xval": 0.921875, "num_input_tokens_seen": 224031804, "step": 4000 }, { "epoch": 8.910913140311804, "grad_norm": 17.125661849975586, "learning_rate": 1e-06, "loss": 0.6028, "num_input_tokens_seen": 224088664, "step": 4001 }, { "epoch": 8.910913140311804, "loss": 0.6688809394836426, "loss_ce": 0.00017975796072278172, "loss_iou": 0.279296875, "loss_num": 0.0224609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 224088664, "step": 4001 }, { "epoch": 8.913140311804009, "grad_norm": 11.797565460205078, "learning_rate": 1e-06, "loss": 0.4654, "num_input_tokens_seen": 224146092, "step": 4002 }, { "epoch": 8.913140311804009, "loss": 0.43066659569740295, "loss_ce": 0.00012459905701689422, "loss_iou": 0.181640625, "loss_num": 0.013427734375, "loss_xval": 0.4296875, "num_input_tokens_seen": 224146092, "step": 4002 }, { "epoch": 8.915367483296214, "grad_norm": 18.511592864990234, "learning_rate": 1e-06, "loss": 0.4735, "num_input_tokens_seen": 224201576, "step": 4003 }, { "epoch": 8.915367483296214, "loss": 0.44720643758773804, "loss_ce": 0.00018495078256819397, "loss_iou": 0.1953125, "loss_num": 0.0113525390625, "loss_xval": 0.447265625, "num_input_tokens_seen": 224201576, "step": 4003 }, { "epoch": 8.917594654788418, "grad_norm": 19.493896484375, "learning_rate": 1e-06, "loss": 0.658, "num_input_tokens_seen": 224258016, "step": 4004 }, { "epoch": 8.917594654788418, "loss": 0.9191592931747437, "loss_ce": 0.00021400017431005836, "loss_iou": 0.388671875, "loss_num": 0.0281982421875, "loss_xval": 0.91796875, "num_input_tokens_seen": 224258016, "step": 4004 }, { "epoch": 8.919821826280623, "grad_norm": 19.006305694580078, "learning_rate": 1e-06, "loss": 0.6289, "num_input_tokens_seen": 224311936, "step": 4005 }, { "epoch": 8.919821826280623, "loss": 0.7552786469459534, "loss_ce": 0.0001517195487394929, "loss_iou": 0.322265625, "loss_num": 0.021728515625, "loss_xval": 0.75390625, "num_input_tokens_seen": 224311936, "step": 4005 }, { "epoch": 8.922048997772828, "grad_norm": 58.83499526977539, "learning_rate": 1e-06, "loss": 0.6666, "num_input_tokens_seen": 224368388, "step": 4006 }, { "epoch": 8.922048997772828, "loss": 0.746427595615387, "loss_ce": 0.00021177891176193953, "loss_iou": 0.33984375, "loss_num": 0.01336669921875, "loss_xval": 0.74609375, "num_input_tokens_seen": 224368388, "step": 4006 }, { "epoch": 8.924276169265033, "grad_norm": 17.04109001159668, "learning_rate": 1e-06, "loss": 0.6045, "num_input_tokens_seen": 224423224, "step": 4007 }, { "epoch": 8.924276169265033, "loss": 0.6434751749038696, "loss_ce": 0.00016464036889374256, "loss_iou": 0.298828125, "loss_num": 0.00958251953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 224423224, "step": 4007 }, { "epoch": 8.926503340757238, "grad_norm": 16.078716278076172, "learning_rate": 1e-06, "loss": 0.4969, "num_input_tokens_seen": 224479088, "step": 4008 }, { "epoch": 8.926503340757238, "loss": 0.47645998001098633, "loss_ce": 0.00014161772560328245, "loss_iou": 0.2080078125, "loss_num": 0.011962890625, "loss_xval": 0.4765625, "num_input_tokens_seen": 224479088, "step": 4008 }, { "epoch": 8.928730512249443, "grad_norm": 18.10431671142578, "learning_rate": 1e-06, "loss": 0.5381, "num_input_tokens_seen": 224536168, "step": 4009 }, { "epoch": 8.928730512249443, "loss": 0.5730590224266052, "loss_ce": 0.00018303040997125208, "loss_iou": 0.220703125, "loss_num": 0.0264892578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 224536168, "step": 4009 }, { "epoch": 8.930957683741648, "grad_norm": 20.611072540283203, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 224594284, "step": 4010 }, { "epoch": 8.930957683741648, "loss": 0.5585442781448364, "loss_ce": 0.00019466917729005218, "loss_iou": 0.2578125, "loss_num": 0.0087890625, "loss_xval": 0.55859375, "num_input_tokens_seen": 224594284, "step": 4010 }, { "epoch": 8.933184855233852, "grad_norm": 20.54092788696289, "learning_rate": 1e-06, "loss": 0.5272, "num_input_tokens_seen": 224651224, "step": 4011 }, { "epoch": 8.933184855233852, "loss": 0.577430248260498, "loss_ce": 0.00015977630391716957, "loss_iou": 0.259765625, "loss_num": 0.01165771484375, "loss_xval": 0.578125, "num_input_tokens_seen": 224651224, "step": 4011 }, { "epoch": 8.935412026726057, "grad_norm": 15.826013565063477, "learning_rate": 1e-06, "loss": 0.6706, "num_input_tokens_seen": 224707328, "step": 4012 }, { "epoch": 8.935412026726057, "loss": 0.4253334701061249, "loss_ce": 0.0001625774457352236, "loss_iou": 0.193359375, "loss_num": 0.007568359375, "loss_xval": 0.42578125, "num_input_tokens_seen": 224707328, "step": 4012 }, { "epoch": 8.937639198218262, "grad_norm": 25.152671813964844, "learning_rate": 1e-06, "loss": 0.5631, "num_input_tokens_seen": 224766028, "step": 4013 }, { "epoch": 8.937639198218262, "loss": 0.7184352874755859, "loss_ce": 0.0001735725672915578, "loss_iou": 0.31640625, "loss_num": 0.01708984375, "loss_xval": 0.71875, "num_input_tokens_seen": 224766028, "step": 4013 }, { "epoch": 8.939866369710467, "grad_norm": 30.11623191833496, "learning_rate": 1e-06, "loss": 0.605, "num_input_tokens_seen": 224822620, "step": 4014 }, { "epoch": 8.939866369710467, "loss": 0.7723772525787354, "loss_ce": 0.00016042383504100144, "loss_iou": 0.326171875, "loss_num": 0.0244140625, "loss_xval": 0.7734375, "num_input_tokens_seen": 224822620, "step": 4014 }, { "epoch": 8.942093541202672, "grad_norm": 45.63913345336914, "learning_rate": 1e-06, "loss": 0.5652, "num_input_tokens_seen": 224878404, "step": 4015 }, { "epoch": 8.942093541202672, "loss": 0.6024213433265686, "loss_ce": 0.0001264153397642076, "loss_iou": 0.2314453125, "loss_num": 0.0281982421875, "loss_xval": 0.6015625, "num_input_tokens_seen": 224878404, "step": 4015 }, { "epoch": 8.944320712694877, "grad_norm": 17.508010864257812, "learning_rate": 1e-06, "loss": 0.4491, "num_input_tokens_seen": 224936428, "step": 4016 }, { "epoch": 8.944320712694877, "loss": 0.5184711217880249, "loss_ce": 0.0001606106961844489, "loss_iou": 0.220703125, "loss_num": 0.01531982421875, "loss_xval": 0.51953125, "num_input_tokens_seen": 224936428, "step": 4016 }, { "epoch": 8.946547884187082, "grad_norm": 15.416549682617188, "learning_rate": 1e-06, "loss": 0.6399, "num_input_tokens_seen": 224994188, "step": 4017 }, { "epoch": 8.946547884187082, "loss": 0.7011144161224365, "loss_ce": 0.00018664849631022662, "loss_iou": 0.287109375, "loss_num": 0.0252685546875, "loss_xval": 0.69921875, "num_input_tokens_seen": 224994188, "step": 4017 }, { "epoch": 8.948775055679288, "grad_norm": 21.26276969909668, "learning_rate": 1e-06, "loss": 0.4372, "num_input_tokens_seen": 225051360, "step": 4018 }, { "epoch": 8.948775055679288, "loss": 0.3914967179298401, "loss_ce": 0.00013928582484368235, "loss_iou": 0.171875, "loss_num": 0.00958251953125, "loss_xval": 0.390625, "num_input_tokens_seen": 225051360, "step": 4018 }, { "epoch": 8.951002227171493, "grad_norm": 13.906608581542969, "learning_rate": 1e-06, "loss": 0.4571, "num_input_tokens_seen": 225108952, "step": 4019 }, { "epoch": 8.951002227171493, "loss": 0.4027478098869324, "loss_ce": 0.00015992176486179233, "loss_iou": 0.1767578125, "loss_num": 0.00970458984375, "loss_xval": 0.40234375, "num_input_tokens_seen": 225108952, "step": 4019 }, { "epoch": 8.953229398663698, "grad_norm": 13.286585807800293, "learning_rate": 1e-06, "loss": 0.6289, "num_input_tokens_seen": 225165484, "step": 4020 }, { "epoch": 8.953229398663698, "loss": 0.37464502453804016, "loss_ce": 0.00013330676301848143, "loss_iou": 0.158203125, "loss_num": 0.011474609375, "loss_xval": 0.375, "num_input_tokens_seen": 225165484, "step": 4020 }, { "epoch": 8.955456570155903, "grad_norm": 27.786767959594727, "learning_rate": 1e-06, "loss": 0.843, "num_input_tokens_seen": 225219788, "step": 4021 }, { "epoch": 8.955456570155903, "loss": 0.6932044625282288, "loss_ce": 0.00015029762289486825, "loss_iou": 0.30078125, "loss_num": 0.0179443359375, "loss_xval": 0.69140625, "num_input_tokens_seen": 225219788, "step": 4021 }, { "epoch": 8.957683741648108, "grad_norm": 15.158284187316895, "learning_rate": 1e-06, "loss": 0.5056, "num_input_tokens_seen": 225277592, "step": 4022 }, { "epoch": 8.957683741648108, "loss": 0.5200361013412476, "loss_ce": 0.00013863734784536064, "loss_iou": 0.212890625, "loss_num": 0.018798828125, "loss_xval": 0.51953125, "num_input_tokens_seen": 225277592, "step": 4022 }, { "epoch": 8.959910913140313, "grad_norm": 45.792728424072266, "learning_rate": 1e-06, "loss": 0.4745, "num_input_tokens_seen": 225334828, "step": 4023 }, { "epoch": 8.959910913140313, "loss": 0.40143540501594543, "loss_ce": 0.0001902944641187787, "loss_iou": 0.1826171875, "loss_num": 0.00714111328125, "loss_xval": 0.400390625, "num_input_tokens_seen": 225334828, "step": 4023 }, { "epoch": 8.962138084632517, "grad_norm": 19.045509338378906, "learning_rate": 1e-06, "loss": 0.5239, "num_input_tokens_seen": 225389700, "step": 4024 }, { "epoch": 8.962138084632517, "loss": 0.4376333951950073, "loss_ce": 0.00013340538134798408, "loss_iou": 0.1962890625, "loss_num": 0.0091552734375, "loss_xval": 0.4375, "num_input_tokens_seen": 225389700, "step": 4024 }, { "epoch": 8.964365256124722, "grad_norm": 15.347426414489746, "learning_rate": 1e-06, "loss": 0.6214, "num_input_tokens_seen": 225447624, "step": 4025 }, { "epoch": 8.964365256124722, "loss": 0.5260591506958008, "loss_ce": 0.000180262781213969, "loss_iou": 0.2216796875, "loss_num": 0.016357421875, "loss_xval": 0.52734375, "num_input_tokens_seen": 225447624, "step": 4025 }, { "epoch": 8.966592427616927, "grad_norm": 14.499673843383789, "learning_rate": 1e-06, "loss": 0.5167, "num_input_tokens_seen": 225506444, "step": 4026 }, { "epoch": 8.966592427616927, "loss": 0.3981502652168274, "loss_ce": 0.00014002776879351586, "loss_iou": 0.171875, "loss_num": 0.010986328125, "loss_xval": 0.3984375, "num_input_tokens_seen": 225506444, "step": 4026 }, { "epoch": 8.968819599109132, "grad_norm": 19.23043441772461, "learning_rate": 1e-06, "loss": 0.5612, "num_input_tokens_seen": 225560052, "step": 4027 }, { "epoch": 8.968819599109132, "loss": 0.32940202951431274, "loss_ce": 0.00017840655345935374, "loss_iou": 0.140625, "loss_num": 0.00958251953125, "loss_xval": 0.330078125, "num_input_tokens_seen": 225560052, "step": 4027 }, { "epoch": 8.971046770601337, "grad_norm": 34.80836486816406, "learning_rate": 1e-06, "loss": 0.5679, "num_input_tokens_seen": 225618236, "step": 4028 }, { "epoch": 8.971046770601337, "loss": 0.6726021766662598, "loss_ce": 0.00023891603632364422, "loss_iou": 0.30078125, "loss_num": 0.0142822265625, "loss_xval": 0.671875, "num_input_tokens_seen": 225618236, "step": 4028 }, { "epoch": 8.973273942093542, "grad_norm": 19.638723373413086, "learning_rate": 1e-06, "loss": 0.5453, "num_input_tokens_seen": 225674116, "step": 4029 }, { "epoch": 8.973273942093542, "loss": 0.520553708076477, "loss_ce": 0.0012665874091908336, "loss_iou": 0.1845703125, "loss_num": 0.030029296875, "loss_xval": 0.51953125, "num_input_tokens_seen": 225674116, "step": 4029 }, { "epoch": 8.975501113585747, "grad_norm": 18.89423942565918, "learning_rate": 1e-06, "loss": 0.6397, "num_input_tokens_seen": 225730220, "step": 4030 }, { "epoch": 8.975501113585747, "loss": 0.5465627908706665, "loss_ce": 0.00017607388144824654, "loss_iou": 0.244140625, "loss_num": 0.01165771484375, "loss_xval": 0.546875, "num_input_tokens_seen": 225730220, "step": 4030 }, { "epoch": 8.977728285077951, "grad_norm": 13.650494575500488, "learning_rate": 1e-06, "loss": 0.5068, "num_input_tokens_seen": 225786488, "step": 4031 }, { "epoch": 8.977728285077951, "loss": 0.5636086463928223, "loss_ce": 0.00013209109602030367, "loss_iou": 0.2275390625, "loss_num": 0.021728515625, "loss_xval": 0.5625, "num_input_tokens_seen": 225786488, "step": 4031 }, { "epoch": 8.979955456570156, "grad_norm": 16.062467575073242, "learning_rate": 1e-06, "loss": 0.4167, "num_input_tokens_seen": 225841372, "step": 4032 }, { "epoch": 8.979955456570156, "loss": 0.29786229133605957, "loss_ce": 0.00013277304242365062, "loss_iou": 0.1201171875, "loss_num": 0.011474609375, "loss_xval": 0.296875, "num_input_tokens_seen": 225841372, "step": 4032 }, { "epoch": 8.982182628062361, "grad_norm": 17.630416870117188, "learning_rate": 1e-06, "loss": 0.5697, "num_input_tokens_seen": 225894908, "step": 4033 }, { "epoch": 8.982182628062361, "loss": 0.676677942276001, "loss_ce": 0.00016425049398094416, "loss_iou": 0.2734375, "loss_num": 0.0257568359375, "loss_xval": 0.67578125, "num_input_tokens_seen": 225894908, "step": 4033 }, { "epoch": 8.984409799554566, "grad_norm": 20.209087371826172, "learning_rate": 1e-06, "loss": 0.4613, "num_input_tokens_seen": 225948016, "step": 4034 }, { "epoch": 8.984409799554566, "loss": 0.6483420729637146, "loss_ce": 0.00014869487495161593, "loss_iou": 0.298828125, "loss_num": 0.00982666015625, "loss_xval": 0.6484375, "num_input_tokens_seen": 225948016, "step": 4034 }, { "epoch": 8.98663697104677, "grad_norm": 31.209794998168945, "learning_rate": 1e-06, "loss": 0.62, "num_input_tokens_seen": 226005860, "step": 4035 }, { "epoch": 8.98663697104677, "loss": 0.6758050918579102, "loss_ce": 0.0002679667086340487, "loss_iou": 0.287109375, "loss_num": 0.0205078125, "loss_xval": 0.67578125, "num_input_tokens_seen": 226005860, "step": 4035 }, { "epoch": 8.988864142538976, "grad_norm": 13.588601112365723, "learning_rate": 1e-06, "loss": 0.4508, "num_input_tokens_seen": 226061580, "step": 4036 }, { "epoch": 8.988864142538976, "loss": 0.2981112003326416, "loss_ce": 0.00013759495050180703, "loss_iou": 0.134765625, "loss_num": 0.00555419921875, "loss_xval": 0.298828125, "num_input_tokens_seen": 226061580, "step": 4036 }, { "epoch": 8.99109131403118, "grad_norm": 12.43724250793457, "learning_rate": 1e-06, "loss": 0.6268, "num_input_tokens_seen": 226121088, "step": 4037 }, { "epoch": 8.99109131403118, "loss": 0.7063800096511841, "loss_ce": 0.00020320256589911878, "loss_iou": 0.298828125, "loss_num": 0.02197265625, "loss_xval": 0.70703125, "num_input_tokens_seen": 226121088, "step": 4037 }, { "epoch": 8.993318485523385, "grad_norm": 14.459733963012695, "learning_rate": 1e-06, "loss": 0.7679, "num_input_tokens_seen": 226178000, "step": 4038 }, { "epoch": 8.993318485523385, "loss": 0.6036978363990784, "loss_ce": 0.00018220869242213666, "loss_iou": 0.2294921875, "loss_num": 0.029052734375, "loss_xval": 0.6015625, "num_input_tokens_seen": 226178000, "step": 4038 }, { "epoch": 8.99554565701559, "grad_norm": 20.82482147216797, "learning_rate": 1e-06, "loss": 0.5971, "num_input_tokens_seen": 226234736, "step": 4039 }, { "epoch": 8.99554565701559, "loss": 0.47330033779144287, "loss_ce": 0.00015583749336656183, "loss_iou": 0.2158203125, "loss_num": 0.00830078125, "loss_xval": 0.47265625, "num_input_tokens_seen": 226234736, "step": 4039 }, { "epoch": 8.997772828507795, "grad_norm": 18.3863468170166, "learning_rate": 1e-06, "loss": 0.5675, "num_input_tokens_seen": 226287252, "step": 4040 }, { "epoch": 8.997772828507795, "loss": 0.5164123177528381, "loss_ce": 0.00017695256974548101, "loss_iou": 0.2314453125, "loss_num": 0.01068115234375, "loss_xval": 0.515625, "num_input_tokens_seen": 226287252, "step": 4040 }, { "epoch": 9.0, "grad_norm": 18.648279190063477, "learning_rate": 1e-06, "loss": 0.5718, "num_input_tokens_seen": 226345288, "step": 4041 }, { "epoch": 9.0, "loss": 0.6733693480491638, "loss_ce": 0.00015158146561589092, "loss_iou": 0.283203125, "loss_num": 0.0211181640625, "loss_xval": 0.671875, "num_input_tokens_seen": 226345288, "step": 4041 }, { "epoch": 9.002227171492205, "grad_norm": 56.01642608642578, "learning_rate": 1e-06, "loss": 0.5703, "num_input_tokens_seen": 226400596, "step": 4042 }, { "epoch": 9.002227171492205, "loss": 0.6565755605697632, "loss_ce": 0.00020350792328827083, "loss_iou": 0.25, "loss_num": 0.03125, "loss_xval": 0.65625, "num_input_tokens_seen": 226400596, "step": 4042 }, { "epoch": 9.00445434298441, "grad_norm": 21.13946533203125, "learning_rate": 1e-06, "loss": 0.5926, "num_input_tokens_seen": 226455584, "step": 4043 }, { "epoch": 9.00445434298441, "loss": 0.5227538347244263, "loss_ce": 0.00017086087609641254, "loss_iou": 0.240234375, "loss_num": 0.00830078125, "loss_xval": 0.5234375, "num_input_tokens_seen": 226455584, "step": 4043 }, { "epoch": 9.006681514476615, "grad_norm": 23.61444091796875, "learning_rate": 1e-06, "loss": 0.5942, "num_input_tokens_seen": 226508220, "step": 4044 }, { "epoch": 9.006681514476615, "loss": 0.588532030582428, "loss_ce": 0.00015310911112464964, "loss_iou": 0.24609375, "loss_num": 0.0194091796875, "loss_xval": 0.58984375, "num_input_tokens_seen": 226508220, "step": 4044 }, { "epoch": 9.00890868596882, "grad_norm": 26.408849716186523, "learning_rate": 1e-06, "loss": 0.4008, "num_input_tokens_seen": 226563516, "step": 4045 }, { "epoch": 9.00890868596882, "loss": 0.4769752323627472, "loss_ce": 0.00016859016614034772, "loss_iou": 0.20703125, "loss_num": 0.01251220703125, "loss_xval": 0.4765625, "num_input_tokens_seen": 226563516, "step": 4045 }, { "epoch": 9.011135857461024, "grad_norm": 16.130887985229492, "learning_rate": 1e-06, "loss": 0.4924, "num_input_tokens_seen": 226619892, "step": 4046 }, { "epoch": 9.011135857461024, "loss": 0.3370331823825836, "loss_ce": 0.0001191033807117492, "loss_iou": 0.11083984375, "loss_num": 0.0230712890625, "loss_xval": 0.3359375, "num_input_tokens_seen": 226619892, "step": 4046 }, { "epoch": 9.01336302895323, "grad_norm": 17.81732749938965, "learning_rate": 1e-06, "loss": 0.6139, "num_input_tokens_seen": 226678680, "step": 4047 }, { "epoch": 9.01336302895323, "loss": 0.826948881149292, "loss_ce": 0.00028875278076156974, "loss_iou": 0.345703125, "loss_num": 0.0274658203125, "loss_xval": 0.828125, "num_input_tokens_seen": 226678680, "step": 4047 }, { "epoch": 9.015590200445434, "grad_norm": 19.63979721069336, "learning_rate": 1e-06, "loss": 0.7731, "num_input_tokens_seen": 226736108, "step": 4048 }, { "epoch": 9.015590200445434, "loss": 0.8490115404129028, "loss_ce": 0.00025667509180493653, "loss_iou": 0.37890625, "loss_num": 0.0179443359375, "loss_xval": 0.84765625, "num_input_tokens_seen": 226736108, "step": 4048 }, { "epoch": 9.017817371937639, "grad_norm": 23.76055335998535, "learning_rate": 1e-06, "loss": 0.5958, "num_input_tokens_seen": 226791264, "step": 4049 }, { "epoch": 9.017817371937639, "loss": 0.4640924334526062, "loss_ce": 0.00022527157852891833, "loss_iou": 0.201171875, "loss_num": 0.012451171875, "loss_xval": 0.46484375, "num_input_tokens_seen": 226791264, "step": 4049 }, { "epoch": 9.020044543429844, "grad_norm": 12.326661109924316, "learning_rate": 1e-06, "loss": 0.49, "num_input_tokens_seen": 226849248, "step": 4050 }, { "epoch": 9.020044543429844, "loss": 0.4276803731918335, "loss_ce": 0.00019013047858607024, "loss_iou": 0.1953125, "loss_num": 0.007476806640625, "loss_xval": 0.427734375, "num_input_tokens_seen": 226849248, "step": 4050 }, { "epoch": 9.022271714922049, "grad_norm": 25.97770118713379, "learning_rate": 1e-06, "loss": 0.704, "num_input_tokens_seen": 226903776, "step": 4051 }, { "epoch": 9.022271714922049, "loss": 0.8058446049690247, "loss_ce": 0.0001805661740945652, "loss_iou": 0.361328125, "loss_num": 0.016357421875, "loss_xval": 0.8046875, "num_input_tokens_seen": 226903776, "step": 4051 }, { "epoch": 9.024498886414253, "grad_norm": 23.6370792388916, "learning_rate": 1e-06, "loss": 0.4962, "num_input_tokens_seen": 226960756, "step": 4052 }, { "epoch": 9.024498886414253, "loss": 0.5954891443252563, "loss_ce": 0.00015223291120491922, "loss_iou": 0.2734375, "loss_num": 0.010009765625, "loss_xval": 0.59375, "num_input_tokens_seen": 226960756, "step": 4052 }, { "epoch": 9.026726057906458, "grad_norm": 22.727136611938477, "learning_rate": 1e-06, "loss": 0.792, "num_input_tokens_seen": 227015196, "step": 4053 }, { "epoch": 9.026726057906458, "loss": 0.6129428148269653, "loss_ce": 0.000394008937291801, "loss_iou": 0.267578125, "loss_num": 0.01519775390625, "loss_xval": 0.61328125, "num_input_tokens_seen": 227015196, "step": 4053 }, { "epoch": 9.028953229398663, "grad_norm": 21.844358444213867, "learning_rate": 1e-06, "loss": 0.6825, "num_input_tokens_seen": 227069252, "step": 4054 }, { "epoch": 9.028953229398663, "loss": 0.7161485552787781, "loss_ce": 0.00020616035908460617, "loss_iou": 0.3125, "loss_num": 0.0185546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 227069252, "step": 4054 }, { "epoch": 9.031180400890868, "grad_norm": 12.784194946289062, "learning_rate": 1e-06, "loss": 0.5135, "num_input_tokens_seen": 227124728, "step": 4055 }, { "epoch": 9.031180400890868, "loss": 0.46670782566070557, "loss_ce": 0.00015509186778217554, "loss_iou": 0.21484375, "loss_num": 0.00750732421875, "loss_xval": 0.466796875, "num_input_tokens_seen": 227124728, "step": 4055 }, { "epoch": 9.033407572383073, "grad_norm": 48.46086120605469, "learning_rate": 1e-06, "loss": 0.7413, "num_input_tokens_seen": 227181172, "step": 4056 }, { "epoch": 9.033407572383073, "loss": 1.0057790279388428, "loss_ce": 0.00016376111307181418, "loss_iou": 0.40625, "loss_num": 0.0390625, "loss_xval": 1.0078125, "num_input_tokens_seen": 227181172, "step": 4056 }, { "epoch": 9.035634743875278, "grad_norm": 20.736644744873047, "learning_rate": 1e-06, "loss": 0.5321, "num_input_tokens_seen": 227237508, "step": 4057 }, { "epoch": 9.035634743875278, "loss": 0.46913978457450867, "loss_ce": 0.0007559881778433919, "loss_iou": 0.2099609375, "loss_num": 0.00982666015625, "loss_xval": 0.46875, "num_input_tokens_seen": 227237508, "step": 4057 }, { "epoch": 9.037861915367483, "grad_norm": 69.1377944946289, "learning_rate": 1e-06, "loss": 0.7385, "num_input_tokens_seen": 227289364, "step": 4058 }, { "epoch": 9.037861915367483, "loss": 0.597074568271637, "loss_ce": 0.00015077766147442162, "loss_iou": 0.26171875, "loss_num": 0.01483154296875, "loss_xval": 0.59765625, "num_input_tokens_seen": 227289364, "step": 4058 }, { "epoch": 9.040089086859687, "grad_norm": 15.685859680175781, "learning_rate": 1e-06, "loss": 0.5525, "num_input_tokens_seen": 227345812, "step": 4059 }, { "epoch": 9.040089086859687, "loss": 0.5543001890182495, "loss_ce": 0.00016202838742174208, "loss_iou": 0.2373046875, "loss_num": 0.0159912109375, "loss_xval": 0.5546875, "num_input_tokens_seen": 227345812, "step": 4059 }, { "epoch": 9.042316258351892, "grad_norm": 15.678596496582031, "learning_rate": 1e-06, "loss": 0.6039, "num_input_tokens_seen": 227402332, "step": 4060 }, { "epoch": 9.042316258351892, "loss": 0.6879202127456665, "loss_ce": 0.00017606788605917245, "loss_iou": 0.28125, "loss_num": 0.02490234375, "loss_xval": 0.6875, "num_input_tokens_seen": 227402332, "step": 4060 }, { "epoch": 9.044543429844097, "grad_norm": 24.40289306640625, "learning_rate": 1e-06, "loss": 0.6604, "num_input_tokens_seen": 227456080, "step": 4061 }, { "epoch": 9.044543429844097, "loss": 0.7870203256607056, "loss_ce": 0.00015513686230406165, "loss_iou": 0.3046875, "loss_num": 0.035400390625, "loss_xval": 0.78515625, "num_input_tokens_seen": 227456080, "step": 4061 }, { "epoch": 9.046770601336302, "grad_norm": 20.38033103942871, "learning_rate": 1e-06, "loss": 0.5911, "num_input_tokens_seen": 227512720, "step": 4062 }, { "epoch": 9.046770601336302, "loss": 0.541382908821106, "loss_ce": 0.0001231818023370579, "loss_iou": 0.25, "loss_num": 0.00848388671875, "loss_xval": 0.54296875, "num_input_tokens_seen": 227512720, "step": 4062 }, { "epoch": 9.048997772828507, "grad_norm": 16.504854202270508, "learning_rate": 1e-06, "loss": 0.5148, "num_input_tokens_seen": 227570688, "step": 4063 }, { "epoch": 9.048997772828507, "loss": 0.4388590455055237, "loss_ce": 0.00013835716526955366, "loss_iou": 0.1884765625, "loss_num": 0.01214599609375, "loss_xval": 0.439453125, "num_input_tokens_seen": 227570688, "step": 4063 }, { "epoch": 9.051224944320714, "grad_norm": 23.736780166625977, "learning_rate": 1e-06, "loss": 0.5917, "num_input_tokens_seen": 227628128, "step": 4064 }, { "epoch": 9.051224944320714, "loss": 0.7519105672836304, "loss_ce": 0.0002015778300119564, "loss_iou": 0.31640625, "loss_num": 0.02392578125, "loss_xval": 0.75, "num_input_tokens_seen": 227628128, "step": 4064 }, { "epoch": 9.053452115812918, "grad_norm": 17.67232894897461, "learning_rate": 1e-06, "loss": 0.6101, "num_input_tokens_seen": 227683884, "step": 4065 }, { "epoch": 9.053452115812918, "loss": 0.618810772895813, "loss_ce": 0.00015848156181164086, "loss_iou": 0.265625, "loss_num": 0.017578125, "loss_xval": 0.6171875, "num_input_tokens_seen": 227683884, "step": 4065 }, { "epoch": 9.055679287305123, "grad_norm": 19.514118194580078, "learning_rate": 1e-06, "loss": 0.8178, "num_input_tokens_seen": 227735504, "step": 4066 }, { "epoch": 9.055679287305123, "loss": 0.6703531742095947, "loss_ce": 0.00018715695478022099, "loss_iou": 0.287109375, "loss_num": 0.01904296875, "loss_xval": 0.671875, "num_input_tokens_seen": 227735504, "step": 4066 }, { "epoch": 9.057906458797328, "grad_norm": 18.996246337890625, "learning_rate": 1e-06, "loss": 0.4703, "num_input_tokens_seen": 227793336, "step": 4067 }, { "epoch": 9.057906458797328, "loss": 0.37396594882011414, "loss_ce": 0.00018664645904209465, "loss_iou": 0.1630859375, "loss_num": 0.0096435546875, "loss_xval": 0.373046875, "num_input_tokens_seen": 227793336, "step": 4067 }, { "epoch": 9.060133630289533, "grad_norm": 13.91976547241211, "learning_rate": 1e-06, "loss": 0.4175, "num_input_tokens_seen": 227851892, "step": 4068 }, { "epoch": 9.060133630289533, "loss": 0.5668018460273743, "loss_ce": 0.00015145693032536656, "loss_iou": 0.255859375, "loss_num": 0.0108642578125, "loss_xval": 0.56640625, "num_input_tokens_seen": 227851892, "step": 4068 }, { "epoch": 9.062360801781738, "grad_norm": 15.925460815429688, "learning_rate": 1e-06, "loss": 0.4987, "num_input_tokens_seen": 227904312, "step": 4069 }, { "epoch": 9.062360801781738, "loss": 0.5191997289657593, "loss_ce": 0.00015678332420066, "loss_iou": 0.2431640625, "loss_num": 0.00665283203125, "loss_xval": 0.51953125, "num_input_tokens_seen": 227904312, "step": 4069 }, { "epoch": 9.064587973273943, "grad_norm": 19.193389892578125, "learning_rate": 1e-06, "loss": 0.6064, "num_input_tokens_seen": 227959496, "step": 4070 }, { "epoch": 9.064587973273943, "loss": 0.5843898057937622, "loss_ce": 0.00016130355652421713, "loss_iou": 0.25, "loss_num": 0.0169677734375, "loss_xval": 0.5859375, "num_input_tokens_seen": 227959496, "step": 4070 }, { "epoch": 9.066815144766148, "grad_norm": 23.820255279541016, "learning_rate": 1e-06, "loss": 0.4375, "num_input_tokens_seen": 228018492, "step": 4071 }, { "epoch": 9.066815144766148, "loss": 0.4991843104362488, "loss_ce": 0.00016086628602351993, "loss_iou": 0.228515625, "loss_num": 0.00836181640625, "loss_xval": 0.5, "num_input_tokens_seen": 228018492, "step": 4071 }, { "epoch": 9.069042316258352, "grad_norm": 17.966339111328125, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 228072824, "step": 4072 }, { "epoch": 9.069042316258352, "loss": 0.33019495010375977, "loss_ce": 0.00020839170610997826, "loss_iou": 0.1337890625, "loss_num": 0.01251220703125, "loss_xval": 0.330078125, "num_input_tokens_seen": 228072824, "step": 4072 }, { "epoch": 9.071269487750557, "grad_norm": 19.144359588623047, "learning_rate": 1e-06, "loss": 0.5818, "num_input_tokens_seen": 228126552, "step": 4073 }, { "epoch": 9.071269487750557, "loss": 0.5044497847557068, "loss_ce": 0.00017729168757796288, "loss_iou": 0.20703125, "loss_num": 0.01806640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 228126552, "step": 4073 }, { "epoch": 9.073496659242762, "grad_norm": 27.93861961364746, "learning_rate": 1e-06, "loss": 0.542, "num_input_tokens_seen": 228181208, "step": 4074 }, { "epoch": 9.073496659242762, "loss": 0.4734833836555481, "loss_ce": 0.00015576003352180123, "loss_iou": 0.220703125, "loss_num": 0.006256103515625, "loss_xval": 0.47265625, "num_input_tokens_seen": 228181208, "step": 4074 }, { "epoch": 9.075723830734967, "grad_norm": 16.074905395507812, "learning_rate": 1e-06, "loss": 0.5862, "num_input_tokens_seen": 228237728, "step": 4075 }, { "epoch": 9.075723830734967, "loss": 0.6505739092826843, "loss_ce": 0.00018325743440072984, "loss_iou": 0.27734375, "loss_num": 0.019287109375, "loss_xval": 0.6484375, "num_input_tokens_seen": 228237728, "step": 4075 }, { "epoch": 9.077951002227172, "grad_norm": 26.28703498840332, "learning_rate": 1e-06, "loss": 0.5817, "num_input_tokens_seen": 228294284, "step": 4076 }, { "epoch": 9.077951002227172, "loss": 0.46036121249198914, "loss_ce": 0.00015614047879353166, "loss_iou": 0.205078125, "loss_num": 0.010009765625, "loss_xval": 0.4609375, "num_input_tokens_seen": 228294284, "step": 4076 }, { "epoch": 9.080178173719377, "grad_norm": 19.399211883544922, "learning_rate": 1e-06, "loss": 0.5473, "num_input_tokens_seen": 228349092, "step": 4077 }, { "epoch": 9.080178173719377, "loss": 0.5990505218505859, "loss_ce": 0.00017354718875139952, "loss_iou": 0.251953125, "loss_num": 0.0189208984375, "loss_xval": 0.59765625, "num_input_tokens_seen": 228349092, "step": 4077 }, { "epoch": 9.082405345211582, "grad_norm": 25.406312942504883, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 228404620, "step": 4078 }, { "epoch": 9.082405345211582, "loss": 0.5832569003105164, "loss_ce": 0.00012698877253569663, "loss_iou": 0.251953125, "loss_num": 0.0157470703125, "loss_xval": 0.58203125, "num_input_tokens_seen": 228404620, "step": 4078 }, { "epoch": 9.084632516703786, "grad_norm": 21.80425262451172, "learning_rate": 1e-06, "loss": 0.4839, "num_input_tokens_seen": 228460136, "step": 4079 }, { "epoch": 9.084632516703786, "loss": 0.3352894186973572, "loss_ce": 0.00014540574920829386, "loss_iou": 0.1376953125, "loss_num": 0.011962890625, "loss_xval": 0.3359375, "num_input_tokens_seen": 228460136, "step": 4079 }, { "epoch": 9.086859688195991, "grad_norm": 20.531923294067383, "learning_rate": 1e-06, "loss": 0.491, "num_input_tokens_seen": 228517568, "step": 4080 }, { "epoch": 9.086859688195991, "loss": 0.6657657623291016, "loss_ce": 0.00014688099327031523, "loss_iou": 0.2734375, "loss_num": 0.0234375, "loss_xval": 0.6640625, "num_input_tokens_seen": 228517568, "step": 4080 }, { "epoch": 9.089086859688196, "grad_norm": 21.13003921508789, "learning_rate": 1e-06, "loss": 0.6183, "num_input_tokens_seen": 228571092, "step": 4081 }, { "epoch": 9.089086859688196, "loss": 0.7186765670776367, "loss_ce": 0.0001707267656456679, "loss_iou": 0.314453125, "loss_num": 0.0181884765625, "loss_xval": 0.71875, "num_input_tokens_seen": 228571092, "step": 4081 }, { "epoch": 9.091314031180401, "grad_norm": 17.36870002746582, "learning_rate": 1e-06, "loss": 0.7185, "num_input_tokens_seen": 228626888, "step": 4082 }, { "epoch": 9.091314031180401, "loss": 0.9348142147064209, "loss_ce": 0.000365984917152673, "loss_iou": 0.384765625, "loss_num": 0.032470703125, "loss_xval": 0.93359375, "num_input_tokens_seen": 228626888, "step": 4082 }, { "epoch": 9.093541202672606, "grad_norm": 16.720142364501953, "learning_rate": 1e-06, "loss": 0.6332, "num_input_tokens_seen": 228684328, "step": 4083 }, { "epoch": 9.093541202672606, "loss": 0.5341998934745789, "loss_ce": 0.0001422719651600346, "loss_iou": 0.2265625, "loss_num": 0.01611328125, "loss_xval": 0.53515625, "num_input_tokens_seen": 228684328, "step": 4083 }, { "epoch": 9.09576837416481, "grad_norm": 21.377214431762695, "learning_rate": 1e-06, "loss": 0.6319, "num_input_tokens_seen": 228741784, "step": 4084 }, { "epoch": 9.09576837416481, "loss": 0.5814862251281738, "loss_ce": 0.0011639189906418324, "loss_iou": 0.2431640625, "loss_num": 0.0189208984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 228741784, "step": 4084 }, { "epoch": 9.097995545657016, "grad_norm": 18.07195472717285, "learning_rate": 1e-06, "loss": 0.7059, "num_input_tokens_seen": 228797852, "step": 4085 }, { "epoch": 9.097995545657016, "loss": 0.8597797155380249, "loss_ce": 0.00016060096095316112, "loss_iou": 0.3828125, "loss_num": 0.0189208984375, "loss_xval": 0.859375, "num_input_tokens_seen": 228797852, "step": 4085 }, { "epoch": 9.10022271714922, "grad_norm": 15.08352279663086, "learning_rate": 1e-06, "loss": 0.5056, "num_input_tokens_seen": 228852356, "step": 4086 }, { "epoch": 9.10022271714922, "loss": 0.5499671697616577, "loss_ce": 0.00016245490405708551, "loss_iou": 0.25390625, "loss_num": 0.00830078125, "loss_xval": 0.55078125, "num_input_tokens_seen": 228852356, "step": 4086 }, { "epoch": 9.102449888641425, "grad_norm": 23.534421920776367, "learning_rate": 1e-06, "loss": 0.5026, "num_input_tokens_seen": 228904152, "step": 4087 }, { "epoch": 9.102449888641425, "loss": 0.4977412819862366, "loss_ce": 0.00018270721193403006, "loss_iou": 0.2080078125, "loss_num": 0.016357421875, "loss_xval": 0.498046875, "num_input_tokens_seen": 228904152, "step": 4087 }, { "epoch": 9.10467706013363, "grad_norm": 19.300045013427734, "learning_rate": 1e-06, "loss": 0.9714, "num_input_tokens_seen": 228957696, "step": 4088 }, { "epoch": 9.10467706013363, "loss": 0.8054395914077759, "loss_ce": 0.00014173405361361802, "loss_iou": 0.32421875, "loss_num": 0.03173828125, "loss_xval": 0.8046875, "num_input_tokens_seen": 228957696, "step": 4088 }, { "epoch": 9.106904231625835, "grad_norm": 20.3017578125, "learning_rate": 1e-06, "loss": 0.3923, "num_input_tokens_seen": 229012032, "step": 4089 }, { "epoch": 9.106904231625835, "loss": 0.40640291571617126, "loss_ce": 0.00015291740419343114, "loss_iou": 0.1865234375, "loss_num": 0.006866455078125, "loss_xval": 0.40625, "num_input_tokens_seen": 229012032, "step": 4089 }, { "epoch": 9.10913140311804, "grad_norm": 20.25347137451172, "learning_rate": 1e-06, "loss": 0.5511, "num_input_tokens_seen": 229067576, "step": 4090 }, { "epoch": 9.10913140311804, "loss": 0.5202056169509888, "loss_ce": 0.00012503171456046402, "loss_iou": 0.2177734375, "loss_num": 0.0169677734375, "loss_xval": 0.51953125, "num_input_tokens_seen": 229067576, "step": 4090 }, { "epoch": 9.111358574610245, "grad_norm": 19.562305450439453, "learning_rate": 1e-06, "loss": 0.6281, "num_input_tokens_seen": 229127092, "step": 4091 }, { "epoch": 9.111358574610245, "loss": 0.7418767213821411, "loss_ce": 0.00017745466902852058, "loss_iou": 0.298828125, "loss_num": 0.02880859375, "loss_xval": 0.7421875, "num_input_tokens_seen": 229127092, "step": 4091 }, { "epoch": 9.11358574610245, "grad_norm": 19.772968292236328, "learning_rate": 1e-06, "loss": 0.5598, "num_input_tokens_seen": 229181696, "step": 4092 }, { "epoch": 9.11358574610245, "loss": 0.6595751047134399, "loss_ce": 0.00015126551443245262, "loss_iou": 0.2890625, "loss_num": 0.0164794921875, "loss_xval": 0.66015625, "num_input_tokens_seen": 229181696, "step": 4092 }, { "epoch": 9.115812917594655, "grad_norm": 21.81011962890625, "learning_rate": 1e-06, "loss": 0.6088, "num_input_tokens_seen": 229235572, "step": 4093 }, { "epoch": 9.115812917594655, "loss": 0.6007593870162964, "loss_ce": 0.00017348321853205562, "loss_iou": 0.265625, "loss_num": 0.0135498046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 229235572, "step": 4093 }, { "epoch": 9.11804008908686, "grad_norm": 19.07417106628418, "learning_rate": 1e-06, "loss": 0.4561, "num_input_tokens_seen": 229291708, "step": 4094 }, { "epoch": 9.11804008908686, "loss": 0.5785167217254639, "loss_ce": 0.00020860986842308193, "loss_iou": 0.236328125, "loss_num": 0.02099609375, "loss_xval": 0.578125, "num_input_tokens_seen": 229291708, "step": 4094 }, { "epoch": 9.120267260579064, "grad_norm": 27.573549270629883, "learning_rate": 1e-06, "loss": 0.6232, "num_input_tokens_seen": 229349472, "step": 4095 }, { "epoch": 9.120267260579064, "loss": 0.7934918403625488, "loss_ce": 0.0002789198188111186, "loss_iou": 0.322265625, "loss_num": 0.0296630859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 229349472, "step": 4095 }, { "epoch": 9.122494432071269, "grad_norm": 13.100459098815918, "learning_rate": 1e-06, "loss": 0.7147, "num_input_tokens_seen": 229405976, "step": 4096 }, { "epoch": 9.122494432071269, "loss": 0.48514890670776367, "loss_ce": 0.00016353695536963642, "loss_iou": 0.22265625, "loss_num": 0.008056640625, "loss_xval": 0.484375, "num_input_tokens_seen": 229405976, "step": 4096 }, { "epoch": 9.124721603563474, "grad_norm": 25.375526428222656, "learning_rate": 1e-06, "loss": 0.6361, "num_input_tokens_seen": 229459716, "step": 4097 }, { "epoch": 9.124721603563474, "loss": 0.4884394705295563, "loss_ce": 0.00015823112335056067, "loss_iou": 0.216796875, "loss_num": 0.01080322265625, "loss_xval": 0.48828125, "num_input_tokens_seen": 229459716, "step": 4097 }, { "epoch": 9.126948775055679, "grad_norm": 18.099225997924805, "learning_rate": 1e-06, "loss": 0.6709, "num_input_tokens_seen": 229516212, "step": 4098 }, { "epoch": 9.126948775055679, "loss": 0.5384310483932495, "loss_ce": 0.0007113110623322427, "loss_iou": 0.2353515625, "loss_num": 0.01348876953125, "loss_xval": 0.5390625, "num_input_tokens_seen": 229516212, "step": 4098 }, { "epoch": 9.129175946547884, "grad_norm": 22.159957885742188, "learning_rate": 1e-06, "loss": 0.5206, "num_input_tokens_seen": 229570184, "step": 4099 }, { "epoch": 9.129175946547884, "loss": 0.638085126876831, "loss_ce": 0.00014572580403182656, "loss_iou": 0.26171875, "loss_num": 0.0234375, "loss_xval": 0.63671875, "num_input_tokens_seen": 229570184, "step": 4099 }, { "epoch": 9.131403118040089, "grad_norm": 19.935977935791016, "learning_rate": 1e-06, "loss": 0.5385, "num_input_tokens_seen": 229624652, "step": 4100 }, { "epoch": 9.131403118040089, "loss": 0.6349242925643921, "loss_ce": 0.00015866165631450713, "loss_iou": 0.28125, "loss_num": 0.01446533203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 229624652, "step": 4100 }, { "epoch": 9.133630289532293, "grad_norm": 17.789283752441406, "learning_rate": 1e-06, "loss": 0.5755, "num_input_tokens_seen": 229681300, "step": 4101 }, { "epoch": 9.133630289532293, "loss": 0.5260428190231323, "loss_ce": 0.00016388525546062738, "loss_iou": 0.232421875, "loss_num": 0.01239013671875, "loss_xval": 0.52734375, "num_input_tokens_seen": 229681300, "step": 4101 }, { "epoch": 9.135857461024498, "grad_norm": 29.85516357421875, "learning_rate": 1e-06, "loss": 0.5757, "num_input_tokens_seen": 229735816, "step": 4102 }, { "epoch": 9.135857461024498, "loss": 0.6520152688026428, "loss_ce": 0.00015980206080712378, "loss_iou": 0.28125, "loss_num": 0.0179443359375, "loss_xval": 0.65234375, "num_input_tokens_seen": 229735816, "step": 4102 }, { "epoch": 9.138084632516703, "grad_norm": 17.78467559814453, "learning_rate": 1e-06, "loss": 0.4811, "num_input_tokens_seen": 229789440, "step": 4103 }, { "epoch": 9.138084632516703, "loss": 0.5187010169029236, "loss_ce": 0.00014634460967499763, "loss_iou": 0.2041015625, "loss_num": 0.02197265625, "loss_xval": 0.51953125, "num_input_tokens_seen": 229789440, "step": 4103 }, { "epoch": 9.140311804008908, "grad_norm": 13.813704490661621, "learning_rate": 1e-06, "loss": 0.5981, "num_input_tokens_seen": 229845256, "step": 4104 }, { "epoch": 9.140311804008908, "loss": 0.6361100673675537, "loss_ce": 0.000245812872890383, "loss_iou": 0.25390625, "loss_num": 0.0252685546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 229845256, "step": 4104 }, { "epoch": 9.142538975501113, "grad_norm": 17.91861343383789, "learning_rate": 1e-06, "loss": 0.7475, "num_input_tokens_seen": 229902832, "step": 4105 }, { "epoch": 9.142538975501113, "loss": 0.8098193407058716, "loss_ce": 0.0002490263432264328, "loss_iou": 0.369140625, "loss_num": 0.0142822265625, "loss_xval": 0.80859375, "num_input_tokens_seen": 229902832, "step": 4105 }, { "epoch": 9.144766146993318, "grad_norm": 34.62513732910156, "learning_rate": 1e-06, "loss": 0.5171, "num_input_tokens_seen": 229959432, "step": 4106 }, { "epoch": 9.144766146993318, "loss": 0.6218581199645996, "loss_ce": 0.00015404779696837068, "loss_iou": 0.265625, "loss_num": 0.017822265625, "loss_xval": 0.62109375, "num_input_tokens_seen": 229959432, "step": 4106 }, { "epoch": 9.146993318485523, "grad_norm": 16.90599822998047, "learning_rate": 1e-06, "loss": 0.6897, "num_input_tokens_seen": 230015140, "step": 4107 }, { "epoch": 9.146993318485523, "loss": 0.9310408234596252, "loss_ce": 0.00013258628314360976, "loss_iou": 0.400390625, "loss_num": 0.0262451171875, "loss_xval": 0.9296875, "num_input_tokens_seen": 230015140, "step": 4107 }, { "epoch": 9.14922048997773, "grad_norm": 18.637083053588867, "learning_rate": 1e-06, "loss": 0.5436, "num_input_tokens_seen": 230074668, "step": 4108 }, { "epoch": 9.14922048997773, "loss": 0.5741239786148071, "loss_ce": 0.0001493622548878193, "loss_iou": 0.26171875, "loss_num": 0.0101318359375, "loss_xval": 0.57421875, "num_input_tokens_seen": 230074668, "step": 4108 }, { "epoch": 9.151447661469934, "grad_norm": 26.293113708496094, "learning_rate": 1e-06, "loss": 0.5738, "num_input_tokens_seen": 230131008, "step": 4109 }, { "epoch": 9.151447661469934, "loss": 0.5387140512466431, "loss_ce": 0.00013981794472783804, "loss_iou": 0.220703125, "loss_num": 0.0196533203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 230131008, "step": 4109 }, { "epoch": 9.153674832962139, "grad_norm": 99.2131576538086, "learning_rate": 1e-06, "loss": 0.6848, "num_input_tokens_seen": 230186684, "step": 4110 }, { "epoch": 9.153674832962139, "loss": 0.8059197068214417, "loss_ce": 0.00025563541566953063, "loss_iou": 0.35546875, "loss_num": 0.0191650390625, "loss_xval": 0.8046875, "num_input_tokens_seen": 230186684, "step": 4110 }, { "epoch": 9.155902004454344, "grad_norm": 11.691954612731934, "learning_rate": 1e-06, "loss": 0.5424, "num_input_tokens_seen": 230244428, "step": 4111 }, { "epoch": 9.155902004454344, "loss": 0.5812520980834961, "loss_ce": 0.00019743737357202917, "loss_iou": 0.22265625, "loss_num": 0.0269775390625, "loss_xval": 0.58203125, "num_input_tokens_seen": 230244428, "step": 4111 }, { "epoch": 9.158129175946549, "grad_norm": 23.4112606048584, "learning_rate": 1e-06, "loss": 0.5201, "num_input_tokens_seen": 230298452, "step": 4112 }, { "epoch": 9.158129175946549, "loss": 0.6187844276428223, "loss_ce": 0.00013208799646236002, "loss_iou": 0.2490234375, "loss_num": 0.024169921875, "loss_xval": 0.6171875, "num_input_tokens_seen": 230298452, "step": 4112 }, { "epoch": 9.160356347438753, "grad_norm": 24.178136825561523, "learning_rate": 1e-06, "loss": 0.4805, "num_input_tokens_seen": 230352684, "step": 4113 }, { "epoch": 9.160356347438753, "loss": 0.5137137174606323, "loss_ce": 0.00016394033445976675, "loss_iou": 0.2451171875, "loss_num": 0.0045166015625, "loss_xval": 0.51171875, "num_input_tokens_seen": 230352684, "step": 4113 }, { "epoch": 9.162583518930958, "grad_norm": 17.317472457885742, "learning_rate": 1e-06, "loss": 0.5563, "num_input_tokens_seen": 230408152, "step": 4114 }, { "epoch": 9.162583518930958, "loss": 0.6397294998168945, "loss_ce": 0.0001421074557583779, "loss_iou": 0.26953125, "loss_num": 0.0198974609375, "loss_xval": 0.640625, "num_input_tokens_seen": 230408152, "step": 4114 }, { "epoch": 9.164810690423163, "grad_norm": 18.378246307373047, "learning_rate": 1e-06, "loss": 0.57, "num_input_tokens_seen": 230464552, "step": 4115 }, { "epoch": 9.164810690423163, "loss": 0.5939757823944092, "loss_ce": 0.00016474167932756245, "loss_iou": 0.212890625, "loss_num": 0.03369140625, "loss_xval": 0.59375, "num_input_tokens_seen": 230464552, "step": 4115 }, { "epoch": 9.167037861915368, "grad_norm": 13.769119262695312, "learning_rate": 1e-06, "loss": 0.6041, "num_input_tokens_seen": 230520760, "step": 4116 }, { "epoch": 9.167037861915368, "loss": 0.7275907397270203, "loss_ce": 0.0001737515558488667, "loss_iou": 0.306640625, "loss_num": 0.0224609375, "loss_xval": 0.7265625, "num_input_tokens_seen": 230520760, "step": 4116 }, { "epoch": 9.169265033407573, "grad_norm": 16.299108505249023, "learning_rate": 1e-06, "loss": 0.6447, "num_input_tokens_seen": 230577540, "step": 4117 }, { "epoch": 9.169265033407573, "loss": 0.7198707461357117, "loss_ce": 0.0002662655897438526, "loss_iou": 0.29296875, "loss_num": 0.0269775390625, "loss_xval": 0.71875, "num_input_tokens_seen": 230577540, "step": 4117 }, { "epoch": 9.171492204899778, "grad_norm": 20.457033157348633, "learning_rate": 1e-06, "loss": 0.7404, "num_input_tokens_seen": 230632328, "step": 4118 }, { "epoch": 9.171492204899778, "loss": 0.7303974032402039, "loss_ce": 0.0001728244824334979, "loss_iou": 0.3125, "loss_num": 0.021240234375, "loss_xval": 0.73046875, "num_input_tokens_seen": 230632328, "step": 4118 }, { "epoch": 9.173719376391983, "grad_norm": 17.638404846191406, "learning_rate": 1e-06, "loss": 0.5783, "num_input_tokens_seen": 230687228, "step": 4119 }, { "epoch": 9.173719376391983, "loss": 0.46454915404319763, "loss_ce": 0.00019369515939615667, "loss_iou": 0.177734375, "loss_num": 0.021728515625, "loss_xval": 0.46484375, "num_input_tokens_seen": 230687228, "step": 4119 }, { "epoch": 9.175946547884188, "grad_norm": 18.509496688842773, "learning_rate": 1e-06, "loss": 0.6066, "num_input_tokens_seen": 230743560, "step": 4120 }, { "epoch": 9.175946547884188, "loss": 0.7997314929962158, "loss_ce": 0.00017094583017751575, "loss_iou": 0.33984375, "loss_num": 0.0234375, "loss_xval": 0.80078125, "num_input_tokens_seen": 230743560, "step": 4120 }, { "epoch": 9.178173719376392, "grad_norm": 16.593032836914062, "learning_rate": 1e-06, "loss": 0.6728, "num_input_tokens_seen": 230800968, "step": 4121 }, { "epoch": 9.178173719376392, "loss": 0.949892520904541, "loss_ce": 0.00018545052444096655, "loss_iou": 0.37890625, "loss_num": 0.0380859375, "loss_xval": 0.94921875, "num_input_tokens_seen": 230800968, "step": 4121 }, { "epoch": 9.180400890868597, "grad_norm": 18.3552303314209, "learning_rate": 1e-06, "loss": 0.6935, "num_input_tokens_seen": 230857948, "step": 4122 }, { "epoch": 9.180400890868597, "loss": 0.7443253993988037, "loss_ce": 0.00018475553952157497, "loss_iou": 0.3125, "loss_num": 0.02392578125, "loss_xval": 0.7421875, "num_input_tokens_seen": 230857948, "step": 4122 }, { "epoch": 9.182628062360802, "grad_norm": 17.54131317138672, "learning_rate": 1e-06, "loss": 0.4531, "num_input_tokens_seen": 230912532, "step": 4123 }, { "epoch": 9.182628062360802, "loss": 0.43069642782211304, "loss_ce": 0.0001544352126074955, "loss_iou": 0.1796875, "loss_num": 0.01409912109375, "loss_xval": 0.4296875, "num_input_tokens_seen": 230912532, "step": 4123 }, { "epoch": 9.184855233853007, "grad_norm": 15.92164421081543, "learning_rate": 1e-06, "loss": 0.416, "num_input_tokens_seen": 230967364, "step": 4124 }, { "epoch": 9.184855233853007, "loss": 0.37866830825805664, "loss_ce": 0.00012826549937017262, "loss_iou": 0.158203125, "loss_num": 0.0123291015625, "loss_xval": 0.37890625, "num_input_tokens_seen": 230967364, "step": 4124 }, { "epoch": 9.187082405345212, "grad_norm": 14.523744583129883, "learning_rate": 1e-06, "loss": 0.5998, "num_input_tokens_seen": 231023780, "step": 4125 }, { "epoch": 9.187082405345212, "loss": 0.4754905700683594, "loss_ce": 0.00014873658074066043, "loss_iou": 0.2177734375, "loss_num": 0.00799560546875, "loss_xval": 0.474609375, "num_input_tokens_seen": 231023780, "step": 4125 }, { "epoch": 9.189309576837417, "grad_norm": 41.82172393798828, "learning_rate": 1e-06, "loss": 0.6878, "num_input_tokens_seen": 231081652, "step": 4126 }, { "epoch": 9.189309576837417, "loss": 0.8278828859329224, "loss_ce": 0.0002461716067045927, "loss_iou": 0.33984375, "loss_num": 0.029296875, "loss_xval": 0.828125, "num_input_tokens_seen": 231081652, "step": 4126 }, { "epoch": 9.191536748329622, "grad_norm": 13.996521949768066, "learning_rate": 1e-06, "loss": 0.627, "num_input_tokens_seen": 231137804, "step": 4127 }, { "epoch": 9.191536748329622, "loss": 0.6197777390480042, "loss_ce": 0.0001488196139689535, "loss_iou": 0.2470703125, "loss_num": 0.0250244140625, "loss_xval": 0.62109375, "num_input_tokens_seen": 231137804, "step": 4127 }, { "epoch": 9.193763919821826, "grad_norm": 23.2529239654541, "learning_rate": 1e-06, "loss": 0.5141, "num_input_tokens_seen": 231192268, "step": 4128 }, { "epoch": 9.193763919821826, "loss": 0.5340644717216492, "loss_ce": 0.00015944175538606942, "loss_iou": 0.236328125, "loss_num": 0.0120849609375, "loss_xval": 0.53515625, "num_input_tokens_seen": 231192268, "step": 4128 }, { "epoch": 9.195991091314031, "grad_norm": 16.674602508544922, "learning_rate": 1e-06, "loss": 0.5281, "num_input_tokens_seen": 231246748, "step": 4129 }, { "epoch": 9.195991091314031, "loss": 0.5064660310745239, "loss_ce": 0.00011834965698653832, "loss_iou": 0.2197265625, "loss_num": 0.0133056640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 231246748, "step": 4129 }, { "epoch": 9.198218262806236, "grad_norm": 18.663047790527344, "learning_rate": 1e-06, "loss": 0.6424, "num_input_tokens_seen": 231302740, "step": 4130 }, { "epoch": 9.198218262806236, "loss": 0.5936636924743652, "loss_ce": 0.0001578406518092379, "loss_iou": 0.2392578125, "loss_num": 0.02294921875, "loss_xval": 0.59375, "num_input_tokens_seen": 231302740, "step": 4130 }, { "epoch": 9.200445434298441, "grad_norm": 39.11851119995117, "learning_rate": 1e-06, "loss": 0.4673, "num_input_tokens_seen": 231356712, "step": 4131 }, { "epoch": 9.200445434298441, "loss": 0.3150908052921295, "loss_ce": 0.0001494048337917775, "loss_iou": 0.126953125, "loss_num": 0.0123291015625, "loss_xval": 0.314453125, "num_input_tokens_seen": 231356712, "step": 4131 }, { "epoch": 9.202672605790646, "grad_norm": 16.948453903198242, "learning_rate": 1e-06, "loss": 0.537, "num_input_tokens_seen": 231411692, "step": 4132 }, { "epoch": 9.202672605790646, "loss": 0.3508445620536804, "loss_ce": 0.00013653644418809563, "loss_iou": 0.1572265625, "loss_num": 0.00726318359375, "loss_xval": 0.3515625, "num_input_tokens_seen": 231411692, "step": 4132 }, { "epoch": 9.20489977728285, "grad_norm": 18.197696685791016, "learning_rate": 1e-06, "loss": 0.5971, "num_input_tokens_seen": 231468140, "step": 4133 }, { "epoch": 9.20489977728285, "loss": 0.47916704416275024, "loss_ce": 0.0001631466147955507, "loss_iou": 0.203125, "loss_num": 0.01470947265625, "loss_xval": 0.478515625, "num_input_tokens_seen": 231468140, "step": 4133 }, { "epoch": 9.207126948775056, "grad_norm": 18.140928268432617, "learning_rate": 1e-06, "loss": 0.4389, "num_input_tokens_seen": 231524076, "step": 4134 }, { "epoch": 9.207126948775056, "loss": 0.484072208404541, "loss_ce": 0.0001854776928666979, "loss_iou": 0.2021484375, "loss_num": 0.0159912109375, "loss_xval": 0.484375, "num_input_tokens_seen": 231524076, "step": 4134 }, { "epoch": 9.20935412026726, "grad_norm": 31.31038475036621, "learning_rate": 1e-06, "loss": 0.5013, "num_input_tokens_seen": 231580384, "step": 4135 }, { "epoch": 9.20935412026726, "loss": 0.5428678393363953, "loss_ce": 0.00014321647176984698, "loss_iou": 0.23046875, "loss_num": 0.016357421875, "loss_xval": 0.54296875, "num_input_tokens_seen": 231580384, "step": 4135 }, { "epoch": 9.211581291759465, "grad_norm": 18.321279525756836, "learning_rate": 1e-06, "loss": 0.482, "num_input_tokens_seen": 231635920, "step": 4136 }, { "epoch": 9.211581291759465, "loss": 0.5265001058578491, "loss_ce": 0.0001329151273239404, "loss_iou": 0.2314453125, "loss_num": 0.01263427734375, "loss_xval": 0.52734375, "num_input_tokens_seen": 231635920, "step": 4136 }, { "epoch": 9.21380846325167, "grad_norm": 15.607020378112793, "learning_rate": 1e-06, "loss": 0.7133, "num_input_tokens_seen": 231692860, "step": 4137 }, { "epoch": 9.21380846325167, "loss": 0.6896048784255981, "loss_ce": 0.00015171918494161218, "loss_iou": 0.298828125, "loss_num": 0.0181884765625, "loss_xval": 0.6875, "num_input_tokens_seen": 231692860, "step": 4137 }, { "epoch": 9.216035634743875, "grad_norm": 20.895606994628906, "learning_rate": 1e-06, "loss": 0.5896, "num_input_tokens_seen": 231752072, "step": 4138 }, { "epoch": 9.216035634743875, "loss": 0.5861691236495972, "loss_ce": 0.00017053935152944177, "loss_iou": 0.2431640625, "loss_num": 0.0198974609375, "loss_xval": 0.5859375, "num_input_tokens_seen": 231752072, "step": 4138 }, { "epoch": 9.21826280623608, "grad_norm": 21.63951873779297, "learning_rate": 1e-06, "loss": 0.5425, "num_input_tokens_seen": 231806848, "step": 4139 }, { "epoch": 9.21826280623608, "loss": 0.6595204472541809, "loss_ce": 0.0004628373426385224, "loss_iou": 0.296875, "loss_num": 0.0133056640625, "loss_xval": 0.66015625, "num_input_tokens_seen": 231806848, "step": 4139 }, { "epoch": 9.220489977728285, "grad_norm": 17.077104568481445, "learning_rate": 1e-06, "loss": 0.6683, "num_input_tokens_seen": 231862472, "step": 4140 }, { "epoch": 9.220489977728285, "loss": 0.7157517075538635, "loss_ce": 0.00017556847888045013, "loss_iou": 0.30078125, "loss_num": 0.0228271484375, "loss_xval": 0.71484375, "num_input_tokens_seen": 231862472, "step": 4140 }, { "epoch": 9.22271714922049, "grad_norm": 15.992926597595215, "learning_rate": 1e-06, "loss": 0.5696, "num_input_tokens_seen": 231916984, "step": 4141 }, { "epoch": 9.22271714922049, "loss": 0.48255985975265503, "loss_ce": 0.00013797251449432224, "loss_iou": 0.2138671875, "loss_num": 0.01080322265625, "loss_xval": 0.482421875, "num_input_tokens_seen": 231916984, "step": 4141 }, { "epoch": 9.224944320712694, "grad_norm": 17.786970138549805, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 231971832, "step": 4142 }, { "epoch": 9.224944320712694, "loss": 0.581957221031189, "loss_ce": 0.00017014719196595252, "loss_iou": 0.267578125, "loss_num": 0.009521484375, "loss_xval": 0.58203125, "num_input_tokens_seen": 231971832, "step": 4142 }, { "epoch": 9.2271714922049, "grad_norm": 17.536426544189453, "learning_rate": 1e-06, "loss": 0.7411, "num_input_tokens_seen": 232029912, "step": 4143 }, { "epoch": 9.2271714922049, "loss": 0.6547312140464783, "loss_ce": 0.00019021191110368818, "loss_iou": 0.28515625, "loss_num": 0.01708984375, "loss_xval": 0.65625, "num_input_tokens_seen": 232029912, "step": 4143 }, { "epoch": 9.229398663697104, "grad_norm": 17.012691497802734, "learning_rate": 1e-06, "loss": 0.5203, "num_input_tokens_seen": 232089236, "step": 4144 }, { "epoch": 9.229398663697104, "loss": 0.5632002353668213, "loss_ce": 0.00021197632304392755, "loss_iou": 0.2451171875, "loss_num": 0.01446533203125, "loss_xval": 0.5625, "num_input_tokens_seen": 232089236, "step": 4144 }, { "epoch": 9.231625835189309, "grad_norm": 17.848848342895508, "learning_rate": 1e-06, "loss": 0.4004, "num_input_tokens_seen": 232144528, "step": 4145 }, { "epoch": 9.231625835189309, "loss": 0.3932184875011444, "loss_ce": 0.0002741398348007351, "loss_iou": 0.169921875, "loss_num": 0.0106201171875, "loss_xval": 0.392578125, "num_input_tokens_seen": 232144528, "step": 4145 }, { "epoch": 9.233853006681514, "grad_norm": 14.590682983398438, "learning_rate": 1e-06, "loss": 0.476, "num_input_tokens_seen": 232202272, "step": 4146 }, { "epoch": 9.233853006681514, "loss": 0.40485870838165283, "loss_ce": 0.00013458858302328736, "loss_iou": 0.1728515625, "loss_num": 0.0118408203125, "loss_xval": 0.404296875, "num_input_tokens_seen": 232202272, "step": 4146 }, { "epoch": 9.236080178173719, "grad_norm": 22.08498764038086, "learning_rate": 1e-06, "loss": 0.5566, "num_input_tokens_seen": 232257404, "step": 4147 }, { "epoch": 9.236080178173719, "loss": 0.687615156173706, "loss_ce": 0.0002372571761952713, "loss_iou": 0.27734375, "loss_num": 0.0263671875, "loss_xval": 0.6875, "num_input_tokens_seen": 232257404, "step": 4147 }, { "epoch": 9.238307349665924, "grad_norm": 33.60602569580078, "learning_rate": 1e-06, "loss": 0.5149, "num_input_tokens_seen": 232313104, "step": 4148 }, { "epoch": 9.238307349665924, "loss": 0.46257176995277405, "loss_ce": 0.00016943998343776911, "loss_iou": 0.208984375, "loss_num": 0.0087890625, "loss_xval": 0.462890625, "num_input_tokens_seen": 232313104, "step": 4148 }, { "epoch": 9.240534521158128, "grad_norm": 20.825382232666016, "learning_rate": 1e-06, "loss": 0.6308, "num_input_tokens_seen": 232368704, "step": 4149 }, { "epoch": 9.240534521158128, "loss": 0.7248323559761047, "loss_ce": 0.00022297201212495565, "loss_iou": 0.291015625, "loss_num": 0.0283203125, "loss_xval": 0.7265625, "num_input_tokens_seen": 232368704, "step": 4149 }, { "epoch": 9.242761692650333, "grad_norm": 16.274980545043945, "learning_rate": 1e-06, "loss": 0.611, "num_input_tokens_seen": 232423564, "step": 4150 }, { "epoch": 9.242761692650333, "loss": 0.8920093178749084, "loss_ce": 0.0001636209199205041, "loss_iou": 0.337890625, "loss_num": 0.04296875, "loss_xval": 0.890625, "num_input_tokens_seen": 232423564, "step": 4150 }, { "epoch": 9.244988864142538, "grad_norm": 17.573843002319336, "learning_rate": 1e-06, "loss": 0.4551, "num_input_tokens_seen": 232480304, "step": 4151 }, { "epoch": 9.244988864142538, "loss": 0.4830518662929535, "loss_ce": 0.0001416985469404608, "loss_iou": 0.2060546875, "loss_num": 0.01409912109375, "loss_xval": 0.482421875, "num_input_tokens_seen": 232480304, "step": 4151 }, { "epoch": 9.247216035634743, "grad_norm": 20.984481811523438, "learning_rate": 1e-06, "loss": 0.5364, "num_input_tokens_seen": 232536816, "step": 4152 }, { "epoch": 9.247216035634743, "loss": 0.46505963802337646, "loss_ce": 0.00015485798940062523, "loss_iou": 0.1982421875, "loss_num": 0.01373291015625, "loss_xval": 0.46484375, "num_input_tokens_seen": 232536816, "step": 4152 }, { "epoch": 9.249443207126948, "grad_norm": 16.171337127685547, "learning_rate": 1e-06, "loss": 0.4752, "num_input_tokens_seen": 232592992, "step": 4153 }, { "epoch": 9.249443207126948, "loss": 0.5165247917175293, "loss_ce": 0.0004115123301744461, "loss_iou": 0.220703125, "loss_num": 0.01519775390625, "loss_xval": 0.515625, "num_input_tokens_seen": 232592992, "step": 4153 }, { "epoch": 9.251670378619155, "grad_norm": 21.20688247680664, "learning_rate": 1e-06, "loss": 0.5654, "num_input_tokens_seen": 232649224, "step": 4154 }, { "epoch": 9.251670378619155, "loss": 0.4512087404727936, "loss_ce": 0.00015892238297965378, "loss_iou": 0.1865234375, "loss_num": 0.01544189453125, "loss_xval": 0.451171875, "num_input_tokens_seen": 232649224, "step": 4154 }, { "epoch": 9.25389755011136, "grad_norm": 12.871101379394531, "learning_rate": 1e-06, "loss": 0.4954, "num_input_tokens_seen": 232706068, "step": 4155 }, { "epoch": 9.25389755011136, "loss": 0.5504859685897827, "loss_ce": 0.00019304068700876087, "loss_iou": 0.248046875, "loss_num": 0.010986328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 232706068, "step": 4155 }, { "epoch": 9.256124721603564, "grad_norm": 21.039005279541016, "learning_rate": 1e-06, "loss": 0.6367, "num_input_tokens_seen": 232759908, "step": 4156 }, { "epoch": 9.256124721603564, "loss": 0.6510411500930786, "loss_ce": 0.00016224203864112496, "loss_iou": 0.2890625, "loss_num": 0.014404296875, "loss_xval": 0.65234375, "num_input_tokens_seen": 232759908, "step": 4156 }, { "epoch": 9.25835189309577, "grad_norm": 17.5001163482666, "learning_rate": 1e-06, "loss": 0.5683, "num_input_tokens_seen": 232815104, "step": 4157 }, { "epoch": 9.25835189309577, "loss": 0.57974773645401, "loss_ce": 0.00015791512851137668, "loss_iou": 0.23046875, "loss_num": 0.0238037109375, "loss_xval": 0.578125, "num_input_tokens_seen": 232815104, "step": 4157 }, { "epoch": 9.260579064587974, "grad_norm": 13.760968208312988, "learning_rate": 1e-06, "loss": 0.49, "num_input_tokens_seen": 232870704, "step": 4158 }, { "epoch": 9.260579064587974, "loss": 0.553850531578064, "loss_ce": 0.00013953927555121481, "loss_iou": 0.2392578125, "loss_num": 0.01495361328125, "loss_xval": 0.5546875, "num_input_tokens_seen": 232870704, "step": 4158 }, { "epoch": 9.262806236080179, "grad_norm": 20.684478759765625, "learning_rate": 1e-06, "loss": 0.578, "num_input_tokens_seen": 232925348, "step": 4159 }, { "epoch": 9.262806236080179, "loss": 0.4780513644218445, "loss_ce": 0.000146102363942191, "loss_iou": 0.212890625, "loss_num": 0.010498046875, "loss_xval": 0.478515625, "num_input_tokens_seen": 232925348, "step": 4159 }, { "epoch": 9.265033407572384, "grad_norm": 13.17013931274414, "learning_rate": 1e-06, "loss": 0.6326, "num_input_tokens_seen": 232982220, "step": 4160 }, { "epoch": 9.265033407572384, "loss": 0.7166314125061035, "loss_ce": 0.00020072060578968376, "loss_iou": 0.302734375, "loss_num": 0.0224609375, "loss_xval": 0.71484375, "num_input_tokens_seen": 232982220, "step": 4160 }, { "epoch": 9.267260579064589, "grad_norm": 21.762144088745117, "learning_rate": 1e-06, "loss": 0.6647, "num_input_tokens_seen": 233039268, "step": 4161 }, { "epoch": 9.267260579064589, "loss": 0.6170930862426758, "loss_ce": 0.00014968152390792966, "loss_iou": 0.2451171875, "loss_num": 0.025390625, "loss_xval": 0.6171875, "num_input_tokens_seen": 233039268, "step": 4161 }, { "epoch": 9.269487750556793, "grad_norm": 19.671171188354492, "learning_rate": 1e-06, "loss": 0.4354, "num_input_tokens_seen": 233095824, "step": 4162 }, { "epoch": 9.269487750556793, "loss": 0.41189324855804443, "loss_ce": 0.00015009564231149852, "loss_iou": 0.1845703125, "loss_num": 0.0087890625, "loss_xval": 0.412109375, "num_input_tokens_seen": 233095824, "step": 4162 }, { "epoch": 9.271714922048998, "grad_norm": 19.537986755371094, "learning_rate": 1e-06, "loss": 0.5711, "num_input_tokens_seen": 233152836, "step": 4163 }, { "epoch": 9.271714922048998, "loss": 0.654711902141571, "loss_ce": 0.00017089179891627282, "loss_iou": 0.298828125, "loss_num": 0.01141357421875, "loss_xval": 0.65625, "num_input_tokens_seen": 233152836, "step": 4163 }, { "epoch": 9.273942093541203, "grad_norm": 15.77131462097168, "learning_rate": 1e-06, "loss": 0.566, "num_input_tokens_seen": 233207496, "step": 4164 }, { "epoch": 9.273942093541203, "loss": 0.5520115494728088, "loss_ce": 0.00019268158939667046, "loss_iou": 0.2353515625, "loss_num": 0.01611328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 233207496, "step": 4164 }, { "epoch": 9.276169265033408, "grad_norm": 13.012890815734863, "learning_rate": 1e-06, "loss": 0.5237, "num_input_tokens_seen": 233264596, "step": 4165 }, { "epoch": 9.276169265033408, "loss": 0.46829238533973694, "loss_ce": 0.00015275325858965516, "loss_iou": 0.1875, "loss_num": 0.018798828125, "loss_xval": 0.46875, "num_input_tokens_seen": 233264596, "step": 4165 }, { "epoch": 9.278396436525613, "grad_norm": 24.705101013183594, "learning_rate": 1e-06, "loss": 0.6364, "num_input_tokens_seen": 233321472, "step": 4166 }, { "epoch": 9.278396436525613, "loss": 0.5150579214096069, "loss_ce": 0.00016535192844457924, "loss_iou": 0.228515625, "loss_num": 0.0115966796875, "loss_xval": 0.515625, "num_input_tokens_seen": 233321472, "step": 4166 }, { "epoch": 9.280623608017818, "grad_norm": 15.74944019317627, "learning_rate": 1e-06, "loss": 0.5495, "num_input_tokens_seen": 233377064, "step": 4167 }, { "epoch": 9.280623608017818, "loss": 0.5138007402420044, "loss_ce": 0.00022045343939680606, "loss_iou": 0.23046875, "loss_num": 0.01055908203125, "loss_xval": 0.51171875, "num_input_tokens_seen": 233377064, "step": 4167 }, { "epoch": 9.282850779510023, "grad_norm": 25.632081985473633, "learning_rate": 1e-06, "loss": 0.5602, "num_input_tokens_seen": 233432460, "step": 4168 }, { "epoch": 9.282850779510023, "loss": 0.6908320784568787, "loss_ce": 0.0001582444820087403, "loss_iou": 0.26953125, "loss_num": 0.0299072265625, "loss_xval": 0.69140625, "num_input_tokens_seen": 233432460, "step": 4168 }, { "epoch": 9.285077951002227, "grad_norm": 21.129497528076172, "learning_rate": 1e-06, "loss": 0.6149, "num_input_tokens_seen": 233489932, "step": 4169 }, { "epoch": 9.285077951002227, "loss": 0.8304713368415833, "loss_ce": 0.00014905552961863577, "loss_iou": 0.337890625, "loss_num": 0.0306396484375, "loss_xval": 0.83203125, "num_input_tokens_seen": 233489932, "step": 4169 }, { "epoch": 9.287305122494432, "grad_norm": 18.864761352539062, "learning_rate": 1e-06, "loss": 0.6289, "num_input_tokens_seen": 233546048, "step": 4170 }, { "epoch": 9.287305122494432, "loss": 0.4682886600494385, "loss_ce": 0.00014900718815624714, "loss_iou": 0.2109375, "loss_num": 0.00946044921875, "loss_xval": 0.46875, "num_input_tokens_seen": 233546048, "step": 4170 }, { "epoch": 9.289532293986637, "grad_norm": 20.65398597717285, "learning_rate": 1e-06, "loss": 0.5547, "num_input_tokens_seen": 233601528, "step": 4171 }, { "epoch": 9.289532293986637, "loss": 0.6244362592697144, "loss_ce": 0.00016863204655237496, "loss_iou": 0.2734375, "loss_num": 0.01556396484375, "loss_xval": 0.625, "num_input_tokens_seen": 233601528, "step": 4171 }, { "epoch": 9.291759465478842, "grad_norm": 23.496076583862305, "learning_rate": 1e-06, "loss": 0.5856, "num_input_tokens_seen": 233657160, "step": 4172 }, { "epoch": 9.291759465478842, "loss": 0.5482483506202698, "loss_ce": 0.00015264737885445356, "loss_iou": 0.1982421875, "loss_num": 0.0303955078125, "loss_xval": 0.546875, "num_input_tokens_seen": 233657160, "step": 4172 }, { "epoch": 9.293986636971047, "grad_norm": 19.034543991088867, "learning_rate": 1e-06, "loss": 0.7883, "num_input_tokens_seen": 233713276, "step": 4173 }, { "epoch": 9.293986636971047, "loss": 0.7952208518981934, "loss_ce": 0.00017690191452857107, "loss_iou": 0.3203125, "loss_num": 0.03076171875, "loss_xval": 0.796875, "num_input_tokens_seen": 233713276, "step": 4173 }, { "epoch": 9.296213808463252, "grad_norm": 17.01585578918457, "learning_rate": 1e-06, "loss": 0.3602, "num_input_tokens_seen": 233769732, "step": 4174 }, { "epoch": 9.296213808463252, "loss": 0.4717066287994385, "loss_ce": 0.0001490233844378963, "loss_iou": 0.20703125, "loss_num": 0.01141357421875, "loss_xval": 0.470703125, "num_input_tokens_seen": 233769732, "step": 4174 }, { "epoch": 9.298440979955457, "grad_norm": 18.97732925415039, "learning_rate": 1e-06, "loss": 0.4866, "num_input_tokens_seen": 233827660, "step": 4175 }, { "epoch": 9.298440979955457, "loss": 0.5509434938430786, "loss_ce": 0.0001622582640266046, "loss_iou": 0.25, "loss_num": 0.0098876953125, "loss_xval": 0.55078125, "num_input_tokens_seen": 233827660, "step": 4175 }, { "epoch": 9.300668151447661, "grad_norm": 23.303890228271484, "learning_rate": 1e-06, "loss": 0.67, "num_input_tokens_seen": 233884728, "step": 4176 }, { "epoch": 9.300668151447661, "loss": 0.754581093788147, "loss_ce": 0.00018660849309526384, "loss_iou": 0.298828125, "loss_num": 0.031494140625, "loss_xval": 0.75390625, "num_input_tokens_seen": 233884728, "step": 4176 }, { "epoch": 9.302895322939866, "grad_norm": 15.087565422058105, "learning_rate": 1e-06, "loss": 0.5322, "num_input_tokens_seen": 233942264, "step": 4177 }, { "epoch": 9.302895322939866, "loss": 0.692180871963501, "loss_ce": 0.00016421612235717475, "loss_iou": 0.296875, "loss_num": 0.019775390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 233942264, "step": 4177 }, { "epoch": 9.305122494432071, "grad_norm": 21.872156143188477, "learning_rate": 1e-06, "loss": 0.4854, "num_input_tokens_seen": 233999788, "step": 4178 }, { "epoch": 9.305122494432071, "loss": 0.37598371505737305, "loss_ce": 0.00012923183385282755, "loss_iou": 0.166015625, "loss_num": 0.0089111328125, "loss_xval": 0.375, "num_input_tokens_seen": 233999788, "step": 4178 }, { "epoch": 9.307349665924276, "grad_norm": 15.20783519744873, "learning_rate": 1e-06, "loss": 0.7171, "num_input_tokens_seen": 234058404, "step": 4179 }, { "epoch": 9.307349665924276, "loss": 0.7457845211029053, "loss_ce": 0.00017908678273670375, "loss_iou": 0.3046875, "loss_num": 0.0274658203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 234058404, "step": 4179 }, { "epoch": 9.309576837416481, "grad_norm": 18.751060485839844, "learning_rate": 1e-06, "loss": 0.5907, "num_input_tokens_seen": 234115236, "step": 4180 }, { "epoch": 9.309576837416481, "loss": 0.5246922969818115, "loss_ce": 0.00015614343283232301, "loss_iou": 0.2294921875, "loss_num": 0.013427734375, "loss_xval": 0.5234375, "num_input_tokens_seen": 234115236, "step": 4180 }, { "epoch": 9.311804008908686, "grad_norm": 21.91156005859375, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 234173444, "step": 4181 }, { "epoch": 9.311804008908686, "loss": 0.5160158276557922, "loss_ce": 0.0001467098481953144, "loss_iou": 0.244140625, "loss_num": 0.005767822265625, "loss_xval": 0.515625, "num_input_tokens_seen": 234173444, "step": 4181 }, { "epoch": 9.31403118040089, "grad_norm": 31.379457473754883, "learning_rate": 1e-06, "loss": 0.8267, "num_input_tokens_seen": 234228884, "step": 4182 }, { "epoch": 9.31403118040089, "loss": 0.8775807619094849, "loss_ce": 0.00013937248149886727, "loss_iou": 0.369140625, "loss_num": 0.027587890625, "loss_xval": 0.87890625, "num_input_tokens_seen": 234228884, "step": 4182 }, { "epoch": 9.316258351893095, "grad_norm": 18.9929256439209, "learning_rate": 1e-06, "loss": 0.5908, "num_input_tokens_seen": 234286752, "step": 4183 }, { "epoch": 9.316258351893095, "loss": 0.8116600513458252, "loss_ce": 0.000136615228257142, "loss_iou": 0.337890625, "loss_num": 0.02685546875, "loss_xval": 0.8125, "num_input_tokens_seen": 234286752, "step": 4183 }, { "epoch": 9.3184855233853, "grad_norm": 16.527360916137695, "learning_rate": 1e-06, "loss": 0.6648, "num_input_tokens_seen": 234342332, "step": 4184 }, { "epoch": 9.3184855233853, "loss": 0.7774823904037476, "loss_ce": 0.00013861866318620741, "loss_iou": 0.34765625, "loss_num": 0.0167236328125, "loss_xval": 0.77734375, "num_input_tokens_seen": 234342332, "step": 4184 }, { "epoch": 9.320712694877505, "grad_norm": 16.93600845336914, "learning_rate": 1e-06, "loss": 0.5782, "num_input_tokens_seen": 234398820, "step": 4185 }, { "epoch": 9.320712694877505, "loss": 0.7552863359451294, "loss_ce": 0.0001593705965206027, "loss_iou": 0.322265625, "loss_num": 0.022216796875, "loss_xval": 0.75390625, "num_input_tokens_seen": 234398820, "step": 4185 }, { "epoch": 9.32293986636971, "grad_norm": 22.869224548339844, "learning_rate": 1e-06, "loss": 0.7147, "num_input_tokens_seen": 234455760, "step": 4186 }, { "epoch": 9.32293986636971, "loss": 0.5804110169410706, "loss_ce": 0.00024132244288921356, "loss_iou": 0.24609375, "loss_num": 0.0177001953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 234455760, "step": 4186 }, { "epoch": 9.325167037861915, "grad_norm": 15.121347427368164, "learning_rate": 1e-06, "loss": 0.4896, "num_input_tokens_seen": 234510508, "step": 4187 }, { "epoch": 9.325167037861915, "loss": 0.3788001537322998, "loss_ce": 0.00013804002082906663, "loss_iou": 0.166015625, "loss_num": 0.0093994140625, "loss_xval": 0.37890625, "num_input_tokens_seen": 234510508, "step": 4187 }, { "epoch": 9.32739420935412, "grad_norm": 18.335205078125, "learning_rate": 1e-06, "loss": 0.7204, "num_input_tokens_seen": 234567480, "step": 4188 }, { "epoch": 9.32739420935412, "loss": 0.6255242824554443, "loss_ce": 0.0001580739044584334, "loss_iou": 0.251953125, "loss_num": 0.024658203125, "loss_xval": 0.625, "num_input_tokens_seen": 234567480, "step": 4188 }, { "epoch": 9.329621380846325, "grad_norm": 12.408183097839355, "learning_rate": 1e-06, "loss": 0.58, "num_input_tokens_seen": 234625836, "step": 4189 }, { "epoch": 9.329621380846325, "loss": 0.36714544892311096, "loss_ce": 0.0002021001128014177, "loss_iou": 0.146484375, "loss_num": 0.0147705078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 234625836, "step": 4189 }, { "epoch": 9.33184855233853, "grad_norm": 20.51274299621582, "learning_rate": 1e-06, "loss": 0.5372, "num_input_tokens_seen": 234683824, "step": 4190 }, { "epoch": 9.33184855233853, "loss": 0.4362949728965759, "loss_ce": 0.00013776315608993173, "loss_iou": 0.1865234375, "loss_num": 0.01263427734375, "loss_xval": 0.435546875, "num_input_tokens_seen": 234683824, "step": 4190 }, { "epoch": 9.334075723830734, "grad_norm": 67.99667358398438, "learning_rate": 1e-06, "loss": 0.5763, "num_input_tokens_seen": 234739016, "step": 4191 }, { "epoch": 9.334075723830734, "loss": 0.4585108757019043, "loss_ce": 0.00013684081204701215, "loss_iou": 0.1923828125, "loss_num": 0.0146484375, "loss_xval": 0.458984375, "num_input_tokens_seen": 234739016, "step": 4191 }, { "epoch": 9.33630289532294, "grad_norm": 17.7122859954834, "learning_rate": 1e-06, "loss": 0.6762, "num_input_tokens_seen": 234793360, "step": 4192 }, { "epoch": 9.33630289532294, "loss": 0.6960656642913818, "loss_ce": 0.00014278030721470714, "loss_iou": 0.32421875, "loss_num": 0.0093994140625, "loss_xval": 0.6953125, "num_input_tokens_seen": 234793360, "step": 4192 }, { "epoch": 9.338530066815144, "grad_norm": 14.239766120910645, "learning_rate": 1e-06, "loss": 0.4748, "num_input_tokens_seen": 234849088, "step": 4193 }, { "epoch": 9.338530066815144, "loss": 0.40485137701034546, "loss_ce": 0.0001883181685116142, "loss_iou": 0.18359375, "loss_num": 0.007598876953125, "loss_xval": 0.404296875, "num_input_tokens_seen": 234849088, "step": 4193 }, { "epoch": 9.340757238307349, "grad_norm": 22.02215576171875, "learning_rate": 1e-06, "loss": 0.4715, "num_input_tokens_seen": 234905732, "step": 4194 }, { "epoch": 9.340757238307349, "loss": 0.4357007145881653, "loss_ce": 0.0001538339420221746, "loss_iou": 0.193359375, "loss_num": 0.00982666015625, "loss_xval": 0.435546875, "num_input_tokens_seen": 234905732, "step": 4194 }, { "epoch": 9.342984409799554, "grad_norm": 24.65700340270996, "learning_rate": 1e-06, "loss": 0.6571, "num_input_tokens_seen": 234959588, "step": 4195 }, { "epoch": 9.342984409799554, "loss": 0.6442734003067017, "loss_ce": 0.00023038909421302378, "loss_iou": 0.2353515625, "loss_num": 0.03466796875, "loss_xval": 0.64453125, "num_input_tokens_seen": 234959588, "step": 4195 }, { "epoch": 9.345211581291759, "grad_norm": 18.189048767089844, "learning_rate": 1e-06, "loss": 0.5611, "num_input_tokens_seen": 235018980, "step": 4196 }, { "epoch": 9.345211581291759, "loss": 0.6549949645996094, "loss_ce": 0.00020979381224606186, "loss_iou": 0.29296875, "loss_num": 0.0140380859375, "loss_xval": 0.65625, "num_input_tokens_seen": 235018980, "step": 4196 }, { "epoch": 9.347438752783964, "grad_norm": 43.657958984375, "learning_rate": 1e-06, "loss": 0.7156, "num_input_tokens_seen": 235076428, "step": 4197 }, { "epoch": 9.347438752783964, "loss": 0.6480019092559814, "loss_ce": 0.0002968419576063752, "loss_iou": 0.28125, "loss_num": 0.0172119140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 235076428, "step": 4197 }, { "epoch": 9.34966592427617, "grad_norm": 18.815326690673828, "learning_rate": 1e-06, "loss": 0.526, "num_input_tokens_seen": 235134848, "step": 4198 }, { "epoch": 9.34966592427617, "loss": 0.5375351905822754, "loss_ce": 0.00018168592941947281, "loss_iou": 0.2373046875, "loss_num": 0.01263427734375, "loss_xval": 0.5390625, "num_input_tokens_seen": 235134848, "step": 4198 }, { "epoch": 9.351893095768375, "grad_norm": 16.838205337524414, "learning_rate": 1e-06, "loss": 0.6095, "num_input_tokens_seen": 235189616, "step": 4199 }, { "epoch": 9.351893095768375, "loss": 0.5999011993408203, "loss_ce": 0.00016978610074147582, "loss_iou": 0.26171875, "loss_num": 0.0152587890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 235189616, "step": 4199 }, { "epoch": 9.35412026726058, "grad_norm": 22.742488861083984, "learning_rate": 1e-06, "loss": 0.6383, "num_input_tokens_seen": 235243244, "step": 4200 }, { "epoch": 9.35412026726058, "loss": 0.49684494733810425, "loss_ce": 0.00014081134577281773, "loss_iou": 0.205078125, "loss_num": 0.0174560546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 235243244, "step": 4200 }, { "epoch": 9.356347438752785, "grad_norm": 18.40520668029785, "learning_rate": 1e-06, "loss": 0.6173, "num_input_tokens_seen": 235300852, "step": 4201 }, { "epoch": 9.356347438752785, "loss": 0.7159720659255981, "loss_ce": 0.00015178298053797334, "loss_iou": 0.291015625, "loss_num": 0.02685546875, "loss_xval": 0.71484375, "num_input_tokens_seen": 235300852, "step": 4201 }, { "epoch": 9.35857461024499, "grad_norm": 18.26913833618164, "learning_rate": 1e-06, "loss": 0.6494, "num_input_tokens_seen": 235357404, "step": 4202 }, { "epoch": 9.35857461024499, "loss": 0.7315636873245239, "loss_ce": 0.00024043236044235528, "loss_iou": 0.328125, "loss_num": 0.0145263671875, "loss_xval": 0.73046875, "num_input_tokens_seen": 235357404, "step": 4202 }, { "epoch": 9.360801781737194, "grad_norm": 20.889400482177734, "learning_rate": 1e-06, "loss": 0.6694, "num_input_tokens_seen": 235412144, "step": 4203 }, { "epoch": 9.360801781737194, "loss": 0.6847151517868042, "loss_ce": 0.00014484771236311644, "loss_iou": 0.2890625, "loss_num": 0.0208740234375, "loss_xval": 0.68359375, "num_input_tokens_seen": 235412144, "step": 4203 }, { "epoch": 9.3630289532294, "grad_norm": 14.52232551574707, "learning_rate": 1e-06, "loss": 0.522, "num_input_tokens_seen": 235469972, "step": 4204 }, { "epoch": 9.3630289532294, "loss": 0.5159176588058472, "loss_ce": 0.00017058770754374564, "loss_iou": 0.2197265625, "loss_num": 0.01531982421875, "loss_xval": 0.515625, "num_input_tokens_seen": 235469972, "step": 4204 }, { "epoch": 9.365256124721604, "grad_norm": 18.92323875427246, "learning_rate": 1e-06, "loss": 0.5651, "num_input_tokens_seen": 235526988, "step": 4205 }, { "epoch": 9.365256124721604, "loss": 0.7476789951324463, "loss_ce": 0.00036454031942412257, "loss_iou": 0.330078125, "loss_num": 0.0172119140625, "loss_xval": 0.74609375, "num_input_tokens_seen": 235526988, "step": 4205 }, { "epoch": 9.367483296213809, "grad_norm": 25.92852210998535, "learning_rate": 1e-06, "loss": 0.4669, "num_input_tokens_seen": 235584824, "step": 4206 }, { "epoch": 9.367483296213809, "loss": 0.36491858959198, "loss_ce": 0.00017249592929147184, "loss_iou": 0.166015625, "loss_num": 0.006622314453125, "loss_xval": 0.365234375, "num_input_tokens_seen": 235584824, "step": 4206 }, { "epoch": 9.369710467706014, "grad_norm": 23.176660537719727, "learning_rate": 1e-06, "loss": 0.5446, "num_input_tokens_seen": 235642972, "step": 4207 }, { "epoch": 9.369710467706014, "loss": 0.425027072429657, "loss_ce": 0.00022239354439079762, "loss_iou": 0.171875, "loss_num": 0.0159912109375, "loss_xval": 0.42578125, "num_input_tokens_seen": 235642972, "step": 4207 }, { "epoch": 9.371937639198219, "grad_norm": 47.62214660644531, "learning_rate": 1e-06, "loss": 0.7125, "num_input_tokens_seen": 235698332, "step": 4208 }, { "epoch": 9.371937639198219, "loss": 0.8402742743492126, "loss_ce": 0.00018637791799847037, "loss_iou": 0.33203125, "loss_num": 0.035400390625, "loss_xval": 0.83984375, "num_input_tokens_seen": 235698332, "step": 4208 }, { "epoch": 9.374164810690424, "grad_norm": 15.558445930480957, "learning_rate": 1e-06, "loss": 0.4657, "num_input_tokens_seen": 235754988, "step": 4209 }, { "epoch": 9.374164810690424, "loss": 0.354988694190979, "loss_ce": 0.000130318061565049, "loss_iou": 0.1494140625, "loss_num": 0.0111083984375, "loss_xval": 0.35546875, "num_input_tokens_seen": 235754988, "step": 4209 }, { "epoch": 9.376391982182628, "grad_norm": 21.12288475036621, "learning_rate": 1e-06, "loss": 0.6511, "num_input_tokens_seen": 235812532, "step": 4210 }, { "epoch": 9.376391982182628, "loss": 0.7051099538803101, "loss_ce": 0.0001538842625450343, "loss_iou": 0.302734375, "loss_num": 0.01953125, "loss_xval": 0.703125, "num_input_tokens_seen": 235812532, "step": 4210 }, { "epoch": 9.378619153674833, "grad_norm": 16.031126022338867, "learning_rate": 1e-06, "loss": 0.4852, "num_input_tokens_seen": 235867920, "step": 4211 }, { "epoch": 9.378619153674833, "loss": 0.5213667750358582, "loss_ce": 0.0004927542759105563, "loss_iou": 0.2333984375, "loss_num": 0.0107421875, "loss_xval": 0.51953125, "num_input_tokens_seen": 235867920, "step": 4211 }, { "epoch": 9.380846325167038, "grad_norm": 16.82216453552246, "learning_rate": 1e-06, "loss": 0.468, "num_input_tokens_seen": 235923424, "step": 4212 }, { "epoch": 9.380846325167038, "loss": 0.5014048218727112, "loss_ce": 0.00018410818302072585, "loss_iou": 0.2109375, "loss_num": 0.015869140625, "loss_xval": 0.5, "num_input_tokens_seen": 235923424, "step": 4212 }, { "epoch": 9.383073496659243, "grad_norm": 15.369388580322266, "learning_rate": 1e-06, "loss": 0.4188, "num_input_tokens_seen": 235977656, "step": 4213 }, { "epoch": 9.383073496659243, "loss": 0.466952919960022, "loss_ce": 0.0001560571399750188, "loss_iou": 0.1943359375, "loss_num": 0.015625, "loss_xval": 0.466796875, "num_input_tokens_seen": 235977656, "step": 4213 }, { "epoch": 9.385300668151448, "grad_norm": 12.31820011138916, "learning_rate": 1e-06, "loss": 0.6936, "num_input_tokens_seen": 236034136, "step": 4214 }, { "epoch": 9.385300668151448, "loss": 0.6039036512374878, "loss_ce": 0.00014389277203008533, "loss_iou": 0.265625, "loss_num": 0.01409912109375, "loss_xval": 0.60546875, "num_input_tokens_seen": 236034136, "step": 4214 }, { "epoch": 9.387527839643653, "grad_norm": 20.034059524536133, "learning_rate": 1e-06, "loss": 0.5376, "num_input_tokens_seen": 236092456, "step": 4215 }, { "epoch": 9.387527839643653, "loss": 0.5938976407051086, "loss_ce": 0.0001476521574659273, "loss_iou": 0.267578125, "loss_num": 0.01141357421875, "loss_xval": 0.59375, "num_input_tokens_seen": 236092456, "step": 4215 }, { "epoch": 9.389755011135858, "grad_norm": 46.42024612426758, "learning_rate": 1e-06, "loss": 0.5741, "num_input_tokens_seen": 236147020, "step": 4216 }, { "epoch": 9.389755011135858, "loss": 0.5294545292854309, "loss_ce": 0.00015764265845064074, "loss_iou": 0.244140625, "loss_num": 0.0079345703125, "loss_xval": 0.53125, "num_input_tokens_seen": 236147020, "step": 4216 }, { "epoch": 9.391982182628063, "grad_norm": 26.644548416137695, "learning_rate": 1e-06, "loss": 0.6237, "num_input_tokens_seen": 236200772, "step": 4217 }, { "epoch": 9.391982182628063, "loss": 0.7529926300048828, "loss_ce": 0.00018498601275496185, "loss_iou": 0.33984375, "loss_num": 0.01483154296875, "loss_xval": 0.75390625, "num_input_tokens_seen": 236200772, "step": 4217 }, { "epoch": 9.394209354120267, "grad_norm": 18.652217864990234, "learning_rate": 1e-06, "loss": 0.5676, "num_input_tokens_seen": 236255604, "step": 4218 }, { "epoch": 9.394209354120267, "loss": 0.6091314554214478, "loss_ce": 0.00024474196834489703, "loss_iou": 0.28125, "loss_num": 0.0091552734375, "loss_xval": 0.609375, "num_input_tokens_seen": 236255604, "step": 4218 }, { "epoch": 9.396436525612472, "grad_norm": 14.043095588684082, "learning_rate": 1e-06, "loss": 0.7096, "num_input_tokens_seen": 236310220, "step": 4219 }, { "epoch": 9.396436525612472, "loss": 0.8942954540252686, "loss_ce": 0.0002524922601878643, "loss_iou": 0.390625, "loss_num": 0.022705078125, "loss_xval": 0.89453125, "num_input_tokens_seen": 236310220, "step": 4219 }, { "epoch": 9.398663697104677, "grad_norm": 17.981460571289062, "learning_rate": 1e-06, "loss": 0.5667, "num_input_tokens_seen": 236364604, "step": 4220 }, { "epoch": 9.398663697104677, "loss": 0.4448683559894562, "loss_ce": 0.0001662159920670092, "loss_iou": 0.2001953125, "loss_num": 0.00872802734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 236364604, "step": 4220 }, { "epoch": 9.400890868596882, "grad_norm": 14.154129028320312, "learning_rate": 1e-06, "loss": 0.6947, "num_input_tokens_seen": 236421912, "step": 4221 }, { "epoch": 9.400890868596882, "loss": 0.7411311268806458, "loss_ce": 0.00016432552365586162, "loss_iou": 0.34765625, "loss_num": 0.00933837890625, "loss_xval": 0.7421875, "num_input_tokens_seen": 236421912, "step": 4221 }, { "epoch": 9.403118040089087, "grad_norm": 34.250003814697266, "learning_rate": 1e-06, "loss": 0.5965, "num_input_tokens_seen": 236478284, "step": 4222 }, { "epoch": 9.403118040089087, "loss": 0.5931603312492371, "loss_ce": 0.00014277252194005996, "loss_iou": 0.25, "loss_num": 0.0185546875, "loss_xval": 0.59375, "num_input_tokens_seen": 236478284, "step": 4222 }, { "epoch": 9.405345211581292, "grad_norm": 21.780054092407227, "learning_rate": 1e-06, "loss": 0.5467, "num_input_tokens_seen": 236534108, "step": 4223 }, { "epoch": 9.405345211581292, "loss": 0.5391287803649902, "loss_ce": 0.0001272944500669837, "loss_iou": 0.2158203125, "loss_num": 0.0213623046875, "loss_xval": 0.5390625, "num_input_tokens_seen": 236534108, "step": 4223 }, { "epoch": 9.407572383073497, "grad_norm": 22.980321884155273, "learning_rate": 1e-06, "loss": 0.7141, "num_input_tokens_seen": 236590772, "step": 4224 }, { "epoch": 9.407572383073497, "loss": 0.6423543691635132, "loss_ce": 0.00014242672477848828, "loss_iou": 0.27734375, "loss_num": 0.01708984375, "loss_xval": 0.640625, "num_input_tokens_seen": 236590772, "step": 4224 }, { "epoch": 9.409799554565701, "grad_norm": 21.113697052001953, "learning_rate": 1e-06, "loss": 0.4206, "num_input_tokens_seen": 236648468, "step": 4225 }, { "epoch": 9.409799554565701, "loss": 0.31469690799713135, "loss_ce": 0.0001217244571307674, "loss_iou": 0.12060546875, "loss_num": 0.0146484375, "loss_xval": 0.314453125, "num_input_tokens_seen": 236648468, "step": 4225 }, { "epoch": 9.412026726057906, "grad_norm": 9.035064697265625, "learning_rate": 1e-06, "loss": 0.3957, "num_input_tokens_seen": 236707008, "step": 4226 }, { "epoch": 9.412026726057906, "loss": 0.30737414956092834, "loss_ce": 0.00012316979700699449, "loss_iou": 0.125, "loss_num": 0.011474609375, "loss_xval": 0.306640625, "num_input_tokens_seen": 236707008, "step": 4226 }, { "epoch": 9.414253897550111, "grad_norm": 16.591636657714844, "learning_rate": 1e-06, "loss": 0.6342, "num_input_tokens_seen": 236761664, "step": 4227 }, { "epoch": 9.414253897550111, "loss": 0.7024285793304443, "loss_ce": 0.00015806331066414714, "loss_iou": 0.29296875, "loss_num": 0.0234375, "loss_xval": 0.703125, "num_input_tokens_seen": 236761664, "step": 4227 }, { "epoch": 9.416481069042316, "grad_norm": 21.84915542602539, "learning_rate": 1e-06, "loss": 0.6148, "num_input_tokens_seen": 236819440, "step": 4228 }, { "epoch": 9.416481069042316, "loss": 0.5935507416725159, "loss_ce": 0.0002889999595936388, "loss_iou": 0.2412109375, "loss_num": 0.02197265625, "loss_xval": 0.59375, "num_input_tokens_seen": 236819440, "step": 4228 }, { "epoch": 9.41870824053452, "grad_norm": 17.725332260131836, "learning_rate": 1e-06, "loss": 0.5218, "num_input_tokens_seen": 236876512, "step": 4229 }, { "epoch": 9.41870824053452, "loss": 0.546193540096283, "loss_ce": 0.00017301872139796615, "loss_iou": 0.24609375, "loss_num": 0.0106201171875, "loss_xval": 0.546875, "num_input_tokens_seen": 236876512, "step": 4229 }, { "epoch": 9.420935412026726, "grad_norm": 14.556755065917969, "learning_rate": 1e-06, "loss": 0.4876, "num_input_tokens_seen": 236933876, "step": 4230 }, { "epoch": 9.420935412026726, "loss": 0.5147203207015991, "loss_ce": 0.0008042675326578319, "loss_iou": 0.220703125, "loss_num": 0.0147705078125, "loss_xval": 0.515625, "num_input_tokens_seen": 236933876, "step": 4230 }, { "epoch": 9.42316258351893, "grad_norm": 30.021310806274414, "learning_rate": 1e-06, "loss": 0.5368, "num_input_tokens_seen": 236989724, "step": 4231 }, { "epoch": 9.42316258351893, "loss": 0.6030337810516357, "loss_ce": 0.000128521875012666, "loss_iou": 0.26171875, "loss_num": 0.01544189453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 236989724, "step": 4231 }, { "epoch": 9.425389755011135, "grad_norm": 17.476322174072266, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 237044832, "step": 4232 }, { "epoch": 9.425389755011135, "loss": 0.6286413669586182, "loss_ce": 0.00022339157294481993, "loss_iou": 0.291015625, "loss_num": 0.0089111328125, "loss_xval": 0.62890625, "num_input_tokens_seen": 237044832, "step": 4232 }, { "epoch": 9.42761692650334, "grad_norm": 18.216716766357422, "learning_rate": 1e-06, "loss": 0.6325, "num_input_tokens_seen": 237102544, "step": 4233 }, { "epoch": 9.42761692650334, "loss": 0.7180312871932983, "loss_ce": 0.0002578936982899904, "loss_iou": 0.29296875, "loss_num": 0.026611328125, "loss_xval": 0.71875, "num_input_tokens_seen": 237102544, "step": 4233 }, { "epoch": 9.429844097995545, "grad_norm": 20.78913688659668, "learning_rate": 1e-06, "loss": 0.5074, "num_input_tokens_seen": 237158060, "step": 4234 }, { "epoch": 9.429844097995545, "loss": 0.5021035671234131, "loss_ce": 0.00015046056068968028, "loss_iou": 0.2275390625, "loss_num": 0.00933837890625, "loss_xval": 0.5, "num_input_tokens_seen": 237158060, "step": 4234 }, { "epoch": 9.43207126948775, "grad_norm": 138.009033203125, "learning_rate": 1e-06, "loss": 0.5155, "num_input_tokens_seen": 237213952, "step": 4235 }, { "epoch": 9.43207126948775, "loss": 0.5691501498222351, "loss_ce": 0.0005466500297188759, "loss_iou": 0.240234375, "loss_num": 0.017578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 237213952, "step": 4235 }, { "epoch": 9.434298440979955, "grad_norm": 18.69287109375, "learning_rate": 1e-06, "loss": 0.5015, "num_input_tokens_seen": 237269936, "step": 4236 }, { "epoch": 9.434298440979955, "loss": 0.6309968829154968, "loss_ce": 0.00013751011283602566, "loss_iou": 0.251953125, "loss_num": 0.0255126953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 237269936, "step": 4236 }, { "epoch": 9.43652561247216, "grad_norm": 11.285958290100098, "learning_rate": 1e-06, "loss": 0.5439, "num_input_tokens_seen": 237325496, "step": 4237 }, { "epoch": 9.43652561247216, "loss": 0.6378533840179443, "loss_ce": 0.0001580730895511806, "loss_iou": 0.27734375, "loss_num": 0.01708984375, "loss_xval": 0.63671875, "num_input_tokens_seen": 237325496, "step": 4237 }, { "epoch": 9.438752783964365, "grad_norm": 24.240333557128906, "learning_rate": 1e-06, "loss": 0.5876, "num_input_tokens_seen": 237383104, "step": 4238 }, { "epoch": 9.438752783964365, "loss": 0.6686908006668091, "loss_ce": 0.0002032603952102363, "loss_iou": 0.2734375, "loss_num": 0.02392578125, "loss_xval": 0.66796875, "num_input_tokens_seen": 237383104, "step": 4238 }, { "epoch": 9.44097995545657, "grad_norm": 16.92664337158203, "learning_rate": 1e-06, "loss": 0.4389, "num_input_tokens_seen": 237434992, "step": 4239 }, { "epoch": 9.44097995545657, "loss": 0.38501814007759094, "loss_ce": 0.00013044432853348553, "loss_iou": 0.1611328125, "loss_num": 0.012451171875, "loss_xval": 0.384765625, "num_input_tokens_seen": 237434992, "step": 4239 }, { "epoch": 9.443207126948774, "grad_norm": 20.529521942138672, "learning_rate": 1e-06, "loss": 0.6429, "num_input_tokens_seen": 237489668, "step": 4240 }, { "epoch": 9.443207126948774, "loss": 0.703424870967865, "loss_ce": 0.00017782000941224396, "loss_iou": 0.283203125, "loss_num": 0.0274658203125, "loss_xval": 0.703125, "num_input_tokens_seen": 237489668, "step": 4240 }, { "epoch": 9.44543429844098, "grad_norm": 18.514591217041016, "learning_rate": 1e-06, "loss": 0.5228, "num_input_tokens_seen": 237545776, "step": 4241 }, { "epoch": 9.44543429844098, "loss": 0.508231520652771, "loss_ce": 0.00041901745134964585, "loss_iou": 0.236328125, "loss_num": 0.007171630859375, "loss_xval": 0.5078125, "num_input_tokens_seen": 237545776, "step": 4241 }, { "epoch": 9.447661469933184, "grad_norm": 22.630306243896484, "learning_rate": 1e-06, "loss": 0.5125, "num_input_tokens_seen": 237603000, "step": 4242 }, { "epoch": 9.447661469933184, "loss": 0.5746460556983948, "loss_ce": 0.00018314782937522978, "loss_iou": 0.22265625, "loss_num": 0.02587890625, "loss_xval": 0.57421875, "num_input_tokens_seen": 237603000, "step": 4242 }, { "epoch": 9.449888641425389, "grad_norm": 21.890134811401367, "learning_rate": 1e-06, "loss": 0.5389, "num_input_tokens_seen": 237658508, "step": 4243 }, { "epoch": 9.449888641425389, "loss": 0.5602293610572815, "loss_ce": 0.00017077414668165147, "loss_iou": 0.248046875, "loss_num": 0.01263427734375, "loss_xval": 0.55859375, "num_input_tokens_seen": 237658508, "step": 4243 }, { "epoch": 9.452115812917596, "grad_norm": 17.784313201904297, "learning_rate": 1e-06, "loss": 0.5817, "num_input_tokens_seen": 237714064, "step": 4244 }, { "epoch": 9.452115812917596, "loss": 0.5421576499938965, "loss_ce": 0.0001654581428738311, "loss_iou": 0.2470703125, "loss_num": 0.00933837890625, "loss_xval": 0.54296875, "num_input_tokens_seen": 237714064, "step": 4244 }, { "epoch": 9.4543429844098, "grad_norm": 17.17974853515625, "learning_rate": 1e-06, "loss": 0.4908, "num_input_tokens_seen": 237768912, "step": 4245 }, { "epoch": 9.4543429844098, "loss": 0.34260523319244385, "loss_ce": 0.00013697342365048826, "loss_iou": 0.13671875, "loss_num": 0.01361083984375, "loss_xval": 0.341796875, "num_input_tokens_seen": 237768912, "step": 4245 }, { "epoch": 9.456570155902005, "grad_norm": 22.440263748168945, "learning_rate": 1e-06, "loss": 0.8688, "num_input_tokens_seen": 237822376, "step": 4246 }, { "epoch": 9.456570155902005, "loss": 0.7955905795097351, "loss_ce": 0.00018044964235741645, "loss_iou": 0.322265625, "loss_num": 0.030029296875, "loss_xval": 0.796875, "num_input_tokens_seen": 237822376, "step": 4246 }, { "epoch": 9.45879732739421, "grad_norm": 20.348644256591797, "learning_rate": 1e-06, "loss": 0.5448, "num_input_tokens_seen": 237879852, "step": 4247 }, { "epoch": 9.45879732739421, "loss": 0.7884389758110046, "loss_ce": 0.00023097131634131074, "loss_iou": 0.314453125, "loss_num": 0.031982421875, "loss_xval": 0.7890625, "num_input_tokens_seen": 237879852, "step": 4247 }, { "epoch": 9.461024498886415, "grad_norm": 98.13805389404297, "learning_rate": 1e-06, "loss": 0.7861, "num_input_tokens_seen": 237933556, "step": 4248 }, { "epoch": 9.461024498886415, "loss": 1.0614817142486572, "loss_ce": 0.0002023179258685559, "loss_iou": 0.453125, "loss_num": 0.03076171875, "loss_xval": 1.0625, "num_input_tokens_seen": 237933556, "step": 4248 }, { "epoch": 9.46325167037862, "grad_norm": 18.363706588745117, "learning_rate": 1e-06, "loss": 0.5792, "num_input_tokens_seen": 237990416, "step": 4249 }, { "epoch": 9.46325167037862, "loss": 0.6430014371871948, "loss_ce": 0.00017919728998094797, "loss_iou": 0.2734375, "loss_num": 0.01904296875, "loss_xval": 0.64453125, "num_input_tokens_seen": 237990416, "step": 4249 }, { "epoch": 9.465478841870825, "grad_norm": 22.31871223449707, "learning_rate": 1e-06, "loss": 0.5357, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "eval_seeclick_web_CIoU": 0.5783384740352631, "eval_seeclick_web_GIoU": 0.5768938064575195, "eval_seeclick_web_IoU": 0.596380203962326, "eval_seeclick_web_MAE_all": 0.016195162199437618, "eval_seeclick_web_MAE_h": 0.007964757736772299, "eval_seeclick_web_MAE_w": 0.01662321202456951, "eval_seeclick_web_MAE_x_boxes": 0.00983009533956647, "eval_seeclick_web_MAE_y_boxes": 0.021932302275672555, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9355359077453613, "eval_seeclick_web_loss_ce": 0.00021685881074517965, "eval_seeclick_web_loss_iou": 0.424072265625, "eval_seeclick_web_loss_num": 0.013062477111816406, "eval_seeclick_web_loss_xval": 0.9130859375, "eval_seeclick_web_runtime": 23.3851, "eval_seeclick_web_samples_per_second": 2.138, "eval_seeclick_web_steps_per_second": 0.086, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "eval_icons_CIoU": 0.2803712487220764, "eval_icons_GIoU": 0.30777665972709656, "eval_icons_IoU": 0.3619535565376282, "eval_icons_MAE_all": 0.06462299637496471, "eval_icons_MAE_h": 0.039737068116664886, "eval_icons_MAE_w": 0.06902601942420006, "eval_icons_MAE_x_boxes": 0.05829739384353161, "eval_icons_MAE_y_boxes": 0.039782424457371235, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.738389253616333, "eval_icons_loss_ce": 0.00025170089793391526, "eval_icons_loss_iou": 0.6759033203125, "eval_icons_loss_num": 0.061977386474609375, "eval_icons_loss_xval": 1.659912109375, "eval_icons_runtime": 22.6548, "eval_icons_samples_per_second": 2.207, "eval_icons_steps_per_second": 0.088, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "eval_screenspot_CIoU": 0.34290045499801636, "eval_screenspot_GIoU": 0.36323591073354083, "eval_screenspot_IoU": 0.4251118103663127, "eval_screenspot_MAE_all": 0.06305227304498355, "eval_screenspot_MAE_h": 0.03816718918581804, "eval_screenspot_MAE_w": 0.07184332360823949, "eval_screenspot_MAE_x_boxes": 0.07595415661732356, "eval_screenspot_MAE_y_boxes": 0.04537342426677545, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6450954675674438, "eval_screenspot_loss_ce": 0.0002674317511264235, "eval_screenspot_loss_iou": 0.6758626302083334, "eval_screenspot_loss_num": 0.07347997029622395, "eval_screenspot_loss_xval": 1.7189127604166667, "eval_screenspot_runtime": 39.9574, "eval_screenspot_samples_per_second": 2.227, "eval_screenspot_steps_per_second": 0.075, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "eval_compot_CIoU": 0.3499128520488739, "eval_compot_GIoU": 0.3589301258325577, "eval_compot_IoU": 0.40847519040107727, "eval_compot_MAE_all": 0.018001767806708813, "eval_compot_MAE_h": 0.008998606353998184, "eval_compot_MAE_w": 0.021249551326036453, "eval_compot_MAE_x_boxes": 0.029959955252707005, "eval_compot_MAE_y_boxes": 0.007053635781630874, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3970842361450195, "eval_compot_loss_ce": 0.00020831655274378136, "eval_compot_loss_iou": 0.6414794921875, "eval_compot_loss_num": 0.016920089721679688, "eval_compot_loss_xval": 1.3681640625, "eval_compot_runtime": 21.8924, "eval_compot_samples_per_second": 2.284, "eval_compot_steps_per_second": 0.091, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "eval_custom_ui_val_CIoU": 0.4645145701037513, "eval_custom_ui_val_GIoU": 0.4788343757390976, "eval_custom_ui_val_IoU": 0.5250709156195322, "eval_custom_ui_val_MAE_all": 0.03104415287574132, "eval_custom_ui_val_MAE_h": 0.01666957584934102, "eval_custom_ui_val_MAE_w": 0.039596209612985454, "eval_custom_ui_val_MAE_x_boxes": 0.0380424867487616, "eval_custom_ui_val_MAE_y_boxes": 0.015655622765835788, "eval_custom_ui_val_inside_bbox": 0.7353395091162788, "eval_custom_ui_val_loss": 1.2056481838226318, "eval_custom_ui_val_loss_ce": 0.00023572022231140485, "eval_custom_ui_val_loss_iou": 0.5117323133680556, "eval_custom_ui_val_loss_num": 0.028479894002278645, "eval_custom_ui_val_loss_xval": 1.1659071180555556, "eval_custom_ui_val_runtime": 64.1276, "eval_custom_ui_val_samples_per_second": 4.132, "eval_custom_ui_val_steps_per_second": 0.14, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.465478841870825, "loss": 0.9052478671073914, "loss_ce": 0.00021855966770090163, "loss_iou": 0.390625, "loss_num": 0.0250244140625, "loss_xval": 0.90625, "num_input_tokens_seen": 238048204, "step": 4250 }, { "epoch": 9.46770601336303, "grad_norm": 13.239431381225586, "learning_rate": 1e-06, "loss": 0.5228, "num_input_tokens_seen": 238102764, "step": 4251 }, { "epoch": 9.46770601336303, "loss": 0.4459550380706787, "loss_ce": 0.00015426415484398603, "loss_iou": 0.1982421875, "loss_num": 0.00994873046875, "loss_xval": 0.4453125, "num_input_tokens_seen": 238102764, "step": 4251 }, { "epoch": 9.469933184855234, "grad_norm": 16.412900924682617, "learning_rate": 1e-06, "loss": 0.5837, "num_input_tokens_seen": 238160024, "step": 4252 }, { "epoch": 9.469933184855234, "loss": 0.6651994585990906, "loss_ce": 0.00016038438479881734, "loss_iou": 0.265625, "loss_num": 0.0269775390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 238160024, "step": 4252 }, { "epoch": 9.47216035634744, "grad_norm": 16.5249080657959, "learning_rate": 1e-06, "loss": 0.535, "num_input_tokens_seen": 238218512, "step": 4253 }, { "epoch": 9.47216035634744, "loss": 0.5461084246635437, "loss_ce": 0.00020996385137550533, "loss_iou": 0.248046875, "loss_num": 0.01007080078125, "loss_xval": 0.546875, "num_input_tokens_seen": 238218512, "step": 4253 }, { "epoch": 9.474387527839644, "grad_norm": 17.52018165588379, "learning_rate": 1e-06, "loss": 0.6211, "num_input_tokens_seen": 238277132, "step": 4254 }, { "epoch": 9.474387527839644, "loss": 0.5265886783599854, "loss_ce": 0.00022152790916152298, "loss_iou": 0.2216796875, "loss_num": 0.0166015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 238277132, "step": 4254 }, { "epoch": 9.476614699331849, "grad_norm": 18.231037139892578, "learning_rate": 1e-06, "loss": 0.5999, "num_input_tokens_seen": 238331752, "step": 4255 }, { "epoch": 9.476614699331849, "loss": 0.7489705085754395, "loss_ce": 0.00019125922699458897, "loss_iou": 0.318359375, "loss_num": 0.022216796875, "loss_xval": 0.75, "num_input_tokens_seen": 238331752, "step": 4255 }, { "epoch": 9.478841870824054, "grad_norm": 27.039873123168945, "learning_rate": 1e-06, "loss": 0.6843, "num_input_tokens_seen": 238387640, "step": 4256 }, { "epoch": 9.478841870824054, "loss": 0.7631433010101318, "loss_ce": 0.00020384913659654558, "loss_iou": 0.3359375, "loss_num": 0.018310546875, "loss_xval": 0.76171875, "num_input_tokens_seen": 238387640, "step": 4256 }, { "epoch": 9.481069042316259, "grad_norm": 33.0074577331543, "learning_rate": 1e-06, "loss": 0.6398, "num_input_tokens_seen": 238440696, "step": 4257 }, { "epoch": 9.481069042316259, "loss": 0.8715030550956726, "loss_ce": 0.0001651808270253241, "loss_iou": 0.3828125, "loss_num": 0.0213623046875, "loss_xval": 0.87109375, "num_input_tokens_seen": 238440696, "step": 4257 }, { "epoch": 9.483296213808464, "grad_norm": 27.93197250366211, "learning_rate": 1e-06, "loss": 0.8218, "num_input_tokens_seen": 238494316, "step": 4258 }, { "epoch": 9.483296213808464, "loss": 0.602424681186676, "loss_ce": 0.0001297968119615689, "loss_iou": 0.255859375, "loss_num": 0.0184326171875, "loss_xval": 0.6015625, "num_input_tokens_seen": 238494316, "step": 4258 }, { "epoch": 9.485523385300668, "grad_norm": 23.008649826049805, "learning_rate": 1e-06, "loss": 0.7639, "num_input_tokens_seen": 238550660, "step": 4259 }, { "epoch": 9.485523385300668, "loss": 0.7036758065223694, "loss_ce": 0.0001846282830229029, "loss_iou": 0.296875, "loss_num": 0.0223388671875, "loss_xval": 0.703125, "num_input_tokens_seen": 238550660, "step": 4259 }, { "epoch": 9.487750556792873, "grad_norm": 18.077539443969727, "learning_rate": 1e-06, "loss": 0.5111, "num_input_tokens_seen": 238606896, "step": 4260 }, { "epoch": 9.487750556792873, "loss": 0.44276681542396545, "loss_ce": 0.00013988392311148345, "loss_iou": 0.1943359375, "loss_num": 0.010498046875, "loss_xval": 0.443359375, "num_input_tokens_seen": 238606896, "step": 4260 }, { "epoch": 9.489977728285078, "grad_norm": 12.999273300170898, "learning_rate": 1e-06, "loss": 0.6034, "num_input_tokens_seen": 238662636, "step": 4261 }, { "epoch": 9.489977728285078, "loss": 0.6766662001609802, "loss_ce": 0.00015251210425049067, "loss_iou": 0.283203125, "loss_num": 0.0216064453125, "loss_xval": 0.67578125, "num_input_tokens_seen": 238662636, "step": 4261 }, { "epoch": 9.492204899777283, "grad_norm": 22.135456085205078, "learning_rate": 1e-06, "loss": 0.5358, "num_input_tokens_seen": 238717560, "step": 4262 }, { "epoch": 9.492204899777283, "loss": 0.38586312532424927, "loss_ce": 0.000181969502591528, "loss_iou": 0.173828125, "loss_num": 0.00762939453125, "loss_xval": 0.384765625, "num_input_tokens_seen": 238717560, "step": 4262 }, { "epoch": 9.494432071269488, "grad_norm": 50.78253173828125, "learning_rate": 1e-06, "loss": 0.6531, "num_input_tokens_seen": 238775784, "step": 4263 }, { "epoch": 9.494432071269488, "loss": 0.7694522142410278, "loss_ce": 0.00016509005217812955, "loss_iou": 0.32421875, "loss_num": 0.0240478515625, "loss_xval": 0.76953125, "num_input_tokens_seen": 238775784, "step": 4263 }, { "epoch": 9.496659242761693, "grad_norm": 19.99468421936035, "learning_rate": 1e-06, "loss": 0.4683, "num_input_tokens_seen": 238834380, "step": 4264 }, { "epoch": 9.496659242761693, "loss": 0.5259066820144653, "loss_ce": 0.00014983075379859656, "loss_iou": 0.2197265625, "loss_num": 0.0169677734375, "loss_xval": 0.52734375, "num_input_tokens_seen": 238834380, "step": 4264 }, { "epoch": 9.498886414253898, "grad_norm": 20.870853424072266, "learning_rate": 1e-06, "loss": 0.7663, "num_input_tokens_seen": 238892804, "step": 4265 }, { "epoch": 9.498886414253898, "loss": 0.5954189300537109, "loss_ce": 0.00014309046673588455, "loss_iou": 0.2421875, "loss_num": 0.02197265625, "loss_xval": 0.59375, "num_input_tokens_seen": 238892804, "step": 4265 }, { "epoch": 9.501113585746102, "grad_norm": 16.311065673828125, "learning_rate": 1e-06, "loss": 0.5614, "num_input_tokens_seen": 238949684, "step": 4266 }, { "epoch": 9.501113585746102, "loss": 0.5311484336853027, "loss_ce": 0.00014256531721912324, "loss_iou": 0.2412109375, "loss_num": 0.010009765625, "loss_xval": 0.53125, "num_input_tokens_seen": 238949684, "step": 4266 }, { "epoch": 9.503340757238307, "grad_norm": 234.5572509765625, "learning_rate": 1e-06, "loss": 0.6156, "num_input_tokens_seen": 239004348, "step": 4267 }, { "epoch": 9.503340757238307, "loss": 0.7360097169876099, "loss_ce": 0.00041402934584766626, "loss_iou": 0.306640625, "loss_num": 0.0247802734375, "loss_xval": 0.734375, "num_input_tokens_seen": 239004348, "step": 4267 }, { "epoch": 9.505567928730512, "grad_norm": 22.837961196899414, "learning_rate": 1e-06, "loss": 0.5485, "num_input_tokens_seen": 239061392, "step": 4268 }, { "epoch": 9.505567928730512, "loss": 0.5970693826675415, "loss_ce": 0.0001455858291592449, "loss_iou": 0.2470703125, "loss_num": 0.0206298828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 239061392, "step": 4268 }, { "epoch": 9.507795100222717, "grad_norm": 24.823139190673828, "learning_rate": 1e-06, "loss": 0.6112, "num_input_tokens_seen": 239115904, "step": 4269 }, { "epoch": 9.507795100222717, "loss": 0.7151393890380859, "loss_ce": 0.00017360490164719522, "loss_iou": 0.265625, "loss_num": 0.037109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 239115904, "step": 4269 }, { "epoch": 9.510022271714922, "grad_norm": 31.860271453857422, "learning_rate": 1e-06, "loss": 0.5909, "num_input_tokens_seen": 239170464, "step": 4270 }, { "epoch": 9.510022271714922, "loss": 0.7071930170059204, "loss_ce": 0.00025335949612781405, "loss_iou": 0.3046875, "loss_num": 0.019287109375, "loss_xval": 0.70703125, "num_input_tokens_seen": 239170464, "step": 4270 }, { "epoch": 9.512249443207127, "grad_norm": 32.744850158691406, "learning_rate": 1e-06, "loss": 0.4666, "num_input_tokens_seen": 239227924, "step": 4271 }, { "epoch": 9.512249443207127, "loss": 0.31606927514076233, "loss_ce": 0.00027338817017152905, "loss_iou": 0.1435546875, "loss_num": 0.00567626953125, "loss_xval": 0.31640625, "num_input_tokens_seen": 239227924, "step": 4271 }, { "epoch": 9.514476614699332, "grad_norm": 24.312049865722656, "learning_rate": 1e-06, "loss": 0.6541, "num_input_tokens_seen": 239285676, "step": 4272 }, { "epoch": 9.514476614699332, "loss": 0.7336921691894531, "loss_ce": 0.0002937186509370804, "loss_iou": 0.291015625, "loss_num": 0.030517578125, "loss_xval": 0.734375, "num_input_tokens_seen": 239285676, "step": 4272 }, { "epoch": 9.516703786191536, "grad_norm": 16.174928665161133, "learning_rate": 1e-06, "loss": 0.6278, "num_input_tokens_seen": 239340228, "step": 4273 }, { "epoch": 9.516703786191536, "loss": 0.6358986496925354, "loss_ce": 0.0001564474805491045, "loss_iou": 0.263671875, "loss_num": 0.0216064453125, "loss_xval": 0.63671875, "num_input_tokens_seen": 239340228, "step": 4273 }, { "epoch": 9.518930957683741, "grad_norm": 46.78654861450195, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 239396412, "step": 4274 }, { "epoch": 9.518930957683741, "loss": 0.6387228965759277, "loss_ce": 0.00029518798692151904, "loss_iou": 0.26953125, "loss_num": 0.0198974609375, "loss_xval": 0.63671875, "num_input_tokens_seen": 239396412, "step": 4274 }, { "epoch": 9.521158129175946, "grad_norm": 14.414083480834961, "learning_rate": 1e-06, "loss": 0.374, "num_input_tokens_seen": 239450920, "step": 4275 }, { "epoch": 9.521158129175946, "loss": 0.27442389726638794, "loss_ce": 0.00013192339974921197, "loss_iou": 0.1044921875, "loss_num": 0.01318359375, "loss_xval": 0.2734375, "num_input_tokens_seen": 239450920, "step": 4275 }, { "epoch": 9.523385300668151, "grad_norm": 25.984390258789062, "learning_rate": 1e-06, "loss": 0.584, "num_input_tokens_seen": 239506768, "step": 4276 }, { "epoch": 9.523385300668151, "loss": 0.7418030500411987, "loss_ce": 0.0003479471488390118, "loss_iou": 0.314453125, "loss_num": 0.0220947265625, "loss_xval": 0.7421875, "num_input_tokens_seen": 239506768, "step": 4276 }, { "epoch": 9.525612472160356, "grad_norm": 25.130739212036133, "learning_rate": 1e-06, "loss": 0.5816, "num_input_tokens_seen": 239559000, "step": 4277 }, { "epoch": 9.525612472160356, "loss": 0.566669225692749, "loss_ce": 0.00014092770288698375, "loss_iou": 0.2333984375, "loss_num": 0.0201416015625, "loss_xval": 0.56640625, "num_input_tokens_seen": 239559000, "step": 4277 }, { "epoch": 9.52783964365256, "grad_norm": 18.2275447845459, "learning_rate": 1e-06, "loss": 0.4939, "num_input_tokens_seen": 239616448, "step": 4278 }, { "epoch": 9.52783964365256, "loss": 0.45471665263175964, "loss_ce": 0.0001268136693397537, "loss_iou": 0.2109375, "loss_num": 0.006591796875, "loss_xval": 0.455078125, "num_input_tokens_seen": 239616448, "step": 4278 }, { "epoch": 9.530066815144766, "grad_norm": 17.732545852661133, "learning_rate": 1e-06, "loss": 0.389, "num_input_tokens_seen": 239674876, "step": 4279 }, { "epoch": 9.530066815144766, "loss": 0.4159805476665497, "loss_ce": 0.00014803148224018514, "loss_iou": 0.17578125, "loss_num": 0.012939453125, "loss_xval": 0.416015625, "num_input_tokens_seen": 239674876, "step": 4279 }, { "epoch": 9.53229398663697, "grad_norm": 39.37736511230469, "learning_rate": 1e-06, "loss": 0.5871, "num_input_tokens_seen": 239732948, "step": 4280 }, { "epoch": 9.53229398663697, "loss": 0.49721166491508484, "loss_ce": 0.0001413416030118242, "loss_iou": 0.224609375, "loss_num": 0.00958251953125, "loss_xval": 0.49609375, "num_input_tokens_seen": 239732948, "step": 4280 }, { "epoch": 9.534521158129175, "grad_norm": 32.132293701171875, "learning_rate": 1e-06, "loss": 0.4756, "num_input_tokens_seen": 239789608, "step": 4281 }, { "epoch": 9.534521158129175, "loss": 0.44500744342803955, "loss_ce": 0.00018322419782634825, "loss_iou": 0.1904296875, "loss_num": 0.01263427734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 239789608, "step": 4281 }, { "epoch": 9.53674832962138, "grad_norm": 14.220974922180176, "learning_rate": 1e-06, "loss": 0.4334, "num_input_tokens_seen": 239844376, "step": 4282 }, { "epoch": 9.53674832962138, "loss": 0.40624189376831055, "loss_ce": 0.00023602474539075047, "loss_iou": 0.1484375, "loss_num": 0.02197265625, "loss_xval": 0.40625, "num_input_tokens_seen": 239844376, "step": 4282 }, { "epoch": 9.538975501113585, "grad_norm": 23.4123592376709, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 239896980, "step": 4283 }, { "epoch": 9.538975501113585, "loss": 0.43205326795578003, "loss_ce": 0.00016851615509949625, "loss_iou": 0.1884765625, "loss_num": 0.01104736328125, "loss_xval": 0.431640625, "num_input_tokens_seen": 239896980, "step": 4283 }, { "epoch": 9.54120267260579, "grad_norm": 18.76468276977539, "learning_rate": 1e-06, "loss": 0.6264, "num_input_tokens_seen": 239953088, "step": 4284 }, { "epoch": 9.54120267260579, "loss": 0.6263715624809265, "loss_ce": 0.00015087236533872783, "loss_iou": 0.287109375, "loss_num": 0.01068115234375, "loss_xval": 0.625, "num_input_tokens_seen": 239953088, "step": 4284 }, { "epoch": 9.543429844097995, "grad_norm": 15.258724212646484, "learning_rate": 1e-06, "loss": 0.5531, "num_input_tokens_seen": 240011052, "step": 4285 }, { "epoch": 9.543429844097995, "loss": 0.5455443859100342, "loss_ce": 0.00013422755000647157, "loss_iou": 0.19921875, "loss_num": 0.029541015625, "loss_xval": 0.546875, "num_input_tokens_seen": 240011052, "step": 4285 }, { "epoch": 9.5456570155902, "grad_norm": 20.07108497619629, "learning_rate": 1e-06, "loss": 0.6354, "num_input_tokens_seen": 240061388, "step": 4286 }, { "epoch": 9.5456570155902, "loss": 0.670364260673523, "loss_ce": 0.00019821789464913309, "loss_iou": 0.287109375, "loss_num": 0.0194091796875, "loss_xval": 0.671875, "num_input_tokens_seen": 240061388, "step": 4286 }, { "epoch": 9.547884187082406, "grad_norm": 16.95429229736328, "learning_rate": 1e-06, "loss": 0.5388, "num_input_tokens_seen": 240120732, "step": 4287 }, { "epoch": 9.547884187082406, "loss": 0.6176313161849976, "loss_ce": 0.00013861627667210996, "loss_iou": 0.27734375, "loss_num": 0.012939453125, "loss_xval": 0.6171875, "num_input_tokens_seen": 240120732, "step": 4287 }, { "epoch": 9.550111358574611, "grad_norm": 30.3793888092041, "learning_rate": 1e-06, "loss": 0.7166, "num_input_tokens_seen": 240176776, "step": 4288 }, { "epoch": 9.550111358574611, "loss": 0.8734985589981079, "loss_ce": 0.000207598292035982, "loss_iou": 0.359375, "loss_num": 0.0308837890625, "loss_xval": 0.875, "num_input_tokens_seen": 240176776, "step": 4288 }, { "epoch": 9.552338530066816, "grad_norm": 23.587675094604492, "learning_rate": 1e-06, "loss": 0.6852, "num_input_tokens_seen": 240232852, "step": 4289 }, { "epoch": 9.552338530066816, "loss": 0.5840327739715576, "loss_ce": 0.00017050979658961296, "loss_iou": 0.263671875, "loss_num": 0.011474609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 240232852, "step": 4289 }, { "epoch": 9.55456570155902, "grad_norm": 33.9605598449707, "learning_rate": 1e-06, "loss": 0.5779, "num_input_tokens_seen": 240288524, "step": 4290 }, { "epoch": 9.55456570155902, "loss": 0.6915979385375977, "loss_ce": 0.00019171604071743786, "loss_iou": 0.310546875, "loss_num": 0.01434326171875, "loss_xval": 0.69140625, "num_input_tokens_seen": 240288524, "step": 4290 }, { "epoch": 9.556792873051226, "grad_norm": 13.900967597961426, "learning_rate": 1e-06, "loss": 0.4135, "num_input_tokens_seen": 240344628, "step": 4291 }, { "epoch": 9.556792873051226, "loss": 0.5288236737251282, "loss_ce": 0.00013714321539737284, "loss_iou": 0.2255859375, "loss_num": 0.01544189453125, "loss_xval": 0.52734375, "num_input_tokens_seen": 240344628, "step": 4291 }, { "epoch": 9.55902004454343, "grad_norm": 40.7745246887207, "learning_rate": 1e-06, "loss": 0.5172, "num_input_tokens_seen": 240399768, "step": 4292 }, { "epoch": 9.55902004454343, "loss": 0.6885933876037598, "loss_ce": 0.0001778678852133453, "loss_iou": 0.30078125, "loss_num": 0.017333984375, "loss_xval": 0.6875, "num_input_tokens_seen": 240399768, "step": 4292 }, { "epoch": 9.561247216035635, "grad_norm": 24.22138786315918, "learning_rate": 1e-06, "loss": 0.6703, "num_input_tokens_seen": 240456252, "step": 4293 }, { "epoch": 9.561247216035635, "loss": 0.6952459812164307, "loss_ce": 0.00023865490220487118, "loss_iou": 0.265625, "loss_num": 0.03271484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 240456252, "step": 4293 }, { "epoch": 9.56347438752784, "grad_norm": 19.527559280395508, "learning_rate": 1e-06, "loss": 0.6914, "num_input_tokens_seen": 240512636, "step": 4294 }, { "epoch": 9.56347438752784, "loss": 0.63869708776474, "loss_ce": 0.0001473345619160682, "loss_iou": 0.2451171875, "loss_num": 0.0301513671875, "loss_xval": 0.63671875, "num_input_tokens_seen": 240512636, "step": 4294 }, { "epoch": 9.565701559020045, "grad_norm": 17.53272819519043, "learning_rate": 1e-06, "loss": 0.6336, "num_input_tokens_seen": 240571148, "step": 4295 }, { "epoch": 9.565701559020045, "loss": 0.7293701767921448, "loss_ce": 0.00024417019449174404, "loss_iou": 0.310546875, "loss_num": 0.0218505859375, "loss_xval": 0.73046875, "num_input_tokens_seen": 240571148, "step": 4295 }, { "epoch": 9.56792873051225, "grad_norm": 20.05321502685547, "learning_rate": 1e-06, "loss": 0.6674, "num_input_tokens_seen": 240628292, "step": 4296 }, { "epoch": 9.56792873051225, "loss": 0.6266350746154785, "loss_ce": 0.0001702026347629726, "loss_iou": 0.28125, "loss_num": 0.0128173828125, "loss_xval": 0.625, "num_input_tokens_seen": 240628292, "step": 4296 }, { "epoch": 9.570155902004455, "grad_norm": 15.255680084228516, "learning_rate": 1e-06, "loss": 0.4882, "num_input_tokens_seen": 240686052, "step": 4297 }, { "epoch": 9.570155902004455, "loss": 0.5545688271522522, "loss_ce": 0.0001254748785868287, "loss_iou": 0.2265625, "loss_num": 0.020263671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 240686052, "step": 4297 }, { "epoch": 9.57238307349666, "grad_norm": 18.931354522705078, "learning_rate": 1e-06, "loss": 0.5507, "num_input_tokens_seen": 240744448, "step": 4298 }, { "epoch": 9.57238307349666, "loss": 0.571701169013977, "loss_ce": 0.00016800707089714706, "loss_iou": 0.25390625, "loss_num": 0.0130615234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 240744448, "step": 4298 }, { "epoch": 9.574610244988865, "grad_norm": 38.29421615600586, "learning_rate": 1e-06, "loss": 0.5789, "num_input_tokens_seen": 240800872, "step": 4299 }, { "epoch": 9.574610244988865, "loss": 0.6190475821495056, "loss_ce": 0.0001510926231276244, "loss_iou": 0.27734375, "loss_num": 0.01275634765625, "loss_xval": 0.6171875, "num_input_tokens_seen": 240800872, "step": 4299 }, { "epoch": 9.57683741648107, "grad_norm": 15.936655044555664, "learning_rate": 1e-06, "loss": 0.655, "num_input_tokens_seen": 240855308, "step": 4300 }, { "epoch": 9.57683741648107, "loss": 0.7067993879318237, "loss_ce": 0.00013432535342872143, "loss_iou": 0.29296875, "loss_num": 0.024658203125, "loss_xval": 0.70703125, "num_input_tokens_seen": 240855308, "step": 4300 }, { "epoch": 9.579064587973274, "grad_norm": 14.852972030639648, "learning_rate": 1e-06, "loss": 0.5593, "num_input_tokens_seen": 240910724, "step": 4301 }, { "epoch": 9.579064587973274, "loss": 0.6589754819869995, "loss_ce": 0.00016200373647734523, "loss_iou": 0.27734375, "loss_num": 0.0211181640625, "loss_xval": 0.66015625, "num_input_tokens_seen": 240910724, "step": 4301 }, { "epoch": 9.58129175946548, "grad_norm": 18.125749588012695, "learning_rate": 1e-06, "loss": 0.5183, "num_input_tokens_seen": 240967040, "step": 4302 }, { "epoch": 9.58129175946548, "loss": 0.404930055141449, "loss_ce": 0.00014489417662844062, "loss_iou": 0.166015625, "loss_num": 0.0142822265625, "loss_xval": 0.404296875, "num_input_tokens_seen": 240967040, "step": 4302 }, { "epoch": 9.583518930957684, "grad_norm": 16.510257720947266, "learning_rate": 1e-06, "loss": 0.6927, "num_input_tokens_seen": 241021748, "step": 4303 }, { "epoch": 9.583518930957684, "loss": 0.7239894866943359, "loss_ce": 0.0027980851009488106, "loss_iou": 0.3046875, "loss_num": 0.0220947265625, "loss_xval": 0.72265625, "num_input_tokens_seen": 241021748, "step": 4303 }, { "epoch": 9.585746102449889, "grad_norm": 17.652618408203125, "learning_rate": 1e-06, "loss": 0.5067, "num_input_tokens_seen": 241078380, "step": 4304 }, { "epoch": 9.585746102449889, "loss": 0.45593586564064026, "loss_ce": 0.00012532059918157756, "loss_iou": 0.18359375, "loss_num": 0.017578125, "loss_xval": 0.455078125, "num_input_tokens_seen": 241078380, "step": 4304 }, { "epoch": 9.587973273942094, "grad_norm": 21.635093688964844, "learning_rate": 1e-06, "loss": 0.6214, "num_input_tokens_seen": 241131336, "step": 4305 }, { "epoch": 9.587973273942094, "loss": 0.6010667085647583, "loss_ce": 0.00023660104488953948, "loss_iou": 0.263671875, "loss_num": 0.01507568359375, "loss_xval": 0.6015625, "num_input_tokens_seen": 241131336, "step": 4305 }, { "epoch": 9.590200445434299, "grad_norm": 21.13984489440918, "learning_rate": 1e-06, "loss": 0.5725, "num_input_tokens_seen": 241188112, "step": 4306 }, { "epoch": 9.590200445434299, "loss": 0.526428759098053, "loss_ce": 0.0003057234571315348, "loss_iou": 0.208984375, "loss_num": 0.0216064453125, "loss_xval": 0.52734375, "num_input_tokens_seen": 241188112, "step": 4306 }, { "epoch": 9.592427616926503, "grad_norm": 16.988536834716797, "learning_rate": 1e-06, "loss": 0.614, "num_input_tokens_seen": 241248100, "step": 4307 }, { "epoch": 9.592427616926503, "loss": 0.6281224489212036, "loss_ce": 0.00019275453814771026, "loss_iou": 0.2734375, "loss_num": 0.0162353515625, "loss_xval": 0.62890625, "num_input_tokens_seen": 241248100, "step": 4307 }, { "epoch": 9.594654788418708, "grad_norm": 21.287412643432617, "learning_rate": 1e-06, "loss": 0.7941, "num_input_tokens_seen": 241303028, "step": 4308 }, { "epoch": 9.594654788418708, "loss": 1.0175116062164307, "loss_ce": 0.00017758479225449264, "loss_iou": 0.416015625, "loss_num": 0.036865234375, "loss_xval": 1.015625, "num_input_tokens_seen": 241303028, "step": 4308 }, { "epoch": 9.596881959910913, "grad_norm": 17.50885009765625, "learning_rate": 1e-06, "loss": 0.5592, "num_input_tokens_seen": 241359508, "step": 4309 }, { "epoch": 9.596881959910913, "loss": 0.6614413261413574, "loss_ce": 0.00018642100621946156, "loss_iou": 0.283203125, "loss_num": 0.0194091796875, "loss_xval": 0.66015625, "num_input_tokens_seen": 241359508, "step": 4309 }, { "epoch": 9.599109131403118, "grad_norm": 23.472171783447266, "learning_rate": 1e-06, "loss": 0.7445, "num_input_tokens_seen": 241417160, "step": 4310 }, { "epoch": 9.599109131403118, "loss": 0.797520101070404, "loss_ce": 0.00015685184916947037, "loss_iou": 0.3359375, "loss_num": 0.0247802734375, "loss_xval": 0.796875, "num_input_tokens_seen": 241417160, "step": 4310 }, { "epoch": 9.601336302895323, "grad_norm": 29.62855339050293, "learning_rate": 1e-06, "loss": 0.7193, "num_input_tokens_seen": 241473892, "step": 4311 }, { "epoch": 9.601336302895323, "loss": 0.7771183848381042, "loss_ce": 0.00014080063556320965, "loss_iou": 0.33203125, "loss_num": 0.0230712890625, "loss_xval": 0.77734375, "num_input_tokens_seen": 241473892, "step": 4311 }, { "epoch": 9.603563474387528, "grad_norm": 19.706748962402344, "learning_rate": 1e-06, "loss": 0.6076, "num_input_tokens_seen": 241532188, "step": 4312 }, { "epoch": 9.603563474387528, "loss": 0.7907249927520752, "loss_ce": 0.00031976206810213625, "loss_iou": 0.333984375, "loss_num": 0.0250244140625, "loss_xval": 0.7890625, "num_input_tokens_seen": 241532188, "step": 4312 }, { "epoch": 9.605790645879733, "grad_norm": 19.344236373901367, "learning_rate": 1e-06, "loss": 0.565, "num_input_tokens_seen": 241587928, "step": 4313 }, { "epoch": 9.605790645879733, "loss": 0.6804521083831787, "loss_ce": 0.0001542992249596864, "loss_iou": 0.30078125, "loss_num": 0.01556396484375, "loss_xval": 0.6796875, "num_input_tokens_seen": 241587928, "step": 4313 }, { "epoch": 9.608017817371937, "grad_norm": 15.50113582611084, "learning_rate": 1e-06, "loss": 0.399, "num_input_tokens_seen": 241640704, "step": 4314 }, { "epoch": 9.608017817371937, "loss": 0.44986775517463684, "loss_ce": 0.00013020676851738244, "loss_iou": 0.2041015625, "loss_num": 0.00836181640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 241640704, "step": 4314 }, { "epoch": 9.610244988864142, "grad_norm": 36.39356994628906, "learning_rate": 1e-06, "loss": 0.559, "num_input_tokens_seen": 241693484, "step": 4315 }, { "epoch": 9.610244988864142, "loss": 0.7055351138114929, "loss_ce": 0.000456988753285259, "loss_iou": 0.28125, "loss_num": 0.028076171875, "loss_xval": 0.703125, "num_input_tokens_seen": 241693484, "step": 4315 }, { "epoch": 9.612472160356347, "grad_norm": 16.385406494140625, "learning_rate": 1e-06, "loss": 0.6239, "num_input_tokens_seen": 241747860, "step": 4316 }, { "epoch": 9.612472160356347, "loss": 0.7735809087753296, "loss_ce": 0.0005096091772429645, "loss_iou": 0.345703125, "loss_num": 0.0166015625, "loss_xval": 0.7734375, "num_input_tokens_seen": 241747860, "step": 4316 }, { "epoch": 9.614699331848552, "grad_norm": 18.57633399963379, "learning_rate": 1e-06, "loss": 0.6124, "num_input_tokens_seen": 241802996, "step": 4317 }, { "epoch": 9.614699331848552, "loss": 0.6089174747467041, "loss_ce": 0.0001528042630525306, "loss_iou": 0.2578125, "loss_num": 0.0189208984375, "loss_xval": 0.609375, "num_input_tokens_seen": 241802996, "step": 4317 }, { "epoch": 9.616926503340757, "grad_norm": 17.626914978027344, "learning_rate": 1e-06, "loss": 0.4423, "num_input_tokens_seen": 241857684, "step": 4318 }, { "epoch": 9.616926503340757, "loss": 0.41496047377586365, "loss_ce": 0.0001655758242122829, "loss_iou": 0.1796875, "loss_num": 0.01092529296875, "loss_xval": 0.4140625, "num_input_tokens_seen": 241857684, "step": 4318 }, { "epoch": 9.619153674832962, "grad_norm": 23.88527488708496, "learning_rate": 1e-06, "loss": 0.6528, "num_input_tokens_seen": 241911896, "step": 4319 }, { "epoch": 9.619153674832962, "loss": 0.8219267129898071, "loss_ce": 0.00014938012463971972, "loss_iou": 0.34765625, "loss_num": 0.0250244140625, "loss_xval": 0.8203125, "num_input_tokens_seen": 241911896, "step": 4319 }, { "epoch": 9.621380846325167, "grad_norm": 14.68041706085205, "learning_rate": 1e-06, "loss": 0.3875, "num_input_tokens_seen": 241969944, "step": 4320 }, { "epoch": 9.621380846325167, "loss": 0.4286039471626282, "loss_ce": 0.00013716876856051385, "loss_iou": 0.1865234375, "loss_num": 0.01116943359375, "loss_xval": 0.427734375, "num_input_tokens_seen": 241969944, "step": 4320 }, { "epoch": 9.623608017817372, "grad_norm": 24.28359603881836, "learning_rate": 1e-06, "loss": 0.6069, "num_input_tokens_seen": 242029492, "step": 4321 }, { "epoch": 9.623608017817372, "loss": 0.6774266958236694, "loss_ce": 0.00018064204778056592, "loss_iou": 0.318359375, "loss_num": 0.00811767578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 242029492, "step": 4321 }, { "epoch": 9.625835189309576, "grad_norm": 22.979413986206055, "learning_rate": 1e-06, "loss": 0.6953, "num_input_tokens_seen": 242084892, "step": 4322 }, { "epoch": 9.625835189309576, "loss": 0.7274185419082642, "loss_ce": 0.0001846232480602339, "loss_iou": 0.31640625, "loss_num": 0.0189208984375, "loss_xval": 0.7265625, "num_input_tokens_seen": 242084892, "step": 4322 }, { "epoch": 9.628062360801781, "grad_norm": 17.841794967651367, "learning_rate": 1e-06, "loss": 0.468, "num_input_tokens_seen": 242139172, "step": 4323 }, { "epoch": 9.628062360801781, "loss": 0.5101624131202698, "loss_ce": 0.00015266051923390478, "loss_iou": 0.2314453125, "loss_num": 0.00958251953125, "loss_xval": 0.51171875, "num_input_tokens_seen": 242139172, "step": 4323 }, { "epoch": 9.630289532293986, "grad_norm": 20.742664337158203, "learning_rate": 1e-06, "loss": 0.517, "num_input_tokens_seen": 242196196, "step": 4324 }, { "epoch": 9.630289532293986, "loss": 0.5766589045524597, "loss_ce": 0.0008532421197742224, "loss_iou": 0.2373046875, "loss_num": 0.020263671875, "loss_xval": 0.57421875, "num_input_tokens_seen": 242196196, "step": 4324 }, { "epoch": 9.632516703786191, "grad_norm": 60.28476333618164, "learning_rate": 1e-06, "loss": 0.6853, "num_input_tokens_seen": 242251812, "step": 4325 }, { "epoch": 9.632516703786191, "loss": 0.5879150629043579, "loss_ce": 0.00014649088552687317, "loss_iou": 0.2470703125, "loss_num": 0.0185546875, "loss_xval": 0.5859375, "num_input_tokens_seen": 242251812, "step": 4325 }, { "epoch": 9.634743875278396, "grad_norm": 66.29544067382812, "learning_rate": 1e-06, "loss": 0.6181, "num_input_tokens_seen": 242308236, "step": 4326 }, { "epoch": 9.634743875278396, "loss": 0.5924375057220459, "loss_ce": 0.00015232324949465692, "loss_iou": 0.232421875, "loss_num": 0.0255126953125, "loss_xval": 0.59375, "num_input_tokens_seen": 242308236, "step": 4326 }, { "epoch": 9.6369710467706, "grad_norm": 14.150559425354004, "learning_rate": 1e-06, "loss": 0.4069, "num_input_tokens_seen": 242364656, "step": 4327 }, { "epoch": 9.6369710467706, "loss": 0.4071110486984253, "loss_ce": 0.00012862987932749093, "loss_iou": 0.1826171875, "loss_num": 0.00848388671875, "loss_xval": 0.40625, "num_input_tokens_seen": 242364656, "step": 4327 }, { "epoch": 9.639198218262806, "grad_norm": 21.00891876220703, "learning_rate": 1e-06, "loss": 0.6717, "num_input_tokens_seen": 242422764, "step": 4328 }, { "epoch": 9.639198218262806, "loss": 0.8691731691360474, "loss_ce": 0.0029012321028858423, "loss_iou": 0.345703125, "loss_num": 0.03466796875, "loss_xval": 0.8671875, "num_input_tokens_seen": 242422764, "step": 4328 }, { "epoch": 9.64142538975501, "grad_norm": 21.65594482421875, "learning_rate": 1e-06, "loss": 0.5172, "num_input_tokens_seen": 242480756, "step": 4329 }, { "epoch": 9.64142538975501, "loss": 0.5980610251426697, "loss_ce": 0.00016064970986917615, "loss_iou": 0.2451171875, "loss_num": 0.0218505859375, "loss_xval": 0.59765625, "num_input_tokens_seen": 242480756, "step": 4329 }, { "epoch": 9.643652561247215, "grad_norm": 19.103317260742188, "learning_rate": 1e-06, "loss": 0.7951, "num_input_tokens_seen": 242536328, "step": 4330 }, { "epoch": 9.643652561247215, "loss": 0.6043500900268555, "loss_ce": 0.00022415511193685234, "loss_iou": 0.25, "loss_num": 0.0203857421875, "loss_xval": 0.60546875, "num_input_tokens_seen": 242536328, "step": 4330 }, { "epoch": 9.64587973273942, "grad_norm": 18.600261688232422, "learning_rate": 1e-06, "loss": 0.6719, "num_input_tokens_seen": 242594076, "step": 4331 }, { "epoch": 9.64587973273942, "loss": 0.7887901663780212, "loss_ce": 0.00021596970327664167, "loss_iou": 0.3203125, "loss_num": 0.0299072265625, "loss_xval": 0.7890625, "num_input_tokens_seen": 242594076, "step": 4331 }, { "epoch": 9.648106904231625, "grad_norm": 23.29888343811035, "learning_rate": 1e-06, "loss": 0.5155, "num_input_tokens_seen": 242647136, "step": 4332 }, { "epoch": 9.648106904231625, "loss": 0.44800740480422974, "loss_ce": 0.00013143845717422664, "loss_iou": 0.185546875, "loss_num": 0.015380859375, "loss_xval": 0.447265625, "num_input_tokens_seen": 242647136, "step": 4332 }, { "epoch": 9.65033407572383, "grad_norm": 24.5333194732666, "learning_rate": 1e-06, "loss": 0.5333, "num_input_tokens_seen": 242700824, "step": 4333 }, { "epoch": 9.65033407572383, "loss": 0.5556710958480835, "loss_ce": 0.00012911748490296304, "loss_iou": 0.25, "loss_num": 0.0108642578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 242700824, "step": 4333 }, { "epoch": 9.652561247216035, "grad_norm": 17.157747268676758, "learning_rate": 1e-06, "loss": 0.7577, "num_input_tokens_seen": 242759000, "step": 4334 }, { "epoch": 9.652561247216035, "loss": 0.7171846628189087, "loss_ce": 0.00014364052913151681, "loss_iou": 0.31640625, "loss_num": 0.0172119140625, "loss_xval": 0.71875, "num_input_tokens_seen": 242759000, "step": 4334 }, { "epoch": 9.654788418708241, "grad_norm": 26.958030700683594, "learning_rate": 1e-06, "loss": 0.5635, "num_input_tokens_seen": 242814612, "step": 4335 }, { "epoch": 9.654788418708241, "loss": 0.517261266708374, "loss_ce": 0.00017140991985797882, "loss_iou": 0.234375, "loss_num": 0.0096435546875, "loss_xval": 0.515625, "num_input_tokens_seen": 242814612, "step": 4335 }, { "epoch": 9.657015590200446, "grad_norm": 13.9208345413208, "learning_rate": 1e-06, "loss": 0.5748, "num_input_tokens_seen": 242871532, "step": 4336 }, { "epoch": 9.657015590200446, "loss": 0.7538334131240845, "loss_ce": 0.00017131002095993608, "loss_iou": 0.283203125, "loss_num": 0.037109375, "loss_xval": 0.75390625, "num_input_tokens_seen": 242871532, "step": 4336 }, { "epoch": 9.659242761692651, "grad_norm": 19.94957733154297, "learning_rate": 1e-06, "loss": 0.5622, "num_input_tokens_seen": 242927132, "step": 4337 }, { "epoch": 9.659242761692651, "loss": 0.6652075052261353, "loss_ce": 0.00016842935292515904, "loss_iou": 0.2890625, "loss_num": 0.0169677734375, "loss_xval": 0.6640625, "num_input_tokens_seen": 242927132, "step": 4337 }, { "epoch": 9.661469933184856, "grad_norm": 18.527286529541016, "learning_rate": 1e-06, "loss": 0.7037, "num_input_tokens_seen": 242980644, "step": 4338 }, { "epoch": 9.661469933184856, "loss": 0.798882007598877, "loss_ce": 0.00017601504805497825, "loss_iou": 0.341796875, "loss_num": 0.0234375, "loss_xval": 0.796875, "num_input_tokens_seen": 242980644, "step": 4338 }, { "epoch": 9.66369710467706, "grad_norm": 24.58388328552246, "learning_rate": 1e-06, "loss": 0.623, "num_input_tokens_seen": 243034252, "step": 4339 }, { "epoch": 9.66369710467706, "loss": 0.56207674741745, "loss_ce": 0.0003091880353167653, "loss_iou": 0.2578125, "loss_num": 0.00958251953125, "loss_xval": 0.5625, "num_input_tokens_seen": 243034252, "step": 4339 }, { "epoch": 9.665924276169266, "grad_norm": 22.670303344726562, "learning_rate": 1e-06, "loss": 0.6743, "num_input_tokens_seen": 243091100, "step": 4340 }, { "epoch": 9.665924276169266, "loss": 0.5509505867958069, "loss_ce": 0.00016935347230173647, "loss_iou": 0.251953125, "loss_num": 0.0093994140625, "loss_xval": 0.55078125, "num_input_tokens_seen": 243091100, "step": 4340 }, { "epoch": 9.66815144766147, "grad_norm": 24.056676864624023, "learning_rate": 1e-06, "loss": 0.7283, "num_input_tokens_seen": 243144624, "step": 4341 }, { "epoch": 9.66815144766147, "loss": 0.876803994178772, "loss_ce": 0.0002171122032450512, "loss_iou": 0.375, "loss_num": 0.0255126953125, "loss_xval": 0.875, "num_input_tokens_seen": 243144624, "step": 4341 }, { "epoch": 9.670378619153675, "grad_norm": 24.195653915405273, "learning_rate": 1e-06, "loss": 0.5525, "num_input_tokens_seen": 243202736, "step": 4342 }, { "epoch": 9.670378619153675, "loss": 0.5127952098846436, "loss_ce": 0.00022197004000190645, "loss_iou": 0.2158203125, "loss_num": 0.0162353515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 243202736, "step": 4342 }, { "epoch": 9.67260579064588, "grad_norm": 20.252214431762695, "learning_rate": 1e-06, "loss": 0.5979, "num_input_tokens_seen": 243260404, "step": 4343 }, { "epoch": 9.67260579064588, "loss": 0.569305956363678, "loss_ce": 0.00021413374633993953, "loss_iou": 0.2421875, "loss_num": 0.01708984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 243260404, "step": 4343 }, { "epoch": 9.674832962138085, "grad_norm": 22.576656341552734, "learning_rate": 1e-06, "loss": 0.6199, "num_input_tokens_seen": 243315828, "step": 4344 }, { "epoch": 9.674832962138085, "loss": 0.7008746266365051, "loss_ce": 0.00019105095998384058, "loss_iou": 0.302734375, "loss_num": 0.0194091796875, "loss_xval": 0.69921875, "num_input_tokens_seen": 243315828, "step": 4344 }, { "epoch": 9.67706013363029, "grad_norm": 23.109046936035156, "learning_rate": 1e-06, "loss": 0.6222, "num_input_tokens_seen": 243371304, "step": 4345 }, { "epoch": 9.67706013363029, "loss": 0.6012382507324219, "loss_ce": 0.0001640522968955338, "loss_iou": 0.265625, "loss_num": 0.01409912109375, "loss_xval": 0.6015625, "num_input_tokens_seen": 243371304, "step": 4345 }, { "epoch": 9.679287305122495, "grad_norm": 17.9921875, "learning_rate": 1e-06, "loss": 0.6403, "num_input_tokens_seen": 243421812, "step": 4346 }, { "epoch": 9.679287305122495, "loss": 0.7237908840179443, "loss_ce": 0.00015808289754204452, "loss_iou": 0.30859375, "loss_num": 0.0216064453125, "loss_xval": 0.72265625, "num_input_tokens_seen": 243421812, "step": 4346 }, { "epoch": 9.6815144766147, "grad_norm": 22.943727493286133, "learning_rate": 1e-06, "loss": 0.5186, "num_input_tokens_seen": 243479024, "step": 4347 }, { "epoch": 9.6815144766147, "loss": 0.4597442150115967, "loss_ce": 0.0014922881964594126, "loss_iou": 0.2021484375, "loss_num": 0.0107421875, "loss_xval": 0.458984375, "num_input_tokens_seen": 243479024, "step": 4347 }, { "epoch": 9.683741648106905, "grad_norm": 24.374197006225586, "learning_rate": 1e-06, "loss": 0.6149, "num_input_tokens_seen": 243535892, "step": 4348 }, { "epoch": 9.683741648106905, "loss": 0.7361659407615662, "loss_ce": 0.00020402228983584791, "loss_iou": 0.3125, "loss_num": 0.0223388671875, "loss_xval": 0.734375, "num_input_tokens_seen": 243535892, "step": 4348 }, { "epoch": 9.68596881959911, "grad_norm": 22.339231491088867, "learning_rate": 1e-06, "loss": 0.735, "num_input_tokens_seen": 243591976, "step": 4349 }, { "epoch": 9.68596881959911, "loss": 0.8295117020606995, "loss_ce": 0.00016598097863607109, "loss_iou": 0.373046875, "loss_num": 0.016845703125, "loss_xval": 0.828125, "num_input_tokens_seen": 243591976, "step": 4349 }, { "epoch": 9.688195991091314, "grad_norm": 14.869282722473145, "learning_rate": 1e-06, "loss": 0.4473, "num_input_tokens_seen": 243648860, "step": 4350 }, { "epoch": 9.688195991091314, "loss": 0.5270088911056519, "loss_ce": 0.00015341459948103875, "loss_iou": 0.240234375, "loss_num": 0.00921630859375, "loss_xval": 0.52734375, "num_input_tokens_seen": 243648860, "step": 4350 }, { "epoch": 9.690423162583519, "grad_norm": 18.157405853271484, "learning_rate": 1e-06, "loss": 0.6345, "num_input_tokens_seen": 243705736, "step": 4351 }, { "epoch": 9.690423162583519, "loss": 0.6656748056411743, "loss_ce": 0.0001474730233894661, "loss_iou": 0.27734375, "loss_num": 0.0220947265625, "loss_xval": 0.6640625, "num_input_tokens_seen": 243705736, "step": 4351 }, { "epoch": 9.692650334075724, "grad_norm": 20.70793342590332, "learning_rate": 1e-06, "loss": 0.6055, "num_input_tokens_seen": 243763076, "step": 4352 }, { "epoch": 9.692650334075724, "loss": 0.6417899131774902, "loss_ce": 0.0001883807562990114, "loss_iou": 0.28125, "loss_num": 0.015625, "loss_xval": 0.640625, "num_input_tokens_seen": 243763076, "step": 4352 }, { "epoch": 9.694877505567929, "grad_norm": 16.69988441467285, "learning_rate": 1e-06, "loss": 0.4398, "num_input_tokens_seen": 243819884, "step": 4353 }, { "epoch": 9.694877505567929, "loss": 0.4072994291782379, "loss_ce": 0.00019493894069455564, "loss_iou": 0.1796875, "loss_num": 0.009521484375, "loss_xval": 0.40625, "num_input_tokens_seen": 243819884, "step": 4353 }, { "epoch": 9.697104677060134, "grad_norm": 18.394502639770508, "learning_rate": 1e-06, "loss": 0.709, "num_input_tokens_seen": 243874696, "step": 4354 }, { "epoch": 9.697104677060134, "loss": 0.7145628333091736, "loss_ce": 0.00020736586884595454, "loss_iou": 0.3125, "loss_num": 0.017822265625, "loss_xval": 0.71484375, "num_input_tokens_seen": 243874696, "step": 4354 }, { "epoch": 9.699331848552339, "grad_norm": 25.20227813720703, "learning_rate": 1e-06, "loss": 0.6424, "num_input_tokens_seen": 243930484, "step": 4355 }, { "epoch": 9.699331848552339, "loss": 0.6150010228157043, "loss_ce": 0.00013285188470035791, "loss_iou": 0.267578125, "loss_num": 0.0162353515625, "loss_xval": 0.61328125, "num_input_tokens_seen": 243930484, "step": 4355 }, { "epoch": 9.701559020044543, "grad_norm": 21.711627960205078, "learning_rate": 1e-06, "loss": 0.6181, "num_input_tokens_seen": 243988028, "step": 4356 }, { "epoch": 9.701559020044543, "loss": 0.7733531594276428, "loss_ce": 0.00015979193267412484, "loss_iou": 0.3359375, "loss_num": 0.0206298828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 243988028, "step": 4356 }, { "epoch": 9.703786191536748, "grad_norm": 23.96913719177246, "learning_rate": 1e-06, "loss": 0.5364, "num_input_tokens_seen": 244042796, "step": 4357 }, { "epoch": 9.703786191536748, "loss": 0.43648862838745117, "loss_ce": 0.00020933088671881706, "loss_iou": 0.1904296875, "loss_num": 0.01123046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 244042796, "step": 4357 }, { "epoch": 9.706013363028953, "grad_norm": 16.607881546020508, "learning_rate": 1e-06, "loss": 0.7013, "num_input_tokens_seen": 244100220, "step": 4358 }, { "epoch": 9.706013363028953, "loss": 0.651787281036377, "loss_ce": 0.00017597780970390886, "loss_iou": 0.302734375, "loss_num": 0.00897216796875, "loss_xval": 0.65234375, "num_input_tokens_seen": 244100220, "step": 4358 }, { "epoch": 9.708240534521158, "grad_norm": 17.134170532226562, "learning_rate": 1e-06, "loss": 0.596, "num_input_tokens_seen": 244158116, "step": 4359 }, { "epoch": 9.708240534521158, "loss": 0.6266348958015442, "loss_ce": 0.0001700021093711257, "loss_iou": 0.271484375, "loss_num": 0.0167236328125, "loss_xval": 0.625, "num_input_tokens_seen": 244158116, "step": 4359 }, { "epoch": 9.710467706013363, "grad_norm": 18.850582122802734, "learning_rate": 1e-06, "loss": 0.4285, "num_input_tokens_seen": 244214688, "step": 4360 }, { "epoch": 9.710467706013363, "loss": 0.4430268406867981, "loss_ce": 0.0001557298528496176, "loss_iou": 0.1943359375, "loss_num": 0.0106201171875, "loss_xval": 0.443359375, "num_input_tokens_seen": 244214688, "step": 4360 }, { "epoch": 9.712694877505568, "grad_norm": 15.182866096496582, "learning_rate": 1e-06, "loss": 0.461, "num_input_tokens_seen": 244272276, "step": 4361 }, { "epoch": 9.712694877505568, "loss": 0.4096102714538574, "loss_ce": 0.00018646058742888272, "loss_iou": 0.1826171875, "loss_num": 0.0087890625, "loss_xval": 0.41015625, "num_input_tokens_seen": 244272276, "step": 4361 }, { "epoch": 9.714922048997773, "grad_norm": 21.032060623168945, "learning_rate": 1e-06, "loss": 0.5245, "num_input_tokens_seen": 244326084, "step": 4362 }, { "epoch": 9.714922048997773, "loss": 0.5369995832443237, "loss_ce": 0.00013436358131002635, "loss_iou": 0.2353515625, "loss_num": 0.01318359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 244326084, "step": 4362 }, { "epoch": 9.717149220489977, "grad_norm": 16.875015258789062, "learning_rate": 1e-06, "loss": 0.7523, "num_input_tokens_seen": 244383708, "step": 4363 }, { "epoch": 9.717149220489977, "loss": 0.8245912790298462, "loss_ce": 0.00025050187832675874, "loss_iou": 0.3203125, "loss_num": 0.036865234375, "loss_xval": 0.82421875, "num_input_tokens_seen": 244383708, "step": 4363 }, { "epoch": 9.719376391982182, "grad_norm": 15.091886520385742, "learning_rate": 1e-06, "loss": 0.4595, "num_input_tokens_seen": 244436700, "step": 4364 }, { "epoch": 9.719376391982182, "loss": 0.2986956834793091, "loss_ce": 0.00053892977302894, "loss_iou": 0.11376953125, "loss_num": 0.01416015625, "loss_xval": 0.298828125, "num_input_tokens_seen": 244436700, "step": 4364 }, { "epoch": 9.721603563474387, "grad_norm": 36.89336013793945, "learning_rate": 1e-06, "loss": 0.6042, "num_input_tokens_seen": 244492464, "step": 4365 }, { "epoch": 9.721603563474387, "loss": 0.7046360373497009, "loss_ce": 0.0010227648308500648, "loss_iou": 0.30078125, "loss_num": 0.0203857421875, "loss_xval": 0.703125, "num_input_tokens_seen": 244492464, "step": 4365 }, { "epoch": 9.723830734966592, "grad_norm": 21.095430374145508, "learning_rate": 1e-06, "loss": 0.7696, "num_input_tokens_seen": 244548864, "step": 4366 }, { "epoch": 9.723830734966592, "loss": 0.7162793874740601, "loss_ce": 0.0007032152498140931, "loss_iou": 0.318359375, "loss_num": 0.0152587890625, "loss_xval": 0.71484375, "num_input_tokens_seen": 244548864, "step": 4366 }, { "epoch": 9.726057906458797, "grad_norm": 17.61524772644043, "learning_rate": 1e-06, "loss": 0.4847, "num_input_tokens_seen": 244602692, "step": 4367 }, { "epoch": 9.726057906458797, "loss": 0.505713939666748, "loss_ce": 0.00022077930043451488, "loss_iou": 0.23046875, "loss_num": 0.00885009765625, "loss_xval": 0.50390625, "num_input_tokens_seen": 244602692, "step": 4367 }, { "epoch": 9.728285077951002, "grad_norm": 23.061426162719727, "learning_rate": 1e-06, "loss": 0.519, "num_input_tokens_seen": 244657716, "step": 4368 }, { "epoch": 9.728285077951002, "loss": 0.48245200514793396, "loss_ce": 0.00015220226487144828, "loss_iou": 0.208984375, "loss_num": 0.0128173828125, "loss_xval": 0.482421875, "num_input_tokens_seen": 244657716, "step": 4368 }, { "epoch": 9.730512249443207, "grad_norm": 19.16279411315918, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 244711224, "step": 4369 }, { "epoch": 9.730512249443207, "loss": 0.7714163661003113, "loss_ce": 0.0001761687162797898, "loss_iou": 0.333984375, "loss_num": 0.0203857421875, "loss_xval": 0.76953125, "num_input_tokens_seen": 244711224, "step": 4369 }, { "epoch": 9.732739420935411, "grad_norm": 17.79773712158203, "learning_rate": 1e-06, "loss": 0.5996, "num_input_tokens_seen": 244765972, "step": 4370 }, { "epoch": 9.732739420935411, "loss": 0.5670474171638489, "loss_ce": 0.00015290510782506317, "loss_iou": 0.244140625, "loss_num": 0.0157470703125, "loss_xval": 0.56640625, "num_input_tokens_seen": 244765972, "step": 4370 }, { "epoch": 9.734966592427616, "grad_norm": 16.366491317749023, "learning_rate": 1e-06, "loss": 0.6912, "num_input_tokens_seen": 244822468, "step": 4371 }, { "epoch": 9.734966592427616, "loss": 0.6842033863067627, "loss_ce": 0.0001213748037116602, "loss_iou": 0.3046875, "loss_num": 0.01470947265625, "loss_xval": 0.68359375, "num_input_tokens_seen": 244822468, "step": 4371 }, { "epoch": 9.737193763919821, "grad_norm": 21.454408645629883, "learning_rate": 1e-06, "loss": 0.4793, "num_input_tokens_seen": 244880364, "step": 4372 }, { "epoch": 9.737193763919821, "loss": 0.38819241523742676, "loss_ce": 0.00013089305139146745, "loss_iou": 0.177734375, "loss_num": 0.006561279296875, "loss_xval": 0.388671875, "num_input_tokens_seen": 244880364, "step": 4372 }, { "epoch": 9.739420935412026, "grad_norm": 52.516292572021484, "learning_rate": 1e-06, "loss": 0.4779, "num_input_tokens_seen": 244935344, "step": 4373 }, { "epoch": 9.739420935412026, "loss": 0.4206976294517517, "loss_ce": 0.0007757844286970794, "loss_iou": 0.162109375, "loss_num": 0.0194091796875, "loss_xval": 0.419921875, "num_input_tokens_seen": 244935344, "step": 4373 }, { "epoch": 9.74164810690423, "grad_norm": 12.701075553894043, "learning_rate": 1e-06, "loss": 0.5218, "num_input_tokens_seen": 244991972, "step": 4374 }, { "epoch": 9.74164810690423, "loss": 0.381612628698349, "loss_ce": 0.00014290204853750765, "loss_iou": 0.17578125, "loss_num": 0.006072998046875, "loss_xval": 0.380859375, "num_input_tokens_seen": 244991972, "step": 4374 }, { "epoch": 9.743875278396436, "grad_norm": 20.51826286315918, "learning_rate": 1e-06, "loss": 0.5475, "num_input_tokens_seen": 245048956, "step": 4375 }, { "epoch": 9.743875278396436, "loss": 0.5812922120094299, "loss_ce": 0.00029854726744815707, "loss_iou": 0.248046875, "loss_num": 0.01708984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 245048956, "step": 4375 }, { "epoch": 9.74610244988864, "grad_norm": 19.916961669921875, "learning_rate": 1e-06, "loss": 0.5741, "num_input_tokens_seen": 245104028, "step": 4376 }, { "epoch": 9.74610244988864, "loss": 0.4663507342338562, "loss_ce": 0.0007135132909752429, "loss_iou": 0.1845703125, "loss_num": 0.01904296875, "loss_xval": 0.46484375, "num_input_tokens_seen": 245104028, "step": 4376 }, { "epoch": 9.748329621380847, "grad_norm": 18.798843383789062, "learning_rate": 1e-06, "loss": 0.7112, "num_input_tokens_seen": 245161592, "step": 4377 }, { "epoch": 9.748329621380847, "loss": 0.5074524283409119, "loss_ce": 0.00012819524272345006, "loss_iou": 0.2255859375, "loss_num": 0.01129150390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 245161592, "step": 4377 }, { "epoch": 9.750556792873052, "grad_norm": 14.465723991394043, "learning_rate": 1e-06, "loss": 0.4476, "num_input_tokens_seen": 245218120, "step": 4378 }, { "epoch": 9.750556792873052, "loss": 0.5026047825813293, "loss_ce": 0.00016336789121851325, "loss_iou": 0.19921875, "loss_num": 0.020751953125, "loss_xval": 0.50390625, "num_input_tokens_seen": 245218120, "step": 4378 }, { "epoch": 9.752783964365257, "grad_norm": 148.81324768066406, "learning_rate": 1e-06, "loss": 0.4971, "num_input_tokens_seen": 245276528, "step": 4379 }, { "epoch": 9.752783964365257, "loss": 0.5717120170593262, "loss_ce": 0.00017881978419609368, "loss_iou": 0.255859375, "loss_num": 0.01214599609375, "loss_xval": 0.5703125, "num_input_tokens_seen": 245276528, "step": 4379 }, { "epoch": 9.755011135857462, "grad_norm": 22.322999954223633, "learning_rate": 1e-06, "loss": 0.8127, "num_input_tokens_seen": 245332196, "step": 4380 }, { "epoch": 9.755011135857462, "loss": 0.8225026726722717, "loss_ce": 0.0002370491565670818, "loss_iou": 0.359375, "loss_num": 0.0211181640625, "loss_xval": 0.8203125, "num_input_tokens_seen": 245332196, "step": 4380 }, { "epoch": 9.757238307349667, "grad_norm": 15.783463478088379, "learning_rate": 1e-06, "loss": 0.5557, "num_input_tokens_seen": 245386960, "step": 4381 }, { "epoch": 9.757238307349667, "loss": 0.670534610748291, "loss_ce": 0.0001244655722985044, "loss_iou": 0.2890625, "loss_num": 0.018798828125, "loss_xval": 0.671875, "num_input_tokens_seen": 245386960, "step": 4381 }, { "epoch": 9.759465478841872, "grad_norm": 16.41698455810547, "learning_rate": 1e-06, "loss": 0.3972, "num_input_tokens_seen": 245439152, "step": 4382 }, { "epoch": 9.759465478841872, "loss": 0.38251131772994995, "loss_ce": 0.00012604420771822333, "loss_iou": 0.166015625, "loss_num": 0.0098876953125, "loss_xval": 0.3828125, "num_input_tokens_seen": 245439152, "step": 4382 }, { "epoch": 9.761692650334076, "grad_norm": 21.44931983947754, "learning_rate": 1e-06, "loss": 0.6739, "num_input_tokens_seen": 245498164, "step": 4383 }, { "epoch": 9.761692650334076, "loss": 0.76970374584198, "loss_ce": 0.0001725103793432936, "loss_iou": 0.3203125, "loss_num": 0.0255126953125, "loss_xval": 0.76953125, "num_input_tokens_seen": 245498164, "step": 4383 }, { "epoch": 9.763919821826281, "grad_norm": 19.587390899658203, "learning_rate": 1e-06, "loss": 0.4272, "num_input_tokens_seen": 245552244, "step": 4384 }, { "epoch": 9.763919821826281, "loss": 0.4954010248184204, "loss_ce": 0.0001617551752133295, "loss_iou": 0.21484375, "loss_num": 0.0130615234375, "loss_xval": 0.49609375, "num_input_tokens_seen": 245552244, "step": 4384 }, { "epoch": 9.766146993318486, "grad_norm": 16.423864364624023, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 245607924, "step": 4385 }, { "epoch": 9.766146993318486, "loss": 0.659552276134491, "loss_ce": 0.0001284128229599446, "loss_iou": 0.2734375, "loss_num": 0.0224609375, "loss_xval": 0.66015625, "num_input_tokens_seen": 245607924, "step": 4385 }, { "epoch": 9.768374164810691, "grad_norm": 17.927560806274414, "learning_rate": 1e-06, "loss": 0.5699, "num_input_tokens_seen": 245662504, "step": 4386 }, { "epoch": 9.768374164810691, "loss": 0.5620256662368774, "loss_ce": 0.00013603617844637483, "loss_iou": 0.2314453125, "loss_num": 0.01953125, "loss_xval": 0.5625, "num_input_tokens_seen": 245662504, "step": 4386 }, { "epoch": 9.770601336302896, "grad_norm": 21.027870178222656, "learning_rate": 1e-06, "loss": 0.6231, "num_input_tokens_seen": 245719168, "step": 4387 }, { "epoch": 9.770601336302896, "loss": 0.7491781711578369, "loss_ce": 0.00015475715918000787, "loss_iou": 0.310546875, "loss_num": 0.025634765625, "loss_xval": 0.75, "num_input_tokens_seen": 245719168, "step": 4387 }, { "epoch": 9.7728285077951, "grad_norm": 209.35565185546875, "learning_rate": 1e-06, "loss": 0.7288, "num_input_tokens_seen": 245773520, "step": 4388 }, { "epoch": 9.7728285077951, "loss": 0.900350034236908, "loss_ce": 0.00020351973944343626, "loss_iou": 0.34375, "loss_num": 0.042236328125, "loss_xval": 0.8984375, "num_input_tokens_seen": 245773520, "step": 4388 }, { "epoch": 9.775055679287306, "grad_norm": 15.992069244384766, "learning_rate": 1e-06, "loss": 0.6249, "num_input_tokens_seen": 245829068, "step": 4389 }, { "epoch": 9.775055679287306, "loss": 0.4481305480003357, "loss_ce": 0.0002545609895605594, "loss_iou": 0.1943359375, "loss_num": 0.01177978515625, "loss_xval": 0.447265625, "num_input_tokens_seen": 245829068, "step": 4389 }, { "epoch": 9.77728285077951, "grad_norm": 65.46475219726562, "learning_rate": 1e-06, "loss": 0.5634, "num_input_tokens_seen": 245885128, "step": 4390 }, { "epoch": 9.77728285077951, "loss": 0.5616711378097534, "loss_ce": 0.00014769005065318197, "loss_iou": 0.2421875, "loss_num": 0.01519775390625, "loss_xval": 0.5625, "num_input_tokens_seen": 245885128, "step": 4390 }, { "epoch": 9.779510022271715, "grad_norm": 23.950044631958008, "learning_rate": 1e-06, "loss": 0.5287, "num_input_tokens_seen": 245939168, "step": 4391 }, { "epoch": 9.779510022271715, "loss": 0.6016286611557007, "loss_ce": 0.0003102673217654228, "loss_iou": 0.2470703125, "loss_num": 0.021484375, "loss_xval": 0.6015625, "num_input_tokens_seen": 245939168, "step": 4391 }, { "epoch": 9.78173719376392, "grad_norm": 13.293421745300293, "learning_rate": 1e-06, "loss": 0.4483, "num_input_tokens_seen": 245996116, "step": 4392 }, { "epoch": 9.78173719376392, "loss": 0.439850389957428, "loss_ce": 0.00015313076437450945, "loss_iou": 0.1806640625, "loss_num": 0.015869140625, "loss_xval": 0.439453125, "num_input_tokens_seen": 245996116, "step": 4392 }, { "epoch": 9.783964365256125, "grad_norm": 27.452177047729492, "learning_rate": 1e-06, "loss": 0.5466, "num_input_tokens_seen": 246051320, "step": 4393 }, { "epoch": 9.783964365256125, "loss": 0.5230746269226074, "loss_ce": 0.00012541524483822286, "loss_iou": 0.2373046875, "loss_num": 0.00958251953125, "loss_xval": 0.5234375, "num_input_tokens_seen": 246051320, "step": 4393 }, { "epoch": 9.78619153674833, "grad_norm": 18.695438385009766, "learning_rate": 1e-06, "loss": 0.6525, "num_input_tokens_seen": 246107564, "step": 4394 }, { "epoch": 9.78619153674833, "loss": 0.6207006573677063, "loss_ce": 0.0002172729728044942, "loss_iou": 0.263671875, "loss_num": 0.0186767578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 246107564, "step": 4394 }, { "epoch": 9.788418708240535, "grad_norm": 16.555150985717773, "learning_rate": 1e-06, "loss": 0.6315, "num_input_tokens_seen": 246166380, "step": 4395 }, { "epoch": 9.788418708240535, "loss": 0.6279211044311523, "loss_ce": 0.00017448890139348805, "loss_iou": 0.2421875, "loss_num": 0.02880859375, "loss_xval": 0.62890625, "num_input_tokens_seen": 246166380, "step": 4395 }, { "epoch": 9.79064587973274, "grad_norm": 13.74927043914795, "learning_rate": 1e-06, "loss": 0.7028, "num_input_tokens_seen": 246222136, "step": 4396 }, { "epoch": 9.79064587973274, "loss": 0.8214117288589478, "loss_ce": 0.00036682139034383, "loss_iou": 0.3359375, "loss_num": 0.0301513671875, "loss_xval": 0.8203125, "num_input_tokens_seen": 246222136, "step": 4396 }, { "epoch": 9.792873051224944, "grad_norm": 17.71005630493164, "learning_rate": 1e-06, "loss": 0.5901, "num_input_tokens_seen": 246278428, "step": 4397 }, { "epoch": 9.792873051224944, "loss": 0.41447368264198303, "loss_ce": 0.00016704711015336215, "loss_iou": 0.1884765625, "loss_num": 0.007537841796875, "loss_xval": 0.4140625, "num_input_tokens_seen": 246278428, "step": 4397 }, { "epoch": 9.79510022271715, "grad_norm": 13.60590934753418, "learning_rate": 1e-06, "loss": 0.5328, "num_input_tokens_seen": 246335200, "step": 4398 }, { "epoch": 9.79510022271715, "loss": 0.4858427345752716, "loss_ce": 0.00012495677219703794, "loss_iou": 0.212890625, "loss_num": 0.0118408203125, "loss_xval": 0.486328125, "num_input_tokens_seen": 246335200, "step": 4398 }, { "epoch": 9.797327394209354, "grad_norm": 12.569108009338379, "learning_rate": 1e-06, "loss": 0.7097, "num_input_tokens_seen": 246391172, "step": 4399 }, { "epoch": 9.797327394209354, "loss": 0.8952298164367676, "loss_ce": 0.00021028138871770352, "loss_iou": 0.337890625, "loss_num": 0.043701171875, "loss_xval": 0.89453125, "num_input_tokens_seen": 246391172, "step": 4399 }, { "epoch": 9.799554565701559, "grad_norm": 16.045135498046875, "learning_rate": 1e-06, "loss": 0.5855, "num_input_tokens_seen": 246448824, "step": 4400 }, { "epoch": 9.799554565701559, "loss": 0.7127890586853027, "loss_ce": 0.00014254784036893398, "loss_iou": 0.306640625, "loss_num": 0.0201416015625, "loss_xval": 0.7109375, "num_input_tokens_seen": 246448824, "step": 4400 }, { "epoch": 9.801781737193764, "grad_norm": 18.482440948486328, "learning_rate": 1e-06, "loss": 0.5164, "num_input_tokens_seen": 246505864, "step": 4401 }, { "epoch": 9.801781737193764, "loss": 0.5470033884048462, "loss_ce": 0.00012841983698308468, "loss_iou": 0.2392578125, "loss_num": 0.01385498046875, "loss_xval": 0.546875, "num_input_tokens_seen": 246505864, "step": 4401 }, { "epoch": 9.804008908685969, "grad_norm": 17.286788940429688, "learning_rate": 1e-06, "loss": 0.6259, "num_input_tokens_seen": 246562828, "step": 4402 }, { "epoch": 9.804008908685969, "loss": 0.5885273218154907, "loss_ce": 0.00014838032075203955, "loss_iou": 0.2578125, "loss_num": 0.0146484375, "loss_xval": 0.58984375, "num_input_tokens_seen": 246562828, "step": 4402 }, { "epoch": 9.806236080178174, "grad_norm": 14.533248901367188, "learning_rate": 1e-06, "loss": 0.4501, "num_input_tokens_seen": 246619276, "step": 4403 }, { "epoch": 9.806236080178174, "loss": 0.4673030972480774, "loss_ce": 0.00014000001829117537, "loss_iou": 0.2080078125, "loss_num": 0.01025390625, "loss_xval": 0.466796875, "num_input_tokens_seen": 246619276, "step": 4403 }, { "epoch": 9.808463251670378, "grad_norm": 16.228418350219727, "learning_rate": 1e-06, "loss": 0.4324, "num_input_tokens_seen": 246678448, "step": 4404 }, { "epoch": 9.808463251670378, "loss": 0.47206440567970276, "loss_ce": 0.0001405745279043913, "loss_iou": 0.2109375, "loss_num": 0.0098876953125, "loss_xval": 0.47265625, "num_input_tokens_seen": 246678448, "step": 4404 }, { "epoch": 9.810690423162583, "grad_norm": 18.946840286254883, "learning_rate": 1e-06, "loss": 0.5187, "num_input_tokens_seen": 246732924, "step": 4405 }, { "epoch": 9.810690423162583, "loss": 0.5357993841171265, "loss_ce": 0.00015488323697354645, "loss_iou": 0.2294921875, "loss_num": 0.015380859375, "loss_xval": 0.53515625, "num_input_tokens_seen": 246732924, "step": 4405 }, { "epoch": 9.812917594654788, "grad_norm": 21.7833194732666, "learning_rate": 1e-06, "loss": 0.6752, "num_input_tokens_seen": 246790636, "step": 4406 }, { "epoch": 9.812917594654788, "loss": 0.5842469334602356, "loss_ce": 0.00014047868899069726, "loss_iou": 0.251953125, "loss_num": 0.01556396484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 246790636, "step": 4406 }, { "epoch": 9.815144766146993, "grad_norm": 18.096515655517578, "learning_rate": 1e-06, "loss": 0.5097, "num_input_tokens_seen": 246847708, "step": 4407 }, { "epoch": 9.815144766146993, "loss": 0.634331226348877, "loss_ce": 0.0002980299177579582, "loss_iou": 0.263671875, "loss_num": 0.021484375, "loss_xval": 0.6328125, "num_input_tokens_seen": 246847708, "step": 4407 }, { "epoch": 9.817371937639198, "grad_norm": 21.39216423034668, "learning_rate": 1e-06, "loss": 0.6078, "num_input_tokens_seen": 246902376, "step": 4408 }, { "epoch": 9.817371937639198, "loss": 0.6875587701797485, "loss_ce": 0.00018085587362293154, "loss_iou": 0.283203125, "loss_num": 0.0244140625, "loss_xval": 0.6875, "num_input_tokens_seen": 246902376, "step": 4408 }, { "epoch": 9.819599109131403, "grad_norm": 29.385353088378906, "learning_rate": 1e-06, "loss": 0.6988, "num_input_tokens_seen": 246956228, "step": 4409 }, { "epoch": 9.819599109131403, "loss": 0.6361332535743713, "loss_ce": 0.00014688669762108475, "loss_iou": 0.28515625, "loss_num": 0.01318359375, "loss_xval": 0.63671875, "num_input_tokens_seen": 246956228, "step": 4409 }, { "epoch": 9.821826280623608, "grad_norm": 23.78947639465332, "learning_rate": 1e-06, "loss": 0.5749, "num_input_tokens_seen": 247012124, "step": 4410 }, { "epoch": 9.821826280623608, "loss": 0.4960480332374573, "loss_ce": 0.0001984003756660968, "loss_iou": 0.2197265625, "loss_num": 0.01129150390625, "loss_xval": 0.49609375, "num_input_tokens_seen": 247012124, "step": 4410 }, { "epoch": 9.824053452115812, "grad_norm": 24.32109260559082, "learning_rate": 1e-06, "loss": 0.6939, "num_input_tokens_seen": 247070408, "step": 4411 }, { "epoch": 9.824053452115812, "loss": 1.043853521347046, "loss_ce": 0.00015239304048009217, "loss_iou": 0.439453125, "loss_num": 0.032958984375, "loss_xval": 1.046875, "num_input_tokens_seen": 247070408, "step": 4411 }, { "epoch": 9.826280623608017, "grad_norm": 13.603236198425293, "learning_rate": 1e-06, "loss": 0.7308, "num_input_tokens_seen": 247130296, "step": 4412 }, { "epoch": 9.826280623608017, "loss": 0.8429387807846069, "loss_ce": 0.00016536489420104772, "loss_iou": 0.369140625, "loss_num": 0.0206298828125, "loss_xval": 0.84375, "num_input_tokens_seen": 247130296, "step": 4412 }, { "epoch": 9.828507795100222, "grad_norm": 23.041610717773438, "learning_rate": 1e-06, "loss": 0.4969, "num_input_tokens_seen": 247186440, "step": 4413 }, { "epoch": 9.828507795100222, "loss": 0.5199660062789917, "loss_ce": 0.0001905907120089978, "loss_iou": 0.2236328125, "loss_num": 0.0145263671875, "loss_xval": 0.51953125, "num_input_tokens_seen": 247186440, "step": 4413 }, { "epoch": 9.830734966592427, "grad_norm": 18.076107025146484, "learning_rate": 1e-06, "loss": 0.4157, "num_input_tokens_seen": 247239284, "step": 4414 }, { "epoch": 9.830734966592427, "loss": 0.3731992244720459, "loss_ce": 0.0001523523242212832, "loss_iou": 0.173828125, "loss_num": 0.005157470703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 247239284, "step": 4414 }, { "epoch": 9.832962138084632, "grad_norm": 13.739655494689941, "learning_rate": 1e-06, "loss": 0.4094, "num_input_tokens_seen": 247295360, "step": 4415 }, { "epoch": 9.832962138084632, "loss": 0.3968695104122162, "loss_ce": 0.00014100654516369104, "loss_iou": 0.1796875, "loss_num": 0.007293701171875, "loss_xval": 0.396484375, "num_input_tokens_seen": 247295360, "step": 4415 }, { "epoch": 9.835189309576837, "grad_norm": 22.17353057861328, "learning_rate": 1e-06, "loss": 0.4095, "num_input_tokens_seen": 247353432, "step": 4416 }, { "epoch": 9.835189309576837, "loss": 0.38090550899505615, "loss_ce": 0.00016820061136968434, "loss_iou": 0.1748046875, "loss_num": 0.00616455078125, "loss_xval": 0.380859375, "num_input_tokens_seen": 247353432, "step": 4416 }, { "epoch": 9.837416481069042, "grad_norm": 28.909465789794922, "learning_rate": 1e-06, "loss": 0.5909, "num_input_tokens_seen": 247412760, "step": 4417 }, { "epoch": 9.837416481069042, "loss": 0.5692502856254578, "loss_ce": 0.00015849701594561338, "loss_iou": 0.251953125, "loss_num": 0.01300048828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 247412760, "step": 4417 }, { "epoch": 9.839643652561247, "grad_norm": 18.436254501342773, "learning_rate": 1e-06, "loss": 0.7414, "num_input_tokens_seen": 247469036, "step": 4418 }, { "epoch": 9.839643652561247, "loss": 0.946731686592102, "loss_ce": 0.00019852191326208413, "loss_iou": 0.38671875, "loss_num": 0.0341796875, "loss_xval": 0.9453125, "num_input_tokens_seen": 247469036, "step": 4418 }, { "epoch": 9.841870824053451, "grad_norm": 17.95979881286621, "learning_rate": 1e-06, "loss": 0.5232, "num_input_tokens_seen": 247525680, "step": 4419 }, { "epoch": 9.841870824053451, "loss": 0.5357891917228699, "loss_ce": 0.00014465830463450402, "loss_iou": 0.228515625, "loss_num": 0.015869140625, "loss_xval": 0.53515625, "num_input_tokens_seen": 247525680, "step": 4419 }, { "epoch": 9.844097995545656, "grad_norm": 29.744184494018555, "learning_rate": 1e-06, "loss": 0.5656, "num_input_tokens_seen": 247580136, "step": 4420 }, { "epoch": 9.844097995545656, "loss": 0.6758188605308533, "loss_ce": 0.00015968517982400954, "loss_iou": 0.291015625, "loss_num": 0.0184326171875, "loss_xval": 0.67578125, "num_input_tokens_seen": 247580136, "step": 4420 }, { "epoch": 9.846325167037861, "grad_norm": 13.804282188415527, "learning_rate": 1e-06, "loss": 0.4793, "num_input_tokens_seen": 247634072, "step": 4421 }, { "epoch": 9.846325167037861, "loss": 0.34506338834762573, "loss_ce": 0.00033683725632727146, "loss_iou": 0.1484375, "loss_num": 0.0093994140625, "loss_xval": 0.34375, "num_input_tokens_seen": 247634072, "step": 4421 }, { "epoch": 9.848552338530066, "grad_norm": 53.691707611083984, "learning_rate": 1e-06, "loss": 0.5384, "num_input_tokens_seen": 247688788, "step": 4422 }, { "epoch": 9.848552338530066, "loss": 0.5472594499588013, "loss_ce": 0.0001402852067258209, "loss_iou": 0.24609375, "loss_num": 0.01116943359375, "loss_xval": 0.546875, "num_input_tokens_seen": 247688788, "step": 4422 }, { "epoch": 9.85077951002227, "grad_norm": 37.15926742553711, "learning_rate": 1e-06, "loss": 0.6395, "num_input_tokens_seen": 247746856, "step": 4423 }, { "epoch": 9.85077951002227, "loss": 0.6605561375617981, "loss_ce": 0.00015572294068988413, "loss_iou": 0.287109375, "loss_num": 0.016845703125, "loss_xval": 0.66015625, "num_input_tokens_seen": 247746856, "step": 4423 }, { "epoch": 9.853006681514476, "grad_norm": 16.28897476196289, "learning_rate": 1e-06, "loss": 0.7326, "num_input_tokens_seen": 247802588, "step": 4424 }, { "epoch": 9.853006681514476, "loss": 0.605131208896637, "loss_ce": 0.00015073509712237865, "loss_iou": 0.26953125, "loss_num": 0.012939453125, "loss_xval": 0.60546875, "num_input_tokens_seen": 247802588, "step": 4424 }, { "epoch": 9.855233853006682, "grad_norm": 24.916080474853516, "learning_rate": 1e-06, "loss": 0.5261, "num_input_tokens_seen": 247858736, "step": 4425 }, { "epoch": 9.855233853006682, "loss": 0.5880270004272461, "loss_ce": 0.0006245865952223539, "loss_iou": 0.267578125, "loss_num": 0.010498046875, "loss_xval": 0.5859375, "num_input_tokens_seen": 247858736, "step": 4425 }, { "epoch": 9.857461024498887, "grad_norm": 13.907861709594727, "learning_rate": 1e-06, "loss": 0.4909, "num_input_tokens_seen": 247916764, "step": 4426 }, { "epoch": 9.857461024498887, "loss": 0.5015993118286133, "loss_ce": 0.00013441478949971497, "loss_iou": 0.21875, "loss_num": 0.012939453125, "loss_xval": 0.5, "num_input_tokens_seen": 247916764, "step": 4426 }, { "epoch": 9.859688195991092, "grad_norm": 31.806222915649414, "learning_rate": 1e-06, "loss": 0.5157, "num_input_tokens_seen": 247971644, "step": 4427 }, { "epoch": 9.859688195991092, "loss": 0.5441058278083801, "loss_ce": 0.00016053869330789894, "loss_iou": 0.244140625, "loss_num": 0.01123046875, "loss_xval": 0.54296875, "num_input_tokens_seen": 247971644, "step": 4427 }, { "epoch": 9.861915367483297, "grad_norm": 13.702202796936035, "learning_rate": 1e-06, "loss": 0.4027, "num_input_tokens_seen": 248027780, "step": 4428 }, { "epoch": 9.861915367483297, "loss": 0.3753761351108551, "loss_ce": 0.00013199455861467868, "loss_iou": 0.1640625, "loss_num": 0.00946044921875, "loss_xval": 0.375, "num_input_tokens_seen": 248027780, "step": 4428 }, { "epoch": 9.864142538975502, "grad_norm": 22.021825790405273, "learning_rate": 1e-06, "loss": 0.369, "num_input_tokens_seen": 248085456, "step": 4429 }, { "epoch": 9.864142538975502, "loss": 0.34767791628837585, "loss_ce": 0.00014372303849086165, "loss_iou": 0.1572265625, "loss_num": 0.0064697265625, "loss_xval": 0.34765625, "num_input_tokens_seen": 248085456, "step": 4429 }, { "epoch": 9.866369710467707, "grad_norm": 15.745802879333496, "learning_rate": 1e-06, "loss": 0.4453, "num_input_tokens_seen": 248141132, "step": 4430 }, { "epoch": 9.866369710467707, "loss": 0.3866683840751648, "loss_ce": 0.0001937640190590173, "loss_iou": 0.15625, "loss_num": 0.01470947265625, "loss_xval": 0.38671875, "num_input_tokens_seen": 248141132, "step": 4430 }, { "epoch": 9.868596881959911, "grad_norm": 17.44371795654297, "learning_rate": 1e-06, "loss": 0.6266, "num_input_tokens_seen": 248199536, "step": 4431 }, { "epoch": 9.868596881959911, "loss": 0.4967302978038788, "loss_ce": 0.0001482525112805888, "loss_iou": 0.224609375, "loss_num": 0.0096435546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 248199536, "step": 4431 }, { "epoch": 9.870824053452116, "grad_norm": 20.9477481842041, "learning_rate": 1e-06, "loss": 0.5961, "num_input_tokens_seen": 248254300, "step": 4432 }, { "epoch": 9.870824053452116, "loss": 0.5253418684005737, "loss_ce": 0.00019535439787432551, "loss_iou": 0.23828125, "loss_num": 0.009521484375, "loss_xval": 0.5234375, "num_input_tokens_seen": 248254300, "step": 4432 }, { "epoch": 9.873051224944321, "grad_norm": 32.99359893798828, "learning_rate": 1e-06, "loss": 0.6711, "num_input_tokens_seen": 248311172, "step": 4433 }, { "epoch": 9.873051224944321, "loss": 0.6815510988235474, "loss_ce": 0.00015456396795343608, "loss_iou": 0.294921875, "loss_num": 0.0186767578125, "loss_xval": 0.6796875, "num_input_tokens_seen": 248311172, "step": 4433 }, { "epoch": 9.875278396436526, "grad_norm": 31.431236267089844, "learning_rate": 1e-06, "loss": 0.4702, "num_input_tokens_seen": 248366780, "step": 4434 }, { "epoch": 9.875278396436526, "loss": 0.4480833113193512, "loss_ce": 0.0004514938627835363, "loss_iou": 0.19921875, "loss_num": 0.00982666015625, "loss_xval": 0.447265625, "num_input_tokens_seen": 248366780, "step": 4434 }, { "epoch": 9.877505567928731, "grad_norm": 14.6542329788208, "learning_rate": 1e-06, "loss": 0.6543, "num_input_tokens_seen": 248422596, "step": 4435 }, { "epoch": 9.877505567928731, "loss": 0.7738858461380005, "loss_ce": 0.00020423725072760135, "loss_iou": 0.3046875, "loss_num": 0.032958984375, "loss_xval": 0.7734375, "num_input_tokens_seen": 248422596, "step": 4435 }, { "epoch": 9.879732739420936, "grad_norm": 21.185422897338867, "learning_rate": 1e-06, "loss": 0.7841, "num_input_tokens_seen": 248477628, "step": 4436 }, { "epoch": 9.879732739420936, "loss": 0.7956980466842651, "loss_ce": 0.00016583640535827726, "loss_iou": 0.330078125, "loss_num": 0.026611328125, "loss_xval": 0.796875, "num_input_tokens_seen": 248477628, "step": 4436 }, { "epoch": 9.88195991091314, "grad_norm": 22.979459762573242, "learning_rate": 1e-06, "loss": 0.746, "num_input_tokens_seen": 248531252, "step": 4437 }, { "epoch": 9.88195991091314, "loss": 1.0274933576583862, "loss_ce": 0.00014961442502681166, "loss_iou": 0.4375, "loss_num": 0.030517578125, "loss_xval": 1.03125, "num_input_tokens_seen": 248531252, "step": 4437 }, { "epoch": 9.884187082405345, "grad_norm": 13.26347541809082, "learning_rate": 1e-06, "loss": 0.789, "num_input_tokens_seen": 248587712, "step": 4438 }, { "epoch": 9.884187082405345, "loss": 0.8224426507949829, "loss_ce": 0.0001770372618921101, "loss_iou": 0.34765625, "loss_num": 0.025390625, "loss_xval": 0.8203125, "num_input_tokens_seen": 248587712, "step": 4438 }, { "epoch": 9.88641425389755, "grad_norm": 19.138172149658203, "learning_rate": 1e-06, "loss": 0.6224, "num_input_tokens_seen": 248644168, "step": 4439 }, { "epoch": 9.88641425389755, "loss": 0.6742221117019653, "loss_ce": 0.00014987046597525477, "loss_iou": 0.2734375, "loss_num": 0.025146484375, "loss_xval": 0.67578125, "num_input_tokens_seen": 248644168, "step": 4439 }, { "epoch": 9.888641425389755, "grad_norm": 14.5908784866333, "learning_rate": 1e-06, "loss": 0.448, "num_input_tokens_seen": 248700288, "step": 4440 }, { "epoch": 9.888641425389755, "loss": 0.5669944286346436, "loss_ce": 0.00022195381461642683, "loss_iou": 0.2333984375, "loss_num": 0.020263671875, "loss_xval": 0.56640625, "num_input_tokens_seen": 248700288, "step": 4440 }, { "epoch": 9.89086859688196, "grad_norm": 15.528139114379883, "learning_rate": 1e-06, "loss": 0.5742, "num_input_tokens_seen": 248756128, "step": 4441 }, { "epoch": 9.89086859688196, "loss": 0.6929147243499756, "loss_ce": 0.00016564424731768668, "loss_iou": 0.283203125, "loss_num": 0.0255126953125, "loss_xval": 0.69140625, "num_input_tokens_seen": 248756128, "step": 4441 }, { "epoch": 9.893095768374165, "grad_norm": 25.997331619262695, "learning_rate": 1e-06, "loss": 0.4781, "num_input_tokens_seen": 248812652, "step": 4442 }, { "epoch": 9.893095768374165, "loss": 0.4952731132507324, "loss_ce": 0.0001559244265081361, "loss_iou": 0.23046875, "loss_num": 0.0069580078125, "loss_xval": 0.49609375, "num_input_tokens_seen": 248812652, "step": 4442 }, { "epoch": 9.89532293986637, "grad_norm": 17.757108688354492, "learning_rate": 1e-06, "loss": 0.457, "num_input_tokens_seen": 248870328, "step": 4443 }, { "epoch": 9.89532293986637, "loss": 0.5199582576751709, "loss_ce": 0.0003048997314181179, "loss_iou": 0.2294921875, "loss_num": 0.01220703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 248870328, "step": 4443 }, { "epoch": 9.897550111358575, "grad_norm": 33.18953323364258, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 248927148, "step": 4444 }, { "epoch": 9.897550111358575, "loss": 0.4624471068382263, "loss_ce": 0.00016684165166225284, "loss_iou": 0.2060546875, "loss_num": 0.01007080078125, "loss_xval": 0.462890625, "num_input_tokens_seen": 248927148, "step": 4444 }, { "epoch": 9.89977728285078, "grad_norm": 16.396703720092773, "learning_rate": 1e-06, "loss": 0.695, "num_input_tokens_seen": 248982876, "step": 4445 }, { "epoch": 9.89977728285078, "loss": 0.759984016418457, "loss_ce": 0.0002184216573368758, "loss_iou": 0.32421875, "loss_num": 0.022705078125, "loss_xval": 0.7578125, "num_input_tokens_seen": 248982876, "step": 4445 }, { "epoch": 9.902004454342984, "grad_norm": 355.2792053222656, "learning_rate": 1e-06, "loss": 0.6844, "num_input_tokens_seen": 249036336, "step": 4446 }, { "epoch": 9.902004454342984, "loss": 0.8203428983688354, "loss_ce": 0.0007628132589161396, "loss_iou": 0.373046875, "loss_num": 0.01483154296875, "loss_xval": 0.8203125, "num_input_tokens_seen": 249036336, "step": 4446 }, { "epoch": 9.90423162583519, "grad_norm": 20.815412521362305, "learning_rate": 1e-06, "loss": 0.493, "num_input_tokens_seen": 249090512, "step": 4447 }, { "epoch": 9.90423162583519, "loss": 0.3990846276283264, "loss_ce": 0.00015884230379015207, "loss_iou": 0.173828125, "loss_num": 0.0103759765625, "loss_xval": 0.3984375, "num_input_tokens_seen": 249090512, "step": 4447 }, { "epoch": 9.906458797327394, "grad_norm": 22.289730072021484, "learning_rate": 1e-06, "loss": 0.5334, "num_input_tokens_seen": 249146280, "step": 4448 }, { "epoch": 9.906458797327394, "loss": 0.46716466546058655, "loss_ce": 0.00012365993461571634, "loss_iou": 0.205078125, "loss_num": 0.01153564453125, "loss_xval": 0.466796875, "num_input_tokens_seen": 249146280, "step": 4448 }, { "epoch": 9.908685968819599, "grad_norm": 18.984031677246094, "learning_rate": 1e-06, "loss": 0.482, "num_input_tokens_seen": 249199220, "step": 4449 }, { "epoch": 9.908685968819599, "loss": 0.3772040605545044, "loss_ce": 0.00012887499178759754, "loss_iou": 0.1669921875, "loss_num": 0.00848388671875, "loss_xval": 0.376953125, "num_input_tokens_seen": 249199220, "step": 4449 }, { "epoch": 9.910913140311804, "grad_norm": 22.167558670043945, "learning_rate": 1e-06, "loss": 0.3869, "num_input_tokens_seen": 249255288, "step": 4450 }, { "epoch": 9.910913140311804, "loss": 0.42713233828544617, "loss_ce": 0.0001304029137827456, "loss_iou": 0.1865234375, "loss_num": 0.01068115234375, "loss_xval": 0.427734375, "num_input_tokens_seen": 249255288, "step": 4450 }, { "epoch": 9.913140311804009, "grad_norm": 16.50093650817871, "learning_rate": 1e-06, "loss": 0.4574, "num_input_tokens_seen": 249309848, "step": 4451 }, { "epoch": 9.913140311804009, "loss": 0.3489049971103668, "loss_ce": 0.0001501307706348598, "loss_iou": 0.1484375, "loss_num": 0.0101318359375, "loss_xval": 0.349609375, "num_input_tokens_seen": 249309848, "step": 4451 }, { "epoch": 9.915367483296214, "grad_norm": 26.222118377685547, "learning_rate": 1e-06, "loss": 0.6307, "num_input_tokens_seen": 249365492, "step": 4452 }, { "epoch": 9.915367483296214, "loss": 0.5098949670791626, "loss_ce": 0.00012939177395310253, "loss_iou": 0.2177734375, "loss_num": 0.01513671875, "loss_xval": 0.5078125, "num_input_tokens_seen": 249365492, "step": 4452 }, { "epoch": 9.917594654788418, "grad_norm": 26.15787124633789, "learning_rate": 1e-06, "loss": 0.4824, "num_input_tokens_seen": 249420888, "step": 4453 }, { "epoch": 9.917594654788418, "loss": 0.34827083349227905, "loss_ce": 0.0001263211015611887, "loss_iou": 0.1533203125, "loss_num": 0.008056640625, "loss_xval": 0.34765625, "num_input_tokens_seen": 249420888, "step": 4453 }, { "epoch": 9.919821826280623, "grad_norm": 21.763919830322266, "learning_rate": 1e-06, "loss": 0.5638, "num_input_tokens_seen": 249474144, "step": 4454 }, { "epoch": 9.919821826280623, "loss": 0.48050248622894287, "loss_ce": 0.00015578939928673208, "loss_iou": 0.2177734375, "loss_num": 0.0089111328125, "loss_xval": 0.48046875, "num_input_tokens_seen": 249474144, "step": 4454 }, { "epoch": 9.922048997772828, "grad_norm": 22.63077163696289, "learning_rate": 1e-06, "loss": 0.5656, "num_input_tokens_seen": 249532508, "step": 4455 }, { "epoch": 9.922048997772828, "loss": 0.4286133050918579, "loss_ce": 0.00014652467507403344, "loss_iou": 0.1943359375, "loss_num": 0.008056640625, "loss_xval": 0.427734375, "num_input_tokens_seen": 249532508, "step": 4455 }, { "epoch": 9.924276169265033, "grad_norm": 19.111759185791016, "learning_rate": 1e-06, "loss": 0.4905, "num_input_tokens_seen": 249588256, "step": 4456 }, { "epoch": 9.924276169265033, "loss": 0.6744104623794556, "loss_ce": 0.0005213359836488962, "loss_iou": 0.279296875, "loss_num": 0.022705078125, "loss_xval": 0.67578125, "num_input_tokens_seen": 249588256, "step": 4456 }, { "epoch": 9.926503340757238, "grad_norm": 14.64742660522461, "learning_rate": 1e-06, "loss": 0.5547, "num_input_tokens_seen": 249645916, "step": 4457 }, { "epoch": 9.926503340757238, "loss": 0.6124250888824463, "loss_ce": 0.00036456523230299354, "loss_iou": 0.2578125, "loss_num": 0.018798828125, "loss_xval": 0.61328125, "num_input_tokens_seen": 249645916, "step": 4457 }, { "epoch": 9.928730512249443, "grad_norm": 30.41196060180664, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 249699704, "step": 4458 }, { "epoch": 9.928730512249443, "loss": 0.43876737356185913, "loss_ce": 0.00016875306027941406, "loss_iou": 0.2041015625, "loss_num": 0.0059814453125, "loss_xval": 0.439453125, "num_input_tokens_seen": 249699704, "step": 4458 }, { "epoch": 9.930957683741648, "grad_norm": 19.998111724853516, "learning_rate": 1e-06, "loss": 0.5264, "num_input_tokens_seen": 249754572, "step": 4459 }, { "epoch": 9.930957683741648, "loss": 0.41116297245025635, "loss_ce": 0.0001522265374660492, "loss_iou": 0.1806640625, "loss_num": 0.01007080078125, "loss_xval": 0.41015625, "num_input_tokens_seen": 249754572, "step": 4459 }, { "epoch": 9.933184855233852, "grad_norm": 33.828853607177734, "learning_rate": 1e-06, "loss": 0.7319, "num_input_tokens_seen": 249809712, "step": 4460 }, { "epoch": 9.933184855233852, "loss": 0.640634298324585, "loss_ce": 0.0002534303639549762, "loss_iou": 0.279296875, "loss_num": 0.01611328125, "loss_xval": 0.640625, "num_input_tokens_seen": 249809712, "step": 4460 }, { "epoch": 9.935412026726057, "grad_norm": 540.546875, "learning_rate": 1e-06, "loss": 0.5882, "num_input_tokens_seen": 249864220, "step": 4461 }, { "epoch": 9.935412026726057, "loss": 0.4697587788105011, "loss_ce": 0.00012375140795484185, "loss_iou": 0.19921875, "loss_num": 0.01422119140625, "loss_xval": 0.46875, "num_input_tokens_seen": 249864220, "step": 4461 }, { "epoch": 9.937639198218262, "grad_norm": 36.97883987426758, "learning_rate": 1e-06, "loss": 0.6369, "num_input_tokens_seen": 249917428, "step": 4462 }, { "epoch": 9.937639198218262, "loss": 0.5367482900619507, "loss_ce": 0.00012720399536192417, "loss_iou": 0.2216796875, "loss_num": 0.018798828125, "loss_xval": 0.53515625, "num_input_tokens_seen": 249917428, "step": 4462 }, { "epoch": 9.939866369710467, "grad_norm": 16.344486236572266, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 249975260, "step": 4463 }, { "epoch": 9.939866369710467, "loss": 0.4674408435821533, "loss_ce": 0.0001557128271088004, "loss_iou": 0.193359375, "loss_num": 0.0162353515625, "loss_xval": 0.466796875, "num_input_tokens_seen": 249975260, "step": 4463 }, { "epoch": 9.942093541202672, "grad_norm": 20.06435203552246, "learning_rate": 1e-06, "loss": 0.4822, "num_input_tokens_seen": 250028748, "step": 4464 }, { "epoch": 9.942093541202672, "loss": 0.562027096748352, "loss_ce": 0.00013746539480052888, "loss_iou": 0.2421875, "loss_num": 0.0152587890625, "loss_xval": 0.5625, "num_input_tokens_seen": 250028748, "step": 4464 }, { "epoch": 9.944320712694877, "grad_norm": 21.986183166503906, "learning_rate": 1e-06, "loss": 0.6695, "num_input_tokens_seen": 250086348, "step": 4465 }, { "epoch": 9.944320712694877, "loss": 0.6947346925735474, "loss_ce": 0.00015459363930858672, "loss_iou": 0.28515625, "loss_num": 0.0245361328125, "loss_xval": 0.6953125, "num_input_tokens_seen": 250086348, "step": 4465 }, { "epoch": 9.946547884187082, "grad_norm": 22.66680335998535, "learning_rate": 1e-06, "loss": 0.5404, "num_input_tokens_seen": 250140012, "step": 4466 }, { "epoch": 9.946547884187082, "loss": 0.5031921863555908, "loss_ce": 0.00014042215480003506, "loss_iou": 0.22265625, "loss_num": 0.01165771484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 250140012, "step": 4466 }, { "epoch": 9.948775055679288, "grad_norm": 20.20932388305664, "learning_rate": 1e-06, "loss": 0.5721, "num_input_tokens_seen": 250197260, "step": 4467 }, { "epoch": 9.948775055679288, "loss": 0.5846377611160278, "loss_ce": 0.0001650652993703261, "loss_iou": 0.25, "loss_num": 0.016845703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 250197260, "step": 4467 }, { "epoch": 9.951002227171493, "grad_norm": 13.735246658325195, "learning_rate": 1e-06, "loss": 0.5371, "num_input_tokens_seen": 250255264, "step": 4468 }, { "epoch": 9.951002227171493, "loss": 0.36416295170783997, "loss_ce": 0.0001492602750658989, "loss_iou": 0.16015625, "loss_num": 0.00872802734375, "loss_xval": 0.36328125, "num_input_tokens_seen": 250255264, "step": 4468 }, { "epoch": 9.953229398663698, "grad_norm": 31.190519332885742, "learning_rate": 1e-06, "loss": 0.6512, "num_input_tokens_seen": 250309688, "step": 4469 }, { "epoch": 9.953229398663698, "loss": 0.8156118392944336, "loss_ce": 0.00018213686416856945, "loss_iou": 0.35546875, "loss_num": 0.0208740234375, "loss_xval": 0.81640625, "num_input_tokens_seen": 250309688, "step": 4469 }, { "epoch": 9.955456570155903, "grad_norm": 20.936838150024414, "learning_rate": 1e-06, "loss": 0.6122, "num_input_tokens_seen": 250364732, "step": 4470 }, { "epoch": 9.955456570155903, "loss": 0.6811445951461792, "loss_ce": 0.0004805437056347728, "loss_iou": 0.28515625, "loss_num": 0.0220947265625, "loss_xval": 0.6796875, "num_input_tokens_seen": 250364732, "step": 4470 }, { "epoch": 9.957683741648108, "grad_norm": 20.30494499206543, "learning_rate": 1e-06, "loss": 0.5026, "num_input_tokens_seen": 250421248, "step": 4471 }, { "epoch": 9.957683741648108, "loss": 0.4135947823524475, "loss_ce": 0.00014261619071476161, "loss_iou": 0.1865234375, "loss_num": 0.00787353515625, "loss_xval": 0.4140625, "num_input_tokens_seen": 250421248, "step": 4471 }, { "epoch": 9.959910913140313, "grad_norm": 22.17571449279785, "learning_rate": 1e-06, "loss": 0.5305, "num_input_tokens_seen": 250476212, "step": 4472 }, { "epoch": 9.959910913140313, "loss": 0.5278682112693787, "loss_ce": 0.0001582720287842676, "loss_iou": 0.2275390625, "loss_num": 0.01446533203125, "loss_xval": 0.52734375, "num_input_tokens_seen": 250476212, "step": 4472 }, { "epoch": 9.962138084632517, "grad_norm": 21.48920249938965, "learning_rate": 1e-06, "loss": 0.4718, "num_input_tokens_seen": 250534156, "step": 4473 }, { "epoch": 9.962138084632517, "loss": 0.5655474662780762, "loss_ce": 0.0002398924989392981, "loss_iou": 0.265625, "loss_num": 0.00677490234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 250534156, "step": 4473 }, { "epoch": 9.964365256124722, "grad_norm": 21.838937759399414, "learning_rate": 1e-06, "loss": 0.3754, "num_input_tokens_seen": 250590784, "step": 4474 }, { "epoch": 9.964365256124722, "loss": 0.44572579860687256, "loss_ce": 0.00016913507715798914, "loss_iou": 0.1875, "loss_num": 0.0142822265625, "loss_xval": 0.4453125, "num_input_tokens_seen": 250590784, "step": 4474 }, { "epoch": 9.966592427616927, "grad_norm": 15.84533977508545, "learning_rate": 1e-06, "loss": 0.5513, "num_input_tokens_seen": 250649576, "step": 4475 }, { "epoch": 9.966592427616927, "loss": 0.4788917899131775, "loss_ce": 0.0001320300216320902, "loss_iou": 0.203125, "loss_num": 0.0147705078125, "loss_xval": 0.478515625, "num_input_tokens_seen": 250649576, "step": 4475 }, { "epoch": 9.968819599109132, "grad_norm": 15.656432151794434, "learning_rate": 1e-06, "loss": 0.4874, "num_input_tokens_seen": 250705124, "step": 4476 }, { "epoch": 9.968819599109132, "loss": 0.49739590287208557, "loss_ce": 0.00014248676598072052, "loss_iou": 0.1904296875, "loss_num": 0.023193359375, "loss_xval": 0.498046875, "num_input_tokens_seen": 250705124, "step": 4476 }, { "epoch": 9.971046770601337, "grad_norm": 26.737167358398438, "learning_rate": 1e-06, "loss": 0.6391, "num_input_tokens_seen": 250761396, "step": 4477 }, { "epoch": 9.971046770601337, "loss": 0.6163727045059204, "loss_ce": 0.00016177864745259285, "loss_iou": 0.26171875, "loss_num": 0.0186767578125, "loss_xval": 0.6171875, "num_input_tokens_seen": 250761396, "step": 4477 }, { "epoch": 9.973273942093542, "grad_norm": 22.787172317504883, "learning_rate": 1e-06, "loss": 0.6781, "num_input_tokens_seen": 250815480, "step": 4478 }, { "epoch": 9.973273942093542, "loss": 0.8530862331390381, "loss_ce": 0.00018101731257047504, "loss_iou": 0.349609375, "loss_num": 0.0306396484375, "loss_xval": 0.8515625, "num_input_tokens_seen": 250815480, "step": 4478 }, { "epoch": 9.975501113585747, "grad_norm": 15.702953338623047, "learning_rate": 1e-06, "loss": 0.4059, "num_input_tokens_seen": 250872060, "step": 4479 }, { "epoch": 9.975501113585747, "loss": 0.3851672112941742, "loss_ce": 0.0001574572379468009, "loss_iou": 0.1640625, "loss_num": 0.01141357421875, "loss_xval": 0.384765625, "num_input_tokens_seen": 250872060, "step": 4479 }, { "epoch": 9.977728285077951, "grad_norm": 25.042709350585938, "learning_rate": 1e-06, "loss": 0.5131, "num_input_tokens_seen": 250929860, "step": 4480 }, { "epoch": 9.977728285077951, "loss": 0.5049949884414673, "loss_ce": 0.00011219277803320438, "loss_iou": 0.2275390625, "loss_num": 0.01019287109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 250929860, "step": 4480 }, { "epoch": 9.979955456570156, "grad_norm": 15.756171226501465, "learning_rate": 1e-06, "loss": 0.5156, "num_input_tokens_seen": 250984516, "step": 4481 }, { "epoch": 9.979955456570156, "loss": 0.5473989248275757, "loss_ce": 0.00015769051969982684, "loss_iou": 0.251953125, "loss_num": 0.0084228515625, "loss_xval": 0.546875, "num_input_tokens_seen": 250984516, "step": 4481 }, { "epoch": 9.982182628062361, "grad_norm": 16.539581298828125, "learning_rate": 1e-06, "loss": 0.6737, "num_input_tokens_seen": 251041288, "step": 4482 }, { "epoch": 9.982182628062361, "loss": 0.6556758880615234, "loss_ce": 0.00015832131612114608, "loss_iou": 0.291015625, "loss_num": 0.01422119140625, "loss_xval": 0.65625, "num_input_tokens_seen": 251041288, "step": 4482 }, { "epoch": 9.984409799554566, "grad_norm": 19.558635711669922, "learning_rate": 1e-06, "loss": 0.5597, "num_input_tokens_seen": 251098176, "step": 4483 }, { "epoch": 9.984409799554566, "loss": 0.43492138385772705, "loss_ce": 0.00022901118791196495, "loss_iou": 0.197265625, "loss_num": 0.0079345703125, "loss_xval": 0.435546875, "num_input_tokens_seen": 251098176, "step": 4483 }, { "epoch": 9.98663697104677, "grad_norm": 14.561528205871582, "learning_rate": 1e-06, "loss": 0.5418, "num_input_tokens_seen": 251154472, "step": 4484 }, { "epoch": 9.98663697104677, "loss": 0.41586917638778687, "loss_ce": 0.0001434582518413663, "loss_iou": 0.181640625, "loss_num": 0.010498046875, "loss_xval": 0.416015625, "num_input_tokens_seen": 251154472, "step": 4484 }, { "epoch": 9.988864142538976, "grad_norm": 17.365453720092773, "learning_rate": 1e-06, "loss": 0.5246, "num_input_tokens_seen": 251211700, "step": 4485 }, { "epoch": 9.988864142538976, "loss": 0.41728872060775757, "loss_ce": 0.0001744481414789334, "loss_iou": 0.1884765625, "loss_num": 0.00811767578125, "loss_xval": 0.41796875, "num_input_tokens_seen": 251211700, "step": 4485 }, { "epoch": 9.99109131403118, "grad_norm": 21.09174346923828, "learning_rate": 1e-06, "loss": 0.586, "num_input_tokens_seen": 251266340, "step": 4486 }, { "epoch": 9.99109131403118, "loss": 0.6495558619499207, "loss_ce": 0.000141800323035568, "loss_iou": 0.27734375, "loss_num": 0.01904296875, "loss_xval": 0.6484375, "num_input_tokens_seen": 251266340, "step": 4486 }, { "epoch": 9.993318485523385, "grad_norm": 13.637633323669434, "learning_rate": 1e-06, "loss": 0.5915, "num_input_tokens_seen": 251323956, "step": 4487 }, { "epoch": 9.993318485523385, "loss": 0.6502304077148438, "loss_ce": 0.00020597720867954195, "loss_iou": 0.255859375, "loss_num": 0.0274658203125, "loss_xval": 0.6484375, "num_input_tokens_seen": 251323956, "step": 4487 }, { "epoch": 9.99554565701559, "grad_norm": 21.853805541992188, "learning_rate": 1e-06, "loss": 0.3252, "num_input_tokens_seen": 251383004, "step": 4488 }, { "epoch": 9.99554565701559, "loss": 0.4286557734012604, "loss_ce": 0.0001889891573227942, "loss_iou": 0.17578125, "loss_num": 0.0155029296875, "loss_xval": 0.427734375, "num_input_tokens_seen": 251383004, "step": 4488 }, { "epoch": 9.997772828507795, "grad_norm": 27.582048416137695, "learning_rate": 1e-06, "loss": 0.6094, "num_input_tokens_seen": 251442116, "step": 4489 }, { "epoch": 9.997772828507795, "loss": 0.6864469051361084, "loss_ce": 0.00016763756866566837, "loss_iou": 0.30078125, "loss_num": 0.016845703125, "loss_xval": 0.6875, "num_input_tokens_seen": 251442116, "step": 4489 }, { "epoch": 10.0, "grad_norm": 16.904457092285156, "learning_rate": 1e-06, "loss": 0.6229, "num_input_tokens_seen": 251498292, "step": 4490 }, { "epoch": 10.0, "loss": 0.6710529923439026, "loss_ce": 0.00015453548985533416, "loss_iou": 0.27734375, "loss_num": 0.0235595703125, "loss_xval": 0.671875, "num_input_tokens_seen": 251498292, "step": 4490 }, { "epoch": 10.002227171492205, "grad_norm": 24.753952026367188, "learning_rate": 1e-06, "loss": 0.5628, "num_input_tokens_seen": 251553736, "step": 4491 }, { "epoch": 10.002227171492205, "loss": 0.44396665692329407, "loss_ce": 0.00011899826495209709, "loss_iou": 0.1953125, "loss_num": 0.01055908203125, "loss_xval": 0.443359375, "num_input_tokens_seen": 251553736, "step": 4491 }, { "epoch": 10.00445434298441, "grad_norm": 23.54865074157715, "learning_rate": 1e-06, "loss": 0.6842, "num_input_tokens_seen": 251607144, "step": 4492 }, { "epoch": 10.00445434298441, "loss": 0.7414926290512085, "loss_ce": 0.00015957036521285772, "loss_iou": 0.298828125, "loss_num": 0.0284423828125, "loss_xval": 0.7421875, "num_input_tokens_seen": 251607144, "step": 4492 }, { "epoch": 10.006681514476615, "grad_norm": 17.976106643676758, "learning_rate": 1e-06, "loss": 0.5584, "num_input_tokens_seen": 251663048, "step": 4493 }, { "epoch": 10.006681514476615, "loss": 0.5196723937988281, "loss_ce": 0.0001411354896845296, "loss_iou": 0.201171875, "loss_num": 0.0234375, "loss_xval": 0.51953125, "num_input_tokens_seen": 251663048, "step": 4493 }, { "epoch": 10.00890868596882, "grad_norm": 14.608015060424805, "learning_rate": 1e-06, "loss": 0.4636, "num_input_tokens_seen": 251719488, "step": 4494 }, { "epoch": 10.00890868596882, "loss": 0.5168766975402832, "loss_ce": 0.00015306880231946707, "loss_iou": 0.2314453125, "loss_num": 0.01080322265625, "loss_xval": 0.515625, "num_input_tokens_seen": 251719488, "step": 4494 }, { "epoch": 10.011135857461024, "grad_norm": 18.117263793945312, "learning_rate": 1e-06, "loss": 0.6167, "num_input_tokens_seen": 251777652, "step": 4495 }, { "epoch": 10.011135857461024, "loss": 0.7012206315994263, "loss_ce": 0.0002929244365077466, "loss_iou": 0.283203125, "loss_num": 0.02685546875, "loss_xval": 0.69921875, "num_input_tokens_seen": 251777652, "step": 4495 }, { "epoch": 10.01336302895323, "grad_norm": 23.114547729492188, "learning_rate": 1e-06, "loss": 0.6466, "num_input_tokens_seen": 251834100, "step": 4496 }, { "epoch": 10.01336302895323, "loss": 0.4545919597148895, "loss_ce": 0.00012418595724739134, "loss_iou": 0.19921875, "loss_num": 0.010986328125, "loss_xval": 0.455078125, "num_input_tokens_seen": 251834100, "step": 4496 }, { "epoch": 10.015590200445434, "grad_norm": 35.5590705871582, "learning_rate": 1e-06, "loss": 0.5079, "num_input_tokens_seen": 251889480, "step": 4497 }, { "epoch": 10.015590200445434, "loss": 0.5074775218963623, "loss_ce": 0.00015331841132137924, "loss_iou": 0.220703125, "loss_num": 0.0133056640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 251889480, "step": 4497 }, { "epoch": 10.017817371937639, "grad_norm": 35.230525970458984, "learning_rate": 1e-06, "loss": 0.4227, "num_input_tokens_seen": 251944744, "step": 4498 }, { "epoch": 10.017817371937639, "loss": 0.35149550437927246, "loss_ce": 0.00011612092202994972, "loss_iou": 0.1533203125, "loss_num": 0.0089111328125, "loss_xval": 0.3515625, "num_input_tokens_seen": 251944744, "step": 4498 }, { "epoch": 10.020044543429844, "grad_norm": 17.35042953491211, "learning_rate": 1e-06, "loss": 0.5238, "num_input_tokens_seen": 252001072, "step": 4499 }, { "epoch": 10.020044543429844, "loss": 0.5627619624137878, "loss_ce": 0.0002619452425278723, "loss_iou": 0.228515625, "loss_num": 0.02099609375, "loss_xval": 0.5625, "num_input_tokens_seen": 252001072, "step": 4499 }, { "epoch": 10.022271714922049, "grad_norm": 18.346031188964844, "learning_rate": 1e-06, "loss": 0.6645, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "eval_seeclick_web_CIoU": 0.5782820582389832, "eval_seeclick_web_GIoU": 0.5778241455554962, "eval_seeclick_web_IoU": 0.5961449146270752, "eval_seeclick_web_MAE_all": 0.016307780984789133, "eval_seeclick_web_MAE_h": 0.008026089053601027, "eval_seeclick_web_MAE_w": 0.01621978636831045, "eval_seeclick_web_MAE_x_boxes": 0.009718116372823715, "eval_seeclick_web_MAE_y_boxes": 0.022209799382835627, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9367778301239014, "eval_seeclick_web_loss_ce": 0.00020874126494163647, "eval_seeclick_web_loss_iou": 0.4254150390625, "eval_seeclick_web_loss_num": 0.013081550598144531, "eval_seeclick_web_loss_xval": 0.916259765625, "eval_seeclick_web_runtime": 18.3321, "eval_seeclick_web_samples_per_second": 2.727, "eval_seeclick_web_steps_per_second": 0.109, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "eval_icons_CIoU": 0.2818611115217209, "eval_icons_GIoU": 0.3074444532394409, "eval_icons_IoU": 0.3626803755760193, "eval_icons_MAE_all": 0.06376760825514793, "eval_icons_MAE_h": 0.038395908661186695, "eval_icons_MAE_w": 0.06800028681755066, "eval_icons_MAE_x_boxes": 0.059712398797273636, "eval_icons_MAE_y_boxes": 0.03969671577215195, "eval_icons_inside_bbox": 0.6336805522441864, "eval_icons_loss": 1.7186964750289917, "eval_icons_loss_ce": 0.00024977065913844854, "eval_icons_loss_iou": 0.6683349609375, "eval_icons_loss_num": 0.06123924255371094, "eval_icons_loss_xval": 1.6416015625, "eval_icons_runtime": 19.3941, "eval_icons_samples_per_second": 2.578, "eval_icons_steps_per_second": 0.103, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "eval_screenspot_CIoU": 0.34726441899935406, "eval_screenspot_GIoU": 0.36475805441538495, "eval_screenspot_IoU": 0.43036482731501263, "eval_screenspot_MAE_all": 0.06156049047907194, "eval_screenspot_MAE_h": 0.0375117938965559, "eval_screenspot_MAE_w": 0.07428983474771182, "eval_screenspot_MAE_x_boxes": 0.07907873081664245, "eval_screenspot_MAE_y_boxes": 0.04031235041717688, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6380661725997925, "eval_screenspot_loss_ce": 0.00026154937222599983, "eval_screenspot_loss_iou": 0.6749674479166666, "eval_screenspot_loss_num": 0.0705553690592448, "eval_screenspot_loss_xval": 1.7029622395833333, "eval_screenspot_runtime": 30.7953, "eval_screenspot_samples_per_second": 2.89, "eval_screenspot_steps_per_second": 0.097, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "eval_compot_CIoU": 0.35239382088184357, "eval_compot_GIoU": 0.36740949749946594, "eval_compot_IoU": 0.4099326878786087, "eval_compot_MAE_all": 0.01800244627520442, "eval_compot_MAE_h": 0.008867041673511267, "eval_compot_MAE_w": 0.02112545073032379, "eval_compot_MAE_x_boxes": 0.030241395346820354, "eval_compot_MAE_y_boxes": 0.0067769435700029135, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3512978553771973, "eval_compot_loss_ce": 0.0002027387818088755, "eval_compot_loss_iou": 0.6209716796875, "eval_compot_loss_num": 0.016811370849609375, "eval_compot_loss_xval": 1.325439453125, "eval_compot_runtime": 19.3355, "eval_compot_samples_per_second": 2.586, "eval_compot_steps_per_second": 0.103, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "eval_custom_ui_val_CIoU": 0.46736228962739307, "eval_custom_ui_val_GIoU": 0.48078184492058224, "eval_custom_ui_val_IoU": 0.5274362398518456, "eval_custom_ui_val_MAE_all": 0.03146956084916989, "eval_custom_ui_val_MAE_h": 0.016498709821866617, "eval_custom_ui_val_MAE_w": 0.0397563229004542, "eval_custom_ui_val_MAE_x_boxes": 0.039604716209901705, "eval_custom_ui_val_MAE_y_boxes": 0.015526494192373421, "eval_custom_ui_val_inside_bbox": 0.7527006202273898, "eval_custom_ui_val_loss": 1.2082252502441406, "eval_custom_ui_val_loss_ce": 0.00023262486178686636, "eval_custom_ui_val_loss_iou": 0.5123562282986112, "eval_custom_ui_val_loss_num": 0.02882491217719184, "eval_custom_ui_val_loss_xval": 1.1688096788194444, "eval_custom_ui_val_runtime": 55.4656, "eval_custom_ui_val_samples_per_second": 4.778, "eval_custom_ui_val_steps_per_second": 0.162, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.022271714922049, "loss": 0.912562906742096, "loss_ce": 0.00020945594587828964, "loss_iou": 0.392578125, "loss_num": 0.0250244140625, "loss_xval": 0.9140625, "num_input_tokens_seen": 252054980, "step": 4500 }, { "epoch": 10.024498886414253, "grad_norm": 17.76951789855957, "learning_rate": 1e-06, "loss": 0.4816, "num_input_tokens_seen": 252110792, "step": 4501 }, { "epoch": 10.024498886414253, "loss": 0.40937352180480957, "loss_ce": 0.00019383057951927185, "loss_iou": 0.1826171875, "loss_num": 0.0087890625, "loss_xval": 0.41015625, "num_input_tokens_seen": 252110792, "step": 4501 }, { "epoch": 10.026726057906458, "grad_norm": 13.45481014251709, "learning_rate": 1e-06, "loss": 0.6639, "num_input_tokens_seen": 252168752, "step": 4502 }, { "epoch": 10.026726057906458, "loss": 0.7877389788627625, "loss_ce": 0.0001412917481502518, "loss_iou": 0.328125, "loss_num": 0.0264892578125, "loss_xval": 0.7890625, "num_input_tokens_seen": 252168752, "step": 4502 }, { "epoch": 10.028953229398663, "grad_norm": 14.474963188171387, "learning_rate": 1e-06, "loss": 0.5939, "num_input_tokens_seen": 252226176, "step": 4503 }, { "epoch": 10.028953229398663, "loss": 0.7263405323028564, "loss_ce": 0.00014424577238969505, "loss_iou": 0.279296875, "loss_num": 0.03369140625, "loss_xval": 0.7265625, "num_input_tokens_seen": 252226176, "step": 4503 }, { "epoch": 10.031180400890868, "grad_norm": 20.8179931640625, "learning_rate": 1e-06, "loss": 0.6298, "num_input_tokens_seen": 252285124, "step": 4504 }, { "epoch": 10.031180400890868, "loss": 0.5824363231658936, "loss_ce": 0.00016091388533823192, "loss_iou": 0.259765625, "loss_num": 0.01287841796875, "loss_xval": 0.58203125, "num_input_tokens_seen": 252285124, "step": 4504 }, { "epoch": 10.033407572383073, "grad_norm": 25.60984230041504, "learning_rate": 1e-06, "loss": 0.594, "num_input_tokens_seen": 252339596, "step": 4505 }, { "epoch": 10.033407572383073, "loss": 0.47377800941467285, "loss_ce": 0.00014518320676870644, "loss_iou": 0.203125, "loss_num": 0.01336669921875, "loss_xval": 0.47265625, "num_input_tokens_seen": 252339596, "step": 4505 }, { "epoch": 10.035634743875278, "grad_norm": 16.030750274658203, "learning_rate": 1e-06, "loss": 0.6331, "num_input_tokens_seen": 252395008, "step": 4506 }, { "epoch": 10.035634743875278, "loss": 0.6874623894691467, "loss_ce": 0.0001455222663935274, "loss_iou": 0.28515625, "loss_num": 0.0230712890625, "loss_xval": 0.6875, "num_input_tokens_seen": 252395008, "step": 4506 }, { "epoch": 10.037861915367483, "grad_norm": 24.892925262451172, "learning_rate": 1e-06, "loss": 0.6838, "num_input_tokens_seen": 252449524, "step": 4507 }, { "epoch": 10.037861915367483, "loss": 0.7407252788543701, "loss_ce": 0.00012467037595342845, "loss_iou": 0.33203125, "loss_num": 0.01507568359375, "loss_xval": 0.7421875, "num_input_tokens_seen": 252449524, "step": 4507 }, { "epoch": 10.040089086859687, "grad_norm": 20.39448356628418, "learning_rate": 1e-06, "loss": 0.4444, "num_input_tokens_seen": 252507044, "step": 4508 }, { "epoch": 10.040089086859687, "loss": 0.5464025139808655, "loss_ce": 0.0002599266008473933, "loss_iou": 0.2373046875, "loss_num": 0.01416015625, "loss_xval": 0.546875, "num_input_tokens_seen": 252507044, "step": 4508 }, { "epoch": 10.042316258351892, "grad_norm": 28.83409881591797, "learning_rate": 1e-06, "loss": 0.7085, "num_input_tokens_seen": 252561052, "step": 4509 }, { "epoch": 10.042316258351892, "loss": 0.6915697455406189, "loss_ce": 0.00016349003999494016, "loss_iou": 0.30859375, "loss_num": 0.0147705078125, "loss_xval": 0.69140625, "num_input_tokens_seen": 252561052, "step": 4509 }, { "epoch": 10.044543429844097, "grad_norm": 22.83576011657715, "learning_rate": 1e-06, "loss": 0.4975, "num_input_tokens_seen": 252613648, "step": 4510 }, { "epoch": 10.044543429844097, "loss": 0.3887307047843933, "loss_ce": 0.0005776156904175878, "loss_iou": 0.162109375, "loss_num": 0.012939453125, "loss_xval": 0.388671875, "num_input_tokens_seen": 252613648, "step": 4510 }, { "epoch": 10.046770601336302, "grad_norm": 20.531705856323242, "learning_rate": 1e-06, "loss": 0.6052, "num_input_tokens_seen": 252670492, "step": 4511 }, { "epoch": 10.046770601336302, "loss": 0.5842575430870056, "loss_ce": 0.00015114534471649677, "loss_iou": 0.25390625, "loss_num": 0.01507568359375, "loss_xval": 0.5859375, "num_input_tokens_seen": 252670492, "step": 4511 }, { "epoch": 10.048997772828507, "grad_norm": 19.739933013916016, "learning_rate": 1e-06, "loss": 0.9433, "num_input_tokens_seen": 252725212, "step": 4512 }, { "epoch": 10.048997772828507, "loss": 1.3185453414916992, "loss_ce": 0.0001860101765487343, "loss_iou": 0.53515625, "loss_num": 0.049072265625, "loss_xval": 1.3203125, "num_input_tokens_seen": 252725212, "step": 4512 }, { "epoch": 10.051224944320714, "grad_norm": 68.91185760498047, "learning_rate": 1e-06, "loss": 0.478, "num_input_tokens_seen": 252781356, "step": 4513 }, { "epoch": 10.051224944320714, "loss": 0.46619629859924316, "loss_ce": 0.00013182274415157735, "loss_iou": 0.1982421875, "loss_num": 0.01397705078125, "loss_xval": 0.466796875, "num_input_tokens_seen": 252781356, "step": 4513 }, { "epoch": 10.053452115812918, "grad_norm": 47.988834381103516, "learning_rate": 1e-06, "loss": 0.7151, "num_input_tokens_seen": 252835956, "step": 4514 }, { "epoch": 10.053452115812918, "loss": 0.7893850803375244, "loss_ce": 0.00032261922024190426, "loss_iou": 0.3359375, "loss_num": 0.023681640625, "loss_xval": 0.7890625, "num_input_tokens_seen": 252835956, "step": 4514 }, { "epoch": 10.055679287305123, "grad_norm": 13.075557708740234, "learning_rate": 1e-06, "loss": 0.5498, "num_input_tokens_seen": 252893772, "step": 4515 }, { "epoch": 10.055679287305123, "loss": 0.49821239709854126, "loss_ce": 0.00016552505258005112, "loss_iou": 0.2109375, "loss_num": 0.01507568359375, "loss_xval": 0.498046875, "num_input_tokens_seen": 252893772, "step": 4515 }, { "epoch": 10.057906458797328, "grad_norm": 31.43246078491211, "learning_rate": 1e-06, "loss": 0.5931, "num_input_tokens_seen": 252947940, "step": 4516 }, { "epoch": 10.057906458797328, "loss": 0.6125627756118774, "loss_ce": 0.0002580622094683349, "loss_iou": 0.279296875, "loss_num": 0.0106201171875, "loss_xval": 0.61328125, "num_input_tokens_seen": 252947940, "step": 4516 }, { "epoch": 10.060133630289533, "grad_norm": 27.630748748779297, "learning_rate": 1e-06, "loss": 0.6048, "num_input_tokens_seen": 253005340, "step": 4517 }, { "epoch": 10.060133630289533, "loss": 0.699859619140625, "loss_ce": 0.00015256297774612904, "loss_iou": 0.296875, "loss_num": 0.021484375, "loss_xval": 0.69921875, "num_input_tokens_seen": 253005340, "step": 4517 }, { "epoch": 10.062360801781738, "grad_norm": 23.77977180480957, "learning_rate": 1e-06, "loss": 0.5228, "num_input_tokens_seen": 253061712, "step": 4518 }, { "epoch": 10.062360801781738, "loss": 0.5870422124862671, "loss_ce": 0.000128203013446182, "loss_iou": 0.2451171875, "loss_num": 0.01904296875, "loss_xval": 0.5859375, "num_input_tokens_seen": 253061712, "step": 4518 }, { "epoch": 10.064587973273943, "grad_norm": 20.586177825927734, "learning_rate": 1e-06, "loss": 0.581, "num_input_tokens_seen": 253119552, "step": 4519 }, { "epoch": 10.064587973273943, "loss": 0.5409017205238342, "loss_ce": 0.000130253320094198, "loss_iou": 0.25390625, "loss_num": 0.00677490234375, "loss_xval": 0.5390625, "num_input_tokens_seen": 253119552, "step": 4519 }, { "epoch": 10.066815144766148, "grad_norm": 19.27606964111328, "learning_rate": 1e-06, "loss": 0.5846, "num_input_tokens_seen": 253173196, "step": 4520 }, { "epoch": 10.066815144766148, "loss": 0.5604423880577087, "loss_ce": 0.00013963712262921035, "loss_iou": 0.24609375, "loss_num": 0.01361083984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 253173196, "step": 4520 }, { "epoch": 10.069042316258352, "grad_norm": 16.18434715270996, "learning_rate": 1e-06, "loss": 0.4942, "num_input_tokens_seen": 253230384, "step": 4521 }, { "epoch": 10.069042316258352, "loss": 0.5661949515342712, "loss_ce": 0.00015493093815166503, "loss_iou": 0.2421875, "loss_num": 0.0164794921875, "loss_xval": 0.56640625, "num_input_tokens_seen": 253230384, "step": 4521 }, { "epoch": 10.071269487750557, "grad_norm": 22.953781127929688, "learning_rate": 1e-06, "loss": 0.5444, "num_input_tokens_seen": 253286812, "step": 4522 }, { "epoch": 10.071269487750557, "loss": 0.5951232314109802, "loss_ce": 0.00015253589663188905, "loss_iou": 0.26953125, "loss_num": 0.011474609375, "loss_xval": 0.59375, "num_input_tokens_seen": 253286812, "step": 4522 }, { "epoch": 10.073496659242762, "grad_norm": 18.672510147094727, "learning_rate": 1e-06, "loss": 0.5558, "num_input_tokens_seen": 253343172, "step": 4523 }, { "epoch": 10.073496659242762, "loss": 0.5056633949279785, "loss_ce": 0.00017027268768288195, "loss_iou": 0.2177734375, "loss_num": 0.01409912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 253343172, "step": 4523 }, { "epoch": 10.075723830734967, "grad_norm": 17.472490310668945, "learning_rate": 1e-06, "loss": 0.6312, "num_input_tokens_seen": 253397752, "step": 4524 }, { "epoch": 10.075723830734967, "loss": 0.6256510019302368, "loss_ce": 0.0001627619785722345, "loss_iou": 0.287109375, "loss_num": 0.010009765625, "loss_xval": 0.625, "num_input_tokens_seen": 253397752, "step": 4524 }, { "epoch": 10.077951002227172, "grad_norm": 13.800442695617676, "learning_rate": 1e-06, "loss": 0.4006, "num_input_tokens_seen": 253454584, "step": 4525 }, { "epoch": 10.077951002227172, "loss": 0.4295790493488312, "loss_ce": 0.00013570842565968633, "loss_iou": 0.1826171875, "loss_num": 0.0130615234375, "loss_xval": 0.4296875, "num_input_tokens_seen": 253454584, "step": 4525 }, { "epoch": 10.080178173719377, "grad_norm": 83.25904846191406, "learning_rate": 1e-06, "loss": 0.5269, "num_input_tokens_seen": 253512564, "step": 4526 }, { "epoch": 10.080178173719377, "loss": 0.45021358132362366, "loss_ce": 0.00014032571925781667, "loss_iou": 0.2060546875, "loss_num": 0.007476806640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 253512564, "step": 4526 }, { "epoch": 10.082405345211582, "grad_norm": 32.7124137878418, "learning_rate": 1e-06, "loss": 0.6008, "num_input_tokens_seen": 253569208, "step": 4527 }, { "epoch": 10.082405345211582, "loss": 0.5963279008865356, "loss_ce": 0.00013654123176820576, "loss_iou": 0.267578125, "loss_num": 0.01220703125, "loss_xval": 0.59765625, "num_input_tokens_seen": 253569208, "step": 4527 }, { "epoch": 10.084632516703786, "grad_norm": 18.75583839416504, "learning_rate": 1e-06, "loss": 0.436, "num_input_tokens_seen": 253625684, "step": 4528 }, { "epoch": 10.084632516703786, "loss": 0.42995163798332214, "loss_ce": 0.00014207106141839176, "loss_iou": 0.1767578125, "loss_num": 0.01519775390625, "loss_xval": 0.4296875, "num_input_tokens_seen": 253625684, "step": 4528 }, { "epoch": 10.086859688195991, "grad_norm": 55.7838020324707, "learning_rate": 1e-06, "loss": 0.7112, "num_input_tokens_seen": 253679644, "step": 4529 }, { "epoch": 10.086859688195991, "loss": 0.73432457447052, "loss_ce": 0.00019371899543330073, "loss_iou": 0.326171875, "loss_num": 0.0164794921875, "loss_xval": 0.734375, "num_input_tokens_seen": 253679644, "step": 4529 }, { "epoch": 10.089086859688196, "grad_norm": 15.54879379272461, "learning_rate": 1e-06, "loss": 0.5008, "num_input_tokens_seen": 253738164, "step": 4530 }, { "epoch": 10.089086859688196, "loss": 0.4535183012485504, "loss_ce": 0.00014916164218448102, "loss_iou": 0.1875, "loss_num": 0.015625, "loss_xval": 0.453125, "num_input_tokens_seen": 253738164, "step": 4530 }, { "epoch": 10.091314031180401, "grad_norm": 26.567584991455078, "learning_rate": 1e-06, "loss": 0.6987, "num_input_tokens_seen": 253793512, "step": 4531 }, { "epoch": 10.091314031180401, "loss": 0.8244662880897522, "loss_ce": 0.0002475357032380998, "loss_iou": 0.34375, "loss_num": 0.0274658203125, "loss_xval": 0.82421875, "num_input_tokens_seen": 253793512, "step": 4531 }, { "epoch": 10.093541202672606, "grad_norm": 17.407928466796875, "learning_rate": 1e-06, "loss": 0.6066, "num_input_tokens_seen": 253851088, "step": 4532 }, { "epoch": 10.093541202672606, "loss": 0.7939009666442871, "loss_ce": 0.0009321961551904678, "loss_iou": 0.32421875, "loss_num": 0.02880859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 253851088, "step": 4532 }, { "epoch": 10.09576837416481, "grad_norm": 15.048906326293945, "learning_rate": 1e-06, "loss": 0.4981, "num_input_tokens_seen": 253906892, "step": 4533 }, { "epoch": 10.09576837416481, "loss": 0.5735203623771667, "loss_ce": 0.00015607741079293191, "loss_iou": 0.232421875, "loss_num": 0.0216064453125, "loss_xval": 0.57421875, "num_input_tokens_seen": 253906892, "step": 4533 }, { "epoch": 10.097995545657016, "grad_norm": 19.791410446166992, "learning_rate": 1e-06, "loss": 0.5475, "num_input_tokens_seen": 253964416, "step": 4534 }, { "epoch": 10.097995545657016, "loss": 0.5835007429122925, "loss_ce": 0.00014201825251802802, "loss_iou": 0.2490234375, "loss_num": 0.01708984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 253964416, "step": 4534 }, { "epoch": 10.10022271714922, "grad_norm": 16.358074188232422, "learning_rate": 1e-06, "loss": 0.3828, "num_input_tokens_seen": 254021404, "step": 4535 }, { "epoch": 10.10022271714922, "loss": 0.48548585176467896, "loss_ce": 0.00013428418606054038, "loss_iou": 0.2099609375, "loss_num": 0.01300048828125, "loss_xval": 0.484375, "num_input_tokens_seen": 254021404, "step": 4535 }, { "epoch": 10.102449888641425, "grad_norm": 20.186296463012695, "learning_rate": 1e-06, "loss": 0.7477, "num_input_tokens_seen": 254076892, "step": 4536 }, { "epoch": 10.102449888641425, "loss": 0.8367500305175781, "loss_ce": 0.0001716263359412551, "loss_iou": 0.328125, "loss_num": 0.035888671875, "loss_xval": 0.8359375, "num_input_tokens_seen": 254076892, "step": 4536 }, { "epoch": 10.10467706013363, "grad_norm": 23.128829956054688, "learning_rate": 1e-06, "loss": 0.6005, "num_input_tokens_seen": 254132820, "step": 4537 }, { "epoch": 10.10467706013363, "loss": 0.7570396661758423, "loss_ce": 0.00020372896688058972, "loss_iou": 0.345703125, "loss_num": 0.01300048828125, "loss_xval": 0.7578125, "num_input_tokens_seen": 254132820, "step": 4537 }, { "epoch": 10.106904231625835, "grad_norm": 19.934675216674805, "learning_rate": 1e-06, "loss": 0.5398, "num_input_tokens_seen": 254190556, "step": 4538 }, { "epoch": 10.106904231625835, "loss": 0.46338510513305664, "loss_ce": 0.0004944695974700153, "loss_iou": 0.2080078125, "loss_num": 0.009521484375, "loss_xval": 0.462890625, "num_input_tokens_seen": 254190556, "step": 4538 }, { "epoch": 10.10913140311804, "grad_norm": 24.60442352294922, "learning_rate": 1e-06, "loss": 0.6456, "num_input_tokens_seen": 254248000, "step": 4539 }, { "epoch": 10.10913140311804, "loss": 0.7887189984321594, "loss_ce": 0.0001448030088795349, "loss_iou": 0.310546875, "loss_num": 0.033447265625, "loss_xval": 0.7890625, "num_input_tokens_seen": 254248000, "step": 4539 }, { "epoch": 10.111358574610245, "grad_norm": 16.316864013671875, "learning_rate": 1e-06, "loss": 0.5443, "num_input_tokens_seen": 254306248, "step": 4540 }, { "epoch": 10.111358574610245, "loss": 0.3741779625415802, "loss_ce": 0.00015453985542990267, "loss_iou": 0.15625, "loss_num": 0.01220703125, "loss_xval": 0.375, "num_input_tokens_seen": 254306248, "step": 4540 }, { "epoch": 10.11358574610245, "grad_norm": 19.49997901916504, "learning_rate": 1e-06, "loss": 0.6895, "num_input_tokens_seen": 254359852, "step": 4541 }, { "epoch": 10.11358574610245, "loss": 0.6783727407455444, "loss_ce": 0.00015005419845692813, "loss_iou": 0.27734375, "loss_num": 0.0250244140625, "loss_xval": 0.6796875, "num_input_tokens_seen": 254359852, "step": 4541 }, { "epoch": 10.115812917594655, "grad_norm": 16.376052856445312, "learning_rate": 1e-06, "loss": 0.4816, "num_input_tokens_seen": 254414044, "step": 4542 }, { "epoch": 10.115812917594655, "loss": 0.526771605014801, "loss_ce": 0.00016028305981308222, "loss_iou": 0.2294921875, "loss_num": 0.01385498046875, "loss_xval": 0.52734375, "num_input_tokens_seen": 254414044, "step": 4542 }, { "epoch": 10.11804008908686, "grad_norm": 20.01656723022461, "learning_rate": 1e-06, "loss": 0.5995, "num_input_tokens_seen": 254470696, "step": 4543 }, { "epoch": 10.11804008908686, "loss": 0.538500964641571, "loss_ce": 0.000170878556673415, "loss_iou": 0.2109375, "loss_num": 0.02294921875, "loss_xval": 0.5390625, "num_input_tokens_seen": 254470696, "step": 4543 }, { "epoch": 10.120267260579064, "grad_norm": 15.220352172851562, "learning_rate": 1e-06, "loss": 0.5189, "num_input_tokens_seen": 254526240, "step": 4544 }, { "epoch": 10.120267260579064, "loss": 0.5009964108467102, "loss_ce": 0.0002639779122546315, "loss_iou": 0.2236328125, "loss_num": 0.0106201171875, "loss_xval": 0.5, "num_input_tokens_seen": 254526240, "step": 4544 }, { "epoch": 10.122494432071269, "grad_norm": 15.65402889251709, "learning_rate": 1e-06, "loss": 0.5134, "num_input_tokens_seen": 254582940, "step": 4545 }, { "epoch": 10.122494432071269, "loss": 0.52699214220047, "loss_ce": 0.0001366714423056692, "loss_iou": 0.232421875, "loss_num": 0.012451171875, "loss_xval": 0.52734375, "num_input_tokens_seen": 254582940, "step": 4545 }, { "epoch": 10.124721603563474, "grad_norm": 21.156890869140625, "learning_rate": 1e-06, "loss": 0.5, "num_input_tokens_seen": 254638184, "step": 4546 }, { "epoch": 10.124721603563474, "loss": 0.5189378261566162, "loss_ce": 0.0001390243851346895, "loss_iou": 0.236328125, "loss_num": 0.009033203125, "loss_xval": 0.51953125, "num_input_tokens_seen": 254638184, "step": 4546 }, { "epoch": 10.126948775055679, "grad_norm": 20.83241844177246, "learning_rate": 1e-06, "loss": 0.4975, "num_input_tokens_seen": 254692884, "step": 4547 }, { "epoch": 10.126948775055679, "loss": 0.4995710849761963, "loss_ce": 0.00018145760986953974, "loss_iou": 0.21484375, "loss_num": 0.0137939453125, "loss_xval": 0.5, "num_input_tokens_seen": 254692884, "step": 4547 }, { "epoch": 10.129175946547884, "grad_norm": 146.1394805908203, "learning_rate": 1e-06, "loss": 0.5299, "num_input_tokens_seen": 254749104, "step": 4548 }, { "epoch": 10.129175946547884, "loss": 0.6289869546890259, "loss_ce": 0.001545518171042204, "loss_iou": 0.240234375, "loss_num": 0.0294189453125, "loss_xval": 0.62890625, "num_input_tokens_seen": 254749104, "step": 4548 }, { "epoch": 10.131403118040089, "grad_norm": 21.525724411010742, "learning_rate": 1e-06, "loss": 0.4352, "num_input_tokens_seen": 254806856, "step": 4549 }, { "epoch": 10.131403118040089, "loss": 0.44396907091140747, "loss_ce": 0.00012142823834437877, "loss_iou": 0.203125, "loss_num": 0.00738525390625, "loss_xval": 0.443359375, "num_input_tokens_seen": 254806856, "step": 4549 }, { "epoch": 10.133630289532293, "grad_norm": 15.656088829040527, "learning_rate": 1e-06, "loss": 0.486, "num_input_tokens_seen": 254862556, "step": 4550 }, { "epoch": 10.133630289532293, "loss": 0.3813803195953369, "loss_ce": 0.0001547573774587363, "loss_iou": 0.162109375, "loss_num": 0.011474609375, "loss_xval": 0.380859375, "num_input_tokens_seen": 254862556, "step": 4550 }, { "epoch": 10.135857461024498, "grad_norm": 18.48883628845215, "learning_rate": 1e-06, "loss": 0.4152, "num_input_tokens_seen": 254921356, "step": 4551 }, { "epoch": 10.135857461024498, "loss": 0.4063987135887146, "loss_ce": 0.00027079382562078536, "loss_iou": 0.1796875, "loss_num": 0.00958251953125, "loss_xval": 0.40625, "num_input_tokens_seen": 254921356, "step": 4551 }, { "epoch": 10.138084632516703, "grad_norm": 14.69189739227295, "learning_rate": 1e-06, "loss": 0.576, "num_input_tokens_seen": 254979752, "step": 4552 }, { "epoch": 10.138084632516703, "loss": 0.520897388458252, "loss_ce": 0.00014549962361343205, "loss_iou": 0.2216796875, "loss_num": 0.0152587890625, "loss_xval": 0.51953125, "num_input_tokens_seen": 254979752, "step": 4552 }, { "epoch": 10.140311804008908, "grad_norm": 16.03564453125, "learning_rate": 1e-06, "loss": 0.4583, "num_input_tokens_seen": 255037852, "step": 4553 }, { "epoch": 10.140311804008908, "loss": 0.500576376914978, "loss_ce": 0.0005153242964297533, "loss_iou": 0.2275390625, "loss_num": 0.0087890625, "loss_xval": 0.5, "num_input_tokens_seen": 255037852, "step": 4553 }, { "epoch": 10.142538975501113, "grad_norm": 14.397992134094238, "learning_rate": 1e-06, "loss": 0.5558, "num_input_tokens_seen": 255095480, "step": 4554 }, { "epoch": 10.142538975501113, "loss": 0.4591131806373596, "loss_ce": 0.0001288054045289755, "loss_iou": 0.2080078125, "loss_num": 0.0086669921875, "loss_xval": 0.458984375, "num_input_tokens_seen": 255095480, "step": 4554 }, { "epoch": 10.144766146993318, "grad_norm": 16.567859649658203, "learning_rate": 1e-06, "loss": 0.3819, "num_input_tokens_seen": 255154316, "step": 4555 }, { "epoch": 10.144766146993318, "loss": 0.3125069737434387, "loss_ce": 0.0001290409272769466, "loss_iou": 0.115234375, "loss_num": 0.0164794921875, "loss_xval": 0.3125, "num_input_tokens_seen": 255154316, "step": 4555 }, { "epoch": 10.146993318485523, "grad_norm": 13.150337219238281, "learning_rate": 1e-06, "loss": 0.4094, "num_input_tokens_seen": 255211692, "step": 4556 }, { "epoch": 10.146993318485523, "loss": 0.3635147213935852, "loss_ce": 0.00011139338312204927, "loss_iou": 0.15625, "loss_num": 0.0101318359375, "loss_xval": 0.36328125, "num_input_tokens_seen": 255211692, "step": 4556 }, { "epoch": 10.14922048997773, "grad_norm": 18.678987503051758, "learning_rate": 1e-06, "loss": 0.2899, "num_input_tokens_seen": 255265416, "step": 4557 }, { "epoch": 10.14922048997773, "loss": 0.3159905672073364, "loss_ce": 0.0001946770935319364, "loss_iou": 0.130859375, "loss_num": 0.01080322265625, "loss_xval": 0.31640625, "num_input_tokens_seen": 255265416, "step": 4557 }, { "epoch": 10.151447661469934, "grad_norm": 15.587052345275879, "learning_rate": 1e-06, "loss": 0.5265, "num_input_tokens_seen": 255319560, "step": 4558 }, { "epoch": 10.151447661469934, "loss": 0.3966302275657654, "loss_ce": 0.0001458691549487412, "loss_iou": 0.177734375, "loss_num": 0.00830078125, "loss_xval": 0.396484375, "num_input_tokens_seen": 255319560, "step": 4558 }, { "epoch": 10.153674832962139, "grad_norm": 19.554283142089844, "learning_rate": 1e-06, "loss": 0.5922, "num_input_tokens_seen": 255372816, "step": 4559 }, { "epoch": 10.153674832962139, "loss": 0.5624641180038452, "loss_ce": 0.0002082242863252759, "loss_iou": 0.2158203125, "loss_num": 0.0260009765625, "loss_xval": 0.5625, "num_input_tokens_seen": 255372816, "step": 4559 }, { "epoch": 10.155902004454344, "grad_norm": 27.854368209838867, "learning_rate": 1e-06, "loss": 0.5583, "num_input_tokens_seen": 255426296, "step": 4560 }, { "epoch": 10.155902004454344, "loss": 0.5693508386611938, "loss_ce": 0.00013697383110411465, "loss_iou": 0.2373046875, "loss_num": 0.018798828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 255426296, "step": 4560 }, { "epoch": 10.158129175946549, "grad_norm": 22.34395408630371, "learning_rate": 1e-06, "loss": 0.4309, "num_input_tokens_seen": 255479824, "step": 4561 }, { "epoch": 10.158129175946549, "loss": 0.3726035952568054, "loss_ce": 0.0002891222247853875, "loss_iou": 0.1494140625, "loss_num": 0.0147705078125, "loss_xval": 0.373046875, "num_input_tokens_seen": 255479824, "step": 4561 }, { "epoch": 10.160356347438753, "grad_norm": 16.66666603088379, "learning_rate": 1e-06, "loss": 0.4812, "num_input_tokens_seen": 255533776, "step": 4562 }, { "epoch": 10.160356347438753, "loss": 0.46586695313453674, "loss_ce": 0.0002907839370891452, "loss_iou": 0.1748046875, "loss_num": 0.023193359375, "loss_xval": 0.46484375, "num_input_tokens_seen": 255533776, "step": 4562 }, { "epoch": 10.162583518930958, "grad_norm": 15.96545696258545, "learning_rate": 1e-06, "loss": 0.633, "num_input_tokens_seen": 255592708, "step": 4563 }, { "epoch": 10.162583518930958, "loss": 0.7283331751823425, "loss_ce": 0.00018375377112533897, "loss_iou": 0.3203125, "loss_num": 0.017578125, "loss_xval": 0.7265625, "num_input_tokens_seen": 255592708, "step": 4563 }, { "epoch": 10.164810690423163, "grad_norm": 17.769378662109375, "learning_rate": 1e-06, "loss": 0.5003, "num_input_tokens_seen": 255651044, "step": 4564 }, { "epoch": 10.164810690423163, "loss": 0.47229546308517456, "loss_ce": 0.00012747224536724389, "loss_iou": 0.1962890625, "loss_num": 0.0159912109375, "loss_xval": 0.47265625, "num_input_tokens_seen": 255651044, "step": 4564 }, { "epoch": 10.167037861915368, "grad_norm": 37.10343933105469, "learning_rate": 1e-06, "loss": 0.5678, "num_input_tokens_seen": 255706820, "step": 4565 }, { "epoch": 10.167037861915368, "loss": 0.5193136930465698, "loss_ce": 0.00027068209601566195, "loss_iou": 0.2255859375, "loss_num": 0.01373291015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 255706820, "step": 4565 }, { "epoch": 10.169265033407573, "grad_norm": 16.142242431640625, "learning_rate": 1e-06, "loss": 0.5588, "num_input_tokens_seen": 255764732, "step": 4566 }, { "epoch": 10.169265033407573, "loss": 0.6505287885665894, "loss_ce": 0.00013815713464282453, "loss_iou": 0.29296875, "loss_num": 0.0133056640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 255764732, "step": 4566 }, { "epoch": 10.171492204899778, "grad_norm": 26.361278533935547, "learning_rate": 1e-06, "loss": 0.5016, "num_input_tokens_seen": 255821704, "step": 4567 }, { "epoch": 10.171492204899778, "loss": 0.5541030168533325, "loss_ce": 0.0001479470229241997, "loss_iou": 0.240234375, "loss_num": 0.0150146484375, "loss_xval": 0.5546875, "num_input_tokens_seen": 255821704, "step": 4567 }, { "epoch": 10.173719376391983, "grad_norm": 14.757318496704102, "learning_rate": 1e-06, "loss": 0.5656, "num_input_tokens_seen": 255880672, "step": 4568 }, { "epoch": 10.173719376391983, "loss": 0.6377292275428772, "loss_ce": 0.00015599204925820231, "loss_iou": 0.263671875, "loss_num": 0.0223388671875, "loss_xval": 0.63671875, "num_input_tokens_seen": 255880672, "step": 4568 }, { "epoch": 10.175946547884188, "grad_norm": 20.087034225463867, "learning_rate": 1e-06, "loss": 0.5458, "num_input_tokens_seen": 255938112, "step": 4569 }, { "epoch": 10.175946547884188, "loss": 0.5018346905708313, "loss_ce": 0.002200859831646085, "loss_iou": 0.2265625, "loss_num": 0.0093994140625, "loss_xval": 0.5, "num_input_tokens_seen": 255938112, "step": 4569 }, { "epoch": 10.178173719376392, "grad_norm": 17.099891662597656, "learning_rate": 1e-06, "loss": 0.6128, "num_input_tokens_seen": 255994480, "step": 4570 }, { "epoch": 10.178173719376392, "loss": 0.6893445253372192, "loss_ce": 0.00013553063035942614, "loss_iou": 0.27734375, "loss_num": 0.0267333984375, "loss_xval": 0.6875, "num_input_tokens_seen": 255994480, "step": 4570 }, { "epoch": 10.180400890868597, "grad_norm": 24.685178756713867, "learning_rate": 1e-06, "loss": 0.5695, "num_input_tokens_seen": 256051472, "step": 4571 }, { "epoch": 10.180400890868597, "loss": 0.691739022731781, "loss_ce": 0.0013093581655994058, "loss_iou": 0.306640625, "loss_num": 0.015380859375, "loss_xval": 0.69140625, "num_input_tokens_seen": 256051472, "step": 4571 }, { "epoch": 10.182628062360802, "grad_norm": 22.65061378479004, "learning_rate": 1e-06, "loss": 0.5629, "num_input_tokens_seen": 256104576, "step": 4572 }, { "epoch": 10.182628062360802, "loss": 0.7198662757873535, "loss_ce": 0.00013974003377370536, "loss_iou": 0.306640625, "loss_num": 0.021240234375, "loss_xval": 0.71875, "num_input_tokens_seen": 256104576, "step": 4572 }, { "epoch": 10.184855233853007, "grad_norm": 21.435075759887695, "learning_rate": 1e-06, "loss": 0.4938, "num_input_tokens_seen": 256159868, "step": 4573 }, { "epoch": 10.184855233853007, "loss": 0.5475330352783203, "loss_ce": 0.00016974794561974704, "loss_iou": 0.240234375, "loss_num": 0.0133056640625, "loss_xval": 0.546875, "num_input_tokens_seen": 256159868, "step": 4573 }, { "epoch": 10.187082405345212, "grad_norm": 15.654433250427246, "learning_rate": 1e-06, "loss": 0.5428, "num_input_tokens_seen": 256216408, "step": 4574 }, { "epoch": 10.187082405345212, "loss": 0.6571834087371826, "loss_ce": 0.00020100505207665265, "loss_iou": 0.2734375, "loss_num": 0.0224609375, "loss_xval": 0.65625, "num_input_tokens_seen": 256216408, "step": 4574 }, { "epoch": 10.189309576837417, "grad_norm": 20.29172706604004, "learning_rate": 1e-06, "loss": 0.4126, "num_input_tokens_seen": 256272580, "step": 4575 }, { "epoch": 10.189309576837417, "loss": 0.3565613627433777, "loss_ce": 0.00011605105100898072, "loss_iou": 0.154296875, "loss_num": 0.00958251953125, "loss_xval": 0.35546875, "num_input_tokens_seen": 256272580, "step": 4575 }, { "epoch": 10.191536748329622, "grad_norm": 47.00114440917969, "learning_rate": 1e-06, "loss": 0.486, "num_input_tokens_seen": 256327452, "step": 4576 }, { "epoch": 10.191536748329622, "loss": 0.6286421418190002, "loss_ce": 0.00022419106971938163, "loss_iou": 0.263671875, "loss_num": 0.0198974609375, "loss_xval": 0.62890625, "num_input_tokens_seen": 256327452, "step": 4576 }, { "epoch": 10.193763919821826, "grad_norm": 15.635924339294434, "learning_rate": 1e-06, "loss": 0.547, "num_input_tokens_seen": 256383876, "step": 4577 }, { "epoch": 10.193763919821826, "loss": 0.6068173050880432, "loss_ce": 0.00012786718434654176, "loss_iou": 0.267578125, "loss_num": 0.01409912109375, "loss_xval": 0.60546875, "num_input_tokens_seen": 256383876, "step": 4577 }, { "epoch": 10.195991091314031, "grad_norm": 24.49083137512207, "learning_rate": 1e-06, "loss": 0.4668, "num_input_tokens_seen": 256441820, "step": 4578 }, { "epoch": 10.195991091314031, "loss": 0.49414756894111633, "loss_ce": 0.00012899917783215642, "loss_iou": 0.21875, "loss_num": 0.0113525390625, "loss_xval": 0.494140625, "num_input_tokens_seen": 256441820, "step": 4578 }, { "epoch": 10.198218262806236, "grad_norm": 18.868743896484375, "learning_rate": 1e-06, "loss": 0.7535, "num_input_tokens_seen": 256495464, "step": 4579 }, { "epoch": 10.198218262806236, "loss": 0.7945007681846619, "loss_ce": 0.0001892938744276762, "loss_iou": 0.30859375, "loss_num": 0.03564453125, "loss_xval": 0.79296875, "num_input_tokens_seen": 256495464, "step": 4579 }, { "epoch": 10.200445434298441, "grad_norm": 18.00078773498535, "learning_rate": 1e-06, "loss": 0.5301, "num_input_tokens_seen": 256551316, "step": 4580 }, { "epoch": 10.200445434298441, "loss": 0.6253817677497864, "loss_ce": 0.000137610943056643, "loss_iou": 0.2734375, "loss_num": 0.01531982421875, "loss_xval": 0.625, "num_input_tokens_seen": 256551316, "step": 4580 }, { "epoch": 10.202672605790646, "grad_norm": 14.672399520874023, "learning_rate": 1e-06, "loss": 0.5689, "num_input_tokens_seen": 256607460, "step": 4581 }, { "epoch": 10.202672605790646, "loss": 0.7001278400421143, "loss_ce": 0.0001766737550497055, "loss_iou": 0.283203125, "loss_num": 0.0267333984375, "loss_xval": 0.69921875, "num_input_tokens_seen": 256607460, "step": 4581 }, { "epoch": 10.20489977728285, "grad_norm": 27.121843338012695, "learning_rate": 1e-06, "loss": 0.3811, "num_input_tokens_seen": 256662608, "step": 4582 }, { "epoch": 10.20489977728285, "loss": 0.28326690196990967, "loss_ce": 0.00012479553697630763, "loss_iou": 0.107421875, "loss_num": 0.013671875, "loss_xval": 0.283203125, "num_input_tokens_seen": 256662608, "step": 4582 }, { "epoch": 10.207126948775056, "grad_norm": 22.3253231048584, "learning_rate": 1e-06, "loss": 0.49, "num_input_tokens_seen": 256719680, "step": 4583 }, { "epoch": 10.207126948775056, "loss": 0.5782710313796997, "loss_ce": 0.0001460201747249812, "loss_iou": 0.2451171875, "loss_num": 0.0174560546875, "loss_xval": 0.578125, "num_input_tokens_seen": 256719680, "step": 4583 }, { "epoch": 10.20935412026726, "grad_norm": 15.034200668334961, "learning_rate": 1e-06, "loss": 0.3748, "num_input_tokens_seen": 256775480, "step": 4584 }, { "epoch": 10.20935412026726, "loss": 0.3525688052177429, "loss_ce": 0.0001518014323664829, "loss_iou": 0.1484375, "loss_num": 0.01123046875, "loss_xval": 0.3515625, "num_input_tokens_seen": 256775480, "step": 4584 }, { "epoch": 10.211581291759465, "grad_norm": 20.57643699645996, "learning_rate": 1e-06, "loss": 0.4699, "num_input_tokens_seen": 256830920, "step": 4585 }, { "epoch": 10.211581291759465, "loss": 0.6310067772865295, "loss_ce": 0.00014741039194632322, "loss_iou": 0.28125, "loss_num": 0.01348876953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 256830920, "step": 4585 }, { "epoch": 10.21380846325167, "grad_norm": 19.190690994262695, "learning_rate": 1e-06, "loss": 0.6572, "num_input_tokens_seen": 256885536, "step": 4586 }, { "epoch": 10.21380846325167, "loss": 0.8363385200500488, "loss_ce": 0.0001568598672747612, "loss_iou": 0.36328125, "loss_num": 0.022216796875, "loss_xval": 0.8359375, "num_input_tokens_seen": 256885536, "step": 4586 }, { "epoch": 10.216035634743875, "grad_norm": 19.48312759399414, "learning_rate": 1e-06, "loss": 0.7196, "num_input_tokens_seen": 256944132, "step": 4587 }, { "epoch": 10.216035634743875, "loss": 0.6926649808883667, "loss_ce": 0.00016006956866476685, "loss_iou": 0.3046875, "loss_num": 0.0164794921875, "loss_xval": 0.69140625, "num_input_tokens_seen": 256944132, "step": 4587 }, { "epoch": 10.21826280623608, "grad_norm": 28.042089462280273, "learning_rate": 1e-06, "loss": 0.6051, "num_input_tokens_seen": 256998580, "step": 4588 }, { "epoch": 10.21826280623608, "loss": 0.7248637080192566, "loss_ce": 0.0006205601966939867, "loss_iou": 0.30078125, "loss_num": 0.0247802734375, "loss_xval": 0.72265625, "num_input_tokens_seen": 256998580, "step": 4588 }, { "epoch": 10.220489977728285, "grad_norm": 17.185420989990234, "learning_rate": 1e-06, "loss": 0.427, "num_input_tokens_seen": 257053016, "step": 4589 }, { "epoch": 10.220489977728285, "loss": 0.4973496198654175, "loss_ce": 0.0002793156891129911, "loss_iou": 0.1982421875, "loss_num": 0.0203857421875, "loss_xval": 0.49609375, "num_input_tokens_seen": 257053016, "step": 4589 }, { "epoch": 10.22271714922049, "grad_norm": 22.403812408447266, "learning_rate": 1e-06, "loss": 0.4526, "num_input_tokens_seen": 257109612, "step": 4590 }, { "epoch": 10.22271714922049, "loss": 0.4690048098564148, "loss_ce": 0.00013278050755616277, "loss_iou": 0.2080078125, "loss_num": 0.010498046875, "loss_xval": 0.46875, "num_input_tokens_seen": 257109612, "step": 4590 }, { "epoch": 10.224944320712694, "grad_norm": 19.512828826904297, "learning_rate": 1e-06, "loss": 0.5368, "num_input_tokens_seen": 257166788, "step": 4591 }, { "epoch": 10.224944320712694, "loss": 0.5036743879318237, "loss_ce": 0.00013440893962979317, "loss_iou": 0.2294921875, "loss_num": 0.0086669921875, "loss_xval": 0.50390625, "num_input_tokens_seen": 257166788, "step": 4591 }, { "epoch": 10.2271714922049, "grad_norm": 29.91023826599121, "learning_rate": 1e-06, "loss": 0.6053, "num_input_tokens_seen": 257223768, "step": 4592 }, { "epoch": 10.2271714922049, "loss": 0.4671659767627716, "loss_ce": 0.00012497020361479372, "loss_iou": 0.2177734375, "loss_num": 0.00628662109375, "loss_xval": 0.466796875, "num_input_tokens_seen": 257223768, "step": 4592 }, { "epoch": 10.229398663697104, "grad_norm": 21.827224731445312, "learning_rate": 1e-06, "loss": 0.6082, "num_input_tokens_seen": 257281108, "step": 4593 }, { "epoch": 10.229398663697104, "loss": 0.4103159010410309, "loss_ce": 0.00015964708290994167, "loss_iou": 0.185546875, "loss_num": 0.007781982421875, "loss_xval": 0.41015625, "num_input_tokens_seen": 257281108, "step": 4593 }, { "epoch": 10.231625835189309, "grad_norm": 14.107495307922363, "learning_rate": 1e-06, "loss": 0.5821, "num_input_tokens_seen": 257334832, "step": 4594 }, { "epoch": 10.231625835189309, "loss": 0.5812970399856567, "loss_ce": 0.00012028503260808066, "loss_iou": 0.255859375, "loss_num": 0.01348876953125, "loss_xval": 0.58203125, "num_input_tokens_seen": 257334832, "step": 4594 }, { "epoch": 10.233853006681514, "grad_norm": 30.097774505615234, "learning_rate": 1e-06, "loss": 0.4502, "num_input_tokens_seen": 257388612, "step": 4595 }, { "epoch": 10.233853006681514, "loss": 0.47809213399887085, "loss_ce": 0.00018684033420868218, "loss_iou": 0.2158203125, "loss_num": 0.00927734375, "loss_xval": 0.478515625, "num_input_tokens_seen": 257388612, "step": 4595 }, { "epoch": 10.236080178173719, "grad_norm": 287.798095703125, "learning_rate": 1e-06, "loss": 0.613, "num_input_tokens_seen": 257445652, "step": 4596 }, { "epoch": 10.236080178173719, "loss": 0.5535175800323486, "loss_ce": 0.00029491656459867954, "loss_iou": 0.224609375, "loss_num": 0.0206298828125, "loss_xval": 0.5546875, "num_input_tokens_seen": 257445652, "step": 4596 }, { "epoch": 10.238307349665924, "grad_norm": 380.41595458984375, "learning_rate": 1e-06, "loss": 0.7191, "num_input_tokens_seen": 257499196, "step": 4597 }, { "epoch": 10.238307349665924, "loss": 0.7513624429702759, "loss_ce": 0.00014180070138536394, "loss_iou": 0.306640625, "loss_num": 0.027587890625, "loss_xval": 0.75, "num_input_tokens_seen": 257499196, "step": 4597 }, { "epoch": 10.240534521158128, "grad_norm": 48.11332321166992, "learning_rate": 1e-06, "loss": 0.6278, "num_input_tokens_seen": 257553432, "step": 4598 }, { "epoch": 10.240534521158128, "loss": 0.884353756904602, "loss_ce": 0.00019845434871967882, "loss_iou": 0.37890625, "loss_num": 0.0247802734375, "loss_xval": 0.8828125, "num_input_tokens_seen": 257553432, "step": 4598 }, { "epoch": 10.242761692650333, "grad_norm": 14.302517890930176, "learning_rate": 1e-06, "loss": 0.4885, "num_input_tokens_seen": 257611352, "step": 4599 }, { "epoch": 10.242761692650333, "loss": 0.5572800636291504, "loss_ce": 0.00015118328155949712, "loss_iou": 0.224609375, "loss_num": 0.021728515625, "loss_xval": 0.55859375, "num_input_tokens_seen": 257611352, "step": 4599 }, { "epoch": 10.244988864142538, "grad_norm": 15.14469051361084, "learning_rate": 1e-06, "loss": 0.5646, "num_input_tokens_seen": 257663240, "step": 4600 }, { "epoch": 10.244988864142538, "loss": 0.3984256088733673, "loss_ce": 0.000232246849918738, "loss_iou": 0.154296875, "loss_num": 0.01806640625, "loss_xval": 0.3984375, "num_input_tokens_seen": 257663240, "step": 4600 }, { "epoch": 10.247216035634743, "grad_norm": 17.56197738647461, "learning_rate": 1e-06, "loss": 0.537, "num_input_tokens_seen": 257719556, "step": 4601 }, { "epoch": 10.247216035634743, "loss": 0.42298391461372375, "loss_ce": 0.00013235666847322136, "loss_iou": 0.189453125, "loss_num": 0.008544921875, "loss_xval": 0.421875, "num_input_tokens_seen": 257719556, "step": 4601 }, { "epoch": 10.249443207126948, "grad_norm": 18.37726593017578, "learning_rate": 1e-06, "loss": 0.6735, "num_input_tokens_seen": 257774564, "step": 4602 }, { "epoch": 10.249443207126948, "loss": 0.4393607974052429, "loss_ce": 0.00015179984620772302, "loss_iou": 0.1923828125, "loss_num": 0.0108642578125, "loss_xval": 0.439453125, "num_input_tokens_seen": 257774564, "step": 4602 }, { "epoch": 10.251670378619155, "grad_norm": 13.660320281982422, "learning_rate": 1e-06, "loss": 0.4367, "num_input_tokens_seen": 257830240, "step": 4603 }, { "epoch": 10.251670378619155, "loss": 0.31909534335136414, "loss_ce": 0.0001256193791050464, "loss_iou": 0.1474609375, "loss_num": 0.00457763671875, "loss_xval": 0.318359375, "num_input_tokens_seen": 257830240, "step": 4603 }, { "epoch": 10.25389755011136, "grad_norm": 13.991618156433105, "learning_rate": 1e-06, "loss": 0.4515, "num_input_tokens_seen": 257883768, "step": 4604 }, { "epoch": 10.25389755011136, "loss": 0.39838650822639465, "loss_ce": 0.0001321271702181548, "loss_iou": 0.1806640625, "loss_num": 0.0074462890625, "loss_xval": 0.3984375, "num_input_tokens_seen": 257883768, "step": 4604 }, { "epoch": 10.256124721603564, "grad_norm": 15.985126495361328, "learning_rate": 1e-06, "loss": 0.6116, "num_input_tokens_seen": 257940856, "step": 4605 }, { "epoch": 10.256124721603564, "loss": 0.7079139947891235, "loss_ce": 0.0001502782106399536, "loss_iou": 0.296875, "loss_num": 0.022705078125, "loss_xval": 0.70703125, "num_input_tokens_seen": 257940856, "step": 4605 }, { "epoch": 10.25835189309577, "grad_norm": 17.861207962036133, "learning_rate": 1e-06, "loss": 0.6644, "num_input_tokens_seen": 257997188, "step": 4606 }, { "epoch": 10.25835189309577, "loss": 0.5625550150871277, "loss_ce": 0.00017705293430481106, "loss_iou": 0.232421875, "loss_num": 0.019287109375, "loss_xval": 0.5625, "num_input_tokens_seen": 257997188, "step": 4606 }, { "epoch": 10.260579064587974, "grad_norm": 16.24393653869629, "learning_rate": 1e-06, "loss": 0.3853, "num_input_tokens_seen": 258053824, "step": 4607 }, { "epoch": 10.260579064587974, "loss": 0.4628918766975403, "loss_ce": 0.00015384730068035424, "loss_iou": 0.201171875, "loss_num": 0.01214599609375, "loss_xval": 0.462890625, "num_input_tokens_seen": 258053824, "step": 4607 }, { "epoch": 10.262806236080179, "grad_norm": 20.257102966308594, "learning_rate": 1e-06, "loss": 0.4479, "num_input_tokens_seen": 258109152, "step": 4608 }, { "epoch": 10.262806236080179, "loss": 0.3674119710922241, "loss_ce": 0.00010239638504572213, "loss_iou": 0.1552734375, "loss_num": 0.0113525390625, "loss_xval": 0.3671875, "num_input_tokens_seen": 258109152, "step": 4608 }, { "epoch": 10.265033407572384, "grad_norm": 17.029348373413086, "learning_rate": 1e-06, "loss": 0.4576, "num_input_tokens_seen": 258166380, "step": 4609 }, { "epoch": 10.265033407572384, "loss": 0.4160352945327759, "loss_ce": 0.00014175890828482807, "loss_iou": 0.1904296875, "loss_num": 0.00714111328125, "loss_xval": 0.416015625, "num_input_tokens_seen": 258166380, "step": 4609 }, { "epoch": 10.267260579064589, "grad_norm": 21.380460739135742, "learning_rate": 1e-06, "loss": 0.5284, "num_input_tokens_seen": 258220732, "step": 4610 }, { "epoch": 10.267260579064589, "loss": 0.3585323393344879, "loss_ce": 0.00013390296953730285, "loss_iou": 0.1513671875, "loss_num": 0.0111083984375, "loss_xval": 0.359375, "num_input_tokens_seen": 258220732, "step": 4610 }, { "epoch": 10.269487750556793, "grad_norm": 14.722599983215332, "learning_rate": 1e-06, "loss": 0.58, "num_input_tokens_seen": 258277352, "step": 4611 }, { "epoch": 10.269487750556793, "loss": 0.4370768666267395, "loss_ce": 0.00018718844512477517, "loss_iou": 0.1953125, "loss_num": 0.00909423828125, "loss_xval": 0.4375, "num_input_tokens_seen": 258277352, "step": 4611 }, { "epoch": 10.271714922048998, "grad_norm": 21.076213836669922, "learning_rate": 1e-06, "loss": 0.5657, "num_input_tokens_seen": 258332080, "step": 4612 }, { "epoch": 10.271714922048998, "loss": 0.4650370478630066, "loss_ce": 0.0001932941668201238, "loss_iou": 0.2041015625, "loss_num": 0.01123046875, "loss_xval": 0.46484375, "num_input_tokens_seen": 258332080, "step": 4612 }, { "epoch": 10.273942093541203, "grad_norm": 85.08380126953125, "learning_rate": 1e-06, "loss": 0.6145, "num_input_tokens_seen": 258389684, "step": 4613 }, { "epoch": 10.273942093541203, "loss": 0.5986820459365845, "loss_ce": 0.0001713307574391365, "loss_iou": 0.251953125, "loss_num": 0.0186767578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 258389684, "step": 4613 }, { "epoch": 10.276169265033408, "grad_norm": 32.875736236572266, "learning_rate": 1e-06, "loss": 0.6115, "num_input_tokens_seen": 258446596, "step": 4614 }, { "epoch": 10.276169265033408, "loss": 0.4154265522956848, "loss_ce": 0.00014332013961393386, "loss_iou": 0.1884765625, "loss_num": 0.007568359375, "loss_xval": 0.416015625, "num_input_tokens_seen": 258446596, "step": 4614 }, { "epoch": 10.278396436525613, "grad_norm": 25.337644577026367, "learning_rate": 1e-06, "loss": 0.6043, "num_input_tokens_seen": 258503504, "step": 4615 }, { "epoch": 10.278396436525613, "loss": 0.5890331268310547, "loss_ce": 0.00016596817295067012, "loss_iou": 0.2578125, "loss_num": 0.01434326171875, "loss_xval": 0.58984375, "num_input_tokens_seen": 258503504, "step": 4615 }, { "epoch": 10.280623608017818, "grad_norm": 22.491819381713867, "learning_rate": 1e-06, "loss": 0.4488, "num_input_tokens_seen": 258561264, "step": 4616 }, { "epoch": 10.280623608017818, "loss": 0.42713305354118347, "loss_ce": 0.00013111413863953203, "loss_iou": 0.1845703125, "loss_num": 0.011474609375, "loss_xval": 0.427734375, "num_input_tokens_seen": 258561264, "step": 4616 }, { "epoch": 10.282850779510023, "grad_norm": 16.053213119506836, "learning_rate": 1e-06, "loss": 0.6014, "num_input_tokens_seen": 258615720, "step": 4617 }, { "epoch": 10.282850779510023, "loss": 0.6224437952041626, "loss_ce": 0.00012936524581164122, "loss_iou": 0.2578125, "loss_num": 0.0213623046875, "loss_xval": 0.62109375, "num_input_tokens_seen": 258615720, "step": 4617 }, { "epoch": 10.285077951002227, "grad_norm": 21.939491271972656, "learning_rate": 1e-06, "loss": 0.447, "num_input_tokens_seen": 258671132, "step": 4618 }, { "epoch": 10.285077951002227, "loss": 0.33654463291168213, "loss_ce": 0.00011883860861416906, "loss_iou": 0.1474609375, "loss_num": 0.00830078125, "loss_xval": 0.3359375, "num_input_tokens_seen": 258671132, "step": 4618 }, { "epoch": 10.287305122494432, "grad_norm": 14.96814250946045, "learning_rate": 1e-06, "loss": 0.5002, "num_input_tokens_seen": 258728584, "step": 4619 }, { "epoch": 10.287305122494432, "loss": 0.5067825317382812, "loss_ce": 0.0002517920802347362, "loss_iou": 0.21875, "loss_num": 0.01385498046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 258728584, "step": 4619 }, { "epoch": 10.289532293986637, "grad_norm": 17.478721618652344, "learning_rate": 1e-06, "loss": 0.3775, "num_input_tokens_seen": 258786092, "step": 4620 }, { "epoch": 10.289532293986637, "loss": 0.46181732416152954, "loss_ce": 0.00014738523168489337, "loss_iou": 0.203125, "loss_num": 0.01123046875, "loss_xval": 0.4609375, "num_input_tokens_seen": 258786092, "step": 4620 }, { "epoch": 10.291759465478842, "grad_norm": 22.687427520751953, "learning_rate": 1e-06, "loss": 0.5753, "num_input_tokens_seen": 258841324, "step": 4621 }, { "epoch": 10.291759465478842, "loss": 0.6752172708511353, "loss_ce": 0.00016845832578837872, "loss_iou": 0.3125, "loss_num": 0.00994873046875, "loss_xval": 0.67578125, "num_input_tokens_seen": 258841324, "step": 4621 }, { "epoch": 10.293986636971047, "grad_norm": 20.015989303588867, "learning_rate": 1e-06, "loss": 0.7111, "num_input_tokens_seen": 258895908, "step": 4622 }, { "epoch": 10.293986636971047, "loss": 0.5323803424835205, "loss_ce": 0.00015376918599940836, "loss_iou": 0.240234375, "loss_num": 0.0103759765625, "loss_xval": 0.53125, "num_input_tokens_seen": 258895908, "step": 4622 }, { "epoch": 10.296213808463252, "grad_norm": 19.902332305908203, "learning_rate": 1e-06, "loss": 0.3957, "num_input_tokens_seen": 258955124, "step": 4623 }, { "epoch": 10.296213808463252, "loss": 0.35737037658691406, "loss_ce": 0.0001926565309986472, "loss_iou": 0.1572265625, "loss_num": 0.0086669921875, "loss_xval": 0.357421875, "num_input_tokens_seen": 258955124, "step": 4623 }, { "epoch": 10.298440979955457, "grad_norm": 30.156911849975586, "learning_rate": 1e-06, "loss": 0.5562, "num_input_tokens_seen": 259010176, "step": 4624 }, { "epoch": 10.298440979955457, "loss": 0.5099859833717346, "loss_ce": 0.0002203439362347126, "loss_iou": 0.228515625, "loss_num": 0.01055908203125, "loss_xval": 0.5078125, "num_input_tokens_seen": 259010176, "step": 4624 }, { "epoch": 10.300668151447661, "grad_norm": 18.427066802978516, "learning_rate": 1e-06, "loss": 0.5903, "num_input_tokens_seen": 259065656, "step": 4625 }, { "epoch": 10.300668151447661, "loss": 0.5748733878135681, "loss_ce": 0.0001511350565124303, "loss_iou": 0.251953125, "loss_num": 0.014404296875, "loss_xval": 0.57421875, "num_input_tokens_seen": 259065656, "step": 4625 }, { "epoch": 10.302895322939866, "grad_norm": 16.115806579589844, "learning_rate": 1e-06, "loss": 0.4775, "num_input_tokens_seen": 259121088, "step": 4626 }, { "epoch": 10.302895322939866, "loss": 0.33922505378723145, "loss_ce": 0.0001137549479608424, "loss_iou": 0.1484375, "loss_num": 0.00860595703125, "loss_xval": 0.33984375, "num_input_tokens_seen": 259121088, "step": 4626 }, { "epoch": 10.305122494432071, "grad_norm": 21.94735336303711, "learning_rate": 1e-06, "loss": 0.6358, "num_input_tokens_seen": 259179752, "step": 4627 }, { "epoch": 10.305122494432071, "loss": 0.6556517481803894, "loss_ce": 0.0001341440947726369, "loss_iou": 0.279296875, "loss_num": 0.01904296875, "loss_xval": 0.65625, "num_input_tokens_seen": 259179752, "step": 4627 }, { "epoch": 10.307349665924276, "grad_norm": 18.129470825195312, "learning_rate": 1e-06, "loss": 0.5075, "num_input_tokens_seen": 259231148, "step": 4628 }, { "epoch": 10.307349665924276, "loss": 0.5028002858161926, "loss_ce": 0.00011475315841380507, "loss_iou": 0.2099609375, "loss_num": 0.0166015625, "loss_xval": 0.50390625, "num_input_tokens_seen": 259231148, "step": 4628 }, { "epoch": 10.309576837416481, "grad_norm": 16.93099021911621, "learning_rate": 1e-06, "loss": 0.5894, "num_input_tokens_seen": 259289500, "step": 4629 }, { "epoch": 10.309576837416481, "loss": 0.4424053430557251, "loss_ce": 0.00014459306839853525, "loss_iou": 0.203125, "loss_num": 0.007049560546875, "loss_xval": 0.44140625, "num_input_tokens_seen": 259289500, "step": 4629 }, { "epoch": 10.311804008908686, "grad_norm": 25.796844482421875, "learning_rate": 1e-06, "loss": 0.6209, "num_input_tokens_seen": 259346848, "step": 4630 }, { "epoch": 10.311804008908686, "loss": 0.6188187599182129, "loss_ce": 0.00016644690185785294, "loss_iou": 0.275390625, "loss_num": 0.01348876953125, "loss_xval": 0.6171875, "num_input_tokens_seen": 259346848, "step": 4630 }, { "epoch": 10.31403118040089, "grad_norm": 16.472164154052734, "learning_rate": 1e-06, "loss": 0.7976, "num_input_tokens_seen": 259404428, "step": 4631 }, { "epoch": 10.31403118040089, "loss": 0.7173308730125427, "loss_ce": 0.0002898685052059591, "loss_iou": 0.27734375, "loss_num": 0.032470703125, "loss_xval": 0.71875, "num_input_tokens_seen": 259404428, "step": 4631 }, { "epoch": 10.316258351893095, "grad_norm": 17.40803337097168, "learning_rate": 1e-06, "loss": 0.4205, "num_input_tokens_seen": 259460152, "step": 4632 }, { "epoch": 10.316258351893095, "loss": 0.3678438663482666, "loss_ce": 0.00016810771194286644, "loss_iou": 0.15625, "loss_num": 0.01104736328125, "loss_xval": 0.3671875, "num_input_tokens_seen": 259460152, "step": 4632 }, { "epoch": 10.3184855233853, "grad_norm": 17.019670486450195, "learning_rate": 1e-06, "loss": 0.6062, "num_input_tokens_seen": 259512592, "step": 4633 }, { "epoch": 10.3184855233853, "loss": 0.5542778968811035, "loss_ce": 0.00020081247203052044, "loss_iou": 0.2392578125, "loss_num": 0.01519775390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 259512592, "step": 4633 }, { "epoch": 10.320712694877505, "grad_norm": 20.420970916748047, "learning_rate": 1e-06, "loss": 0.5013, "num_input_tokens_seen": 259569136, "step": 4634 }, { "epoch": 10.320712694877505, "loss": 0.5180292129516602, "loss_ce": 0.00020692471298389137, "loss_iou": 0.23046875, "loss_num": 0.01141357421875, "loss_xval": 0.51953125, "num_input_tokens_seen": 259569136, "step": 4634 }, { "epoch": 10.32293986636971, "grad_norm": 18.733917236328125, "learning_rate": 1e-06, "loss": 0.6095, "num_input_tokens_seen": 259626308, "step": 4635 }, { "epoch": 10.32293986636971, "loss": 0.5899757742881775, "loss_ce": 0.00013202980335336179, "loss_iou": 0.26171875, "loss_num": 0.0133056640625, "loss_xval": 0.58984375, "num_input_tokens_seen": 259626308, "step": 4635 }, { "epoch": 10.325167037861915, "grad_norm": 15.864693641662598, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 259683580, "step": 4636 }, { "epoch": 10.325167037861915, "loss": 0.36647483706474304, "loss_ce": 0.00014181638834998012, "loss_iou": 0.169921875, "loss_num": 0.0052490234375, "loss_xval": 0.3671875, "num_input_tokens_seen": 259683580, "step": 4636 }, { "epoch": 10.32739420935412, "grad_norm": 16.95244789123535, "learning_rate": 1e-06, "loss": 0.4863, "num_input_tokens_seen": 259740856, "step": 4637 }, { "epoch": 10.32739420935412, "loss": 0.4233607351779938, "loss_ce": 0.00014292271225713193, "loss_iou": 0.1826171875, "loss_num": 0.01165771484375, "loss_xval": 0.423828125, "num_input_tokens_seen": 259740856, "step": 4637 }, { "epoch": 10.329621380846325, "grad_norm": 14.428168296813965, "learning_rate": 1e-06, "loss": 0.5452, "num_input_tokens_seen": 259797892, "step": 4638 }, { "epoch": 10.329621380846325, "loss": 0.37976911664009094, "loss_ce": 0.00013044924708083272, "loss_iou": 0.173828125, "loss_num": 0.0064697265625, "loss_xval": 0.37890625, "num_input_tokens_seen": 259797892, "step": 4638 }, { "epoch": 10.33184855233853, "grad_norm": 20.9561767578125, "learning_rate": 1e-06, "loss": 0.5536, "num_input_tokens_seen": 259856484, "step": 4639 }, { "epoch": 10.33184855233853, "loss": 0.4375172257423401, "loss_ce": 0.00013931245484855026, "loss_iou": 0.19140625, "loss_num": 0.01068115234375, "loss_xval": 0.4375, "num_input_tokens_seen": 259856484, "step": 4639 }, { "epoch": 10.334075723830734, "grad_norm": 15.651261329650879, "learning_rate": 1e-06, "loss": 0.4453, "num_input_tokens_seen": 259916208, "step": 4640 }, { "epoch": 10.334075723830734, "loss": 0.548733115196228, "loss_ce": 0.00014910154277458787, "loss_iou": 0.2236328125, "loss_num": 0.0201416015625, "loss_xval": 0.546875, "num_input_tokens_seen": 259916208, "step": 4640 }, { "epoch": 10.33630289532294, "grad_norm": 13.660500526428223, "learning_rate": 1e-06, "loss": 0.4894, "num_input_tokens_seen": 259971708, "step": 4641 }, { "epoch": 10.33630289532294, "loss": 0.45058485865592957, "loss_ce": 0.000511608668603003, "loss_iou": 0.15234375, "loss_num": 0.02880859375, "loss_xval": 0.44921875, "num_input_tokens_seen": 259971708, "step": 4641 }, { "epoch": 10.338530066815144, "grad_norm": 25.716596603393555, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 260025852, "step": 4642 }, { "epoch": 10.338530066815144, "loss": 0.7315921783447266, "loss_ce": 0.00039098679553717375, "loss_iou": 0.298828125, "loss_num": 0.026611328125, "loss_xval": 0.73046875, "num_input_tokens_seen": 260025852, "step": 4642 }, { "epoch": 10.340757238307349, "grad_norm": 21.653146743774414, "learning_rate": 1e-06, "loss": 0.5072, "num_input_tokens_seen": 260079956, "step": 4643 }, { "epoch": 10.340757238307349, "loss": 0.434053510427475, "loss_ce": 0.0002766597317531705, "loss_iou": 0.1884765625, "loss_num": 0.0113525390625, "loss_xval": 0.43359375, "num_input_tokens_seen": 260079956, "step": 4643 }, { "epoch": 10.342984409799554, "grad_norm": 14.551214218139648, "learning_rate": 1e-06, "loss": 0.4545, "num_input_tokens_seen": 260136624, "step": 4644 }, { "epoch": 10.342984409799554, "loss": 0.5187681913375854, "loss_ce": 0.00021346815628930926, "loss_iou": 0.2294921875, "loss_num": 0.01177978515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 260136624, "step": 4644 }, { "epoch": 10.345211581291759, "grad_norm": 23.34543228149414, "learning_rate": 1e-06, "loss": 0.5684, "num_input_tokens_seen": 260193936, "step": 4645 }, { "epoch": 10.345211581291759, "loss": 0.6667625904083252, "loss_ce": 0.00013660687545780092, "loss_iou": 0.30078125, "loss_num": 0.0128173828125, "loss_xval": 0.66796875, "num_input_tokens_seen": 260193936, "step": 4645 }, { "epoch": 10.347438752783964, "grad_norm": 15.129518508911133, "learning_rate": 1e-06, "loss": 0.4025, "num_input_tokens_seen": 260247704, "step": 4646 }, { "epoch": 10.347438752783964, "loss": 0.3118034601211548, "loss_ce": 0.00012743064144160599, "loss_iou": 0.130859375, "loss_num": 0.0101318359375, "loss_xval": 0.3125, "num_input_tokens_seen": 260247704, "step": 4646 }, { "epoch": 10.34966592427617, "grad_norm": 20.0349063873291, "learning_rate": 1e-06, "loss": 0.8892, "num_input_tokens_seen": 260305780, "step": 4647 }, { "epoch": 10.34966592427617, "loss": 0.9932963252067566, "loss_ce": 0.00013225735165178776, "loss_iou": 0.43359375, "loss_num": 0.02490234375, "loss_xval": 0.9921875, "num_input_tokens_seen": 260305780, "step": 4647 }, { "epoch": 10.351893095768375, "grad_norm": 24.84522247314453, "learning_rate": 1e-06, "loss": 0.6352, "num_input_tokens_seen": 260360704, "step": 4648 }, { "epoch": 10.351893095768375, "loss": 0.5557194352149963, "loss_ce": 0.0001774309203028679, "loss_iou": 0.25390625, "loss_num": 0.00994873046875, "loss_xval": 0.5546875, "num_input_tokens_seen": 260360704, "step": 4648 }, { "epoch": 10.35412026726058, "grad_norm": 16.85805892944336, "learning_rate": 1e-06, "loss": 0.5613, "num_input_tokens_seen": 260419212, "step": 4649 }, { "epoch": 10.35412026726058, "loss": 0.43383756279945374, "loss_ce": 0.00012176090240245685, "loss_iou": 0.173828125, "loss_num": 0.0169677734375, "loss_xval": 0.43359375, "num_input_tokens_seen": 260419212, "step": 4649 }, { "epoch": 10.356347438752785, "grad_norm": 19.93196678161621, "learning_rate": 1e-06, "loss": 0.5456, "num_input_tokens_seen": 260475968, "step": 4650 }, { "epoch": 10.356347438752785, "loss": 0.5350778698921204, "loss_ce": 0.00016572429740335792, "loss_iou": 0.2294921875, "loss_num": 0.01519775390625, "loss_xval": 0.53515625, "num_input_tokens_seen": 260475968, "step": 4650 }, { "epoch": 10.35857461024499, "grad_norm": 22.55901336669922, "learning_rate": 1e-06, "loss": 0.5437, "num_input_tokens_seen": 260530128, "step": 4651 }, { "epoch": 10.35857461024499, "loss": 0.6019728183746338, "loss_ce": 0.0001661243732087314, "loss_iou": 0.28125, "loss_num": 0.00799560546875, "loss_xval": 0.6015625, "num_input_tokens_seen": 260530128, "step": 4651 }, { "epoch": 10.360801781737194, "grad_norm": 24.937402725219727, "learning_rate": 1e-06, "loss": 0.5643, "num_input_tokens_seen": 260587396, "step": 4652 }, { "epoch": 10.360801781737194, "loss": 0.4734136760234833, "loss_ce": 0.00014707804075442255, "loss_iou": 0.2099609375, "loss_num": 0.01068115234375, "loss_xval": 0.47265625, "num_input_tokens_seen": 260587396, "step": 4652 }, { "epoch": 10.3630289532294, "grad_norm": 13.982348442077637, "learning_rate": 1e-06, "loss": 0.4673, "num_input_tokens_seen": 260644020, "step": 4653 }, { "epoch": 10.3630289532294, "loss": 0.3794419467449188, "loss_ce": 0.0004746583290398121, "loss_iou": 0.1689453125, "loss_num": 0.00823974609375, "loss_xval": 0.37890625, "num_input_tokens_seen": 260644020, "step": 4653 }, { "epoch": 10.365256124721604, "grad_norm": 16.844064712524414, "learning_rate": 1e-06, "loss": 0.6486, "num_input_tokens_seen": 260700716, "step": 4654 }, { "epoch": 10.365256124721604, "loss": 0.5358029007911682, "loss_ce": 0.00015838223043829203, "loss_iou": 0.244140625, "loss_num": 0.00946044921875, "loss_xval": 0.53515625, "num_input_tokens_seen": 260700716, "step": 4654 }, { "epoch": 10.367483296213809, "grad_norm": 16.441743850708008, "learning_rate": 1e-06, "loss": 0.6257, "num_input_tokens_seen": 260756304, "step": 4655 }, { "epoch": 10.367483296213809, "loss": 0.6969484090805054, "loss_ce": 0.00017108957399614155, "loss_iou": 0.29296875, "loss_num": 0.0228271484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 260756304, "step": 4655 }, { "epoch": 10.369710467706014, "grad_norm": 16.78958511352539, "learning_rate": 1e-06, "loss": 0.6998, "num_input_tokens_seen": 260810784, "step": 4656 }, { "epoch": 10.369710467706014, "loss": 0.6009594798088074, "loss_ce": 0.00012937135761603713, "loss_iou": 0.251953125, "loss_num": 0.01904296875, "loss_xval": 0.6015625, "num_input_tokens_seen": 260810784, "step": 4656 }, { "epoch": 10.371937639198219, "grad_norm": 18.355632781982422, "learning_rate": 1e-06, "loss": 0.5985, "num_input_tokens_seen": 260867540, "step": 4657 }, { "epoch": 10.371937639198219, "loss": 0.7768846750259399, "loss_ce": 0.00015131058171391487, "loss_iou": 0.333984375, "loss_num": 0.021728515625, "loss_xval": 0.77734375, "num_input_tokens_seen": 260867540, "step": 4657 }, { "epoch": 10.374164810690424, "grad_norm": 17.681825637817383, "learning_rate": 1e-06, "loss": 0.5509, "num_input_tokens_seen": 260923584, "step": 4658 }, { "epoch": 10.374164810690424, "loss": 0.5139190554618835, "loss_ce": 0.00012508760846685618, "loss_iou": 0.21875, "loss_num": 0.01513671875, "loss_xval": 0.515625, "num_input_tokens_seen": 260923584, "step": 4658 }, { "epoch": 10.376391982182628, "grad_norm": 29.6978759765625, "learning_rate": 1e-06, "loss": 0.549, "num_input_tokens_seen": 260980476, "step": 4659 }, { "epoch": 10.376391982182628, "loss": 0.6463862657546997, "loss_ce": 0.00014600764552596956, "loss_iou": 0.2734375, "loss_num": 0.02001953125, "loss_xval": 0.64453125, "num_input_tokens_seen": 260980476, "step": 4659 }, { "epoch": 10.378619153674833, "grad_norm": 20.990150451660156, "learning_rate": 1e-06, "loss": 0.4872, "num_input_tokens_seen": 261038064, "step": 4660 }, { "epoch": 10.378619153674833, "loss": 0.5661933422088623, "loss_ce": 0.00015332447947002947, "loss_iou": 0.2578125, "loss_num": 0.00994873046875, "loss_xval": 0.56640625, "num_input_tokens_seen": 261038064, "step": 4660 }, { "epoch": 10.380846325167038, "grad_norm": 20.679819107055664, "learning_rate": 1e-06, "loss": 0.4146, "num_input_tokens_seen": 261093180, "step": 4661 }, { "epoch": 10.380846325167038, "loss": 0.479025661945343, "loss_ce": 0.00014384492533281446, "loss_iou": 0.2138671875, "loss_num": 0.010009765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 261093180, "step": 4661 }, { "epoch": 10.383073496659243, "grad_norm": 12.441951751708984, "learning_rate": 1e-06, "loss": 0.5305, "num_input_tokens_seen": 261149604, "step": 4662 }, { "epoch": 10.383073496659243, "loss": 0.5029485821723938, "loss_ce": 0.00014094685320742428, "loss_iou": 0.2275390625, "loss_num": 0.0096435546875, "loss_xval": 0.50390625, "num_input_tokens_seen": 261149604, "step": 4662 }, { "epoch": 10.385300668151448, "grad_norm": 39.21706008911133, "learning_rate": 1e-06, "loss": 0.5123, "num_input_tokens_seen": 261207228, "step": 4663 }, { "epoch": 10.385300668151448, "loss": 0.44520843029022217, "loss_ce": 0.00014007699792273343, "loss_iou": 0.2021484375, "loss_num": 0.00836181640625, "loss_xval": 0.4453125, "num_input_tokens_seen": 261207228, "step": 4663 }, { "epoch": 10.387527839643653, "grad_norm": 29.102785110473633, "learning_rate": 1e-06, "loss": 0.5844, "num_input_tokens_seen": 261261652, "step": 4664 }, { "epoch": 10.387527839643653, "loss": 0.523324191570282, "loss_ce": 0.00013084019883535802, "loss_iou": 0.2294921875, "loss_num": 0.01300048828125, "loss_xval": 0.5234375, "num_input_tokens_seen": 261261652, "step": 4664 }, { "epoch": 10.389755011135858, "grad_norm": 17.95996856689453, "learning_rate": 1e-06, "loss": 0.5083, "num_input_tokens_seen": 261314420, "step": 4665 }, { "epoch": 10.389755011135858, "loss": 0.666547417640686, "loss_ce": 0.00016560900257900357, "loss_iou": 0.3046875, "loss_num": 0.01141357421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 261314420, "step": 4665 }, { "epoch": 10.391982182628063, "grad_norm": 17.621519088745117, "learning_rate": 1e-06, "loss": 0.4285, "num_input_tokens_seen": 261369500, "step": 4666 }, { "epoch": 10.391982182628063, "loss": 0.3165377378463745, "loss_ce": 0.0001314996334258467, "loss_iou": 0.13671875, "loss_num": 0.00836181640625, "loss_xval": 0.31640625, "num_input_tokens_seen": 261369500, "step": 4666 }, { "epoch": 10.394209354120267, "grad_norm": 28.80778694152832, "learning_rate": 1e-06, "loss": 0.6926, "num_input_tokens_seen": 261424900, "step": 4667 }, { "epoch": 10.394209354120267, "loss": 0.6756924390792847, "loss_ce": 0.00015533142141066492, "loss_iou": 0.3046875, "loss_num": 0.01336669921875, "loss_xval": 0.67578125, "num_input_tokens_seen": 261424900, "step": 4667 }, { "epoch": 10.396436525612472, "grad_norm": 16.814796447753906, "learning_rate": 1e-06, "loss": 0.6278, "num_input_tokens_seen": 261481180, "step": 4668 }, { "epoch": 10.396436525612472, "loss": 0.6310012340545654, "loss_ce": 0.00020286736253183335, "loss_iou": 0.259765625, "loss_num": 0.0220947265625, "loss_xval": 0.62890625, "num_input_tokens_seen": 261481180, "step": 4668 }, { "epoch": 10.398663697104677, "grad_norm": 28.424175262451172, "learning_rate": 1e-06, "loss": 0.5187, "num_input_tokens_seen": 261537668, "step": 4669 }, { "epoch": 10.398663697104677, "loss": 0.7215948104858398, "loss_ce": 0.00015925764455460012, "loss_iou": 0.296875, "loss_num": 0.0252685546875, "loss_xval": 0.72265625, "num_input_tokens_seen": 261537668, "step": 4669 }, { "epoch": 10.400890868596882, "grad_norm": 42.25555419921875, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 261592388, "step": 4670 }, { "epoch": 10.400890868596882, "loss": 0.532261848449707, "loss_ce": 0.00015735380293335766, "loss_iou": 0.23046875, "loss_num": 0.01416015625, "loss_xval": 0.53125, "num_input_tokens_seen": 261592388, "step": 4670 }, { "epoch": 10.403118040089087, "grad_norm": 15.451391220092773, "learning_rate": 1e-06, "loss": 0.6405, "num_input_tokens_seen": 261649236, "step": 4671 }, { "epoch": 10.403118040089087, "loss": 0.8438804149627686, "loss_ce": 0.000130399945192039, "loss_iou": 0.337890625, "loss_num": 0.033203125, "loss_xval": 0.84375, "num_input_tokens_seen": 261649236, "step": 4671 }, { "epoch": 10.405345211581292, "grad_norm": 18.49277687072754, "learning_rate": 1e-06, "loss": 0.7358, "num_input_tokens_seen": 261705516, "step": 4672 }, { "epoch": 10.405345211581292, "loss": 0.5958718061447144, "loss_ce": 0.0001686769537627697, "loss_iou": 0.26171875, "loss_num": 0.01446533203125, "loss_xval": 0.59375, "num_input_tokens_seen": 261705516, "step": 4672 }, { "epoch": 10.407572383073497, "grad_norm": 18.64063262939453, "learning_rate": 1e-06, "loss": 0.5341, "num_input_tokens_seen": 261759996, "step": 4673 }, { "epoch": 10.407572383073497, "loss": 0.4139430522918701, "loss_ce": 0.00012470106594264507, "loss_iou": 0.169921875, "loss_num": 0.01483154296875, "loss_xval": 0.4140625, "num_input_tokens_seen": 261759996, "step": 4673 }, { "epoch": 10.409799554565701, "grad_norm": 15.402804374694824, "learning_rate": 1e-06, "loss": 0.5624, "num_input_tokens_seen": 261814640, "step": 4674 }, { "epoch": 10.409799554565701, "loss": 0.5486302375793457, "loss_ce": 0.00016830695676617324, "loss_iou": 0.22265625, "loss_num": 0.0205078125, "loss_xval": 0.546875, "num_input_tokens_seen": 261814640, "step": 4674 }, { "epoch": 10.412026726057906, "grad_norm": 16.70870590209961, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 261873280, "step": 4675 }, { "epoch": 10.412026726057906, "loss": 0.26441287994384766, "loss_ce": 0.00013062989455647767, "loss_iou": 0.1162109375, "loss_num": 0.00634765625, "loss_xval": 0.263671875, "num_input_tokens_seen": 261873280, "step": 4675 }, { "epoch": 10.414253897550111, "grad_norm": 16.716644287109375, "learning_rate": 1e-06, "loss": 0.7616, "num_input_tokens_seen": 261930832, "step": 4676 }, { "epoch": 10.414253897550111, "loss": 0.6276519298553467, "loss_ce": 0.00021053958334960043, "loss_iou": 0.263671875, "loss_num": 0.0196533203125, "loss_xval": 0.62890625, "num_input_tokens_seen": 261930832, "step": 4676 }, { "epoch": 10.416481069042316, "grad_norm": 24.789369583129883, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 261985440, "step": 4677 }, { "epoch": 10.416481069042316, "loss": 0.5724594593048096, "loss_ce": 0.00019378411525394768, "loss_iou": 0.255859375, "loss_num": 0.01220703125, "loss_xval": 0.5703125, "num_input_tokens_seen": 261985440, "step": 4677 }, { "epoch": 10.41870824053452, "grad_norm": 23.978904724121094, "learning_rate": 1e-06, "loss": 0.5775, "num_input_tokens_seen": 262040176, "step": 4678 }, { "epoch": 10.41870824053452, "loss": 0.7208090424537659, "loss_ce": 0.0001669653574936092, "loss_iou": 0.291015625, "loss_num": 0.02783203125, "loss_xval": 0.71875, "num_input_tokens_seen": 262040176, "step": 4678 }, { "epoch": 10.420935412026726, "grad_norm": 21.936405181884766, "learning_rate": 1e-06, "loss": 0.5377, "num_input_tokens_seen": 262096712, "step": 4679 }, { "epoch": 10.420935412026726, "loss": 0.6339254379272461, "loss_ce": 0.0001363815099466592, "loss_iou": 0.27734375, "loss_num": 0.0157470703125, "loss_xval": 0.6328125, "num_input_tokens_seen": 262096712, "step": 4679 }, { "epoch": 10.42316258351893, "grad_norm": 15.21117115020752, "learning_rate": 1e-06, "loss": 0.5304, "num_input_tokens_seen": 262151328, "step": 4680 }, { "epoch": 10.42316258351893, "loss": 0.6026941537857056, "loss_ce": 0.00015511347737628967, "loss_iou": 0.2470703125, "loss_num": 0.021728515625, "loss_xval": 0.6015625, "num_input_tokens_seen": 262151328, "step": 4680 }, { "epoch": 10.425389755011135, "grad_norm": 19.147802352905273, "learning_rate": 1e-06, "loss": 0.6639, "num_input_tokens_seen": 262207744, "step": 4681 }, { "epoch": 10.425389755011135, "loss": 0.6351706981658936, "loss_ce": 0.00016094453167170286, "loss_iou": 0.28125, "loss_num": 0.01416015625, "loss_xval": 0.63671875, "num_input_tokens_seen": 262207744, "step": 4681 }, { "epoch": 10.42761692650334, "grad_norm": 20.792253494262695, "learning_rate": 1e-06, "loss": 0.5855, "num_input_tokens_seen": 262266176, "step": 4682 }, { "epoch": 10.42761692650334, "loss": 0.7266117930412292, "loss_ce": 0.00017138014663942158, "loss_iou": 0.30859375, "loss_num": 0.0218505859375, "loss_xval": 0.7265625, "num_input_tokens_seen": 262266176, "step": 4682 }, { "epoch": 10.429844097995545, "grad_norm": 17.91155433654785, "learning_rate": 1e-06, "loss": 0.4326, "num_input_tokens_seen": 262322384, "step": 4683 }, { "epoch": 10.429844097995545, "loss": 0.4974979758262634, "loss_ce": 0.00018352872575633228, "loss_iou": 0.2158203125, "loss_num": 0.0130615234375, "loss_xval": 0.498046875, "num_input_tokens_seen": 262322384, "step": 4683 }, { "epoch": 10.43207126948775, "grad_norm": 15.88755989074707, "learning_rate": 1e-06, "loss": 0.4703, "num_input_tokens_seen": 262378432, "step": 4684 }, { "epoch": 10.43207126948775, "loss": 0.6226903200149536, "loss_ce": 0.0001317547430517152, "loss_iou": 0.27734375, "loss_num": 0.013427734375, "loss_xval": 0.62109375, "num_input_tokens_seen": 262378432, "step": 4684 }, { "epoch": 10.434298440979955, "grad_norm": 27.248088836669922, "learning_rate": 1e-06, "loss": 0.6203, "num_input_tokens_seen": 262433420, "step": 4685 }, { "epoch": 10.434298440979955, "loss": 0.8291926980018616, "loss_ce": 0.000579414947424084, "loss_iou": 0.3359375, "loss_num": 0.031494140625, "loss_xval": 0.828125, "num_input_tokens_seen": 262433420, "step": 4685 }, { "epoch": 10.43652561247216, "grad_norm": 23.795869827270508, "learning_rate": 1e-06, "loss": 0.4751, "num_input_tokens_seen": 262492420, "step": 4686 }, { "epoch": 10.43652561247216, "loss": 0.5592020750045776, "loss_ce": 0.0001200107071781531, "loss_iou": 0.255859375, "loss_num": 0.0096435546875, "loss_xval": 0.55859375, "num_input_tokens_seen": 262492420, "step": 4686 }, { "epoch": 10.438752783964365, "grad_norm": 119.42134857177734, "learning_rate": 1e-06, "loss": 0.4158, "num_input_tokens_seen": 262547884, "step": 4687 }, { "epoch": 10.438752783964365, "loss": 0.2656348645687103, "loss_ce": 0.0001319482980761677, "loss_iou": 0.111328125, "loss_num": 0.00860595703125, "loss_xval": 0.265625, "num_input_tokens_seen": 262547884, "step": 4687 }, { "epoch": 10.44097995545657, "grad_norm": 29.43088722229004, "learning_rate": 1e-06, "loss": 0.6806, "num_input_tokens_seen": 262605912, "step": 4688 }, { "epoch": 10.44097995545657, "loss": 0.7944400310516357, "loss_ce": 0.0001285005419049412, "loss_iou": 0.35546875, "loss_num": 0.0167236328125, "loss_xval": 0.79296875, "num_input_tokens_seen": 262605912, "step": 4688 }, { "epoch": 10.443207126948774, "grad_norm": 37.14841842651367, "learning_rate": 1e-06, "loss": 0.5946, "num_input_tokens_seen": 262658560, "step": 4689 }, { "epoch": 10.443207126948774, "loss": 0.5586202144622803, "loss_ce": 0.00014853276661597192, "loss_iou": 0.2216796875, "loss_num": 0.02294921875, "loss_xval": 0.55859375, "num_input_tokens_seen": 262658560, "step": 4689 }, { "epoch": 10.44543429844098, "grad_norm": 97.54398345947266, "learning_rate": 1e-06, "loss": 0.3942, "num_input_tokens_seen": 262714528, "step": 4690 }, { "epoch": 10.44543429844098, "loss": 0.3732898235321045, "loss_ce": 0.00012086871720384806, "loss_iou": 0.1591796875, "loss_num": 0.010986328125, "loss_xval": 0.373046875, "num_input_tokens_seen": 262714528, "step": 4690 }, { "epoch": 10.447661469933184, "grad_norm": 20.17695426940918, "learning_rate": 1e-06, "loss": 0.492, "num_input_tokens_seen": 262770576, "step": 4691 }, { "epoch": 10.447661469933184, "loss": 0.47415345907211304, "loss_ce": 0.00015445941244252026, "loss_iou": 0.20703125, "loss_num": 0.011962890625, "loss_xval": 0.474609375, "num_input_tokens_seen": 262770576, "step": 4691 }, { "epoch": 10.449888641425389, "grad_norm": 21.019207000732422, "learning_rate": 1e-06, "loss": 0.4976, "num_input_tokens_seen": 262827748, "step": 4692 }, { "epoch": 10.449888641425389, "loss": 0.44643697142601013, "loss_ce": 0.00014791959256399423, "loss_iou": 0.1806640625, "loss_num": 0.01708984375, "loss_xval": 0.4453125, "num_input_tokens_seen": 262827748, "step": 4692 }, { "epoch": 10.452115812917596, "grad_norm": 17.917194366455078, "learning_rate": 1e-06, "loss": 0.5386, "num_input_tokens_seen": 262884516, "step": 4693 }, { "epoch": 10.452115812917596, "loss": 0.40125101804733276, "loss_ce": 0.00012796126247849315, "loss_iou": 0.18359375, "loss_num": 0.00677490234375, "loss_xval": 0.400390625, "num_input_tokens_seen": 262884516, "step": 4693 }, { "epoch": 10.4543429844098, "grad_norm": 25.924360275268555, "learning_rate": 1e-06, "loss": 0.5668, "num_input_tokens_seen": 262939420, "step": 4694 }, { "epoch": 10.4543429844098, "loss": 0.43259525299072266, "loss_ce": 0.00022220781829673797, "loss_iou": 0.1845703125, "loss_num": 0.012451171875, "loss_xval": 0.431640625, "num_input_tokens_seen": 262939420, "step": 4694 }, { "epoch": 10.456570155902005, "grad_norm": 29.25006675720215, "learning_rate": 1e-06, "loss": 0.6523, "num_input_tokens_seen": 262998928, "step": 4695 }, { "epoch": 10.456570155902005, "loss": 0.7494416236877441, "loss_ce": 0.00017405254766345024, "loss_iou": 0.33203125, "loss_num": 0.0167236328125, "loss_xval": 0.75, "num_input_tokens_seen": 262998928, "step": 4695 }, { "epoch": 10.45879732739421, "grad_norm": 14.804512023925781, "learning_rate": 1e-06, "loss": 0.5316, "num_input_tokens_seen": 263055744, "step": 4696 }, { "epoch": 10.45879732739421, "loss": 0.5465176105499268, "loss_ce": 0.0001309234939981252, "loss_iou": 0.236328125, "loss_num": 0.014892578125, "loss_xval": 0.546875, "num_input_tokens_seen": 263055744, "step": 4696 }, { "epoch": 10.461024498886415, "grad_norm": 23.423185348510742, "learning_rate": 1e-06, "loss": 0.4285, "num_input_tokens_seen": 263111136, "step": 4697 }, { "epoch": 10.461024498886415, "loss": 0.35573017597198486, "loss_ce": 0.00013935507740825415, "loss_iou": 0.162109375, "loss_num": 0.00616455078125, "loss_xval": 0.35546875, "num_input_tokens_seen": 263111136, "step": 4697 }, { "epoch": 10.46325167037862, "grad_norm": 15.791742324829102, "learning_rate": 1e-06, "loss": 0.5727, "num_input_tokens_seen": 263168172, "step": 4698 }, { "epoch": 10.46325167037862, "loss": 0.7756419777870178, "loss_ce": 0.00012929517833981663, "loss_iou": 0.310546875, "loss_num": 0.0308837890625, "loss_xval": 0.77734375, "num_input_tokens_seen": 263168172, "step": 4698 }, { "epoch": 10.465478841870825, "grad_norm": 16.894933700561523, "learning_rate": 1e-06, "loss": 0.5117, "num_input_tokens_seen": 263224844, "step": 4699 }, { "epoch": 10.465478841870825, "loss": 0.4671807885169983, "loss_ce": 0.0001397810410708189, "loss_iou": 0.208984375, "loss_num": 0.0098876953125, "loss_xval": 0.466796875, "num_input_tokens_seen": 263224844, "step": 4699 }, { "epoch": 10.46770601336303, "grad_norm": 21.554258346557617, "learning_rate": 1e-06, "loss": 0.5666, "num_input_tokens_seen": 263278976, "step": 4700 }, { "epoch": 10.46770601336303, "loss": 0.5681118965148926, "loss_ce": 0.00011875165364472196, "loss_iou": 0.259765625, "loss_num": 0.00994873046875, "loss_xval": 0.56640625, "num_input_tokens_seen": 263278976, "step": 4700 }, { "epoch": 10.469933184855234, "grad_norm": 17.350711822509766, "learning_rate": 1e-06, "loss": 0.5042, "num_input_tokens_seen": 263336380, "step": 4701 }, { "epoch": 10.469933184855234, "loss": 0.4490933418273926, "loss_ce": 0.0002408274740446359, "loss_iou": 0.1962890625, "loss_num": 0.01123046875, "loss_xval": 0.44921875, "num_input_tokens_seen": 263336380, "step": 4701 }, { "epoch": 10.47216035634744, "grad_norm": 20.207218170166016, "learning_rate": 1e-06, "loss": 0.5642, "num_input_tokens_seen": 263390656, "step": 4702 }, { "epoch": 10.47216035634744, "loss": 0.5887584090232849, "loss_ce": 0.0001353670231765136, "loss_iou": 0.2099609375, "loss_num": 0.03369140625, "loss_xval": 0.58984375, "num_input_tokens_seen": 263390656, "step": 4702 }, { "epoch": 10.474387527839644, "grad_norm": 29.60268783569336, "learning_rate": 1e-06, "loss": 0.6127, "num_input_tokens_seen": 263448332, "step": 4703 }, { "epoch": 10.474387527839644, "loss": 0.6975966691970825, "loss_ce": 0.00014798392658121884, "loss_iou": 0.302734375, "loss_num": 0.0184326171875, "loss_xval": 0.69921875, "num_input_tokens_seen": 263448332, "step": 4703 }, { "epoch": 10.476614699331849, "grad_norm": 14.4660005569458, "learning_rate": 1e-06, "loss": 0.4241, "num_input_tokens_seen": 263503824, "step": 4704 }, { "epoch": 10.476614699331849, "loss": 0.3470431864261627, "loss_ce": 0.00011936050577787682, "loss_iou": 0.1484375, "loss_num": 0.010009765625, "loss_xval": 0.34765625, "num_input_tokens_seen": 263503824, "step": 4704 }, { "epoch": 10.478841870824054, "grad_norm": 17.75361442565918, "learning_rate": 1e-06, "loss": 0.5674, "num_input_tokens_seen": 263559724, "step": 4705 }, { "epoch": 10.478841870824054, "loss": 0.6192811131477356, "loss_ce": 0.0001404954819008708, "loss_iou": 0.267578125, "loss_num": 0.016845703125, "loss_xval": 0.6171875, "num_input_tokens_seen": 263559724, "step": 4705 }, { "epoch": 10.481069042316259, "grad_norm": 16.871896743774414, "learning_rate": 1e-06, "loss": 0.5026, "num_input_tokens_seen": 263615824, "step": 4706 }, { "epoch": 10.481069042316259, "loss": 0.42048490047454834, "loss_ce": 0.00019680103287100792, "loss_iou": 0.1728515625, "loss_num": 0.01483154296875, "loss_xval": 0.419921875, "num_input_tokens_seen": 263615824, "step": 4706 }, { "epoch": 10.483296213808464, "grad_norm": 18.86580467224121, "learning_rate": 1e-06, "loss": 0.4055, "num_input_tokens_seen": 263672988, "step": 4707 }, { "epoch": 10.483296213808464, "loss": 0.4523187577724457, "loss_ce": 0.0001703191373962909, "loss_iou": 0.1982421875, "loss_num": 0.01116943359375, "loss_xval": 0.453125, "num_input_tokens_seen": 263672988, "step": 4707 }, { "epoch": 10.485523385300668, "grad_norm": 18.27134132385254, "learning_rate": 1e-06, "loss": 0.4455, "num_input_tokens_seen": 263729920, "step": 4708 }, { "epoch": 10.485523385300668, "loss": 0.46423059701919556, "loss_ce": 0.00011923116107936949, "loss_iou": 0.2099609375, "loss_num": 0.00885009765625, "loss_xval": 0.46484375, "num_input_tokens_seen": 263729920, "step": 4708 }, { "epoch": 10.487750556792873, "grad_norm": 16.8643856048584, "learning_rate": 1e-06, "loss": 0.4758, "num_input_tokens_seen": 263786968, "step": 4709 }, { "epoch": 10.487750556792873, "loss": 0.41022104024887085, "loss_ce": 0.00018687370175030082, "loss_iou": 0.16796875, "loss_num": 0.01495361328125, "loss_xval": 0.41015625, "num_input_tokens_seen": 263786968, "step": 4709 }, { "epoch": 10.489977728285078, "grad_norm": 22.2263240814209, "learning_rate": 1e-06, "loss": 0.4912, "num_input_tokens_seen": 263842484, "step": 4710 }, { "epoch": 10.489977728285078, "loss": 0.5018296241760254, "loss_ce": 0.00012060045264661312, "loss_iou": 0.224609375, "loss_num": 0.0103759765625, "loss_xval": 0.5, "num_input_tokens_seen": 263842484, "step": 4710 }, { "epoch": 10.492204899777283, "grad_norm": 17.63686180114746, "learning_rate": 1e-06, "loss": 0.5927, "num_input_tokens_seen": 263899136, "step": 4711 }, { "epoch": 10.492204899777283, "loss": 0.675079345703125, "loss_ce": 0.00015263669774867594, "loss_iou": 0.26171875, "loss_num": 0.030517578125, "loss_xval": 0.67578125, "num_input_tokens_seen": 263899136, "step": 4711 }, { "epoch": 10.494432071269488, "grad_norm": 19.261802673339844, "learning_rate": 1e-06, "loss": 0.5165, "num_input_tokens_seen": 263956272, "step": 4712 }, { "epoch": 10.494432071269488, "loss": 0.4028354585170746, "loss_ce": 0.00012550255632959306, "loss_iou": 0.16796875, "loss_num": 0.013427734375, "loss_xval": 0.40234375, "num_input_tokens_seen": 263956272, "step": 4712 }, { "epoch": 10.496659242761693, "grad_norm": 22.613391876220703, "learning_rate": 1e-06, "loss": 0.5683, "num_input_tokens_seen": 264010704, "step": 4713 }, { "epoch": 10.496659242761693, "loss": 0.7420130968093872, "loss_ce": 0.0001918151247082278, "loss_iou": 0.306640625, "loss_num": 0.025390625, "loss_xval": 0.7421875, "num_input_tokens_seen": 264010704, "step": 4713 }, { "epoch": 10.498886414253898, "grad_norm": 18.75325584411621, "learning_rate": 1e-06, "loss": 0.805, "num_input_tokens_seen": 264065440, "step": 4714 }, { "epoch": 10.498886414253898, "loss": 0.7152308225631714, "loss_ce": 0.00014293221465777606, "loss_iou": 0.287109375, "loss_num": 0.0277099609375, "loss_xval": 0.71484375, "num_input_tokens_seen": 264065440, "step": 4714 }, { "epoch": 10.501113585746102, "grad_norm": 17.041898727416992, "learning_rate": 1e-06, "loss": 0.5754, "num_input_tokens_seen": 264122460, "step": 4715 }, { "epoch": 10.501113585746102, "loss": 0.5261731743812561, "loss_ce": 0.0002942573046311736, "loss_iou": 0.228515625, "loss_num": 0.01373291015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 264122460, "step": 4715 }, { "epoch": 10.503340757238307, "grad_norm": 24.113428115844727, "learning_rate": 1e-06, "loss": 0.4715, "num_input_tokens_seen": 264176608, "step": 4716 }, { "epoch": 10.503340757238307, "loss": 0.6706770658493042, "loss_ce": 0.0001447929535061121, "loss_iou": 0.287109375, "loss_num": 0.0196533203125, "loss_xval": 0.671875, "num_input_tokens_seen": 264176608, "step": 4716 }, { "epoch": 10.505567928730512, "grad_norm": 25.971769332885742, "learning_rate": 1e-06, "loss": 0.6877, "num_input_tokens_seen": 264232344, "step": 4717 }, { "epoch": 10.505567928730512, "loss": 0.8002659678459167, "loss_ce": 0.00046127024688757956, "loss_iou": 0.3515625, "loss_num": 0.0196533203125, "loss_xval": 0.80078125, "num_input_tokens_seen": 264232344, "step": 4717 }, { "epoch": 10.507795100222717, "grad_norm": 18.61360740661621, "learning_rate": 1e-06, "loss": 0.6597, "num_input_tokens_seen": 264287720, "step": 4718 }, { "epoch": 10.507795100222717, "loss": 0.4703478217124939, "loss_ce": 0.0001329787337454036, "loss_iou": 0.21484375, "loss_num": 0.00799560546875, "loss_xval": 0.470703125, "num_input_tokens_seen": 264287720, "step": 4718 }, { "epoch": 10.510022271714922, "grad_norm": 22.263904571533203, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 264344596, "step": 4719 }, { "epoch": 10.510022271714922, "loss": 0.29570016264915466, "loss_ce": 0.00016795132250990719, "loss_iou": 0.1298828125, "loss_num": 0.007080078125, "loss_xval": 0.294921875, "num_input_tokens_seen": 264344596, "step": 4719 }, { "epoch": 10.512249443207127, "grad_norm": 16.474157333374023, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 264401628, "step": 4720 }, { "epoch": 10.512249443207127, "loss": 0.4919620752334595, "loss_ce": 0.00014076274237595499, "loss_iou": 0.21875, "loss_num": 0.0108642578125, "loss_xval": 0.4921875, "num_input_tokens_seen": 264401628, "step": 4720 }, { "epoch": 10.514476614699332, "grad_norm": 14.566910743713379, "learning_rate": 1e-06, "loss": 0.4783, "num_input_tokens_seen": 264457620, "step": 4721 }, { "epoch": 10.514476614699332, "loss": 0.4565121829509735, "loss_ce": 0.0007016496965661645, "loss_iou": 0.1962890625, "loss_num": 0.01263427734375, "loss_xval": 0.455078125, "num_input_tokens_seen": 264457620, "step": 4721 }, { "epoch": 10.516703786191536, "grad_norm": 11.910482406616211, "learning_rate": 1e-06, "loss": 0.584, "num_input_tokens_seen": 264514528, "step": 4722 }, { "epoch": 10.516703786191536, "loss": 0.4461956024169922, "loss_ce": 0.00015069925575517118, "loss_iou": 0.1962890625, "loss_num": 0.0107421875, "loss_xval": 0.4453125, "num_input_tokens_seen": 264514528, "step": 4722 }, { "epoch": 10.518930957683741, "grad_norm": 20.123058319091797, "learning_rate": 1e-06, "loss": 0.5795, "num_input_tokens_seen": 264571452, "step": 4723 }, { "epoch": 10.518930957683741, "loss": 0.7175799608230591, "loss_ce": 0.0001727462949929759, "loss_iou": 0.314453125, "loss_num": 0.017822265625, "loss_xval": 0.71875, "num_input_tokens_seen": 264571452, "step": 4723 }, { "epoch": 10.521158129175946, "grad_norm": 15.674975395202637, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 264627296, "step": 4724 }, { "epoch": 10.521158129175946, "loss": 0.5477256178855896, "loss_ce": 0.00011817881022579968, "loss_iou": 0.2265625, "loss_num": 0.018798828125, "loss_xval": 0.546875, "num_input_tokens_seen": 264627296, "step": 4724 }, { "epoch": 10.523385300668151, "grad_norm": 22.113855361938477, "learning_rate": 1e-06, "loss": 0.5957, "num_input_tokens_seen": 264681016, "step": 4725 }, { "epoch": 10.523385300668151, "loss": 0.6728705167770386, "loss_ce": 0.0001409871329087764, "loss_iou": 0.283203125, "loss_num": 0.02099609375, "loss_xval": 0.671875, "num_input_tokens_seen": 264681016, "step": 4725 }, { "epoch": 10.525612472160356, "grad_norm": 18.36030387878418, "learning_rate": 1e-06, "loss": 0.5032, "num_input_tokens_seen": 264738488, "step": 4726 }, { "epoch": 10.525612472160356, "loss": 0.3978542685508728, "loss_ce": 0.00014918479428160936, "loss_iou": 0.166015625, "loss_num": 0.01300048828125, "loss_xval": 0.3984375, "num_input_tokens_seen": 264738488, "step": 4726 }, { "epoch": 10.52783964365256, "grad_norm": 17.425609588623047, "learning_rate": 1e-06, "loss": 0.3512, "num_input_tokens_seen": 264795500, "step": 4727 }, { "epoch": 10.52783964365256, "loss": 0.3651301860809326, "loss_ce": 0.0001399611501256004, "loss_iou": 0.150390625, "loss_num": 0.01300048828125, "loss_xval": 0.365234375, "num_input_tokens_seen": 264795500, "step": 4727 }, { "epoch": 10.530066815144766, "grad_norm": 27.829883575439453, "learning_rate": 1e-06, "loss": 0.472, "num_input_tokens_seen": 264853260, "step": 4728 }, { "epoch": 10.530066815144766, "loss": 0.40601441264152527, "loss_ce": 0.00025269074831157923, "loss_iou": 0.1689453125, "loss_num": 0.0135498046875, "loss_xval": 0.40625, "num_input_tokens_seen": 264853260, "step": 4728 }, { "epoch": 10.53229398663697, "grad_norm": 10.47251033782959, "learning_rate": 1e-06, "loss": 0.5847, "num_input_tokens_seen": 264907728, "step": 4729 }, { "epoch": 10.53229398663697, "loss": 0.5061122179031372, "loss_ce": 0.00013078347546979785, "loss_iou": 0.2216796875, "loss_num": 0.0125732421875, "loss_xval": 0.5078125, "num_input_tokens_seen": 264907728, "step": 4729 }, { "epoch": 10.534521158129175, "grad_norm": 26.059276580810547, "learning_rate": 1e-06, "loss": 0.6593, "num_input_tokens_seen": 264961788, "step": 4730 }, { "epoch": 10.534521158129175, "loss": 0.6526414752006531, "loss_ce": 0.00017564307199791074, "loss_iou": 0.283203125, "loss_num": 0.0174560546875, "loss_xval": 0.65234375, "num_input_tokens_seen": 264961788, "step": 4730 }, { "epoch": 10.53674832962138, "grad_norm": 17.807939529418945, "learning_rate": 1e-06, "loss": 0.3761, "num_input_tokens_seen": 265015304, "step": 4731 }, { "epoch": 10.53674832962138, "loss": 0.3732970356941223, "loss_ce": 0.00015859381528571248, "loss_iou": 0.171875, "loss_num": 0.005950927734375, "loss_xval": 0.373046875, "num_input_tokens_seen": 265015304, "step": 4731 }, { "epoch": 10.538975501113585, "grad_norm": 92.26382446289062, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 265070648, "step": 4732 }, { "epoch": 10.538975501113585, "loss": 0.42812007665634155, "loss_ce": 0.0010571026941761374, "loss_iou": 0.171875, "loss_num": 0.0166015625, "loss_xval": 0.427734375, "num_input_tokens_seen": 265070648, "step": 4732 }, { "epoch": 10.54120267260579, "grad_norm": 18.604368209838867, "learning_rate": 1e-06, "loss": 0.5278, "num_input_tokens_seen": 265126172, "step": 4733 }, { "epoch": 10.54120267260579, "loss": 0.5804719924926758, "loss_ce": 0.00018023110169451684, "loss_iou": 0.234375, "loss_num": 0.022216796875, "loss_xval": 0.58203125, "num_input_tokens_seen": 265126172, "step": 4733 }, { "epoch": 10.543429844097995, "grad_norm": 17.7320556640625, "learning_rate": 1e-06, "loss": 0.6031, "num_input_tokens_seen": 265183528, "step": 4734 }, { "epoch": 10.543429844097995, "loss": 0.49740028381347656, "loss_ce": 0.00045207011862657964, "loss_iou": 0.1884765625, "loss_num": 0.02392578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 265183528, "step": 4734 }, { "epoch": 10.5456570155902, "grad_norm": 29.795515060424805, "learning_rate": 1e-06, "loss": 0.3743, "num_input_tokens_seen": 265241408, "step": 4735 }, { "epoch": 10.5456570155902, "loss": 0.37365561723709106, "loss_ce": 0.00012047103518852964, "loss_iou": 0.1640625, "loss_num": 0.009033203125, "loss_xval": 0.373046875, "num_input_tokens_seen": 265241408, "step": 4735 }, { "epoch": 10.547884187082406, "grad_norm": 60.60297775268555, "learning_rate": 1e-06, "loss": 0.5276, "num_input_tokens_seen": 265296856, "step": 4736 }, { "epoch": 10.547884187082406, "loss": 0.3346078097820282, "loss_ce": 0.0001351345854345709, "loss_iou": 0.15625, "loss_num": 0.004180908203125, "loss_xval": 0.333984375, "num_input_tokens_seen": 265296856, "step": 4736 }, { "epoch": 10.550111358574611, "grad_norm": 28.60586929321289, "learning_rate": 1e-06, "loss": 0.5012, "num_input_tokens_seen": 265351804, "step": 4737 }, { "epoch": 10.550111358574611, "loss": 0.5089592933654785, "loss_ce": 0.00017022958490997553, "loss_iou": 0.1982421875, "loss_num": 0.0224609375, "loss_xval": 0.5078125, "num_input_tokens_seen": 265351804, "step": 4737 }, { "epoch": 10.552338530066816, "grad_norm": 16.353670120239258, "learning_rate": 1e-06, "loss": 0.6282, "num_input_tokens_seen": 265406476, "step": 4738 }, { "epoch": 10.552338530066816, "loss": 0.6063532829284668, "loss_ce": 0.00015206160605885088, "loss_iou": 0.255859375, "loss_num": 0.018798828125, "loss_xval": 0.60546875, "num_input_tokens_seen": 265406476, "step": 4738 }, { "epoch": 10.55456570155902, "grad_norm": 18.11619758605957, "learning_rate": 1e-06, "loss": 0.5011, "num_input_tokens_seen": 265463764, "step": 4739 }, { "epoch": 10.55456570155902, "loss": 0.6158651113510132, "loss_ce": 0.000386598490877077, "loss_iou": 0.271484375, "loss_num": 0.01458740234375, "loss_xval": 0.6171875, "num_input_tokens_seen": 265463764, "step": 4739 }, { "epoch": 10.556792873051226, "grad_norm": 21.920385360717773, "learning_rate": 1e-06, "loss": 0.7462, "num_input_tokens_seen": 265520116, "step": 4740 }, { "epoch": 10.556792873051226, "loss": 0.9230633974075317, "loss_ce": 0.00021183726494200528, "loss_iou": 0.37890625, "loss_num": 0.033203125, "loss_xval": 0.921875, "num_input_tokens_seen": 265520116, "step": 4740 }, { "epoch": 10.55902004454343, "grad_norm": 12.749909400939941, "learning_rate": 1e-06, "loss": 0.4596, "num_input_tokens_seen": 265578596, "step": 4741 }, { "epoch": 10.55902004454343, "loss": 0.5135598182678223, "loss_ce": 0.00013206491712480783, "loss_iou": 0.2255859375, "loss_num": 0.01275634765625, "loss_xval": 0.51171875, "num_input_tokens_seen": 265578596, "step": 4741 }, { "epoch": 10.561247216035635, "grad_norm": 23.8961124420166, "learning_rate": 1e-06, "loss": 0.669, "num_input_tokens_seen": 265634096, "step": 4742 }, { "epoch": 10.561247216035635, "loss": 0.8774167895317078, "loss_ce": 0.0029050512239336967, "loss_iou": 0.34765625, "loss_num": 0.03564453125, "loss_xval": 0.875, "num_input_tokens_seen": 265634096, "step": 4742 }, { "epoch": 10.56347438752784, "grad_norm": 27.8192081451416, "learning_rate": 1e-06, "loss": 0.6048, "num_input_tokens_seen": 265687780, "step": 4743 }, { "epoch": 10.56347438752784, "loss": 0.5951275825500488, "loss_ce": 0.00015690061263740063, "loss_iou": 0.2578125, "loss_num": 0.0162353515625, "loss_xval": 0.59375, "num_input_tokens_seen": 265687780, "step": 4743 }, { "epoch": 10.565701559020045, "grad_norm": 23.720129013061523, "learning_rate": 1e-06, "loss": 0.4395, "num_input_tokens_seen": 265746240, "step": 4744 }, { "epoch": 10.565701559020045, "loss": 0.43288612365722656, "loss_ce": 0.0001468704140279442, "loss_iou": 0.203125, "loss_num": 0.005126953125, "loss_xval": 0.43359375, "num_input_tokens_seen": 265746240, "step": 4744 }, { "epoch": 10.56792873051225, "grad_norm": 18.638071060180664, "learning_rate": 1e-06, "loss": 0.5585, "num_input_tokens_seen": 265800708, "step": 4745 }, { "epoch": 10.56792873051225, "loss": 0.690610408782959, "loss_ce": 0.00018075491243507713, "loss_iou": 0.291015625, "loss_num": 0.0216064453125, "loss_xval": 0.69140625, "num_input_tokens_seen": 265800708, "step": 4745 }, { "epoch": 10.570155902004455, "grad_norm": 27.448678970336914, "learning_rate": 1e-06, "loss": 0.5302, "num_input_tokens_seen": 265857676, "step": 4746 }, { "epoch": 10.570155902004455, "loss": 0.5328932404518127, "loss_ce": 0.0001783959160093218, "loss_iou": 0.2333984375, "loss_num": 0.01312255859375, "loss_xval": 0.53125, "num_input_tokens_seen": 265857676, "step": 4746 }, { "epoch": 10.57238307349666, "grad_norm": 18.024444580078125, "learning_rate": 1e-06, "loss": 0.5738, "num_input_tokens_seen": 265914688, "step": 4747 }, { "epoch": 10.57238307349666, "loss": 0.5035586357116699, "loss_ce": 0.0001406791852787137, "loss_iou": 0.2236328125, "loss_num": 0.0113525390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 265914688, "step": 4747 }, { "epoch": 10.574610244988865, "grad_norm": 17.24481773376465, "learning_rate": 1e-06, "loss": 0.6003, "num_input_tokens_seen": 265970292, "step": 4748 }, { "epoch": 10.574610244988865, "loss": 0.5748478174209595, "loss_ce": 0.00014080088294576854, "loss_iou": 0.255859375, "loss_num": 0.0126953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 265970292, "step": 4748 }, { "epoch": 10.57683741648107, "grad_norm": 21.86254119873047, "learning_rate": 1e-06, "loss": 0.5795, "num_input_tokens_seen": 266027040, "step": 4749 }, { "epoch": 10.57683741648107, "loss": 0.6045183539390564, "loss_ce": 0.00014822966477368027, "loss_iou": 0.2578125, "loss_num": 0.017822265625, "loss_xval": 0.60546875, "num_input_tokens_seen": 266027040, "step": 4749 }, { "epoch": 10.579064587973274, "grad_norm": 15.369706153869629, "learning_rate": 1e-06, "loss": 0.5245, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "eval_seeclick_web_CIoU": 0.5785282254219055, "eval_seeclick_web_GIoU": 0.5777834057807922, "eval_seeclick_web_IoU": 0.5963070392608643, "eval_seeclick_web_MAE_all": 0.016103142872452736, "eval_seeclick_web_MAE_h": 0.0080435904674232, "eval_seeclick_web_MAE_w": 0.015959544107317924, "eval_seeclick_web_MAE_x_boxes": 0.009686239995062351, "eval_seeclick_web_MAE_y_boxes": 0.021757841110229492, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9105224013328552, "eval_seeclick_web_loss_ce": 0.0002034704084508121, "eval_seeclick_web_loss_iou": 0.415283203125, "eval_seeclick_web_loss_num": 0.012907028198242188, "eval_seeclick_web_loss_xval": 0.89501953125, "eval_seeclick_web_runtime": 21.8476, "eval_seeclick_web_samples_per_second": 2.289, "eval_seeclick_web_steps_per_second": 0.092, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "eval_icons_CIoU": 0.2794487178325653, "eval_icons_GIoU": 0.3004266917705536, "eval_icons_IoU": 0.3560175597667694, "eval_icons_MAE_all": 0.060138389468193054, "eval_icons_MAE_h": 0.03905831277370453, "eval_icons_MAE_w": 0.05829060822725296, "eval_icons_MAE_x_boxes": 0.05876126326620579, "eval_icons_MAE_y_boxes": 0.0388388317078352, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.7231128215789795, "eval_icons_loss_ce": 0.00027843897260027006, "eval_icons_loss_iou": 0.6767578125, "eval_icons_loss_num": 0.05956840515136719, "eval_icons_loss_xval": 1.6513671875, "eval_icons_runtime": 21.0355, "eval_icons_samples_per_second": 2.377, "eval_icons_steps_per_second": 0.095, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "eval_screenspot_CIoU": 0.3542960087458293, "eval_screenspot_GIoU": 0.373006671667099, "eval_screenspot_IoU": 0.4333365758260091, "eval_screenspot_MAE_all": 0.05960427472988764, "eval_screenspot_MAE_h": 0.03762041280666987, "eval_screenspot_MAE_w": 0.06836641455690066, "eval_screenspot_MAE_x_boxes": 0.07390400022268295, "eval_screenspot_MAE_y_boxes": 0.03899012443919977, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6113767623901367, "eval_screenspot_loss_ce": 0.0002595222613308579, "eval_screenspot_loss_iou": 0.6659342447916666, "eval_screenspot_loss_num": 0.0684064229329427, "eval_screenspot_loss_xval": 1.6736653645833333, "eval_screenspot_runtime": 35.1964, "eval_screenspot_samples_per_second": 2.529, "eval_screenspot_steps_per_second": 0.085, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "eval_compot_CIoU": 0.34722261130809784, "eval_compot_GIoU": 0.35881949961185455, "eval_compot_IoU": 0.4061947613954544, "eval_compot_MAE_all": 0.018287954851984978, "eval_compot_MAE_h": 0.009336336981505156, "eval_compot_MAE_w": 0.0211613979190588, "eval_compot_MAE_x_boxes": 0.030585231259465218, "eval_compot_MAE_y_boxes": 0.006953268777579069, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3919204473495483, "eval_compot_loss_ce": 0.00020033524197060615, "eval_compot_loss_iou": 0.6395263671875, "eval_compot_loss_num": 0.017185211181640625, "eval_compot_loss_xval": 1.364501953125, "eval_compot_runtime": 21.5206, "eval_compot_samples_per_second": 2.323, "eval_compot_steps_per_second": 0.093, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "eval_custom_ui_val_CIoU": 0.47197884652349686, "eval_custom_ui_val_GIoU": 0.48575951324568856, "eval_custom_ui_val_IoU": 0.5315978427728018, "eval_custom_ui_val_MAE_all": 0.03096564869499869, "eval_custom_ui_val_MAE_h": 0.016998413018882275, "eval_custom_ui_val_MAE_w": 0.039277435590823494, "eval_custom_ui_val_MAE_x_boxes": 0.038262664857837886, "eval_custom_ui_val_MAE_y_boxes": 0.01508552656095061, "eval_custom_ui_val_inside_bbox": 0.7527006202273898, "eval_custom_ui_val_loss": 1.1994266510009766, "eval_custom_ui_val_loss_ce": 0.00023388697849845307, "eval_custom_ui_val_loss_iou": 0.5101725260416666, "eval_custom_ui_val_loss_num": 0.028283013237847224, "eval_custom_ui_val_loss_xval": 1.1618109809027777, "eval_custom_ui_val_runtime": 65.3312, "eval_custom_ui_val_samples_per_second": 4.056, "eval_custom_ui_val_steps_per_second": 0.138, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.579064587973274, "loss": 0.9167079925537109, "loss_ce": 0.00020406048861332238, "loss_iou": 0.39453125, "loss_num": 0.0252685546875, "loss_xval": 0.91796875, "num_input_tokens_seen": 266085392, "step": 4750 }, { "epoch": 10.58129175946548, "grad_norm": 22.031713485717773, "learning_rate": 1e-06, "loss": 0.5825, "num_input_tokens_seen": 266140888, "step": 4751 }, { "epoch": 10.58129175946548, "loss": 0.5439281463623047, "loss_ce": 0.00013536959886550903, "loss_iou": 0.24609375, "loss_num": 0.0101318359375, "loss_xval": 0.54296875, "num_input_tokens_seen": 266140888, "step": 4751 }, { "epoch": 10.583518930957684, "grad_norm": 28.358179092407227, "learning_rate": 1e-06, "loss": 0.5168, "num_input_tokens_seen": 266195452, "step": 4752 }, { "epoch": 10.583518930957684, "loss": 0.4804303050041199, "loss_ce": 0.0002057160745607689, "loss_iou": 0.2041015625, "loss_num": 0.014404296875, "loss_xval": 0.48046875, "num_input_tokens_seen": 266195452, "step": 4752 }, { "epoch": 10.585746102449889, "grad_norm": 20.244579315185547, "learning_rate": 1e-06, "loss": 0.6685, "num_input_tokens_seen": 266253420, "step": 4753 }, { "epoch": 10.585746102449889, "loss": 0.6307578086853027, "loss_ce": 0.00014260906027629972, "loss_iou": 0.283203125, "loss_num": 0.01300048828125, "loss_xval": 0.62890625, "num_input_tokens_seen": 266253420, "step": 4753 }, { "epoch": 10.587973273942094, "grad_norm": 23.68046760559082, "learning_rate": 1e-06, "loss": 0.5178, "num_input_tokens_seen": 266309880, "step": 4754 }, { "epoch": 10.587973273942094, "loss": 0.590294599533081, "loss_ce": 0.0002067518071271479, "loss_iou": 0.255859375, "loss_num": 0.015380859375, "loss_xval": 0.58984375, "num_input_tokens_seen": 266309880, "step": 4754 }, { "epoch": 10.590200445434299, "grad_norm": 14.198582649230957, "learning_rate": 1e-06, "loss": 0.6997, "num_input_tokens_seen": 266367096, "step": 4755 }, { "epoch": 10.590200445434299, "loss": 0.7059162855148315, "loss_ce": 0.00022783357417210937, "loss_iou": 0.3046875, "loss_num": 0.019287109375, "loss_xval": 0.70703125, "num_input_tokens_seen": 266367096, "step": 4755 }, { "epoch": 10.592427616926503, "grad_norm": 22.98945426940918, "learning_rate": 1e-06, "loss": 0.569, "num_input_tokens_seen": 266421524, "step": 4756 }, { "epoch": 10.592427616926503, "loss": 0.6033655405044556, "loss_ce": 0.0001550911256344989, "loss_iou": 0.267578125, "loss_num": 0.0137939453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 266421524, "step": 4756 }, { "epoch": 10.594654788418708, "grad_norm": 22.537723541259766, "learning_rate": 1e-06, "loss": 0.3968, "num_input_tokens_seen": 266475392, "step": 4757 }, { "epoch": 10.594654788418708, "loss": 0.424811989068985, "loss_ce": 0.00012936009443365037, "loss_iou": 0.1875, "loss_num": 0.010009765625, "loss_xval": 0.423828125, "num_input_tokens_seen": 266475392, "step": 4757 }, { "epoch": 10.596881959910913, "grad_norm": 21.612504959106445, "learning_rate": 1e-06, "loss": 0.5241, "num_input_tokens_seen": 266531920, "step": 4758 }, { "epoch": 10.596881959910913, "loss": 0.5215491056442261, "loss_ce": 0.0003089057863689959, "loss_iou": 0.2275390625, "loss_num": 0.01348876953125, "loss_xval": 0.51953125, "num_input_tokens_seen": 266531920, "step": 4758 }, { "epoch": 10.599109131403118, "grad_norm": 14.450573921203613, "learning_rate": 1e-06, "loss": 0.5417, "num_input_tokens_seen": 266585680, "step": 4759 }, { "epoch": 10.599109131403118, "loss": 0.6613370180130005, "loss_ce": 0.00014318549074232578, "loss_iou": 0.26953125, "loss_num": 0.0242919921875, "loss_xval": 0.66015625, "num_input_tokens_seen": 266585680, "step": 4759 }, { "epoch": 10.601336302895323, "grad_norm": 24.842741012573242, "learning_rate": 1e-06, "loss": 0.7178, "num_input_tokens_seen": 266643272, "step": 4760 }, { "epoch": 10.601336302895323, "loss": 0.8287136554718018, "loss_ce": 0.00022248990717343986, "loss_iou": 0.365234375, "loss_num": 0.0196533203125, "loss_xval": 0.828125, "num_input_tokens_seen": 266643272, "step": 4760 }, { "epoch": 10.603563474387528, "grad_norm": 21.21478271484375, "learning_rate": 1e-06, "loss": 0.622, "num_input_tokens_seen": 266700996, "step": 4761 }, { "epoch": 10.603563474387528, "loss": 0.6402872800827026, "loss_ce": 0.00015057012205943465, "loss_iou": 0.28515625, "loss_num": 0.0140380859375, "loss_xval": 0.640625, "num_input_tokens_seen": 266700996, "step": 4761 }, { "epoch": 10.605790645879733, "grad_norm": 31.64474868774414, "learning_rate": 1e-06, "loss": 0.4276, "num_input_tokens_seen": 266758212, "step": 4762 }, { "epoch": 10.605790645879733, "loss": 0.43672651052474976, "loss_ce": 0.0002030893083428964, "loss_iou": 0.19140625, "loss_num": 0.0108642578125, "loss_xval": 0.4375, "num_input_tokens_seen": 266758212, "step": 4762 }, { "epoch": 10.608017817371937, "grad_norm": 32.779476165771484, "learning_rate": 1e-06, "loss": 0.6393, "num_input_tokens_seen": 266811900, "step": 4763 }, { "epoch": 10.608017817371937, "loss": 0.5560557842254639, "loss_ce": 0.00014755260781385005, "loss_iou": 0.2373046875, "loss_num": 0.016357421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 266811900, "step": 4763 }, { "epoch": 10.610244988864142, "grad_norm": 17.99529266357422, "learning_rate": 1e-06, "loss": 0.4536, "num_input_tokens_seen": 266863520, "step": 4764 }, { "epoch": 10.610244988864142, "loss": 0.441336989402771, "loss_ce": 0.00011382217053323984, "loss_iou": 0.15625, "loss_num": 0.02587890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 266863520, "step": 4764 }, { "epoch": 10.612472160356347, "grad_norm": 20.494014739990234, "learning_rate": 1e-06, "loss": 0.7394, "num_input_tokens_seen": 266918760, "step": 4765 }, { "epoch": 10.612472160356347, "loss": 0.49647536873817444, "loss_ce": 0.00013749409117735922, "loss_iou": 0.2216796875, "loss_num": 0.01055908203125, "loss_xval": 0.49609375, "num_input_tokens_seen": 266918760, "step": 4765 }, { "epoch": 10.614699331848552, "grad_norm": 19.456296920776367, "learning_rate": 1e-06, "loss": 0.4065, "num_input_tokens_seen": 266975336, "step": 4766 }, { "epoch": 10.614699331848552, "loss": 0.4348350167274475, "loss_ce": 0.00014263816410675645, "loss_iou": 0.1943359375, "loss_num": 0.009033203125, "loss_xval": 0.435546875, "num_input_tokens_seen": 266975336, "step": 4766 }, { "epoch": 10.616926503340757, "grad_norm": 23.569063186645508, "learning_rate": 1e-06, "loss": 0.4614, "num_input_tokens_seen": 267031440, "step": 4767 }, { "epoch": 10.616926503340757, "loss": 0.4862057566642761, "loss_ce": 0.00012178381439298391, "loss_iou": 0.2236328125, "loss_num": 0.0076904296875, "loss_xval": 0.486328125, "num_input_tokens_seen": 267031440, "step": 4767 }, { "epoch": 10.619153674832962, "grad_norm": 20.638471603393555, "learning_rate": 1e-06, "loss": 0.4967, "num_input_tokens_seen": 267086692, "step": 4768 }, { "epoch": 10.619153674832962, "loss": 0.5913643836975098, "loss_ce": 0.00017783006478566676, "loss_iou": 0.267578125, "loss_num": 0.0108642578125, "loss_xval": 0.58984375, "num_input_tokens_seen": 267086692, "step": 4768 }, { "epoch": 10.621380846325167, "grad_norm": 17.369722366333008, "learning_rate": 1e-06, "loss": 0.5522, "num_input_tokens_seen": 267143996, "step": 4769 }, { "epoch": 10.621380846325167, "loss": 0.6038705706596375, "loss_ce": 0.0001718288112897426, "loss_iou": 0.275390625, "loss_num": 0.01043701171875, "loss_xval": 0.60546875, "num_input_tokens_seen": 267143996, "step": 4769 }, { "epoch": 10.623608017817372, "grad_norm": 15.817121505737305, "learning_rate": 1e-06, "loss": 0.6093, "num_input_tokens_seen": 267202664, "step": 4770 }, { "epoch": 10.623608017817372, "loss": 0.608995795249939, "loss_ce": 0.00010910604032687843, "loss_iou": 0.26953125, "loss_num": 0.01434326171875, "loss_xval": 0.609375, "num_input_tokens_seen": 267202664, "step": 4770 }, { "epoch": 10.625835189309576, "grad_norm": 19.741697311401367, "learning_rate": 1e-06, "loss": 0.6764, "num_input_tokens_seen": 267259716, "step": 4771 }, { "epoch": 10.625835189309576, "loss": 0.5544874668121338, "loss_ce": 0.00016620635869912803, "loss_iou": 0.21875, "loss_num": 0.0233154296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 267259716, "step": 4771 }, { "epoch": 10.628062360801781, "grad_norm": 14.572179794311523, "learning_rate": 1e-06, "loss": 0.4699, "num_input_tokens_seen": 267316276, "step": 4772 }, { "epoch": 10.628062360801781, "loss": 0.37722790241241455, "loss_ce": 0.0001526982377981767, "loss_iou": 0.169921875, "loss_num": 0.00738525390625, "loss_xval": 0.376953125, "num_input_tokens_seen": 267316276, "step": 4772 }, { "epoch": 10.630289532293986, "grad_norm": 35.8219108581543, "learning_rate": 1e-06, "loss": 0.7025, "num_input_tokens_seen": 267370620, "step": 4773 }, { "epoch": 10.630289532293986, "loss": 0.7943651080131531, "loss_ce": 0.00017563029541634023, "loss_iou": 0.359375, "loss_num": 0.0147705078125, "loss_xval": 0.79296875, "num_input_tokens_seen": 267370620, "step": 4773 }, { "epoch": 10.632516703786191, "grad_norm": 20.09000015258789, "learning_rate": 1e-06, "loss": 0.7104, "num_input_tokens_seen": 267427184, "step": 4774 }, { "epoch": 10.632516703786191, "loss": 0.812397837638855, "loss_ce": 0.00014197138079907745, "loss_iou": 0.318359375, "loss_num": 0.03515625, "loss_xval": 0.8125, "num_input_tokens_seen": 267427184, "step": 4774 }, { "epoch": 10.634743875278396, "grad_norm": 21.24492835998535, "learning_rate": 1e-06, "loss": 0.6861, "num_input_tokens_seen": 267481704, "step": 4775 }, { "epoch": 10.634743875278396, "loss": 0.7231541872024536, "loss_ce": 0.0001317608985118568, "loss_iou": 0.306640625, "loss_num": 0.0223388671875, "loss_xval": 0.72265625, "num_input_tokens_seen": 267481704, "step": 4775 }, { "epoch": 10.6369710467706, "grad_norm": 32.74296951293945, "learning_rate": 1e-06, "loss": 0.5468, "num_input_tokens_seen": 267539444, "step": 4776 }, { "epoch": 10.6369710467706, "loss": 0.5010995864868164, "loss_ce": 0.00030611734837293625, "loss_iou": 0.2109375, "loss_num": 0.015869140625, "loss_xval": 0.5, "num_input_tokens_seen": 267539444, "step": 4776 }, { "epoch": 10.639198218262806, "grad_norm": 21.132326126098633, "learning_rate": 1e-06, "loss": 0.6992, "num_input_tokens_seen": 267595728, "step": 4777 }, { "epoch": 10.639198218262806, "loss": 0.6195580959320068, "loss_ce": 0.00017328912508673966, "loss_iou": 0.271484375, "loss_num": 0.0152587890625, "loss_xval": 0.62109375, "num_input_tokens_seen": 267595728, "step": 4777 }, { "epoch": 10.64142538975501, "grad_norm": 13.380131721496582, "learning_rate": 1e-06, "loss": 0.3196, "num_input_tokens_seen": 267650392, "step": 4778 }, { "epoch": 10.64142538975501, "loss": 0.3022405207157135, "loss_ce": 0.00011649330554064363, "loss_iou": 0.1240234375, "loss_num": 0.0107421875, "loss_xval": 0.302734375, "num_input_tokens_seen": 267650392, "step": 4778 }, { "epoch": 10.643652561247215, "grad_norm": 16.144832611083984, "learning_rate": 1e-06, "loss": 0.5431, "num_input_tokens_seen": 267707784, "step": 4779 }, { "epoch": 10.643652561247215, "loss": 0.7454525828361511, "loss_ce": 0.00021334037592168897, "loss_iou": 0.302734375, "loss_num": 0.02783203125, "loss_xval": 0.74609375, "num_input_tokens_seen": 267707784, "step": 4779 }, { "epoch": 10.64587973273942, "grad_norm": 32.90868377685547, "learning_rate": 1e-06, "loss": 0.57, "num_input_tokens_seen": 267764500, "step": 4780 }, { "epoch": 10.64587973273942, "loss": 0.6209944486618042, "loss_ce": 0.0001448530238121748, "loss_iou": 0.263671875, "loss_num": 0.0189208984375, "loss_xval": 0.62109375, "num_input_tokens_seen": 267764500, "step": 4780 }, { "epoch": 10.648106904231625, "grad_norm": 90.17802429199219, "learning_rate": 1e-06, "loss": 0.5177, "num_input_tokens_seen": 267819128, "step": 4781 }, { "epoch": 10.648106904231625, "loss": 0.5430988073348999, "loss_ce": 0.00013003447384107858, "loss_iou": 0.21875, "loss_num": 0.021240234375, "loss_xval": 0.54296875, "num_input_tokens_seen": 267819128, "step": 4781 }, { "epoch": 10.65033407572383, "grad_norm": 20.155319213867188, "learning_rate": 1e-06, "loss": 0.6905, "num_input_tokens_seen": 267876160, "step": 4782 }, { "epoch": 10.65033407572383, "loss": 0.7085360288619995, "loss_ce": 0.00016200730169657618, "loss_iou": 0.287109375, "loss_num": 0.026611328125, "loss_xval": 0.70703125, "num_input_tokens_seen": 267876160, "step": 4782 }, { "epoch": 10.652561247216035, "grad_norm": 20.946746826171875, "learning_rate": 1e-06, "loss": 0.6077, "num_input_tokens_seen": 267933324, "step": 4783 }, { "epoch": 10.652561247216035, "loss": 0.5577636361122131, "loss_ce": 0.00014642757014371455, "loss_iou": 0.24609375, "loss_num": 0.01287841796875, "loss_xval": 0.55859375, "num_input_tokens_seen": 267933324, "step": 4783 }, { "epoch": 10.654788418708241, "grad_norm": 23.43316650390625, "learning_rate": 1e-06, "loss": 0.7215, "num_input_tokens_seen": 267990524, "step": 4784 }, { "epoch": 10.654788418708241, "loss": 0.5793881416320801, "loss_ce": 0.00016452360432595015, "loss_iou": 0.2578125, "loss_num": 0.01263427734375, "loss_xval": 0.578125, "num_input_tokens_seen": 267990524, "step": 4784 }, { "epoch": 10.657015590200446, "grad_norm": 29.320165634155273, "learning_rate": 1e-06, "loss": 0.6149, "num_input_tokens_seen": 268046904, "step": 4785 }, { "epoch": 10.657015590200446, "loss": 0.6889311075210571, "loss_ce": 0.00021046542678959668, "loss_iou": 0.30078125, "loss_num": 0.0177001953125, "loss_xval": 0.6875, "num_input_tokens_seen": 268046904, "step": 4785 }, { "epoch": 10.659242761692651, "grad_norm": 16.68779182434082, "learning_rate": 1e-06, "loss": 0.6267, "num_input_tokens_seen": 268105252, "step": 4786 }, { "epoch": 10.659242761692651, "loss": 0.6637170314788818, "loss_ce": 0.0001428277901140973, "loss_iou": 0.283203125, "loss_num": 0.01904296875, "loss_xval": 0.6640625, "num_input_tokens_seen": 268105252, "step": 4786 }, { "epoch": 10.661469933184856, "grad_norm": 41.306156158447266, "learning_rate": 1e-06, "loss": 0.7032, "num_input_tokens_seen": 268159052, "step": 4787 }, { "epoch": 10.661469933184856, "loss": 0.565805196762085, "loss_ce": 0.00013136808411218226, "loss_iou": 0.2490234375, "loss_num": 0.01361083984375, "loss_xval": 0.56640625, "num_input_tokens_seen": 268159052, "step": 4787 }, { "epoch": 10.66369710467706, "grad_norm": 12.712847709655762, "learning_rate": 1e-06, "loss": 0.3692, "num_input_tokens_seen": 268214984, "step": 4788 }, { "epoch": 10.66369710467706, "loss": 0.47919151186943054, "loss_ce": 0.0001876367605291307, "loss_iou": 0.21875, "loss_num": 0.00848388671875, "loss_xval": 0.478515625, "num_input_tokens_seen": 268214984, "step": 4788 }, { "epoch": 10.665924276169266, "grad_norm": 15.455141067504883, "learning_rate": 1e-06, "loss": 0.5079, "num_input_tokens_seen": 268269568, "step": 4789 }, { "epoch": 10.665924276169266, "loss": 0.4243258833885193, "loss_ce": 0.00013158305955585092, "loss_iou": 0.1650390625, "loss_num": 0.018798828125, "loss_xval": 0.423828125, "num_input_tokens_seen": 268269568, "step": 4789 }, { "epoch": 10.66815144766147, "grad_norm": 21.24195671081543, "learning_rate": 1e-06, "loss": 0.5663, "num_input_tokens_seen": 268325664, "step": 4790 }, { "epoch": 10.66815144766147, "loss": 0.4882197380065918, "loss_ce": 0.000182637624675408, "loss_iou": 0.2255859375, "loss_num": 0.007415771484375, "loss_xval": 0.48828125, "num_input_tokens_seen": 268325664, "step": 4790 }, { "epoch": 10.670378619153675, "grad_norm": 14.548911094665527, "learning_rate": 1e-06, "loss": 0.7085, "num_input_tokens_seen": 268378752, "step": 4791 }, { "epoch": 10.670378619153675, "loss": 0.6994498372077942, "loss_ce": 0.00023110417532734573, "loss_iou": 0.2734375, "loss_num": 0.030029296875, "loss_xval": 0.69921875, "num_input_tokens_seen": 268378752, "step": 4791 }, { "epoch": 10.67260579064588, "grad_norm": 14.135177612304688, "learning_rate": 1e-06, "loss": 0.6148, "num_input_tokens_seen": 268435256, "step": 4792 }, { "epoch": 10.67260579064588, "loss": 0.4595029354095459, "loss_ce": 0.00015233115118462592, "loss_iou": 0.2109375, "loss_num": 0.007232666015625, "loss_xval": 0.458984375, "num_input_tokens_seen": 268435256, "step": 4792 }, { "epoch": 10.674832962138085, "grad_norm": 20.141786575317383, "learning_rate": 1e-06, "loss": 0.7621, "num_input_tokens_seen": 268491724, "step": 4793 }, { "epoch": 10.674832962138085, "loss": 0.5482085943222046, "loss_ce": 0.00011288469249848276, "loss_iou": 0.236328125, "loss_num": 0.01507568359375, "loss_xval": 0.546875, "num_input_tokens_seen": 268491724, "step": 4793 }, { "epoch": 10.67706013363029, "grad_norm": 17.971179962158203, "learning_rate": 1e-06, "loss": 0.5321, "num_input_tokens_seen": 268548580, "step": 4794 }, { "epoch": 10.67706013363029, "loss": 0.5524182319641113, "loss_ce": 0.0001721275766612962, "loss_iou": 0.2353515625, "loss_num": 0.01611328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 268548580, "step": 4794 }, { "epoch": 10.679287305122495, "grad_norm": 17.928714752197266, "learning_rate": 1e-06, "loss": 0.4135, "num_input_tokens_seen": 268604548, "step": 4795 }, { "epoch": 10.679287305122495, "loss": 0.3612457513809204, "loss_ce": 0.0001617687230464071, "loss_iou": 0.1669921875, "loss_num": 0.0054931640625, "loss_xval": 0.361328125, "num_input_tokens_seen": 268604548, "step": 4795 }, { "epoch": 10.6815144766147, "grad_norm": 17.0757999420166, "learning_rate": 1e-06, "loss": 0.5741, "num_input_tokens_seen": 268661340, "step": 4796 }, { "epoch": 10.6815144766147, "loss": 0.6327658891677856, "loss_ce": 0.0001975290069822222, "loss_iou": 0.255859375, "loss_num": 0.02392578125, "loss_xval": 0.6328125, "num_input_tokens_seen": 268661340, "step": 4796 }, { "epoch": 10.683741648106905, "grad_norm": 19.690837860107422, "learning_rate": 1e-06, "loss": 0.5216, "num_input_tokens_seen": 268715384, "step": 4797 }, { "epoch": 10.683741648106905, "loss": 0.3917364180088043, "loss_ce": 0.0001348454534308985, "loss_iou": 0.1806640625, "loss_num": 0.0059814453125, "loss_xval": 0.390625, "num_input_tokens_seen": 268715384, "step": 4797 }, { "epoch": 10.68596881959911, "grad_norm": 40.683292388916016, "learning_rate": 1e-06, "loss": 0.5789, "num_input_tokens_seen": 268772412, "step": 4798 }, { "epoch": 10.68596881959911, "loss": 0.6195793151855469, "loss_ce": 0.00019457080634310842, "loss_iou": 0.267578125, "loss_num": 0.0164794921875, "loss_xval": 0.62109375, "num_input_tokens_seen": 268772412, "step": 4798 }, { "epoch": 10.688195991091314, "grad_norm": 24.19279670715332, "learning_rate": 1e-06, "loss": 0.5381, "num_input_tokens_seen": 268830036, "step": 4799 }, { "epoch": 10.688195991091314, "loss": 0.528715193271637, "loss_ce": 0.0001507120905444026, "loss_iou": 0.244140625, "loss_num": 0.00811767578125, "loss_xval": 0.52734375, "num_input_tokens_seen": 268830036, "step": 4799 }, { "epoch": 10.690423162583519, "grad_norm": 15.297384262084961, "learning_rate": 1e-06, "loss": 0.5682, "num_input_tokens_seen": 268885000, "step": 4800 }, { "epoch": 10.690423162583519, "loss": 0.4110143184661865, "loss_ce": 0.00012564694043248892, "loss_iou": 0.1865234375, "loss_num": 0.00750732421875, "loss_xval": 0.41015625, "num_input_tokens_seen": 268885000, "step": 4800 }, { "epoch": 10.692650334075724, "grad_norm": 30.53861427307129, "learning_rate": 1e-06, "loss": 0.6299, "num_input_tokens_seen": 268941972, "step": 4801 }, { "epoch": 10.692650334075724, "loss": 0.45105427503585815, "loss_ce": 0.00012653737212531269, "loss_iou": 0.201171875, "loss_num": 0.009521484375, "loss_xval": 0.451171875, "num_input_tokens_seen": 268941972, "step": 4801 }, { "epoch": 10.694877505567929, "grad_norm": 22.213823318481445, "learning_rate": 1e-06, "loss": 0.4161, "num_input_tokens_seen": 268996852, "step": 4802 }, { "epoch": 10.694877505567929, "loss": 0.36823076009750366, "loss_ce": 0.00012772114132530987, "loss_iou": 0.1533203125, "loss_num": 0.0123291015625, "loss_xval": 0.3671875, "num_input_tokens_seen": 268996852, "step": 4802 }, { "epoch": 10.697104677060134, "grad_norm": 15.323545455932617, "learning_rate": 1e-06, "loss": 0.7173, "num_input_tokens_seen": 269052540, "step": 4803 }, { "epoch": 10.697104677060134, "loss": 0.6369800567626953, "loss_ce": 0.0002612703829072416, "loss_iou": 0.291015625, "loss_num": 0.0113525390625, "loss_xval": 0.63671875, "num_input_tokens_seen": 269052540, "step": 4803 }, { "epoch": 10.699331848552339, "grad_norm": 22.477542877197266, "learning_rate": 1e-06, "loss": 0.4287, "num_input_tokens_seen": 269107128, "step": 4804 }, { "epoch": 10.699331848552339, "loss": 0.31477898359298706, "loss_ce": 0.00014274932618718594, "loss_iou": 0.138671875, "loss_num": 0.007354736328125, "loss_xval": 0.314453125, "num_input_tokens_seen": 269107128, "step": 4804 }, { "epoch": 10.701559020044543, "grad_norm": 15.610451698303223, "learning_rate": 1e-06, "loss": 0.4464, "num_input_tokens_seen": 269162060, "step": 4805 }, { "epoch": 10.701559020044543, "loss": 0.3491445779800415, "loss_ce": 0.00020659035362768918, "loss_iou": 0.1552734375, "loss_num": 0.00775146484375, "loss_xval": 0.349609375, "num_input_tokens_seen": 269162060, "step": 4805 }, { "epoch": 10.703786191536748, "grad_norm": 28.197786331176758, "learning_rate": 1e-06, "loss": 0.6925, "num_input_tokens_seen": 269217312, "step": 4806 }, { "epoch": 10.703786191536748, "loss": 0.7023131847381592, "loss_ce": 0.0001647536118980497, "loss_iou": 0.30859375, "loss_num": 0.016845703125, "loss_xval": 0.703125, "num_input_tokens_seen": 269217312, "step": 4806 }, { "epoch": 10.706013363028953, "grad_norm": 15.568071365356445, "learning_rate": 1e-06, "loss": 0.3815, "num_input_tokens_seen": 269275728, "step": 4807 }, { "epoch": 10.706013363028953, "loss": 0.4796176552772522, "loss_ce": 0.00012549404345918447, "loss_iou": 0.220703125, "loss_num": 0.0074462890625, "loss_xval": 0.48046875, "num_input_tokens_seen": 269275728, "step": 4807 }, { "epoch": 10.708240534521158, "grad_norm": 17.49445915222168, "learning_rate": 1e-06, "loss": 0.4644, "num_input_tokens_seen": 269332812, "step": 4808 }, { "epoch": 10.708240534521158, "loss": 0.5414130687713623, "loss_ce": 0.0001532989408588037, "loss_iou": 0.2421875, "loss_num": 0.01116943359375, "loss_xval": 0.54296875, "num_input_tokens_seen": 269332812, "step": 4808 }, { "epoch": 10.710467706013363, "grad_norm": 28.943540573120117, "learning_rate": 1e-06, "loss": 0.5068, "num_input_tokens_seen": 269388828, "step": 4809 }, { "epoch": 10.710467706013363, "loss": 0.441672682762146, "loss_ce": 0.0001443543005734682, "loss_iou": 0.1923828125, "loss_num": 0.01123046875, "loss_xval": 0.44140625, "num_input_tokens_seen": 269388828, "step": 4809 }, { "epoch": 10.712694877505568, "grad_norm": 17.984169006347656, "learning_rate": 1e-06, "loss": 0.5561, "num_input_tokens_seen": 269446184, "step": 4810 }, { "epoch": 10.712694877505568, "loss": 0.5116062164306641, "loss_ce": 0.0001315802219323814, "loss_iou": 0.23828125, "loss_num": 0.007110595703125, "loss_xval": 0.51171875, "num_input_tokens_seen": 269446184, "step": 4810 }, { "epoch": 10.714922048997773, "grad_norm": 22.300188064575195, "learning_rate": 1e-06, "loss": 0.5555, "num_input_tokens_seen": 269502464, "step": 4811 }, { "epoch": 10.714922048997773, "loss": 0.6414833068847656, "loss_ce": 0.00012583400530274957, "loss_iou": 0.26171875, "loss_num": 0.0238037109375, "loss_xval": 0.640625, "num_input_tokens_seen": 269502464, "step": 4811 }, { "epoch": 10.717149220489977, "grad_norm": 22.90621566772461, "learning_rate": 1e-06, "loss": 0.5469, "num_input_tokens_seen": 269560400, "step": 4812 }, { "epoch": 10.717149220489977, "loss": 0.694524884223938, "loss_ce": 0.00018889480270445347, "loss_iou": 0.32421875, "loss_num": 0.009521484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 269560400, "step": 4812 }, { "epoch": 10.719376391982182, "grad_norm": 20.077648162841797, "learning_rate": 1e-06, "loss": 0.665, "num_input_tokens_seen": 269617692, "step": 4813 }, { "epoch": 10.719376391982182, "loss": 0.9352070093154907, "loss_ce": 0.00014838551578577608, "loss_iou": 0.408203125, "loss_num": 0.023681640625, "loss_xval": 0.93359375, "num_input_tokens_seen": 269617692, "step": 4813 }, { "epoch": 10.721603563474387, "grad_norm": 45.49837875366211, "learning_rate": 1e-06, "loss": 0.6051, "num_input_tokens_seen": 269675808, "step": 4814 }, { "epoch": 10.721603563474387, "loss": 0.6009942889213562, "loss_ce": 0.00016420066822320223, "loss_iou": 0.263671875, "loss_num": 0.01446533203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 269675808, "step": 4814 }, { "epoch": 10.723830734966592, "grad_norm": 18.847713470458984, "learning_rate": 1e-06, "loss": 0.6815, "num_input_tokens_seen": 269731972, "step": 4815 }, { "epoch": 10.723830734966592, "loss": 0.5592037439346313, "loss_ce": 0.00012171984417364001, "loss_iou": 0.2392578125, "loss_num": 0.01611328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 269731972, "step": 4815 }, { "epoch": 10.726057906458797, "grad_norm": 21.027870178222656, "learning_rate": 1e-06, "loss": 0.6109, "num_input_tokens_seen": 269785520, "step": 4816 }, { "epoch": 10.726057906458797, "loss": 0.5723341107368469, "loss_ce": 0.0001295319088967517, "loss_iou": 0.251953125, "loss_num": 0.01373291015625, "loss_xval": 0.5703125, "num_input_tokens_seen": 269785520, "step": 4816 }, { "epoch": 10.728285077951002, "grad_norm": 22.105512619018555, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 269843136, "step": 4817 }, { "epoch": 10.728285077951002, "loss": 0.5098918676376343, "loss_ce": 0.0001262695004697889, "loss_iou": 0.220703125, "loss_num": 0.01373291015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 269843136, "step": 4817 }, { "epoch": 10.730512249443207, "grad_norm": 20.30752944946289, "learning_rate": 1e-06, "loss": 0.6458, "num_input_tokens_seen": 269897972, "step": 4818 }, { "epoch": 10.730512249443207, "loss": 0.8840016722679138, "loss_ce": 0.00021258596098050475, "loss_iou": 0.3671875, "loss_num": 0.029541015625, "loss_xval": 0.8828125, "num_input_tokens_seen": 269897972, "step": 4818 }, { "epoch": 10.732739420935411, "grad_norm": 19.499021530151367, "learning_rate": 1e-06, "loss": 0.4868, "num_input_tokens_seen": 269955164, "step": 4819 }, { "epoch": 10.732739420935411, "loss": 0.4875670373439789, "loss_ce": 0.0001403049536747858, "loss_iou": 0.1865234375, "loss_num": 0.022705078125, "loss_xval": 0.48828125, "num_input_tokens_seen": 269955164, "step": 4819 }, { "epoch": 10.734966592427616, "grad_norm": 22.588951110839844, "learning_rate": 1e-06, "loss": 0.5172, "num_input_tokens_seen": 270014316, "step": 4820 }, { "epoch": 10.734966592427616, "loss": 0.3510775864124298, "loss_ce": 0.00012545072240754962, "loss_iou": 0.1533203125, "loss_num": 0.00885009765625, "loss_xval": 0.3515625, "num_input_tokens_seen": 270014316, "step": 4820 }, { "epoch": 10.737193763919821, "grad_norm": 20.028270721435547, "learning_rate": 1e-06, "loss": 0.6092, "num_input_tokens_seen": 270070980, "step": 4821 }, { "epoch": 10.737193763919821, "loss": 0.6935439109802246, "loss_ce": 0.00018454053497407585, "loss_iou": 0.29296875, "loss_num": 0.021240234375, "loss_xval": 0.6953125, "num_input_tokens_seen": 270070980, "step": 4821 }, { "epoch": 10.739420935412026, "grad_norm": 14.508070945739746, "learning_rate": 1e-06, "loss": 0.5061, "num_input_tokens_seen": 270128576, "step": 4822 }, { "epoch": 10.739420935412026, "loss": 0.6202830672264099, "loss_ce": 0.00016589835286140442, "loss_iou": 0.2578125, "loss_num": 0.0208740234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 270128576, "step": 4822 }, { "epoch": 10.74164810690423, "grad_norm": 17.07376480102539, "learning_rate": 1e-06, "loss": 0.7271, "num_input_tokens_seen": 270184516, "step": 4823 }, { "epoch": 10.74164810690423, "loss": 0.64375239610672, "loss_ce": 0.00019769492791965604, "loss_iou": 0.298828125, "loss_num": 0.0089111328125, "loss_xval": 0.64453125, "num_input_tokens_seen": 270184516, "step": 4823 }, { "epoch": 10.743875278396436, "grad_norm": 20.712217330932617, "learning_rate": 1e-06, "loss": 0.5371, "num_input_tokens_seen": 270240064, "step": 4824 }, { "epoch": 10.743875278396436, "loss": 0.5007485747337341, "loss_ce": 0.0001382490445394069, "loss_iou": 0.216796875, "loss_num": 0.01312255859375, "loss_xval": 0.5, "num_input_tokens_seen": 270240064, "step": 4824 }, { "epoch": 10.74610244988864, "grad_norm": 20.345590591430664, "learning_rate": 1e-06, "loss": 0.5216, "num_input_tokens_seen": 270297036, "step": 4825 }, { "epoch": 10.74610244988864, "loss": 0.6117961406707764, "loss_ce": 0.00022386244381777942, "loss_iou": 0.267578125, "loss_num": 0.015625, "loss_xval": 0.61328125, "num_input_tokens_seen": 270297036, "step": 4825 }, { "epoch": 10.748329621380847, "grad_norm": 19.20088768005371, "learning_rate": 1e-06, "loss": 0.5529, "num_input_tokens_seen": 270352788, "step": 4826 }, { "epoch": 10.748329621380847, "loss": 0.6521322131156921, "loss_ce": 0.00015466928016394377, "loss_iou": 0.30078125, "loss_num": 0.01025390625, "loss_xval": 0.65234375, "num_input_tokens_seen": 270352788, "step": 4826 }, { "epoch": 10.750556792873052, "grad_norm": 16.478546142578125, "learning_rate": 1e-06, "loss": 0.458, "num_input_tokens_seen": 270407444, "step": 4827 }, { "epoch": 10.750556792873052, "loss": 0.5561587810516357, "loss_ce": 0.00012851686915382743, "loss_iou": 0.234375, "loss_num": 0.017578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 270407444, "step": 4827 }, { "epoch": 10.752783964365257, "grad_norm": 30.999256134033203, "learning_rate": 1e-06, "loss": 0.4163, "num_input_tokens_seen": 270465300, "step": 4828 }, { "epoch": 10.752783964365257, "loss": 0.41544434428215027, "loss_ce": 0.00016115896869450808, "loss_iou": 0.1865234375, "loss_num": 0.00830078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 270465300, "step": 4828 }, { "epoch": 10.755011135857462, "grad_norm": 21.072765350341797, "learning_rate": 1e-06, "loss": 0.507, "num_input_tokens_seen": 270524188, "step": 4829 }, { "epoch": 10.755011135857462, "loss": 0.682302713394165, "loss_ce": 0.0005400052759796381, "loss_iou": 0.294921875, "loss_num": 0.018310546875, "loss_xval": 0.68359375, "num_input_tokens_seen": 270524188, "step": 4829 }, { "epoch": 10.757238307349667, "grad_norm": 25.556285858154297, "learning_rate": 1e-06, "loss": 0.6146, "num_input_tokens_seen": 270580156, "step": 4830 }, { "epoch": 10.757238307349667, "loss": 0.43665558099746704, "loss_ce": 0.000132172426674515, "loss_iou": 0.189453125, "loss_num": 0.01165771484375, "loss_xval": 0.4375, "num_input_tokens_seen": 270580156, "step": 4830 }, { "epoch": 10.759465478841872, "grad_norm": 39.73129653930664, "learning_rate": 1e-06, "loss": 0.6867, "num_input_tokens_seen": 270637276, "step": 4831 }, { "epoch": 10.759465478841872, "loss": 0.7980098128318787, "loss_ce": 0.00015826240996830165, "loss_iou": 0.328125, "loss_num": 0.02783203125, "loss_xval": 0.796875, "num_input_tokens_seen": 270637276, "step": 4831 }, { "epoch": 10.761692650334076, "grad_norm": 31.106664657592773, "learning_rate": 1e-06, "loss": 0.5331, "num_input_tokens_seen": 270691744, "step": 4832 }, { "epoch": 10.761692650334076, "loss": 0.45221608877182007, "loss_ce": 0.00012868586054537445, "loss_iou": 0.1953125, "loss_num": 0.01226806640625, "loss_xval": 0.451171875, "num_input_tokens_seen": 270691744, "step": 4832 }, { "epoch": 10.763919821826281, "grad_norm": 19.635936737060547, "learning_rate": 1e-06, "loss": 0.4175, "num_input_tokens_seen": 270749204, "step": 4833 }, { "epoch": 10.763919821826281, "loss": 0.510043740272522, "loss_ce": 0.00015601412451360375, "loss_iou": 0.2197265625, "loss_num": 0.01397705078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 270749204, "step": 4833 }, { "epoch": 10.766146993318486, "grad_norm": 30.472660064697266, "learning_rate": 1e-06, "loss": 0.6393, "num_input_tokens_seen": 270803020, "step": 4834 }, { "epoch": 10.766146993318486, "loss": 0.6473684906959534, "loss_ce": 0.0001517097553005442, "loss_iou": 0.28125, "loss_num": 0.0172119140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 270803020, "step": 4834 }, { "epoch": 10.768374164810691, "grad_norm": 18.92053985595703, "learning_rate": 1e-06, "loss": 0.6669, "num_input_tokens_seen": 270859752, "step": 4835 }, { "epoch": 10.768374164810691, "loss": 0.4643715023994446, "loss_ce": 0.00013810490781906992, "loss_iou": 0.2138671875, "loss_num": 0.007476806640625, "loss_xval": 0.46484375, "num_input_tokens_seen": 270859752, "step": 4835 }, { "epoch": 10.770601336302896, "grad_norm": 16.783376693725586, "learning_rate": 1e-06, "loss": 0.4961, "num_input_tokens_seen": 270916024, "step": 4836 }, { "epoch": 10.770601336302896, "loss": 0.640272319316864, "loss_ce": 0.0001356131979264319, "loss_iou": 0.2734375, "loss_num": 0.018798828125, "loss_xval": 0.640625, "num_input_tokens_seen": 270916024, "step": 4836 }, { "epoch": 10.7728285077951, "grad_norm": 28.108156204223633, "learning_rate": 1e-06, "loss": 0.4965, "num_input_tokens_seen": 270972228, "step": 4837 }, { "epoch": 10.7728285077951, "loss": 0.6063258051872253, "loss_ce": 0.00012462104496080428, "loss_iou": 0.25, "loss_num": 0.021484375, "loss_xval": 0.60546875, "num_input_tokens_seen": 270972228, "step": 4837 }, { "epoch": 10.775055679287306, "grad_norm": 14.273365020751953, "learning_rate": 1e-06, "loss": 0.396, "num_input_tokens_seen": 271028976, "step": 4838 }, { "epoch": 10.775055679287306, "loss": 0.40957385301589966, "loss_ce": 0.00015002323198132217, "loss_iou": 0.19140625, "loss_num": 0.00531005859375, "loss_xval": 0.41015625, "num_input_tokens_seen": 271028976, "step": 4838 }, { "epoch": 10.77728285077951, "grad_norm": 16.653898239135742, "learning_rate": 1e-06, "loss": 0.4034, "num_input_tokens_seen": 271086780, "step": 4839 }, { "epoch": 10.77728285077951, "loss": 0.32855719327926636, "loss_ce": 0.0001270081556867808, "loss_iou": 0.1435546875, "loss_num": 0.0084228515625, "loss_xval": 0.328125, "num_input_tokens_seen": 271086780, "step": 4839 }, { "epoch": 10.779510022271715, "grad_norm": 22.08172607421875, "learning_rate": 1e-06, "loss": 0.6062, "num_input_tokens_seen": 271142040, "step": 4840 }, { "epoch": 10.779510022271715, "loss": 0.6195139288902283, "loss_ce": 0.00012916022387798876, "loss_iou": 0.267578125, "loss_num": 0.0166015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 271142040, "step": 4840 }, { "epoch": 10.78173719376392, "grad_norm": 31.35270881652832, "learning_rate": 1e-06, "loss": 0.4893, "num_input_tokens_seen": 271198064, "step": 4841 }, { "epoch": 10.78173719376392, "loss": 0.6805669069290161, "loss_ce": 0.00014694841229356825, "loss_iou": 0.29296875, "loss_num": 0.019287109375, "loss_xval": 0.6796875, "num_input_tokens_seen": 271198064, "step": 4841 }, { "epoch": 10.783964365256125, "grad_norm": 19.86975860595703, "learning_rate": 1e-06, "loss": 0.5828, "num_input_tokens_seen": 271253592, "step": 4842 }, { "epoch": 10.783964365256125, "loss": 0.6472707390785217, "loss_ce": 0.00020657834829762578, "loss_iou": 0.2353515625, "loss_num": 0.035400390625, "loss_xval": 0.6484375, "num_input_tokens_seen": 271253592, "step": 4842 }, { "epoch": 10.78619153674833, "grad_norm": 21.59044075012207, "learning_rate": 1e-06, "loss": 0.7014, "num_input_tokens_seen": 271310024, "step": 4843 }, { "epoch": 10.78619153674833, "loss": 0.7749493718147278, "loss_ce": 0.0002301402564626187, "loss_iou": 0.306640625, "loss_num": 0.03271484375, "loss_xval": 0.7734375, "num_input_tokens_seen": 271310024, "step": 4843 }, { "epoch": 10.788418708240535, "grad_norm": 20.93609619140625, "learning_rate": 1e-06, "loss": 0.4648, "num_input_tokens_seen": 271361888, "step": 4844 }, { "epoch": 10.788418708240535, "loss": 0.5555073022842407, "loss_ce": 0.00020940190006513149, "loss_iou": 0.23046875, "loss_num": 0.0191650390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 271361888, "step": 4844 }, { "epoch": 10.79064587973274, "grad_norm": 35.37983322143555, "learning_rate": 1e-06, "loss": 0.5951, "num_input_tokens_seen": 271417132, "step": 4845 }, { "epoch": 10.79064587973274, "loss": 0.44118914008140564, "loss_ce": 0.0001490934519097209, "loss_iou": 0.197265625, "loss_num": 0.00927734375, "loss_xval": 0.44140625, "num_input_tokens_seen": 271417132, "step": 4845 }, { "epoch": 10.792873051224944, "grad_norm": 19.222061157226562, "learning_rate": 1e-06, "loss": 0.5955, "num_input_tokens_seen": 271471928, "step": 4846 }, { "epoch": 10.792873051224944, "loss": 0.6026766300201416, "loss_ce": 0.00013759825378656387, "loss_iou": 0.26171875, "loss_num": 0.0155029296875, "loss_xval": 0.6015625, "num_input_tokens_seen": 271471928, "step": 4846 }, { "epoch": 10.79510022271715, "grad_norm": 17.895984649658203, "learning_rate": 1e-06, "loss": 0.5379, "num_input_tokens_seen": 271526128, "step": 4847 }, { "epoch": 10.79510022271715, "loss": 0.4887864291667938, "loss_ce": 0.00013897617463953793, "loss_iou": 0.21875, "loss_num": 0.01019287109375, "loss_xval": 0.48828125, "num_input_tokens_seen": 271526128, "step": 4847 }, { "epoch": 10.797327394209354, "grad_norm": 22.95305061340332, "learning_rate": 1e-06, "loss": 0.5451, "num_input_tokens_seen": 271581608, "step": 4848 }, { "epoch": 10.797327394209354, "loss": 0.42127272486686707, "loss_ce": 0.00013015880540478975, "loss_iou": 0.1923828125, "loss_num": 0.007415771484375, "loss_xval": 0.421875, "num_input_tokens_seen": 271581608, "step": 4848 }, { "epoch": 10.799554565701559, "grad_norm": 28.807262420654297, "learning_rate": 1e-06, "loss": 0.5903, "num_input_tokens_seen": 271638680, "step": 4849 }, { "epoch": 10.799554565701559, "loss": 0.8107374310493469, "loss_ce": 0.00019058278121519834, "loss_iou": 0.345703125, "loss_num": 0.024169921875, "loss_xval": 0.8125, "num_input_tokens_seen": 271638680, "step": 4849 }, { "epoch": 10.801781737193764, "grad_norm": 22.489822387695312, "learning_rate": 1e-06, "loss": 0.4126, "num_input_tokens_seen": 271694712, "step": 4850 }, { "epoch": 10.801781737193764, "loss": 0.43495360016822815, "loss_ce": 0.000139150011818856, "loss_iou": 0.1904296875, "loss_num": 0.01080322265625, "loss_xval": 0.435546875, "num_input_tokens_seen": 271694712, "step": 4850 }, { "epoch": 10.804008908685969, "grad_norm": 19.50384521484375, "learning_rate": 1e-06, "loss": 0.4934, "num_input_tokens_seen": 271750692, "step": 4851 }, { "epoch": 10.804008908685969, "loss": 0.36242765188217163, "loss_ce": 0.00012293207691982388, "loss_iou": 0.1611328125, "loss_num": 0.00799560546875, "loss_xval": 0.36328125, "num_input_tokens_seen": 271750692, "step": 4851 }, { "epoch": 10.806236080178174, "grad_norm": 20.99281883239746, "learning_rate": 1e-06, "loss": 0.5668, "num_input_tokens_seen": 271807552, "step": 4852 }, { "epoch": 10.806236080178174, "loss": 0.4213836193084717, "loss_ce": 0.00011898807133547962, "loss_iou": 0.1708984375, "loss_num": 0.0159912109375, "loss_xval": 0.421875, "num_input_tokens_seen": 271807552, "step": 4852 }, { "epoch": 10.808463251670378, "grad_norm": 14.47896957397461, "learning_rate": 1e-06, "loss": 0.5013, "num_input_tokens_seen": 271862320, "step": 4853 }, { "epoch": 10.808463251670378, "loss": 0.608984112739563, "loss_ce": 9.740462701302022e-05, "loss_iou": 0.21875, "loss_num": 0.0341796875, "loss_xval": 0.609375, "num_input_tokens_seen": 271862320, "step": 4853 }, { "epoch": 10.810690423162583, "grad_norm": 25.232343673706055, "learning_rate": 1e-06, "loss": 0.7236, "num_input_tokens_seen": 271916944, "step": 4854 }, { "epoch": 10.810690423162583, "loss": 0.6214773654937744, "loss_ce": 0.00013948752894066274, "loss_iou": 0.265625, "loss_num": 0.017578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 271916944, "step": 4854 }, { "epoch": 10.812917594654788, "grad_norm": 24.822452545166016, "learning_rate": 1e-06, "loss": 0.4163, "num_input_tokens_seen": 271972016, "step": 4855 }, { "epoch": 10.812917594654788, "loss": 0.461017906665802, "loss_ce": 0.0004466102982405573, "loss_iou": 0.2080078125, "loss_num": 0.00897216796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 271972016, "step": 4855 }, { "epoch": 10.815144766146993, "grad_norm": 17.059764862060547, "learning_rate": 1e-06, "loss": 0.5627, "num_input_tokens_seen": 272026808, "step": 4856 }, { "epoch": 10.815144766146993, "loss": 0.7428523898124695, "loss_ce": 0.0001765760825946927, "loss_iou": 0.3203125, "loss_num": 0.020263671875, "loss_xval": 0.7421875, "num_input_tokens_seen": 272026808, "step": 4856 }, { "epoch": 10.817371937639198, "grad_norm": 22.709232330322266, "learning_rate": 1e-06, "loss": 0.5341, "num_input_tokens_seen": 272080680, "step": 4857 }, { "epoch": 10.817371937639198, "loss": 0.6549853086471558, "loss_ce": 0.0002001727989409119, "loss_iou": 0.275390625, "loss_num": 0.0206298828125, "loss_xval": 0.65625, "num_input_tokens_seen": 272080680, "step": 4857 }, { "epoch": 10.819599109131403, "grad_norm": 19.562105178833008, "learning_rate": 1e-06, "loss": 0.5892, "num_input_tokens_seen": 272137808, "step": 4858 }, { "epoch": 10.819599109131403, "loss": 0.5621104836463928, "loss_ce": 0.00022084277588874102, "loss_iou": 0.2275390625, "loss_num": 0.0213623046875, "loss_xval": 0.5625, "num_input_tokens_seen": 272137808, "step": 4858 }, { "epoch": 10.821826280623608, "grad_norm": 11.724193572998047, "learning_rate": 1e-06, "loss": 0.4827, "num_input_tokens_seen": 272196140, "step": 4859 }, { "epoch": 10.821826280623608, "loss": 0.531132161617279, "loss_ce": 0.00024835762451402843, "loss_iou": 0.197265625, "loss_num": 0.0274658203125, "loss_xval": 0.53125, "num_input_tokens_seen": 272196140, "step": 4859 }, { "epoch": 10.824053452115812, "grad_norm": 26.051990509033203, "learning_rate": 1e-06, "loss": 0.7094, "num_input_tokens_seen": 272252688, "step": 4860 }, { "epoch": 10.824053452115812, "loss": 0.38587331771850586, "loss_ce": 0.0001311138621531427, "loss_iou": 0.17578125, "loss_num": 0.00677490234375, "loss_xval": 0.38671875, "num_input_tokens_seen": 272252688, "step": 4860 }, { "epoch": 10.826280623608017, "grad_norm": 19.309463500976562, "learning_rate": 1e-06, "loss": 0.4197, "num_input_tokens_seen": 272307848, "step": 4861 }, { "epoch": 10.826280623608017, "loss": 0.42674195766448975, "loss_ce": 0.00010623160051181912, "loss_iou": 0.177734375, "loss_num": 0.01416015625, "loss_xval": 0.42578125, "num_input_tokens_seen": 272307848, "step": 4861 }, { "epoch": 10.828507795100222, "grad_norm": 25.22977066040039, "learning_rate": 1e-06, "loss": 0.6507, "num_input_tokens_seen": 272364068, "step": 4862 }, { "epoch": 10.828507795100222, "loss": 0.6106005311012268, "loss_ce": 0.00024895532988011837, "loss_iou": 0.251953125, "loss_num": 0.0211181640625, "loss_xval": 0.609375, "num_input_tokens_seen": 272364068, "step": 4862 }, { "epoch": 10.830734966592427, "grad_norm": 13.005237579345703, "learning_rate": 1e-06, "loss": 0.6308, "num_input_tokens_seen": 272421856, "step": 4863 }, { "epoch": 10.830734966592427, "loss": 0.5312567949295044, "loss_ce": 0.00012890055950265378, "loss_iou": 0.208984375, "loss_num": 0.0225830078125, "loss_xval": 0.53125, "num_input_tokens_seen": 272421856, "step": 4863 }, { "epoch": 10.832962138084632, "grad_norm": 14.533258438110352, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 272479860, "step": 4864 }, { "epoch": 10.832962138084632, "loss": 0.4337289333343506, "loss_ce": 0.000135167152620852, "loss_iou": 0.193359375, "loss_num": 0.00933837890625, "loss_xval": 0.43359375, "num_input_tokens_seen": 272479860, "step": 4864 }, { "epoch": 10.835189309576837, "grad_norm": 16.71564292907715, "learning_rate": 1e-06, "loss": 0.4138, "num_input_tokens_seen": 272534472, "step": 4865 }, { "epoch": 10.835189309576837, "loss": 0.5382235646247864, "loss_ce": 0.0001376058644382283, "loss_iou": 0.244140625, "loss_num": 0.01007080078125, "loss_xval": 0.5390625, "num_input_tokens_seen": 272534472, "step": 4865 }, { "epoch": 10.837416481069042, "grad_norm": 35.24814987182617, "learning_rate": 1e-06, "loss": 0.5191, "num_input_tokens_seen": 272591164, "step": 4866 }, { "epoch": 10.837416481069042, "loss": 0.4814545214176178, "loss_ce": 0.00013124944234732538, "loss_iou": 0.197265625, "loss_num": 0.017333984375, "loss_xval": 0.48046875, "num_input_tokens_seen": 272591164, "step": 4866 }, { "epoch": 10.839643652561247, "grad_norm": 13.28064250946045, "learning_rate": 1e-06, "loss": 0.4789, "num_input_tokens_seen": 272647484, "step": 4867 }, { "epoch": 10.839643652561247, "loss": 0.4676069915294647, "loss_ce": 0.00013870664406567812, "loss_iou": 0.201171875, "loss_num": 0.01287841796875, "loss_xval": 0.466796875, "num_input_tokens_seen": 272647484, "step": 4867 }, { "epoch": 10.841870824053451, "grad_norm": 20.322200775146484, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 272703104, "step": 4868 }, { "epoch": 10.841870824053451, "loss": 0.5150094628334045, "loss_ce": 0.0005441223620437086, "loss_iou": 0.2138671875, "loss_num": 0.017333984375, "loss_xval": 0.515625, "num_input_tokens_seen": 272703104, "step": 4868 }, { "epoch": 10.844097995545656, "grad_norm": 24.41085433959961, "learning_rate": 1e-06, "loss": 0.5288, "num_input_tokens_seen": 272760632, "step": 4869 }, { "epoch": 10.844097995545656, "loss": 0.6352982521057129, "loss_ce": 0.00016645353753119707, "loss_iou": 0.25390625, "loss_num": 0.0252685546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 272760632, "step": 4869 }, { "epoch": 10.846325167037861, "grad_norm": 26.662748336791992, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 272817712, "step": 4870 }, { "epoch": 10.846325167037861, "loss": 0.3460940420627594, "loss_ce": 0.00014678272418677807, "loss_iou": 0.158203125, "loss_num": 0.0059814453125, "loss_xval": 0.345703125, "num_input_tokens_seen": 272817712, "step": 4870 }, { "epoch": 10.848552338530066, "grad_norm": 24.583892822265625, "learning_rate": 1e-06, "loss": 0.7168, "num_input_tokens_seen": 272871604, "step": 4871 }, { "epoch": 10.848552338530066, "loss": 0.7562465667724609, "loss_ce": 0.0001430131815141067, "loss_iou": 0.3125, "loss_num": 0.0263671875, "loss_xval": 0.7578125, "num_input_tokens_seen": 272871604, "step": 4871 }, { "epoch": 10.85077951002227, "grad_norm": 21.29097557067871, "learning_rate": 1e-06, "loss": 0.4865, "num_input_tokens_seen": 272924804, "step": 4872 }, { "epoch": 10.85077951002227, "loss": 0.5399699211120605, "loss_ce": 0.00017501205729786307, "loss_iou": 0.2333984375, "loss_num": 0.014404296875, "loss_xval": 0.5390625, "num_input_tokens_seen": 272924804, "step": 4872 }, { "epoch": 10.853006681514476, "grad_norm": 24.785858154296875, "learning_rate": 1e-06, "loss": 0.7053, "num_input_tokens_seen": 272979376, "step": 4873 }, { "epoch": 10.853006681514476, "loss": 0.7309668660163879, "loss_ce": 0.0002539680281188339, "loss_iou": 0.322265625, "loss_num": 0.01708984375, "loss_xval": 0.73046875, "num_input_tokens_seen": 272979376, "step": 4873 }, { "epoch": 10.855233853006682, "grad_norm": 21.17921257019043, "learning_rate": 1e-06, "loss": 0.4353, "num_input_tokens_seen": 273037616, "step": 4874 }, { "epoch": 10.855233853006682, "loss": 0.5316353440284729, "loss_ce": 0.00014121364802122116, "loss_iou": 0.2255859375, "loss_num": 0.0162353515625, "loss_xval": 0.53125, "num_input_tokens_seen": 273037616, "step": 4874 }, { "epoch": 10.857461024498887, "grad_norm": 21.799673080444336, "learning_rate": 1e-06, "loss": 0.8208, "num_input_tokens_seen": 273097464, "step": 4875 }, { "epoch": 10.857461024498887, "loss": 0.9345442652702332, "loss_ce": 0.00021809733880218118, "loss_iou": 0.341796875, "loss_num": 0.0498046875, "loss_xval": 0.93359375, "num_input_tokens_seen": 273097464, "step": 4875 }, { "epoch": 10.859688195991092, "grad_norm": 20.746036529541016, "learning_rate": 1e-06, "loss": 0.5591, "num_input_tokens_seen": 273152000, "step": 4876 }, { "epoch": 10.859688195991092, "loss": 0.5373209118843079, "loss_ce": 0.00021149568783584982, "loss_iou": 0.2431640625, "loss_num": 0.01043701171875, "loss_xval": 0.5390625, "num_input_tokens_seen": 273152000, "step": 4876 }, { "epoch": 10.861915367483297, "grad_norm": 16.878854751586914, "learning_rate": 1e-06, "loss": 0.5007, "num_input_tokens_seen": 273209936, "step": 4877 }, { "epoch": 10.861915367483297, "loss": 0.5112770795822144, "loss_ce": 0.0001686862960923463, "loss_iou": 0.20703125, "loss_num": 0.019287109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 273209936, "step": 4877 }, { "epoch": 10.864142538975502, "grad_norm": 17.051406860351562, "learning_rate": 1e-06, "loss": 0.4679, "num_input_tokens_seen": 273264832, "step": 4878 }, { "epoch": 10.864142538975502, "loss": 0.5240702629089355, "loss_ce": 0.00014447516878135502, "loss_iou": 0.220703125, "loss_num": 0.0166015625, "loss_xval": 0.5234375, "num_input_tokens_seen": 273264832, "step": 4878 }, { "epoch": 10.866369710467707, "grad_norm": 19.403690338134766, "learning_rate": 1e-06, "loss": 0.7818, "num_input_tokens_seen": 273321468, "step": 4879 }, { "epoch": 10.866369710467707, "loss": 0.8524916172027588, "loss_ce": 0.00013568377471528947, "loss_iou": 0.345703125, "loss_num": 0.0322265625, "loss_xval": 0.8515625, "num_input_tokens_seen": 273321468, "step": 4879 }, { "epoch": 10.868596881959911, "grad_norm": 17.883703231811523, "learning_rate": 1e-06, "loss": 0.589, "num_input_tokens_seen": 273378420, "step": 4880 }, { "epoch": 10.868596881959911, "loss": 0.49145275354385376, "loss_ce": 0.00024180561013054103, "loss_iou": 0.2236328125, "loss_num": 0.0089111328125, "loss_xval": 0.4921875, "num_input_tokens_seen": 273378420, "step": 4880 }, { "epoch": 10.870824053452116, "grad_norm": 29.07217025756836, "learning_rate": 1e-06, "loss": 0.4582, "num_input_tokens_seen": 273433924, "step": 4881 }, { "epoch": 10.870824053452116, "loss": 0.4086874723434448, "loss_ce": 0.00011811024160124362, "loss_iou": 0.1748046875, "loss_num": 0.01171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 273433924, "step": 4881 }, { "epoch": 10.873051224944321, "grad_norm": 22.693309783935547, "learning_rate": 1e-06, "loss": 0.4058, "num_input_tokens_seen": 273488764, "step": 4882 }, { "epoch": 10.873051224944321, "loss": 0.5119615197181702, "loss_ce": 0.00012070569937350228, "loss_iou": 0.21875, "loss_num": 0.0147705078125, "loss_xval": 0.51171875, "num_input_tokens_seen": 273488764, "step": 4882 }, { "epoch": 10.875278396436526, "grad_norm": 16.45703125, "learning_rate": 1e-06, "loss": 0.5703, "num_input_tokens_seen": 273547644, "step": 4883 }, { "epoch": 10.875278396436526, "loss": 0.5592530965805054, "loss_ce": 0.00017106565064750612, "loss_iou": 0.234375, "loss_num": 0.017822265625, "loss_xval": 0.55859375, "num_input_tokens_seen": 273547644, "step": 4883 }, { "epoch": 10.877505567928731, "grad_norm": 16.91400909423828, "learning_rate": 1e-06, "loss": 0.4222, "num_input_tokens_seen": 273602376, "step": 4884 }, { "epoch": 10.877505567928731, "loss": 0.5984039306640625, "loss_ce": 0.00013728870544582605, "loss_iou": 0.224609375, "loss_num": 0.0299072265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 273602376, "step": 4884 }, { "epoch": 10.879732739420936, "grad_norm": 15.582435607910156, "learning_rate": 1e-06, "loss": 0.736, "num_input_tokens_seen": 273658164, "step": 4885 }, { "epoch": 10.879732739420936, "loss": 0.728879451751709, "loss_ce": 0.00011967639875365421, "loss_iou": 0.296875, "loss_num": 0.02685546875, "loss_xval": 0.73046875, "num_input_tokens_seen": 273658164, "step": 4885 }, { "epoch": 10.88195991091314, "grad_norm": 21.012617111206055, "learning_rate": 1e-06, "loss": 0.5643, "num_input_tokens_seen": 273715456, "step": 4886 }, { "epoch": 10.88195991091314, "loss": 0.49945536255836487, "loss_ce": 0.00018778612138703465, "loss_iou": 0.208984375, "loss_num": 0.016357421875, "loss_xval": 0.5, "num_input_tokens_seen": 273715456, "step": 4886 }, { "epoch": 10.884187082405345, "grad_norm": 18.003925323486328, "learning_rate": 1e-06, "loss": 0.4371, "num_input_tokens_seen": 273766692, "step": 4887 }, { "epoch": 10.884187082405345, "loss": 0.45562559366226196, "loss_ce": 0.00033384724520146847, "loss_iou": 0.1669921875, "loss_num": 0.024169921875, "loss_xval": 0.455078125, "num_input_tokens_seen": 273766692, "step": 4887 }, { "epoch": 10.88641425389755, "grad_norm": 26.937620162963867, "learning_rate": 1e-06, "loss": 0.5724, "num_input_tokens_seen": 273822332, "step": 4888 }, { "epoch": 10.88641425389755, "loss": 0.5189422369003296, "loss_ce": 0.00014341062342282385, "loss_iou": 0.228515625, "loss_num": 0.01220703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 273822332, "step": 4888 }, { "epoch": 10.888641425389755, "grad_norm": 25.740150451660156, "learning_rate": 1e-06, "loss": 0.5291, "num_input_tokens_seen": 273877776, "step": 4889 }, { "epoch": 10.888641425389755, "loss": 0.5222045183181763, "loss_ce": 0.00023185207101050764, "loss_iou": 0.2216796875, "loss_num": 0.015869140625, "loss_xval": 0.5234375, "num_input_tokens_seen": 273877776, "step": 4889 }, { "epoch": 10.89086859688196, "grad_norm": 13.944757461547852, "learning_rate": 1e-06, "loss": 0.5313, "num_input_tokens_seen": 273934432, "step": 4890 }, { "epoch": 10.89086859688196, "loss": 0.5552128553390503, "loss_ce": 0.00015907504712231457, "loss_iou": 0.2255859375, "loss_num": 0.020751953125, "loss_xval": 0.5546875, "num_input_tokens_seen": 273934432, "step": 4890 }, { "epoch": 10.893095768374165, "grad_norm": 32.77812194824219, "learning_rate": 1e-06, "loss": 0.5006, "num_input_tokens_seen": 273990888, "step": 4891 }, { "epoch": 10.893095768374165, "loss": 0.5784051418304443, "loss_ce": 0.0001580218377057463, "loss_iou": 0.2392578125, "loss_num": 0.02001953125, "loss_xval": 0.578125, "num_input_tokens_seen": 273990888, "step": 4891 }, { "epoch": 10.89532293986637, "grad_norm": 21.749361038208008, "learning_rate": 1e-06, "loss": 0.4988, "num_input_tokens_seen": 274045808, "step": 4892 }, { "epoch": 10.89532293986637, "loss": 0.47173595428466797, "loss_ce": 0.0017652796814218163, "loss_iou": 0.2060546875, "loss_num": 0.011474609375, "loss_xval": 0.470703125, "num_input_tokens_seen": 274045808, "step": 4892 }, { "epoch": 10.897550111358575, "grad_norm": 22.703105926513672, "learning_rate": 1e-06, "loss": 0.4889, "num_input_tokens_seen": 274101860, "step": 4893 }, { "epoch": 10.897550111358575, "loss": 0.5470762252807617, "loss_ce": 0.000323267828207463, "loss_iou": 0.2021484375, "loss_num": 0.0281982421875, "loss_xval": 0.546875, "num_input_tokens_seen": 274101860, "step": 4893 }, { "epoch": 10.89977728285078, "grad_norm": 19.098848342895508, "learning_rate": 1e-06, "loss": 0.5863, "num_input_tokens_seen": 274158588, "step": 4894 }, { "epoch": 10.89977728285078, "loss": 0.47424912452697754, "loss_ce": 0.0001280199212487787, "loss_iou": 0.20703125, "loss_num": 0.01190185546875, "loss_xval": 0.474609375, "num_input_tokens_seen": 274158588, "step": 4894 }, { "epoch": 10.902004454342984, "grad_norm": 25.0705509185791, "learning_rate": 1e-06, "loss": 0.5995, "num_input_tokens_seen": 274214796, "step": 4895 }, { "epoch": 10.902004454342984, "loss": 0.7026468515396118, "loss_ce": 0.00013221558765508235, "loss_iou": 0.294921875, "loss_num": 0.0223388671875, "loss_xval": 0.703125, "num_input_tokens_seen": 274214796, "step": 4895 }, { "epoch": 10.90423162583519, "grad_norm": 16.38619613647461, "learning_rate": 1e-06, "loss": 0.4676, "num_input_tokens_seen": 274270000, "step": 4896 }, { "epoch": 10.90423162583519, "loss": 0.5520434379577637, "loss_ce": 0.0002856444625649601, "loss_iou": 0.22265625, "loss_num": 0.0211181640625, "loss_xval": 0.55078125, "num_input_tokens_seen": 274270000, "step": 4896 }, { "epoch": 10.906458797327394, "grad_norm": 28.38006591796875, "learning_rate": 1e-06, "loss": 0.4696, "num_input_tokens_seen": 274324580, "step": 4897 }, { "epoch": 10.906458797327394, "loss": 0.513346791267395, "loss_ce": 0.00016320010763593018, "loss_iou": 0.2333984375, "loss_num": 0.009521484375, "loss_xval": 0.51171875, "num_input_tokens_seen": 274324580, "step": 4897 }, { "epoch": 10.908685968819599, "grad_norm": 34.72296142578125, "learning_rate": 1e-06, "loss": 0.7088, "num_input_tokens_seen": 274380456, "step": 4898 }, { "epoch": 10.908685968819599, "loss": 0.5927585363388062, "loss_ce": 0.0002292667340952903, "loss_iou": 0.251953125, "loss_num": 0.0174560546875, "loss_xval": 0.59375, "num_input_tokens_seen": 274380456, "step": 4898 }, { "epoch": 10.910913140311804, "grad_norm": 17.991031646728516, "learning_rate": 1e-06, "loss": 0.7028, "num_input_tokens_seen": 274436900, "step": 4899 }, { "epoch": 10.910913140311804, "loss": 1.07291579246521, "loss_ce": 0.0001619314862182364, "loss_iou": 0.451171875, "loss_num": 0.033935546875, "loss_xval": 1.0703125, "num_input_tokens_seen": 274436900, "step": 4899 }, { "epoch": 10.913140311804009, "grad_norm": 19.890304565429688, "learning_rate": 1e-06, "loss": 0.6784, "num_input_tokens_seen": 274492560, "step": 4900 }, { "epoch": 10.913140311804009, "loss": 0.7131062746047974, "loss_ce": 0.0007039305055513978, "loss_iou": 0.28515625, "loss_num": 0.028564453125, "loss_xval": 0.7109375, "num_input_tokens_seen": 274492560, "step": 4900 }, { "epoch": 10.915367483296214, "grad_norm": 19.241336822509766, "learning_rate": 1e-06, "loss": 0.4938, "num_input_tokens_seen": 274549672, "step": 4901 }, { "epoch": 10.915367483296214, "loss": 0.4737103581428528, "loss_ce": 0.00013855646830052137, "loss_iou": 0.2138671875, "loss_num": 0.00933837890625, "loss_xval": 0.47265625, "num_input_tokens_seen": 274549672, "step": 4901 }, { "epoch": 10.917594654788418, "grad_norm": 20.94312858581543, "learning_rate": 1e-06, "loss": 0.5268, "num_input_tokens_seen": 274606064, "step": 4902 }, { "epoch": 10.917594654788418, "loss": 0.6111065149307251, "loss_ce": 0.00014459769590757787, "loss_iou": 0.2451171875, "loss_num": 0.0244140625, "loss_xval": 0.609375, "num_input_tokens_seen": 274606064, "step": 4902 }, { "epoch": 10.919821826280623, "grad_norm": 55.303871154785156, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 274662744, "step": 4903 }, { "epoch": 10.919821826280623, "loss": 0.4556983709335327, "loss_ce": 0.0001320057053817436, "loss_iou": 0.2060546875, "loss_num": 0.0086669921875, "loss_xval": 0.455078125, "num_input_tokens_seen": 274662744, "step": 4903 }, { "epoch": 10.922048997772828, "grad_norm": 13.991397857666016, "learning_rate": 1e-06, "loss": 0.3496, "num_input_tokens_seen": 274719288, "step": 4904 }, { "epoch": 10.922048997772828, "loss": 0.35973674058914185, "loss_ce": 0.00011758786422433332, "loss_iou": 0.1513671875, "loss_num": 0.0113525390625, "loss_xval": 0.359375, "num_input_tokens_seen": 274719288, "step": 4904 }, { "epoch": 10.924276169265033, "grad_norm": 18.747949600219727, "learning_rate": 1e-06, "loss": 0.4155, "num_input_tokens_seen": 274774944, "step": 4905 }, { "epoch": 10.924276169265033, "loss": 0.44049468636512756, "loss_ce": 0.0002480950206518173, "loss_iou": 0.185546875, "loss_num": 0.013671875, "loss_xval": 0.439453125, "num_input_tokens_seen": 274774944, "step": 4905 }, { "epoch": 10.926503340757238, "grad_norm": 21.45561408996582, "learning_rate": 1e-06, "loss": 0.4906, "num_input_tokens_seen": 274831664, "step": 4906 }, { "epoch": 10.926503340757238, "loss": 0.5997252464294434, "loss_ce": 0.00011587167682591826, "loss_iou": 0.27734375, "loss_num": 0.0086669921875, "loss_xval": 0.6015625, "num_input_tokens_seen": 274831664, "step": 4906 }, { "epoch": 10.928730512249443, "grad_norm": 21.097562789916992, "learning_rate": 1e-06, "loss": 0.4488, "num_input_tokens_seen": 274888812, "step": 4907 }, { "epoch": 10.928730512249443, "loss": 0.4747345447540283, "loss_ce": 0.00012518178846221417, "loss_iou": 0.220703125, "loss_num": 0.00640869140625, "loss_xval": 0.474609375, "num_input_tokens_seen": 274888812, "step": 4907 }, { "epoch": 10.930957683741648, "grad_norm": 20.24143409729004, "learning_rate": 1e-06, "loss": 0.4771, "num_input_tokens_seen": 274945504, "step": 4908 }, { "epoch": 10.930957683741648, "loss": 0.39001041650772095, "loss_ce": 0.00011784063826780766, "loss_iou": 0.177734375, "loss_num": 0.00677490234375, "loss_xval": 0.390625, "num_input_tokens_seen": 274945504, "step": 4908 }, { "epoch": 10.933184855233852, "grad_norm": 20.678117752075195, "learning_rate": 1e-06, "loss": 0.4148, "num_input_tokens_seen": 275004552, "step": 4909 }, { "epoch": 10.933184855233852, "loss": 0.3625844419002533, "loss_ce": 0.0001576666400069371, "loss_iou": 0.1611328125, "loss_num": 0.00787353515625, "loss_xval": 0.36328125, "num_input_tokens_seen": 275004552, "step": 4909 }, { "epoch": 10.935412026726057, "grad_norm": 20.854366302490234, "learning_rate": 1e-06, "loss": 0.5889, "num_input_tokens_seen": 275062356, "step": 4910 }, { "epoch": 10.935412026726057, "loss": 0.4733467102050781, "loss_ce": 0.0002021462714765221, "loss_iou": 0.2158203125, "loss_num": 0.00811767578125, "loss_xval": 0.47265625, "num_input_tokens_seen": 275062356, "step": 4910 }, { "epoch": 10.937639198218262, "grad_norm": 23.64391326904297, "learning_rate": 1e-06, "loss": 0.7998, "num_input_tokens_seen": 275117932, "step": 4911 }, { "epoch": 10.937639198218262, "loss": 0.7572309970855713, "loss_ce": 0.0001508938439656049, "loss_iou": 0.322265625, "loss_num": 0.0220947265625, "loss_xval": 0.7578125, "num_input_tokens_seen": 275117932, "step": 4911 }, { "epoch": 10.939866369710467, "grad_norm": 22.301734924316406, "learning_rate": 1e-06, "loss": 0.4655, "num_input_tokens_seen": 275171980, "step": 4912 }, { "epoch": 10.939866369710467, "loss": 0.4408171772956848, "loss_ce": 0.00014336456661112607, "loss_iou": 0.208984375, "loss_num": 0.0045166015625, "loss_xval": 0.44140625, "num_input_tokens_seen": 275171980, "step": 4912 }, { "epoch": 10.942093541202672, "grad_norm": 18.19380760192871, "learning_rate": 1e-06, "loss": 0.6564, "num_input_tokens_seen": 275227324, "step": 4913 }, { "epoch": 10.942093541202672, "loss": 0.8009915351867676, "loss_ce": 0.00021027974435128272, "loss_iou": 0.3515625, "loss_num": 0.0194091796875, "loss_xval": 0.80078125, "num_input_tokens_seen": 275227324, "step": 4913 }, { "epoch": 10.944320712694877, "grad_norm": 14.776144981384277, "learning_rate": 1e-06, "loss": 0.5706, "num_input_tokens_seen": 275282508, "step": 4914 }, { "epoch": 10.944320712694877, "loss": 0.657973051071167, "loss_ce": 0.00013618965749628842, "loss_iou": 0.2734375, "loss_num": 0.022216796875, "loss_xval": 0.65625, "num_input_tokens_seen": 275282508, "step": 4914 }, { "epoch": 10.946547884187082, "grad_norm": 23.789960861206055, "learning_rate": 1e-06, "loss": 0.4952, "num_input_tokens_seen": 275338856, "step": 4915 }, { "epoch": 10.946547884187082, "loss": 0.48818719387054443, "loss_ce": 0.00015006719331722707, "loss_iou": 0.2158203125, "loss_num": 0.01141357421875, "loss_xval": 0.48828125, "num_input_tokens_seen": 275338856, "step": 4915 }, { "epoch": 10.948775055679288, "grad_norm": 19.028474807739258, "learning_rate": 1e-06, "loss": 0.5003, "num_input_tokens_seen": 275395740, "step": 4916 }, { "epoch": 10.948775055679288, "loss": 0.5374853610992432, "loss_ce": 0.0001318200957030058, "loss_iou": 0.2294921875, "loss_num": 0.01556396484375, "loss_xval": 0.5390625, "num_input_tokens_seen": 275395740, "step": 4916 }, { "epoch": 10.951002227171493, "grad_norm": 20.012557983398438, "learning_rate": 1e-06, "loss": 0.4823, "num_input_tokens_seen": 275454060, "step": 4917 }, { "epoch": 10.951002227171493, "loss": 0.466092973947525, "loss_ce": 0.0001506020489614457, "loss_iou": 0.2021484375, "loss_num": 0.01226806640625, "loss_xval": 0.466796875, "num_input_tokens_seen": 275454060, "step": 4917 }, { "epoch": 10.953229398663698, "grad_norm": 16.04680633544922, "learning_rate": 1e-06, "loss": 0.4923, "num_input_tokens_seen": 275511984, "step": 4918 }, { "epoch": 10.953229398663698, "loss": 0.4693390130996704, "loss_ce": 0.0001617702655494213, "loss_iou": 0.2138671875, "loss_num": 0.00836181640625, "loss_xval": 0.46875, "num_input_tokens_seen": 275511984, "step": 4918 }, { "epoch": 10.955456570155903, "grad_norm": 19.825653076171875, "learning_rate": 1e-06, "loss": 0.5232, "num_input_tokens_seen": 275568436, "step": 4919 }, { "epoch": 10.955456570155903, "loss": 0.561336100101471, "loss_ce": 0.00011784351227106526, "loss_iou": 0.2392578125, "loss_num": 0.0166015625, "loss_xval": 0.5625, "num_input_tokens_seen": 275568436, "step": 4919 }, { "epoch": 10.957683741648108, "grad_norm": 22.18250846862793, "learning_rate": 1e-06, "loss": 0.4804, "num_input_tokens_seen": 275625196, "step": 4920 }, { "epoch": 10.957683741648108, "loss": 0.43557554483413696, "loss_ce": 0.00015077181160449982, "loss_iou": 0.18359375, "loss_num": 0.0135498046875, "loss_xval": 0.435546875, "num_input_tokens_seen": 275625196, "step": 4920 }, { "epoch": 10.959910913140313, "grad_norm": 23.71367835998535, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 275681500, "step": 4921 }, { "epoch": 10.959910913140313, "loss": 0.5750996470451355, "loss_ce": 0.0002705350052565336, "loss_iou": 0.2353515625, "loss_num": 0.02099609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 275681500, "step": 4921 }, { "epoch": 10.962138084632517, "grad_norm": 36.595542907714844, "learning_rate": 1e-06, "loss": 0.4071, "num_input_tokens_seen": 275736712, "step": 4922 }, { "epoch": 10.962138084632517, "loss": 0.35608160495758057, "loss_ce": 0.0001245439052581787, "loss_iou": 0.162109375, "loss_num": 0.0064697265625, "loss_xval": 0.35546875, "num_input_tokens_seen": 275736712, "step": 4922 }, { "epoch": 10.964365256124722, "grad_norm": 22.66077995300293, "learning_rate": 1e-06, "loss": 0.6015, "num_input_tokens_seen": 275792276, "step": 4923 }, { "epoch": 10.964365256124722, "loss": 0.39454376697540283, "loss_ce": 0.00013458277680911124, "loss_iou": 0.1748046875, "loss_num": 0.0089111328125, "loss_xval": 0.39453125, "num_input_tokens_seen": 275792276, "step": 4923 }, { "epoch": 10.966592427616927, "grad_norm": 18.849925994873047, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 275847012, "step": 4924 }, { "epoch": 10.966592427616927, "loss": 0.5802684426307678, "loss_ce": 0.0015330992173403502, "loss_iou": 0.2412109375, "loss_num": 0.019287109375, "loss_xval": 0.578125, "num_input_tokens_seen": 275847012, "step": 4924 }, { "epoch": 10.968819599109132, "grad_norm": 25.976478576660156, "learning_rate": 1e-06, "loss": 0.6866, "num_input_tokens_seen": 275905656, "step": 4925 }, { "epoch": 10.968819599109132, "loss": 0.5414575338363647, "loss_ce": 0.0001978027867153287, "loss_iou": 0.2236328125, "loss_num": 0.018798828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 275905656, "step": 4925 }, { "epoch": 10.971046770601337, "grad_norm": 22.996116638183594, "learning_rate": 1e-06, "loss": 0.578, "num_input_tokens_seen": 275961284, "step": 4926 }, { "epoch": 10.971046770601337, "loss": 0.4320530593395233, "loss_ce": 0.00016829956439323723, "loss_iou": 0.1826171875, "loss_num": 0.01361083984375, "loss_xval": 0.431640625, "num_input_tokens_seen": 275961284, "step": 4926 }, { "epoch": 10.973273942093542, "grad_norm": 15.009370803833008, "learning_rate": 1e-06, "loss": 0.596, "num_input_tokens_seen": 276017624, "step": 4927 }, { "epoch": 10.973273942093542, "loss": 0.6515401601791382, "loss_ce": 0.0001729477517073974, "loss_iou": 0.2734375, "loss_num": 0.0208740234375, "loss_xval": 0.65234375, "num_input_tokens_seen": 276017624, "step": 4927 }, { "epoch": 10.975501113585747, "grad_norm": 17.324737548828125, "learning_rate": 1e-06, "loss": 0.4243, "num_input_tokens_seen": 276075216, "step": 4928 }, { "epoch": 10.975501113585747, "loss": 0.45728790760040283, "loss_ce": 0.00013462905189953744, "loss_iou": 0.193359375, "loss_num": 0.01397705078125, "loss_xval": 0.45703125, "num_input_tokens_seen": 276075216, "step": 4928 }, { "epoch": 10.977728285077951, "grad_norm": 77.1220932006836, "learning_rate": 1e-06, "loss": 0.4622, "num_input_tokens_seen": 276132700, "step": 4929 }, { "epoch": 10.977728285077951, "loss": 0.4627261161804199, "loss_ce": 0.000201685048523359, "loss_iou": 0.2021484375, "loss_num": 0.0115966796875, "loss_xval": 0.462890625, "num_input_tokens_seen": 276132700, "step": 4929 }, { "epoch": 10.979955456570156, "grad_norm": 21.860414505004883, "learning_rate": 1e-06, "loss": 0.4459, "num_input_tokens_seen": 276192092, "step": 4930 }, { "epoch": 10.979955456570156, "loss": 0.5067447423934937, "loss_ce": 0.00015294540207833052, "loss_iou": 0.2236328125, "loss_num": 0.01177978515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 276192092, "step": 4930 }, { "epoch": 10.982182628062361, "grad_norm": 23.157732009887695, "learning_rate": 1e-06, "loss": 0.5643, "num_input_tokens_seen": 276248164, "step": 4931 }, { "epoch": 10.982182628062361, "loss": 0.5332338809967041, "loss_ce": 0.00013755704276263714, "loss_iou": 0.2392578125, "loss_num": 0.0111083984375, "loss_xval": 0.53125, "num_input_tokens_seen": 276248164, "step": 4931 }, { "epoch": 10.984409799554566, "grad_norm": 18.797258377075195, "learning_rate": 1e-06, "loss": 0.5975, "num_input_tokens_seen": 276304632, "step": 4932 }, { "epoch": 10.984409799554566, "loss": 0.6781335473060608, "loss_ce": 0.00027711730217561126, "loss_iou": 0.265625, "loss_num": 0.0296630859375, "loss_xval": 0.6796875, "num_input_tokens_seen": 276304632, "step": 4932 }, { "epoch": 10.98663697104677, "grad_norm": 23.31140899658203, "learning_rate": 1e-06, "loss": 0.4345, "num_input_tokens_seen": 276361908, "step": 4933 }, { "epoch": 10.98663697104677, "loss": 0.5331314206123352, "loss_ce": 0.00011140106653328985, "loss_iou": 0.2392578125, "loss_num": 0.010986328125, "loss_xval": 0.53125, "num_input_tokens_seen": 276361908, "step": 4933 }, { "epoch": 10.988864142538976, "grad_norm": 15.896737098693848, "learning_rate": 1e-06, "loss": 0.6221, "num_input_tokens_seen": 276420060, "step": 4934 }, { "epoch": 10.988864142538976, "loss": 0.7978502511978149, "loss_ce": 0.00012079518637619913, "loss_iou": 0.3515625, "loss_num": 0.0186767578125, "loss_xval": 0.796875, "num_input_tokens_seen": 276420060, "step": 4934 }, { "epoch": 10.99109131403118, "grad_norm": 17.5236759185791, "learning_rate": 1e-06, "loss": 0.6223, "num_input_tokens_seen": 276475372, "step": 4935 }, { "epoch": 10.99109131403118, "loss": 0.7966042160987854, "loss_ce": 0.00021749922598246485, "loss_iou": 0.30859375, "loss_num": 0.035888671875, "loss_xval": 0.796875, "num_input_tokens_seen": 276475372, "step": 4935 }, { "epoch": 10.993318485523385, "grad_norm": 16.806560516357422, "learning_rate": 1e-06, "loss": 0.5073, "num_input_tokens_seen": 276529704, "step": 4936 }, { "epoch": 10.993318485523385, "loss": 0.3875495195388794, "loss_ce": 0.00015937026182655245, "loss_iou": 0.1669921875, "loss_num": 0.0107421875, "loss_xval": 0.38671875, "num_input_tokens_seen": 276529704, "step": 4936 }, { "epoch": 10.99554565701559, "grad_norm": 15.568613052368164, "learning_rate": 1e-06, "loss": 0.4306, "num_input_tokens_seen": 276584256, "step": 4937 }, { "epoch": 10.99554565701559, "loss": 0.3633689284324646, "loss_ce": 0.0002707808162085712, "loss_iou": 0.1279296875, "loss_num": 0.021728515625, "loss_xval": 0.36328125, "num_input_tokens_seen": 276584256, "step": 4937 }, { "epoch": 10.997772828507795, "grad_norm": 25.16409683227539, "learning_rate": 1e-06, "loss": 0.3894, "num_input_tokens_seen": 276640164, "step": 4938 }, { "epoch": 10.997772828507795, "loss": 0.41700875759124756, "loss_ce": 0.00013862067135050893, "loss_iou": 0.1865234375, "loss_num": 0.00860595703125, "loss_xval": 0.416015625, "num_input_tokens_seen": 276640164, "step": 4938 }, { "epoch": 11.0, "grad_norm": 23.693870544433594, "learning_rate": 1e-06, "loss": 0.7586, "num_input_tokens_seen": 276695568, "step": 4939 }, { "epoch": 11.0, "loss": 0.8163493275642395, "loss_ce": 0.00018723538960330188, "loss_iou": 0.35546875, "loss_num": 0.020751953125, "loss_xval": 0.81640625, "num_input_tokens_seen": 276695568, "step": 4939 }, { "epoch": 11.002227171492205, "grad_norm": 14.375535011291504, "learning_rate": 1e-06, "loss": 0.408, "num_input_tokens_seen": 276751748, "step": 4940 }, { "epoch": 11.002227171492205, "loss": 0.34511420130729675, "loss_ce": 0.00020451581804081798, "loss_iou": 0.158203125, "loss_num": 0.005462646484375, "loss_xval": 0.345703125, "num_input_tokens_seen": 276751748, "step": 4940 }, { "epoch": 11.00445434298441, "grad_norm": 17.11101722717285, "learning_rate": 1e-06, "loss": 0.4981, "num_input_tokens_seen": 276808900, "step": 4941 }, { "epoch": 11.00445434298441, "loss": 0.5812661647796631, "loss_ce": 0.00021148948871996254, "loss_iou": 0.259765625, "loss_num": 0.0120849609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 276808900, "step": 4941 }, { "epoch": 11.006681514476615, "grad_norm": 24.729581832885742, "learning_rate": 1e-06, "loss": 0.591, "num_input_tokens_seen": 276865236, "step": 4942 }, { "epoch": 11.006681514476615, "loss": 0.5612735748291016, "loss_ce": 0.00048253341810777783, "loss_iou": 0.248046875, "loss_num": 0.0130615234375, "loss_xval": 0.5625, "num_input_tokens_seen": 276865236, "step": 4942 }, { "epoch": 11.00890868596882, "grad_norm": 18.79891014099121, "learning_rate": 1e-06, "loss": 0.3387, "num_input_tokens_seen": 276923576, "step": 4943 }, { "epoch": 11.00890868596882, "loss": 0.26598143577575684, "loss_ce": 0.00011229849769733846, "loss_iou": 0.12109375, "loss_num": 0.004608154296875, "loss_xval": 0.265625, "num_input_tokens_seen": 276923576, "step": 4943 }, { "epoch": 11.011135857461024, "grad_norm": 19.149585723876953, "learning_rate": 1e-06, "loss": 0.3532, "num_input_tokens_seen": 276980108, "step": 4944 }, { "epoch": 11.011135857461024, "loss": 0.377750962972641, "loss_ce": 0.00012643022637348622, "loss_iou": 0.17578125, "loss_num": 0.0052490234375, "loss_xval": 0.376953125, "num_input_tokens_seen": 276980108, "step": 4944 }, { "epoch": 11.01336302895323, "grad_norm": 19.660892486572266, "learning_rate": 1e-06, "loss": 0.4988, "num_input_tokens_seen": 277035084, "step": 4945 }, { "epoch": 11.01336302895323, "loss": 0.49671003222465515, "loss_ce": 0.00012798060197383165, "loss_iou": 0.21875, "loss_num": 0.01171875, "loss_xval": 0.49609375, "num_input_tokens_seen": 277035084, "step": 4945 }, { "epoch": 11.015590200445434, "grad_norm": 17.99364471435547, "learning_rate": 1e-06, "loss": 0.5213, "num_input_tokens_seen": 277090600, "step": 4946 }, { "epoch": 11.015590200445434, "loss": 0.5547381043434143, "loss_ce": 0.0002947351022157818, "loss_iou": 0.2314453125, "loss_num": 0.018310546875, "loss_xval": 0.5546875, "num_input_tokens_seen": 277090600, "step": 4946 }, { "epoch": 11.017817371937639, "grad_norm": 26.641109466552734, "learning_rate": 1e-06, "loss": 0.4299, "num_input_tokens_seen": 277144644, "step": 4947 }, { "epoch": 11.017817371937639, "loss": 0.3615342974662781, "loss_ce": 0.0001451151620130986, "loss_iou": 0.1357421875, "loss_num": 0.0181884765625, "loss_xval": 0.361328125, "num_input_tokens_seen": 277144644, "step": 4947 }, { "epoch": 11.020044543429844, "grad_norm": 25.904264450073242, "learning_rate": 1e-06, "loss": 0.8089, "num_input_tokens_seen": 277198968, "step": 4948 }, { "epoch": 11.020044543429844, "loss": 0.8873788118362427, "loss_ce": 0.0001717885461403057, "loss_iou": 0.373046875, "loss_num": 0.028076171875, "loss_xval": 0.88671875, "num_input_tokens_seen": 277198968, "step": 4948 }, { "epoch": 11.022271714922049, "grad_norm": 18.756681442260742, "learning_rate": 1e-06, "loss": 0.6074, "num_input_tokens_seen": 277252344, "step": 4949 }, { "epoch": 11.022271714922049, "loss": 0.7791860103607178, "loss_ce": 0.00037738942774012685, "loss_iou": 0.302734375, "loss_num": 0.0341796875, "loss_xval": 0.77734375, "num_input_tokens_seen": 277252344, "step": 4949 }, { "epoch": 11.024498886414253, "grad_norm": 17.9628849029541, "learning_rate": 1e-06, "loss": 0.3852, "num_input_tokens_seen": 277308488, "step": 4950 }, { "epoch": 11.024498886414253, "loss": 0.34923800826072693, "loss_ce": 0.00011689884559018537, "loss_iou": 0.1513671875, "loss_num": 0.00927734375, "loss_xval": 0.349609375, "num_input_tokens_seen": 277308488, "step": 4950 }, { "epoch": 11.026726057906458, "grad_norm": 14.993110656738281, "learning_rate": 1e-06, "loss": 0.3982, "num_input_tokens_seen": 277364872, "step": 4951 }, { "epoch": 11.026726057906458, "loss": 0.3778972625732422, "loss_ce": 0.00015067942149471492, "loss_iou": 0.16796875, "loss_num": 0.00823974609375, "loss_xval": 0.376953125, "num_input_tokens_seen": 277364872, "step": 4951 }, { "epoch": 11.028953229398663, "grad_norm": 18.500215530395508, "learning_rate": 1e-06, "loss": 0.4492, "num_input_tokens_seen": 277422360, "step": 4952 }, { "epoch": 11.028953229398663, "loss": 0.517828643321991, "loss_ce": 0.00012847439211327583, "loss_iou": 0.2119140625, "loss_num": 0.0185546875, "loss_xval": 0.51953125, "num_input_tokens_seen": 277422360, "step": 4952 }, { "epoch": 11.031180400890868, "grad_norm": 28.1491641998291, "learning_rate": 1e-06, "loss": 0.6864, "num_input_tokens_seen": 277479676, "step": 4953 }, { "epoch": 11.031180400890868, "loss": 0.7513668537139893, "loss_ce": 0.00014616544649470598, "loss_iou": 0.2890625, "loss_num": 0.03466796875, "loss_xval": 0.75, "num_input_tokens_seen": 277479676, "step": 4953 }, { "epoch": 11.033407572383073, "grad_norm": 14.530891418457031, "learning_rate": 1e-06, "loss": 0.5625, "num_input_tokens_seen": 277535648, "step": 4954 }, { "epoch": 11.033407572383073, "loss": 0.6487134695053101, "loss_ce": 0.00015391816850751638, "loss_iou": 0.265625, "loss_num": 0.023681640625, "loss_xval": 0.6484375, "num_input_tokens_seen": 277535648, "step": 4954 }, { "epoch": 11.035634743875278, "grad_norm": 18.762638092041016, "learning_rate": 1e-06, "loss": 0.4931, "num_input_tokens_seen": 277593048, "step": 4955 }, { "epoch": 11.035634743875278, "loss": 0.5269228219985962, "loss_ce": 0.00012840772978961468, "loss_iou": 0.21875, "loss_num": 0.0177001953125, "loss_xval": 0.52734375, "num_input_tokens_seen": 277593048, "step": 4955 }, { "epoch": 11.037861915367483, "grad_norm": 14.296708106994629, "learning_rate": 1e-06, "loss": 0.6159, "num_input_tokens_seen": 277647512, "step": 4956 }, { "epoch": 11.037861915367483, "loss": 0.7726843357086182, "loss_ce": 0.00022338703274726868, "loss_iou": 0.296875, "loss_num": 0.03564453125, "loss_xval": 0.7734375, "num_input_tokens_seen": 277647512, "step": 4956 }, { "epoch": 11.040089086859687, "grad_norm": 16.444766998291016, "learning_rate": 1e-06, "loss": 0.476, "num_input_tokens_seen": 277705924, "step": 4957 }, { "epoch": 11.040089086859687, "loss": 0.48854783177375793, "loss_ce": 0.0001445203961338848, "loss_iou": 0.2119140625, "loss_num": 0.0126953125, "loss_xval": 0.48828125, "num_input_tokens_seen": 277705924, "step": 4957 }, { "epoch": 11.042316258351892, "grad_norm": 33.47507095336914, "learning_rate": 1e-06, "loss": 0.6862, "num_input_tokens_seen": 277763404, "step": 4958 }, { "epoch": 11.042316258351892, "loss": 0.7765988111495972, "loss_ce": 0.00010955688776448369, "loss_iou": 0.34765625, "loss_num": 0.0166015625, "loss_xval": 0.77734375, "num_input_tokens_seen": 277763404, "step": 4958 }, { "epoch": 11.044543429844097, "grad_norm": 23.200275421142578, "learning_rate": 1e-06, "loss": 0.5959, "num_input_tokens_seen": 277819472, "step": 4959 }, { "epoch": 11.044543429844097, "loss": 0.7127882838249207, "loss_ce": 0.00014180471771396697, "loss_iou": 0.32421875, "loss_num": 0.01287841796875, "loss_xval": 0.7109375, "num_input_tokens_seen": 277819472, "step": 4959 }, { "epoch": 11.046770601336302, "grad_norm": 19.549856185913086, "learning_rate": 1e-06, "loss": 0.6735, "num_input_tokens_seen": 277874920, "step": 4960 }, { "epoch": 11.046770601336302, "loss": 0.7458186149597168, "loss_ce": 0.00021311640739440918, "loss_iou": 0.294921875, "loss_num": 0.03125, "loss_xval": 0.74609375, "num_input_tokens_seen": 277874920, "step": 4960 }, { "epoch": 11.048997772828507, "grad_norm": 18.301462173461914, "learning_rate": 1e-06, "loss": 0.4467, "num_input_tokens_seen": 277932412, "step": 4961 }, { "epoch": 11.048997772828507, "loss": 0.38100382685661316, "loss_ce": 0.0001444508379790932, "loss_iou": 0.16796875, "loss_num": 0.0091552734375, "loss_xval": 0.380859375, "num_input_tokens_seen": 277932412, "step": 4961 }, { "epoch": 11.051224944320714, "grad_norm": 22.31706428527832, "learning_rate": 1e-06, "loss": 0.6524, "num_input_tokens_seen": 277990888, "step": 4962 }, { "epoch": 11.051224944320714, "loss": 0.6529885530471802, "loss_ce": 0.00015653553418815136, "loss_iou": 0.265625, "loss_num": 0.0242919921875, "loss_xval": 0.65234375, "num_input_tokens_seen": 277990888, "step": 4962 }, { "epoch": 11.053452115812918, "grad_norm": 48.24069595336914, "learning_rate": 1e-06, "loss": 0.5579, "num_input_tokens_seen": 278045352, "step": 4963 }, { "epoch": 11.053452115812918, "loss": 0.4462721645832062, "loss_ce": 0.00013570513692684472, "loss_iou": 0.171875, "loss_num": 0.0206298828125, "loss_xval": 0.4453125, "num_input_tokens_seen": 278045352, "step": 4963 }, { "epoch": 11.055679287305123, "grad_norm": 14.883129119873047, "learning_rate": 1e-06, "loss": 0.4838, "num_input_tokens_seen": 278100884, "step": 4964 }, { "epoch": 11.055679287305123, "loss": 0.5324569344520569, "loss_ce": 0.00016932294238358736, "loss_iou": 0.220703125, "loss_num": 0.01806640625, "loss_xval": 0.53125, "num_input_tokens_seen": 278100884, "step": 4964 }, { "epoch": 11.057906458797328, "grad_norm": 92.90229797363281, "learning_rate": 1e-06, "loss": 0.5606, "num_input_tokens_seen": 278155508, "step": 4965 }, { "epoch": 11.057906458797328, "loss": 0.6233317255973816, "loss_ce": 0.00016281349235214293, "loss_iou": 0.287109375, "loss_num": 0.00994873046875, "loss_xval": 0.625, "num_input_tokens_seen": 278155508, "step": 4965 }, { "epoch": 11.060133630289533, "grad_norm": 22.694913864135742, "learning_rate": 1e-06, "loss": 0.5463, "num_input_tokens_seen": 278210972, "step": 4966 }, { "epoch": 11.060133630289533, "loss": 0.5516934394836426, "loss_ce": 0.00017979381664190441, "loss_iou": 0.2294921875, "loss_num": 0.018310546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 278210972, "step": 4966 }, { "epoch": 11.062360801781738, "grad_norm": 14.845329284667969, "learning_rate": 1e-06, "loss": 0.3656, "num_input_tokens_seen": 278266736, "step": 4967 }, { "epoch": 11.062360801781738, "loss": 0.3910008668899536, "loss_ce": 0.00019277536193840206, "loss_iou": 0.1767578125, "loss_num": 0.0074462890625, "loss_xval": 0.390625, "num_input_tokens_seen": 278266736, "step": 4967 }, { "epoch": 11.064587973273943, "grad_norm": 16.45245361328125, "learning_rate": 1e-06, "loss": 0.3967, "num_input_tokens_seen": 278321340, "step": 4968 }, { "epoch": 11.064587973273943, "loss": 0.4062022566795349, "loss_ce": 0.00013537969789467752, "loss_iou": 0.177734375, "loss_num": 0.00994873046875, "loss_xval": 0.40625, "num_input_tokens_seen": 278321340, "step": 4968 }, { "epoch": 11.066815144766148, "grad_norm": 13.446118354797363, "learning_rate": 1e-06, "loss": 0.4845, "num_input_tokens_seen": 278377700, "step": 4969 }, { "epoch": 11.066815144766148, "loss": 0.5192990303039551, "loss_ce": 0.00013396976282820106, "loss_iou": 0.2255859375, "loss_num": 0.01361083984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 278377700, "step": 4969 }, { "epoch": 11.069042316258352, "grad_norm": 31.59632682800293, "learning_rate": 1e-06, "loss": 0.5488, "num_input_tokens_seen": 278431796, "step": 4970 }, { "epoch": 11.069042316258352, "loss": 0.5724695920944214, "loss_ce": 0.0001734097022563219, "loss_iou": 0.234375, "loss_num": 0.0206298828125, "loss_xval": 0.57421875, "num_input_tokens_seen": 278431796, "step": 4970 }, { "epoch": 11.071269487750557, "grad_norm": 21.374279022216797, "learning_rate": 1e-06, "loss": 0.5448, "num_input_tokens_seen": 278486652, "step": 4971 }, { "epoch": 11.071269487750557, "loss": 0.5677635073661804, "loss_ce": 0.00013656642113346606, "loss_iou": 0.2431640625, "loss_num": 0.0162353515625, "loss_xval": 0.56640625, "num_input_tokens_seen": 278486652, "step": 4971 }, { "epoch": 11.073496659242762, "grad_norm": 62.18361282348633, "learning_rate": 1e-06, "loss": 0.696, "num_input_tokens_seen": 278543052, "step": 4972 }, { "epoch": 11.073496659242762, "loss": 0.5261435508728027, "loss_ce": 0.00014258406008593738, "loss_iou": 0.232421875, "loss_num": 0.01220703125, "loss_xval": 0.52734375, "num_input_tokens_seen": 278543052, "step": 4972 }, { "epoch": 11.075723830734967, "grad_norm": 16.852285385131836, "learning_rate": 1e-06, "loss": 0.7198, "num_input_tokens_seen": 278598500, "step": 4973 }, { "epoch": 11.075723830734967, "loss": 0.7613972425460815, "loss_ce": 0.00016673312347847968, "loss_iou": 0.333984375, "loss_num": 0.0189208984375, "loss_xval": 0.76171875, "num_input_tokens_seen": 278598500, "step": 4973 }, { "epoch": 11.077951002227172, "grad_norm": 26.721847534179688, "learning_rate": 1e-06, "loss": 0.5459, "num_input_tokens_seen": 278652932, "step": 4974 }, { "epoch": 11.077951002227172, "loss": 0.6054716110229492, "loss_ce": 0.0001249103806912899, "loss_iou": 0.2314453125, "loss_num": 0.0289306640625, "loss_xval": 0.60546875, "num_input_tokens_seen": 278652932, "step": 4974 }, { "epoch": 11.080178173719377, "grad_norm": 28.737173080444336, "learning_rate": 1e-06, "loss": 0.5445, "num_input_tokens_seen": 278709224, "step": 4975 }, { "epoch": 11.080178173719377, "loss": 0.5305880904197693, "loss_ce": 0.00013153886538930237, "loss_iou": 0.208984375, "loss_num": 0.0224609375, "loss_xval": 0.53125, "num_input_tokens_seen": 278709224, "step": 4975 }, { "epoch": 11.082405345211582, "grad_norm": 22.68699836730957, "learning_rate": 1e-06, "loss": 0.4661, "num_input_tokens_seen": 278761808, "step": 4976 }, { "epoch": 11.082405345211582, "loss": 0.41276633739471436, "loss_ce": 0.00016868110105860978, "loss_iou": 0.181640625, "loss_num": 0.0096435546875, "loss_xval": 0.412109375, "num_input_tokens_seen": 278761808, "step": 4976 }, { "epoch": 11.084632516703786, "grad_norm": 23.453115463256836, "learning_rate": 1e-06, "loss": 0.6005, "num_input_tokens_seen": 278817304, "step": 4977 }, { "epoch": 11.084632516703786, "loss": 0.5315991044044495, "loss_ce": 0.00010497516632312909, "loss_iou": 0.220703125, "loss_num": 0.01806640625, "loss_xval": 0.53125, "num_input_tokens_seen": 278817304, "step": 4977 }, { "epoch": 11.086859688195991, "grad_norm": 22.387310028076172, "learning_rate": 1e-06, "loss": 0.5064, "num_input_tokens_seen": 278873696, "step": 4978 }, { "epoch": 11.086859688195991, "loss": 0.41907966136932373, "loss_ce": 0.00013435332220979035, "loss_iou": 0.1748046875, "loss_num": 0.01385498046875, "loss_xval": 0.41796875, "num_input_tokens_seen": 278873696, "step": 4978 }, { "epoch": 11.089086859688196, "grad_norm": 15.790838241577148, "learning_rate": 1e-06, "loss": 0.596, "num_input_tokens_seen": 278931180, "step": 4979 }, { "epoch": 11.089086859688196, "loss": 0.5526787042617798, "loss_ce": 0.00012747167784254998, "loss_iou": 0.248046875, "loss_num": 0.011474609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 278931180, "step": 4979 }, { "epoch": 11.091314031180401, "grad_norm": 15.259247779846191, "learning_rate": 1e-06, "loss": 0.6733, "num_input_tokens_seen": 278988536, "step": 4980 }, { "epoch": 11.091314031180401, "loss": 0.7944159507751465, "loss_ce": 0.0004706614126916975, "loss_iou": 0.341796875, "loss_num": 0.0218505859375, "loss_xval": 0.79296875, "num_input_tokens_seen": 278988536, "step": 4980 }, { "epoch": 11.093541202672606, "grad_norm": 22.694530487060547, "learning_rate": 1e-06, "loss": 0.5318, "num_input_tokens_seen": 279045060, "step": 4981 }, { "epoch": 11.093541202672606, "loss": 0.5756011605262756, "loss_ce": 0.0001617103407625109, "loss_iou": 0.2490234375, "loss_num": 0.015380859375, "loss_xval": 0.57421875, "num_input_tokens_seen": 279045060, "step": 4981 }, { "epoch": 11.09576837416481, "grad_norm": 20.57561492919922, "learning_rate": 1e-06, "loss": 0.5533, "num_input_tokens_seen": 279101812, "step": 4982 }, { "epoch": 11.09576837416481, "loss": 0.5785654783248901, "loss_ce": 0.00019637157674878836, "loss_iou": 0.2392578125, "loss_num": 0.02001953125, "loss_xval": 0.578125, "num_input_tokens_seen": 279101812, "step": 4982 }, { "epoch": 11.097995545657016, "grad_norm": 20.943668365478516, "learning_rate": 1e-06, "loss": 0.4383, "num_input_tokens_seen": 279156604, "step": 4983 }, { "epoch": 11.097995545657016, "loss": 0.4315411448478699, "loss_ce": 0.0002667338994797319, "loss_iou": 0.1787109375, "loss_num": 0.0145263671875, "loss_xval": 0.431640625, "num_input_tokens_seen": 279156604, "step": 4983 }, { "epoch": 11.10022271714922, "grad_norm": 25.43724822998047, "learning_rate": 1e-06, "loss": 0.4991, "num_input_tokens_seen": 279212128, "step": 4984 }, { "epoch": 11.10022271714922, "loss": 0.560702919960022, "loss_ce": 0.00015607105160597712, "loss_iou": 0.248046875, "loss_num": 0.012939453125, "loss_xval": 0.5625, "num_input_tokens_seen": 279212128, "step": 4984 }, { "epoch": 11.102449888641425, "grad_norm": 30.57325553894043, "learning_rate": 1e-06, "loss": 0.6142, "num_input_tokens_seen": 279267056, "step": 4985 }, { "epoch": 11.102449888641425, "loss": 0.5670490860939026, "loss_ce": 0.00015456389519385993, "loss_iou": 0.2451171875, "loss_num": 0.0152587890625, "loss_xval": 0.56640625, "num_input_tokens_seen": 279267056, "step": 4985 }, { "epoch": 11.10467706013363, "grad_norm": 21.70029067993164, "learning_rate": 1e-06, "loss": 0.4192, "num_input_tokens_seen": 279322728, "step": 4986 }, { "epoch": 11.10467706013363, "loss": 0.4730498492717743, "loss_ce": 0.00014946176088415086, "loss_iou": 0.1982421875, "loss_num": 0.015380859375, "loss_xval": 0.47265625, "num_input_tokens_seen": 279322728, "step": 4986 }, { "epoch": 11.106904231625835, "grad_norm": 17.88077735900879, "learning_rate": 1e-06, "loss": 0.3673, "num_input_tokens_seen": 279379872, "step": 4987 }, { "epoch": 11.106904231625835, "loss": 0.3716070055961609, "loss_ce": 0.00014704751083627343, "loss_iou": 0.169921875, "loss_num": 0.006561279296875, "loss_xval": 0.37109375, "num_input_tokens_seen": 279379872, "step": 4987 }, { "epoch": 11.10913140311804, "grad_norm": 21.7382755279541, "learning_rate": 1e-06, "loss": 0.609, "num_input_tokens_seen": 279435852, "step": 4988 }, { "epoch": 11.10913140311804, "loss": 0.6673039197921753, "loss_ce": 0.00018964617629535496, "loss_iou": 0.265625, "loss_num": 0.0272216796875, "loss_xval": 0.66796875, "num_input_tokens_seen": 279435852, "step": 4988 }, { "epoch": 11.111358574610245, "grad_norm": 13.589532852172852, "learning_rate": 1e-06, "loss": 0.4054, "num_input_tokens_seen": 279492372, "step": 4989 }, { "epoch": 11.111358574610245, "loss": 0.5108894109725952, "loss_ce": 0.00014718393504153937, "loss_iou": 0.2255859375, "loss_num": 0.01190185546875, "loss_xval": 0.51171875, "num_input_tokens_seen": 279492372, "step": 4989 }, { "epoch": 11.11358574610245, "grad_norm": 30.877511978149414, "learning_rate": 1e-06, "loss": 0.4336, "num_input_tokens_seen": 279548732, "step": 4990 }, { "epoch": 11.11358574610245, "loss": 0.36500683426856995, "loss_ce": 0.0001386810909025371, "loss_iou": 0.1650390625, "loss_num": 0.007049560546875, "loss_xval": 0.365234375, "num_input_tokens_seen": 279548732, "step": 4990 }, { "epoch": 11.115812917594655, "grad_norm": 20.038894653320312, "learning_rate": 1e-06, "loss": 0.5222, "num_input_tokens_seen": 279605916, "step": 4991 }, { "epoch": 11.115812917594655, "loss": 0.6044121980667114, "loss_ce": 0.00016413633420597762, "loss_iou": 0.267578125, "loss_num": 0.0140380859375, "loss_xval": 0.60546875, "num_input_tokens_seen": 279605916, "step": 4991 }, { "epoch": 11.11804008908686, "grad_norm": 23.694477081298828, "learning_rate": 1e-06, "loss": 0.521, "num_input_tokens_seen": 279659500, "step": 4992 }, { "epoch": 11.11804008908686, "loss": 0.4539830684661865, "loss_ce": 0.00012564370990730822, "loss_iou": 0.201171875, "loss_num": 0.010498046875, "loss_xval": 0.453125, "num_input_tokens_seen": 279659500, "step": 4992 }, { "epoch": 11.120267260579064, "grad_norm": 16.13028907775879, "learning_rate": 1e-06, "loss": 0.5406, "num_input_tokens_seen": 279716076, "step": 4993 }, { "epoch": 11.120267260579064, "loss": 0.4950714707374573, "loss_ce": 0.00013739880523644388, "loss_iou": 0.2158203125, "loss_num": 0.01251220703125, "loss_xval": 0.494140625, "num_input_tokens_seen": 279716076, "step": 4993 }, { "epoch": 11.122494432071269, "grad_norm": 25.81940460205078, "learning_rate": 1e-06, "loss": 0.7181, "num_input_tokens_seen": 279772812, "step": 4994 }, { "epoch": 11.122494432071269, "loss": 0.6644489765167236, "loss_ce": 0.00014238519361242652, "loss_iou": 0.283203125, "loss_num": 0.02001953125, "loss_xval": 0.6640625, "num_input_tokens_seen": 279772812, "step": 4994 }, { "epoch": 11.124721603563474, "grad_norm": 21.302309036254883, "learning_rate": 1e-06, "loss": 0.5902, "num_input_tokens_seen": 279828292, "step": 4995 }, { "epoch": 11.124721603563474, "loss": 0.5325771570205688, "loss_ce": 0.00022852000256534666, "loss_iou": 0.240234375, "loss_num": 0.0106201171875, "loss_xval": 0.53125, "num_input_tokens_seen": 279828292, "step": 4995 }, { "epoch": 11.126948775055679, "grad_norm": 16.87586212158203, "learning_rate": 1e-06, "loss": 0.472, "num_input_tokens_seen": 279885472, "step": 4996 }, { "epoch": 11.126948775055679, "loss": 0.3971105217933655, "loss_ce": 0.0001378602028125897, "loss_iou": 0.166015625, "loss_num": 0.01312255859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 279885472, "step": 4996 }, { "epoch": 11.129175946547884, "grad_norm": 20.097671508789062, "learning_rate": 1e-06, "loss": 0.6366, "num_input_tokens_seen": 279938956, "step": 4997 }, { "epoch": 11.129175946547884, "loss": 0.634530246257782, "loss_ce": 0.00013083560043014586, "loss_iou": 0.271484375, "loss_num": 0.01806640625, "loss_xval": 0.6328125, "num_input_tokens_seen": 279938956, "step": 4997 }, { "epoch": 11.131403118040089, "grad_norm": 16.503360748291016, "learning_rate": 1e-06, "loss": 0.5446, "num_input_tokens_seen": 279994336, "step": 4998 }, { "epoch": 11.131403118040089, "loss": 0.38283979892730713, "loss_ce": 0.0001493898016633466, "loss_iou": 0.16015625, "loss_num": 0.0123291015625, "loss_xval": 0.3828125, "num_input_tokens_seen": 279994336, "step": 4998 }, { "epoch": 11.133630289532293, "grad_norm": 13.981780052185059, "learning_rate": 1e-06, "loss": 0.606, "num_input_tokens_seen": 280048108, "step": 4999 }, { "epoch": 11.133630289532293, "loss": 0.5781067609786987, "loss_ce": 0.00010381722677266225, "loss_iou": 0.205078125, "loss_num": 0.03369140625, "loss_xval": 0.578125, "num_input_tokens_seen": 280048108, "step": 4999 }, { "epoch": 11.135857461024498, "grad_norm": 16.743953704833984, "learning_rate": 1e-06, "loss": 0.4433, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "eval_seeclick_web_CIoU": 0.5857278108596802, "eval_seeclick_web_GIoU": 0.5834327638149261, "eval_seeclick_web_IoU": 0.6043886542320251, "eval_seeclick_web_MAE_all": 0.015452081337571144, "eval_seeclick_web_MAE_h": 0.0073387217707931995, "eval_seeclick_web_MAE_w": 0.015388870611786842, "eval_seeclick_web_MAE_x_boxes": 0.009706755401566625, "eval_seeclick_web_MAE_y_boxes": 0.021358829457312822, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9223926067352295, "eval_seeclick_web_loss_ce": 0.0001983401962206699, "eval_seeclick_web_loss_iou": 0.4200439453125, "eval_seeclick_web_loss_num": 0.01239013671875, "eval_seeclick_web_loss_xval": 0.90185546875, "eval_seeclick_web_runtime": 21.9551, "eval_seeclick_web_samples_per_second": 2.277, "eval_seeclick_web_steps_per_second": 0.091, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "eval_icons_CIoU": 0.2814893424510956, "eval_icons_GIoU": 0.3012331575155258, "eval_icons_IoU": 0.3595561385154724, "eval_icons_MAE_all": 0.059540608897805214, "eval_icons_MAE_h": 0.03891334868967533, "eval_icons_MAE_w": 0.0580837675370276, "eval_icons_MAE_x_boxes": 0.05784302018582821, "eval_icons_MAE_y_boxes": 0.038528745993971825, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.7233656644821167, "eval_icons_loss_ce": 0.00022988053387962282, "eval_icons_loss_iou": 0.678466796875, "eval_icons_loss_num": 0.058696746826171875, "eval_icons_loss_xval": 1.6513671875, "eval_icons_runtime": 20.195, "eval_icons_samples_per_second": 2.476, "eval_icons_steps_per_second": 0.099, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "eval_screenspot_CIoU": 0.3535158932209015, "eval_screenspot_GIoU": 0.36772539218266803, "eval_screenspot_IoU": 0.43389413754145306, "eval_screenspot_MAE_all": 0.05942438915371895, "eval_screenspot_MAE_h": 0.03856873946885268, "eval_screenspot_MAE_w": 0.0688897892832756, "eval_screenspot_MAE_x_boxes": 0.07383330973486106, "eval_screenspot_MAE_y_boxes": 0.03976897584895293, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6220872402191162, "eval_screenspot_loss_ce": 0.000261851722219338, "eval_screenspot_loss_iou": 0.67236328125, "eval_screenspot_loss_num": 0.06821314493815105, "eval_screenspot_loss_xval": 1.6868489583333333, "eval_screenspot_runtime": 33.7501, "eval_screenspot_samples_per_second": 2.637, "eval_screenspot_steps_per_second": 0.089, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "eval_compot_CIoU": 0.3433762341737747, "eval_compot_GIoU": 0.35395348072052, "eval_compot_IoU": 0.4032938480377197, "eval_compot_MAE_all": 0.01813736092299223, "eval_compot_MAE_h": 0.009242744650691748, "eval_compot_MAE_w": 0.02075517177581787, "eval_compot_MAE_x_boxes": 0.030595741234719753, "eval_compot_MAE_y_boxes": 0.007154007675126195, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3828521966934204, "eval_compot_loss_ce": 0.000194034306332469, "eval_compot_loss_iou": 0.6356201171875, "eval_compot_loss_num": 0.016706466674804688, "eval_compot_loss_xval": 1.35498046875, "eval_compot_runtime": 22.2096, "eval_compot_samples_per_second": 2.251, "eval_compot_steps_per_second": 0.09, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "eval_custom_ui_val_CIoU": 0.46921708765957093, "eval_custom_ui_val_GIoU": 0.48126794232262504, "eval_custom_ui_val_IoU": 0.5315591428014967, "eval_custom_ui_val_MAE_all": 0.030911025901635487, "eval_custom_ui_val_MAE_h": 0.016821589320898056, "eval_custom_ui_val_MAE_w": 0.0393333797239595, "eval_custom_ui_val_MAE_x_boxes": 0.03895654301676485, "eval_custom_ui_val_MAE_y_boxes": 0.015129889025249414, "eval_custom_ui_val_inside_bbox": 0.7527006202273898, "eval_custom_ui_val_loss": 1.2006866931915283, "eval_custom_ui_val_loss_ce": 0.00023336601023200073, "eval_custom_ui_val_loss_iou": 0.5094129774305556, "eval_custom_ui_val_loss_num": 0.028292549981011286, "eval_custom_ui_val_loss_xval": 1.1603190104166667, "eval_custom_ui_val_runtime": 65.5609, "eval_custom_ui_val_samples_per_second": 4.042, "eval_custom_ui_val_steps_per_second": 0.137, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.135857461024498, "loss": 0.8996152877807617, "loss_ce": 0.00020119547843933105, "loss_iou": 0.38671875, "loss_num": 0.02490234375, "loss_xval": 0.8984375, "num_input_tokens_seen": 280103384, "step": 5000 }, { "epoch": 11.138084632516703, "grad_norm": 19.082584381103516, "learning_rate": 1e-06, "loss": 0.4929, "num_input_tokens_seen": 280158504, "step": 5001 }, { "epoch": 11.138084632516703, "loss": 0.39870956540107727, "loss_ce": 0.00015000064740888774, "loss_iou": 0.1728515625, "loss_num": 0.010498046875, "loss_xval": 0.3984375, "num_input_tokens_seen": 280158504, "step": 5001 }, { "epoch": 11.140311804008908, "grad_norm": 26.122913360595703, "learning_rate": 1e-06, "loss": 0.5198, "num_input_tokens_seen": 280214744, "step": 5002 }, { "epoch": 11.140311804008908, "loss": 0.45124655961990356, "loss_ce": 0.00013570950250141323, "loss_iou": 0.19921875, "loss_num": 0.0107421875, "loss_xval": 0.451171875, "num_input_tokens_seen": 280214744, "step": 5002 }, { "epoch": 11.142538975501113, "grad_norm": 27.3781681060791, "learning_rate": 1e-06, "loss": 0.4616, "num_input_tokens_seen": 280271032, "step": 5003 }, { "epoch": 11.142538975501113, "loss": 0.32605159282684326, "loss_ce": 0.00012386470916680992, "loss_iou": 0.125, "loss_num": 0.0150146484375, "loss_xval": 0.326171875, "num_input_tokens_seen": 280271032, "step": 5003 }, { "epoch": 11.144766146993318, "grad_norm": 19.879722595214844, "learning_rate": 1e-06, "loss": 0.531, "num_input_tokens_seen": 280327288, "step": 5004 }, { "epoch": 11.144766146993318, "loss": 0.43859922885894775, "loss_ce": 0.00012264544784557074, "loss_iou": 0.1982421875, "loss_num": 0.00823974609375, "loss_xval": 0.4375, "num_input_tokens_seen": 280327288, "step": 5004 }, { "epoch": 11.146993318485523, "grad_norm": 22.407188415527344, "learning_rate": 1e-06, "loss": 0.7016, "num_input_tokens_seen": 280383788, "step": 5005 }, { "epoch": 11.146993318485523, "loss": 0.49451902508735657, "loss_ce": 0.00013426925579551607, "loss_iou": 0.203125, "loss_num": 0.0177001953125, "loss_xval": 0.494140625, "num_input_tokens_seen": 280383788, "step": 5005 }, { "epoch": 11.14922048997773, "grad_norm": 17.652860641479492, "learning_rate": 1e-06, "loss": 0.4529, "num_input_tokens_seen": 280438888, "step": 5006 }, { "epoch": 11.14922048997773, "loss": 0.38709527254104614, "loss_ce": 0.00013238785322755575, "loss_iou": 0.1796875, "loss_num": 0.00537109375, "loss_xval": 0.38671875, "num_input_tokens_seen": 280438888, "step": 5006 }, { "epoch": 11.151447661469934, "grad_norm": 28.968141555786133, "learning_rate": 1e-06, "loss": 0.561, "num_input_tokens_seen": 280494244, "step": 5007 }, { "epoch": 11.151447661469934, "loss": 0.5118998289108276, "loss_ce": 0.0005473287310451269, "loss_iou": 0.2158203125, "loss_num": 0.0159912109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 280494244, "step": 5007 }, { "epoch": 11.153674832962139, "grad_norm": 22.353212356567383, "learning_rate": 1e-06, "loss": 0.5322, "num_input_tokens_seen": 280547136, "step": 5008 }, { "epoch": 11.153674832962139, "loss": 0.6800668239593506, "loss_ce": 0.00013513598241843283, "loss_iou": 0.30078125, "loss_num": 0.0159912109375, "loss_xval": 0.6796875, "num_input_tokens_seen": 280547136, "step": 5008 }, { "epoch": 11.155902004454344, "grad_norm": 16.07948875427246, "learning_rate": 1e-06, "loss": 0.6479, "num_input_tokens_seen": 280605596, "step": 5009 }, { "epoch": 11.155902004454344, "loss": 0.7251046895980835, "loss_ce": 0.00012912959209643304, "loss_iou": 0.2890625, "loss_num": 0.029296875, "loss_xval": 0.7265625, "num_input_tokens_seen": 280605596, "step": 5009 }, { "epoch": 11.158129175946549, "grad_norm": 21.49631690979004, "learning_rate": 1e-06, "loss": 0.411, "num_input_tokens_seen": 280662056, "step": 5010 }, { "epoch": 11.158129175946549, "loss": 0.46879857778549194, "loss_ce": 0.000170662795426324, "loss_iou": 0.2138671875, "loss_num": 0.00836181640625, "loss_xval": 0.46875, "num_input_tokens_seen": 280662056, "step": 5010 }, { "epoch": 11.160356347438753, "grad_norm": 25.566015243530273, "learning_rate": 1e-06, "loss": 0.4451, "num_input_tokens_seen": 280718144, "step": 5011 }, { "epoch": 11.160356347438753, "loss": 0.3402805030345917, "loss_ce": 0.00019259938562754542, "loss_iou": 0.1533203125, "loss_num": 0.00677490234375, "loss_xval": 0.33984375, "num_input_tokens_seen": 280718144, "step": 5011 }, { "epoch": 11.162583518930958, "grad_norm": 19.456497192382812, "learning_rate": 1e-06, "loss": 0.5608, "num_input_tokens_seen": 280769956, "step": 5012 }, { "epoch": 11.162583518930958, "loss": 0.5276530981063843, "loss_ce": 0.00018727785209193826, "loss_iou": 0.228515625, "loss_num": 0.0140380859375, "loss_xval": 0.52734375, "num_input_tokens_seen": 280769956, "step": 5012 }, { "epoch": 11.164810690423163, "grad_norm": 19.570919036865234, "learning_rate": 1e-06, "loss": 0.4798, "num_input_tokens_seen": 280824936, "step": 5013 }, { "epoch": 11.164810690423163, "loss": 0.3978237807750702, "loss_ce": 0.00011871426249854267, "loss_iou": 0.15625, "loss_num": 0.016845703125, "loss_xval": 0.3984375, "num_input_tokens_seen": 280824936, "step": 5013 }, { "epoch": 11.167037861915368, "grad_norm": 26.35681915283203, "learning_rate": 1e-06, "loss": 0.5786, "num_input_tokens_seen": 280878960, "step": 5014 }, { "epoch": 11.167037861915368, "loss": 0.6183251142501831, "loss_ce": 0.00016105023678392172, "loss_iou": 0.255859375, "loss_num": 0.0213623046875, "loss_xval": 0.6171875, "num_input_tokens_seen": 280878960, "step": 5014 }, { "epoch": 11.169265033407573, "grad_norm": 20.287382125854492, "learning_rate": 1e-06, "loss": 0.5377, "num_input_tokens_seen": 280934428, "step": 5015 }, { "epoch": 11.169265033407573, "loss": 0.5026686191558838, "loss_ce": 0.00010515956091694534, "loss_iou": 0.2060546875, "loss_num": 0.01806640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 280934428, "step": 5015 }, { "epoch": 11.171492204899778, "grad_norm": 27.965999603271484, "learning_rate": 1e-06, "loss": 0.514, "num_input_tokens_seen": 280992196, "step": 5016 }, { "epoch": 11.171492204899778, "loss": 0.521459698677063, "loss_ce": 0.00021950851078145206, "loss_iou": 0.2216796875, "loss_num": 0.015625, "loss_xval": 0.51953125, "num_input_tokens_seen": 280992196, "step": 5016 }, { "epoch": 11.173719376391983, "grad_norm": 16.570053100585938, "learning_rate": 1e-06, "loss": 0.4161, "num_input_tokens_seen": 281050828, "step": 5017 }, { "epoch": 11.173719376391983, "loss": 0.4344647526741028, "loss_ce": 0.00013857701560482383, "loss_iou": 0.1796875, "loss_num": 0.01513671875, "loss_xval": 0.43359375, "num_input_tokens_seen": 281050828, "step": 5017 }, { "epoch": 11.175946547884188, "grad_norm": 16.34454345703125, "learning_rate": 1e-06, "loss": 0.6104, "num_input_tokens_seen": 281106284, "step": 5018 }, { "epoch": 11.175946547884188, "loss": 0.7851086854934692, "loss_ce": 0.00019656957010738552, "loss_iou": 0.361328125, "loss_num": 0.01275634765625, "loss_xval": 0.78515625, "num_input_tokens_seen": 281106284, "step": 5018 }, { "epoch": 11.178173719376392, "grad_norm": 15.942395210266113, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 281164500, "step": 5019 }, { "epoch": 11.178173719376392, "loss": 0.3177483081817627, "loss_ce": 0.00012136220175307244, "loss_iou": 0.140625, "loss_num": 0.007080078125, "loss_xval": 0.318359375, "num_input_tokens_seen": 281164500, "step": 5019 }, { "epoch": 11.180400890868597, "grad_norm": 20.594022750854492, "learning_rate": 1e-06, "loss": 0.6501, "num_input_tokens_seen": 281222920, "step": 5020 }, { "epoch": 11.180400890868597, "loss": 0.4495760202407837, "loss_ce": 0.00011313259165035561, "loss_iou": 0.2041015625, "loss_num": 0.00836181640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 281222920, "step": 5020 }, { "epoch": 11.182628062360802, "grad_norm": 20.63737678527832, "learning_rate": 1e-06, "loss": 0.7283, "num_input_tokens_seen": 281280520, "step": 5021 }, { "epoch": 11.182628062360802, "loss": 0.8178755640983582, "loss_ce": 0.00012653997691813856, "loss_iou": 0.330078125, "loss_num": 0.03173828125, "loss_xval": 0.81640625, "num_input_tokens_seen": 281280520, "step": 5021 }, { "epoch": 11.184855233853007, "grad_norm": 19.35364532470703, "learning_rate": 1e-06, "loss": 0.5065, "num_input_tokens_seen": 281338688, "step": 5022 }, { "epoch": 11.184855233853007, "loss": 0.5651026964187622, "loss_ce": 0.00016126442642416805, "loss_iou": 0.244140625, "loss_num": 0.01519775390625, "loss_xval": 0.56640625, "num_input_tokens_seen": 281338688, "step": 5022 }, { "epoch": 11.187082405345212, "grad_norm": 17.587987899780273, "learning_rate": 1e-06, "loss": 0.41, "num_input_tokens_seen": 281394900, "step": 5023 }, { "epoch": 11.187082405345212, "loss": 0.2979922890663147, "loss_ce": 0.00014070735778659582, "loss_iou": 0.119140625, "loss_num": 0.01202392578125, "loss_xval": 0.296875, "num_input_tokens_seen": 281394900, "step": 5023 }, { "epoch": 11.189309576837417, "grad_norm": 42.784358978271484, "learning_rate": 1e-06, "loss": 0.5281, "num_input_tokens_seen": 281448492, "step": 5024 }, { "epoch": 11.189309576837417, "loss": 0.5797094106674194, "loss_ce": 0.00011953900684602559, "loss_iou": 0.263671875, "loss_num": 0.01025390625, "loss_xval": 0.578125, "num_input_tokens_seen": 281448492, "step": 5024 }, { "epoch": 11.191536748329622, "grad_norm": 19.45455551147461, "learning_rate": 1e-06, "loss": 0.5473, "num_input_tokens_seen": 281505840, "step": 5025 }, { "epoch": 11.191536748329622, "loss": 0.4985758662223816, "loss_ce": 0.00016279635019600391, "loss_iou": 0.2177734375, "loss_num": 0.012451171875, "loss_xval": 0.498046875, "num_input_tokens_seen": 281505840, "step": 5025 }, { "epoch": 11.193763919821826, "grad_norm": 14.789341926574707, "learning_rate": 1e-06, "loss": 0.428, "num_input_tokens_seen": 281563968, "step": 5026 }, { "epoch": 11.193763919821826, "loss": 0.5229382514953613, "loss_ce": 0.00023318573948927224, "loss_iou": 0.2099609375, "loss_num": 0.0206298828125, "loss_xval": 0.5234375, "num_input_tokens_seen": 281563968, "step": 5026 }, { "epoch": 11.195991091314031, "grad_norm": 24.538976669311523, "learning_rate": 1e-06, "loss": 0.5299, "num_input_tokens_seen": 281618908, "step": 5027 }, { "epoch": 11.195991091314031, "loss": 0.4837605655193329, "loss_ce": 0.0001179840401164256, "loss_iou": 0.20703125, "loss_num": 0.01397705078125, "loss_xval": 0.484375, "num_input_tokens_seen": 281618908, "step": 5027 }, { "epoch": 11.198218262806236, "grad_norm": 16.29832649230957, "learning_rate": 1e-06, "loss": 0.4994, "num_input_tokens_seen": 281674612, "step": 5028 }, { "epoch": 11.198218262806236, "loss": 0.5437781810760498, "loss_ce": 0.00013804963964503258, "loss_iou": 0.2314453125, "loss_num": 0.0159912109375, "loss_xval": 0.54296875, "num_input_tokens_seen": 281674612, "step": 5028 }, { "epoch": 11.200445434298441, "grad_norm": 16.095746994018555, "learning_rate": 1e-06, "loss": 0.6377, "num_input_tokens_seen": 281731544, "step": 5029 }, { "epoch": 11.200445434298441, "loss": 0.584113359451294, "loss_ce": 0.0003731203032657504, "loss_iou": 0.224609375, "loss_num": 0.027099609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 281731544, "step": 5029 }, { "epoch": 11.202672605790646, "grad_norm": 18.842683792114258, "learning_rate": 1e-06, "loss": 0.3468, "num_input_tokens_seen": 281788084, "step": 5030 }, { "epoch": 11.202672605790646, "loss": 0.38637834787368774, "loss_ce": 0.00014787877444177866, "loss_iou": 0.1611328125, "loss_num": 0.0125732421875, "loss_xval": 0.38671875, "num_input_tokens_seen": 281788084, "step": 5030 }, { "epoch": 11.20489977728285, "grad_norm": 17.219839096069336, "learning_rate": 1e-06, "loss": 0.655, "num_input_tokens_seen": 281846932, "step": 5031 }, { "epoch": 11.20489977728285, "loss": 0.9407340884208679, "loss_ce": 0.00018237490439787507, "loss_iou": 0.36328125, "loss_num": 0.042236328125, "loss_xval": 0.94140625, "num_input_tokens_seen": 281846932, "step": 5031 }, { "epoch": 11.207126948775056, "grad_norm": 22.804384231567383, "learning_rate": 1e-06, "loss": 0.452, "num_input_tokens_seen": 281905708, "step": 5032 }, { "epoch": 11.207126948775056, "loss": 0.5562899112701416, "loss_ce": 0.00013752697850577533, "loss_iou": 0.255859375, "loss_num": 0.00860595703125, "loss_xval": 0.5546875, "num_input_tokens_seen": 281905708, "step": 5032 }, { "epoch": 11.20935412026726, "grad_norm": 23.392213821411133, "learning_rate": 1e-06, "loss": 0.5097, "num_input_tokens_seen": 281963088, "step": 5033 }, { "epoch": 11.20935412026726, "loss": 0.4477725028991699, "loss_ce": 0.00014065910363569856, "loss_iou": 0.193359375, "loss_num": 0.01220703125, "loss_xval": 0.447265625, "num_input_tokens_seen": 281963088, "step": 5033 }, { "epoch": 11.211581291759465, "grad_norm": 26.24521255493164, "learning_rate": 1e-06, "loss": 0.673, "num_input_tokens_seen": 282019504, "step": 5034 }, { "epoch": 11.211581291759465, "loss": 0.5856134295463562, "loss_ce": 0.00016419796156696975, "loss_iou": 0.251953125, "loss_num": 0.0166015625, "loss_xval": 0.5859375, "num_input_tokens_seen": 282019504, "step": 5034 }, { "epoch": 11.21380846325167, "grad_norm": 13.377967834472656, "learning_rate": 1e-06, "loss": 0.5941, "num_input_tokens_seen": 282077584, "step": 5035 }, { "epoch": 11.21380846325167, "loss": 0.5731761455535889, "loss_ce": 0.00017806813411880285, "loss_iou": 0.263671875, "loss_num": 0.0091552734375, "loss_xval": 0.57421875, "num_input_tokens_seen": 282077584, "step": 5035 }, { "epoch": 11.216035634743875, "grad_norm": 23.46733283996582, "learning_rate": 1e-06, "loss": 0.555, "num_input_tokens_seen": 282135040, "step": 5036 }, { "epoch": 11.216035634743875, "loss": 0.5774731040000916, "loss_ce": 0.00020261471217963845, "loss_iou": 0.251953125, "loss_num": 0.014892578125, "loss_xval": 0.578125, "num_input_tokens_seen": 282135040, "step": 5036 }, { "epoch": 11.21826280623608, "grad_norm": 18.02016258239746, "learning_rate": 1e-06, "loss": 0.5232, "num_input_tokens_seen": 282192724, "step": 5037 }, { "epoch": 11.21826280623608, "loss": 0.5690996050834656, "loss_ce": 0.00012990040704607964, "loss_iou": 0.2158203125, "loss_num": 0.027587890625, "loss_xval": 0.5703125, "num_input_tokens_seen": 282192724, "step": 5037 }, { "epoch": 11.220489977728285, "grad_norm": 17.373300552368164, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 282245988, "step": 5038 }, { "epoch": 11.220489977728285, "loss": 0.4509298801422119, "loss_ce": 0.00012421750579960644, "loss_iou": 0.193359375, "loss_num": 0.0126953125, "loss_xval": 0.451171875, "num_input_tokens_seen": 282245988, "step": 5038 }, { "epoch": 11.22271714922049, "grad_norm": 16.49465560913086, "learning_rate": 1e-06, "loss": 0.6109, "num_input_tokens_seen": 282301460, "step": 5039 }, { "epoch": 11.22271714922049, "loss": 0.5691109895706177, "loss_ce": 0.00014124812150839716, "loss_iou": 0.2578125, "loss_num": 0.010498046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 282301460, "step": 5039 }, { "epoch": 11.224944320712694, "grad_norm": 24.778156280517578, "learning_rate": 1e-06, "loss": 0.5473, "num_input_tokens_seen": 282356420, "step": 5040 }, { "epoch": 11.224944320712694, "loss": 0.5384612679481506, "loss_ce": 0.0001312288804911077, "loss_iou": 0.23828125, "loss_num": 0.01220703125, "loss_xval": 0.5390625, "num_input_tokens_seen": 282356420, "step": 5040 }, { "epoch": 11.2271714922049, "grad_norm": 17.579057693481445, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 282411144, "step": 5041 }, { "epoch": 11.2271714922049, "loss": 0.5568938851356506, "loss_ce": 0.00013120746007189155, "loss_iou": 0.236328125, "loss_num": 0.0167236328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 282411144, "step": 5041 }, { "epoch": 11.229398663697104, "grad_norm": 20.767990112304688, "learning_rate": 1e-06, "loss": 0.5745, "num_input_tokens_seen": 282466800, "step": 5042 }, { "epoch": 11.229398663697104, "loss": 0.35937535762786865, "loss_ce": 0.00012243175297044218, "loss_iou": 0.162109375, "loss_num": 0.006927490234375, "loss_xval": 0.359375, "num_input_tokens_seen": 282466800, "step": 5042 }, { "epoch": 11.231625835189309, "grad_norm": 18.92509651184082, "learning_rate": 1e-06, "loss": 0.4772, "num_input_tokens_seen": 282524332, "step": 5043 }, { "epoch": 11.231625835189309, "loss": 0.47509121894836426, "loss_ce": 0.00011561952851479873, "loss_iou": 0.22265625, "loss_num": 0.006011962890625, "loss_xval": 0.474609375, "num_input_tokens_seen": 282524332, "step": 5043 }, { "epoch": 11.233853006681514, "grad_norm": 22.10957908630371, "learning_rate": 1e-06, "loss": 0.5585, "num_input_tokens_seen": 282580732, "step": 5044 }, { "epoch": 11.233853006681514, "loss": 0.7198188304901123, "loss_ce": 0.00033639208413660526, "loss_iou": 0.306640625, "loss_num": 0.0213623046875, "loss_xval": 0.71875, "num_input_tokens_seen": 282580732, "step": 5044 }, { "epoch": 11.236080178173719, "grad_norm": 20.335216522216797, "learning_rate": 1e-06, "loss": 0.3889, "num_input_tokens_seen": 282638292, "step": 5045 }, { "epoch": 11.236080178173719, "loss": 0.44411370158195496, "loss_ce": 0.00014397443737834692, "loss_iou": 0.2041015625, "loss_num": 0.00701904296875, "loss_xval": 0.443359375, "num_input_tokens_seen": 282638292, "step": 5045 }, { "epoch": 11.238307349665924, "grad_norm": 17.235029220581055, "learning_rate": 1e-06, "loss": 0.5289, "num_input_tokens_seen": 282695300, "step": 5046 }, { "epoch": 11.238307349665924, "loss": 0.5496923923492432, "loss_ce": 0.00013185839634388685, "loss_iou": 0.23828125, "loss_num": 0.014404296875, "loss_xval": 0.55078125, "num_input_tokens_seen": 282695300, "step": 5046 }, { "epoch": 11.240534521158128, "grad_norm": 15.562467575073242, "learning_rate": 1e-06, "loss": 0.7311, "num_input_tokens_seen": 282751756, "step": 5047 }, { "epoch": 11.240534521158128, "loss": 0.8263590335845947, "loss_ce": 0.00018715820624493062, "loss_iou": 0.33984375, "loss_num": 0.029052734375, "loss_xval": 0.828125, "num_input_tokens_seen": 282751756, "step": 5047 }, { "epoch": 11.242761692650333, "grad_norm": 12.08515453338623, "learning_rate": 1e-06, "loss": 0.5536, "num_input_tokens_seen": 282807936, "step": 5048 }, { "epoch": 11.242761692650333, "loss": 0.4205526113510132, "loss_ce": 0.00014246124192140996, "loss_iou": 0.1904296875, "loss_num": 0.0078125, "loss_xval": 0.419921875, "num_input_tokens_seen": 282807936, "step": 5048 }, { "epoch": 11.244988864142538, "grad_norm": 15.926555633544922, "learning_rate": 1e-06, "loss": 0.5036, "num_input_tokens_seen": 282863292, "step": 5049 }, { "epoch": 11.244988864142538, "loss": 0.6954518556594849, "loss_ce": 0.00013932373258285224, "loss_iou": 0.298828125, "loss_num": 0.0194091796875, "loss_xval": 0.6953125, "num_input_tokens_seen": 282863292, "step": 5049 }, { "epoch": 11.247216035634743, "grad_norm": 41.89063262939453, "learning_rate": 1e-06, "loss": 0.405, "num_input_tokens_seen": 282917964, "step": 5050 }, { "epoch": 11.247216035634743, "loss": 0.4797305464744568, "loss_ce": 0.00011627860658336431, "loss_iou": 0.216796875, "loss_num": 0.00909423828125, "loss_xval": 0.48046875, "num_input_tokens_seen": 282917964, "step": 5050 }, { "epoch": 11.249443207126948, "grad_norm": 20.080503463745117, "learning_rate": 1e-06, "loss": 0.5513, "num_input_tokens_seen": 282975304, "step": 5051 }, { "epoch": 11.249443207126948, "loss": 0.5874269604682922, "loss_ce": 0.0001466784015065059, "loss_iou": 0.255859375, "loss_num": 0.0150146484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 282975304, "step": 5051 }, { "epoch": 11.251670378619155, "grad_norm": 18.951017379760742, "learning_rate": 1e-06, "loss": 0.4595, "num_input_tokens_seen": 283031220, "step": 5052 }, { "epoch": 11.251670378619155, "loss": 0.43687593936920166, "loss_ce": 0.00010837505396921188, "loss_iou": 0.1796875, "loss_num": 0.01556396484375, "loss_xval": 0.4375, "num_input_tokens_seen": 283031220, "step": 5052 }, { "epoch": 11.25389755011136, "grad_norm": 20.08397674560547, "learning_rate": 1e-06, "loss": 0.5126, "num_input_tokens_seen": 283088852, "step": 5053 }, { "epoch": 11.25389755011136, "loss": 0.47264423966407776, "loss_ce": 0.00011004768748534843, "loss_iou": 0.197265625, "loss_num": 0.015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 283088852, "step": 5053 }, { "epoch": 11.256124721603564, "grad_norm": 23.235828399658203, "learning_rate": 1e-06, "loss": 0.6491, "num_input_tokens_seen": 283146176, "step": 5054 }, { "epoch": 11.256124721603564, "loss": 0.5091769695281982, "loss_ce": 0.000143796467455104, "loss_iou": 0.21484375, "loss_num": 0.015869140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 283146176, "step": 5054 }, { "epoch": 11.25835189309577, "grad_norm": 18.110916137695312, "learning_rate": 1e-06, "loss": 0.4254, "num_input_tokens_seen": 283201764, "step": 5055 }, { "epoch": 11.25835189309577, "loss": 0.3657214045524597, "loss_ce": 0.00012084872287232429, "loss_iou": 0.1640625, "loss_num": 0.007568359375, "loss_xval": 0.365234375, "num_input_tokens_seen": 283201764, "step": 5055 }, { "epoch": 11.260579064587974, "grad_norm": 15.130694389343262, "learning_rate": 1e-06, "loss": 0.5045, "num_input_tokens_seen": 283259004, "step": 5056 }, { "epoch": 11.260579064587974, "loss": 0.401218056678772, "loss_ce": 9.503310866421089e-05, "loss_iou": 0.16796875, "loss_num": 0.012939453125, "loss_xval": 0.400390625, "num_input_tokens_seen": 283259004, "step": 5056 }, { "epoch": 11.262806236080179, "grad_norm": 20.411800384521484, "learning_rate": 1e-06, "loss": 0.6591, "num_input_tokens_seen": 283318136, "step": 5057 }, { "epoch": 11.262806236080179, "loss": 0.8671286702156067, "loss_ce": 0.00018532315152697265, "loss_iou": 0.34375, "loss_num": 0.0361328125, "loss_xval": 0.8671875, "num_input_tokens_seen": 283318136, "step": 5057 }, { "epoch": 11.265033407572384, "grad_norm": 15.510054588317871, "learning_rate": 1e-06, "loss": 0.5234, "num_input_tokens_seen": 283376952, "step": 5058 }, { "epoch": 11.265033407572384, "loss": 0.48036888241767883, "loss_ce": 0.00014426674169953912, "loss_iou": 0.2109375, "loss_num": 0.01165771484375, "loss_xval": 0.48046875, "num_input_tokens_seen": 283376952, "step": 5058 }, { "epoch": 11.267260579064589, "grad_norm": 16.76605224609375, "learning_rate": 1e-06, "loss": 0.5593, "num_input_tokens_seen": 283432408, "step": 5059 }, { "epoch": 11.267260579064589, "loss": 0.5044692754745483, "loss_ce": 0.000135770023916848, "loss_iou": 0.220703125, "loss_num": 0.012451171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 283432408, "step": 5059 }, { "epoch": 11.269487750556793, "grad_norm": 21.036209106445312, "learning_rate": 1e-06, "loss": 0.5646, "num_input_tokens_seen": 283490212, "step": 5060 }, { "epoch": 11.269487750556793, "loss": 0.7949022054672241, "loss_ce": 0.00022446672664955258, "loss_iou": 0.337890625, "loss_num": 0.0238037109375, "loss_xval": 0.79296875, "num_input_tokens_seen": 283490212, "step": 5060 }, { "epoch": 11.271714922048998, "grad_norm": 14.880892753601074, "learning_rate": 1e-06, "loss": 0.5155, "num_input_tokens_seen": 283547208, "step": 5061 }, { "epoch": 11.271714922048998, "loss": 0.34775838255882263, "loss_ce": 0.00010212791676167399, "loss_iou": 0.126953125, "loss_num": 0.0186767578125, "loss_xval": 0.34765625, "num_input_tokens_seen": 283547208, "step": 5061 }, { "epoch": 11.273942093541203, "grad_norm": 11.523423194885254, "learning_rate": 1e-06, "loss": 0.4296, "num_input_tokens_seen": 283605480, "step": 5062 }, { "epoch": 11.273942093541203, "loss": 0.2592126131057739, "loss_ce": 0.00011838733917102218, "loss_iou": 0.103515625, "loss_num": 0.01043701171875, "loss_xval": 0.259765625, "num_input_tokens_seen": 283605480, "step": 5062 }, { "epoch": 11.276169265033408, "grad_norm": 20.444461822509766, "learning_rate": 1e-06, "loss": 0.4135, "num_input_tokens_seen": 283660960, "step": 5063 }, { "epoch": 11.276169265033408, "loss": 0.3837610185146332, "loss_ce": 0.0001245247694896534, "loss_iou": 0.1611328125, "loss_num": 0.01220703125, "loss_xval": 0.3828125, "num_input_tokens_seen": 283660960, "step": 5063 }, { "epoch": 11.278396436525613, "grad_norm": 16.973899841308594, "learning_rate": 1e-06, "loss": 0.5658, "num_input_tokens_seen": 283716348, "step": 5064 }, { "epoch": 11.278396436525613, "loss": 0.47617107629776, "loss_ce": 0.000585106376092881, "loss_iou": 0.216796875, "loss_num": 0.00848388671875, "loss_xval": 0.4765625, "num_input_tokens_seen": 283716348, "step": 5064 }, { "epoch": 11.280623608017818, "grad_norm": 15.647051811218262, "learning_rate": 1e-06, "loss": 0.5102, "num_input_tokens_seen": 283775620, "step": 5065 }, { "epoch": 11.280623608017818, "loss": 0.4950053095817566, "loss_ce": 0.00019331733346916735, "loss_iou": 0.20703125, "loss_num": 0.0162353515625, "loss_xval": 0.494140625, "num_input_tokens_seen": 283775620, "step": 5065 }, { "epoch": 11.282850779510023, "grad_norm": 14.765268325805664, "learning_rate": 1e-06, "loss": 0.4677, "num_input_tokens_seen": 283834152, "step": 5066 }, { "epoch": 11.282850779510023, "loss": 0.4379033148288727, "loss_ce": 0.00012866513861808926, "loss_iou": 0.193359375, "loss_num": 0.01007080078125, "loss_xval": 0.4375, "num_input_tokens_seen": 283834152, "step": 5066 }, { "epoch": 11.285077951002227, "grad_norm": 18.143211364746094, "learning_rate": 1e-06, "loss": 0.4485, "num_input_tokens_seen": 283891084, "step": 5067 }, { "epoch": 11.285077951002227, "loss": 0.4209212064743042, "loss_ce": 0.00014482939150184393, "loss_iou": 0.185546875, "loss_num": 0.010009765625, "loss_xval": 0.419921875, "num_input_tokens_seen": 283891084, "step": 5067 }, { "epoch": 11.287305122494432, "grad_norm": 14.855110168457031, "learning_rate": 1e-06, "loss": 0.3737, "num_input_tokens_seen": 283949732, "step": 5068 }, { "epoch": 11.287305122494432, "loss": 0.3620651960372925, "loss_ce": 0.0001267299521714449, "loss_iou": 0.166015625, "loss_num": 0.005828857421875, "loss_xval": 0.361328125, "num_input_tokens_seen": 283949732, "step": 5068 }, { "epoch": 11.289532293986637, "grad_norm": 43.02797317504883, "learning_rate": 1e-06, "loss": 0.5399, "num_input_tokens_seen": 284006896, "step": 5069 }, { "epoch": 11.289532293986637, "loss": 0.5649739503860474, "loss_ce": 0.00015458805137313902, "loss_iou": 0.23828125, "loss_num": 0.0174560546875, "loss_xval": 0.56640625, "num_input_tokens_seen": 284006896, "step": 5069 }, { "epoch": 11.291759465478842, "grad_norm": 25.057252883911133, "learning_rate": 1e-06, "loss": 0.5187, "num_input_tokens_seen": 284063816, "step": 5070 }, { "epoch": 11.291759465478842, "loss": 0.6080396175384521, "loss_ce": 0.0001294128887820989, "loss_iou": 0.287109375, "loss_num": 0.007080078125, "loss_xval": 0.609375, "num_input_tokens_seen": 284063816, "step": 5070 }, { "epoch": 11.293986636971047, "grad_norm": 18.99828338623047, "learning_rate": 1e-06, "loss": 0.4259, "num_input_tokens_seen": 284120160, "step": 5071 }, { "epoch": 11.293986636971047, "loss": 0.5792200565338135, "loss_ce": 0.00011846120469272137, "loss_iou": 0.26953125, "loss_num": 0.00787353515625, "loss_xval": 0.578125, "num_input_tokens_seen": 284120160, "step": 5071 }, { "epoch": 11.296213808463252, "grad_norm": 11.530062675476074, "learning_rate": 1e-06, "loss": 0.4847, "num_input_tokens_seen": 284178348, "step": 5072 }, { "epoch": 11.296213808463252, "loss": 0.4051365852355957, "loss_ce": 0.0001073086605174467, "loss_iou": 0.1787109375, "loss_num": 0.00970458984375, "loss_xval": 0.404296875, "num_input_tokens_seen": 284178348, "step": 5072 }, { "epoch": 11.298440979955457, "grad_norm": 30.209646224975586, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 284237160, "step": 5073 }, { "epoch": 11.298440979955457, "loss": 0.583868682384491, "loss_ce": 0.00012847641482949257, "loss_iou": 0.248046875, "loss_num": 0.017333984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 284237160, "step": 5073 }, { "epoch": 11.300668151447661, "grad_norm": 23.079030990600586, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 284293580, "step": 5074 }, { "epoch": 11.300668151447661, "loss": 0.5660747289657593, "loss_ce": 0.00015678048657719046, "loss_iou": 0.25, "loss_num": 0.01300048828125, "loss_xval": 0.56640625, "num_input_tokens_seen": 284293580, "step": 5074 }, { "epoch": 11.302895322939866, "grad_norm": 18.702524185180664, "learning_rate": 1e-06, "loss": 0.4264, "num_input_tokens_seen": 284350604, "step": 5075 }, { "epoch": 11.302895322939866, "loss": 0.38073596358299255, "loss_ce": 0.00012072990648448467, "loss_iou": 0.1630859375, "loss_num": 0.01092529296875, "loss_xval": 0.380859375, "num_input_tokens_seen": 284350604, "step": 5075 }, { "epoch": 11.305122494432071, "grad_norm": 72.22659301757812, "learning_rate": 1e-06, "loss": 0.7127, "num_input_tokens_seen": 284408168, "step": 5076 }, { "epoch": 11.305122494432071, "loss": 0.8336408138275146, "loss_ce": 0.00014475997886620462, "loss_iou": 0.349609375, "loss_num": 0.0269775390625, "loss_xval": 0.83203125, "num_input_tokens_seen": 284408168, "step": 5076 }, { "epoch": 11.307349665924276, "grad_norm": 27.653568267822266, "learning_rate": 1e-06, "loss": 0.4948, "num_input_tokens_seen": 284465572, "step": 5077 }, { "epoch": 11.307349665924276, "loss": 0.4449566602706909, "loss_ce": 0.0001324501063209027, "loss_iou": 0.197265625, "loss_num": 0.00994873046875, "loss_xval": 0.4453125, "num_input_tokens_seen": 284465572, "step": 5077 }, { "epoch": 11.309576837416481, "grad_norm": 29.11145782470703, "learning_rate": 1e-06, "loss": 0.4789, "num_input_tokens_seen": 284519732, "step": 5078 }, { "epoch": 11.309576837416481, "loss": 0.3955211043357849, "loss_ce": 0.0001353362895315513, "loss_iou": 0.162109375, "loss_num": 0.01416015625, "loss_xval": 0.39453125, "num_input_tokens_seen": 284519732, "step": 5078 }, { "epoch": 11.311804008908686, "grad_norm": 14.795896530151367, "learning_rate": 1e-06, "loss": 0.5233, "num_input_tokens_seen": 284578024, "step": 5079 }, { "epoch": 11.311804008908686, "loss": 0.3413148522377014, "loss_ce": 0.00012834850349463522, "loss_iou": 0.1591796875, "loss_num": 0.00457763671875, "loss_xval": 0.341796875, "num_input_tokens_seen": 284578024, "step": 5079 }, { "epoch": 11.31403118040089, "grad_norm": 20.22231101989746, "learning_rate": 1e-06, "loss": 0.3846, "num_input_tokens_seen": 284636628, "step": 5080 }, { "epoch": 11.31403118040089, "loss": 0.40492209792137146, "loss_ce": 0.0001369206584058702, "loss_iou": 0.1875, "loss_num": 0.006103515625, "loss_xval": 0.404296875, "num_input_tokens_seen": 284636628, "step": 5080 }, { "epoch": 11.316258351893095, "grad_norm": 14.39472484588623, "learning_rate": 1e-06, "loss": 0.5526, "num_input_tokens_seen": 284694192, "step": 5081 }, { "epoch": 11.316258351893095, "loss": 0.5521363019943237, "loss_ce": 0.00013434255379252136, "loss_iou": 0.236328125, "loss_num": 0.0159912109375, "loss_xval": 0.55078125, "num_input_tokens_seen": 284694192, "step": 5081 }, { "epoch": 11.3184855233853, "grad_norm": 15.9749755859375, "learning_rate": 1e-06, "loss": 0.4355, "num_input_tokens_seen": 284752428, "step": 5082 }, { "epoch": 11.3184855233853, "loss": 0.5515244603157043, "loss_ce": 0.00013286221656017005, "loss_iou": 0.236328125, "loss_num": 0.0157470703125, "loss_xval": 0.55078125, "num_input_tokens_seen": 284752428, "step": 5082 }, { "epoch": 11.320712694877505, "grad_norm": 17.713111877441406, "learning_rate": 1e-06, "loss": 0.4379, "num_input_tokens_seen": 284806252, "step": 5083 }, { "epoch": 11.320712694877505, "loss": 0.44035178422927856, "loss_ce": 0.0001662498980294913, "loss_iou": 0.2001953125, "loss_num": 0.008056640625, "loss_xval": 0.439453125, "num_input_tokens_seen": 284806252, "step": 5083 }, { "epoch": 11.32293986636971, "grad_norm": 16.1520938873291, "learning_rate": 1e-06, "loss": 0.5477, "num_input_tokens_seen": 284863332, "step": 5084 }, { "epoch": 11.32293986636971, "loss": 0.6391512155532837, "loss_ce": 0.000357240904122591, "loss_iou": 0.26953125, "loss_num": 0.020263671875, "loss_xval": 0.640625, "num_input_tokens_seen": 284863332, "step": 5084 }, { "epoch": 11.325167037861915, "grad_norm": 19.611209869384766, "learning_rate": 1e-06, "loss": 0.4462, "num_input_tokens_seen": 284919672, "step": 5085 }, { "epoch": 11.325167037861915, "loss": 0.37651515007019043, "loss_ce": 0.00017235177801921964, "loss_iou": 0.1650390625, "loss_num": 0.0093994140625, "loss_xval": 0.376953125, "num_input_tokens_seen": 284919672, "step": 5085 }, { "epoch": 11.32739420935412, "grad_norm": 17.526779174804688, "learning_rate": 1e-06, "loss": 0.6155, "num_input_tokens_seen": 284977908, "step": 5086 }, { "epoch": 11.32739420935412, "loss": 0.48550522327423096, "loss_ce": 0.00015366033767350018, "loss_iou": 0.220703125, "loss_num": 0.00885009765625, "loss_xval": 0.484375, "num_input_tokens_seen": 284977908, "step": 5086 }, { "epoch": 11.329621380846325, "grad_norm": 14.409747123718262, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 285033440, "step": 5087 }, { "epoch": 11.329621380846325, "loss": 0.44422948360443115, "loss_ce": 0.00013769487850368023, "loss_iou": 0.177734375, "loss_num": 0.0179443359375, "loss_xval": 0.443359375, "num_input_tokens_seen": 285033440, "step": 5087 }, { "epoch": 11.33184855233853, "grad_norm": 30.620845794677734, "learning_rate": 1e-06, "loss": 0.55, "num_input_tokens_seen": 285089588, "step": 5088 }, { "epoch": 11.33184855233853, "loss": 0.440792441368103, "loss_ce": 0.00011861581879202276, "loss_iou": 0.1865234375, "loss_num": 0.013427734375, "loss_xval": 0.44140625, "num_input_tokens_seen": 285089588, "step": 5088 }, { "epoch": 11.334075723830734, "grad_norm": 21.544681549072266, "learning_rate": 1e-06, "loss": 0.496, "num_input_tokens_seen": 285144700, "step": 5089 }, { "epoch": 11.334075723830734, "loss": 0.5140707492828369, "loss_ce": 0.00015477146371267736, "loss_iou": 0.2255859375, "loss_num": 0.01251220703125, "loss_xval": 0.515625, "num_input_tokens_seen": 285144700, "step": 5089 }, { "epoch": 11.33630289532294, "grad_norm": 26.6117000579834, "learning_rate": 1e-06, "loss": 0.5348, "num_input_tokens_seen": 285200876, "step": 5090 }, { "epoch": 11.33630289532294, "loss": 0.5355468392372131, "loss_ce": 0.00014644389739260077, "loss_iou": 0.2431640625, "loss_num": 0.0098876953125, "loss_xval": 0.53515625, "num_input_tokens_seen": 285200876, "step": 5090 }, { "epoch": 11.338530066815144, "grad_norm": 17.32594871520996, "learning_rate": 1e-06, "loss": 0.6661, "num_input_tokens_seen": 285258992, "step": 5091 }, { "epoch": 11.338530066815144, "loss": 0.8300636410713196, "loss_ce": 0.00022966302640270442, "loss_iou": 0.33203125, "loss_num": 0.032958984375, "loss_xval": 0.828125, "num_input_tokens_seen": 285258992, "step": 5091 }, { "epoch": 11.340757238307349, "grad_norm": 26.523305892944336, "learning_rate": 1e-06, "loss": 0.6046, "num_input_tokens_seen": 285314720, "step": 5092 }, { "epoch": 11.340757238307349, "loss": 0.6371076107025146, "loss_ce": 0.0001447718241252005, "loss_iou": 0.2734375, "loss_num": 0.017822265625, "loss_xval": 0.63671875, "num_input_tokens_seen": 285314720, "step": 5092 }, { "epoch": 11.342984409799554, "grad_norm": 23.49717140197754, "learning_rate": 1e-06, "loss": 0.4436, "num_input_tokens_seen": 285372940, "step": 5093 }, { "epoch": 11.342984409799554, "loss": 0.5071090459823608, "loss_ce": 0.00015102185716386884, "loss_iou": 0.220703125, "loss_num": 0.01318359375, "loss_xval": 0.5078125, "num_input_tokens_seen": 285372940, "step": 5093 }, { "epoch": 11.345211581291759, "grad_norm": 21.801851272583008, "learning_rate": 1e-06, "loss": 0.5209, "num_input_tokens_seen": 285429172, "step": 5094 }, { "epoch": 11.345211581291759, "loss": 0.526289701461792, "loss_ce": 0.00016666974988766015, "loss_iou": 0.2255859375, "loss_num": 0.01507568359375, "loss_xval": 0.52734375, "num_input_tokens_seen": 285429172, "step": 5094 }, { "epoch": 11.347438752783964, "grad_norm": 16.54652214050293, "learning_rate": 1e-06, "loss": 0.6023, "num_input_tokens_seen": 285484976, "step": 5095 }, { "epoch": 11.347438752783964, "loss": 0.5767943263053894, "loss_ce": 0.00013418751768767834, "loss_iou": 0.25390625, "loss_num": 0.01318359375, "loss_xval": 0.578125, "num_input_tokens_seen": 285484976, "step": 5095 }, { "epoch": 11.34966592427617, "grad_norm": 21.48624610900879, "learning_rate": 1e-06, "loss": 0.5497, "num_input_tokens_seen": 285540752, "step": 5096 }, { "epoch": 11.34966592427617, "loss": 0.6203560829162598, "loss_ce": 0.00023887879797257483, "loss_iou": 0.2578125, "loss_num": 0.0211181640625, "loss_xval": 0.62109375, "num_input_tokens_seen": 285540752, "step": 5096 }, { "epoch": 11.351893095768375, "grad_norm": 24.3822021484375, "learning_rate": 1e-06, "loss": 0.553, "num_input_tokens_seen": 285594876, "step": 5097 }, { "epoch": 11.351893095768375, "loss": 0.4341278076171875, "loss_ce": 0.00010682163701858371, "loss_iou": 0.1748046875, "loss_num": 0.0166015625, "loss_xval": 0.43359375, "num_input_tokens_seen": 285594876, "step": 5097 }, { "epoch": 11.35412026726058, "grad_norm": 16.19316864013672, "learning_rate": 1e-06, "loss": 0.5853, "num_input_tokens_seen": 285650340, "step": 5098 }, { "epoch": 11.35412026726058, "loss": 0.7105726003646851, "loss_ce": 0.00012335649807937443, "loss_iou": 0.306640625, "loss_num": 0.019287109375, "loss_xval": 0.7109375, "num_input_tokens_seen": 285650340, "step": 5098 }, { "epoch": 11.356347438752785, "grad_norm": 17.640634536743164, "learning_rate": 1e-06, "loss": 0.516, "num_input_tokens_seen": 285706040, "step": 5099 }, { "epoch": 11.356347438752785, "loss": 0.6544548273086548, "loss_ce": 0.0001579629024490714, "loss_iou": 0.287109375, "loss_num": 0.016357421875, "loss_xval": 0.65625, "num_input_tokens_seen": 285706040, "step": 5099 }, { "epoch": 11.35857461024499, "grad_norm": 22.37962532043457, "learning_rate": 1e-06, "loss": 0.7185, "num_input_tokens_seen": 285762072, "step": 5100 }, { "epoch": 11.35857461024499, "loss": 0.8059933185577393, "loss_ce": 0.00020718795713037252, "loss_iou": 0.306640625, "loss_num": 0.038330078125, "loss_xval": 0.8046875, "num_input_tokens_seen": 285762072, "step": 5100 }, { "epoch": 11.360801781737194, "grad_norm": 14.851436614990234, "learning_rate": 1e-06, "loss": 0.4088, "num_input_tokens_seen": 285818724, "step": 5101 }, { "epoch": 11.360801781737194, "loss": 0.40162622928619385, "loss_ce": 0.0001369684759993106, "loss_iou": 0.1806640625, "loss_num": 0.0081787109375, "loss_xval": 0.40234375, "num_input_tokens_seen": 285818724, "step": 5101 }, { "epoch": 11.3630289532294, "grad_norm": 16.314359664916992, "learning_rate": 1e-06, "loss": 0.4846, "num_input_tokens_seen": 285875484, "step": 5102 }, { "epoch": 11.3630289532294, "loss": 0.46129605174064636, "loss_ce": 0.00011440047819633037, "loss_iou": 0.208984375, "loss_num": 0.0084228515625, "loss_xval": 0.4609375, "num_input_tokens_seen": 285875484, "step": 5102 }, { "epoch": 11.365256124721604, "grad_norm": 23.65479278564453, "learning_rate": 1e-06, "loss": 0.5254, "num_input_tokens_seen": 285931436, "step": 5103 }, { "epoch": 11.365256124721604, "loss": 0.6119776964187622, "loss_ce": 0.00016128391143865883, "loss_iou": 0.267578125, "loss_num": 0.015380859375, "loss_xval": 0.61328125, "num_input_tokens_seen": 285931436, "step": 5103 }, { "epoch": 11.367483296213809, "grad_norm": 19.001941680908203, "learning_rate": 1e-06, "loss": 0.522, "num_input_tokens_seen": 285987400, "step": 5104 }, { "epoch": 11.367483296213809, "loss": 0.6559271216392517, "loss_ce": 0.0001653625804465264, "loss_iou": 0.28125, "loss_num": 0.0184326171875, "loss_xval": 0.65625, "num_input_tokens_seen": 285987400, "step": 5104 }, { "epoch": 11.369710467706014, "grad_norm": 22.59067153930664, "learning_rate": 1e-06, "loss": 0.5637, "num_input_tokens_seen": 286040144, "step": 5105 }, { "epoch": 11.369710467706014, "loss": 0.5408531427383423, "loss_ce": 0.00032578702666796744, "loss_iou": 0.2470703125, "loss_num": 0.009033203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 286040144, "step": 5105 }, { "epoch": 11.371937639198219, "grad_norm": 13.022751808166504, "learning_rate": 1e-06, "loss": 0.4658, "num_input_tokens_seen": 286095652, "step": 5106 }, { "epoch": 11.371937639198219, "loss": 0.49904271960258484, "loss_ce": 0.0001413495047017932, "loss_iou": 0.2099609375, "loss_num": 0.015869140625, "loss_xval": 0.498046875, "num_input_tokens_seen": 286095652, "step": 5106 }, { "epoch": 11.374164810690424, "grad_norm": 17.726234436035156, "learning_rate": 1e-06, "loss": 0.4394, "num_input_tokens_seen": 286153252, "step": 5107 }, { "epoch": 11.374164810690424, "loss": 0.4727289080619812, "loss_ce": 0.00019473947759252042, "loss_iou": 0.189453125, "loss_num": 0.0186767578125, "loss_xval": 0.47265625, "num_input_tokens_seen": 286153252, "step": 5107 }, { "epoch": 11.376391982182628, "grad_norm": 20.799928665161133, "learning_rate": 1e-06, "loss": 0.5046, "num_input_tokens_seen": 286210432, "step": 5108 }, { "epoch": 11.376391982182628, "loss": 0.666250467300415, "loss_ce": 0.00023488188162446022, "loss_iou": 0.26953125, "loss_num": 0.02490234375, "loss_xval": 0.6640625, "num_input_tokens_seen": 286210432, "step": 5108 }, { "epoch": 11.378619153674833, "grad_norm": 21.947765350341797, "learning_rate": 1e-06, "loss": 0.4669, "num_input_tokens_seen": 286266940, "step": 5109 }, { "epoch": 11.378619153674833, "loss": 0.5005042552947998, "loss_ce": 0.00013799341104459018, "loss_iou": 0.22265625, "loss_num": 0.01129150390625, "loss_xval": 0.5, "num_input_tokens_seen": 286266940, "step": 5109 }, { "epoch": 11.380846325167038, "grad_norm": 15.626119613647461, "learning_rate": 1e-06, "loss": 0.5814, "num_input_tokens_seen": 286321024, "step": 5110 }, { "epoch": 11.380846325167038, "loss": 0.5123552680015564, "loss_ce": 0.0001482515363022685, "loss_iou": 0.228515625, "loss_num": 0.01129150390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 286321024, "step": 5110 }, { "epoch": 11.383073496659243, "grad_norm": 20.23106575012207, "learning_rate": 1e-06, "loss": 0.4268, "num_input_tokens_seen": 286376468, "step": 5111 }, { "epoch": 11.383073496659243, "loss": 0.46500349044799805, "loss_ce": 0.00015975304995663464, "loss_iou": 0.21875, "loss_num": 0.005584716796875, "loss_xval": 0.46484375, "num_input_tokens_seen": 286376468, "step": 5111 }, { "epoch": 11.385300668151448, "grad_norm": 16.623905181884766, "learning_rate": 1e-06, "loss": 0.5151, "num_input_tokens_seen": 286429412, "step": 5112 }, { "epoch": 11.385300668151448, "loss": 0.46804624795913696, "loss_ce": 0.00015074793191161007, "loss_iou": 0.181640625, "loss_num": 0.0206298828125, "loss_xval": 0.46875, "num_input_tokens_seen": 286429412, "step": 5112 }, { "epoch": 11.387527839643653, "grad_norm": 20.689804077148438, "learning_rate": 1e-06, "loss": 0.3954, "num_input_tokens_seen": 286486412, "step": 5113 }, { "epoch": 11.387527839643653, "loss": 0.3609923720359802, "loss_ce": 0.00015251885633915663, "loss_iou": 0.1640625, "loss_num": 0.006500244140625, "loss_xval": 0.361328125, "num_input_tokens_seen": 286486412, "step": 5113 }, { "epoch": 11.389755011135858, "grad_norm": 28.57643699645996, "learning_rate": 1e-06, "loss": 0.5011, "num_input_tokens_seen": 286541332, "step": 5114 }, { "epoch": 11.389755011135858, "loss": 0.4327373206615448, "loss_ce": 0.00012012844672426581, "loss_iou": 0.1923828125, "loss_num": 0.009765625, "loss_xval": 0.43359375, "num_input_tokens_seen": 286541332, "step": 5114 }, { "epoch": 11.391982182628063, "grad_norm": 15.638270378112793, "learning_rate": 1e-06, "loss": 0.5359, "num_input_tokens_seen": 286599148, "step": 5115 }, { "epoch": 11.391982182628063, "loss": 0.4875502586364746, "loss_ce": 0.0001235070376424119, "loss_iou": 0.2119140625, "loss_num": 0.01263427734375, "loss_xval": 0.48828125, "num_input_tokens_seen": 286599148, "step": 5115 }, { "epoch": 11.394209354120267, "grad_norm": 22.461973190307617, "learning_rate": 1e-06, "loss": 0.5282, "num_input_tokens_seen": 286656460, "step": 5116 }, { "epoch": 11.394209354120267, "loss": 0.7095192074775696, "loss_ce": 0.00016860665346030146, "loss_iou": 0.28515625, "loss_num": 0.0277099609375, "loss_xval": 0.7109375, "num_input_tokens_seen": 286656460, "step": 5116 }, { "epoch": 11.396436525612472, "grad_norm": 18.777915954589844, "learning_rate": 1e-06, "loss": 0.5819, "num_input_tokens_seen": 286711268, "step": 5117 }, { "epoch": 11.396436525612472, "loss": 0.5128494501113892, "loss_ce": 0.00015413996879942715, "loss_iou": 0.2158203125, "loss_num": 0.0159912109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 286711268, "step": 5117 }, { "epoch": 11.398663697104677, "grad_norm": 18.205013275146484, "learning_rate": 1e-06, "loss": 0.4723, "num_input_tokens_seen": 286767632, "step": 5118 }, { "epoch": 11.398663697104677, "loss": 0.44845569133758545, "loss_ce": 0.0001524692343082279, "loss_iou": 0.2060546875, "loss_num": 0.007232666015625, "loss_xval": 0.44921875, "num_input_tokens_seen": 286767632, "step": 5118 }, { "epoch": 11.400890868596882, "grad_norm": 14.385238647460938, "learning_rate": 1e-06, "loss": 0.5913, "num_input_tokens_seen": 286824064, "step": 5119 }, { "epoch": 11.400890868596882, "loss": 0.6861345767974854, "loss_ce": 9.943717304849997e-05, "loss_iou": 0.310546875, "loss_num": 0.013427734375, "loss_xval": 0.6875, "num_input_tokens_seen": 286824064, "step": 5119 }, { "epoch": 11.403118040089087, "grad_norm": 15.494654655456543, "learning_rate": 1e-06, "loss": 0.3787, "num_input_tokens_seen": 286881632, "step": 5120 }, { "epoch": 11.403118040089087, "loss": 0.36211222410202026, "loss_ce": 0.00011270066897850484, "loss_iou": 0.158203125, "loss_num": 0.0089111328125, "loss_xval": 0.361328125, "num_input_tokens_seen": 286881632, "step": 5120 }, { "epoch": 11.405345211581292, "grad_norm": 17.95199203491211, "learning_rate": 1e-06, "loss": 0.711, "num_input_tokens_seen": 286934124, "step": 5121 }, { "epoch": 11.405345211581292, "loss": 0.7193175554275513, "loss_ce": 0.00020134558144491166, "loss_iou": 0.31640625, "loss_num": 0.0172119140625, "loss_xval": 0.71875, "num_input_tokens_seen": 286934124, "step": 5121 }, { "epoch": 11.407572383073497, "grad_norm": 28.730737686157227, "learning_rate": 1e-06, "loss": 0.5827, "num_input_tokens_seen": 286990012, "step": 5122 }, { "epoch": 11.407572383073497, "loss": 0.39236217737197876, "loss_ce": 0.00019602719112299383, "loss_iou": 0.177734375, "loss_num": 0.00750732421875, "loss_xval": 0.392578125, "num_input_tokens_seen": 286990012, "step": 5122 }, { "epoch": 11.409799554565701, "grad_norm": 39.97563171386719, "learning_rate": 1e-06, "loss": 0.6908, "num_input_tokens_seen": 287044416, "step": 5123 }, { "epoch": 11.409799554565701, "loss": 0.6997367739677429, "loss_ce": 0.00015180771879386157, "loss_iou": 0.306640625, "loss_num": 0.016845703125, "loss_xval": 0.69921875, "num_input_tokens_seen": 287044416, "step": 5123 }, { "epoch": 11.412026726057906, "grad_norm": 21.006521224975586, "learning_rate": 1e-06, "loss": 0.5632, "num_input_tokens_seen": 287100240, "step": 5124 }, { "epoch": 11.412026726057906, "loss": 0.6319659352302551, "loss_ce": 0.00012999521277379245, "loss_iou": 0.279296875, "loss_num": 0.01458740234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 287100240, "step": 5124 }, { "epoch": 11.414253897550111, "grad_norm": 17.86157989501953, "learning_rate": 1e-06, "loss": 0.5612, "num_input_tokens_seen": 287157868, "step": 5125 }, { "epoch": 11.414253897550111, "loss": 0.3876880407333374, "loss_ce": 0.00011479659588076174, "loss_iou": 0.162109375, "loss_num": 0.01263427734375, "loss_xval": 0.38671875, "num_input_tokens_seen": 287157868, "step": 5125 }, { "epoch": 11.416481069042316, "grad_norm": 16.256969451904297, "learning_rate": 1e-06, "loss": 0.6867, "num_input_tokens_seen": 287212888, "step": 5126 }, { "epoch": 11.416481069042316, "loss": 0.6945188045501709, "loss_ce": 0.00012188311666250229, "loss_iou": 0.28515625, "loss_num": 0.025146484375, "loss_xval": 0.6953125, "num_input_tokens_seen": 287212888, "step": 5126 }, { "epoch": 11.41870824053452, "grad_norm": 36.22967529296875, "learning_rate": 1e-06, "loss": 0.6743, "num_input_tokens_seen": 287268684, "step": 5127 }, { "epoch": 11.41870824053452, "loss": 0.8071569204330444, "loss_ce": 0.00015009319758974016, "loss_iou": 0.375, "loss_num": 0.01123046875, "loss_xval": 0.80859375, "num_input_tokens_seen": 287268684, "step": 5127 }, { "epoch": 11.420935412026726, "grad_norm": 18.855321884155273, "learning_rate": 1e-06, "loss": 0.6559, "num_input_tokens_seen": 287326204, "step": 5128 }, { "epoch": 11.420935412026726, "loss": 0.6211212873458862, "loss_ce": 0.00014961831038817763, "loss_iou": 0.265625, "loss_num": 0.017578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 287326204, "step": 5128 }, { "epoch": 11.42316258351893, "grad_norm": 13.376533508300781, "learning_rate": 1e-06, "loss": 0.5103, "num_input_tokens_seen": 287381800, "step": 5129 }, { "epoch": 11.42316258351893, "loss": 0.5299174189567566, "loss_ce": 0.0001627803430892527, "loss_iou": 0.2294921875, "loss_num": 0.0142822265625, "loss_xval": 0.53125, "num_input_tokens_seen": 287381800, "step": 5129 }, { "epoch": 11.425389755011135, "grad_norm": 22.959978103637695, "learning_rate": 1e-06, "loss": 0.5615, "num_input_tokens_seen": 287435956, "step": 5130 }, { "epoch": 11.425389755011135, "loss": 0.45396700501441956, "loss_ce": 0.0001095635088859126, "loss_iou": 0.1923828125, "loss_num": 0.013916015625, "loss_xval": 0.453125, "num_input_tokens_seen": 287435956, "step": 5130 }, { "epoch": 11.42761692650334, "grad_norm": 25.492908477783203, "learning_rate": 1e-06, "loss": 0.5793, "num_input_tokens_seen": 287491780, "step": 5131 }, { "epoch": 11.42761692650334, "loss": 0.6335743069648743, "loss_ce": 0.0001514378236606717, "loss_iou": 0.259765625, "loss_num": 0.0230712890625, "loss_xval": 0.6328125, "num_input_tokens_seen": 287491780, "step": 5131 }, { "epoch": 11.429844097995545, "grad_norm": 24.56211280822754, "learning_rate": 1e-06, "loss": 0.6357, "num_input_tokens_seen": 287548600, "step": 5132 }, { "epoch": 11.429844097995545, "loss": 0.28725236654281616, "loss_ce": 0.000387123815016821, "loss_iou": 0.1162109375, "loss_num": 0.0108642578125, "loss_xval": 0.287109375, "num_input_tokens_seen": 287548600, "step": 5132 }, { "epoch": 11.43207126948775, "grad_norm": 19.3801212310791, "learning_rate": 1e-06, "loss": 0.6795, "num_input_tokens_seen": 287603800, "step": 5133 }, { "epoch": 11.43207126948775, "loss": 0.7802882194519043, "loss_ce": 0.00013687220052815974, "loss_iou": 0.3203125, "loss_num": 0.0277099609375, "loss_xval": 0.78125, "num_input_tokens_seen": 287603800, "step": 5133 }, { "epoch": 11.434298440979955, "grad_norm": 32.62427520751953, "learning_rate": 1e-06, "loss": 0.658, "num_input_tokens_seen": 287658344, "step": 5134 }, { "epoch": 11.434298440979955, "loss": 0.539149820804596, "loss_ce": 0.0001483388477936387, "loss_iou": 0.232421875, "loss_num": 0.01513671875, "loss_xval": 0.5390625, "num_input_tokens_seen": 287658344, "step": 5134 }, { "epoch": 11.43652561247216, "grad_norm": 36.44560623168945, "learning_rate": 1e-06, "loss": 0.6056, "num_input_tokens_seen": 287713276, "step": 5135 }, { "epoch": 11.43652561247216, "loss": 0.49588677287101746, "loss_ce": 0.0001592589687788859, "loss_iou": 0.2265625, "loss_num": 0.00848388671875, "loss_xval": 0.49609375, "num_input_tokens_seen": 287713276, "step": 5135 }, { "epoch": 11.438752783964365, "grad_norm": 29.658565521240234, "learning_rate": 1e-06, "loss": 0.3693, "num_input_tokens_seen": 287770612, "step": 5136 }, { "epoch": 11.438752783964365, "loss": 0.3307318091392517, "loss_ce": 0.000165409262990579, "loss_iou": 0.1494140625, "loss_num": 0.0064697265625, "loss_xval": 0.330078125, "num_input_tokens_seen": 287770612, "step": 5136 }, { "epoch": 11.44097995545657, "grad_norm": 16.48049545288086, "learning_rate": 1e-06, "loss": 0.508, "num_input_tokens_seen": 287824620, "step": 5137 }, { "epoch": 11.44097995545657, "loss": 0.45837414264678955, "loss_ce": 0.00012218940537422895, "loss_iou": 0.1953125, "loss_num": 0.01336669921875, "loss_xval": 0.458984375, "num_input_tokens_seen": 287824620, "step": 5137 }, { "epoch": 11.443207126948774, "grad_norm": 22.14146614074707, "learning_rate": 1e-06, "loss": 0.5379, "num_input_tokens_seen": 287882580, "step": 5138 }, { "epoch": 11.443207126948774, "loss": 0.5207144021987915, "loss_ce": 0.0001455346355214715, "loss_iou": 0.216796875, "loss_num": 0.017333984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 287882580, "step": 5138 }, { "epoch": 11.44543429844098, "grad_norm": 19.23894500732422, "learning_rate": 1e-06, "loss": 0.3896, "num_input_tokens_seen": 287938492, "step": 5139 }, { "epoch": 11.44543429844098, "loss": 0.3553389310836792, "loss_ce": 0.0001143506815424189, "loss_iou": 0.154296875, "loss_num": 0.0093994140625, "loss_xval": 0.35546875, "num_input_tokens_seen": 287938492, "step": 5139 }, { "epoch": 11.447661469933184, "grad_norm": 17.079357147216797, "learning_rate": 1e-06, "loss": 0.5968, "num_input_tokens_seen": 287994108, "step": 5140 }, { "epoch": 11.447661469933184, "loss": 0.5203468203544617, "loss_ce": 0.00014417944476008415, "loss_iou": 0.236328125, "loss_num": 0.009521484375, "loss_xval": 0.51953125, "num_input_tokens_seen": 287994108, "step": 5140 }, { "epoch": 11.449888641425389, "grad_norm": 14.164678573608398, "learning_rate": 1e-06, "loss": 0.5193, "num_input_tokens_seen": 288049672, "step": 5141 }, { "epoch": 11.449888641425389, "loss": 0.4837890863418579, "loss_ce": 0.0001465213717892766, "loss_iou": 0.2060546875, "loss_num": 0.014404296875, "loss_xval": 0.484375, "num_input_tokens_seen": 288049672, "step": 5141 }, { "epoch": 11.452115812917596, "grad_norm": 20.210201263427734, "learning_rate": 1e-06, "loss": 0.7254, "num_input_tokens_seen": 288106140, "step": 5142 }, { "epoch": 11.452115812917596, "loss": 0.6080299615859985, "loss_ce": 0.00011977371468674392, "loss_iou": 0.267578125, "loss_num": 0.01470947265625, "loss_xval": 0.609375, "num_input_tokens_seen": 288106140, "step": 5142 }, { "epoch": 11.4543429844098, "grad_norm": 20.572181701660156, "learning_rate": 1e-06, "loss": 0.4897, "num_input_tokens_seen": 288163140, "step": 5143 }, { "epoch": 11.4543429844098, "loss": 0.49498531222343445, "loss_ce": 0.00011225108028156683, "loss_iou": 0.220703125, "loss_num": 0.01068115234375, "loss_xval": 0.494140625, "num_input_tokens_seen": 288163140, "step": 5143 }, { "epoch": 11.456570155902005, "grad_norm": 24.588138580322266, "learning_rate": 1e-06, "loss": 0.5276, "num_input_tokens_seen": 288219856, "step": 5144 }, { "epoch": 11.456570155902005, "loss": 0.5592126846313477, "loss_ce": 0.0003747770097106695, "loss_iou": 0.240234375, "loss_num": 0.015625, "loss_xval": 0.55859375, "num_input_tokens_seen": 288219856, "step": 5144 }, { "epoch": 11.45879732739421, "grad_norm": 16.87946319580078, "learning_rate": 1e-06, "loss": 0.3826, "num_input_tokens_seen": 288276876, "step": 5145 }, { "epoch": 11.45879732739421, "loss": 0.4041239619255066, "loss_ce": 0.0008036848739720881, "loss_iou": 0.1689453125, "loss_num": 0.01318359375, "loss_xval": 0.40234375, "num_input_tokens_seen": 288276876, "step": 5145 }, { "epoch": 11.461024498886415, "grad_norm": 34.00703811645508, "learning_rate": 1e-06, "loss": 0.5651, "num_input_tokens_seen": 288334908, "step": 5146 }, { "epoch": 11.461024498886415, "loss": 0.4293323755264282, "loss_ce": 0.0001331909152213484, "loss_iou": 0.2001953125, "loss_num": 0.0059814453125, "loss_xval": 0.4296875, "num_input_tokens_seen": 288334908, "step": 5146 }, { "epoch": 11.46325167037862, "grad_norm": 21.397933959960938, "learning_rate": 1e-06, "loss": 0.4555, "num_input_tokens_seen": 288393052, "step": 5147 }, { "epoch": 11.46325167037862, "loss": 0.4838143587112427, "loss_ce": 0.00017176388064399362, "loss_iou": 0.203125, "loss_num": 0.015380859375, "loss_xval": 0.484375, "num_input_tokens_seen": 288393052, "step": 5147 }, { "epoch": 11.465478841870825, "grad_norm": 26.195497512817383, "learning_rate": 1e-06, "loss": 0.4912, "num_input_tokens_seen": 288445524, "step": 5148 }, { "epoch": 11.465478841870825, "loss": 0.5275430679321289, "loss_ce": 0.0008096360834315419, "loss_iou": 0.2236328125, "loss_num": 0.0159912109375, "loss_xval": 0.52734375, "num_input_tokens_seen": 288445524, "step": 5148 }, { "epoch": 11.46770601336303, "grad_norm": 24.309276580810547, "learning_rate": 1e-06, "loss": 0.4972, "num_input_tokens_seen": 288500740, "step": 5149 }, { "epoch": 11.46770601336303, "loss": 0.623292088508606, "loss_ce": 0.00012314121704548597, "loss_iou": 0.263671875, "loss_num": 0.0191650390625, "loss_xval": 0.625, "num_input_tokens_seen": 288500740, "step": 5149 }, { "epoch": 11.469933184855234, "grad_norm": 16.2718448638916, "learning_rate": 1e-06, "loss": 0.6084, "num_input_tokens_seen": 288553616, "step": 5150 }, { "epoch": 11.469933184855234, "loss": 0.4980197846889496, "loss_ce": 9.498859435552731e-05, "loss_iou": 0.197265625, "loss_num": 0.0206298828125, "loss_xval": 0.498046875, "num_input_tokens_seen": 288553616, "step": 5150 }, { "epoch": 11.47216035634744, "grad_norm": 14.356439590454102, "learning_rate": 1e-06, "loss": 0.4675, "num_input_tokens_seen": 288611856, "step": 5151 }, { "epoch": 11.47216035634744, "loss": 0.6592938303947449, "loss_ce": 0.00011415663175284863, "loss_iou": 0.287109375, "loss_num": 0.0172119140625, "loss_xval": 0.66015625, "num_input_tokens_seen": 288611856, "step": 5151 }, { "epoch": 11.474387527839644, "grad_norm": 15.644416809082031, "learning_rate": 1e-06, "loss": 0.5468, "num_input_tokens_seen": 288669496, "step": 5152 }, { "epoch": 11.474387527839644, "loss": 0.4559364318847656, "loss_ce": 0.00012589515245053917, "loss_iou": 0.189453125, "loss_num": 0.01531982421875, "loss_xval": 0.455078125, "num_input_tokens_seen": 288669496, "step": 5152 }, { "epoch": 11.476614699331849, "grad_norm": 17.78461265563965, "learning_rate": 1e-06, "loss": 0.4928, "num_input_tokens_seen": 288727056, "step": 5153 }, { "epoch": 11.476614699331849, "loss": 0.6607722043991089, "loss_ce": 0.00012771939509548247, "loss_iou": 0.2578125, "loss_num": 0.0289306640625, "loss_xval": 0.66015625, "num_input_tokens_seen": 288727056, "step": 5153 }, { "epoch": 11.478841870824054, "grad_norm": 12.651546478271484, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 288782852, "step": 5154 }, { "epoch": 11.478841870824054, "loss": 0.3253457248210907, "loss_ce": 0.00015041950973682106, "loss_iou": 0.1484375, "loss_num": 0.0054931640625, "loss_xval": 0.32421875, "num_input_tokens_seen": 288782852, "step": 5154 }, { "epoch": 11.481069042316259, "grad_norm": 18.01323890686035, "learning_rate": 1e-06, "loss": 0.6355, "num_input_tokens_seen": 288835892, "step": 5155 }, { "epoch": 11.481069042316259, "loss": 0.5539816617965698, "loss_ce": 0.00014869558799546212, "loss_iou": 0.240234375, "loss_num": 0.01458740234375, "loss_xval": 0.5546875, "num_input_tokens_seen": 288835892, "step": 5155 }, { "epoch": 11.483296213808464, "grad_norm": 13.251718521118164, "learning_rate": 1e-06, "loss": 0.5962, "num_input_tokens_seen": 288891508, "step": 5156 }, { "epoch": 11.483296213808464, "loss": 0.8043664693832397, "loss_ce": 0.00016726629110053182, "loss_iou": 0.337890625, "loss_num": 0.02587890625, "loss_xval": 0.8046875, "num_input_tokens_seen": 288891508, "step": 5156 }, { "epoch": 11.485523385300668, "grad_norm": 13.777200698852539, "learning_rate": 1e-06, "loss": 0.4696, "num_input_tokens_seen": 288948688, "step": 5157 }, { "epoch": 11.485523385300668, "loss": 0.4466667175292969, "loss_ce": 0.00013350519293453544, "loss_iou": 0.201171875, "loss_num": 0.0089111328125, "loss_xval": 0.447265625, "num_input_tokens_seen": 288948688, "step": 5157 }, { "epoch": 11.487750556792873, "grad_norm": 19.02153968811035, "learning_rate": 1e-06, "loss": 0.3835, "num_input_tokens_seen": 289004432, "step": 5158 }, { "epoch": 11.487750556792873, "loss": 0.5000653266906738, "loss_ce": 0.00018739307415671647, "loss_iou": 0.19921875, "loss_num": 0.0205078125, "loss_xval": 0.5, "num_input_tokens_seen": 289004432, "step": 5158 }, { "epoch": 11.489977728285078, "grad_norm": 35.4432258605957, "learning_rate": 1e-06, "loss": 0.4898, "num_input_tokens_seen": 289058552, "step": 5159 }, { "epoch": 11.489977728285078, "loss": 0.3960947096347809, "loss_ce": 9.86138402367942e-05, "loss_iou": 0.162109375, "loss_num": 0.014404296875, "loss_xval": 0.396484375, "num_input_tokens_seen": 289058552, "step": 5159 }, { "epoch": 11.492204899777283, "grad_norm": 44.51033401489258, "learning_rate": 1e-06, "loss": 0.4918, "num_input_tokens_seen": 289115320, "step": 5160 }, { "epoch": 11.492204899777283, "loss": 0.5892336964607239, "loss_ce": 0.00012237070768605918, "loss_iou": 0.283203125, "loss_num": 0.00482177734375, "loss_xval": 0.58984375, "num_input_tokens_seen": 289115320, "step": 5160 }, { "epoch": 11.494432071269488, "grad_norm": 20.451549530029297, "learning_rate": 1e-06, "loss": 0.6798, "num_input_tokens_seen": 289171932, "step": 5161 }, { "epoch": 11.494432071269488, "loss": 0.5714766383171082, "loss_ce": 0.00018755605560727417, "loss_iou": 0.2421875, "loss_num": 0.01708984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 289171932, "step": 5161 }, { "epoch": 11.496659242761693, "grad_norm": 17.223535537719727, "learning_rate": 1e-06, "loss": 0.3775, "num_input_tokens_seen": 289227384, "step": 5162 }, { "epoch": 11.496659242761693, "loss": 0.41967087984085083, "loss_ce": 0.00011523931607371196, "loss_iou": 0.1845703125, "loss_num": 0.010009765625, "loss_xval": 0.419921875, "num_input_tokens_seen": 289227384, "step": 5162 }, { "epoch": 11.498886414253898, "grad_norm": 18.763608932495117, "learning_rate": 1e-06, "loss": 0.3766, "num_input_tokens_seen": 289282336, "step": 5163 }, { "epoch": 11.498886414253898, "loss": 0.39795833826065063, "loss_ce": 0.0001312018430326134, "loss_iou": 0.17578125, "loss_num": 0.00909423828125, "loss_xval": 0.3984375, "num_input_tokens_seen": 289282336, "step": 5163 }, { "epoch": 11.501113585746102, "grad_norm": 25.63532829284668, "learning_rate": 1e-06, "loss": 0.6739, "num_input_tokens_seen": 289337816, "step": 5164 }, { "epoch": 11.501113585746102, "loss": 0.7168751358985901, "loss_ce": 0.00026139506371691823, "loss_iou": 0.271484375, "loss_num": 0.034912109375, "loss_xval": 0.71484375, "num_input_tokens_seen": 289337816, "step": 5164 }, { "epoch": 11.503340757238307, "grad_norm": 17.458229064941406, "learning_rate": 1e-06, "loss": 0.5773, "num_input_tokens_seen": 289394264, "step": 5165 }, { "epoch": 11.503340757238307, "loss": 0.570071816444397, "loss_ce": 0.00012553209671750665, "loss_iou": 0.2431640625, "loss_num": 0.0164794921875, "loss_xval": 0.5703125, "num_input_tokens_seen": 289394264, "step": 5165 }, { "epoch": 11.505567928730512, "grad_norm": 15.031347274780273, "learning_rate": 1e-06, "loss": 0.4042, "num_input_tokens_seen": 289452076, "step": 5166 }, { "epoch": 11.505567928730512, "loss": 0.32777801156044006, "loss_ce": 0.0002633580006659031, "loss_iou": 0.14453125, "loss_num": 0.0078125, "loss_xval": 0.328125, "num_input_tokens_seen": 289452076, "step": 5166 }, { "epoch": 11.507795100222717, "grad_norm": 17.256629943847656, "learning_rate": 1e-06, "loss": 0.5362, "num_input_tokens_seen": 289507452, "step": 5167 }, { "epoch": 11.507795100222717, "loss": 0.6130794286727905, "loss_ce": 0.00016442046035081148, "loss_iou": 0.25390625, "loss_num": 0.021484375, "loss_xval": 0.61328125, "num_input_tokens_seen": 289507452, "step": 5167 }, { "epoch": 11.510022271714922, "grad_norm": 23.37602424621582, "learning_rate": 1e-06, "loss": 0.5456, "num_input_tokens_seen": 289562412, "step": 5168 }, { "epoch": 11.510022271714922, "loss": 0.6275073289871216, "loss_ce": 0.0001879626652225852, "loss_iou": 0.28125, "loss_num": 0.01318359375, "loss_xval": 0.62890625, "num_input_tokens_seen": 289562412, "step": 5168 }, { "epoch": 11.512249443207127, "grad_norm": 28.662569046020508, "learning_rate": 1e-06, "loss": 0.5528, "num_input_tokens_seen": 289618920, "step": 5169 }, { "epoch": 11.512249443207127, "loss": 0.564295768737793, "loss_ce": 0.00020888610742986202, "loss_iou": 0.24609375, "loss_num": 0.01422119140625, "loss_xval": 0.5625, "num_input_tokens_seen": 289618920, "step": 5169 }, { "epoch": 11.514476614699332, "grad_norm": 28.88167381286621, "learning_rate": 1e-06, "loss": 0.5267, "num_input_tokens_seen": 289675588, "step": 5170 }, { "epoch": 11.514476614699332, "loss": 0.46462541818618774, "loss_ce": 0.0001478660269640386, "loss_iou": 0.212890625, "loss_num": 0.00787353515625, "loss_xval": 0.46484375, "num_input_tokens_seen": 289675588, "step": 5170 }, { "epoch": 11.516703786191536, "grad_norm": 12.050206184387207, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 289733692, "step": 5171 }, { "epoch": 11.516703786191536, "loss": 0.41063833236694336, "loss_ce": 0.00011587166227400303, "loss_iou": 0.18359375, "loss_num": 0.00860595703125, "loss_xval": 0.41015625, "num_input_tokens_seen": 289733692, "step": 5171 }, { "epoch": 11.518930957683741, "grad_norm": 13.707185745239258, "learning_rate": 1e-06, "loss": 0.5053, "num_input_tokens_seen": 289791396, "step": 5172 }, { "epoch": 11.518930957683741, "loss": 0.5518975257873535, "loss_ce": 0.00013974419562146068, "loss_iou": 0.2578125, "loss_num": 0.00677490234375, "loss_xval": 0.55078125, "num_input_tokens_seen": 289791396, "step": 5172 }, { "epoch": 11.521158129175946, "grad_norm": 17.915735244750977, "learning_rate": 1e-06, "loss": 0.6097, "num_input_tokens_seen": 289848220, "step": 5173 }, { "epoch": 11.521158129175946, "loss": 0.711284875869751, "loss_ce": 0.0003473775868769735, "loss_iou": 0.28515625, "loss_num": 0.028076171875, "loss_xval": 0.7109375, "num_input_tokens_seen": 289848220, "step": 5173 }, { "epoch": 11.523385300668151, "grad_norm": 19.404088973999023, "learning_rate": 1e-06, "loss": 0.4896, "num_input_tokens_seen": 289907036, "step": 5174 }, { "epoch": 11.523385300668151, "loss": 0.4599684178829193, "loss_ce": 0.00012956123100593686, "loss_iou": 0.203125, "loss_num": 0.0107421875, "loss_xval": 0.458984375, "num_input_tokens_seen": 289907036, "step": 5174 }, { "epoch": 11.525612472160356, "grad_norm": 14.955151557922363, "learning_rate": 1e-06, "loss": 0.4704, "num_input_tokens_seen": 289963580, "step": 5175 }, { "epoch": 11.525612472160356, "loss": 0.6151525974273682, "loss_ce": 0.0001623880089027807, "loss_iou": 0.271484375, "loss_num": 0.01409912109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 289963580, "step": 5175 }, { "epoch": 11.52783964365256, "grad_norm": 30.26787567138672, "learning_rate": 1e-06, "loss": 0.4761, "num_input_tokens_seen": 290016656, "step": 5176 }, { "epoch": 11.52783964365256, "loss": 0.3829212784767151, "loss_ce": 0.00010878632019739598, "loss_iou": 0.15625, "loss_num": 0.0140380859375, "loss_xval": 0.3828125, "num_input_tokens_seen": 290016656, "step": 5176 }, { "epoch": 11.530066815144766, "grad_norm": 16.603086471557617, "learning_rate": 1e-06, "loss": 0.447, "num_input_tokens_seen": 290074272, "step": 5177 }, { "epoch": 11.530066815144766, "loss": 0.38126683235168457, "loss_ce": 0.00016332257655449212, "loss_iou": 0.16796875, "loss_num": 0.00909423828125, "loss_xval": 0.380859375, "num_input_tokens_seen": 290074272, "step": 5177 }, { "epoch": 11.53229398663697, "grad_norm": 13.765169143676758, "learning_rate": 1e-06, "loss": 0.6256, "num_input_tokens_seen": 290129748, "step": 5178 }, { "epoch": 11.53229398663697, "loss": 0.6682248115539551, "loss_ce": 0.00013396795839071274, "loss_iou": 0.294921875, "loss_num": 0.015869140625, "loss_xval": 0.66796875, "num_input_tokens_seen": 290129748, "step": 5178 }, { "epoch": 11.534521158129175, "grad_norm": 26.716320037841797, "learning_rate": 1e-06, "loss": 0.7209, "num_input_tokens_seen": 290182560, "step": 5179 }, { "epoch": 11.534521158129175, "loss": 0.7032727003097534, "loss_ce": 0.0001476738107157871, "loss_iou": 0.3046875, "loss_num": 0.0181884765625, "loss_xval": 0.703125, "num_input_tokens_seen": 290182560, "step": 5179 }, { "epoch": 11.53674832962138, "grad_norm": 23.010082244873047, "learning_rate": 1e-06, "loss": 0.7079, "num_input_tokens_seen": 290238272, "step": 5180 }, { "epoch": 11.53674832962138, "loss": 0.6293861865997314, "loss_ce": 0.00023578619584441185, "loss_iou": 0.28515625, "loss_num": 0.01171875, "loss_xval": 0.62890625, "num_input_tokens_seen": 290238272, "step": 5180 }, { "epoch": 11.538975501113585, "grad_norm": 12.722779273986816, "learning_rate": 1e-06, "loss": 0.4231, "num_input_tokens_seen": 290294832, "step": 5181 }, { "epoch": 11.538975501113585, "loss": 0.5143045783042908, "loss_ce": 0.00014442717656493187, "loss_iou": 0.2236328125, "loss_num": 0.01324462890625, "loss_xval": 0.515625, "num_input_tokens_seen": 290294832, "step": 5181 }, { "epoch": 11.54120267260579, "grad_norm": 17.865341186523438, "learning_rate": 1e-06, "loss": 0.599, "num_input_tokens_seen": 290351116, "step": 5182 }, { "epoch": 11.54120267260579, "loss": 0.6641983985900879, "loss_ce": 0.0001359161688014865, "loss_iou": 0.255859375, "loss_num": 0.03076171875, "loss_xval": 0.6640625, "num_input_tokens_seen": 290351116, "step": 5182 }, { "epoch": 11.543429844097995, "grad_norm": 16.24390983581543, "learning_rate": 1e-06, "loss": 0.4797, "num_input_tokens_seen": 290406636, "step": 5183 }, { "epoch": 11.543429844097995, "loss": 0.5366250276565552, "loss_ce": 0.0001260171557078138, "loss_iou": 0.2255859375, "loss_num": 0.01708984375, "loss_xval": 0.53515625, "num_input_tokens_seen": 290406636, "step": 5183 }, { "epoch": 11.5456570155902, "grad_norm": 33.181251525878906, "learning_rate": 1e-06, "loss": 0.5539, "num_input_tokens_seen": 290464236, "step": 5184 }, { "epoch": 11.5456570155902, "loss": 0.4681469798088074, "loss_ce": 0.0001293782697757706, "loss_iou": 0.2138671875, "loss_num": 0.008056640625, "loss_xval": 0.46875, "num_input_tokens_seen": 290464236, "step": 5184 }, { "epoch": 11.547884187082406, "grad_norm": 15.906049728393555, "learning_rate": 1e-06, "loss": 0.4499, "num_input_tokens_seen": 290519360, "step": 5185 }, { "epoch": 11.547884187082406, "loss": 0.5545018315315247, "loss_ce": 0.00018054830434266478, "loss_iou": 0.2294921875, "loss_num": 0.0191650390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 290519360, "step": 5185 }, { "epoch": 11.550111358574611, "grad_norm": 25.81562042236328, "learning_rate": 1e-06, "loss": 0.5231, "num_input_tokens_seen": 290572196, "step": 5186 }, { "epoch": 11.550111358574611, "loss": 0.5178329944610596, "loss_ce": 0.00025483942590653896, "loss_iou": 0.23046875, "loss_num": 0.01141357421875, "loss_xval": 0.515625, "num_input_tokens_seen": 290572196, "step": 5186 }, { "epoch": 11.552338530066816, "grad_norm": 17.265830993652344, "learning_rate": 1e-06, "loss": 0.4436, "num_input_tokens_seen": 290629824, "step": 5187 }, { "epoch": 11.552338530066816, "loss": 0.4118680953979492, "loss_ce": 0.0001249448541784659, "loss_iou": 0.1728515625, "loss_num": 0.01318359375, "loss_xval": 0.412109375, "num_input_tokens_seen": 290629824, "step": 5187 }, { "epoch": 11.55456570155902, "grad_norm": 13.806538581848145, "learning_rate": 1e-06, "loss": 0.465, "num_input_tokens_seen": 290683676, "step": 5188 }, { "epoch": 11.55456570155902, "loss": 0.28974953293800354, "loss_ce": 0.00010721624130383134, "loss_iou": 0.1220703125, "loss_num": 0.00921630859375, "loss_xval": 0.2890625, "num_input_tokens_seen": 290683676, "step": 5188 }, { "epoch": 11.556792873051226, "grad_norm": 25.621070861816406, "learning_rate": 1e-06, "loss": 0.5772, "num_input_tokens_seen": 290738676, "step": 5189 }, { "epoch": 11.556792873051226, "loss": 0.5882642865180969, "loss_ce": 0.0001295104157179594, "loss_iou": 0.25, "loss_num": 0.0174560546875, "loss_xval": 0.58984375, "num_input_tokens_seen": 290738676, "step": 5189 }, { "epoch": 11.55902004454343, "grad_norm": 21.53620719909668, "learning_rate": 1e-06, "loss": 0.583, "num_input_tokens_seen": 290793796, "step": 5190 }, { "epoch": 11.55902004454343, "loss": 0.6559303998947144, "loss_ce": 0.000412814028095454, "loss_iou": 0.27734375, "loss_num": 0.0203857421875, "loss_xval": 0.65625, "num_input_tokens_seen": 290793796, "step": 5190 }, { "epoch": 11.561247216035635, "grad_norm": 16.96056365966797, "learning_rate": 1e-06, "loss": 0.5001, "num_input_tokens_seen": 290850768, "step": 5191 }, { "epoch": 11.561247216035635, "loss": 0.5135844349861145, "loss_ce": 0.00015668988635297865, "loss_iou": 0.23046875, "loss_num": 0.010498046875, "loss_xval": 0.51171875, "num_input_tokens_seen": 290850768, "step": 5191 }, { "epoch": 11.56347438752784, "grad_norm": 19.151756286621094, "learning_rate": 1e-06, "loss": 0.5346, "num_input_tokens_seen": 290905584, "step": 5192 }, { "epoch": 11.56347438752784, "loss": 0.3937605321407318, "loss_ce": 0.00014479970559477806, "loss_iou": 0.1640625, "loss_num": 0.0130615234375, "loss_xval": 0.39453125, "num_input_tokens_seen": 290905584, "step": 5192 }, { "epoch": 11.565701559020045, "grad_norm": 19.19948959350586, "learning_rate": 1e-06, "loss": 0.5524, "num_input_tokens_seen": 290963692, "step": 5193 }, { "epoch": 11.565701559020045, "loss": 0.6412444114685059, "loss_ce": 0.00013114791363477707, "loss_iou": 0.267578125, "loss_num": 0.0208740234375, "loss_xval": 0.640625, "num_input_tokens_seen": 290963692, "step": 5193 }, { "epoch": 11.56792873051225, "grad_norm": 18.386707305908203, "learning_rate": 1e-06, "loss": 0.4723, "num_input_tokens_seen": 291021232, "step": 5194 }, { "epoch": 11.56792873051225, "loss": 0.3769450783729553, "loss_ce": 0.0002360670769121498, "loss_iou": 0.1533203125, "loss_num": 0.01397705078125, "loss_xval": 0.376953125, "num_input_tokens_seen": 291021232, "step": 5194 }, { "epoch": 11.570155902004455, "grad_norm": 20.23085594177246, "learning_rate": 1e-06, "loss": 0.7242, "num_input_tokens_seen": 291075260, "step": 5195 }, { "epoch": 11.570155902004455, "loss": 0.5950526595115662, "loss_ce": 0.00014300120528787374, "loss_iou": 0.2734375, "loss_num": 0.00982666015625, "loss_xval": 0.59375, "num_input_tokens_seen": 291075260, "step": 5195 }, { "epoch": 11.57238307349666, "grad_norm": 23.331451416015625, "learning_rate": 1e-06, "loss": 0.7261, "num_input_tokens_seen": 291131208, "step": 5196 }, { "epoch": 11.57238307349666, "loss": 0.664703905582428, "loss_ce": 0.000153136788867414, "loss_iou": 0.28125, "loss_num": 0.020263671875, "loss_xval": 0.6640625, "num_input_tokens_seen": 291131208, "step": 5196 }, { "epoch": 11.574610244988865, "grad_norm": 25.03767967224121, "learning_rate": 1e-06, "loss": 0.567, "num_input_tokens_seen": 291187856, "step": 5197 }, { "epoch": 11.574610244988865, "loss": 0.38165202736854553, "loss_ce": 0.0001823049533413723, "loss_iou": 0.1611328125, "loss_num": 0.01177978515625, "loss_xval": 0.380859375, "num_input_tokens_seen": 291187856, "step": 5197 }, { "epoch": 11.57683741648107, "grad_norm": 18.827882766723633, "learning_rate": 1e-06, "loss": 0.7002, "num_input_tokens_seen": 291243456, "step": 5198 }, { "epoch": 11.57683741648107, "loss": 0.9664729237556458, "loss_ce": 0.0004084957472514361, "loss_iou": 0.44140625, "loss_num": 0.0164794921875, "loss_xval": 0.96484375, "num_input_tokens_seen": 291243456, "step": 5198 }, { "epoch": 11.579064587973274, "grad_norm": 15.689250946044922, "learning_rate": 1e-06, "loss": 0.5917, "num_input_tokens_seen": 291301168, "step": 5199 }, { "epoch": 11.579064587973274, "loss": 0.37210649251937866, "loss_ce": 0.00015823188005015254, "loss_iou": 0.1630859375, "loss_num": 0.009033203125, "loss_xval": 0.37109375, "num_input_tokens_seen": 291301168, "step": 5199 }, { "epoch": 11.58129175946548, "grad_norm": 26.7224178314209, "learning_rate": 1e-06, "loss": 0.6347, "num_input_tokens_seen": 291356144, "step": 5200 }, { "epoch": 11.58129175946548, "loss": 0.6205277442932129, "loss_ce": 0.00016642545233480632, "loss_iou": 0.2734375, "loss_num": 0.0147705078125, "loss_xval": 0.62109375, "num_input_tokens_seen": 291356144, "step": 5200 }, { "epoch": 11.583518930957684, "grad_norm": 17.09632110595703, "learning_rate": 1e-06, "loss": 0.431, "num_input_tokens_seen": 291412052, "step": 5201 }, { "epoch": 11.583518930957684, "loss": 0.3809802234172821, "loss_ce": 0.0001208461108035408, "loss_iou": 0.16796875, "loss_num": 0.0089111328125, "loss_xval": 0.380859375, "num_input_tokens_seen": 291412052, "step": 5201 }, { "epoch": 11.585746102449889, "grad_norm": 13.215712547302246, "learning_rate": 1e-06, "loss": 0.3926, "num_input_tokens_seen": 291468324, "step": 5202 }, { "epoch": 11.585746102449889, "loss": 0.35443952679634094, "loss_ce": 0.00013045519881416112, "loss_iou": 0.1533203125, "loss_num": 0.00946044921875, "loss_xval": 0.353515625, "num_input_tokens_seen": 291468324, "step": 5202 }, { "epoch": 11.587973273942094, "grad_norm": 15.096648216247559, "learning_rate": 1e-06, "loss": 0.4367, "num_input_tokens_seen": 291525764, "step": 5203 }, { "epoch": 11.587973273942094, "loss": 0.31994152069091797, "loss_ce": 0.00011730578989954665, "loss_iou": 0.1455078125, "loss_num": 0.00579833984375, "loss_xval": 0.3203125, "num_input_tokens_seen": 291525764, "step": 5203 }, { "epoch": 11.590200445434299, "grad_norm": 21.537437438964844, "learning_rate": 1e-06, "loss": 0.4525, "num_input_tokens_seen": 291581816, "step": 5204 }, { "epoch": 11.590200445434299, "loss": 0.4407280683517456, "loss_ce": 0.0001763150212354958, "loss_iou": 0.181640625, "loss_num": 0.0155029296875, "loss_xval": 0.44140625, "num_input_tokens_seen": 291581816, "step": 5204 }, { "epoch": 11.592427616926503, "grad_norm": 17.791162490844727, "learning_rate": 1e-06, "loss": 0.4189, "num_input_tokens_seen": 291640008, "step": 5205 }, { "epoch": 11.592427616926503, "loss": 0.45544835925102234, "loss_ce": 0.0001261141151189804, "loss_iou": 0.20703125, "loss_num": 0.00836181640625, "loss_xval": 0.455078125, "num_input_tokens_seen": 291640008, "step": 5205 }, { "epoch": 11.594654788418708, "grad_norm": 15.262166976928711, "learning_rate": 1e-06, "loss": 0.4908, "num_input_tokens_seen": 291696576, "step": 5206 }, { "epoch": 11.594654788418708, "loss": 0.5726983547210693, "loss_ce": 0.00012755353236570954, "loss_iou": 0.2470703125, "loss_num": 0.0157470703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 291696576, "step": 5206 }, { "epoch": 11.596881959910913, "grad_norm": 18.53944969177246, "learning_rate": 1e-06, "loss": 0.4826, "num_input_tokens_seen": 291753164, "step": 5207 }, { "epoch": 11.596881959910913, "loss": 0.5484709143638611, "loss_ce": 0.0001310570805799216, "loss_iou": 0.2236328125, "loss_num": 0.0205078125, "loss_xval": 0.546875, "num_input_tokens_seen": 291753164, "step": 5207 }, { "epoch": 11.599109131403118, "grad_norm": 20.32893943786621, "learning_rate": 1e-06, "loss": 0.5119, "num_input_tokens_seen": 291807164, "step": 5208 }, { "epoch": 11.599109131403118, "loss": 0.6429668664932251, "loss_ce": 0.00014457208453677595, "loss_iou": 0.2734375, "loss_num": 0.019287109375, "loss_xval": 0.64453125, "num_input_tokens_seen": 291807164, "step": 5208 }, { "epoch": 11.601336302895323, "grad_norm": 16.207355499267578, "learning_rate": 1e-06, "loss": 0.3707, "num_input_tokens_seen": 291861832, "step": 5209 }, { "epoch": 11.601336302895323, "loss": 0.35583117604255676, "loss_ce": 0.00011830384755739942, "loss_iou": 0.1591796875, "loss_num": 0.007354736328125, "loss_xval": 0.35546875, "num_input_tokens_seen": 291861832, "step": 5209 }, { "epoch": 11.603563474387528, "grad_norm": 20.643909454345703, "learning_rate": 1e-06, "loss": 0.3916, "num_input_tokens_seen": 291919612, "step": 5210 }, { "epoch": 11.603563474387528, "loss": 0.40422919392585754, "loss_ce": 0.0001764479384291917, "loss_iou": 0.162109375, "loss_num": 0.015869140625, "loss_xval": 0.404296875, "num_input_tokens_seen": 291919612, "step": 5210 }, { "epoch": 11.605790645879733, "grad_norm": 18.431650161743164, "learning_rate": 1e-06, "loss": 0.5809, "num_input_tokens_seen": 291976832, "step": 5211 }, { "epoch": 11.605790645879733, "loss": 0.5501755475997925, "loss_ce": 0.00012670463183894753, "loss_iou": 0.2490234375, "loss_num": 0.010498046875, "loss_xval": 0.55078125, "num_input_tokens_seen": 291976832, "step": 5211 }, { "epoch": 11.608017817371937, "grad_norm": 24.48961639404297, "learning_rate": 1e-06, "loss": 0.5894, "num_input_tokens_seen": 292033284, "step": 5212 }, { "epoch": 11.608017817371937, "loss": 0.565216064453125, "loss_ce": 0.00015255842299666256, "loss_iou": 0.248046875, "loss_num": 0.0135498046875, "loss_xval": 0.56640625, "num_input_tokens_seen": 292033284, "step": 5212 }, { "epoch": 11.610244988864142, "grad_norm": 16.848508834838867, "learning_rate": 1e-06, "loss": 0.404, "num_input_tokens_seen": 292086472, "step": 5213 }, { "epoch": 11.610244988864142, "loss": 0.3769487142562866, "loss_ce": 0.0001176485966425389, "loss_iou": 0.169921875, "loss_num": 0.007354736328125, "loss_xval": 0.376953125, "num_input_tokens_seen": 292086472, "step": 5213 }, { "epoch": 11.612472160356347, "grad_norm": 26.47736167907715, "learning_rate": 1e-06, "loss": 0.5472, "num_input_tokens_seen": 292141628, "step": 5214 }, { "epoch": 11.612472160356347, "loss": 0.6076180338859558, "loss_ce": 0.00013512838631868362, "loss_iou": 0.26953125, "loss_num": 0.01361083984375, "loss_xval": 0.609375, "num_input_tokens_seen": 292141628, "step": 5214 }, { "epoch": 11.614699331848552, "grad_norm": 20.124786376953125, "learning_rate": 1e-06, "loss": 0.5394, "num_input_tokens_seen": 292195460, "step": 5215 }, { "epoch": 11.614699331848552, "loss": 0.746225118637085, "loss_ce": 0.00013138932990841568, "loss_iou": 0.33203125, "loss_num": 0.0166015625, "loss_xval": 0.74609375, "num_input_tokens_seen": 292195460, "step": 5215 }, { "epoch": 11.616926503340757, "grad_norm": 16.484128952026367, "learning_rate": 1e-06, "loss": 0.5526, "num_input_tokens_seen": 292251864, "step": 5216 }, { "epoch": 11.616926503340757, "loss": 0.5467870831489563, "loss_ce": 0.00015625265950802714, "loss_iou": 0.22265625, "loss_num": 0.0201416015625, "loss_xval": 0.546875, "num_input_tokens_seen": 292251864, "step": 5216 }, { "epoch": 11.619153674832962, "grad_norm": 24.076684951782227, "learning_rate": 1e-06, "loss": 0.6972, "num_input_tokens_seen": 292304412, "step": 5217 }, { "epoch": 11.619153674832962, "loss": 0.6573572158813477, "loss_ce": 0.00013063887308817357, "loss_iou": 0.2890625, "loss_num": 0.01611328125, "loss_xval": 0.65625, "num_input_tokens_seen": 292304412, "step": 5217 }, { "epoch": 11.621380846325167, "grad_norm": 20.732887268066406, "learning_rate": 1e-06, "loss": 0.5674, "num_input_tokens_seen": 292360272, "step": 5218 }, { "epoch": 11.621380846325167, "loss": 0.6395326256752014, "loss_ce": 0.00012831481581088156, "loss_iou": 0.27734375, "loss_num": 0.017333984375, "loss_xval": 0.640625, "num_input_tokens_seen": 292360272, "step": 5218 }, { "epoch": 11.623608017817372, "grad_norm": 19.97538948059082, "learning_rate": 1e-06, "loss": 0.4222, "num_input_tokens_seen": 292415788, "step": 5219 }, { "epoch": 11.623608017817372, "loss": 0.3983200788497925, "loss_ce": 0.00012667715782299638, "loss_iou": 0.1845703125, "loss_num": 0.0057373046875, "loss_xval": 0.3984375, "num_input_tokens_seen": 292415788, "step": 5219 }, { "epoch": 11.625835189309576, "grad_norm": 22.856096267700195, "learning_rate": 1e-06, "loss": 0.5726, "num_input_tokens_seen": 292467908, "step": 5220 }, { "epoch": 11.625835189309576, "loss": 0.6926459074020386, "loss_ce": 0.00014099694089964032, "loss_iou": 0.310546875, "loss_num": 0.0147705078125, "loss_xval": 0.69140625, "num_input_tokens_seen": 292467908, "step": 5220 }, { "epoch": 11.628062360801781, "grad_norm": 15.5626859664917, "learning_rate": 1e-06, "loss": 0.6509, "num_input_tokens_seen": 292523568, "step": 5221 }, { "epoch": 11.628062360801781, "loss": 0.6292951107025146, "loss_ce": 0.0001447499671485275, "loss_iou": 0.296875, "loss_num": 0.00738525390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 292523568, "step": 5221 }, { "epoch": 11.630289532293986, "grad_norm": 38.288299560546875, "learning_rate": 1e-06, "loss": 0.5488, "num_input_tokens_seen": 292577496, "step": 5222 }, { "epoch": 11.630289532293986, "loss": 0.6104898452758789, "loss_ce": 0.00013826916983816773, "loss_iou": 0.2451171875, "loss_num": 0.02392578125, "loss_xval": 0.609375, "num_input_tokens_seen": 292577496, "step": 5222 }, { "epoch": 11.632516703786191, "grad_norm": 25.399972915649414, "learning_rate": 1e-06, "loss": 0.482, "num_input_tokens_seen": 292633528, "step": 5223 }, { "epoch": 11.632516703786191, "loss": 0.5387084484100342, "loss_ce": 0.00013427785597741604, "loss_iou": 0.2412109375, "loss_num": 0.010986328125, "loss_xval": 0.5390625, "num_input_tokens_seen": 292633528, "step": 5223 }, { "epoch": 11.634743875278396, "grad_norm": 85.37445831298828, "learning_rate": 1e-06, "loss": 0.5576, "num_input_tokens_seen": 292688036, "step": 5224 }, { "epoch": 11.634743875278396, "loss": 0.7559876441955566, "loss_ce": 0.00012828274338971823, "loss_iou": 0.298828125, "loss_num": 0.031494140625, "loss_xval": 0.7578125, "num_input_tokens_seen": 292688036, "step": 5224 }, { "epoch": 11.6369710467706, "grad_norm": 54.62841796875, "learning_rate": 1e-06, "loss": 0.4919, "num_input_tokens_seen": 292745052, "step": 5225 }, { "epoch": 11.6369710467706, "loss": 0.5994965434074402, "loss_ce": 0.00013131627929396927, "loss_iou": 0.265625, "loss_num": 0.0140380859375, "loss_xval": 0.59765625, "num_input_tokens_seen": 292745052, "step": 5225 }, { "epoch": 11.639198218262806, "grad_norm": 13.105633735656738, "learning_rate": 1e-06, "loss": 0.3966, "num_input_tokens_seen": 292803352, "step": 5226 }, { "epoch": 11.639198218262806, "loss": 0.29151397943496704, "loss_ce": 0.0001321233285125345, "loss_iou": 0.134765625, "loss_num": 0.00457763671875, "loss_xval": 0.291015625, "num_input_tokens_seen": 292803352, "step": 5226 }, { "epoch": 11.64142538975501, "grad_norm": 30.192888259887695, "learning_rate": 1e-06, "loss": 0.5925, "num_input_tokens_seen": 292859900, "step": 5227 }, { "epoch": 11.64142538975501, "loss": 0.59175705909729, "loss_ce": 0.00020430152653716505, "loss_iou": 0.236328125, "loss_num": 0.0238037109375, "loss_xval": 0.58984375, "num_input_tokens_seen": 292859900, "step": 5227 }, { "epoch": 11.643652561247215, "grad_norm": 21.47431755065918, "learning_rate": 1e-06, "loss": 0.4515, "num_input_tokens_seen": 292916668, "step": 5228 }, { "epoch": 11.643652561247215, "loss": 0.5364910364151001, "loss_ce": 0.00011407821148168296, "loss_iou": 0.228515625, "loss_num": 0.0159912109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 292916668, "step": 5228 }, { "epoch": 11.64587973273942, "grad_norm": 29.797399520874023, "learning_rate": 1e-06, "loss": 0.4839, "num_input_tokens_seen": 292969696, "step": 5229 }, { "epoch": 11.64587973273942, "loss": 0.41816890239715576, "loss_ce": 0.00010860178736038506, "loss_iou": 0.1796875, "loss_num": 0.01177978515625, "loss_xval": 0.41796875, "num_input_tokens_seen": 292969696, "step": 5229 }, { "epoch": 11.648106904231625, "grad_norm": 11.707711219787598, "learning_rate": 1e-06, "loss": 0.456, "num_input_tokens_seen": 293027520, "step": 5230 }, { "epoch": 11.648106904231625, "loss": 0.5190684199333191, "loss_ce": 0.00014752443530596793, "loss_iou": 0.21875, "loss_num": 0.0162353515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 293027520, "step": 5230 }, { "epoch": 11.65033407572383, "grad_norm": 29.20980453491211, "learning_rate": 1e-06, "loss": 0.53, "num_input_tokens_seen": 293085036, "step": 5231 }, { "epoch": 11.65033407572383, "loss": 0.42614778876304626, "loss_ce": 0.00012240000069141388, "loss_iou": 0.1787109375, "loss_num": 0.013671875, "loss_xval": 0.42578125, "num_input_tokens_seen": 293085036, "step": 5231 }, { "epoch": 11.652561247216035, "grad_norm": 39.99692916870117, "learning_rate": 1e-06, "loss": 0.7307, "num_input_tokens_seen": 293139624, "step": 5232 }, { "epoch": 11.652561247216035, "loss": 0.8740052580833435, "loss_ce": 0.00022594796610064805, "loss_iou": 0.361328125, "loss_num": 0.0301513671875, "loss_xval": 0.875, "num_input_tokens_seen": 293139624, "step": 5232 }, { "epoch": 11.654788418708241, "grad_norm": 13.025435447692871, "learning_rate": 1e-06, "loss": 0.3551, "num_input_tokens_seen": 293195124, "step": 5233 }, { "epoch": 11.654788418708241, "loss": 0.299808144569397, "loss_ce": 0.00018654628365766257, "loss_iou": 0.1279296875, "loss_num": 0.00872802734375, "loss_xval": 0.298828125, "num_input_tokens_seen": 293195124, "step": 5233 }, { "epoch": 11.657015590200446, "grad_norm": 18.018341064453125, "learning_rate": 1e-06, "loss": 0.4124, "num_input_tokens_seen": 293250064, "step": 5234 }, { "epoch": 11.657015590200446, "loss": 0.48889029026031494, "loss_ce": 0.00012073374819010496, "loss_iou": 0.2216796875, "loss_num": 0.009033203125, "loss_xval": 0.48828125, "num_input_tokens_seen": 293250064, "step": 5234 }, { "epoch": 11.659242761692651, "grad_norm": 17.814626693725586, "learning_rate": 1e-06, "loss": 0.5089, "num_input_tokens_seen": 293302392, "step": 5235 }, { "epoch": 11.659242761692651, "loss": 0.4006240665912628, "loss_ce": 0.00011137685942230746, "loss_iou": 0.162109375, "loss_num": 0.01507568359375, "loss_xval": 0.400390625, "num_input_tokens_seen": 293302392, "step": 5235 }, { "epoch": 11.661469933184856, "grad_norm": 25.53102684020996, "learning_rate": 1e-06, "loss": 0.3704, "num_input_tokens_seen": 293356928, "step": 5236 }, { "epoch": 11.661469933184856, "loss": 0.36693528294563293, "loss_ce": 0.00011399855429772288, "loss_iou": 0.146484375, "loss_num": 0.0146484375, "loss_xval": 0.3671875, "num_input_tokens_seen": 293356928, "step": 5236 }, { "epoch": 11.66369710467706, "grad_norm": 18.298686981201172, "learning_rate": 1e-06, "loss": 0.5776, "num_input_tokens_seen": 293410648, "step": 5237 }, { "epoch": 11.66369710467706, "loss": 0.4626512825489044, "loss_ce": 0.00012686576519627124, "loss_iou": 0.2021484375, "loss_num": 0.0115966796875, "loss_xval": 0.462890625, "num_input_tokens_seen": 293410648, "step": 5237 }, { "epoch": 11.665924276169266, "grad_norm": 11.563762664794922, "learning_rate": 1e-06, "loss": 0.3882, "num_input_tokens_seen": 293463284, "step": 5238 }, { "epoch": 11.665924276169266, "loss": 0.2963898777961731, "loss_ce": 0.00012523468467406929, "loss_iou": 0.11865234375, "loss_num": 0.01171875, "loss_xval": 0.296875, "num_input_tokens_seen": 293463284, "step": 5238 }, { "epoch": 11.66815144766147, "grad_norm": 16.8227596282959, "learning_rate": 1e-06, "loss": 0.6908, "num_input_tokens_seen": 293517264, "step": 5239 }, { "epoch": 11.66815144766147, "loss": 0.7157348394393921, "loss_ce": 0.0001587077567819506, "loss_iou": 0.30078125, "loss_num": 0.0233154296875, "loss_xval": 0.71484375, "num_input_tokens_seen": 293517264, "step": 5239 }, { "epoch": 11.670378619153675, "grad_norm": 17.42998695373535, "learning_rate": 1e-06, "loss": 0.4203, "num_input_tokens_seen": 293572600, "step": 5240 }, { "epoch": 11.670378619153675, "loss": 0.5567784905433655, "loss_ce": 0.0001378921151626855, "loss_iou": 0.2353515625, "loss_num": 0.0172119140625, "loss_xval": 0.5546875, "num_input_tokens_seen": 293572600, "step": 5240 }, { "epoch": 11.67260579064588, "grad_norm": 21.24955177307129, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 293628344, "step": 5241 }, { "epoch": 11.67260579064588, "loss": 0.3846355080604553, "loss_ce": 0.00011400008224882185, "loss_iou": 0.1494140625, "loss_num": 0.0172119140625, "loss_xval": 0.384765625, "num_input_tokens_seen": 293628344, "step": 5241 }, { "epoch": 11.674832962138085, "grad_norm": 27.092111587524414, "learning_rate": 1e-06, "loss": 0.6355, "num_input_tokens_seen": 293684796, "step": 5242 }, { "epoch": 11.674832962138085, "loss": 0.6805571913719177, "loss_ce": 0.0001372963743051514, "loss_iou": 0.306640625, "loss_num": 0.01373291015625, "loss_xval": 0.6796875, "num_input_tokens_seen": 293684796, "step": 5242 }, { "epoch": 11.67706013363029, "grad_norm": 16.437114715576172, "learning_rate": 1e-06, "loss": 0.4618, "num_input_tokens_seen": 293740992, "step": 5243 }, { "epoch": 11.67706013363029, "loss": 0.3265749514102936, "loss_ce": 0.00012842280557379127, "loss_iou": 0.14453125, "loss_num": 0.00732421875, "loss_xval": 0.326171875, "num_input_tokens_seen": 293740992, "step": 5243 }, { "epoch": 11.679287305122495, "grad_norm": 23.96824836730957, "learning_rate": 1e-06, "loss": 0.6494, "num_input_tokens_seen": 293795756, "step": 5244 }, { "epoch": 11.679287305122495, "loss": 0.47252053022384644, "loss_ce": 0.00010840976028703153, "loss_iou": 0.2001953125, "loss_num": 0.01416015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 293795756, "step": 5244 }, { "epoch": 11.6815144766147, "grad_norm": 24.439743041992188, "learning_rate": 1e-06, "loss": 0.5064, "num_input_tokens_seen": 293854164, "step": 5245 }, { "epoch": 11.6815144766147, "loss": 0.6212034821510315, "loss_ce": 0.00010969607683364302, "loss_iou": 0.27734375, "loss_num": 0.013671875, "loss_xval": 0.62109375, "num_input_tokens_seen": 293854164, "step": 5245 }, { "epoch": 11.683741648106905, "grad_norm": 14.390751838684082, "learning_rate": 1e-06, "loss": 0.5511, "num_input_tokens_seen": 293910360, "step": 5246 }, { "epoch": 11.683741648106905, "loss": 0.42605873942375183, "loss_ce": 0.00027748823049478233, "loss_iou": 0.2001953125, "loss_num": 0.005126953125, "loss_xval": 0.42578125, "num_input_tokens_seen": 293910360, "step": 5246 }, { "epoch": 11.68596881959911, "grad_norm": 24.848167419433594, "learning_rate": 1e-06, "loss": 0.4161, "num_input_tokens_seen": 293969128, "step": 5247 }, { "epoch": 11.68596881959911, "loss": 0.38915523886680603, "loss_ce": 0.0001171459662145935, "loss_iou": 0.171875, "loss_num": 0.009033203125, "loss_xval": 0.388671875, "num_input_tokens_seen": 293969128, "step": 5247 }, { "epoch": 11.688195991091314, "grad_norm": 18.111801147460938, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 294024516, "step": 5248 }, { "epoch": 11.688195991091314, "loss": 0.5797381401062012, "loss_ce": 0.0001482985680922866, "loss_iou": 0.244140625, "loss_num": 0.01806640625, "loss_xval": 0.578125, "num_input_tokens_seen": 294024516, "step": 5248 }, { "epoch": 11.690423162583519, "grad_norm": 18.03676414489746, "learning_rate": 1e-06, "loss": 0.543, "num_input_tokens_seen": 294076868, "step": 5249 }, { "epoch": 11.690423162583519, "loss": 0.41762179136276245, "loss_ce": 0.0001413249410688877, "loss_iou": 0.1845703125, "loss_num": 0.009521484375, "loss_xval": 0.41796875, "num_input_tokens_seen": 294076868, "step": 5249 }, { "epoch": 11.692650334075724, "grad_norm": 12.850332260131836, "learning_rate": 1e-06, "loss": 0.401, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "eval_seeclick_web_CIoU": 0.5772451758384705, "eval_seeclick_web_GIoU": 0.5742323994636536, "eval_seeclick_web_IoU": 0.5963829755783081, "eval_seeclick_web_MAE_all": 0.015578721649944782, "eval_seeclick_web_MAE_h": 0.007713136961683631, "eval_seeclick_web_MAE_w": 0.015674122143536806, "eval_seeclick_web_MAE_x_boxes": 0.009651401545852423, "eval_seeclick_web_MAE_y_boxes": 0.021532843122258782, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.929573655128479, "eval_seeclick_web_loss_ce": 0.00019275367230875418, "eval_seeclick_web_loss_iou": 0.423828125, "eval_seeclick_web_loss_num": 0.012483596801757812, "eval_seeclick_web_loss_xval": 0.909912109375, "eval_seeclick_web_runtime": 21.994, "eval_seeclick_web_samples_per_second": 2.273, "eval_seeclick_web_steps_per_second": 0.091, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "eval_icons_CIoU": 0.2656567245721817, "eval_icons_GIoU": 0.2912362068891525, "eval_icons_IoU": 0.35016924142837524, "eval_icons_MAE_all": 0.06389489211142063, "eval_icons_MAE_h": 0.04006360750645399, "eval_icons_MAE_w": 0.0678694061934948, "eval_icons_MAE_x_boxes": 0.059559810906648636, "eval_icons_MAE_y_boxes": 0.03940416965633631, "eval_icons_inside_bbox": 0.578125, "eval_icons_loss": 1.754642128944397, "eval_icons_loss_ce": 0.000254698476055637, "eval_icons_loss_iou": 0.68310546875, "eval_icons_loss_num": 0.06172943115234375, "eval_icons_loss_xval": 1.67333984375, "eval_icons_runtime": 20.5945, "eval_icons_samples_per_second": 2.428, "eval_icons_steps_per_second": 0.097, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "eval_screenspot_CIoU": 0.3591058651606242, "eval_screenspot_GIoU": 0.3725058138370514, "eval_screenspot_IoU": 0.4368898868560791, "eval_screenspot_MAE_all": 0.05924547587831815, "eval_screenspot_MAE_h": 0.039297002057234444, "eval_screenspot_MAE_w": 0.06727503115932147, "eval_screenspot_MAE_x_boxes": 0.07256409463783105, "eval_screenspot_MAE_y_boxes": 0.04037608547757069, "eval_screenspot_inside_bbox": 0.6862499912579855, "eval_screenspot_loss": 1.6120883226394653, "eval_screenspot_loss_ce": 0.0002683925946863989, "eval_screenspot_loss_iou": 0.66650390625, "eval_screenspot_loss_num": 0.06815338134765625, "eval_screenspot_loss_xval": 1.6746419270833333, "eval_screenspot_runtime": 35.2255, "eval_screenspot_samples_per_second": 2.527, "eval_screenspot_steps_per_second": 0.085, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "eval_compot_CIoU": 0.34088848531246185, "eval_compot_GIoU": 0.3492155075073242, "eval_compot_IoU": 0.400636687874794, "eval_compot_MAE_all": 0.01801011897623539, "eval_compot_MAE_h": 0.009378379676491022, "eval_compot_MAE_w": 0.021228870376944542, "eval_compot_MAE_x_boxes": 0.02999929618090391, "eval_compot_MAE_y_boxes": 0.007140443194657564, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4145660400390625, "eval_compot_loss_ce": 0.00018989732052432373, "eval_compot_loss_iou": 0.651611328125, "eval_compot_loss_num": 0.016811370849609375, "eval_compot_loss_xval": 1.387451171875, "eval_compot_runtime": 22.0109, "eval_compot_samples_per_second": 2.272, "eval_compot_steps_per_second": 0.091, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "eval_custom_ui_val_CIoU": 0.4712279670768314, "eval_custom_ui_val_GIoU": 0.48230206304126316, "eval_custom_ui_val_IoU": 0.533691535393397, "eval_custom_ui_val_MAE_all": 0.030553390168481402, "eval_custom_ui_val_MAE_h": 0.016979538809715047, "eval_custom_ui_val_MAE_w": 0.039266514798833266, "eval_custom_ui_val_MAE_x_boxes": 0.03789379137257735, "eval_custom_ui_val_MAE_y_boxes": 0.015024640881973837, "eval_custom_ui_val_inside_bbox": 0.7685185207260979, "eval_custom_ui_val_loss": 1.2054945230484009, "eval_custom_ui_val_loss_ce": 0.0002350941342431017, "eval_custom_ui_val_loss_iou": 0.5133327907986112, "eval_custom_ui_val_loss_num": 0.027909808688693576, "eval_custom_ui_val_loss_xval": 1.1665581597222223, "eval_custom_ui_val_runtime": 59.6121, "eval_custom_ui_val_samples_per_second": 4.445, "eval_custom_ui_val_steps_per_second": 0.151, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.692650334075724, "loss": 0.9047340154647827, "loss_ce": 0.0001930339785758406, "loss_iou": 0.390625, "loss_num": 0.0242919921875, "loss_xval": 0.90625, "num_input_tokens_seen": 294132900, "step": 5250 }, { "epoch": 11.694877505567929, "grad_norm": 22.63847541809082, "learning_rate": 1e-06, "loss": 0.4242, "num_input_tokens_seen": 294188952, "step": 5251 }, { "epoch": 11.694877505567929, "loss": 0.36535871028900146, "loss_ce": 0.00012433102529030293, "loss_iou": 0.1552734375, "loss_num": 0.0108642578125, "loss_xval": 0.365234375, "num_input_tokens_seen": 294188952, "step": 5251 }, { "epoch": 11.697104677060134, "grad_norm": 23.704172134399414, "learning_rate": 1e-06, "loss": 0.4967, "num_input_tokens_seen": 294246668, "step": 5252 }, { "epoch": 11.697104677060134, "loss": 0.47535350918769836, "loss_ce": 0.00013378591393120587, "loss_iou": 0.2197265625, "loss_num": 0.0069580078125, "loss_xval": 0.474609375, "num_input_tokens_seen": 294246668, "step": 5252 }, { "epoch": 11.699331848552339, "grad_norm": 64.23724365234375, "learning_rate": 1e-06, "loss": 0.5925, "num_input_tokens_seen": 294303032, "step": 5253 }, { "epoch": 11.699331848552339, "loss": 0.4765591025352478, "loss_ce": 0.00011863937834277749, "loss_iou": 0.185546875, "loss_num": 0.02099609375, "loss_xval": 0.4765625, "num_input_tokens_seen": 294303032, "step": 5253 }, { "epoch": 11.701559020044543, "grad_norm": 19.613231658935547, "learning_rate": 1e-06, "loss": 0.5125, "num_input_tokens_seen": 294360592, "step": 5254 }, { "epoch": 11.701559020044543, "loss": 0.5249412059783936, "loss_ce": 0.0001609077153261751, "loss_iou": 0.2216796875, "loss_num": 0.0162353515625, "loss_xval": 0.5234375, "num_input_tokens_seen": 294360592, "step": 5254 }, { "epoch": 11.703786191536748, "grad_norm": 18.052928924560547, "learning_rate": 1e-06, "loss": 0.4107, "num_input_tokens_seen": 294416216, "step": 5255 }, { "epoch": 11.703786191536748, "loss": 0.3549973666667938, "loss_ce": 0.00013897998724132776, "loss_iou": 0.158203125, "loss_num": 0.007659912109375, "loss_xval": 0.35546875, "num_input_tokens_seen": 294416216, "step": 5255 }, { "epoch": 11.706013363028953, "grad_norm": 20.044328689575195, "learning_rate": 1e-06, "loss": 0.6376, "num_input_tokens_seen": 294473684, "step": 5256 }, { "epoch": 11.706013363028953, "loss": 0.5567925572395325, "loss_ce": 0.00015196386084426194, "loss_iou": 0.2294921875, "loss_num": 0.019775390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 294473684, "step": 5256 }, { "epoch": 11.708240534521158, "grad_norm": 32.992637634277344, "learning_rate": 1e-06, "loss": 0.4129, "num_input_tokens_seen": 294531200, "step": 5257 }, { "epoch": 11.708240534521158, "loss": 0.41336357593536377, "loss_ce": 0.00015554996207356453, "loss_iou": 0.19140625, "loss_num": 0.00604248046875, "loss_xval": 0.4140625, "num_input_tokens_seen": 294531200, "step": 5257 }, { "epoch": 11.710467706013363, "grad_norm": 17.366310119628906, "learning_rate": 1e-06, "loss": 0.3378, "num_input_tokens_seen": 294587048, "step": 5258 }, { "epoch": 11.710467706013363, "loss": 0.3781304359436035, "loss_ce": 0.00020075507927685976, "loss_iou": 0.1767578125, "loss_num": 0.00494384765625, "loss_xval": 0.37890625, "num_input_tokens_seen": 294587048, "step": 5258 }, { "epoch": 11.712694877505568, "grad_norm": 19.407751083374023, "learning_rate": 1e-06, "loss": 0.55, "num_input_tokens_seen": 294645188, "step": 5259 }, { "epoch": 11.712694877505568, "loss": 0.6682122349739075, "loss_ce": 0.00012143873027525842, "loss_iou": 0.283203125, "loss_num": 0.0203857421875, "loss_xval": 0.66796875, "num_input_tokens_seen": 294645188, "step": 5259 }, { "epoch": 11.714922048997773, "grad_norm": 27.0356388092041, "learning_rate": 1e-06, "loss": 0.5196, "num_input_tokens_seen": 294702920, "step": 5260 }, { "epoch": 11.714922048997773, "loss": 0.5554238557815552, "loss_ce": 0.00012605905067175627, "loss_iou": 0.240234375, "loss_num": 0.01507568359375, "loss_xval": 0.5546875, "num_input_tokens_seen": 294702920, "step": 5260 }, { "epoch": 11.717149220489977, "grad_norm": 25.279115676879883, "learning_rate": 1e-06, "loss": 0.5789, "num_input_tokens_seen": 294760020, "step": 5261 }, { "epoch": 11.717149220489977, "loss": 0.6674556732177734, "loss_ce": 0.0002193464315496385, "loss_iou": 0.283203125, "loss_num": 0.0205078125, "loss_xval": 0.66796875, "num_input_tokens_seen": 294760020, "step": 5261 }, { "epoch": 11.719376391982182, "grad_norm": 28.12050437927246, "learning_rate": 1e-06, "loss": 0.4775, "num_input_tokens_seen": 294815096, "step": 5262 }, { "epoch": 11.719376391982182, "loss": 0.4449352025985718, "loss_ce": 0.00011096900561824441, "loss_iou": 0.2001953125, "loss_num": 0.0089111328125, "loss_xval": 0.4453125, "num_input_tokens_seen": 294815096, "step": 5262 }, { "epoch": 11.721603563474387, "grad_norm": 15.876343727111816, "learning_rate": 1e-06, "loss": 0.6174, "num_input_tokens_seen": 294872920, "step": 5263 }, { "epoch": 11.721603563474387, "loss": 0.6857922077178955, "loss_ce": 0.0003674498002510518, "loss_iou": 0.30078125, "loss_num": 0.0166015625, "loss_xval": 0.68359375, "num_input_tokens_seen": 294872920, "step": 5263 }, { "epoch": 11.723830734966592, "grad_norm": 19.064332962036133, "learning_rate": 1e-06, "loss": 0.4169, "num_input_tokens_seen": 294929388, "step": 5264 }, { "epoch": 11.723830734966592, "loss": 0.2714940309524536, "loss_ce": 0.00013172137551009655, "loss_iou": 0.11279296875, "loss_num": 0.00909423828125, "loss_xval": 0.271484375, "num_input_tokens_seen": 294929388, "step": 5264 }, { "epoch": 11.726057906458797, "grad_norm": 19.456663131713867, "learning_rate": 1e-06, "loss": 0.4162, "num_input_tokens_seen": 294983048, "step": 5265 }, { "epoch": 11.726057906458797, "loss": 0.4633742868900299, "loss_ce": 0.00011746423842851073, "loss_iou": 0.2021484375, "loss_num": 0.01165771484375, "loss_xval": 0.462890625, "num_input_tokens_seen": 294983048, "step": 5265 }, { "epoch": 11.728285077951002, "grad_norm": 18.227783203125, "learning_rate": 1e-06, "loss": 0.5663, "num_input_tokens_seen": 295042996, "step": 5266 }, { "epoch": 11.728285077951002, "loss": 0.4463956952095032, "loss_ce": 0.00010662179556675255, "loss_iou": 0.1748046875, "loss_num": 0.0191650390625, "loss_xval": 0.4453125, "num_input_tokens_seen": 295042996, "step": 5266 }, { "epoch": 11.730512249443207, "grad_norm": 16.869722366333008, "learning_rate": 1e-06, "loss": 0.5432, "num_input_tokens_seen": 295096444, "step": 5267 }, { "epoch": 11.730512249443207, "loss": 0.6898572444915771, "loss_ce": 0.00015996204456314445, "loss_iou": 0.306640625, "loss_num": 0.01531982421875, "loss_xval": 0.69140625, "num_input_tokens_seen": 295096444, "step": 5267 }, { "epoch": 11.732739420935411, "grad_norm": 20.643428802490234, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 295151264, "step": 5268 }, { "epoch": 11.732739420935411, "loss": 0.45317167043685913, "loss_ce": 0.00010773130634333938, "loss_iou": 0.2060546875, "loss_num": 0.00799560546875, "loss_xval": 0.453125, "num_input_tokens_seen": 295151264, "step": 5268 }, { "epoch": 11.734966592427616, "grad_norm": 21.514039993286133, "learning_rate": 1e-06, "loss": 0.4523, "num_input_tokens_seen": 295208360, "step": 5269 }, { "epoch": 11.734966592427616, "loss": 0.41239604353904724, "loss_ce": 0.0001646070450078696, "loss_iou": 0.181640625, "loss_num": 0.0096435546875, "loss_xval": 0.412109375, "num_input_tokens_seen": 295208360, "step": 5269 }, { "epoch": 11.737193763919821, "grad_norm": 32.98282241821289, "learning_rate": 1e-06, "loss": 0.5202, "num_input_tokens_seen": 295262584, "step": 5270 }, { "epoch": 11.737193763919821, "loss": 0.4672822058200836, "loss_ce": 0.00011912808258784935, "loss_iou": 0.193359375, "loss_num": 0.0162353515625, "loss_xval": 0.466796875, "num_input_tokens_seen": 295262584, "step": 5270 }, { "epoch": 11.739420935412026, "grad_norm": 34.023441314697266, "learning_rate": 1e-06, "loss": 0.6488, "num_input_tokens_seen": 295318340, "step": 5271 }, { "epoch": 11.739420935412026, "loss": 0.686636209487915, "loss_ce": 0.00011279142927378416, "loss_iou": 0.3203125, "loss_num": 0.00927734375, "loss_xval": 0.6875, "num_input_tokens_seen": 295318340, "step": 5271 }, { "epoch": 11.74164810690423, "grad_norm": 26.27116584777832, "learning_rate": 1e-06, "loss": 0.6936, "num_input_tokens_seen": 295373900, "step": 5272 }, { "epoch": 11.74164810690423, "loss": 0.806282639503479, "loss_ce": 0.0001302346063312143, "loss_iou": 0.345703125, "loss_num": 0.0228271484375, "loss_xval": 0.8046875, "num_input_tokens_seen": 295373900, "step": 5272 }, { "epoch": 11.743875278396436, "grad_norm": 23.026466369628906, "learning_rate": 1e-06, "loss": 0.3724, "num_input_tokens_seen": 295429820, "step": 5273 }, { "epoch": 11.743875278396436, "loss": 0.42163270711898804, "loss_ce": 0.00012393189535941929, "loss_iou": 0.19921875, "loss_num": 0.0045166015625, "loss_xval": 0.421875, "num_input_tokens_seen": 295429820, "step": 5273 }, { "epoch": 11.74610244988864, "grad_norm": 22.444780349731445, "learning_rate": 1e-06, "loss": 0.6569, "num_input_tokens_seen": 295488184, "step": 5274 }, { "epoch": 11.74610244988864, "loss": 0.6977794766426086, "loss_ce": 0.00014764037041459233, "loss_iou": 0.298828125, "loss_num": 0.0201416015625, "loss_xval": 0.69921875, "num_input_tokens_seen": 295488184, "step": 5274 }, { "epoch": 11.748329621380847, "grad_norm": 13.779067993164062, "learning_rate": 1e-06, "loss": 0.4753, "num_input_tokens_seen": 295545832, "step": 5275 }, { "epoch": 11.748329621380847, "loss": 0.5852853059768677, "loss_ce": 0.0003243696701247245, "loss_iou": 0.26171875, "loss_num": 0.01275634765625, "loss_xval": 0.5859375, "num_input_tokens_seen": 295545832, "step": 5275 }, { "epoch": 11.750556792873052, "grad_norm": 28.335033416748047, "learning_rate": 1e-06, "loss": 0.8353, "num_input_tokens_seen": 295602708, "step": 5276 }, { "epoch": 11.750556792873052, "loss": 0.6632611155509949, "loss_ce": 0.0001752136304276064, "loss_iou": 0.283203125, "loss_num": 0.019287109375, "loss_xval": 0.6640625, "num_input_tokens_seen": 295602708, "step": 5276 }, { "epoch": 11.752783964365257, "grad_norm": 17.9951114654541, "learning_rate": 1e-06, "loss": 0.5694, "num_input_tokens_seen": 295661544, "step": 5277 }, { "epoch": 11.752783964365257, "loss": 0.4410516023635864, "loss_ce": 0.00013365020276978612, "loss_iou": 0.205078125, "loss_num": 0.00604248046875, "loss_xval": 0.44140625, "num_input_tokens_seen": 295661544, "step": 5277 }, { "epoch": 11.755011135857462, "grad_norm": 18.01840591430664, "learning_rate": 1e-06, "loss": 0.5552, "num_input_tokens_seen": 295718028, "step": 5278 }, { "epoch": 11.755011135857462, "loss": 0.5536874532699585, "loss_ce": 0.00015963352052494884, "loss_iou": 0.255859375, "loss_num": 0.00823974609375, "loss_xval": 0.5546875, "num_input_tokens_seen": 295718028, "step": 5278 }, { "epoch": 11.757238307349667, "grad_norm": 16.344501495361328, "learning_rate": 1e-06, "loss": 0.494, "num_input_tokens_seen": 295774608, "step": 5279 }, { "epoch": 11.757238307349667, "loss": 0.4308076500892639, "loss_ce": 0.000143599376315251, "loss_iou": 0.1982421875, "loss_num": 0.00677490234375, "loss_xval": 0.4296875, "num_input_tokens_seen": 295774608, "step": 5279 }, { "epoch": 11.759465478841872, "grad_norm": 16.951433181762695, "learning_rate": 1e-06, "loss": 0.4982, "num_input_tokens_seen": 295830944, "step": 5280 }, { "epoch": 11.759465478841872, "loss": 0.5626462697982788, "loss_ce": 0.00014624299365095794, "loss_iou": 0.2421875, "loss_num": 0.0157470703125, "loss_xval": 0.5625, "num_input_tokens_seen": 295830944, "step": 5280 }, { "epoch": 11.761692650334076, "grad_norm": 16.005573272705078, "learning_rate": 1e-06, "loss": 0.5351, "num_input_tokens_seen": 295887036, "step": 5281 }, { "epoch": 11.761692650334076, "loss": 0.3308425545692444, "loss_ce": 0.00015408273611683398, "loss_iou": 0.140625, "loss_num": 0.00982666015625, "loss_xval": 0.330078125, "num_input_tokens_seen": 295887036, "step": 5281 }, { "epoch": 11.763919821826281, "grad_norm": 24.683542251586914, "learning_rate": 1e-06, "loss": 0.4432, "num_input_tokens_seen": 295942760, "step": 5282 }, { "epoch": 11.763919821826281, "loss": 0.432136595249176, "loss_ce": 0.00012974410492461175, "loss_iou": 0.1943359375, "loss_num": 0.00860595703125, "loss_xval": 0.431640625, "num_input_tokens_seen": 295942760, "step": 5282 }, { "epoch": 11.766146993318486, "grad_norm": 14.518014907836914, "learning_rate": 1e-06, "loss": 0.4037, "num_input_tokens_seen": 295998632, "step": 5283 }, { "epoch": 11.766146993318486, "loss": 0.4797380268573761, "loss_ce": 0.00012378576502669603, "loss_iou": 0.20703125, "loss_num": 0.01300048828125, "loss_xval": 0.48046875, "num_input_tokens_seen": 295998632, "step": 5283 }, { "epoch": 11.768374164810691, "grad_norm": 21.187023162841797, "learning_rate": 1e-06, "loss": 0.4499, "num_input_tokens_seen": 296051864, "step": 5284 }, { "epoch": 11.768374164810691, "loss": 0.38384902477264404, "loss_ce": 0.00018203401123173535, "loss_iou": 0.17578125, "loss_num": 0.006195068359375, "loss_xval": 0.3828125, "num_input_tokens_seen": 296051864, "step": 5284 }, { "epoch": 11.770601336302896, "grad_norm": 34.56988525390625, "learning_rate": 1e-06, "loss": 0.4661, "num_input_tokens_seen": 296106776, "step": 5285 }, { "epoch": 11.770601336302896, "loss": 0.44788599014282227, "loss_ce": 0.00013207312440499663, "loss_iou": 0.1962890625, "loss_num": 0.010986328125, "loss_xval": 0.447265625, "num_input_tokens_seen": 296106776, "step": 5285 }, { "epoch": 11.7728285077951, "grad_norm": 15.454012870788574, "learning_rate": 1e-06, "loss": 0.7173, "num_input_tokens_seen": 296162332, "step": 5286 }, { "epoch": 11.7728285077951, "loss": 0.5844168066978455, "loss_ce": 0.0001882594224298373, "loss_iou": 0.23828125, "loss_num": 0.021728515625, "loss_xval": 0.5859375, "num_input_tokens_seen": 296162332, "step": 5286 }, { "epoch": 11.775055679287306, "grad_norm": 16.151887893676758, "learning_rate": 1e-06, "loss": 0.5455, "num_input_tokens_seen": 296219860, "step": 5287 }, { "epoch": 11.775055679287306, "loss": 0.5150243043899536, "loss_ce": 0.00013172382023185492, "loss_iou": 0.234375, "loss_num": 0.00933837890625, "loss_xval": 0.515625, "num_input_tokens_seen": 296219860, "step": 5287 }, { "epoch": 11.77728285077951, "grad_norm": 37.95972442626953, "learning_rate": 1e-06, "loss": 0.5543, "num_input_tokens_seen": 296273744, "step": 5288 }, { "epoch": 11.77728285077951, "loss": 0.5362709760665894, "loss_ce": 0.00013816248974762857, "loss_iou": 0.2333984375, "loss_num": 0.01385498046875, "loss_xval": 0.53515625, "num_input_tokens_seen": 296273744, "step": 5288 }, { "epoch": 11.779510022271715, "grad_norm": 30.687088012695312, "learning_rate": 1e-06, "loss": 0.465, "num_input_tokens_seen": 296330476, "step": 5289 }, { "epoch": 11.779510022271715, "loss": 0.575337290763855, "loss_ce": 0.0001419962791260332, "loss_iou": 0.25, "loss_num": 0.01495361328125, "loss_xval": 0.57421875, "num_input_tokens_seen": 296330476, "step": 5289 }, { "epoch": 11.78173719376392, "grad_norm": 22.51045799255371, "learning_rate": 1e-06, "loss": 0.6343, "num_input_tokens_seen": 296386848, "step": 5290 }, { "epoch": 11.78173719376392, "loss": 0.40053892135620117, "loss_ce": 0.00014829869905952364, "loss_iou": 0.1728515625, "loss_num": 0.01092529296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 296386848, "step": 5290 }, { "epoch": 11.783964365256125, "grad_norm": 17.85065269470215, "learning_rate": 1e-06, "loss": 0.5055, "num_input_tokens_seen": 296442688, "step": 5291 }, { "epoch": 11.783964365256125, "loss": 0.36511754989624023, "loss_ce": 0.0001273062516702339, "loss_iou": 0.1484375, "loss_num": 0.013427734375, "loss_xval": 0.365234375, "num_input_tokens_seen": 296442688, "step": 5291 }, { "epoch": 11.78619153674833, "grad_norm": 16.581172943115234, "learning_rate": 1e-06, "loss": 0.5759, "num_input_tokens_seen": 296498192, "step": 5292 }, { "epoch": 11.78619153674833, "loss": 0.3235490620136261, "loss_ce": 0.0001237650285474956, "loss_iou": 0.1416015625, "loss_num": 0.008056640625, "loss_xval": 0.32421875, "num_input_tokens_seen": 296498192, "step": 5292 }, { "epoch": 11.788418708240535, "grad_norm": 14.886438369750977, "learning_rate": 1e-06, "loss": 0.425, "num_input_tokens_seen": 296555524, "step": 5293 }, { "epoch": 11.788418708240535, "loss": 0.44675683975219727, "loss_ce": 0.00016260198026429862, "loss_iou": 0.166015625, "loss_num": 0.0228271484375, "loss_xval": 0.447265625, "num_input_tokens_seen": 296555524, "step": 5293 }, { "epoch": 11.79064587973274, "grad_norm": 15.52322006225586, "learning_rate": 1e-06, "loss": 0.4785, "num_input_tokens_seen": 296613260, "step": 5294 }, { "epoch": 11.79064587973274, "loss": 0.3428999185562134, "loss_ce": 0.00012646152754314244, "loss_iou": 0.162109375, "loss_num": 0.0036163330078125, "loss_xval": 0.34375, "num_input_tokens_seen": 296613260, "step": 5294 }, { "epoch": 11.792873051224944, "grad_norm": 27.934289932250977, "learning_rate": 1e-06, "loss": 0.502, "num_input_tokens_seen": 296667928, "step": 5295 }, { "epoch": 11.792873051224944, "loss": 0.5954668521881104, "loss_ce": 0.00012995509314350784, "loss_iou": 0.25390625, "loss_num": 0.017578125, "loss_xval": 0.59375, "num_input_tokens_seen": 296667928, "step": 5295 }, { "epoch": 11.79510022271715, "grad_norm": 18.869325637817383, "learning_rate": 1e-06, "loss": 0.4571, "num_input_tokens_seen": 296723936, "step": 5296 }, { "epoch": 11.79510022271715, "loss": 0.5156769752502441, "loss_ce": 0.0003877178824041039, "loss_iou": 0.216796875, "loss_num": 0.0162353515625, "loss_xval": 0.515625, "num_input_tokens_seen": 296723936, "step": 5296 }, { "epoch": 11.797327394209354, "grad_norm": 19.03141975402832, "learning_rate": 1e-06, "loss": 0.5304, "num_input_tokens_seen": 296783132, "step": 5297 }, { "epoch": 11.797327394209354, "loss": 0.3304428160190582, "loss_ce": 0.00012054571561748162, "loss_iou": 0.1474609375, "loss_num": 0.0069580078125, "loss_xval": 0.330078125, "num_input_tokens_seen": 296783132, "step": 5297 }, { "epoch": 11.799554565701559, "grad_norm": 20.347238540649414, "learning_rate": 1e-06, "loss": 0.5689, "num_input_tokens_seen": 296839528, "step": 5298 }, { "epoch": 11.799554565701559, "loss": 0.5347905158996582, "loss_ce": 0.00012258738570380956, "loss_iou": 0.2421875, "loss_num": 0.01031494140625, "loss_xval": 0.53515625, "num_input_tokens_seen": 296839528, "step": 5298 }, { "epoch": 11.801781737193764, "grad_norm": 18.98991584777832, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 296899164, "step": 5299 }, { "epoch": 11.801781737193764, "loss": 0.6929968595504761, "loss_ce": 0.00018683550297282636, "loss_iou": 0.283203125, "loss_num": 0.025390625, "loss_xval": 0.69140625, "num_input_tokens_seen": 296899164, "step": 5299 }, { "epoch": 11.804008908685969, "grad_norm": 19.457019805908203, "learning_rate": 1e-06, "loss": 0.5461, "num_input_tokens_seen": 296954940, "step": 5300 }, { "epoch": 11.804008908685969, "loss": 0.5115100741386414, "loss_ce": 0.00015755894128233194, "loss_iou": 0.2265625, "loss_num": 0.01141357421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 296954940, "step": 5300 }, { "epoch": 11.806236080178174, "grad_norm": 18.224740982055664, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 297010916, "step": 5301 }, { "epoch": 11.806236080178174, "loss": 0.6895302534103394, "loss_ce": 0.00019916013116016984, "loss_iou": 0.3046875, "loss_num": 0.0155029296875, "loss_xval": 0.6875, "num_input_tokens_seen": 297010916, "step": 5301 }, { "epoch": 11.808463251670378, "grad_norm": 22.27739715576172, "learning_rate": 1e-06, "loss": 0.5199, "num_input_tokens_seen": 297066748, "step": 5302 }, { "epoch": 11.808463251670378, "loss": 0.4963473081588745, "loss_ce": 0.00013145770935807377, "loss_iou": 0.2216796875, "loss_num": 0.010498046875, "loss_xval": 0.49609375, "num_input_tokens_seen": 297066748, "step": 5302 }, { "epoch": 11.810690423162583, "grad_norm": 13.744184494018555, "learning_rate": 1e-06, "loss": 0.3914, "num_input_tokens_seen": 297123360, "step": 5303 }, { "epoch": 11.810690423162583, "loss": 0.32909929752349854, "loss_ce": 0.00011983991134911776, "loss_iou": 0.146484375, "loss_num": 0.00732421875, "loss_xval": 0.328125, "num_input_tokens_seen": 297123360, "step": 5303 }, { "epoch": 11.812917594654788, "grad_norm": 17.198087692260742, "learning_rate": 1e-06, "loss": 0.6204, "num_input_tokens_seen": 297179212, "step": 5304 }, { "epoch": 11.812917594654788, "loss": 0.5738571882247925, "loss_ce": 0.0001267509942408651, "loss_iou": 0.22265625, "loss_num": 0.0257568359375, "loss_xval": 0.57421875, "num_input_tokens_seen": 297179212, "step": 5304 }, { "epoch": 11.815144766146993, "grad_norm": 21.798635482788086, "learning_rate": 1e-06, "loss": 0.4382, "num_input_tokens_seen": 297237840, "step": 5305 }, { "epoch": 11.815144766146993, "loss": 0.5149092674255371, "loss_ce": 0.00013874081196263433, "loss_iou": 0.2353515625, "loss_num": 0.0087890625, "loss_xval": 0.515625, "num_input_tokens_seen": 297237840, "step": 5305 }, { "epoch": 11.817371937639198, "grad_norm": 15.751426696777344, "learning_rate": 1e-06, "loss": 0.5304, "num_input_tokens_seen": 297295812, "step": 5306 }, { "epoch": 11.817371937639198, "loss": 0.5815914869308472, "loss_ce": 0.0001705837930785492, "loss_iou": 0.240234375, "loss_num": 0.020263671875, "loss_xval": 0.58203125, "num_input_tokens_seen": 297295812, "step": 5306 }, { "epoch": 11.819599109131403, "grad_norm": 30.267009735107422, "learning_rate": 1e-06, "loss": 0.6813, "num_input_tokens_seen": 297351380, "step": 5307 }, { "epoch": 11.819599109131403, "loss": 0.7589607238769531, "loss_ce": 0.000171684252563864, "loss_iou": 0.3046875, "loss_num": 0.0302734375, "loss_xval": 0.7578125, "num_input_tokens_seen": 297351380, "step": 5307 }, { "epoch": 11.821826280623608, "grad_norm": 16.236085891723633, "learning_rate": 1e-06, "loss": 0.5397, "num_input_tokens_seen": 297406340, "step": 5308 }, { "epoch": 11.821826280623608, "loss": 0.5382388830184937, "loss_ce": 0.00015292633906938136, "loss_iou": 0.2412109375, "loss_num": 0.01141357421875, "loss_xval": 0.5390625, "num_input_tokens_seen": 297406340, "step": 5308 }, { "epoch": 11.824053452115812, "grad_norm": 15.723315238952637, "learning_rate": 1e-06, "loss": 0.5465, "num_input_tokens_seen": 297464004, "step": 5309 }, { "epoch": 11.824053452115812, "loss": 0.6881057024002075, "loss_ce": 0.00011742223432520404, "loss_iou": 0.2890625, "loss_num": 0.021728515625, "loss_xval": 0.6875, "num_input_tokens_seen": 297464004, "step": 5309 }, { "epoch": 11.826280623608017, "grad_norm": 26.35066795349121, "learning_rate": 1e-06, "loss": 0.4958, "num_input_tokens_seen": 297520084, "step": 5310 }, { "epoch": 11.826280623608017, "loss": 0.4250958263874054, "loss_ce": 0.00016907165991142392, "loss_iou": 0.1748046875, "loss_num": 0.01495361328125, "loss_xval": 0.42578125, "num_input_tokens_seen": 297520084, "step": 5310 }, { "epoch": 11.828507795100222, "grad_norm": 17.730464935302734, "learning_rate": 1e-06, "loss": 0.6199, "num_input_tokens_seen": 297575516, "step": 5311 }, { "epoch": 11.828507795100222, "loss": 0.7365161180496216, "loss_ce": 0.0008594049722887576, "loss_iou": 0.291015625, "loss_num": 0.0306396484375, "loss_xval": 0.734375, "num_input_tokens_seen": 297575516, "step": 5311 }, { "epoch": 11.830734966592427, "grad_norm": 23.806507110595703, "learning_rate": 1e-06, "loss": 0.5492, "num_input_tokens_seen": 297631068, "step": 5312 }, { "epoch": 11.830734966592427, "loss": 0.6309938430786133, "loss_ce": 0.00013447852688841522, "loss_iou": 0.275390625, "loss_num": 0.016357421875, "loss_xval": 0.6328125, "num_input_tokens_seen": 297631068, "step": 5312 }, { "epoch": 11.832962138084632, "grad_norm": 14.093077659606934, "learning_rate": 1e-06, "loss": 0.4271, "num_input_tokens_seen": 297685616, "step": 5313 }, { "epoch": 11.832962138084632, "loss": 0.23368564248085022, "loss_ce": 0.00019564517424441874, "loss_iou": 0.10205078125, "loss_num": 0.00579833984375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 297685616, "step": 5313 }, { "epoch": 11.835189309576837, "grad_norm": 22.90943717956543, "learning_rate": 1e-06, "loss": 0.4905, "num_input_tokens_seen": 297743540, "step": 5314 }, { "epoch": 11.835189309576837, "loss": 0.39391031861305237, "loss_ce": 0.00011150065256515518, "loss_iou": 0.166015625, "loss_num": 0.01239013671875, "loss_xval": 0.39453125, "num_input_tokens_seen": 297743540, "step": 5314 }, { "epoch": 11.837416481069042, "grad_norm": 16.595645904541016, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 297801180, "step": 5315 }, { "epoch": 11.837416481069042, "loss": 0.6017987728118896, "loss_ce": 0.00011419894872233272, "loss_iou": 0.267578125, "loss_num": 0.0135498046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 297801180, "step": 5315 }, { "epoch": 11.839643652561247, "grad_norm": 17.16973876953125, "learning_rate": 1e-06, "loss": 0.4081, "num_input_tokens_seen": 297858728, "step": 5316 }, { "epoch": 11.839643652561247, "loss": 0.3546229302883148, "loss_ce": 0.00013074639718979597, "loss_iou": 0.162109375, "loss_num": 0.006072998046875, "loss_xval": 0.35546875, "num_input_tokens_seen": 297858728, "step": 5316 }, { "epoch": 11.841870824053451, "grad_norm": 18.856740951538086, "learning_rate": 1e-06, "loss": 0.318, "num_input_tokens_seen": 297915964, "step": 5317 }, { "epoch": 11.841870824053451, "loss": 0.23262880742549896, "loss_ce": 0.0001001132040983066, "loss_iou": 0.10009765625, "loss_num": 0.006439208984375, "loss_xval": 0.232421875, "num_input_tokens_seen": 297915964, "step": 5317 }, { "epoch": 11.844097995545656, "grad_norm": 16.652254104614258, "learning_rate": 1e-06, "loss": 0.5737, "num_input_tokens_seen": 297971460, "step": 5318 }, { "epoch": 11.844097995545656, "loss": 0.6619553565979004, "loss_ce": 0.00033427210291847587, "loss_iou": 0.27734375, "loss_num": 0.021484375, "loss_xval": 0.66015625, "num_input_tokens_seen": 297971460, "step": 5318 }, { "epoch": 11.846325167037861, "grad_norm": 27.506261825561523, "learning_rate": 1e-06, "loss": 0.4867, "num_input_tokens_seen": 298027772, "step": 5319 }, { "epoch": 11.846325167037861, "loss": 0.42390304803848267, "loss_ce": 0.00013593377661891282, "loss_iou": 0.1845703125, "loss_num": 0.0111083984375, "loss_xval": 0.423828125, "num_input_tokens_seen": 298027772, "step": 5319 }, { "epoch": 11.848552338530066, "grad_norm": 22.7269229888916, "learning_rate": 1e-06, "loss": 0.4718, "num_input_tokens_seen": 298084460, "step": 5320 }, { "epoch": 11.848552338530066, "loss": 0.469988614320755, "loss_ce": 0.0007503442466259003, "loss_iou": 0.2001953125, "loss_num": 0.01397705078125, "loss_xval": 0.46875, "num_input_tokens_seen": 298084460, "step": 5320 }, { "epoch": 11.85077951002227, "grad_norm": 18.758642196655273, "learning_rate": 1e-06, "loss": 0.4439, "num_input_tokens_seen": 298138912, "step": 5321 }, { "epoch": 11.85077951002227, "loss": 0.5238094925880432, "loss_ce": 0.00012785423314198852, "loss_iou": 0.220703125, "loss_num": 0.016357421875, "loss_xval": 0.5234375, "num_input_tokens_seen": 298138912, "step": 5321 }, { "epoch": 11.853006681514476, "grad_norm": 20.615978240966797, "learning_rate": 1e-06, "loss": 0.5517, "num_input_tokens_seen": 298193028, "step": 5322 }, { "epoch": 11.853006681514476, "loss": 0.5698904991149902, "loss_ce": 0.00018834380898624659, "loss_iou": 0.2431640625, "loss_num": 0.0166015625, "loss_xval": 0.5703125, "num_input_tokens_seen": 298193028, "step": 5322 }, { "epoch": 11.855233853006682, "grad_norm": 16.021848678588867, "learning_rate": 1e-06, "loss": 0.342, "num_input_tokens_seen": 298248720, "step": 5323 }, { "epoch": 11.855233853006682, "loss": 0.24690712988376617, "loss_ce": 0.00014198827557265759, "loss_iou": 0.10693359375, "loss_num": 0.00665283203125, "loss_xval": 0.2470703125, "num_input_tokens_seen": 298248720, "step": 5323 }, { "epoch": 11.857461024498887, "grad_norm": 23.891801834106445, "learning_rate": 1e-06, "loss": 0.561, "num_input_tokens_seen": 298303808, "step": 5324 }, { "epoch": 11.857461024498887, "loss": 0.6983004808425903, "loss_ce": 0.000302450789604336, "loss_iou": 0.255859375, "loss_num": 0.037109375, "loss_xval": 0.69921875, "num_input_tokens_seen": 298303808, "step": 5324 }, { "epoch": 11.859688195991092, "grad_norm": 18.63146209716797, "learning_rate": 1e-06, "loss": 0.414, "num_input_tokens_seen": 298358804, "step": 5325 }, { "epoch": 11.859688195991092, "loss": 0.35854852199554443, "loss_ce": 0.00015006760077085346, "loss_iou": 0.1572265625, "loss_num": 0.00897216796875, "loss_xval": 0.359375, "num_input_tokens_seen": 298358804, "step": 5325 }, { "epoch": 11.861915367483297, "grad_norm": 19.940717697143555, "learning_rate": 1e-06, "loss": 0.4274, "num_input_tokens_seen": 298418072, "step": 5326 }, { "epoch": 11.861915367483297, "loss": 0.2874617576599121, "loss_ce": 0.00010823093907674775, "loss_iou": 0.130859375, "loss_num": 0.00494384765625, "loss_xval": 0.287109375, "num_input_tokens_seen": 298418072, "step": 5326 }, { "epoch": 11.864142538975502, "grad_norm": 23.029714584350586, "learning_rate": 1e-06, "loss": 0.4766, "num_input_tokens_seen": 298476076, "step": 5327 }, { "epoch": 11.864142538975502, "loss": 0.5553493499755859, "loss_ce": 0.00017354739247821271, "loss_iou": 0.2392578125, "loss_num": 0.0155029296875, "loss_xval": 0.5546875, "num_input_tokens_seen": 298476076, "step": 5327 }, { "epoch": 11.866369710467707, "grad_norm": 13.412109375, "learning_rate": 1e-06, "loss": 0.4057, "num_input_tokens_seen": 298532080, "step": 5328 }, { "epoch": 11.866369710467707, "loss": 0.3781711161136627, "loss_ce": 0.00011935001384699717, "loss_iou": 0.1494140625, "loss_num": 0.0157470703125, "loss_xval": 0.37890625, "num_input_tokens_seen": 298532080, "step": 5328 }, { "epoch": 11.868596881959911, "grad_norm": 22.732973098754883, "learning_rate": 1e-06, "loss": 0.5291, "num_input_tokens_seen": 298585788, "step": 5329 }, { "epoch": 11.868596881959911, "loss": 0.5079842209815979, "loss_ce": 0.00017171379295177758, "loss_iou": 0.2314453125, "loss_num": 0.00885009765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 298585788, "step": 5329 }, { "epoch": 11.870824053452116, "grad_norm": 22.004831314086914, "learning_rate": 1e-06, "loss": 0.555, "num_input_tokens_seen": 298641296, "step": 5330 }, { "epoch": 11.870824053452116, "loss": 0.7135499715805054, "loss_ce": 0.00017109722830355167, "loss_iou": 0.330078125, "loss_num": 0.01043701171875, "loss_xval": 0.71484375, "num_input_tokens_seen": 298641296, "step": 5330 }, { "epoch": 11.873051224944321, "grad_norm": 33.82167434692383, "learning_rate": 1e-06, "loss": 0.7149, "num_input_tokens_seen": 298697308, "step": 5331 }, { "epoch": 11.873051224944321, "loss": 0.9743344187736511, "loss_ce": 0.0002132941735908389, "loss_iou": 0.4296875, "loss_num": 0.0225830078125, "loss_xval": 0.97265625, "num_input_tokens_seen": 298697308, "step": 5331 }, { "epoch": 11.875278396436526, "grad_norm": 23.670555114746094, "learning_rate": 1e-06, "loss": 0.5041, "num_input_tokens_seen": 298751336, "step": 5332 }, { "epoch": 11.875278396436526, "loss": 0.5620195865631104, "loss_ce": 0.00012994898133911192, "loss_iou": 0.26171875, "loss_num": 0.007568359375, "loss_xval": 0.5625, "num_input_tokens_seen": 298751336, "step": 5332 }, { "epoch": 11.877505567928731, "grad_norm": 19.82380485534668, "learning_rate": 1e-06, "loss": 0.6098, "num_input_tokens_seen": 298808312, "step": 5333 }, { "epoch": 11.877505567928731, "loss": 0.33348560333251953, "loss_ce": 0.00011158882989548147, "loss_iou": 0.1416015625, "loss_num": 0.0101318359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 298808312, "step": 5333 }, { "epoch": 11.879732739420936, "grad_norm": 13.22695541381836, "learning_rate": 1e-06, "loss": 0.3795, "num_input_tokens_seen": 298864232, "step": 5334 }, { "epoch": 11.879732739420936, "loss": 0.37341421842575073, "loss_ce": 0.00012321470421738923, "loss_iou": 0.146484375, "loss_num": 0.0162353515625, "loss_xval": 0.373046875, "num_input_tokens_seen": 298864232, "step": 5334 }, { "epoch": 11.88195991091314, "grad_norm": 19.371503829956055, "learning_rate": 1e-06, "loss": 0.5704, "num_input_tokens_seen": 298919880, "step": 5335 }, { "epoch": 11.88195991091314, "loss": 0.5620713829994202, "loss_ce": 0.00012070624507032335, "loss_iou": 0.2392578125, "loss_num": 0.0166015625, "loss_xval": 0.5625, "num_input_tokens_seen": 298919880, "step": 5335 }, { "epoch": 11.884187082405345, "grad_norm": 20.38959503173828, "learning_rate": 1e-06, "loss": 0.4472, "num_input_tokens_seen": 298971852, "step": 5336 }, { "epoch": 11.884187082405345, "loss": 0.3908699154853821, "loss_ce": 0.0001228695473400876, "loss_iou": 0.17578125, "loss_num": 0.00762939453125, "loss_xval": 0.390625, "num_input_tokens_seen": 298971852, "step": 5336 }, { "epoch": 11.88641425389755, "grad_norm": 57.669029235839844, "learning_rate": 1e-06, "loss": 0.6538, "num_input_tokens_seen": 299029644, "step": 5337 }, { "epoch": 11.88641425389755, "loss": 0.6255764365196228, "loss_ce": 0.0011867830762639642, "loss_iou": 0.265625, "loss_num": 0.018798828125, "loss_xval": 0.625, "num_input_tokens_seen": 299029644, "step": 5337 }, { "epoch": 11.888641425389755, "grad_norm": 18.04958724975586, "learning_rate": 1e-06, "loss": 0.306, "num_input_tokens_seen": 299085212, "step": 5338 }, { "epoch": 11.888641425389755, "loss": 0.3291063904762268, "loss_ce": 0.00012687721755355597, "loss_iou": 0.1552734375, "loss_num": 0.00384521484375, "loss_xval": 0.328125, "num_input_tokens_seen": 299085212, "step": 5338 }, { "epoch": 11.89086859688196, "grad_norm": 26.414382934570312, "learning_rate": 1e-06, "loss": 0.6099, "num_input_tokens_seen": 299142868, "step": 5339 }, { "epoch": 11.89086859688196, "loss": 0.7533276081085205, "loss_ce": 0.0001537687494419515, "loss_iou": 0.30078125, "loss_num": 0.030517578125, "loss_xval": 0.75390625, "num_input_tokens_seen": 299142868, "step": 5339 }, { "epoch": 11.893095768374165, "grad_norm": 53.97526168823242, "learning_rate": 1e-06, "loss": 0.6077, "num_input_tokens_seen": 299200428, "step": 5340 }, { "epoch": 11.893095768374165, "loss": 0.7335488796234131, "loss_ce": 0.0001504583196947351, "loss_iou": 0.3046875, "loss_num": 0.024658203125, "loss_xval": 0.734375, "num_input_tokens_seen": 299200428, "step": 5340 }, { "epoch": 11.89532293986637, "grad_norm": 20.818517684936523, "learning_rate": 1e-06, "loss": 0.4193, "num_input_tokens_seen": 299255160, "step": 5341 }, { "epoch": 11.89532293986637, "loss": 0.4669525623321533, "loss_ce": 0.0001404241193085909, "loss_iou": 0.20703125, "loss_num": 0.0103759765625, "loss_xval": 0.466796875, "num_input_tokens_seen": 299255160, "step": 5341 }, { "epoch": 11.897550111358575, "grad_norm": 24.498870849609375, "learning_rate": 1e-06, "loss": 0.5564, "num_input_tokens_seen": 299309776, "step": 5342 }, { "epoch": 11.897550111358575, "loss": 0.5995131731033325, "loss_ce": 0.00014790653949603438, "loss_iou": 0.26953125, "loss_num": 0.01202392578125, "loss_xval": 0.59765625, "num_input_tokens_seen": 299309776, "step": 5342 }, { "epoch": 11.89977728285078, "grad_norm": 23.035675048828125, "learning_rate": 1e-06, "loss": 0.5478, "num_input_tokens_seen": 299366904, "step": 5343 }, { "epoch": 11.89977728285078, "loss": 0.5480039715766907, "loss_ce": 0.00015243007510434836, "loss_iou": 0.228515625, "loss_num": 0.018310546875, "loss_xval": 0.546875, "num_input_tokens_seen": 299366904, "step": 5343 }, { "epoch": 11.902004454342984, "grad_norm": 25.75360679626465, "learning_rate": 1e-06, "loss": 0.526, "num_input_tokens_seen": 299421724, "step": 5344 }, { "epoch": 11.902004454342984, "loss": 0.39622652530670166, "loss_ce": 0.00010832876432687044, "loss_iou": 0.17578125, "loss_num": 0.00885009765625, "loss_xval": 0.396484375, "num_input_tokens_seen": 299421724, "step": 5344 }, { "epoch": 11.90423162583519, "grad_norm": 15.133755683898926, "learning_rate": 1e-06, "loss": 0.4379, "num_input_tokens_seen": 299478644, "step": 5345 }, { "epoch": 11.90423162583519, "loss": 0.515461802482605, "loss_ce": 0.00020301563199609518, "loss_iou": 0.224609375, "loss_num": 0.0130615234375, "loss_xval": 0.515625, "num_input_tokens_seen": 299478644, "step": 5345 }, { "epoch": 11.906458797327394, "grad_norm": 14.36051082611084, "learning_rate": 1e-06, "loss": 0.4898, "num_input_tokens_seen": 299534612, "step": 5346 }, { "epoch": 11.906458797327394, "loss": 0.5628827810287476, "loss_ce": 0.00013867080269847065, "loss_iou": 0.255859375, "loss_num": 0.01025390625, "loss_xval": 0.5625, "num_input_tokens_seen": 299534612, "step": 5346 }, { "epoch": 11.908685968819599, "grad_norm": 15.240983009338379, "learning_rate": 1e-06, "loss": 0.5047, "num_input_tokens_seen": 299590416, "step": 5347 }, { "epoch": 11.908685968819599, "loss": 0.558215856552124, "loss_ce": 0.00011038097727578133, "loss_iou": 0.2490234375, "loss_num": 0.01202392578125, "loss_xval": 0.55859375, "num_input_tokens_seen": 299590416, "step": 5347 }, { "epoch": 11.910913140311804, "grad_norm": 27.012542724609375, "learning_rate": 1e-06, "loss": 0.6381, "num_input_tokens_seen": 299642864, "step": 5348 }, { "epoch": 11.910913140311804, "loss": 0.6602959632873535, "loss_ce": 0.00013973098248243332, "loss_iou": 0.28515625, "loss_num": 0.017822265625, "loss_xval": 0.66015625, "num_input_tokens_seen": 299642864, "step": 5348 }, { "epoch": 11.913140311804009, "grad_norm": 23.465747833251953, "learning_rate": 1e-06, "loss": 0.6058, "num_input_tokens_seen": 299696992, "step": 5349 }, { "epoch": 11.913140311804009, "loss": 0.6304618716239929, "loss_ce": 0.00033489661291241646, "loss_iou": 0.279296875, "loss_num": 0.01397705078125, "loss_xval": 0.62890625, "num_input_tokens_seen": 299696992, "step": 5349 }, { "epoch": 11.915367483296214, "grad_norm": 18.98960304260254, "learning_rate": 1e-06, "loss": 0.5868, "num_input_tokens_seen": 299753692, "step": 5350 }, { "epoch": 11.915367483296214, "loss": 0.6957433223724365, "loss_ce": 0.00018664819072000682, "loss_iou": 0.29296875, "loss_num": 0.021728515625, "loss_xval": 0.6953125, "num_input_tokens_seen": 299753692, "step": 5350 }, { "epoch": 11.917594654788418, "grad_norm": 18.785682678222656, "learning_rate": 1e-06, "loss": 0.5032, "num_input_tokens_seen": 299807860, "step": 5351 }, { "epoch": 11.917594654788418, "loss": 0.48331549763679504, "loss_ce": 0.00010016474698204547, "loss_iou": 0.2060546875, "loss_num": 0.01409912109375, "loss_xval": 0.482421875, "num_input_tokens_seen": 299807860, "step": 5351 }, { "epoch": 11.919821826280623, "grad_norm": 17.613218307495117, "learning_rate": 1e-06, "loss": 0.6931, "num_input_tokens_seen": 299864664, "step": 5352 }, { "epoch": 11.919821826280623, "loss": 0.5709285736083984, "loss_ce": 0.00012783391866832972, "loss_iou": 0.2392578125, "loss_num": 0.0184326171875, "loss_xval": 0.5703125, "num_input_tokens_seen": 299864664, "step": 5352 }, { "epoch": 11.922048997772828, "grad_norm": 27.25409698486328, "learning_rate": 1e-06, "loss": 0.5266, "num_input_tokens_seen": 299919748, "step": 5353 }, { "epoch": 11.922048997772828, "loss": 0.41632139682769775, "loss_ce": 0.00030577427241951227, "loss_iou": 0.1806640625, "loss_num": 0.0108642578125, "loss_xval": 0.416015625, "num_input_tokens_seen": 299919748, "step": 5353 }, { "epoch": 11.924276169265033, "grad_norm": 23.497005462646484, "learning_rate": 1e-06, "loss": 0.4849, "num_input_tokens_seen": 299977172, "step": 5354 }, { "epoch": 11.924276169265033, "loss": 0.5234737396240234, "loss_ce": 0.00015833397628739476, "loss_iou": 0.2314453125, "loss_num": 0.01214599609375, "loss_xval": 0.5234375, "num_input_tokens_seen": 299977172, "step": 5354 }, { "epoch": 11.926503340757238, "grad_norm": 24.323163986206055, "learning_rate": 1e-06, "loss": 0.4479, "num_input_tokens_seen": 300033976, "step": 5355 }, { "epoch": 11.926503340757238, "loss": 0.4954897165298462, "loss_ce": 0.00025045976508408785, "loss_iou": 0.2216796875, "loss_num": 0.0103759765625, "loss_xval": 0.49609375, "num_input_tokens_seen": 300033976, "step": 5355 }, { "epoch": 11.928730512249443, "grad_norm": 12.777032852172852, "learning_rate": 1e-06, "loss": 0.3908, "num_input_tokens_seen": 300089880, "step": 5356 }, { "epoch": 11.928730512249443, "loss": 0.38708722591400146, "loss_ce": 0.00012434215750545263, "loss_iou": 0.1708984375, "loss_num": 0.00921630859375, "loss_xval": 0.38671875, "num_input_tokens_seen": 300089880, "step": 5356 }, { "epoch": 11.930957683741648, "grad_norm": 18.82436180114746, "learning_rate": 1e-06, "loss": 0.3538, "num_input_tokens_seen": 300144144, "step": 5357 }, { "epoch": 11.930957683741648, "loss": 0.4027044475078583, "loss_ce": 0.00011656976130325347, "loss_iou": 0.1796875, "loss_num": 0.00848388671875, "loss_xval": 0.40234375, "num_input_tokens_seen": 300144144, "step": 5357 }, { "epoch": 11.933184855233852, "grad_norm": 15.833600044250488, "learning_rate": 1e-06, "loss": 0.3947, "num_input_tokens_seen": 300199272, "step": 5358 }, { "epoch": 11.933184855233852, "loss": 0.4046657085418701, "loss_ce": 0.00012470208457671106, "loss_iou": 0.1767578125, "loss_num": 0.0103759765625, "loss_xval": 0.404296875, "num_input_tokens_seen": 300199272, "step": 5358 }, { "epoch": 11.935412026726057, "grad_norm": 19.36171531677246, "learning_rate": 1e-06, "loss": 0.7569, "num_input_tokens_seen": 300253388, "step": 5359 }, { "epoch": 11.935412026726057, "loss": 0.6293305158615112, "loss_ce": 0.00018012213695328683, "loss_iou": 0.28125, "loss_num": 0.01336669921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 300253388, "step": 5359 }, { "epoch": 11.937639198218262, "grad_norm": 26.180601119995117, "learning_rate": 1e-06, "loss": 0.5059, "num_input_tokens_seen": 300307808, "step": 5360 }, { "epoch": 11.937639198218262, "loss": 0.5130758881568909, "loss_ce": 0.0001364394265692681, "loss_iou": 0.2373046875, "loss_num": 0.007659912109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 300307808, "step": 5360 }, { "epoch": 11.939866369710467, "grad_norm": 15.848928451538086, "learning_rate": 1e-06, "loss": 0.523, "num_input_tokens_seen": 300363860, "step": 5361 }, { "epoch": 11.939866369710467, "loss": 0.4611893892288208, "loss_ce": 0.0001298237475566566, "loss_iou": 0.201171875, "loss_num": 0.01153564453125, "loss_xval": 0.4609375, "num_input_tokens_seen": 300363860, "step": 5361 }, { "epoch": 11.942093541202672, "grad_norm": 17.287405014038086, "learning_rate": 1e-06, "loss": 0.5096, "num_input_tokens_seen": 300421080, "step": 5362 }, { "epoch": 11.942093541202672, "loss": 0.33068329095840454, "loss_ce": 0.00011686367361107841, "loss_iou": 0.1376953125, "loss_num": 0.01092529296875, "loss_xval": 0.330078125, "num_input_tokens_seen": 300421080, "step": 5362 }, { "epoch": 11.944320712694877, "grad_norm": 18.37860870361328, "learning_rate": 1e-06, "loss": 0.5593, "num_input_tokens_seen": 300477264, "step": 5363 }, { "epoch": 11.944320712694877, "loss": 0.5681136846542358, "loss_ce": 0.00024256901815533638, "loss_iou": 0.25, "loss_num": 0.01318359375, "loss_xval": 0.56640625, "num_input_tokens_seen": 300477264, "step": 5363 }, { "epoch": 11.946547884187082, "grad_norm": 23.356395721435547, "learning_rate": 1e-06, "loss": 0.4799, "num_input_tokens_seen": 300535224, "step": 5364 }, { "epoch": 11.946547884187082, "loss": 0.4557165503501892, "loss_ce": 0.00015014578821137547, "loss_iou": 0.19921875, "loss_num": 0.01165771484375, "loss_xval": 0.455078125, "num_input_tokens_seen": 300535224, "step": 5364 }, { "epoch": 11.948775055679288, "grad_norm": 23.67485809326172, "learning_rate": 1e-06, "loss": 0.7032, "num_input_tokens_seen": 300589224, "step": 5365 }, { "epoch": 11.948775055679288, "loss": 0.7572053670883179, "loss_ce": 0.00012526212958618999, "loss_iou": 0.298828125, "loss_num": 0.031494140625, "loss_xval": 0.7578125, "num_input_tokens_seen": 300589224, "step": 5365 }, { "epoch": 11.951002227171493, "grad_norm": 18.73485565185547, "learning_rate": 1e-06, "loss": 0.7542, "num_input_tokens_seen": 300643164, "step": 5366 }, { "epoch": 11.951002227171493, "loss": 0.9482482075691223, "loss_ce": 0.0002501863054931164, "loss_iou": 0.357421875, "loss_num": 0.04638671875, "loss_xval": 0.94921875, "num_input_tokens_seen": 300643164, "step": 5366 }, { "epoch": 11.953229398663698, "grad_norm": 22.971054077148438, "learning_rate": 1e-06, "loss": 0.4646, "num_input_tokens_seen": 300700556, "step": 5367 }, { "epoch": 11.953229398663698, "loss": 0.35095998644828796, "loss_ce": 0.0001299169525736943, "loss_iou": 0.1630859375, "loss_num": 0.004913330078125, "loss_xval": 0.3515625, "num_input_tokens_seen": 300700556, "step": 5367 }, { "epoch": 11.955456570155903, "grad_norm": 20.057994842529297, "learning_rate": 1e-06, "loss": 0.4387, "num_input_tokens_seen": 300755556, "step": 5368 }, { "epoch": 11.955456570155903, "loss": 0.41150492429733276, "loss_ce": 0.00012799599790014327, "loss_iou": 0.18359375, "loss_num": 0.00885009765625, "loss_xval": 0.412109375, "num_input_tokens_seen": 300755556, "step": 5368 }, { "epoch": 11.957683741648108, "grad_norm": 17.977746963500977, "learning_rate": 1e-06, "loss": 0.5373, "num_input_tokens_seen": 300809448, "step": 5369 }, { "epoch": 11.957683741648108, "loss": 0.5350706577301025, "loss_ce": 0.00015853876539040357, "loss_iou": 0.248046875, "loss_num": 0.007659912109375, "loss_xval": 0.53515625, "num_input_tokens_seen": 300809448, "step": 5369 }, { "epoch": 11.959910913140313, "grad_norm": 31.153818130493164, "learning_rate": 1e-06, "loss": 0.4172, "num_input_tokens_seen": 300863900, "step": 5370 }, { "epoch": 11.959910913140313, "loss": 0.4988964796066284, "loss_ce": 0.00011718348105205223, "loss_iou": 0.185546875, "loss_num": 0.025390625, "loss_xval": 0.498046875, "num_input_tokens_seen": 300863900, "step": 5370 }, { "epoch": 11.962138084632517, "grad_norm": 20.06622314453125, "learning_rate": 1e-06, "loss": 0.385, "num_input_tokens_seen": 300919612, "step": 5371 }, { "epoch": 11.962138084632517, "loss": 0.37853488326072693, "loss_ce": 0.00011691114195855334, "loss_iou": 0.1728515625, "loss_num": 0.006683349609375, "loss_xval": 0.37890625, "num_input_tokens_seen": 300919612, "step": 5371 }, { "epoch": 11.964365256124722, "grad_norm": 22.610204696655273, "learning_rate": 1e-06, "loss": 0.4837, "num_input_tokens_seen": 300975088, "step": 5372 }, { "epoch": 11.964365256124722, "loss": 0.5133309364318848, "loss_ce": 0.00014736468438059092, "loss_iou": 0.2314453125, "loss_num": 0.01025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 300975088, "step": 5372 }, { "epoch": 11.966592427616927, "grad_norm": 23.854276657104492, "learning_rate": 1e-06, "loss": 0.5991, "num_input_tokens_seen": 301032024, "step": 5373 }, { "epoch": 11.966592427616927, "loss": 0.6122125387191772, "loss_ce": 0.00015198803157545626, "loss_iou": 0.25390625, "loss_num": 0.0205078125, "loss_xval": 0.61328125, "num_input_tokens_seen": 301032024, "step": 5373 }, { "epoch": 11.968819599109132, "grad_norm": 14.804436683654785, "learning_rate": 1e-06, "loss": 0.6603, "num_input_tokens_seen": 301087604, "step": 5374 }, { "epoch": 11.968819599109132, "loss": 0.8352023363113403, "loss_ce": 0.0001192896525026299, "loss_iou": 0.345703125, "loss_num": 0.0289306640625, "loss_xval": 0.8359375, "num_input_tokens_seen": 301087604, "step": 5374 }, { "epoch": 11.971046770601337, "grad_norm": 22.94840431213379, "learning_rate": 1e-06, "loss": 0.622, "num_input_tokens_seen": 301142676, "step": 5375 }, { "epoch": 11.971046770601337, "loss": 0.6568686962127686, "loss_ce": 0.0001304006582358852, "loss_iou": 0.279296875, "loss_num": 0.019287109375, "loss_xval": 0.65625, "num_input_tokens_seen": 301142676, "step": 5375 }, { "epoch": 11.973273942093542, "grad_norm": 12.997676849365234, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 301201488, "step": 5376 }, { "epoch": 11.973273942093542, "loss": 0.3666253089904785, "loss_ce": 0.00010921184730250388, "loss_iou": 0.1650390625, "loss_num": 0.00726318359375, "loss_xval": 0.3671875, "num_input_tokens_seen": 301201488, "step": 5376 }, { "epoch": 11.975501113585747, "grad_norm": 28.56017303466797, "learning_rate": 1e-06, "loss": 0.5158, "num_input_tokens_seen": 301259584, "step": 5377 }, { "epoch": 11.975501113585747, "loss": 0.3714648485183716, "loss_ce": 0.00012695527402684093, "loss_iou": 0.16796875, "loss_num": 0.006866455078125, "loss_xval": 0.37109375, "num_input_tokens_seen": 301259584, "step": 5377 }, { "epoch": 11.977728285077951, "grad_norm": 18.66181755065918, "learning_rate": 1e-06, "loss": 0.5058, "num_input_tokens_seen": 301316356, "step": 5378 }, { "epoch": 11.977728285077951, "loss": 0.5127632021903992, "loss_ce": 0.00018997653387486935, "loss_iou": 0.2353515625, "loss_num": 0.0084228515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 301316356, "step": 5378 }, { "epoch": 11.979955456570156, "grad_norm": 21.85940170288086, "learning_rate": 1e-06, "loss": 0.5486, "num_input_tokens_seen": 301370076, "step": 5379 }, { "epoch": 11.979955456570156, "loss": 0.4443596601486206, "loss_ce": 0.00014578478294424713, "loss_iou": 0.2021484375, "loss_num": 0.00811767578125, "loss_xval": 0.443359375, "num_input_tokens_seen": 301370076, "step": 5379 }, { "epoch": 11.982182628062361, "grad_norm": 17.551218032836914, "learning_rate": 1e-06, "loss": 0.5728, "num_input_tokens_seen": 301427120, "step": 5380 }, { "epoch": 11.982182628062361, "loss": 0.7055622935295105, "loss_ce": 0.00011793218436650932, "loss_iou": 0.310546875, "loss_num": 0.0167236328125, "loss_xval": 0.70703125, "num_input_tokens_seen": 301427120, "step": 5380 }, { "epoch": 11.984409799554566, "grad_norm": 14.2471284866333, "learning_rate": 1e-06, "loss": 0.4307, "num_input_tokens_seen": 301480952, "step": 5381 }, { "epoch": 11.984409799554566, "loss": 0.4769345819950104, "loss_ce": 0.0001279542047996074, "loss_iou": 0.20703125, "loss_num": 0.0126953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 301480952, "step": 5381 }, { "epoch": 11.98663697104677, "grad_norm": 23.872955322265625, "learning_rate": 1e-06, "loss": 0.6408, "num_input_tokens_seen": 301536748, "step": 5382 }, { "epoch": 11.98663697104677, "loss": 0.6590736508369446, "loss_ce": 0.00013808724179398268, "loss_iou": 0.29296875, "loss_num": 0.014404296875, "loss_xval": 0.66015625, "num_input_tokens_seen": 301536748, "step": 5382 }, { "epoch": 11.988864142538976, "grad_norm": 27.215002059936523, "learning_rate": 1e-06, "loss": 0.6737, "num_input_tokens_seen": 301591624, "step": 5383 }, { "epoch": 11.988864142538976, "loss": 0.7730184197425842, "loss_ce": 0.0003133205755148083, "loss_iou": 0.341796875, "loss_num": 0.01806640625, "loss_xval": 0.7734375, "num_input_tokens_seen": 301591624, "step": 5383 }, { "epoch": 11.99109131403118, "grad_norm": 16.59343910217285, "learning_rate": 1e-06, "loss": 0.4352, "num_input_tokens_seen": 301651368, "step": 5384 }, { "epoch": 11.99109131403118, "loss": 0.2988908886909485, "loss_ce": 0.00024585792561993003, "loss_iou": 0.1328125, "loss_num": 0.00677490234375, "loss_xval": 0.298828125, "num_input_tokens_seen": 301651368, "step": 5384 }, { "epoch": 11.993318485523385, "grad_norm": 17.6257266998291, "learning_rate": 1e-06, "loss": 0.5236, "num_input_tokens_seen": 301708848, "step": 5385 }, { "epoch": 11.993318485523385, "loss": 0.3951897919178009, "loss_ce": 0.000170250961673446, "loss_iou": 0.16796875, "loss_num": 0.01171875, "loss_xval": 0.39453125, "num_input_tokens_seen": 301708848, "step": 5385 }, { "epoch": 11.99554565701559, "grad_norm": 18.835487365722656, "learning_rate": 1e-06, "loss": 0.4832, "num_input_tokens_seen": 301766832, "step": 5386 }, { "epoch": 11.99554565701559, "loss": 0.45329028367996216, "loss_ce": 0.00016528656124137342, "loss_iou": 0.2060546875, "loss_num": 0.0081787109375, "loss_xval": 0.453125, "num_input_tokens_seen": 301766832, "step": 5386 }, { "epoch": 11.997772828507795, "grad_norm": 23.853605270385742, "learning_rate": 1e-06, "loss": 0.6195, "num_input_tokens_seen": 301822292, "step": 5387 }, { "epoch": 11.997772828507795, "loss": 0.6632393598556519, "loss_ce": 0.00015345893916673958, "loss_iou": 0.26953125, "loss_num": 0.0242919921875, "loss_xval": 0.6640625, "num_input_tokens_seen": 301822292, "step": 5387 }, { "epoch": 12.0, "grad_norm": 18.651371002197266, "learning_rate": 1e-06, "loss": 0.4679, "num_input_tokens_seen": 301876956, "step": 5388 }, { "epoch": 12.0, "loss": 0.5064892768859863, "loss_ce": 0.00014158777776174247, "loss_iou": 0.228515625, "loss_num": 0.00994873046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 301876956, "step": 5388 }, { "epoch": 12.002227171492205, "grad_norm": 24.909053802490234, "learning_rate": 1e-06, "loss": 0.6428, "num_input_tokens_seen": 301930580, "step": 5389 }, { "epoch": 12.002227171492205, "loss": 0.7445248365402222, "loss_ce": 0.00014005119737703353, "loss_iou": 0.314453125, "loss_num": 0.0228271484375, "loss_xval": 0.74609375, "num_input_tokens_seen": 301930580, "step": 5389 }, { "epoch": 12.00445434298441, "grad_norm": 23.323347091674805, "learning_rate": 1e-06, "loss": 0.5544, "num_input_tokens_seen": 301986100, "step": 5390 }, { "epoch": 12.00445434298441, "loss": 0.45033586025238037, "loss_ce": 0.0001405369839631021, "loss_iou": 0.20703125, "loss_num": 0.00701904296875, "loss_xval": 0.44921875, "num_input_tokens_seen": 301986100, "step": 5390 }, { "epoch": 12.006681514476615, "grad_norm": 16.831626892089844, "learning_rate": 1e-06, "loss": 0.2852, "num_input_tokens_seen": 302043816, "step": 5391 }, { "epoch": 12.006681514476615, "loss": 0.2847111225128174, "loss_ce": 0.00010420664330013096, "loss_iou": 0.12158203125, "loss_num": 0.00823974609375, "loss_xval": 0.28515625, "num_input_tokens_seen": 302043816, "step": 5391 }, { "epoch": 12.00890868596882, "grad_norm": 37.528282165527344, "learning_rate": 1e-06, "loss": 0.6277, "num_input_tokens_seen": 302098732, "step": 5392 }, { "epoch": 12.00890868596882, "loss": 0.6959569454193115, "loss_ce": 0.00015614379663020372, "loss_iou": 0.30859375, "loss_num": 0.015869140625, "loss_xval": 0.6953125, "num_input_tokens_seen": 302098732, "step": 5392 }, { "epoch": 12.011135857461024, "grad_norm": 24.19828987121582, "learning_rate": 1e-06, "loss": 0.4908, "num_input_tokens_seen": 302157220, "step": 5393 }, { "epoch": 12.011135857461024, "loss": 0.5651944279670715, "loss_ce": 0.00013094657333567739, "loss_iou": 0.259765625, "loss_num": 0.00909423828125, "loss_xval": 0.56640625, "num_input_tokens_seen": 302157220, "step": 5393 }, { "epoch": 12.01336302895323, "grad_norm": 14.71508502960205, "learning_rate": 1e-06, "loss": 0.496, "num_input_tokens_seen": 302214052, "step": 5394 }, { "epoch": 12.01336302895323, "loss": 0.5302682518959045, "loss_ce": 0.00011686344078043476, "loss_iou": 0.2412109375, "loss_num": 0.00927734375, "loss_xval": 0.53125, "num_input_tokens_seen": 302214052, "step": 5394 }, { "epoch": 12.015590200445434, "grad_norm": 18.591922760009766, "learning_rate": 1e-06, "loss": 0.491, "num_input_tokens_seen": 302269552, "step": 5395 }, { "epoch": 12.015590200445434, "loss": 0.4772958755493164, "loss_ce": 0.00024510070215910673, "loss_iou": 0.2177734375, "loss_num": 0.00823974609375, "loss_xval": 0.4765625, "num_input_tokens_seen": 302269552, "step": 5395 }, { "epoch": 12.017817371937639, "grad_norm": 13.601188659667969, "learning_rate": 1e-06, "loss": 0.4128, "num_input_tokens_seen": 302326968, "step": 5396 }, { "epoch": 12.017817371937639, "loss": 0.4751969277858734, "loss_ce": 9.925015910994262e-05, "loss_iou": 0.197265625, "loss_num": 0.016357421875, "loss_xval": 0.474609375, "num_input_tokens_seen": 302326968, "step": 5396 }, { "epoch": 12.020044543429844, "grad_norm": 22.953596115112305, "learning_rate": 1e-06, "loss": 0.449, "num_input_tokens_seen": 302383496, "step": 5397 }, { "epoch": 12.020044543429844, "loss": 0.5418699979782104, "loss_ce": 0.0001219719197251834, "loss_iou": 0.2431640625, "loss_num": 0.0111083984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 302383496, "step": 5397 }, { "epoch": 12.022271714922049, "grad_norm": 28.485471725463867, "learning_rate": 1e-06, "loss": 0.4552, "num_input_tokens_seen": 302438028, "step": 5398 }, { "epoch": 12.022271714922049, "loss": 0.46738266944885254, "loss_ce": 9.752875484991819e-05, "loss_iou": 0.177734375, "loss_num": 0.0224609375, "loss_xval": 0.466796875, "num_input_tokens_seen": 302438028, "step": 5398 }, { "epoch": 12.024498886414253, "grad_norm": 16.616348266601562, "learning_rate": 1e-06, "loss": 0.5826, "num_input_tokens_seen": 302494768, "step": 5399 }, { "epoch": 12.024498886414253, "loss": 0.5422226190567017, "loss_ce": 0.0001083296156139113, "loss_iou": 0.224609375, "loss_num": 0.0184326171875, "loss_xval": 0.54296875, "num_input_tokens_seen": 302494768, "step": 5399 }, { "epoch": 12.026726057906458, "grad_norm": 13.746217727661133, "learning_rate": 1e-06, "loss": 0.4764, "num_input_tokens_seen": 302550772, "step": 5400 }, { "epoch": 12.026726057906458, "loss": 0.4675309658050537, "loss_ce": 0.0001237354299519211, "loss_iou": 0.21484375, "loss_num": 0.0074462890625, "loss_xval": 0.466796875, "num_input_tokens_seen": 302550772, "step": 5400 }, { "epoch": 12.028953229398663, "grad_norm": 17.52446746826172, "learning_rate": 1e-06, "loss": 0.5146, "num_input_tokens_seen": 302608476, "step": 5401 }, { "epoch": 12.028953229398663, "loss": 0.5597883462905884, "loss_ce": 0.0008283640490844846, "loss_iou": 0.208984375, "loss_num": 0.0281982421875, "loss_xval": 0.55859375, "num_input_tokens_seen": 302608476, "step": 5401 }, { "epoch": 12.031180400890868, "grad_norm": 21.796451568603516, "learning_rate": 1e-06, "loss": 0.4571, "num_input_tokens_seen": 302663956, "step": 5402 }, { "epoch": 12.031180400890868, "loss": 0.5090689659118652, "loss_ce": 0.00021888897754251957, "loss_iou": 0.2099609375, "loss_num": 0.017822265625, "loss_xval": 0.5078125, "num_input_tokens_seen": 302663956, "step": 5402 }, { "epoch": 12.033407572383073, "grad_norm": 24.178424835205078, "learning_rate": 1e-06, "loss": 0.45, "num_input_tokens_seen": 302719060, "step": 5403 }, { "epoch": 12.033407572383073, "loss": 0.39098450541496277, "loss_ce": 0.00011537145473994315, "loss_iou": 0.18359375, "loss_num": 0.004669189453125, "loss_xval": 0.390625, "num_input_tokens_seen": 302719060, "step": 5403 }, { "epoch": 12.035634743875278, "grad_norm": 17.01597785949707, "learning_rate": 1e-06, "loss": 0.4611, "num_input_tokens_seen": 302773636, "step": 5404 }, { "epoch": 12.035634743875278, "loss": 0.3315487504005432, "loss_ce": 0.00012785526632796973, "loss_iou": 0.1298828125, "loss_num": 0.01422119140625, "loss_xval": 0.33203125, "num_input_tokens_seen": 302773636, "step": 5404 }, { "epoch": 12.037861915367483, "grad_norm": 14.340649604797363, "learning_rate": 1e-06, "loss": 0.4127, "num_input_tokens_seen": 302831388, "step": 5405 }, { "epoch": 12.037861915367483, "loss": 0.3785756528377533, "loss_ce": 0.00012716675701085478, "loss_iou": 0.171875, "loss_num": 0.0068359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 302831388, "step": 5405 }, { "epoch": 12.040089086859687, "grad_norm": 25.321979522705078, "learning_rate": 1e-06, "loss": 0.5422, "num_input_tokens_seen": 302888920, "step": 5406 }, { "epoch": 12.040089086859687, "loss": 0.36657315492630005, "loss_ce": 0.00011808329145424068, "loss_iou": 0.1552734375, "loss_num": 0.01123046875, "loss_xval": 0.3671875, "num_input_tokens_seen": 302888920, "step": 5406 }, { "epoch": 12.042316258351892, "grad_norm": 18.01930809020996, "learning_rate": 1e-06, "loss": 0.4401, "num_input_tokens_seen": 302942712, "step": 5407 }, { "epoch": 12.042316258351892, "loss": 0.49123328924179077, "loss_ce": 0.00014440924860537052, "loss_iou": 0.21484375, "loss_num": 0.01239013671875, "loss_xval": 0.490234375, "num_input_tokens_seen": 302942712, "step": 5407 }, { "epoch": 12.044543429844097, "grad_norm": 35.77482604980469, "learning_rate": 1e-06, "loss": 0.6199, "num_input_tokens_seen": 302998400, "step": 5408 }, { "epoch": 12.044543429844097, "loss": 0.6408650875091553, "loss_ce": 0.00011807896953541785, "loss_iou": 0.275390625, "loss_num": 0.018310546875, "loss_xval": 0.640625, "num_input_tokens_seen": 302998400, "step": 5408 }, { "epoch": 12.046770601336302, "grad_norm": 40.30015563964844, "learning_rate": 1e-06, "loss": 0.4045, "num_input_tokens_seen": 303052680, "step": 5409 }, { "epoch": 12.046770601336302, "loss": 0.36034607887268066, "loss_ce": 0.00011658002040348947, "loss_iou": 0.1572265625, "loss_num": 0.00927734375, "loss_xval": 0.359375, "num_input_tokens_seen": 303052680, "step": 5409 }, { "epoch": 12.048997772828507, "grad_norm": 19.596263885498047, "learning_rate": 1e-06, "loss": 0.4598, "num_input_tokens_seen": 303110848, "step": 5410 }, { "epoch": 12.048997772828507, "loss": 0.3800050616264343, "loss_ce": 0.00012227029947098345, "loss_iou": 0.177734375, "loss_num": 0.005035400390625, "loss_xval": 0.37890625, "num_input_tokens_seen": 303110848, "step": 5410 }, { "epoch": 12.051224944320714, "grad_norm": 25.05034637451172, "learning_rate": 1e-06, "loss": 0.5697, "num_input_tokens_seen": 303167852, "step": 5411 }, { "epoch": 12.051224944320714, "loss": 0.6123285889625549, "loss_ce": 0.00014595050015486777, "loss_iou": 0.271484375, "loss_num": 0.0137939453125, "loss_xval": 0.61328125, "num_input_tokens_seen": 303167852, "step": 5411 }, { "epoch": 12.053452115812918, "grad_norm": 15.722366333007812, "learning_rate": 1e-06, "loss": 0.4301, "num_input_tokens_seen": 303226468, "step": 5412 }, { "epoch": 12.053452115812918, "loss": 0.44557318091392517, "loss_ce": 0.00026066991267725825, "loss_iou": 0.2001953125, "loss_num": 0.0089111328125, "loss_xval": 0.4453125, "num_input_tokens_seen": 303226468, "step": 5412 }, { "epoch": 12.055679287305123, "grad_norm": 30.098329544067383, "learning_rate": 1e-06, "loss": 0.5986, "num_input_tokens_seen": 303282616, "step": 5413 }, { "epoch": 12.055679287305123, "loss": 0.5686604976654053, "loss_ce": 0.0003011383814737201, "loss_iou": 0.2353515625, "loss_num": 0.019287109375, "loss_xval": 0.5703125, "num_input_tokens_seen": 303282616, "step": 5413 }, { "epoch": 12.057906458797328, "grad_norm": 11.368029594421387, "learning_rate": 1e-06, "loss": 0.618, "num_input_tokens_seen": 303340068, "step": 5414 }, { "epoch": 12.057906458797328, "loss": 0.6175664663314819, "loss_ce": 0.00013479686458595097, "loss_iou": 0.248046875, "loss_num": 0.0242919921875, "loss_xval": 0.6171875, "num_input_tokens_seen": 303340068, "step": 5414 }, { "epoch": 12.060133630289533, "grad_norm": 18.459564208984375, "learning_rate": 1e-06, "loss": 0.4627, "num_input_tokens_seen": 303396632, "step": 5415 }, { "epoch": 12.060133630289533, "loss": 0.5457392930984497, "loss_ce": 0.00014600652502849698, "loss_iou": 0.25, "loss_num": 0.00885009765625, "loss_xval": 0.546875, "num_input_tokens_seen": 303396632, "step": 5415 }, { "epoch": 12.062360801781738, "grad_norm": 27.545385360717773, "learning_rate": 1e-06, "loss": 0.4222, "num_input_tokens_seen": 303452996, "step": 5416 }, { "epoch": 12.062360801781738, "loss": 0.3862226605415344, "loss_ce": 0.00011425558477640152, "loss_iou": 0.1796875, "loss_num": 0.00531005859375, "loss_xval": 0.38671875, "num_input_tokens_seen": 303452996, "step": 5416 }, { "epoch": 12.064587973273943, "grad_norm": 17.189687728881836, "learning_rate": 1e-06, "loss": 0.6149, "num_input_tokens_seen": 303509476, "step": 5417 }, { "epoch": 12.064587973273943, "loss": 0.4504542648792267, "loss_ce": 0.0001368774683214724, "loss_iou": 0.201171875, "loss_num": 0.00970458984375, "loss_xval": 0.451171875, "num_input_tokens_seen": 303509476, "step": 5417 }, { "epoch": 12.066815144766148, "grad_norm": 13.223028182983398, "learning_rate": 1e-06, "loss": 0.4632, "num_input_tokens_seen": 303566016, "step": 5418 }, { "epoch": 12.066815144766148, "loss": 0.5367510318756104, "loss_ce": 0.00012994115240871906, "loss_iou": 0.2041015625, "loss_num": 0.0257568359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 303566016, "step": 5418 }, { "epoch": 12.069042316258352, "grad_norm": 22.01005744934082, "learning_rate": 1e-06, "loss": 0.5708, "num_input_tokens_seen": 303621640, "step": 5419 }, { "epoch": 12.069042316258352, "loss": 0.33945903182029724, "loss_ce": 0.00010356900747865438, "loss_iou": 0.1474609375, "loss_num": 0.00872802734375, "loss_xval": 0.33984375, "num_input_tokens_seen": 303621640, "step": 5419 }, { "epoch": 12.071269487750557, "grad_norm": 16.25597381591797, "learning_rate": 1e-06, "loss": 0.4158, "num_input_tokens_seen": 303676132, "step": 5420 }, { "epoch": 12.071269487750557, "loss": 0.43920350074768066, "loss_ce": 0.00011659059964586049, "loss_iou": 0.1875, "loss_num": 0.0126953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 303676132, "step": 5420 }, { "epoch": 12.073496659242762, "grad_norm": 26.435026168823242, "learning_rate": 1e-06, "loss": 0.4823, "num_input_tokens_seen": 303731772, "step": 5421 }, { "epoch": 12.073496659242762, "loss": 0.5533719658851624, "loss_ce": 0.00014934616046957672, "loss_iou": 0.2099609375, "loss_num": 0.0264892578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 303731772, "step": 5421 }, { "epoch": 12.075723830734967, "grad_norm": 317.4898376464844, "learning_rate": 1e-06, "loss": 0.6007, "num_input_tokens_seen": 303788636, "step": 5422 }, { "epoch": 12.075723830734967, "loss": 0.8187873363494873, "loss_ce": 0.00018380230176262558, "loss_iou": 0.326171875, "loss_num": 0.033203125, "loss_xval": 0.8203125, "num_input_tokens_seen": 303788636, "step": 5422 }, { "epoch": 12.077951002227172, "grad_norm": 16.596609115600586, "learning_rate": 1e-06, "loss": 0.3791, "num_input_tokens_seen": 303844472, "step": 5423 }, { "epoch": 12.077951002227172, "loss": 0.29760003089904785, "loss_ce": 0.00011467649164842442, "loss_iou": 0.1259765625, "loss_num": 0.00921630859375, "loss_xval": 0.296875, "num_input_tokens_seen": 303844472, "step": 5423 }, { "epoch": 12.080178173719377, "grad_norm": 19.923240661621094, "learning_rate": 1e-06, "loss": 0.5823, "num_input_tokens_seen": 303899116, "step": 5424 }, { "epoch": 12.080178173719377, "loss": 0.7797752618789673, "loss_ce": 0.00011215673293918371, "loss_iou": 0.318359375, "loss_num": 0.028564453125, "loss_xval": 0.78125, "num_input_tokens_seen": 303899116, "step": 5424 }, { "epoch": 12.082405345211582, "grad_norm": 22.035127639770508, "learning_rate": 1e-06, "loss": 0.5462, "num_input_tokens_seen": 303952664, "step": 5425 }, { "epoch": 12.082405345211582, "loss": 0.6474908590316772, "loss_ce": 0.00015202595386654139, "loss_iou": 0.26953125, "loss_num": 0.0216064453125, "loss_xval": 0.6484375, "num_input_tokens_seen": 303952664, "step": 5425 }, { "epoch": 12.084632516703786, "grad_norm": 25.01907730102539, "learning_rate": 1e-06, "loss": 0.5354, "num_input_tokens_seen": 304009648, "step": 5426 }, { "epoch": 12.084632516703786, "loss": 0.660736083984375, "loss_ce": 0.00021362912957556546, "loss_iou": 0.263671875, "loss_num": 0.0267333984375, "loss_xval": 0.66015625, "num_input_tokens_seen": 304009648, "step": 5426 }, { "epoch": 12.086859688195991, "grad_norm": 14.428705215454102, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 304069088, "step": 5427 }, { "epoch": 12.086859688195991, "loss": 0.3368920683860779, "loss_ce": 0.0001000880729407072, "loss_iou": 0.1572265625, "loss_num": 0.0045166015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 304069088, "step": 5427 }, { "epoch": 12.089086859688196, "grad_norm": 23.767419815063477, "learning_rate": 1e-06, "loss": 0.5659, "num_input_tokens_seen": 304125008, "step": 5428 }, { "epoch": 12.089086859688196, "loss": 0.5689517259597778, "loss_ce": 0.00010411132825538516, "loss_iou": 0.2275390625, "loss_num": 0.022705078125, "loss_xval": 0.5703125, "num_input_tokens_seen": 304125008, "step": 5428 }, { "epoch": 12.091314031180401, "grad_norm": 25.242143630981445, "learning_rate": 1e-06, "loss": 0.3432, "num_input_tokens_seen": 304182308, "step": 5429 }, { "epoch": 12.091314031180401, "loss": 0.38244450092315674, "loss_ce": 0.00012029155914206058, "loss_iou": 0.1796875, "loss_num": 0.00445556640625, "loss_xval": 0.3828125, "num_input_tokens_seen": 304182308, "step": 5429 }, { "epoch": 12.093541202672606, "grad_norm": 13.21595287322998, "learning_rate": 1e-06, "loss": 0.5233, "num_input_tokens_seen": 304239692, "step": 5430 }, { "epoch": 12.093541202672606, "loss": 0.6246329545974731, "loss_ce": 0.00012122253247071058, "loss_iou": 0.263671875, "loss_num": 0.0196533203125, "loss_xval": 0.625, "num_input_tokens_seen": 304239692, "step": 5430 }, { "epoch": 12.09576837416481, "grad_norm": 26.7417049407959, "learning_rate": 1e-06, "loss": 0.6778, "num_input_tokens_seen": 304296608, "step": 5431 }, { "epoch": 12.09576837416481, "loss": 0.5760781168937683, "loss_ce": 0.0001503834209870547, "loss_iou": 0.26171875, "loss_num": 0.010498046875, "loss_xval": 0.57421875, "num_input_tokens_seen": 304296608, "step": 5431 }, { "epoch": 12.097995545657016, "grad_norm": 14.140853881835938, "learning_rate": 1e-06, "loss": 0.5346, "num_input_tokens_seen": 304351720, "step": 5432 }, { "epoch": 12.097995545657016, "loss": 0.6522471308708191, "loss_ce": 0.00014752510469406843, "loss_iou": 0.265625, "loss_num": 0.0244140625, "loss_xval": 0.65234375, "num_input_tokens_seen": 304351720, "step": 5432 }, { "epoch": 12.10022271714922, "grad_norm": 17.195842742919922, "learning_rate": 1e-06, "loss": 0.4608, "num_input_tokens_seen": 304411140, "step": 5433 }, { "epoch": 12.10022271714922, "loss": 0.4692525267601013, "loss_ce": 0.00013632513582706451, "loss_iou": 0.212890625, "loss_num": 0.00848388671875, "loss_xval": 0.46875, "num_input_tokens_seen": 304411140, "step": 5433 }, { "epoch": 12.102449888641425, "grad_norm": 17.013219833374023, "learning_rate": 1e-06, "loss": 0.5863, "num_input_tokens_seen": 304466832, "step": 5434 }, { "epoch": 12.102449888641425, "loss": 0.46361833810806274, "loss_ce": 0.00011738392640836537, "loss_iou": 0.205078125, "loss_num": 0.01043701171875, "loss_xval": 0.462890625, "num_input_tokens_seen": 304466832, "step": 5434 }, { "epoch": 12.10467706013363, "grad_norm": 19.217731475830078, "learning_rate": 1e-06, "loss": 0.5909, "num_input_tokens_seen": 304524596, "step": 5435 }, { "epoch": 12.10467706013363, "loss": 0.5402824878692627, "loss_ce": 0.00012132612755522132, "loss_iou": 0.240234375, "loss_num": 0.01220703125, "loss_xval": 0.5390625, "num_input_tokens_seen": 304524596, "step": 5435 }, { "epoch": 12.106904231625835, "grad_norm": 12.651470184326172, "learning_rate": 1e-06, "loss": 0.4578, "num_input_tokens_seen": 304581824, "step": 5436 }, { "epoch": 12.106904231625835, "loss": 0.35941869020462036, "loss_ce": 0.00016576812777202576, "loss_iou": 0.1435546875, "loss_num": 0.0142822265625, "loss_xval": 0.359375, "num_input_tokens_seen": 304581824, "step": 5436 }, { "epoch": 12.10913140311804, "grad_norm": 15.489208221435547, "learning_rate": 1e-06, "loss": 0.3276, "num_input_tokens_seen": 304637832, "step": 5437 }, { "epoch": 12.10913140311804, "loss": 0.27845609188079834, "loss_ce": 0.00013578942161984742, "loss_iou": 0.1162109375, "loss_num": 0.00909423828125, "loss_xval": 0.27734375, "num_input_tokens_seen": 304637832, "step": 5437 }, { "epoch": 12.111358574610245, "grad_norm": 31.793598175048828, "learning_rate": 1e-06, "loss": 0.4174, "num_input_tokens_seen": 304695616, "step": 5438 }, { "epoch": 12.111358574610245, "loss": 0.40172773599624634, "loss_ce": 0.00011640525190159678, "loss_iou": 0.166015625, "loss_num": 0.01397705078125, "loss_xval": 0.40234375, "num_input_tokens_seen": 304695616, "step": 5438 }, { "epoch": 12.11358574610245, "grad_norm": 23.2586669921875, "learning_rate": 1e-06, "loss": 0.3909, "num_input_tokens_seen": 304751796, "step": 5439 }, { "epoch": 12.11358574610245, "loss": 0.3819471001625061, "loss_ce": 0.00011117332905996591, "loss_iou": 0.171875, "loss_num": 0.00762939453125, "loss_xval": 0.3828125, "num_input_tokens_seen": 304751796, "step": 5439 }, { "epoch": 12.115812917594655, "grad_norm": 15.429174423217773, "learning_rate": 1e-06, "loss": 0.3506, "num_input_tokens_seen": 304806936, "step": 5440 }, { "epoch": 12.115812917594655, "loss": 0.3502577543258667, "loss_ce": 0.0001600981195224449, "loss_iou": 0.16015625, "loss_num": 0.00616455078125, "loss_xval": 0.349609375, "num_input_tokens_seen": 304806936, "step": 5440 }, { "epoch": 12.11804008908686, "grad_norm": 16.58045196533203, "learning_rate": 1e-06, "loss": 0.408, "num_input_tokens_seen": 304864392, "step": 5441 }, { "epoch": 12.11804008908686, "loss": 0.4236249625682831, "loss_ce": 0.00016305723693221807, "loss_iou": 0.173828125, "loss_num": 0.01513671875, "loss_xval": 0.423828125, "num_input_tokens_seen": 304864392, "step": 5441 }, { "epoch": 12.120267260579064, "grad_norm": 13.274721145629883, "learning_rate": 1e-06, "loss": 0.4532, "num_input_tokens_seen": 304921888, "step": 5442 }, { "epoch": 12.120267260579064, "loss": 0.4875772297382355, "loss_ce": 0.00015048254863359034, "loss_iou": 0.21484375, "loss_num": 0.01171875, "loss_xval": 0.48828125, "num_input_tokens_seen": 304921888, "step": 5442 }, { "epoch": 12.122494432071269, "grad_norm": 17.956384658813477, "learning_rate": 1e-06, "loss": 0.6849, "num_input_tokens_seen": 304977556, "step": 5443 }, { "epoch": 12.122494432071269, "loss": 0.7030162811279297, "loss_ce": 0.00013541628140956163, "loss_iou": 0.263671875, "loss_num": 0.03515625, "loss_xval": 0.703125, "num_input_tokens_seen": 304977556, "step": 5443 }, { "epoch": 12.124721603563474, "grad_norm": 16.645193099975586, "learning_rate": 1e-06, "loss": 0.6395, "num_input_tokens_seen": 305033024, "step": 5444 }, { "epoch": 12.124721603563474, "loss": 0.5505350232124329, "loss_ce": 0.00011998764239251614, "loss_iou": 0.244140625, "loss_num": 0.01239013671875, "loss_xval": 0.55078125, "num_input_tokens_seen": 305033024, "step": 5444 }, { "epoch": 12.126948775055679, "grad_norm": 13.85136604309082, "learning_rate": 1e-06, "loss": 0.4585, "num_input_tokens_seen": 305089956, "step": 5445 }, { "epoch": 12.126948775055679, "loss": 0.44617876410484314, "loss_ce": 0.00013385072816163301, "loss_iou": 0.1650390625, "loss_num": 0.02294921875, "loss_xval": 0.4453125, "num_input_tokens_seen": 305089956, "step": 5445 }, { "epoch": 12.129175946547884, "grad_norm": 23.356822967529297, "learning_rate": 1e-06, "loss": 0.4678, "num_input_tokens_seen": 305143452, "step": 5446 }, { "epoch": 12.129175946547884, "loss": 0.4208873510360718, "loss_ce": 0.00011096645175712183, "loss_iou": 0.1904296875, "loss_num": 0.00811767578125, "loss_xval": 0.419921875, "num_input_tokens_seen": 305143452, "step": 5446 }, { "epoch": 12.131403118040089, "grad_norm": 24.38487434387207, "learning_rate": 1e-06, "loss": 0.488, "num_input_tokens_seen": 305196260, "step": 5447 }, { "epoch": 12.131403118040089, "loss": 0.4843854010105133, "loss_ce": 0.00013247507740743458, "loss_iou": 0.2119140625, "loss_num": 0.01202392578125, "loss_xval": 0.484375, "num_input_tokens_seen": 305196260, "step": 5447 }, { "epoch": 12.133630289532293, "grad_norm": 15.976398468017578, "learning_rate": 1e-06, "loss": 0.4716, "num_input_tokens_seen": 305253716, "step": 5448 }, { "epoch": 12.133630289532293, "loss": 0.5756908655166626, "loss_ce": 0.00012929181684739888, "loss_iou": 0.23046875, "loss_num": 0.02294921875, "loss_xval": 0.57421875, "num_input_tokens_seen": 305253716, "step": 5448 }, { "epoch": 12.135857461024498, "grad_norm": 17.87403678894043, "learning_rate": 1e-06, "loss": 0.5232, "num_input_tokens_seen": 305307148, "step": 5449 }, { "epoch": 12.135857461024498, "loss": 0.6177228689193726, "loss_ce": 0.00016915984451770782, "loss_iou": 0.26953125, "loss_num": 0.0159912109375, "loss_xval": 0.6171875, "num_input_tokens_seen": 305307148, "step": 5449 }, { "epoch": 12.138084632516703, "grad_norm": 18.055152893066406, "learning_rate": 1e-06, "loss": 0.5738, "num_input_tokens_seen": 305364540, "step": 5450 }, { "epoch": 12.138084632516703, "loss": 0.615594744682312, "loss_ce": 0.00011621808516792953, "loss_iou": 0.267578125, "loss_num": 0.0162353515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 305364540, "step": 5450 }, { "epoch": 12.140311804008908, "grad_norm": 13.896754264831543, "learning_rate": 1e-06, "loss": 0.4093, "num_input_tokens_seen": 305419432, "step": 5451 }, { "epoch": 12.140311804008908, "loss": 0.4336770176887512, "loss_ce": 0.0009377308306284249, "loss_iou": 0.1826171875, "loss_num": 0.013671875, "loss_xval": 0.43359375, "num_input_tokens_seen": 305419432, "step": 5451 }, { "epoch": 12.142538975501113, "grad_norm": 70.83055114746094, "learning_rate": 1e-06, "loss": 0.4653, "num_input_tokens_seen": 305475840, "step": 5452 }, { "epoch": 12.142538975501113, "loss": 0.3804883062839508, "loss_ce": 0.00011720365728251636, "loss_iou": 0.166015625, "loss_num": 0.00970458984375, "loss_xval": 0.380859375, "num_input_tokens_seen": 305475840, "step": 5452 }, { "epoch": 12.144766146993318, "grad_norm": 33.42408752441406, "learning_rate": 1e-06, "loss": 0.661, "num_input_tokens_seen": 305532432, "step": 5453 }, { "epoch": 12.144766146993318, "loss": 0.8063072562217712, "loss_ce": 0.000154937180923298, "loss_iou": 0.337890625, "loss_num": 0.025634765625, "loss_xval": 0.8046875, "num_input_tokens_seen": 305532432, "step": 5453 }, { "epoch": 12.146993318485523, "grad_norm": 23.046737670898438, "learning_rate": 1e-06, "loss": 0.3551, "num_input_tokens_seen": 305589804, "step": 5454 }, { "epoch": 12.146993318485523, "loss": 0.37097907066345215, "loss_ce": 0.00012947215873282403, "loss_iou": 0.166015625, "loss_num": 0.0076904296875, "loss_xval": 0.37109375, "num_input_tokens_seen": 305589804, "step": 5454 }, { "epoch": 12.14922048997773, "grad_norm": 17.94960594177246, "learning_rate": 1e-06, "loss": 0.533, "num_input_tokens_seen": 305643368, "step": 5455 }, { "epoch": 12.14922048997773, "loss": 0.5467525720596313, "loss_ce": 0.00012171192065579817, "loss_iou": 0.255859375, "loss_num": 0.00701904296875, "loss_xval": 0.546875, "num_input_tokens_seen": 305643368, "step": 5455 }, { "epoch": 12.151447661469934, "grad_norm": 21.4966983795166, "learning_rate": 1e-06, "loss": 0.4453, "num_input_tokens_seen": 305698236, "step": 5456 }, { "epoch": 12.151447661469934, "loss": 0.5014622807502747, "loss_ce": 0.00011951071064686403, "loss_iou": 0.220703125, "loss_num": 0.01214599609375, "loss_xval": 0.5, "num_input_tokens_seen": 305698236, "step": 5456 }, { "epoch": 12.153674832962139, "grad_norm": 32.09659194946289, "learning_rate": 1e-06, "loss": 0.5381, "num_input_tokens_seen": 305755728, "step": 5457 }, { "epoch": 12.153674832962139, "loss": 0.6394698023796082, "loss_ce": 0.0001875544257927686, "loss_iou": 0.2890625, "loss_num": 0.01177978515625, "loss_xval": 0.640625, "num_input_tokens_seen": 305755728, "step": 5457 }, { "epoch": 12.155902004454344, "grad_norm": 28.464859008789062, "learning_rate": 1e-06, "loss": 0.4574, "num_input_tokens_seen": 305811120, "step": 5458 }, { "epoch": 12.155902004454344, "loss": 0.34215444326400757, "loss_ce": 0.00011341302888467908, "loss_iou": 0.1474609375, "loss_num": 0.00933837890625, "loss_xval": 0.341796875, "num_input_tokens_seen": 305811120, "step": 5458 }, { "epoch": 12.158129175946549, "grad_norm": 22.244796752929688, "learning_rate": 1e-06, "loss": 0.4736, "num_input_tokens_seen": 305867104, "step": 5459 }, { "epoch": 12.158129175946549, "loss": 0.4112476706504822, "loss_ce": 0.00011484247806947678, "loss_iou": 0.1884765625, "loss_num": 0.006866455078125, "loss_xval": 0.41015625, "num_input_tokens_seen": 305867104, "step": 5459 }, { "epoch": 12.160356347438753, "grad_norm": 29.14021873474121, "learning_rate": 1e-06, "loss": 0.5295, "num_input_tokens_seen": 305921188, "step": 5460 }, { "epoch": 12.160356347438753, "loss": 0.6369410753250122, "loss_ce": 0.0004054922901559621, "loss_iou": 0.283203125, "loss_num": 0.0140380859375, "loss_xval": 0.63671875, "num_input_tokens_seen": 305921188, "step": 5460 }, { "epoch": 12.162583518930958, "grad_norm": 16.301149368286133, "learning_rate": 1e-06, "loss": 0.4447, "num_input_tokens_seen": 305978144, "step": 5461 }, { "epoch": 12.162583518930958, "loss": 0.5294920802116394, "loss_ce": 0.0001952020829776302, "loss_iou": 0.2333984375, "loss_num": 0.01263427734375, "loss_xval": 0.53125, "num_input_tokens_seen": 305978144, "step": 5461 }, { "epoch": 12.164810690423163, "grad_norm": 17.781740188598633, "learning_rate": 1e-06, "loss": 0.2532, "num_input_tokens_seen": 306033640, "step": 5462 }, { "epoch": 12.164810690423163, "loss": 0.2828177809715271, "loss_ce": 0.0001639821712160483, "loss_iou": 0.1259765625, "loss_num": 0.00604248046875, "loss_xval": 0.283203125, "num_input_tokens_seen": 306033640, "step": 5462 }, { "epoch": 12.167037861915368, "grad_norm": 33.484806060791016, "learning_rate": 1e-06, "loss": 0.4501, "num_input_tokens_seen": 306091020, "step": 5463 }, { "epoch": 12.167037861915368, "loss": 0.46839091181755066, "loss_ce": 0.00012922003224957734, "loss_iou": 0.18359375, "loss_num": 0.020263671875, "loss_xval": 0.46875, "num_input_tokens_seen": 306091020, "step": 5463 }, { "epoch": 12.169265033407573, "grad_norm": 26.4747257232666, "learning_rate": 1e-06, "loss": 0.6015, "num_input_tokens_seen": 306147840, "step": 5464 }, { "epoch": 12.169265033407573, "loss": 0.5650879740715027, "loss_ce": 0.00014657803694717586, "loss_iou": 0.25390625, "loss_num": 0.01177978515625, "loss_xval": 0.56640625, "num_input_tokens_seen": 306147840, "step": 5464 }, { "epoch": 12.171492204899778, "grad_norm": 24.981590270996094, "learning_rate": 1e-06, "loss": 0.6129, "num_input_tokens_seen": 306203280, "step": 5465 }, { "epoch": 12.171492204899778, "loss": 0.8055815100669861, "loss_ce": 0.00016156808123923838, "loss_iou": 0.376953125, "loss_num": 0.010009765625, "loss_xval": 0.8046875, "num_input_tokens_seen": 306203280, "step": 5465 }, { "epoch": 12.173719376391983, "grad_norm": 14.754565238952637, "learning_rate": 1e-06, "loss": 0.5721, "num_input_tokens_seen": 306257676, "step": 5466 }, { "epoch": 12.173719376391983, "loss": 0.5629807710647583, "loss_ce": 0.00011456996435299516, "loss_iou": 0.25390625, "loss_num": 0.0108642578125, "loss_xval": 0.5625, "num_input_tokens_seen": 306257676, "step": 5466 }, { "epoch": 12.175946547884188, "grad_norm": 25.851285934448242, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 306314104, "step": 5467 }, { "epoch": 12.175946547884188, "loss": 0.5030755996704102, "loss_ce": 0.00020692951511591673, "loss_iou": 0.2109375, "loss_num": 0.0159912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 306314104, "step": 5467 }, { "epoch": 12.178173719376392, "grad_norm": 19.489961624145508, "learning_rate": 1e-06, "loss": 0.385, "num_input_tokens_seen": 306372532, "step": 5468 }, { "epoch": 12.178173719376392, "loss": 0.4690125584602356, "loss_ce": 0.00014049038873054087, "loss_iou": 0.2001953125, "loss_num": 0.0135498046875, "loss_xval": 0.46875, "num_input_tokens_seen": 306372532, "step": 5468 }, { "epoch": 12.180400890868597, "grad_norm": 14.486827850341797, "learning_rate": 1e-06, "loss": 0.416, "num_input_tokens_seen": 306428160, "step": 5469 }, { "epoch": 12.180400890868597, "loss": 0.4102684259414673, "loss_ce": 0.00011218419240321964, "loss_iou": 0.185546875, "loss_num": 0.0078125, "loss_xval": 0.41015625, "num_input_tokens_seen": 306428160, "step": 5469 }, { "epoch": 12.182628062360802, "grad_norm": 48.91545867919922, "learning_rate": 1e-06, "loss": 0.5029, "num_input_tokens_seen": 306484716, "step": 5470 }, { "epoch": 12.182628062360802, "loss": 0.4544667601585388, "loss_ce": 0.00012103513290639967, "loss_iou": 0.1982421875, "loss_num": 0.01165771484375, "loss_xval": 0.455078125, "num_input_tokens_seen": 306484716, "step": 5470 }, { "epoch": 12.184855233853007, "grad_norm": 15.201376914978027, "learning_rate": 1e-06, "loss": 0.6931, "num_input_tokens_seen": 306542220, "step": 5471 }, { "epoch": 12.184855233853007, "loss": 0.5883694887161255, "loss_ce": 0.00011262335465289652, "loss_iou": 0.26953125, "loss_num": 0.00946044921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 306542220, "step": 5471 }, { "epoch": 12.187082405345212, "grad_norm": 17.70683479309082, "learning_rate": 1e-06, "loss": 0.6325, "num_input_tokens_seen": 306598764, "step": 5472 }, { "epoch": 12.187082405345212, "loss": 0.5439872741699219, "loss_ce": 0.00016404017515014857, "loss_iou": 0.234375, "loss_num": 0.01507568359375, "loss_xval": 0.54296875, "num_input_tokens_seen": 306598764, "step": 5472 }, { "epoch": 12.189309576837417, "grad_norm": 15.60269546508789, "learning_rate": 1e-06, "loss": 0.4718, "num_input_tokens_seen": 306655192, "step": 5473 }, { "epoch": 12.189309576837417, "loss": 0.5688609480857849, "loss_ce": 0.000135363225126639, "loss_iou": 0.2373046875, "loss_num": 0.0189208984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 306655192, "step": 5473 }, { "epoch": 12.191536748329622, "grad_norm": 23.64163589477539, "learning_rate": 1e-06, "loss": 0.5773, "num_input_tokens_seen": 306711220, "step": 5474 }, { "epoch": 12.191536748329622, "loss": 0.4205397963523865, "loss_ce": 0.00012965156929567456, "loss_iou": 0.1796875, "loss_num": 0.0120849609375, "loss_xval": 0.419921875, "num_input_tokens_seen": 306711220, "step": 5474 }, { "epoch": 12.193763919821826, "grad_norm": 19.232250213623047, "learning_rate": 1e-06, "loss": 0.6147, "num_input_tokens_seen": 306765912, "step": 5475 }, { "epoch": 12.193763919821826, "loss": 0.6638138294219971, "loss_ce": 0.0001785356434993446, "loss_iou": 0.3046875, "loss_num": 0.01116943359375, "loss_xval": 0.6640625, "num_input_tokens_seen": 306765912, "step": 5475 }, { "epoch": 12.195991091314031, "grad_norm": 24.970657348632812, "learning_rate": 1e-06, "loss": 0.4975, "num_input_tokens_seen": 306823380, "step": 5476 }, { "epoch": 12.195991091314031, "loss": 0.5586141347885132, "loss_ce": 0.000142430275445804, "loss_iou": 0.255859375, "loss_num": 0.00927734375, "loss_xval": 0.55859375, "num_input_tokens_seen": 306823380, "step": 5476 }, { "epoch": 12.198218262806236, "grad_norm": 23.488998413085938, "learning_rate": 1e-06, "loss": 0.5034, "num_input_tokens_seen": 306880924, "step": 5477 }, { "epoch": 12.198218262806236, "loss": 0.5716678500175476, "loss_ce": 0.00013462700007949024, "loss_iou": 0.24609375, "loss_num": 0.015869140625, "loss_xval": 0.5703125, "num_input_tokens_seen": 306880924, "step": 5477 }, { "epoch": 12.200445434298441, "grad_norm": 13.182589530944824, "learning_rate": 1e-06, "loss": 0.4163, "num_input_tokens_seen": 306937384, "step": 5478 }, { "epoch": 12.200445434298441, "loss": 0.4369211792945862, "loss_ce": 0.00015359185636043549, "loss_iou": 0.1982421875, "loss_num": 0.0079345703125, "loss_xval": 0.4375, "num_input_tokens_seen": 306937384, "step": 5478 }, { "epoch": 12.202672605790646, "grad_norm": 24.044740676879883, "learning_rate": 1e-06, "loss": 0.365, "num_input_tokens_seen": 306990924, "step": 5479 }, { "epoch": 12.202672605790646, "loss": 0.3719392418861389, "loss_ce": 0.00011306589294690639, "loss_iou": 0.16796875, "loss_num": 0.00701904296875, "loss_xval": 0.37109375, "num_input_tokens_seen": 306990924, "step": 5479 }, { "epoch": 12.20489977728285, "grad_norm": 29.678030014038086, "learning_rate": 1e-06, "loss": 0.5995, "num_input_tokens_seen": 307047468, "step": 5480 }, { "epoch": 12.20489977728285, "loss": 0.4761919677257538, "loss_ce": 0.00011774588347179815, "loss_iou": 0.2060546875, "loss_num": 0.01275634765625, "loss_xval": 0.4765625, "num_input_tokens_seen": 307047468, "step": 5480 }, { "epoch": 12.207126948775056, "grad_norm": 11.851914405822754, "learning_rate": 1e-06, "loss": 0.4111, "num_input_tokens_seen": 307103488, "step": 5481 }, { "epoch": 12.207126948775056, "loss": 0.25440388917922974, "loss_ce": 0.00022297943360172212, "loss_iou": 0.11328125, "loss_num": 0.00567626953125, "loss_xval": 0.25390625, "num_input_tokens_seen": 307103488, "step": 5481 }, { "epoch": 12.20935412026726, "grad_norm": 17.849971771240234, "learning_rate": 1e-06, "loss": 0.3751, "num_input_tokens_seen": 307159768, "step": 5482 }, { "epoch": 12.20935412026726, "loss": 0.32383567094802856, "loss_ce": 0.00010521031072130427, "loss_iou": 0.13671875, "loss_num": 0.00994873046875, "loss_xval": 0.32421875, "num_input_tokens_seen": 307159768, "step": 5482 }, { "epoch": 12.211581291759465, "grad_norm": 24.031517028808594, "learning_rate": 1e-06, "loss": 0.3802, "num_input_tokens_seen": 307212860, "step": 5483 }, { "epoch": 12.211581291759465, "loss": 0.354860782623291, "loss_ce": 0.00012447501649148762, "loss_iou": 0.1630859375, "loss_num": 0.0054931640625, "loss_xval": 0.35546875, "num_input_tokens_seen": 307212860, "step": 5483 }, { "epoch": 12.21380846325167, "grad_norm": 25.485084533691406, "learning_rate": 1e-06, "loss": 0.4435, "num_input_tokens_seen": 307265808, "step": 5484 }, { "epoch": 12.21380846325167, "loss": 0.42531439661979675, "loss_ce": 0.00013584828411694616, "loss_iou": 0.1943359375, "loss_num": 0.00714111328125, "loss_xval": 0.42578125, "num_input_tokens_seen": 307265808, "step": 5484 }, { "epoch": 12.216035634743875, "grad_norm": 31.889545440673828, "learning_rate": 1e-06, "loss": 0.5601, "num_input_tokens_seen": 307318728, "step": 5485 }, { "epoch": 12.216035634743875, "loss": 0.5997210741043091, "loss_ce": 0.00011171124060638249, "loss_iou": 0.28125, "loss_num": 0.0078125, "loss_xval": 0.6015625, "num_input_tokens_seen": 307318728, "step": 5485 }, { "epoch": 12.21826280623608, "grad_norm": 13.831578254699707, "learning_rate": 1e-06, "loss": 0.4215, "num_input_tokens_seen": 307374860, "step": 5486 }, { "epoch": 12.21826280623608, "loss": 0.48658156394958496, "loss_ce": 0.00013140994997229427, "loss_iou": 0.2060546875, "loss_num": 0.014892578125, "loss_xval": 0.486328125, "num_input_tokens_seen": 307374860, "step": 5486 }, { "epoch": 12.220489977728285, "grad_norm": 19.31671714782715, "learning_rate": 1e-06, "loss": 0.4264, "num_input_tokens_seen": 307432404, "step": 5487 }, { "epoch": 12.220489977728285, "loss": 0.4271966814994812, "loss_ce": 0.00013369151565711945, "loss_iou": 0.1669921875, "loss_num": 0.018798828125, "loss_xval": 0.427734375, "num_input_tokens_seen": 307432404, "step": 5487 }, { "epoch": 12.22271714922049, "grad_norm": 27.503997802734375, "learning_rate": 1e-06, "loss": 0.4349, "num_input_tokens_seen": 307488920, "step": 5488 }, { "epoch": 12.22271714922049, "loss": 0.4608370065689087, "loss_ce": 0.00014365185052156448, "loss_iou": 0.1923828125, "loss_num": 0.01513671875, "loss_xval": 0.4609375, "num_input_tokens_seen": 307488920, "step": 5488 }, { "epoch": 12.224944320712694, "grad_norm": 20.619155883789062, "learning_rate": 1e-06, "loss": 0.5518, "num_input_tokens_seen": 307542936, "step": 5489 }, { "epoch": 12.224944320712694, "loss": 0.4091936945915222, "loss_ce": 0.00013606807624455541, "loss_iou": 0.1884765625, "loss_num": 0.006439208984375, "loss_xval": 0.408203125, "num_input_tokens_seen": 307542936, "step": 5489 }, { "epoch": 12.2271714922049, "grad_norm": 40.87490463256836, "learning_rate": 1e-06, "loss": 0.5842, "num_input_tokens_seen": 307599944, "step": 5490 }, { "epoch": 12.2271714922049, "loss": 0.5064796805381775, "loss_ce": 0.0001320538140134886, "loss_iou": 0.2216796875, "loss_num": 0.01263427734375, "loss_xval": 0.5078125, "num_input_tokens_seen": 307599944, "step": 5490 }, { "epoch": 12.229398663697104, "grad_norm": 19.404682159423828, "learning_rate": 1e-06, "loss": 0.3539, "num_input_tokens_seen": 307656088, "step": 5491 }, { "epoch": 12.229398663697104, "loss": 0.3526671230792999, "loss_ce": 0.00012805430742446333, "loss_iou": 0.158203125, "loss_num": 0.007232666015625, "loss_xval": 0.3515625, "num_input_tokens_seen": 307656088, "step": 5491 }, { "epoch": 12.231625835189309, "grad_norm": 15.24465560913086, "learning_rate": 1e-06, "loss": 0.6326, "num_input_tokens_seen": 307711560, "step": 5492 }, { "epoch": 12.231625835189309, "loss": 0.7728477120399475, "loss_ce": 0.00014267500955611467, "loss_iou": 0.306640625, "loss_num": 0.03173828125, "loss_xval": 0.7734375, "num_input_tokens_seen": 307711560, "step": 5492 }, { "epoch": 12.233853006681514, "grad_norm": 20.780717849731445, "learning_rate": 1e-06, "loss": 0.6387, "num_input_tokens_seen": 307767096, "step": 5493 }, { "epoch": 12.233853006681514, "loss": 0.6877992153167725, "loss_ce": 0.00011609600915107876, "loss_iou": 0.244140625, "loss_num": 0.0400390625, "loss_xval": 0.6875, "num_input_tokens_seen": 307767096, "step": 5493 }, { "epoch": 12.236080178173719, "grad_norm": 38.66435241699219, "learning_rate": 1e-06, "loss": 0.4721, "num_input_tokens_seen": 307824084, "step": 5494 }, { "epoch": 12.236080178173719, "loss": 0.4882151782512665, "loss_ce": 0.0001780918682925403, "loss_iou": 0.2080078125, "loss_num": 0.01422119140625, "loss_xval": 0.48828125, "num_input_tokens_seen": 307824084, "step": 5494 }, { "epoch": 12.238307349665924, "grad_norm": 169.19766235351562, "learning_rate": 1e-06, "loss": 0.5511, "num_input_tokens_seen": 307878876, "step": 5495 }, { "epoch": 12.238307349665924, "loss": 0.6929959058761597, "loss_ce": 0.00012478306598495692, "loss_iou": 0.26171875, "loss_num": 0.033935546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 307878876, "step": 5495 }, { "epoch": 12.240534521158128, "grad_norm": 18.359050750732422, "learning_rate": 1e-06, "loss": 0.4092, "num_input_tokens_seen": 307936548, "step": 5496 }, { "epoch": 12.240534521158128, "loss": 0.40076589584350586, "loss_ce": 0.00013115604815538973, "loss_iou": 0.185546875, "loss_num": 0.005828857421875, "loss_xval": 0.400390625, "num_input_tokens_seen": 307936548, "step": 5496 }, { "epoch": 12.242761692650333, "grad_norm": 16.64657211303711, "learning_rate": 1e-06, "loss": 0.5673, "num_input_tokens_seen": 307993096, "step": 5497 }, { "epoch": 12.242761692650333, "loss": 0.6693442463874817, "loss_ce": 0.0001547938009025529, "loss_iou": 0.2890625, "loss_num": 0.01806640625, "loss_xval": 0.66796875, "num_input_tokens_seen": 307993096, "step": 5497 }, { "epoch": 12.244988864142538, "grad_norm": 23.77404022216797, "learning_rate": 1e-06, "loss": 0.6996, "num_input_tokens_seen": 308046960, "step": 5498 }, { "epoch": 12.244988864142538, "loss": 0.610221266746521, "loss_ce": 0.00011382724915165454, "loss_iou": 0.2578125, "loss_num": 0.0191650390625, "loss_xval": 0.609375, "num_input_tokens_seen": 308046960, "step": 5498 }, { "epoch": 12.247216035634743, "grad_norm": 19.345346450805664, "learning_rate": 1e-06, "loss": 0.3564, "num_input_tokens_seen": 308103812, "step": 5499 }, { "epoch": 12.247216035634743, "loss": 0.3957526683807373, "loss_ce": 0.00012281053932383657, "loss_iou": 0.1806640625, "loss_num": 0.006805419921875, "loss_xval": 0.396484375, "num_input_tokens_seen": 308103812, "step": 5499 }, { "epoch": 12.249443207126948, "grad_norm": 18.410499572753906, "learning_rate": 1e-06, "loss": 0.5145, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "eval_seeclick_web_CIoU": 0.585688054561615, "eval_seeclick_web_GIoU": 0.5826278626918793, "eval_seeclick_web_IoU": 0.6041875779628754, "eval_seeclick_web_MAE_all": 0.015673364512622356, "eval_seeclick_web_MAE_h": 0.007765157613903284, "eval_seeclick_web_MAE_w": 0.015952853485941887, "eval_seeclick_web_MAE_x_boxes": 0.009551033610478044, "eval_seeclick_web_MAE_y_boxes": 0.02141634118743241, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9183205962181091, "eval_seeclick_web_loss_ce": 0.00018939840811071917, "eval_seeclick_web_loss_iou": 0.4190673828125, "eval_seeclick_web_loss_num": 0.0124969482421875, "eval_seeclick_web_loss_xval": 0.900390625, "eval_seeclick_web_runtime": 24.0722, "eval_seeclick_web_samples_per_second": 2.077, "eval_seeclick_web_steps_per_second": 0.083, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "eval_icons_CIoU": 0.2844693139195442, "eval_icons_GIoU": 0.29983824491500854, "eval_icons_IoU": 0.3569711297750473, "eval_icons_MAE_all": 0.056874074041843414, "eval_icons_MAE_h": 0.039589228108525276, "eval_icons_MAE_w": 0.05369975045323372, "eval_icons_MAE_x_boxes": 0.051283443346619606, "eval_icons_MAE_y_boxes": 0.03893454186618328, "eval_icons_inside_bbox": 0.6215277910232544, "eval_icons_loss": 1.6849582195281982, "eval_icons_loss_ce": 0.00027994449192192405, "eval_icons_loss_iou": 0.662353515625, "eval_icons_loss_num": 0.04971122741699219, "eval_icons_loss_xval": 1.573486328125, "eval_icons_runtime": 23.9366, "eval_icons_samples_per_second": 2.089, "eval_icons_steps_per_second": 0.084, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "eval_screenspot_CIoU": 0.35463671882947284, "eval_screenspot_GIoU": 0.36913161476453143, "eval_screenspot_IoU": 0.43245549003283185, "eval_screenspot_MAE_all": 0.058157578110694885, "eval_screenspot_MAE_h": 0.038887947176893554, "eval_screenspot_MAE_w": 0.067777914305528, "eval_screenspot_MAE_x_boxes": 0.07268660329282284, "eval_screenspot_MAE_y_boxes": 0.037438808319469295, "eval_screenspot_inside_bbox": 0.6966666579246521, "eval_screenspot_loss": 1.6075245141983032, "eval_screenspot_loss_ce": 0.0002708134804076205, "eval_screenspot_loss_iou": 0.6668294270833334, "eval_screenspot_loss_num": 0.06592305501302083, "eval_screenspot_loss_xval": 1.66162109375, "eval_screenspot_runtime": 38.3047, "eval_screenspot_samples_per_second": 2.323, "eval_screenspot_steps_per_second": 0.078, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "eval_compot_CIoU": 0.34248843789100647, "eval_compot_GIoU": 0.3517104983329773, "eval_compot_IoU": 0.40232492983341217, "eval_compot_MAE_all": 0.017755805049091578, "eval_compot_MAE_h": 0.009214944671839476, "eval_compot_MAE_w": 0.02113647386431694, "eval_compot_MAE_x_boxes": 0.029819749295711517, "eval_compot_MAE_y_boxes": 0.0068535758182406425, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4059686660766602, "eval_compot_loss_ce": 0.00018623641517478973, "eval_compot_loss_iou": 0.65087890625, "eval_compot_loss_num": 0.016448974609375, "eval_compot_loss_xval": 1.3837890625, "eval_compot_runtime": 24.7377, "eval_compot_samples_per_second": 2.021, "eval_compot_steps_per_second": 0.081, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "eval_custom_ui_val_CIoU": 0.47646190888351864, "eval_custom_ui_val_GIoU": 0.4871201482084062, "eval_custom_ui_val_IoU": 0.5378114382425944, "eval_custom_ui_val_MAE_all": 0.02943614311516285, "eval_custom_ui_val_MAE_h": 0.016217040493049555, "eval_custom_ui_val_MAE_w": 0.03668393205023474, "eval_custom_ui_val_MAE_x_boxes": 0.03531148243281576, "eval_custom_ui_val_MAE_y_boxes": 0.014928720322334103, "eval_custom_ui_val_inside_bbox": 0.7719907429483202, "eval_custom_ui_val_loss": 1.2021739482879639, "eval_custom_ui_val_loss_ce": 0.00023076724998342493, "eval_custom_ui_val_loss_iou": 0.5134412977430556, "eval_custom_ui_val_loss_num": 0.027161492241753474, "eval_custom_ui_val_loss_xval": 1.1628689236111112, "eval_custom_ui_val_runtime": 76.3883, "eval_custom_ui_val_samples_per_second": 3.469, "eval_custom_ui_val_steps_per_second": 0.118, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.249443207126948, "loss": 0.8959341049194336, "loss_ce": 0.00018216308671981096, "loss_iou": 0.390625, "loss_num": 0.0228271484375, "loss_xval": 0.89453125, "num_input_tokens_seen": 308158408, "step": 5500 }, { "epoch": 12.251670378619155, "grad_norm": 15.896366119384766, "learning_rate": 1e-06, "loss": 0.3427, "num_input_tokens_seen": 308216040, "step": 5501 }, { "epoch": 12.251670378619155, "loss": 0.28972911834716797, "loss_ce": 0.0001172900665551424, "loss_iou": 0.11962890625, "loss_num": 0.010009765625, "loss_xval": 0.2890625, "num_input_tokens_seen": 308216040, "step": 5501 }, { "epoch": 12.25389755011136, "grad_norm": 31.605810165405273, "learning_rate": 1e-06, "loss": 0.4781, "num_input_tokens_seen": 308272560, "step": 5502 }, { "epoch": 12.25389755011136, "loss": 0.5278470516204834, "loss_ce": 0.00013708796177525073, "loss_iou": 0.244140625, "loss_num": 0.00787353515625, "loss_xval": 0.52734375, "num_input_tokens_seen": 308272560, "step": 5502 }, { "epoch": 12.256124721603564, "grad_norm": 18.19232940673828, "learning_rate": 1e-06, "loss": 0.5417, "num_input_tokens_seen": 308330492, "step": 5503 }, { "epoch": 12.256124721603564, "loss": 0.5479559302330017, "loss_ce": 0.00010438320168759674, "loss_iou": 0.2421875, "loss_num": 0.01263427734375, "loss_xval": 0.546875, "num_input_tokens_seen": 308330492, "step": 5503 }, { "epoch": 12.25835189309577, "grad_norm": 14.01863956451416, "learning_rate": 1e-06, "loss": 0.6479, "num_input_tokens_seen": 308385352, "step": 5504 }, { "epoch": 12.25835189309577, "loss": 0.4697185158729553, "loss_ce": 0.00011405147233745083, "loss_iou": 0.193359375, "loss_num": 0.0166015625, "loss_xval": 0.46875, "num_input_tokens_seen": 308385352, "step": 5504 }, { "epoch": 12.260579064587974, "grad_norm": 21.31361198425293, "learning_rate": 1e-06, "loss": 0.7673, "num_input_tokens_seen": 308443100, "step": 5505 }, { "epoch": 12.260579064587974, "loss": 0.7160589694976807, "loss_ce": 0.00011650074156932533, "loss_iou": 0.29296875, "loss_num": 0.025634765625, "loss_xval": 0.71484375, "num_input_tokens_seen": 308443100, "step": 5505 }, { "epoch": 12.262806236080179, "grad_norm": 199.44406127929688, "learning_rate": 1e-06, "loss": 0.4544, "num_input_tokens_seen": 308499652, "step": 5506 }, { "epoch": 12.262806236080179, "loss": 0.556039035320282, "loss_ce": 0.0001308466453338042, "loss_iou": 0.251953125, "loss_num": 0.01007080078125, "loss_xval": 0.5546875, "num_input_tokens_seen": 308499652, "step": 5506 }, { "epoch": 12.265033407572384, "grad_norm": 28.171159744262695, "learning_rate": 1e-06, "loss": 0.4748, "num_input_tokens_seen": 308551468, "step": 5507 }, { "epoch": 12.265033407572384, "loss": 0.4873943626880646, "loss_ce": 0.00021174979337956756, "loss_iou": 0.212890625, "loss_num": 0.01220703125, "loss_xval": 0.486328125, "num_input_tokens_seen": 308551468, "step": 5507 }, { "epoch": 12.267260579064589, "grad_norm": 17.694232940673828, "learning_rate": 1e-06, "loss": 0.5427, "num_input_tokens_seen": 308607228, "step": 5508 }, { "epoch": 12.267260579064589, "loss": 0.3442959189414978, "loss_ce": 0.00011866106069646776, "loss_iou": 0.134765625, "loss_num": 0.01513671875, "loss_xval": 0.34375, "num_input_tokens_seen": 308607228, "step": 5508 }, { "epoch": 12.269487750556793, "grad_norm": 17.69139289855957, "learning_rate": 1e-06, "loss": 0.5362, "num_input_tokens_seen": 308664588, "step": 5509 }, { "epoch": 12.269487750556793, "loss": 0.4937598705291748, "loss_ce": 0.00010753308015409857, "loss_iou": 0.21875, "loss_num": 0.010986328125, "loss_xval": 0.494140625, "num_input_tokens_seen": 308664588, "step": 5509 }, { "epoch": 12.271714922048998, "grad_norm": 18.234146118164062, "learning_rate": 1e-06, "loss": 0.6428, "num_input_tokens_seen": 308716208, "step": 5510 }, { "epoch": 12.271714922048998, "loss": 0.5791558027267456, "loss_ce": 0.00014576420653611422, "loss_iou": 0.26171875, "loss_num": 0.0107421875, "loss_xval": 0.578125, "num_input_tokens_seen": 308716208, "step": 5510 }, { "epoch": 12.273942093541203, "grad_norm": 18.1691951751709, "learning_rate": 1e-06, "loss": 0.5535, "num_input_tokens_seen": 308772680, "step": 5511 }, { "epoch": 12.273942093541203, "loss": 0.5991677045822144, "loss_ce": 0.00016867450904101133, "loss_iou": 0.26171875, "loss_num": 0.0147705078125, "loss_xval": 0.59765625, "num_input_tokens_seen": 308772680, "step": 5511 }, { "epoch": 12.276169265033408, "grad_norm": 15.399046897888184, "learning_rate": 1e-06, "loss": 0.4188, "num_input_tokens_seen": 308829192, "step": 5512 }, { "epoch": 12.276169265033408, "loss": 0.5632211565971375, "loss_ce": 0.0001413593563484028, "loss_iou": 0.2001953125, "loss_num": 0.032470703125, "loss_xval": 0.5625, "num_input_tokens_seen": 308829192, "step": 5512 }, { "epoch": 12.278396436525613, "grad_norm": 15.511956214904785, "learning_rate": 1e-06, "loss": 0.5697, "num_input_tokens_seen": 308886748, "step": 5513 }, { "epoch": 12.278396436525613, "loss": 0.5418818593025208, "loss_ce": 0.00013380208110902458, "loss_iou": 0.240234375, "loss_num": 0.01239013671875, "loss_xval": 0.54296875, "num_input_tokens_seen": 308886748, "step": 5513 }, { "epoch": 12.280623608017818, "grad_norm": 14.026480674743652, "learning_rate": 1e-06, "loss": 0.6363, "num_input_tokens_seen": 308942736, "step": 5514 }, { "epoch": 12.280623608017818, "loss": 0.7164598703384399, "loss_ce": 0.0001512969029136002, "loss_iou": 0.259765625, "loss_num": 0.0390625, "loss_xval": 0.71484375, "num_input_tokens_seen": 308942736, "step": 5514 }, { "epoch": 12.282850779510023, "grad_norm": 16.546672821044922, "learning_rate": 1e-06, "loss": 0.4416, "num_input_tokens_seen": 308997492, "step": 5515 }, { "epoch": 12.282850779510023, "loss": 0.3985058069229126, "loss_ce": 0.00012933829566463828, "loss_iou": 0.166015625, "loss_num": 0.01318359375, "loss_xval": 0.3984375, "num_input_tokens_seen": 308997492, "step": 5515 }, { "epoch": 12.285077951002227, "grad_norm": 16.246435165405273, "learning_rate": 1e-06, "loss": 0.517, "num_input_tokens_seen": 309054160, "step": 5516 }, { "epoch": 12.285077951002227, "loss": 0.4222395122051239, "loss_ce": 0.00012038354179821908, "loss_iou": 0.1953125, "loss_num": 0.006256103515625, "loss_xval": 0.421875, "num_input_tokens_seen": 309054160, "step": 5516 }, { "epoch": 12.287305122494432, "grad_norm": 16.089317321777344, "learning_rate": 1e-06, "loss": 0.4671, "num_input_tokens_seen": 309109496, "step": 5517 }, { "epoch": 12.287305122494432, "loss": 0.48379087448120117, "loss_ce": 0.00014830243890173733, "loss_iou": 0.2109375, "loss_num": 0.01214599609375, "loss_xval": 0.484375, "num_input_tokens_seen": 309109496, "step": 5517 }, { "epoch": 12.289532293986637, "grad_norm": 14.831037521362305, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 309166044, "step": 5518 }, { "epoch": 12.289532293986637, "loss": 0.6069037914276123, "loss_ce": 0.00015331347822211683, "loss_iou": 0.275390625, "loss_num": 0.0115966796875, "loss_xval": 0.60546875, "num_input_tokens_seen": 309166044, "step": 5518 }, { "epoch": 12.291759465478842, "grad_norm": 16.727554321289062, "learning_rate": 1e-06, "loss": 0.5212, "num_input_tokens_seen": 309224472, "step": 5519 }, { "epoch": 12.291759465478842, "loss": 0.37400585412979126, "loss_ce": 0.00010447671229485422, "loss_iou": 0.1611328125, "loss_num": 0.0103759765625, "loss_xval": 0.373046875, "num_input_tokens_seen": 309224472, "step": 5519 }, { "epoch": 12.293986636971047, "grad_norm": 16.9213809967041, "learning_rate": 1e-06, "loss": 0.8055, "num_input_tokens_seen": 309281284, "step": 5520 }, { "epoch": 12.293986636971047, "loss": 0.9495736360549927, "loss_ce": 0.00011074876238126308, "loss_iou": 0.373046875, "loss_num": 0.040771484375, "loss_xval": 0.94921875, "num_input_tokens_seen": 309281284, "step": 5520 }, { "epoch": 12.296213808463252, "grad_norm": 24.03369140625, "learning_rate": 1e-06, "loss": 0.5665, "num_input_tokens_seen": 309336800, "step": 5521 }, { "epoch": 12.296213808463252, "loss": 0.5887036323547363, "loss_ce": 0.0003247222339268774, "loss_iou": 0.232421875, "loss_num": 0.0250244140625, "loss_xval": 0.58984375, "num_input_tokens_seen": 309336800, "step": 5521 }, { "epoch": 12.298440979955457, "grad_norm": 17.732921600341797, "learning_rate": 1e-06, "loss": 0.6271, "num_input_tokens_seen": 309390848, "step": 5522 }, { "epoch": 12.298440979955457, "loss": 0.858605146408081, "loss_ce": 0.000206666489248164, "loss_iou": 0.365234375, "loss_num": 0.025634765625, "loss_xval": 0.859375, "num_input_tokens_seen": 309390848, "step": 5522 }, { "epoch": 12.300668151447661, "grad_norm": 14.111288070678711, "learning_rate": 1e-06, "loss": 0.5704, "num_input_tokens_seen": 309447824, "step": 5523 }, { "epoch": 12.300668151447661, "loss": 0.3774692416191101, "loss_ce": 0.00014990594354458153, "loss_iou": 0.154296875, "loss_num": 0.0135498046875, "loss_xval": 0.376953125, "num_input_tokens_seen": 309447824, "step": 5523 }, { "epoch": 12.302895322939866, "grad_norm": 19.964794158935547, "learning_rate": 1e-06, "loss": 0.4378, "num_input_tokens_seen": 309504964, "step": 5524 }, { "epoch": 12.302895322939866, "loss": 0.3729360103607178, "loss_ce": 0.00013328931527212262, "loss_iou": 0.1669921875, "loss_num": 0.0076904296875, "loss_xval": 0.373046875, "num_input_tokens_seen": 309504964, "step": 5524 }, { "epoch": 12.305122494432071, "grad_norm": 14.157723426818848, "learning_rate": 1e-06, "loss": 0.4644, "num_input_tokens_seen": 309560456, "step": 5525 }, { "epoch": 12.305122494432071, "loss": 0.5831543207168579, "loss_ce": 0.00014654998085461557, "loss_iou": 0.2177734375, "loss_num": 0.0296630859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 309560456, "step": 5525 }, { "epoch": 12.307349665924276, "grad_norm": 22.95296287536621, "learning_rate": 1e-06, "loss": 0.5372, "num_input_tokens_seen": 309619680, "step": 5526 }, { "epoch": 12.307349665924276, "loss": 0.5235176086425781, "loss_ce": 0.00020221451995894313, "loss_iou": 0.2041015625, "loss_num": 0.0230712890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 309619680, "step": 5526 }, { "epoch": 12.309576837416481, "grad_norm": 14.134220123291016, "learning_rate": 1e-06, "loss": 0.4565, "num_input_tokens_seen": 309676328, "step": 5527 }, { "epoch": 12.309576837416481, "loss": 0.35591933131217957, "loss_ce": 0.00020644423784688115, "loss_iou": 0.1611328125, "loss_num": 0.00677490234375, "loss_xval": 0.35546875, "num_input_tokens_seen": 309676328, "step": 5527 }, { "epoch": 12.311804008908686, "grad_norm": 26.886777877807617, "learning_rate": 1e-06, "loss": 0.5358, "num_input_tokens_seen": 309727028, "step": 5528 }, { "epoch": 12.311804008908686, "loss": 0.5397249460220337, "loss_ce": 0.0001741335727274418, "loss_iou": 0.236328125, "loss_num": 0.0133056640625, "loss_xval": 0.5390625, "num_input_tokens_seen": 309727028, "step": 5528 }, { "epoch": 12.31403118040089, "grad_norm": 17.852651596069336, "learning_rate": 1e-06, "loss": 0.5016, "num_input_tokens_seen": 309783832, "step": 5529 }, { "epoch": 12.31403118040089, "loss": 0.4710628390312195, "loss_ce": 0.00011555143282748759, "loss_iou": 0.2060546875, "loss_num": 0.01165771484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 309783832, "step": 5529 }, { "epoch": 12.316258351893095, "grad_norm": 14.421836853027344, "learning_rate": 1e-06, "loss": 0.4286, "num_input_tokens_seen": 309839264, "step": 5530 }, { "epoch": 12.316258351893095, "loss": 0.29735976457595825, "loss_ce": 0.00011855886259581894, "loss_iou": 0.1298828125, "loss_num": 0.00738525390625, "loss_xval": 0.296875, "num_input_tokens_seen": 309839264, "step": 5530 }, { "epoch": 12.3184855233853, "grad_norm": 41.36547088623047, "learning_rate": 1e-06, "loss": 0.5509, "num_input_tokens_seen": 309893232, "step": 5531 }, { "epoch": 12.3184855233853, "loss": 0.648200511932373, "loss_ce": 0.00012922041059937328, "loss_iou": 0.296875, "loss_num": 0.0107421875, "loss_xval": 0.6484375, "num_input_tokens_seen": 309893232, "step": 5531 }, { "epoch": 12.320712694877505, "grad_norm": 22.48734474182129, "learning_rate": 1e-06, "loss": 0.505, "num_input_tokens_seen": 309948492, "step": 5532 }, { "epoch": 12.320712694877505, "loss": 0.4604489803314209, "loss_ce": 0.00012181737110950053, "loss_iou": 0.2060546875, "loss_num": 0.009521484375, "loss_xval": 0.4609375, "num_input_tokens_seen": 309948492, "step": 5532 }, { "epoch": 12.32293986636971, "grad_norm": 17.8016414642334, "learning_rate": 1e-06, "loss": 0.3587, "num_input_tokens_seen": 310005616, "step": 5533 }, { "epoch": 12.32293986636971, "loss": 0.39793771505355835, "loss_ce": 0.0001105824630940333, "loss_iou": 0.1806640625, "loss_num": 0.00726318359375, "loss_xval": 0.3984375, "num_input_tokens_seen": 310005616, "step": 5533 }, { "epoch": 12.325167037861915, "grad_norm": 17.646106719970703, "learning_rate": 1e-06, "loss": 0.581, "num_input_tokens_seen": 310061764, "step": 5534 }, { "epoch": 12.325167037861915, "loss": 0.666999101638794, "loss_ce": 0.00012893178791273385, "loss_iou": 0.2734375, "loss_num": 0.024169921875, "loss_xval": 0.66796875, "num_input_tokens_seen": 310061764, "step": 5534 }, { "epoch": 12.32739420935412, "grad_norm": 19.61566925048828, "learning_rate": 1e-06, "loss": 0.5461, "num_input_tokens_seen": 310116556, "step": 5535 }, { "epoch": 12.32739420935412, "loss": 0.31176358461380005, "loss_ce": 0.00011805207759607583, "loss_iou": 0.1416015625, "loss_num": 0.005584716796875, "loss_xval": 0.3125, "num_input_tokens_seen": 310116556, "step": 5535 }, { "epoch": 12.329621380846325, "grad_norm": 14.809009552001953, "learning_rate": 1e-06, "loss": 0.4208, "num_input_tokens_seen": 310173568, "step": 5536 }, { "epoch": 12.329621380846325, "loss": 0.40904849767684937, "loss_ce": 0.00011295877629891038, "loss_iou": 0.1845703125, "loss_num": 0.00799560546875, "loss_xval": 0.408203125, "num_input_tokens_seen": 310173568, "step": 5536 }, { "epoch": 12.33184855233853, "grad_norm": 26.909616470336914, "learning_rate": 1e-06, "loss": 0.4644, "num_input_tokens_seen": 310228872, "step": 5537 }, { "epoch": 12.33184855233853, "loss": 0.580930233001709, "loss_ce": 0.00011961960990447551, "loss_iou": 0.240234375, "loss_num": 0.0198974609375, "loss_xval": 0.58203125, "num_input_tokens_seen": 310228872, "step": 5537 }, { "epoch": 12.334075723830734, "grad_norm": 18.16213035583496, "learning_rate": 1e-06, "loss": 0.6525, "num_input_tokens_seen": 310284508, "step": 5538 }, { "epoch": 12.334075723830734, "loss": 0.5219680070877075, "loss_ce": 0.00011739489855244756, "loss_iou": 0.2353515625, "loss_num": 0.01019287109375, "loss_xval": 0.5234375, "num_input_tokens_seen": 310284508, "step": 5538 }, { "epoch": 12.33630289532294, "grad_norm": 32.21243667602539, "learning_rate": 1e-06, "loss": 0.4215, "num_input_tokens_seen": 310340144, "step": 5539 }, { "epoch": 12.33630289532294, "loss": 0.31780463457107544, "loss_ce": 0.00011664302292047068, "loss_iou": 0.1318359375, "loss_num": 0.01092529296875, "loss_xval": 0.318359375, "num_input_tokens_seen": 310340144, "step": 5539 }, { "epoch": 12.338530066815144, "grad_norm": 23.456138610839844, "learning_rate": 1e-06, "loss": 0.3759, "num_input_tokens_seen": 310398476, "step": 5540 }, { "epoch": 12.338530066815144, "loss": 0.36424580216407776, "loss_ce": 0.00011004651605617255, "loss_iou": 0.158203125, "loss_num": 0.00946044921875, "loss_xval": 0.36328125, "num_input_tokens_seen": 310398476, "step": 5540 }, { "epoch": 12.340757238307349, "grad_norm": 18.373727798461914, "learning_rate": 1e-06, "loss": 0.4471, "num_input_tokens_seen": 310456252, "step": 5541 }, { "epoch": 12.340757238307349, "loss": 0.4610535502433777, "loss_ce": 0.0001160675601568073, "loss_iou": 0.2060546875, "loss_num": 0.009765625, "loss_xval": 0.4609375, "num_input_tokens_seen": 310456252, "step": 5541 }, { "epoch": 12.342984409799554, "grad_norm": 15.688360214233398, "learning_rate": 1e-06, "loss": 0.4345, "num_input_tokens_seen": 310509648, "step": 5542 }, { "epoch": 12.342984409799554, "loss": 0.33905208110809326, "loss_ce": 0.00012385296577122062, "loss_iou": 0.1484375, "loss_num": 0.0081787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 310509648, "step": 5542 }, { "epoch": 12.345211581291759, "grad_norm": 24.152406692504883, "learning_rate": 1e-06, "loss": 0.4911, "num_input_tokens_seen": 310565268, "step": 5543 }, { "epoch": 12.345211581291759, "loss": 0.45101678371429443, "loss_ce": 0.00015007876208983362, "loss_iou": 0.1923828125, "loss_num": 0.01312255859375, "loss_xval": 0.451171875, "num_input_tokens_seen": 310565268, "step": 5543 }, { "epoch": 12.347438752783964, "grad_norm": 18.96198081970215, "learning_rate": 1e-06, "loss": 0.3821, "num_input_tokens_seen": 310618196, "step": 5544 }, { "epoch": 12.347438752783964, "loss": 0.4492288827896118, "loss_ce": 0.0001322347525274381, "loss_iou": 0.2060546875, "loss_num": 0.007476806640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 310618196, "step": 5544 }, { "epoch": 12.34966592427617, "grad_norm": 21.896156311035156, "learning_rate": 1e-06, "loss": 0.468, "num_input_tokens_seen": 310673820, "step": 5545 }, { "epoch": 12.34966592427617, "loss": 0.3710940182209015, "loss_ce": 0.00012234911264386028, "loss_iou": 0.1640625, "loss_num": 0.00848388671875, "loss_xval": 0.37109375, "num_input_tokens_seen": 310673820, "step": 5545 }, { "epoch": 12.351893095768375, "grad_norm": 33.179386138916016, "learning_rate": 1e-06, "loss": 0.5911, "num_input_tokens_seen": 310725344, "step": 5546 }, { "epoch": 12.351893095768375, "loss": 0.85118567943573, "loss_ce": 0.0008438542135991156, "loss_iou": 0.375, "loss_num": 0.02001953125, "loss_xval": 0.8515625, "num_input_tokens_seen": 310725344, "step": 5546 }, { "epoch": 12.35412026726058, "grad_norm": 24.786273956298828, "learning_rate": 1e-06, "loss": 0.5604, "num_input_tokens_seen": 310780808, "step": 5547 }, { "epoch": 12.35412026726058, "loss": 0.37451988458633423, "loss_ce": 0.00025231053587049246, "loss_iou": 0.16796875, "loss_num": 0.007720947265625, "loss_xval": 0.375, "num_input_tokens_seen": 310780808, "step": 5547 }, { "epoch": 12.356347438752785, "grad_norm": 22.51511573791504, "learning_rate": 1e-06, "loss": 0.5251, "num_input_tokens_seen": 310839704, "step": 5548 }, { "epoch": 12.356347438752785, "loss": 0.6323890089988708, "loss_ce": 0.00012582968338392675, "loss_iou": 0.25390625, "loss_num": 0.024658203125, "loss_xval": 0.6328125, "num_input_tokens_seen": 310839704, "step": 5548 }, { "epoch": 12.35857461024499, "grad_norm": 19.136016845703125, "learning_rate": 1e-06, "loss": 0.6059, "num_input_tokens_seen": 310896396, "step": 5549 }, { "epoch": 12.35857461024499, "loss": 0.5548136234283447, "loss_ce": 0.00012610982230398804, "loss_iou": 0.2412109375, "loss_num": 0.0142822265625, "loss_xval": 0.5546875, "num_input_tokens_seen": 310896396, "step": 5549 }, { "epoch": 12.360801781737194, "grad_norm": 18.60460090637207, "learning_rate": 1e-06, "loss": 0.4583, "num_input_tokens_seen": 310954004, "step": 5550 }, { "epoch": 12.360801781737194, "loss": 0.4283403754234314, "loss_ce": 0.00011774353333748877, "loss_iou": 0.1796875, "loss_num": 0.0137939453125, "loss_xval": 0.427734375, "num_input_tokens_seen": 310954004, "step": 5550 }, { "epoch": 12.3630289532294, "grad_norm": 21.40949249267578, "learning_rate": 1e-06, "loss": 0.6223, "num_input_tokens_seen": 311010500, "step": 5551 }, { "epoch": 12.3630289532294, "loss": 0.8038879632949829, "loss_ce": 0.00017696505528874695, "loss_iou": 0.314453125, "loss_num": 0.034912109375, "loss_xval": 0.8046875, "num_input_tokens_seen": 311010500, "step": 5551 }, { "epoch": 12.365256124721604, "grad_norm": 18.762351989746094, "learning_rate": 1e-06, "loss": 0.3948, "num_input_tokens_seen": 311070112, "step": 5552 }, { "epoch": 12.365256124721604, "loss": 0.27796292304992676, "loss_ce": 0.00013091039727441967, "loss_iou": 0.11865234375, "loss_num": 0.008056640625, "loss_xval": 0.27734375, "num_input_tokens_seen": 311070112, "step": 5552 }, { "epoch": 12.367483296213809, "grad_norm": 22.09250259399414, "learning_rate": 1e-06, "loss": 0.7311, "num_input_tokens_seen": 311125564, "step": 5553 }, { "epoch": 12.367483296213809, "loss": 0.7942242622375488, "loss_ce": 0.00015692379383835942, "loss_iou": 0.337890625, "loss_num": 0.023193359375, "loss_xval": 0.79296875, "num_input_tokens_seen": 311125564, "step": 5553 }, { "epoch": 12.369710467706014, "grad_norm": 20.51223373413086, "learning_rate": 1e-06, "loss": 0.5099, "num_input_tokens_seen": 311182804, "step": 5554 }, { "epoch": 12.369710467706014, "loss": 0.6075503826141357, "loss_ce": 0.00012847562902607024, "loss_iou": 0.265625, "loss_num": 0.0150146484375, "loss_xval": 0.609375, "num_input_tokens_seen": 311182804, "step": 5554 }, { "epoch": 12.371937639198219, "grad_norm": 27.149372100830078, "learning_rate": 1e-06, "loss": 0.6807, "num_input_tokens_seen": 311239764, "step": 5555 }, { "epoch": 12.371937639198219, "loss": 0.666401743888855, "loss_ce": 0.00014195009134709835, "loss_iou": 0.28125, "loss_num": 0.020751953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 311239764, "step": 5555 }, { "epoch": 12.374164810690424, "grad_norm": 19.294330596923828, "learning_rate": 1e-06, "loss": 0.4761, "num_input_tokens_seen": 311295712, "step": 5556 }, { "epoch": 12.374164810690424, "loss": 0.3280073404312134, "loss_ce": 0.00012647973198909312, "loss_iou": 0.150390625, "loss_num": 0.005340576171875, "loss_xval": 0.328125, "num_input_tokens_seen": 311295712, "step": 5556 }, { "epoch": 12.376391982182628, "grad_norm": 30.496156692504883, "learning_rate": 1e-06, "loss": 0.5462, "num_input_tokens_seen": 311354216, "step": 5557 }, { "epoch": 12.376391982182628, "loss": 0.4644961357116699, "loss_ce": 0.00014063574781175703, "loss_iou": 0.2060546875, "loss_num": 0.0106201171875, "loss_xval": 0.46484375, "num_input_tokens_seen": 311354216, "step": 5557 }, { "epoch": 12.378619153674833, "grad_norm": 18.934402465820312, "learning_rate": 1e-06, "loss": 0.3934, "num_input_tokens_seen": 311409028, "step": 5558 }, { "epoch": 12.378619153674833, "loss": 0.39881080389022827, "loss_ce": 0.0001291544467676431, "loss_iou": 0.166015625, "loss_num": 0.01324462890625, "loss_xval": 0.3984375, "num_input_tokens_seen": 311409028, "step": 5558 }, { "epoch": 12.380846325167038, "grad_norm": 25.294477462768555, "learning_rate": 1e-06, "loss": 0.5882, "num_input_tokens_seen": 311465116, "step": 5559 }, { "epoch": 12.380846325167038, "loss": 0.6579493880271912, "loss_ce": 0.00011249056842643768, "loss_iou": 0.27734375, "loss_num": 0.0208740234375, "loss_xval": 0.65625, "num_input_tokens_seen": 311465116, "step": 5559 }, { "epoch": 12.383073496659243, "grad_norm": 21.457990646362305, "learning_rate": 1e-06, "loss": 0.4843, "num_input_tokens_seen": 311520028, "step": 5560 }, { "epoch": 12.383073496659243, "loss": 0.4148017168045044, "loss_ce": 0.00012887550110463053, "loss_iou": 0.1904296875, "loss_num": 0.00665283203125, "loss_xval": 0.4140625, "num_input_tokens_seen": 311520028, "step": 5560 }, { "epoch": 12.385300668151448, "grad_norm": 22.746858596801758, "learning_rate": 1e-06, "loss": 0.4334, "num_input_tokens_seen": 311572928, "step": 5561 }, { "epoch": 12.385300668151448, "loss": 0.2498151659965515, "loss_ce": 0.00018137965525966138, "loss_iou": 0.09619140625, "loss_num": 0.01153564453125, "loss_xval": 0.25, "num_input_tokens_seen": 311572928, "step": 5561 }, { "epoch": 12.387527839643653, "grad_norm": 28.644351959228516, "learning_rate": 1e-06, "loss": 0.586, "num_input_tokens_seen": 311630476, "step": 5562 }, { "epoch": 12.387527839643653, "loss": 0.6954451203346252, "loss_ce": 0.0001326243655057624, "loss_iou": 0.310546875, "loss_num": 0.01531982421875, "loss_xval": 0.6953125, "num_input_tokens_seen": 311630476, "step": 5562 }, { "epoch": 12.389755011135858, "grad_norm": 38.925472259521484, "learning_rate": 1e-06, "loss": 0.4525, "num_input_tokens_seen": 311687972, "step": 5563 }, { "epoch": 12.389755011135858, "loss": 0.43446385860443115, "loss_ce": 0.00013769854558631778, "loss_iou": 0.201171875, "loss_num": 0.00634765625, "loss_xval": 0.43359375, "num_input_tokens_seen": 311687972, "step": 5563 }, { "epoch": 12.391982182628063, "grad_norm": 19.39438247680664, "learning_rate": 1e-06, "loss": 0.5437, "num_input_tokens_seen": 311744768, "step": 5564 }, { "epoch": 12.391982182628063, "loss": 0.5501693487167358, "loss_ce": 0.00012056773994117975, "loss_iou": 0.251953125, "loss_num": 0.0096435546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 311744768, "step": 5564 }, { "epoch": 12.394209354120267, "grad_norm": 13.309589385986328, "learning_rate": 1e-06, "loss": 0.4274, "num_input_tokens_seen": 311799736, "step": 5565 }, { "epoch": 12.394209354120267, "loss": 0.5023265480995178, "loss_ce": 0.0001292880333494395, "loss_iou": 0.212890625, "loss_num": 0.01513671875, "loss_xval": 0.50390625, "num_input_tokens_seen": 311799736, "step": 5565 }, { "epoch": 12.396436525612472, "grad_norm": 15.245659828186035, "learning_rate": 1e-06, "loss": 0.4462, "num_input_tokens_seen": 311856860, "step": 5566 }, { "epoch": 12.396436525612472, "loss": 0.5222017765045166, "loss_ce": 0.00010704126907512546, "loss_iou": 0.2294921875, "loss_num": 0.01263427734375, "loss_xval": 0.5234375, "num_input_tokens_seen": 311856860, "step": 5566 }, { "epoch": 12.398663697104677, "grad_norm": 12.112483024597168, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 311909940, "step": 5567 }, { "epoch": 12.398663697104677, "loss": 0.6277226805686951, "loss_ce": 0.00015922555758152157, "loss_iou": 0.287109375, "loss_num": 0.0111083984375, "loss_xval": 0.62890625, "num_input_tokens_seen": 311909940, "step": 5567 }, { "epoch": 12.400890868596882, "grad_norm": 35.967140197753906, "learning_rate": 1e-06, "loss": 0.5718, "num_input_tokens_seen": 311964724, "step": 5568 }, { "epoch": 12.400890868596882, "loss": 0.539790153503418, "loss_ce": 0.0001172933480120264, "loss_iou": 0.2294921875, "loss_num": 0.0159912109375, "loss_xval": 0.5390625, "num_input_tokens_seen": 311964724, "step": 5568 }, { "epoch": 12.403118040089087, "grad_norm": 12.642704963684082, "learning_rate": 1e-06, "loss": 0.4183, "num_input_tokens_seen": 312021624, "step": 5569 }, { "epoch": 12.403118040089087, "loss": 0.2902145981788635, "loss_ce": 0.00011449479643488303, "loss_iou": 0.125, "loss_num": 0.00787353515625, "loss_xval": 0.291015625, "num_input_tokens_seen": 312021624, "step": 5569 }, { "epoch": 12.405345211581292, "grad_norm": 24.059127807617188, "learning_rate": 1e-06, "loss": 0.4588, "num_input_tokens_seen": 312075924, "step": 5570 }, { "epoch": 12.405345211581292, "loss": 0.31721970438957214, "loss_ce": 0.00011153465311508626, "loss_iou": 0.1318359375, "loss_num": 0.0106201171875, "loss_xval": 0.31640625, "num_input_tokens_seen": 312075924, "step": 5570 }, { "epoch": 12.407572383073497, "grad_norm": 16.74148178100586, "learning_rate": 1e-06, "loss": 0.386, "num_input_tokens_seen": 312132284, "step": 5571 }, { "epoch": 12.407572383073497, "loss": 0.43999338150024414, "loss_ce": 0.0001740275911288336, "loss_iou": 0.1884765625, "loss_num": 0.0128173828125, "loss_xval": 0.439453125, "num_input_tokens_seen": 312132284, "step": 5571 }, { "epoch": 12.409799554565701, "grad_norm": 33.77167510986328, "learning_rate": 1e-06, "loss": 0.5518, "num_input_tokens_seen": 312190496, "step": 5572 }, { "epoch": 12.409799554565701, "loss": 0.3965058922767639, "loss_ce": 0.0001435701851733029, "loss_iou": 0.1826171875, "loss_num": 0.006378173828125, "loss_xval": 0.396484375, "num_input_tokens_seen": 312190496, "step": 5572 }, { "epoch": 12.412026726057906, "grad_norm": 45.48998260498047, "learning_rate": 1e-06, "loss": 0.5871, "num_input_tokens_seen": 312246352, "step": 5573 }, { "epoch": 12.412026726057906, "loss": 0.5653384327888489, "loss_ce": 0.00015288355643860996, "loss_iou": 0.240234375, "loss_num": 0.0169677734375, "loss_xval": 0.56640625, "num_input_tokens_seen": 312246352, "step": 5573 }, { "epoch": 12.414253897550111, "grad_norm": 15.57947063446045, "learning_rate": 1e-06, "loss": 0.5422, "num_input_tokens_seen": 312301816, "step": 5574 }, { "epoch": 12.414253897550111, "loss": 0.6265942454338074, "loss_ce": 0.00012941220484208316, "loss_iou": 0.259765625, "loss_num": 0.021240234375, "loss_xval": 0.625, "num_input_tokens_seen": 312301816, "step": 5574 }, { "epoch": 12.416481069042316, "grad_norm": 13.934830665588379, "learning_rate": 1e-06, "loss": 0.3072, "num_input_tokens_seen": 312361472, "step": 5575 }, { "epoch": 12.416481069042316, "loss": 0.23996922373771667, "loss_ce": 0.00010107570415129885, "loss_iou": 0.10595703125, "loss_num": 0.005645751953125, "loss_xval": 0.240234375, "num_input_tokens_seen": 312361472, "step": 5575 }, { "epoch": 12.41870824053452, "grad_norm": 22.51824951171875, "learning_rate": 1e-06, "loss": 0.5321, "num_input_tokens_seen": 312416660, "step": 5576 }, { "epoch": 12.41870824053452, "loss": 0.3880695104598999, "loss_ce": 0.00013007389497943223, "loss_iou": 0.1796875, "loss_num": 0.0054931640625, "loss_xval": 0.388671875, "num_input_tokens_seen": 312416660, "step": 5576 }, { "epoch": 12.420935412026726, "grad_norm": 12.614936828613281, "learning_rate": 1e-06, "loss": 0.3598, "num_input_tokens_seen": 312472660, "step": 5577 }, { "epoch": 12.420935412026726, "loss": 0.42401188611984253, "loss_ce": 0.00018374717910774052, "loss_iou": 0.1767578125, "loss_num": 0.0140380859375, "loss_xval": 0.423828125, "num_input_tokens_seen": 312472660, "step": 5577 }, { "epoch": 12.42316258351893, "grad_norm": 19.709209442138672, "learning_rate": 1e-06, "loss": 0.515, "num_input_tokens_seen": 312528764, "step": 5578 }, { "epoch": 12.42316258351893, "loss": 0.517235279083252, "loss_ce": 0.00014545858721248806, "loss_iou": 0.21875, "loss_num": 0.0159912109375, "loss_xval": 0.515625, "num_input_tokens_seen": 312528764, "step": 5578 }, { "epoch": 12.425389755011135, "grad_norm": 15.936797142028809, "learning_rate": 1e-06, "loss": 0.5646, "num_input_tokens_seen": 312586228, "step": 5579 }, { "epoch": 12.425389755011135, "loss": 0.44118812680244446, "loss_ce": 0.00014807441039010882, "loss_iou": 0.20703125, "loss_num": 0.005523681640625, "loss_xval": 0.44140625, "num_input_tokens_seen": 312586228, "step": 5579 }, { "epoch": 12.42761692650334, "grad_norm": 16.843250274658203, "learning_rate": 1e-06, "loss": 0.3889, "num_input_tokens_seen": 312640980, "step": 5580 }, { "epoch": 12.42761692650334, "loss": 0.3072529435157776, "loss_ce": 0.00012405663437675685, "loss_iou": 0.1318359375, "loss_num": 0.00872802734375, "loss_xval": 0.306640625, "num_input_tokens_seen": 312640980, "step": 5580 }, { "epoch": 12.429844097995545, "grad_norm": 29.384336471557617, "learning_rate": 1e-06, "loss": 0.5642, "num_input_tokens_seen": 312696640, "step": 5581 }, { "epoch": 12.429844097995545, "loss": 0.5222534537315369, "loss_ce": 0.00015875368262641132, "loss_iou": 0.2275390625, "loss_num": 0.01324462890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 312696640, "step": 5581 }, { "epoch": 12.43207126948775, "grad_norm": 31.232393264770508, "learning_rate": 1e-06, "loss": 0.5349, "num_input_tokens_seen": 312751632, "step": 5582 }, { "epoch": 12.43207126948775, "loss": 0.4201045632362366, "loss_ce": 0.00012167952081654221, "loss_iou": 0.19140625, "loss_num": 0.00750732421875, "loss_xval": 0.419921875, "num_input_tokens_seen": 312751632, "step": 5582 }, { "epoch": 12.434298440979955, "grad_norm": 36.4710578918457, "learning_rate": 1e-06, "loss": 0.5033, "num_input_tokens_seen": 312802876, "step": 5583 }, { "epoch": 12.434298440979955, "loss": 0.44115835428237915, "loss_ce": 0.00011831161828013137, "loss_iou": 0.203125, "loss_num": 0.0068359375, "loss_xval": 0.44140625, "num_input_tokens_seen": 312802876, "step": 5583 }, { "epoch": 12.43652561247216, "grad_norm": 17.38640594482422, "learning_rate": 1e-06, "loss": 0.4538, "num_input_tokens_seen": 312861152, "step": 5584 }, { "epoch": 12.43652561247216, "loss": 0.40885454416275024, "loss_ce": 0.0001631533377803862, "loss_iou": 0.1884765625, "loss_num": 0.006378173828125, "loss_xval": 0.408203125, "num_input_tokens_seen": 312861152, "step": 5584 }, { "epoch": 12.438752783964365, "grad_norm": 18.26358985900879, "learning_rate": 1e-06, "loss": 0.4418, "num_input_tokens_seen": 312915440, "step": 5585 }, { "epoch": 12.438752783964365, "loss": 0.542262613773346, "loss_ce": 0.0001483731612097472, "loss_iou": 0.23828125, "loss_num": 0.0133056640625, "loss_xval": 0.54296875, "num_input_tokens_seen": 312915440, "step": 5585 }, { "epoch": 12.44097995545657, "grad_norm": 19.2590274810791, "learning_rate": 1e-06, "loss": 0.5392, "num_input_tokens_seen": 312969320, "step": 5586 }, { "epoch": 12.44097995545657, "loss": 0.5372472405433655, "loss_ce": 0.00013788053183816373, "loss_iou": 0.2412109375, "loss_num": 0.01092529296875, "loss_xval": 0.5390625, "num_input_tokens_seen": 312969320, "step": 5586 }, { "epoch": 12.443207126948774, "grad_norm": 15.812176704406738, "learning_rate": 1e-06, "loss": 0.454, "num_input_tokens_seen": 313026268, "step": 5587 }, { "epoch": 12.443207126948774, "loss": 0.504636287689209, "loss_ce": 0.00011972515494562685, "loss_iou": 0.2001953125, "loss_num": 0.0208740234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 313026268, "step": 5587 }, { "epoch": 12.44543429844098, "grad_norm": 15.350566864013672, "learning_rate": 1e-06, "loss": 0.618, "num_input_tokens_seen": 313082880, "step": 5588 }, { "epoch": 12.44543429844098, "loss": 0.4841225743293762, "loss_ce": 0.00011377451301086694, "loss_iou": 0.1923828125, "loss_num": 0.0201416015625, "loss_xval": 0.484375, "num_input_tokens_seen": 313082880, "step": 5588 }, { "epoch": 12.447661469933184, "grad_norm": 17.000885009765625, "learning_rate": 1e-06, "loss": 0.6271, "num_input_tokens_seen": 313140436, "step": 5589 }, { "epoch": 12.447661469933184, "loss": 0.959378182888031, "loss_ce": 0.0001496242475695908, "loss_iou": 0.37109375, "loss_num": 0.04345703125, "loss_xval": 0.9609375, "num_input_tokens_seen": 313140436, "step": 5589 }, { "epoch": 12.449888641425389, "grad_norm": 24.725290298461914, "learning_rate": 1e-06, "loss": 0.4681, "num_input_tokens_seen": 313195976, "step": 5590 }, { "epoch": 12.449888641425389, "loss": 0.47022485733032227, "loss_ce": 0.00013207047595642507, "loss_iou": 0.2060546875, "loss_num": 0.01153564453125, "loss_xval": 0.470703125, "num_input_tokens_seen": 313195976, "step": 5590 }, { "epoch": 12.452115812917596, "grad_norm": 17.44597053527832, "learning_rate": 1e-06, "loss": 0.376, "num_input_tokens_seen": 313253104, "step": 5591 }, { "epoch": 12.452115812917596, "loss": 0.3353341221809387, "loss_ce": 0.00012904632603749633, "loss_iou": 0.1337890625, "loss_num": 0.01361083984375, "loss_xval": 0.3359375, "num_input_tokens_seen": 313253104, "step": 5591 }, { "epoch": 12.4543429844098, "grad_norm": 22.889427185058594, "learning_rate": 1e-06, "loss": 0.5976, "num_input_tokens_seen": 313309392, "step": 5592 }, { "epoch": 12.4543429844098, "loss": 0.6958190202713013, "loss_ce": 0.0001402627385687083, "loss_iou": 0.314453125, "loss_num": 0.01324462890625, "loss_xval": 0.6953125, "num_input_tokens_seen": 313309392, "step": 5592 }, { "epoch": 12.456570155902005, "grad_norm": 23.9648494720459, "learning_rate": 1e-06, "loss": 0.3895, "num_input_tokens_seen": 313365352, "step": 5593 }, { "epoch": 12.456570155902005, "loss": 0.4225989580154419, "loss_ce": 0.00011362304212525487, "loss_iou": 0.1943359375, "loss_num": 0.0069580078125, "loss_xval": 0.421875, "num_input_tokens_seen": 313365352, "step": 5593 }, { "epoch": 12.45879732739421, "grad_norm": 22.63255500793457, "learning_rate": 1e-06, "loss": 0.5079, "num_input_tokens_seen": 313422004, "step": 5594 }, { "epoch": 12.45879732739421, "loss": 0.37878933548927307, "loss_ce": 0.00012723426334559917, "loss_iou": 0.1728515625, "loss_num": 0.00665283203125, "loss_xval": 0.37890625, "num_input_tokens_seen": 313422004, "step": 5594 }, { "epoch": 12.461024498886415, "grad_norm": 12.69820785522461, "learning_rate": 1e-06, "loss": 0.454, "num_input_tokens_seen": 313480284, "step": 5595 }, { "epoch": 12.461024498886415, "loss": 0.34528595209121704, "loss_ce": 0.0001321507734246552, "loss_iou": 0.150390625, "loss_num": 0.0087890625, "loss_xval": 0.345703125, "num_input_tokens_seen": 313480284, "step": 5595 }, { "epoch": 12.46325167037862, "grad_norm": 155.06365966796875, "learning_rate": 1e-06, "loss": 0.4323, "num_input_tokens_seen": 313537320, "step": 5596 }, { "epoch": 12.46325167037862, "loss": 0.5743743181228638, "loss_ce": 0.0001555891940370202, "loss_iou": 0.23828125, "loss_num": 0.019775390625, "loss_xval": 0.57421875, "num_input_tokens_seen": 313537320, "step": 5596 }, { "epoch": 12.465478841870825, "grad_norm": 14.968194007873535, "learning_rate": 1e-06, "loss": 0.566, "num_input_tokens_seen": 313592364, "step": 5597 }, { "epoch": 12.465478841870825, "loss": 0.4135543406009674, "loss_ce": 0.00010219329124083742, "loss_iou": 0.1748046875, "loss_num": 0.0128173828125, "loss_xval": 0.4140625, "num_input_tokens_seen": 313592364, "step": 5597 }, { "epoch": 12.46770601336303, "grad_norm": 20.62652587890625, "learning_rate": 1e-06, "loss": 0.5161, "num_input_tokens_seen": 313649912, "step": 5598 }, { "epoch": 12.46770601336303, "loss": 0.6557765007019043, "loss_ce": 0.00025890767574310303, "loss_iou": 0.28125, "loss_num": 0.0186767578125, "loss_xval": 0.65625, "num_input_tokens_seen": 313649912, "step": 5598 }, { "epoch": 12.469933184855234, "grad_norm": 30.149272918701172, "learning_rate": 1e-06, "loss": 0.3283, "num_input_tokens_seen": 313707300, "step": 5599 }, { "epoch": 12.469933184855234, "loss": 0.28626585006713867, "loss_ce": 0.00010252131323795766, "loss_iou": 0.1220703125, "loss_num": 0.0084228515625, "loss_xval": 0.287109375, "num_input_tokens_seen": 313707300, "step": 5599 }, { "epoch": 12.47216035634744, "grad_norm": 23.769775390625, "learning_rate": 1e-06, "loss": 0.4423, "num_input_tokens_seen": 313761436, "step": 5600 }, { "epoch": 12.47216035634744, "loss": 0.4040485620498657, "loss_ce": 0.00011789177369792014, "loss_iou": 0.1787109375, "loss_num": 0.00946044921875, "loss_xval": 0.404296875, "num_input_tokens_seen": 313761436, "step": 5600 }, { "epoch": 12.474387527839644, "grad_norm": 15.069268226623535, "learning_rate": 1e-06, "loss": 0.3506, "num_input_tokens_seen": 313818588, "step": 5601 }, { "epoch": 12.474387527839644, "loss": 0.38050034642219543, "loss_ce": 0.00012924219481647015, "loss_iou": 0.1708984375, "loss_num": 0.00762939453125, "loss_xval": 0.380859375, "num_input_tokens_seen": 313818588, "step": 5601 }, { "epoch": 12.476614699331849, "grad_norm": 23.391334533691406, "learning_rate": 1e-06, "loss": 0.6686, "num_input_tokens_seen": 313875568, "step": 5602 }, { "epoch": 12.476614699331849, "loss": 1.0041569471359253, "loss_ce": 0.002203819341957569, "loss_iou": 0.35546875, "loss_num": 0.05810546875, "loss_xval": 1.0, "num_input_tokens_seen": 313875568, "step": 5602 }, { "epoch": 12.478841870824054, "grad_norm": 27.693687438964844, "learning_rate": 1e-06, "loss": 0.4511, "num_input_tokens_seen": 313932528, "step": 5603 }, { "epoch": 12.478841870824054, "loss": 0.4338412880897522, "loss_ce": 0.00012547013466246426, "loss_iou": 0.1796875, "loss_num": 0.0150146484375, "loss_xval": 0.43359375, "num_input_tokens_seen": 313932528, "step": 5603 }, { "epoch": 12.481069042316259, "grad_norm": 14.572260856628418, "learning_rate": 1e-06, "loss": 0.5831, "num_input_tokens_seen": 313990412, "step": 5604 }, { "epoch": 12.481069042316259, "loss": 0.47828370332717896, "loss_ce": 0.00013426817895378917, "loss_iou": 0.212890625, "loss_num": 0.0103759765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 313990412, "step": 5604 }, { "epoch": 12.483296213808464, "grad_norm": 41.894989013671875, "learning_rate": 1e-06, "loss": 0.3697, "num_input_tokens_seen": 314045856, "step": 5605 }, { "epoch": 12.483296213808464, "loss": 0.3678470551967621, "loss_ce": 0.00011023304250556976, "loss_iou": 0.16015625, "loss_num": 0.00970458984375, "loss_xval": 0.3671875, "num_input_tokens_seen": 314045856, "step": 5605 }, { "epoch": 12.485523385300668, "grad_norm": 23.39073371887207, "learning_rate": 1e-06, "loss": 0.377, "num_input_tokens_seen": 314100568, "step": 5606 }, { "epoch": 12.485523385300668, "loss": 0.3292108476161957, "loss_ce": 0.00010928411938948557, "loss_iou": 0.1474609375, "loss_num": 0.00689697265625, "loss_xval": 0.328125, "num_input_tokens_seen": 314100568, "step": 5606 }, { "epoch": 12.487750556792873, "grad_norm": 26.50432014465332, "learning_rate": 1e-06, "loss": 0.7268, "num_input_tokens_seen": 314154296, "step": 5607 }, { "epoch": 12.487750556792873, "loss": 0.7526974081993103, "loss_ce": 0.00013394109555520117, "loss_iou": 0.318359375, "loss_num": 0.0233154296875, "loss_xval": 0.75390625, "num_input_tokens_seen": 314154296, "step": 5607 }, { "epoch": 12.489977728285078, "grad_norm": 16.59043312072754, "learning_rate": 1e-06, "loss": 0.4595, "num_input_tokens_seen": 314211988, "step": 5608 }, { "epoch": 12.489977728285078, "loss": 0.5807017683982849, "loss_ce": 0.00013538115308620036, "loss_iou": 0.24609375, "loss_num": 0.017578125, "loss_xval": 0.58203125, "num_input_tokens_seen": 314211988, "step": 5608 }, { "epoch": 12.492204899777283, "grad_norm": 15.27440071105957, "learning_rate": 1e-06, "loss": 0.4032, "num_input_tokens_seen": 314270680, "step": 5609 }, { "epoch": 12.492204899777283, "loss": 0.349552184343338, "loss_ce": 0.00012591719860211015, "loss_iou": 0.1533203125, "loss_num": 0.008544921875, "loss_xval": 0.349609375, "num_input_tokens_seen": 314270680, "step": 5609 }, { "epoch": 12.494432071269488, "grad_norm": 27.822227478027344, "learning_rate": 1e-06, "loss": 0.3882, "num_input_tokens_seen": 314326436, "step": 5610 }, { "epoch": 12.494432071269488, "loss": 0.28033125400543213, "loss_ce": 0.00011883871047757566, "loss_iou": 0.12890625, "loss_num": 0.004730224609375, "loss_xval": 0.279296875, "num_input_tokens_seen": 314326436, "step": 5610 }, { "epoch": 12.496659242761693, "grad_norm": 18.214462280273438, "learning_rate": 1e-06, "loss": 0.4729, "num_input_tokens_seen": 314383216, "step": 5611 }, { "epoch": 12.496659242761693, "loss": 0.5203236937522888, "loss_ce": 0.00012107704242225736, "loss_iou": 0.2412109375, "loss_num": 0.00762939453125, "loss_xval": 0.51953125, "num_input_tokens_seen": 314383216, "step": 5611 }, { "epoch": 12.498886414253898, "grad_norm": 17.27739715576172, "learning_rate": 1e-06, "loss": 0.412, "num_input_tokens_seen": 314438016, "step": 5612 }, { "epoch": 12.498886414253898, "loss": 0.4331158995628357, "loss_ce": 0.0001325118209933862, "loss_iou": 0.1982421875, "loss_num": 0.007293701171875, "loss_xval": 0.43359375, "num_input_tokens_seen": 314438016, "step": 5612 }, { "epoch": 12.501113585746102, "grad_norm": 25.569000244140625, "learning_rate": 1e-06, "loss": 0.5059, "num_input_tokens_seen": 314494216, "step": 5613 }, { "epoch": 12.501113585746102, "loss": 0.4582577347755432, "loss_ce": 0.00012786393926944584, "loss_iou": 0.1943359375, "loss_num": 0.0137939453125, "loss_xval": 0.458984375, "num_input_tokens_seen": 314494216, "step": 5613 }, { "epoch": 12.503340757238307, "grad_norm": 16.633384704589844, "learning_rate": 1e-06, "loss": 0.529, "num_input_tokens_seen": 314550764, "step": 5614 }, { "epoch": 12.503340757238307, "loss": 0.5543428659439087, "loss_ce": 0.00014360195200424641, "loss_iou": 0.2158203125, "loss_num": 0.024658203125, "loss_xval": 0.5546875, "num_input_tokens_seen": 314550764, "step": 5614 }, { "epoch": 12.505567928730512, "grad_norm": 39.219783782958984, "learning_rate": 1e-06, "loss": 0.6081, "num_input_tokens_seen": 314608284, "step": 5615 }, { "epoch": 12.505567928730512, "loss": 0.4896491467952728, "loss_ce": 0.00014716846635565162, "loss_iou": 0.212890625, "loss_num": 0.0125732421875, "loss_xval": 0.490234375, "num_input_tokens_seen": 314608284, "step": 5615 }, { "epoch": 12.507795100222717, "grad_norm": 30.928625106811523, "learning_rate": 1e-06, "loss": 0.4487, "num_input_tokens_seen": 314660020, "step": 5616 }, { "epoch": 12.507795100222717, "loss": 0.4297997057437897, "loss_ce": 0.00011219394946238026, "loss_iou": 0.1943359375, "loss_num": 0.00836181640625, "loss_xval": 0.4296875, "num_input_tokens_seen": 314660020, "step": 5616 }, { "epoch": 12.510022271714922, "grad_norm": 20.47149085998535, "learning_rate": 1e-06, "loss": 0.4247, "num_input_tokens_seen": 314715408, "step": 5617 }, { "epoch": 12.510022271714922, "loss": 0.3184962868690491, "loss_ce": 0.0001369007513858378, "loss_iou": 0.130859375, "loss_num": 0.01116943359375, "loss_xval": 0.318359375, "num_input_tokens_seen": 314715408, "step": 5617 }, { "epoch": 12.512249443207127, "grad_norm": 23.522342681884766, "learning_rate": 1e-06, "loss": 0.4706, "num_input_tokens_seen": 314768072, "step": 5618 }, { "epoch": 12.512249443207127, "loss": 0.62452632188797, "loss_ce": 0.00013670045882463455, "loss_iou": 0.271484375, "loss_num": 0.01611328125, "loss_xval": 0.625, "num_input_tokens_seen": 314768072, "step": 5618 }, { "epoch": 12.514476614699332, "grad_norm": 24.376148223876953, "learning_rate": 1e-06, "loss": 0.3809, "num_input_tokens_seen": 314824920, "step": 5619 }, { "epoch": 12.514476614699332, "loss": 0.43566471338272095, "loss_ce": 0.00011783596710301936, "loss_iou": 0.17578125, "loss_num": 0.016845703125, "loss_xval": 0.435546875, "num_input_tokens_seen": 314824920, "step": 5619 }, { "epoch": 12.516703786191536, "grad_norm": 21.62662124633789, "learning_rate": 1e-06, "loss": 0.4331, "num_input_tokens_seen": 314881352, "step": 5620 }, { "epoch": 12.516703786191536, "loss": 0.42295846343040466, "loss_ce": 0.00013743109593633562, "loss_iou": 0.185546875, "loss_num": 0.01031494140625, "loss_xval": 0.421875, "num_input_tokens_seen": 314881352, "step": 5620 }, { "epoch": 12.518930957683741, "grad_norm": 17.14795684814453, "learning_rate": 1e-06, "loss": 0.4885, "num_input_tokens_seen": 314938836, "step": 5621 }, { "epoch": 12.518930957683741, "loss": 0.5315926671028137, "loss_ce": 0.00022057080059312284, "loss_iou": 0.2451171875, "loss_num": 0.00830078125, "loss_xval": 0.53125, "num_input_tokens_seen": 314938836, "step": 5621 }, { "epoch": 12.521158129175946, "grad_norm": 32.41496658325195, "learning_rate": 1e-06, "loss": 0.5334, "num_input_tokens_seen": 314997228, "step": 5622 }, { "epoch": 12.521158129175946, "loss": 0.5393984317779541, "loss_ce": 0.00015282572712749243, "loss_iou": 0.22265625, "loss_num": 0.018798828125, "loss_xval": 0.5390625, "num_input_tokens_seen": 314997228, "step": 5622 }, { "epoch": 12.523385300668151, "grad_norm": 21.443031311035156, "learning_rate": 1e-06, "loss": 0.5174, "num_input_tokens_seen": 315052164, "step": 5623 }, { "epoch": 12.523385300668151, "loss": 0.49842369556427, "loss_ce": 0.00013268653128761798, "loss_iou": 0.2138671875, "loss_num": 0.0140380859375, "loss_xval": 0.498046875, "num_input_tokens_seen": 315052164, "step": 5623 }, { "epoch": 12.525612472160356, "grad_norm": 21.690479278564453, "learning_rate": 1e-06, "loss": 0.5104, "num_input_tokens_seen": 315108840, "step": 5624 }, { "epoch": 12.525612472160356, "loss": 0.3883495032787323, "loss_ce": 0.00016590597806498408, "loss_iou": 0.1748046875, "loss_num": 0.0079345703125, "loss_xval": 0.388671875, "num_input_tokens_seen": 315108840, "step": 5624 }, { "epoch": 12.52783964365256, "grad_norm": 32.13372039794922, "learning_rate": 1e-06, "loss": 0.4522, "num_input_tokens_seen": 315165896, "step": 5625 }, { "epoch": 12.52783964365256, "loss": 0.5064772367477417, "loss_ce": 0.00012959350715391338, "loss_iou": 0.2333984375, "loss_num": 0.008056640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 315165896, "step": 5625 }, { "epoch": 12.530066815144766, "grad_norm": 80.0682144165039, "learning_rate": 1e-06, "loss": 0.5871, "num_input_tokens_seen": 315219956, "step": 5626 }, { "epoch": 12.530066815144766, "loss": 0.7462646961212158, "loss_ce": 0.0001709566276986152, "loss_iou": 0.32421875, "loss_num": 0.01953125, "loss_xval": 0.74609375, "num_input_tokens_seen": 315219956, "step": 5626 }, { "epoch": 12.53229398663697, "grad_norm": 20.835941314697266, "learning_rate": 1e-06, "loss": 0.4619, "num_input_tokens_seen": 315277464, "step": 5627 }, { "epoch": 12.53229398663697, "loss": 0.362972229719162, "loss_ce": 0.00017925890279002488, "loss_iou": 0.1494140625, "loss_num": 0.0128173828125, "loss_xval": 0.36328125, "num_input_tokens_seen": 315277464, "step": 5627 }, { "epoch": 12.534521158129175, "grad_norm": 15.360301971435547, "learning_rate": 1e-06, "loss": 0.3773, "num_input_tokens_seen": 315333928, "step": 5628 }, { "epoch": 12.534521158129175, "loss": 0.31863662600517273, "loss_ce": 0.00015518809959758073, "loss_iou": 0.1396484375, "loss_num": 0.007781982421875, "loss_xval": 0.318359375, "num_input_tokens_seen": 315333928, "step": 5628 }, { "epoch": 12.53674832962138, "grad_norm": 18.045223236083984, "learning_rate": 1e-06, "loss": 0.444, "num_input_tokens_seen": 315390648, "step": 5629 }, { "epoch": 12.53674832962138, "loss": 0.30956727266311646, "loss_ce": 0.00011902242840733379, "loss_iou": 0.1298828125, "loss_num": 0.0098876953125, "loss_xval": 0.30859375, "num_input_tokens_seen": 315390648, "step": 5629 }, { "epoch": 12.538975501113585, "grad_norm": 36.879791259765625, "learning_rate": 1e-06, "loss": 0.4498, "num_input_tokens_seen": 315447484, "step": 5630 }, { "epoch": 12.538975501113585, "loss": 0.33547037839889526, "loss_ce": 0.00011271548282820731, "loss_iou": 0.1337890625, "loss_num": 0.0135498046875, "loss_xval": 0.3359375, "num_input_tokens_seen": 315447484, "step": 5630 }, { "epoch": 12.54120267260579, "grad_norm": 18.903820037841797, "learning_rate": 1e-06, "loss": 0.4593, "num_input_tokens_seen": 315503840, "step": 5631 }, { "epoch": 12.54120267260579, "loss": 0.3675609529018402, "loss_ce": 0.0001293124514631927, "loss_iou": 0.1650390625, "loss_num": 0.007659912109375, "loss_xval": 0.3671875, "num_input_tokens_seen": 315503840, "step": 5631 }, { "epoch": 12.543429844097995, "grad_norm": 15.022705078125, "learning_rate": 1e-06, "loss": 0.5607, "num_input_tokens_seen": 315560960, "step": 5632 }, { "epoch": 12.543429844097995, "loss": 0.5480488538742065, "loss_ce": 0.00013630017929244787, "loss_iou": 0.2353515625, "loss_num": 0.015380859375, "loss_xval": 0.546875, "num_input_tokens_seen": 315560960, "step": 5632 }, { "epoch": 12.5456570155902, "grad_norm": 18.663776397705078, "learning_rate": 1e-06, "loss": 0.5616, "num_input_tokens_seen": 315616752, "step": 5633 }, { "epoch": 12.5456570155902, "loss": 0.5263513922691345, "loss_ce": 0.00010624493006616831, "loss_iou": 0.23046875, "loss_num": 0.01324462890625, "loss_xval": 0.52734375, "num_input_tokens_seen": 315616752, "step": 5633 }, { "epoch": 12.547884187082406, "grad_norm": 19.984249114990234, "learning_rate": 1e-06, "loss": 0.5842, "num_input_tokens_seen": 315670220, "step": 5634 }, { "epoch": 12.547884187082406, "loss": 0.47094982862472534, "loss_ce": 0.0001246376777999103, "loss_iou": 0.2177734375, "loss_num": 0.007110595703125, "loss_xval": 0.470703125, "num_input_tokens_seen": 315670220, "step": 5634 }, { "epoch": 12.550111358574611, "grad_norm": 22.835325241088867, "learning_rate": 1e-06, "loss": 0.5918, "num_input_tokens_seen": 315723144, "step": 5635 }, { "epoch": 12.550111358574611, "loss": 0.5138300657272339, "loss_ce": 0.00015820455155335367, "loss_iou": 0.2021484375, "loss_num": 0.02197265625, "loss_xval": 0.515625, "num_input_tokens_seen": 315723144, "step": 5635 }, { "epoch": 12.552338530066816, "grad_norm": 13.647897720336914, "learning_rate": 1e-06, "loss": 0.5345, "num_input_tokens_seen": 315778516, "step": 5636 }, { "epoch": 12.552338530066816, "loss": 0.5501181483268738, "loss_ce": 0.00013035524170845747, "loss_iou": 0.2373046875, "loss_num": 0.01507568359375, "loss_xval": 0.55078125, "num_input_tokens_seen": 315778516, "step": 5636 }, { "epoch": 12.55456570155902, "grad_norm": 17.706628799438477, "learning_rate": 1e-06, "loss": 0.8976, "num_input_tokens_seen": 315835116, "step": 5637 }, { "epoch": 12.55456570155902, "loss": 0.998259425163269, "loss_ce": 0.00015154268476180732, "loss_iou": 0.419921875, "loss_num": 0.031494140625, "loss_xval": 1.0, "num_input_tokens_seen": 315835116, "step": 5637 }, { "epoch": 12.556792873051226, "grad_norm": 16.907272338867188, "learning_rate": 1e-06, "loss": 0.5527, "num_input_tokens_seen": 315890436, "step": 5638 }, { "epoch": 12.556792873051226, "loss": 0.652888298034668, "loss_ce": 0.00011732908751582727, "loss_iou": 0.28515625, "loss_num": 0.0159912109375, "loss_xval": 0.65234375, "num_input_tokens_seen": 315890436, "step": 5638 }, { "epoch": 12.55902004454343, "grad_norm": 14.611957550048828, "learning_rate": 1e-06, "loss": 0.7071, "num_input_tokens_seen": 315944500, "step": 5639 }, { "epoch": 12.55902004454343, "loss": 0.6298529505729675, "loss_ce": 0.0015570501564070582, "loss_iou": 0.236328125, "loss_num": 0.03125, "loss_xval": 0.62890625, "num_input_tokens_seen": 315944500, "step": 5639 }, { "epoch": 12.561247216035635, "grad_norm": 39.95273208618164, "learning_rate": 1e-06, "loss": 0.7408, "num_input_tokens_seen": 315998188, "step": 5640 }, { "epoch": 12.561247216035635, "loss": 0.7573456168174744, "loss_ce": 0.0001434848236385733, "loss_iou": 0.298828125, "loss_num": 0.03173828125, "loss_xval": 0.7578125, "num_input_tokens_seen": 315998188, "step": 5640 }, { "epoch": 12.56347438752784, "grad_norm": 17.88589859008789, "learning_rate": 1e-06, "loss": 0.3115, "num_input_tokens_seen": 316054048, "step": 5641 }, { "epoch": 12.56347438752784, "loss": 0.3503269553184509, "loss_ce": 0.00010725160245783627, "loss_iou": 0.1611328125, "loss_num": 0.005523681640625, "loss_xval": 0.349609375, "num_input_tokens_seen": 316054048, "step": 5641 }, { "epoch": 12.565701559020045, "grad_norm": 76.38765716552734, "learning_rate": 1e-06, "loss": 0.7108, "num_input_tokens_seen": 316110144, "step": 5642 }, { "epoch": 12.565701559020045, "loss": 0.7477288842201233, "loss_ce": 0.00017026919522322714, "loss_iou": 0.33203125, "loss_num": 0.0169677734375, "loss_xval": 0.74609375, "num_input_tokens_seen": 316110144, "step": 5642 }, { "epoch": 12.56792873051225, "grad_norm": 13.18075180053711, "learning_rate": 1e-06, "loss": 0.6063, "num_input_tokens_seen": 316167200, "step": 5643 }, { "epoch": 12.56792873051225, "loss": 0.6305142641067505, "loss_ce": 0.00014317099703475833, "loss_iou": 0.2431640625, "loss_num": 0.0289306640625, "loss_xval": 0.62890625, "num_input_tokens_seen": 316167200, "step": 5643 }, { "epoch": 12.570155902004455, "grad_norm": 14.634928703308105, "learning_rate": 1e-06, "loss": 0.4603, "num_input_tokens_seen": 316224676, "step": 5644 }, { "epoch": 12.570155902004455, "loss": 0.34102046489715576, "loss_ce": 0.00020015044719912112, "loss_iou": 0.1552734375, "loss_num": 0.006256103515625, "loss_xval": 0.33984375, "num_input_tokens_seen": 316224676, "step": 5644 }, { "epoch": 12.57238307349666, "grad_norm": 21.283550262451172, "learning_rate": 1e-06, "loss": 0.5681, "num_input_tokens_seen": 316282056, "step": 5645 }, { "epoch": 12.57238307349666, "loss": 0.6777347326278687, "loss_ce": 0.0001223925792146474, "loss_iou": 0.296875, "loss_num": 0.0169677734375, "loss_xval": 0.67578125, "num_input_tokens_seen": 316282056, "step": 5645 }, { "epoch": 12.574610244988865, "grad_norm": 21.21981430053711, "learning_rate": 1e-06, "loss": 0.604, "num_input_tokens_seen": 316336632, "step": 5646 }, { "epoch": 12.574610244988865, "loss": 0.4394035339355469, "loss_ce": 0.00028606440173462033, "loss_iou": 0.1875, "loss_num": 0.0128173828125, "loss_xval": 0.439453125, "num_input_tokens_seen": 316336632, "step": 5646 }, { "epoch": 12.57683741648107, "grad_norm": 16.894546508789062, "learning_rate": 1e-06, "loss": 0.3813, "num_input_tokens_seen": 316392980, "step": 5647 }, { "epoch": 12.57683741648107, "loss": 0.40146303176879883, "loss_ce": 9.585732186678797e-05, "loss_iou": 0.1826171875, "loss_num": 0.00738525390625, "loss_xval": 0.40234375, "num_input_tokens_seen": 316392980, "step": 5647 }, { "epoch": 12.579064587973274, "grad_norm": 14.076600074768066, "learning_rate": 1e-06, "loss": 0.4109, "num_input_tokens_seen": 316450712, "step": 5648 }, { "epoch": 12.579064587973274, "loss": 0.4312838315963745, "loss_ce": 0.00013148068683221936, "loss_iou": 0.1875, "loss_num": 0.0111083984375, "loss_xval": 0.431640625, "num_input_tokens_seen": 316450712, "step": 5648 }, { "epoch": 12.58129175946548, "grad_norm": 18.681835174560547, "learning_rate": 1e-06, "loss": 0.5381, "num_input_tokens_seen": 316506272, "step": 5649 }, { "epoch": 12.58129175946548, "loss": 0.5017099380493164, "loss_ce": 0.00012300520029384643, "loss_iou": 0.212890625, "loss_num": 0.01519775390625, "loss_xval": 0.5, "num_input_tokens_seen": 316506272, "step": 5649 }, { "epoch": 12.583518930957684, "grad_norm": 19.394519805908203, "learning_rate": 1e-06, "loss": 0.5845, "num_input_tokens_seen": 316560448, "step": 5650 }, { "epoch": 12.583518930957684, "loss": 0.49112582206726074, "loss_ce": 0.00015903441817499697, "loss_iou": 0.197265625, "loss_num": 0.0194091796875, "loss_xval": 0.490234375, "num_input_tokens_seen": 316560448, "step": 5650 }, { "epoch": 12.585746102449889, "grad_norm": 19.638614654541016, "learning_rate": 1e-06, "loss": 0.3961, "num_input_tokens_seen": 316616300, "step": 5651 }, { "epoch": 12.585746102449889, "loss": 0.3637651801109314, "loss_ce": 0.00011775536404456943, "loss_iou": 0.1650390625, "loss_num": 0.00689697265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 316616300, "step": 5651 }, { "epoch": 12.587973273942094, "grad_norm": 20.799516677856445, "learning_rate": 1e-06, "loss": 0.7011, "num_input_tokens_seen": 316672848, "step": 5652 }, { "epoch": 12.587973273942094, "loss": 0.555069088935852, "loss_ce": 0.00013745573232881725, "loss_iou": 0.255859375, "loss_num": 0.0084228515625, "loss_xval": 0.5546875, "num_input_tokens_seen": 316672848, "step": 5652 }, { "epoch": 12.590200445434299, "grad_norm": 16.250247955322266, "learning_rate": 1e-06, "loss": 0.5712, "num_input_tokens_seen": 316726660, "step": 5653 }, { "epoch": 12.590200445434299, "loss": 0.7183680534362793, "loss_ce": 0.00018261410878039896, "loss_iou": 0.271484375, "loss_num": 0.035400390625, "loss_xval": 0.71875, "num_input_tokens_seen": 316726660, "step": 5653 }, { "epoch": 12.592427616926503, "grad_norm": 16.84650993347168, "learning_rate": 1e-06, "loss": 0.4514, "num_input_tokens_seen": 316785652, "step": 5654 }, { "epoch": 12.592427616926503, "loss": 0.4916442036628723, "loss_ce": 0.00012808601604774594, "loss_iou": 0.220703125, "loss_num": 0.01025390625, "loss_xval": 0.4921875, "num_input_tokens_seen": 316785652, "step": 5654 }, { "epoch": 12.594654788418708, "grad_norm": 19.085908889770508, "learning_rate": 1e-06, "loss": 0.549, "num_input_tokens_seen": 316838728, "step": 5655 }, { "epoch": 12.594654788418708, "loss": 0.45017945766448975, "loss_ce": 0.0002282563509652391, "loss_iou": 0.201171875, "loss_num": 0.00970458984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 316838728, "step": 5655 }, { "epoch": 12.596881959910913, "grad_norm": 18.604005813598633, "learning_rate": 1e-06, "loss": 0.4854, "num_input_tokens_seen": 316895116, "step": 5656 }, { "epoch": 12.596881959910913, "loss": 0.4594961106777191, "loss_ce": 0.00014551397180184722, "loss_iou": 0.201171875, "loss_num": 0.01116943359375, "loss_xval": 0.458984375, "num_input_tokens_seen": 316895116, "step": 5656 }, { "epoch": 12.599109131403118, "grad_norm": 19.779462814331055, "learning_rate": 1e-06, "loss": 0.442, "num_input_tokens_seen": 316951228, "step": 5657 }, { "epoch": 12.599109131403118, "loss": 0.43638354539871216, "loss_ce": 0.00010424081847304478, "loss_iou": 0.1884765625, "loss_num": 0.01190185546875, "loss_xval": 0.435546875, "num_input_tokens_seen": 316951228, "step": 5657 }, { "epoch": 12.601336302895323, "grad_norm": 17.714229583740234, "learning_rate": 1e-06, "loss": 0.6931, "num_input_tokens_seen": 317007992, "step": 5658 }, { "epoch": 12.601336302895323, "loss": 0.7979649305343628, "loss_ce": 0.00011335601448081434, "loss_iou": 0.337890625, "loss_num": 0.0242919921875, "loss_xval": 0.796875, "num_input_tokens_seen": 317007992, "step": 5658 }, { "epoch": 12.603563474387528, "grad_norm": 18.232986450195312, "learning_rate": 1e-06, "loss": 0.3883, "num_input_tokens_seen": 317062396, "step": 5659 }, { "epoch": 12.603563474387528, "loss": 0.3348948359489441, "loss_ce": 0.00011698143498506397, "loss_iou": 0.140625, "loss_num": 0.01055908203125, "loss_xval": 0.333984375, "num_input_tokens_seen": 317062396, "step": 5659 }, { "epoch": 12.605790645879733, "grad_norm": 22.718849182128906, "learning_rate": 1e-06, "loss": 0.519, "num_input_tokens_seen": 317118224, "step": 5660 }, { "epoch": 12.605790645879733, "loss": 0.58899986743927, "loss_ce": 0.00013271119678393006, "loss_iou": 0.263671875, "loss_num": 0.01239013671875, "loss_xval": 0.58984375, "num_input_tokens_seen": 317118224, "step": 5660 }, { "epoch": 12.608017817371937, "grad_norm": 29.809764862060547, "learning_rate": 1e-06, "loss": 0.5945, "num_input_tokens_seen": 317172604, "step": 5661 }, { "epoch": 12.608017817371937, "loss": 0.6281754374504089, "loss_ce": 0.0001236619718838483, "loss_iou": 0.271484375, "loss_num": 0.0169677734375, "loss_xval": 0.62890625, "num_input_tokens_seen": 317172604, "step": 5661 }, { "epoch": 12.610244988864142, "grad_norm": 19.71194076538086, "learning_rate": 1e-06, "loss": 0.4504, "num_input_tokens_seen": 317229076, "step": 5662 }, { "epoch": 12.610244988864142, "loss": 0.3518773019313812, "loss_ce": 0.00013169541489332914, "loss_iou": 0.134765625, "loss_num": 0.0164794921875, "loss_xval": 0.3515625, "num_input_tokens_seen": 317229076, "step": 5662 }, { "epoch": 12.612472160356347, "grad_norm": 24.877761840820312, "learning_rate": 1e-06, "loss": 0.5304, "num_input_tokens_seen": 317284080, "step": 5663 }, { "epoch": 12.612472160356347, "loss": 0.5929080247879028, "loss_ce": 0.0001345914788544178, "loss_iou": 0.25390625, "loss_num": 0.0172119140625, "loss_xval": 0.59375, "num_input_tokens_seen": 317284080, "step": 5663 }, { "epoch": 12.614699331848552, "grad_norm": 37.88312530517578, "learning_rate": 1e-06, "loss": 0.7225, "num_input_tokens_seen": 317338048, "step": 5664 }, { "epoch": 12.614699331848552, "loss": 0.859398365020752, "loss_ce": 0.0002674940915312618, "loss_iou": 0.359375, "loss_num": 0.028564453125, "loss_xval": 0.859375, "num_input_tokens_seen": 317338048, "step": 5664 }, { "epoch": 12.616926503340757, "grad_norm": 25.082626342773438, "learning_rate": 1e-06, "loss": 0.6775, "num_input_tokens_seen": 317396176, "step": 5665 }, { "epoch": 12.616926503340757, "loss": 0.6872255802154541, "loss_ce": 0.00033595875720493495, "loss_iou": 0.283203125, "loss_num": 0.024169921875, "loss_xval": 0.6875, "num_input_tokens_seen": 317396176, "step": 5665 }, { "epoch": 12.619153674832962, "grad_norm": 16.618837356567383, "learning_rate": 1e-06, "loss": 0.6738, "num_input_tokens_seen": 317453000, "step": 5666 }, { "epoch": 12.619153674832962, "loss": 0.7559080123901367, "loss_ce": 0.0001707018236629665, "loss_iou": 0.326171875, "loss_num": 0.020751953125, "loss_xval": 0.75390625, "num_input_tokens_seen": 317453000, "step": 5666 }, { "epoch": 12.621380846325167, "grad_norm": 13.969486236572266, "learning_rate": 1e-06, "loss": 0.3752, "num_input_tokens_seen": 317510472, "step": 5667 }, { "epoch": 12.621380846325167, "loss": 0.42377543449401855, "loss_ce": 0.0001303813187405467, "loss_iou": 0.193359375, "loss_num": 0.00732421875, "loss_xval": 0.423828125, "num_input_tokens_seen": 317510472, "step": 5667 }, { "epoch": 12.623608017817372, "grad_norm": 13.138300895690918, "learning_rate": 1e-06, "loss": 0.4098, "num_input_tokens_seen": 317567232, "step": 5668 }, { "epoch": 12.623608017817372, "loss": 0.3994289040565491, "loss_ce": 0.00013694007066078484, "loss_iou": 0.1904296875, "loss_num": 0.0035552978515625, "loss_xval": 0.3984375, "num_input_tokens_seen": 317567232, "step": 5668 }, { "epoch": 12.625835189309576, "grad_norm": 19.55143165588379, "learning_rate": 1e-06, "loss": 0.378, "num_input_tokens_seen": 317624192, "step": 5669 }, { "epoch": 12.625835189309576, "loss": 0.4045639634132385, "loss_ce": 0.00014502542035188526, "loss_iou": 0.1796875, "loss_num": 0.0089111328125, "loss_xval": 0.404296875, "num_input_tokens_seen": 317624192, "step": 5669 }, { "epoch": 12.628062360801781, "grad_norm": 33.738197326660156, "learning_rate": 1e-06, "loss": 0.4112, "num_input_tokens_seen": 317676972, "step": 5670 }, { "epoch": 12.628062360801781, "loss": 0.4077775180339813, "loss_ce": 0.00012370766489766538, "loss_iou": 0.1796875, "loss_num": 0.009765625, "loss_xval": 0.408203125, "num_input_tokens_seen": 317676972, "step": 5670 }, { "epoch": 12.630289532293986, "grad_norm": 26.519407272338867, "learning_rate": 1e-06, "loss": 0.4019, "num_input_tokens_seen": 317734016, "step": 5671 }, { "epoch": 12.630289532293986, "loss": 0.40515443682670593, "loss_ce": 0.00012512919784057885, "loss_iou": 0.1650390625, "loss_num": 0.0147705078125, "loss_xval": 0.404296875, "num_input_tokens_seen": 317734016, "step": 5671 }, { "epoch": 12.632516703786191, "grad_norm": 22.868947982788086, "learning_rate": 1e-06, "loss": 0.439, "num_input_tokens_seen": 317789104, "step": 5672 }, { "epoch": 12.632516703786191, "loss": 0.403316855430603, "loss_ce": 0.00011859952792292461, "loss_iou": 0.1884765625, "loss_num": 0.00518798828125, "loss_xval": 0.40234375, "num_input_tokens_seen": 317789104, "step": 5672 }, { "epoch": 12.634743875278396, "grad_norm": 23.70015525817871, "learning_rate": 1e-06, "loss": 0.4627, "num_input_tokens_seen": 317843592, "step": 5673 }, { "epoch": 12.634743875278396, "loss": 0.5965588092803955, "loss_ce": 0.00012326473370194435, "loss_iou": 0.25390625, "loss_num": 0.0179443359375, "loss_xval": 0.59765625, "num_input_tokens_seen": 317843592, "step": 5673 }, { "epoch": 12.6369710467706, "grad_norm": 19.277725219726562, "learning_rate": 1e-06, "loss": 0.4047, "num_input_tokens_seen": 317900916, "step": 5674 }, { "epoch": 12.6369710467706, "loss": 0.3441046476364136, "loss_ce": 0.00011049180466216058, "loss_iou": 0.1484375, "loss_num": 0.00927734375, "loss_xval": 0.34375, "num_input_tokens_seen": 317900916, "step": 5674 }, { "epoch": 12.639198218262806, "grad_norm": 28.94078826904297, "learning_rate": 1e-06, "loss": 0.4402, "num_input_tokens_seen": 317957468, "step": 5675 }, { "epoch": 12.639198218262806, "loss": 0.3978157639503479, "loss_ce": 0.00011067395826103166, "loss_iou": 0.1591796875, "loss_num": 0.015869140625, "loss_xval": 0.3984375, "num_input_tokens_seen": 317957468, "step": 5675 }, { "epoch": 12.64142538975501, "grad_norm": 19.98812484741211, "learning_rate": 1e-06, "loss": 0.6088, "num_input_tokens_seen": 318014916, "step": 5676 }, { "epoch": 12.64142538975501, "loss": 0.5992527008056641, "loss_ce": 0.0001316650304943323, "loss_iou": 0.263671875, "loss_num": 0.0140380859375, "loss_xval": 0.59765625, "num_input_tokens_seen": 318014916, "step": 5676 }, { "epoch": 12.643652561247215, "grad_norm": 29.456689834594727, "learning_rate": 1e-06, "loss": 0.4071, "num_input_tokens_seen": 318069856, "step": 5677 }, { "epoch": 12.643652561247215, "loss": 0.29436251521110535, "loss_ce": 0.00017306354129686952, "loss_iou": 0.1376953125, "loss_num": 0.003936767578125, "loss_xval": 0.294921875, "num_input_tokens_seen": 318069856, "step": 5677 }, { "epoch": 12.64587973273942, "grad_norm": 29.443206787109375, "learning_rate": 1e-06, "loss": 0.5605, "num_input_tokens_seen": 318127208, "step": 5678 }, { "epoch": 12.64587973273942, "loss": 0.6685495972633362, "loss_ce": 0.0003367103636264801, "loss_iou": 0.28125, "loss_num": 0.021240234375, "loss_xval": 0.66796875, "num_input_tokens_seen": 318127208, "step": 5678 }, { "epoch": 12.648106904231625, "grad_norm": 16.127708435058594, "learning_rate": 1e-06, "loss": 0.4754, "num_input_tokens_seen": 318183468, "step": 5679 }, { "epoch": 12.648106904231625, "loss": 0.43421459197998047, "loss_ce": 0.00013255488011054695, "loss_iou": 0.1884765625, "loss_num": 0.01153564453125, "loss_xval": 0.43359375, "num_input_tokens_seen": 318183468, "step": 5679 }, { "epoch": 12.65033407572383, "grad_norm": 36.4363899230957, "learning_rate": 1e-06, "loss": 0.5273, "num_input_tokens_seen": 318237944, "step": 5680 }, { "epoch": 12.65033407572383, "loss": 0.3793076276779175, "loss_ce": 0.00015721011732239276, "loss_iou": 0.17578125, "loss_num": 0.00555419921875, "loss_xval": 0.37890625, "num_input_tokens_seen": 318237944, "step": 5680 }, { "epoch": 12.652561247216035, "grad_norm": 15.695813179016113, "learning_rate": 1e-06, "loss": 0.5399, "num_input_tokens_seen": 318297084, "step": 5681 }, { "epoch": 12.652561247216035, "loss": 0.46922624111175537, "loss_ce": 0.00011001349048456177, "loss_iou": 0.208984375, "loss_num": 0.0101318359375, "loss_xval": 0.46875, "num_input_tokens_seen": 318297084, "step": 5681 }, { "epoch": 12.654788418708241, "grad_norm": 24.64415168762207, "learning_rate": 1e-06, "loss": 0.4352, "num_input_tokens_seen": 318350612, "step": 5682 }, { "epoch": 12.654788418708241, "loss": 0.5813356041908264, "loss_ce": 0.00015886628534644842, "loss_iou": 0.26171875, "loss_num": 0.01190185546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 318350612, "step": 5682 }, { "epoch": 12.657015590200446, "grad_norm": 14.695323944091797, "learning_rate": 1e-06, "loss": 0.5865, "num_input_tokens_seen": 318406072, "step": 5683 }, { "epoch": 12.657015590200446, "loss": 0.4219498932361603, "loss_ce": 0.000120684111607261, "loss_iou": 0.19140625, "loss_num": 0.00762939453125, "loss_xval": 0.421875, "num_input_tokens_seen": 318406072, "step": 5683 }, { "epoch": 12.659242761692651, "grad_norm": 18.124658584594727, "learning_rate": 1e-06, "loss": 0.5347, "num_input_tokens_seen": 318463084, "step": 5684 }, { "epoch": 12.659242761692651, "loss": 0.6086758971214294, "loss_ce": 0.00015539961168542504, "loss_iou": 0.23046875, "loss_num": 0.0296630859375, "loss_xval": 0.609375, "num_input_tokens_seen": 318463084, "step": 5684 }, { "epoch": 12.661469933184856, "grad_norm": 14.113274574279785, "learning_rate": 1e-06, "loss": 0.466, "num_input_tokens_seen": 318520428, "step": 5685 }, { "epoch": 12.661469933184856, "loss": 0.5447894930839539, "loss_ce": 0.00011178314161952585, "loss_iou": 0.2265625, "loss_num": 0.018310546875, "loss_xval": 0.54296875, "num_input_tokens_seen": 318520428, "step": 5685 }, { "epoch": 12.66369710467706, "grad_norm": 12.743885040283203, "learning_rate": 1e-06, "loss": 0.7553, "num_input_tokens_seen": 318576972, "step": 5686 }, { "epoch": 12.66369710467706, "loss": 0.9210690259933472, "loss_ce": 0.0003536554577294737, "loss_iou": 0.37109375, "loss_num": 0.035888671875, "loss_xval": 0.921875, "num_input_tokens_seen": 318576972, "step": 5686 }, { "epoch": 12.665924276169266, "grad_norm": 14.964234352111816, "learning_rate": 1e-06, "loss": 0.4453, "num_input_tokens_seen": 318630160, "step": 5687 }, { "epoch": 12.665924276169266, "loss": 0.42542383074760437, "loss_ce": 0.00013086556282360107, "loss_iou": 0.1826171875, "loss_num": 0.01202392578125, "loss_xval": 0.42578125, "num_input_tokens_seen": 318630160, "step": 5687 }, { "epoch": 12.66815144766147, "grad_norm": 22.878787994384766, "learning_rate": 1e-06, "loss": 0.5036, "num_input_tokens_seen": 318683880, "step": 5688 }, { "epoch": 12.66815144766147, "loss": 0.47283750772476196, "loss_ce": 0.00012018828419968486, "loss_iou": 0.2119140625, "loss_num": 0.00982666015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 318683880, "step": 5688 }, { "epoch": 12.670378619153675, "grad_norm": 16.93242073059082, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 318737980, "step": 5689 }, { "epoch": 12.670378619153675, "loss": 0.4250328838825226, "loss_ce": 0.00010613650374580175, "loss_iou": 0.1875, "loss_num": 0.00982666015625, "loss_xval": 0.42578125, "num_input_tokens_seen": 318737980, "step": 5689 }, { "epoch": 12.67260579064588, "grad_norm": 18.397140502929688, "learning_rate": 1e-06, "loss": 0.4985, "num_input_tokens_seen": 318793520, "step": 5690 }, { "epoch": 12.67260579064588, "loss": 0.5429917573928833, "loss_ce": 0.0001451352145522833, "loss_iou": 0.2314453125, "loss_num": 0.015869140625, "loss_xval": 0.54296875, "num_input_tokens_seen": 318793520, "step": 5690 }, { "epoch": 12.674832962138085, "grad_norm": 23.1961669921875, "learning_rate": 1e-06, "loss": 0.4391, "num_input_tokens_seen": 318848756, "step": 5691 }, { "epoch": 12.674832962138085, "loss": 0.45615941286087036, "loss_ce": 0.00010473289148649201, "loss_iou": 0.2001953125, "loss_num": 0.01141357421875, "loss_xval": 0.45703125, "num_input_tokens_seen": 318848756, "step": 5691 }, { "epoch": 12.67706013363029, "grad_norm": 14.611727714538574, "learning_rate": 1e-06, "loss": 0.4351, "num_input_tokens_seen": 318907276, "step": 5692 }, { "epoch": 12.67706013363029, "loss": 0.4373611807823181, "loss_ce": 0.0001053235464496538, "loss_iou": 0.189453125, "loss_num": 0.011474609375, "loss_xval": 0.4375, "num_input_tokens_seen": 318907276, "step": 5692 }, { "epoch": 12.679287305122495, "grad_norm": 16.515594482421875, "learning_rate": 1e-06, "loss": 0.4227, "num_input_tokens_seen": 318962132, "step": 5693 }, { "epoch": 12.679287305122495, "loss": 0.4306679964065552, "loss_ce": 0.00012597074965015054, "loss_iou": 0.1982421875, "loss_num": 0.0067138671875, "loss_xval": 0.4296875, "num_input_tokens_seen": 318962132, "step": 5693 }, { "epoch": 12.6815144766147, "grad_norm": 20.435245513916016, "learning_rate": 1e-06, "loss": 0.6693, "num_input_tokens_seen": 319018288, "step": 5694 }, { "epoch": 12.6815144766147, "loss": 0.9051839709281921, "loss_ce": 0.00015468656783923507, "loss_iou": 0.380859375, "loss_num": 0.029052734375, "loss_xval": 0.90625, "num_input_tokens_seen": 319018288, "step": 5694 }, { "epoch": 12.683741648106905, "grad_norm": 17.04014778137207, "learning_rate": 1e-06, "loss": 0.3592, "num_input_tokens_seen": 319073932, "step": 5695 }, { "epoch": 12.683741648106905, "loss": 0.45563840866088867, "loss_ce": 0.00019404885824769735, "loss_iou": 0.2060546875, "loss_num": 0.0084228515625, "loss_xval": 0.455078125, "num_input_tokens_seen": 319073932, "step": 5695 }, { "epoch": 12.68596881959911, "grad_norm": 18.900136947631836, "learning_rate": 1e-06, "loss": 0.4399, "num_input_tokens_seen": 319131752, "step": 5696 }, { "epoch": 12.68596881959911, "loss": 0.4523688554763794, "loss_ce": 9.835186938289553e-05, "loss_iou": 0.1884765625, "loss_num": 0.01495361328125, "loss_xval": 0.453125, "num_input_tokens_seen": 319131752, "step": 5696 }, { "epoch": 12.688195991091314, "grad_norm": 25.752071380615234, "learning_rate": 1e-06, "loss": 0.4534, "num_input_tokens_seen": 319187908, "step": 5697 }, { "epoch": 12.688195991091314, "loss": 0.42492321133613586, "loss_ce": 0.00011853590694954619, "loss_iou": 0.193359375, "loss_num": 0.007568359375, "loss_xval": 0.42578125, "num_input_tokens_seen": 319187908, "step": 5697 }, { "epoch": 12.690423162583519, "grad_norm": 19.532682418823242, "learning_rate": 1e-06, "loss": 0.538, "num_input_tokens_seen": 319244400, "step": 5698 }, { "epoch": 12.690423162583519, "loss": 0.5101226568222046, "loss_ce": 0.00011285034997854382, "loss_iou": 0.228515625, "loss_num": 0.01068115234375, "loss_xval": 0.51171875, "num_input_tokens_seen": 319244400, "step": 5698 }, { "epoch": 12.692650334075724, "grad_norm": 26.41732406616211, "learning_rate": 1e-06, "loss": 0.4874, "num_input_tokens_seen": 319297184, "step": 5699 }, { "epoch": 12.692650334075724, "loss": 0.57191401720047, "loss_ce": 0.00013665735605172813, "loss_iou": 0.251953125, "loss_num": 0.01385498046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 319297184, "step": 5699 }, { "epoch": 12.694877505567929, "grad_norm": 18.265283584594727, "learning_rate": 1e-06, "loss": 0.4927, "num_input_tokens_seen": 319355800, "step": 5700 }, { "epoch": 12.694877505567929, "loss": 0.5474244356155396, "loss_ce": 0.00012223176599945873, "loss_iou": 0.2314453125, "loss_num": 0.0169677734375, "loss_xval": 0.546875, "num_input_tokens_seen": 319355800, "step": 5700 }, { "epoch": 12.697104677060134, "grad_norm": 26.21422576904297, "learning_rate": 1e-06, "loss": 0.7005, "num_input_tokens_seen": 319413272, "step": 5701 }, { "epoch": 12.697104677060134, "loss": 0.9590729475021362, "loss_ce": 0.00021064060274511576, "loss_iou": 0.359375, "loss_num": 0.04833984375, "loss_xval": 0.95703125, "num_input_tokens_seen": 319413272, "step": 5701 }, { "epoch": 12.699331848552339, "grad_norm": 24.68498992919922, "learning_rate": 1e-06, "loss": 0.4993, "num_input_tokens_seen": 319467372, "step": 5702 }, { "epoch": 12.699331848552339, "loss": 0.5787380933761597, "loss_ce": 0.0001248328626388684, "loss_iou": 0.224609375, "loss_num": 0.02587890625, "loss_xval": 0.578125, "num_input_tokens_seen": 319467372, "step": 5702 }, { "epoch": 12.701559020044543, "grad_norm": 20.236909866333008, "learning_rate": 1e-06, "loss": 0.5838, "num_input_tokens_seen": 319524172, "step": 5703 }, { "epoch": 12.701559020044543, "loss": 0.6807386875152588, "loss_ce": 0.0005628582439385355, "loss_iou": 0.2890625, "loss_num": 0.0203857421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 319524172, "step": 5703 }, { "epoch": 12.703786191536748, "grad_norm": 27.707962036132812, "learning_rate": 1e-06, "loss": 0.6471, "num_input_tokens_seen": 319578916, "step": 5704 }, { "epoch": 12.703786191536748, "loss": 0.8126009702682495, "loss_ce": 0.00010092551383422688, "loss_iou": 0.357421875, "loss_num": 0.019775390625, "loss_xval": 0.8125, "num_input_tokens_seen": 319578916, "step": 5704 }, { "epoch": 12.706013363028953, "grad_norm": 21.496240615844727, "learning_rate": 1e-06, "loss": 0.449, "num_input_tokens_seen": 319634576, "step": 5705 }, { "epoch": 12.706013363028953, "loss": 0.4259480834007263, "loss_ce": 0.00010578571527730674, "loss_iou": 0.181640625, "loss_num": 0.01263427734375, "loss_xval": 0.42578125, "num_input_tokens_seen": 319634576, "step": 5705 }, { "epoch": 12.708240534521158, "grad_norm": 17.127723693847656, "learning_rate": 1e-06, "loss": 0.3865, "num_input_tokens_seen": 319689076, "step": 5706 }, { "epoch": 12.708240534521158, "loss": 0.3831561207771301, "loss_ce": 9.949406376108527e-05, "loss_iou": 0.15234375, "loss_num": 0.0157470703125, "loss_xval": 0.3828125, "num_input_tokens_seen": 319689076, "step": 5706 }, { "epoch": 12.710467706013363, "grad_norm": 19.543319702148438, "learning_rate": 1e-06, "loss": 0.7191, "num_input_tokens_seen": 319745764, "step": 5707 }, { "epoch": 12.710467706013363, "loss": 0.780562162399292, "loss_ce": 0.00028868275694549084, "loss_iou": 0.337890625, "loss_num": 0.02099609375, "loss_xval": 0.78125, "num_input_tokens_seen": 319745764, "step": 5707 }, { "epoch": 12.712694877505568, "grad_norm": 20.706933975219727, "learning_rate": 1e-06, "loss": 0.3687, "num_input_tokens_seen": 319803272, "step": 5708 }, { "epoch": 12.712694877505568, "loss": 0.45703423023223877, "loss_ce": 0.00018610812549013644, "loss_iou": 0.2119140625, "loss_num": 0.006805419921875, "loss_xval": 0.45703125, "num_input_tokens_seen": 319803272, "step": 5708 }, { "epoch": 12.714922048997773, "grad_norm": 13.944723129272461, "learning_rate": 1e-06, "loss": 0.3891, "num_input_tokens_seen": 319860864, "step": 5709 }, { "epoch": 12.714922048997773, "loss": 0.3766050636768341, "loss_ce": 0.00014021531387697905, "loss_iou": 0.16796875, "loss_num": 0.00830078125, "loss_xval": 0.376953125, "num_input_tokens_seen": 319860864, "step": 5709 }, { "epoch": 12.717149220489977, "grad_norm": 20.481975555419922, "learning_rate": 1e-06, "loss": 0.639, "num_input_tokens_seen": 319916192, "step": 5710 }, { "epoch": 12.717149220489977, "loss": 0.6568468809127808, "loss_ce": 0.0001086572592612356, "loss_iou": 0.265625, "loss_num": 0.024658203125, "loss_xval": 0.65625, "num_input_tokens_seen": 319916192, "step": 5710 }, { "epoch": 12.719376391982182, "grad_norm": 14.734237670898438, "learning_rate": 1e-06, "loss": 0.6377, "num_input_tokens_seen": 319974044, "step": 5711 }, { "epoch": 12.719376391982182, "loss": 0.8217337131500244, "loss_ce": 0.00020050689636263996, "loss_iou": 0.3515625, "loss_num": 0.0238037109375, "loss_xval": 0.8203125, "num_input_tokens_seen": 319974044, "step": 5711 }, { "epoch": 12.721603563474387, "grad_norm": 16.80459213256836, "learning_rate": 1e-06, "loss": 0.4424, "num_input_tokens_seen": 320027692, "step": 5712 }, { "epoch": 12.721603563474387, "loss": 0.46265923976898193, "loss_ce": 0.00013479188783094287, "loss_iou": 0.2109375, "loss_num": 0.0081787109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 320027692, "step": 5712 }, { "epoch": 12.723830734966592, "grad_norm": 13.117379188537598, "learning_rate": 1e-06, "loss": 0.5347, "num_input_tokens_seen": 320084636, "step": 5713 }, { "epoch": 12.723830734966592, "loss": 0.4970431923866272, "loss_ce": 9.497034625383094e-05, "loss_iou": 0.2021484375, "loss_num": 0.0186767578125, "loss_xval": 0.49609375, "num_input_tokens_seen": 320084636, "step": 5713 }, { "epoch": 12.726057906458797, "grad_norm": 15.85624885559082, "learning_rate": 1e-06, "loss": 0.5115, "num_input_tokens_seen": 320141228, "step": 5714 }, { "epoch": 12.726057906458797, "loss": 0.5403798222541809, "loss_ce": 0.0002187040081480518, "loss_iou": 0.2294921875, "loss_num": 0.016357421875, "loss_xval": 0.5390625, "num_input_tokens_seen": 320141228, "step": 5714 }, { "epoch": 12.728285077951002, "grad_norm": 15.332324981689453, "learning_rate": 1e-06, "loss": 0.3932, "num_input_tokens_seen": 320196208, "step": 5715 }, { "epoch": 12.728285077951002, "loss": 0.4100413918495178, "loss_ce": 0.00012929215154144913, "loss_iou": 0.1708984375, "loss_num": 0.0137939453125, "loss_xval": 0.41015625, "num_input_tokens_seen": 320196208, "step": 5715 }, { "epoch": 12.730512249443207, "grad_norm": 21.221384048461914, "learning_rate": 1e-06, "loss": 0.3587, "num_input_tokens_seen": 320252508, "step": 5716 }, { "epoch": 12.730512249443207, "loss": 0.27532318234443665, "loss_ce": 0.00011566374450922012, "loss_iou": 0.12109375, "loss_num": 0.00653076171875, "loss_xval": 0.275390625, "num_input_tokens_seen": 320252508, "step": 5716 }, { "epoch": 12.732739420935411, "grad_norm": 19.24123191833496, "learning_rate": 1e-06, "loss": 0.4894, "num_input_tokens_seen": 320308308, "step": 5717 }, { "epoch": 12.732739420935411, "loss": 0.44945940375328064, "loss_ce": 0.00011856977653224021, "loss_iou": 0.1884765625, "loss_num": 0.0142822265625, "loss_xval": 0.44921875, "num_input_tokens_seen": 320308308, "step": 5717 }, { "epoch": 12.734966592427616, "grad_norm": 18.568979263305664, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 320363968, "step": 5718 }, { "epoch": 12.734966592427616, "loss": 0.5608214139938354, "loss_ce": 0.00015247752889990807, "loss_iou": 0.25390625, "loss_num": 0.01025390625, "loss_xval": 0.5625, "num_input_tokens_seen": 320363968, "step": 5718 }, { "epoch": 12.737193763919821, "grad_norm": 27.49472427368164, "learning_rate": 1e-06, "loss": 0.4842, "num_input_tokens_seen": 320420160, "step": 5719 }, { "epoch": 12.737193763919821, "loss": 0.44296640157699585, "loss_ce": 9.531481191515923e-05, "loss_iou": 0.173828125, "loss_num": 0.0189208984375, "loss_xval": 0.443359375, "num_input_tokens_seen": 320420160, "step": 5719 }, { "epoch": 12.739420935412026, "grad_norm": 35.03486633300781, "learning_rate": 1e-06, "loss": 0.6408, "num_input_tokens_seen": 320476520, "step": 5720 }, { "epoch": 12.739420935412026, "loss": 0.6617658734321594, "loss_ce": 0.000144752484629862, "loss_iou": 0.283203125, "loss_num": 0.019287109375, "loss_xval": 0.66015625, "num_input_tokens_seen": 320476520, "step": 5720 }, { "epoch": 12.74164810690423, "grad_norm": 19.232091903686523, "learning_rate": 1e-06, "loss": 0.3847, "num_input_tokens_seen": 320531336, "step": 5721 }, { "epoch": 12.74164810690423, "loss": 0.41272228956222534, "loss_ce": 0.00012462519225664437, "loss_iou": 0.185546875, "loss_num": 0.00811767578125, "loss_xval": 0.412109375, "num_input_tokens_seen": 320531336, "step": 5721 }, { "epoch": 12.743875278396436, "grad_norm": 19.980670928955078, "learning_rate": 1e-06, "loss": 0.4017, "num_input_tokens_seen": 320590096, "step": 5722 }, { "epoch": 12.743875278396436, "loss": 0.4484846591949463, "loss_ce": 0.00012038621207466349, "loss_iou": 0.1884765625, "loss_num": 0.01422119140625, "loss_xval": 0.44921875, "num_input_tokens_seen": 320590096, "step": 5722 }, { "epoch": 12.74610244988864, "grad_norm": 23.660293579101562, "learning_rate": 1e-06, "loss": 0.4983, "num_input_tokens_seen": 320644036, "step": 5723 }, { "epoch": 12.74610244988864, "loss": 0.5904831290245056, "loss_ce": 0.0001510925212642178, "loss_iou": 0.24609375, "loss_num": 0.019775390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 320644036, "step": 5723 }, { "epoch": 12.748329621380847, "grad_norm": 16.64579963684082, "learning_rate": 1e-06, "loss": 0.4319, "num_input_tokens_seen": 320699152, "step": 5724 }, { "epoch": 12.748329621380847, "loss": 0.4705604612827301, "loss_ce": 0.00010146087151952088, "loss_iou": 0.21484375, "loss_num": 0.00830078125, "loss_xval": 0.470703125, "num_input_tokens_seen": 320699152, "step": 5724 }, { "epoch": 12.750556792873052, "grad_norm": 24.360746383666992, "learning_rate": 1e-06, "loss": 0.6527, "num_input_tokens_seen": 320751832, "step": 5725 }, { "epoch": 12.750556792873052, "loss": 0.7235379219055176, "loss_ce": 0.00014920823741704226, "loss_iou": 0.287109375, "loss_num": 0.0296630859375, "loss_xval": 0.72265625, "num_input_tokens_seen": 320751832, "step": 5725 }, { "epoch": 12.752783964365257, "grad_norm": 18.12118148803711, "learning_rate": 1e-06, "loss": 0.5865, "num_input_tokens_seen": 320807464, "step": 5726 }, { "epoch": 12.752783964365257, "loss": 0.45097601413726807, "loss_ce": 0.00010932252916973084, "loss_iou": 0.2041015625, "loss_num": 0.00872802734375, "loss_xval": 0.451171875, "num_input_tokens_seen": 320807464, "step": 5726 }, { "epoch": 12.755011135857462, "grad_norm": 104.34639739990234, "learning_rate": 1e-06, "loss": 0.6179, "num_input_tokens_seen": 320861640, "step": 5727 }, { "epoch": 12.755011135857462, "loss": 0.5127081871032715, "loss_ce": 0.000134985864860937, "loss_iou": 0.2119140625, "loss_num": 0.017578125, "loss_xval": 0.51171875, "num_input_tokens_seen": 320861640, "step": 5727 }, { "epoch": 12.757238307349667, "grad_norm": 71.3646011352539, "learning_rate": 1e-06, "loss": 0.6442, "num_input_tokens_seen": 320915204, "step": 5728 }, { "epoch": 12.757238307349667, "loss": 0.6957196593284607, "loss_ce": 0.00016302938456647098, "loss_iou": 0.306640625, "loss_num": 0.016357421875, "loss_xval": 0.6953125, "num_input_tokens_seen": 320915204, "step": 5728 }, { "epoch": 12.759465478841872, "grad_norm": 26.151466369628906, "learning_rate": 1e-06, "loss": 0.3624, "num_input_tokens_seen": 320971060, "step": 5729 }, { "epoch": 12.759465478841872, "loss": 0.34516897797584534, "loss_ce": 0.00010673723591025919, "loss_iou": 0.1416015625, "loss_num": 0.012451171875, "loss_xval": 0.345703125, "num_input_tokens_seen": 320971060, "step": 5729 }, { "epoch": 12.761692650334076, "grad_norm": 17.449047088623047, "learning_rate": 1e-06, "loss": 0.4778, "num_input_tokens_seen": 321024964, "step": 5730 }, { "epoch": 12.761692650334076, "loss": 0.5893593430519104, "loss_ce": 0.00012595132284332067, "loss_iou": 0.27734375, "loss_num": 0.00726318359375, "loss_xval": 0.58984375, "num_input_tokens_seen": 321024964, "step": 5730 }, { "epoch": 12.763919821826281, "grad_norm": 18.459321975708008, "learning_rate": 1e-06, "loss": 0.511, "num_input_tokens_seen": 321081864, "step": 5731 }, { "epoch": 12.763919821826281, "loss": 0.5054360628128052, "loss_ce": 0.0005532268551178277, "loss_iou": 0.2236328125, "loss_num": 0.011474609375, "loss_xval": 0.50390625, "num_input_tokens_seen": 321081864, "step": 5731 }, { "epoch": 12.766146993318486, "grad_norm": 14.343801498413086, "learning_rate": 1e-06, "loss": 0.4483, "num_input_tokens_seen": 321134864, "step": 5732 }, { "epoch": 12.766146993318486, "loss": 0.4396534860134125, "loss_ce": 0.00020037099602632225, "loss_iou": 0.1875, "loss_num": 0.0126953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 321134864, "step": 5732 }, { "epoch": 12.768374164810691, "grad_norm": 25.014375686645508, "learning_rate": 1e-06, "loss": 0.4792, "num_input_tokens_seen": 321189340, "step": 5733 }, { "epoch": 12.768374164810691, "loss": 0.48544344305992126, "loss_ce": 9.186341048916802e-05, "loss_iou": 0.21484375, "loss_num": 0.0113525390625, "loss_xval": 0.484375, "num_input_tokens_seen": 321189340, "step": 5733 }, { "epoch": 12.770601336302896, "grad_norm": 21.6157283782959, "learning_rate": 1e-06, "loss": 0.6859, "num_input_tokens_seen": 321243252, "step": 5734 }, { "epoch": 12.770601336302896, "loss": 0.938653290271759, "loss_ce": 0.0001766829373082146, "loss_iou": 0.396484375, "loss_num": 0.02880859375, "loss_xval": 0.9375, "num_input_tokens_seen": 321243252, "step": 5734 }, { "epoch": 12.7728285077951, "grad_norm": 27.44350814819336, "learning_rate": 1e-06, "loss": 0.3906, "num_input_tokens_seen": 321297028, "step": 5735 }, { "epoch": 12.7728285077951, "loss": 0.39947906136512756, "loss_ce": 0.00012604435323737562, "loss_iou": 0.1708984375, "loss_num": 0.01153564453125, "loss_xval": 0.3984375, "num_input_tokens_seen": 321297028, "step": 5735 }, { "epoch": 12.775055679287306, "grad_norm": 18.349063873291016, "learning_rate": 1e-06, "loss": 0.4358, "num_input_tokens_seen": 321354732, "step": 5736 }, { "epoch": 12.775055679287306, "loss": 0.3706546723842621, "loss_ce": 0.00011024883133359253, "loss_iou": 0.1689453125, "loss_num": 0.006561279296875, "loss_xval": 0.37109375, "num_input_tokens_seen": 321354732, "step": 5736 }, { "epoch": 12.77728285077951, "grad_norm": 34.92224884033203, "learning_rate": 1e-06, "loss": 0.6155, "num_input_tokens_seen": 321408740, "step": 5737 }, { "epoch": 12.77728285077951, "loss": 0.7788517475128174, "loss_ce": 0.00016523349040653557, "loss_iou": 0.361328125, "loss_num": 0.0111083984375, "loss_xval": 0.77734375, "num_input_tokens_seen": 321408740, "step": 5737 }, { "epoch": 12.779510022271715, "grad_norm": 19.885868072509766, "learning_rate": 1e-06, "loss": 0.4942, "num_input_tokens_seen": 321465824, "step": 5738 }, { "epoch": 12.779510022271715, "loss": 0.49599504470825195, "loss_ce": 0.0001454516313970089, "loss_iou": 0.2021484375, "loss_num": 0.0184326171875, "loss_xval": 0.49609375, "num_input_tokens_seen": 321465824, "step": 5738 }, { "epoch": 12.78173719376392, "grad_norm": 24.478364944458008, "learning_rate": 1e-06, "loss": 0.4904, "num_input_tokens_seen": 321521952, "step": 5739 }, { "epoch": 12.78173719376392, "loss": 0.34351837635040283, "loss_ce": 0.00013458389730658382, "loss_iou": 0.134765625, "loss_num": 0.01483154296875, "loss_xval": 0.34375, "num_input_tokens_seen": 321521952, "step": 5739 }, { "epoch": 12.783964365256125, "grad_norm": 34.84294128417969, "learning_rate": 1e-06, "loss": 0.4395, "num_input_tokens_seen": 321577264, "step": 5740 }, { "epoch": 12.783964365256125, "loss": 0.396610826253891, "loss_ce": 0.00012645949027501047, "loss_iou": 0.181640625, "loss_num": 0.006591796875, "loss_xval": 0.396484375, "num_input_tokens_seen": 321577264, "step": 5740 }, { "epoch": 12.78619153674833, "grad_norm": 18.09517478942871, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 321634008, "step": 5741 }, { "epoch": 12.78619153674833, "loss": 0.7020202875137329, "loss_ce": 0.00011601038568187505, "loss_iou": 0.31640625, "loss_num": 0.01385498046875, "loss_xval": 0.703125, "num_input_tokens_seen": 321634008, "step": 5741 }, { "epoch": 12.788418708240535, "grad_norm": 20.708227157592773, "learning_rate": 1e-06, "loss": 0.4856, "num_input_tokens_seen": 321690796, "step": 5742 }, { "epoch": 12.788418708240535, "loss": 0.5002317428588867, "loss_ce": 0.00010964082321152091, "loss_iou": 0.20703125, "loss_num": 0.01708984375, "loss_xval": 0.5, "num_input_tokens_seen": 321690796, "step": 5742 }, { "epoch": 12.79064587973274, "grad_norm": 22.730310440063477, "learning_rate": 1e-06, "loss": 0.676, "num_input_tokens_seen": 321744736, "step": 5743 }, { "epoch": 12.79064587973274, "loss": 0.7551612854003906, "loss_ce": 0.00015634976443834603, "loss_iou": 0.3203125, "loss_num": 0.022705078125, "loss_xval": 0.75390625, "num_input_tokens_seen": 321744736, "step": 5743 }, { "epoch": 12.792873051224944, "grad_norm": 21.29202651977539, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 321801344, "step": 5744 }, { "epoch": 12.792873051224944, "loss": 0.39646512269973755, "loss_ce": 0.00010285238386131823, "loss_iou": 0.171875, "loss_num": 0.01055908203125, "loss_xval": 0.396484375, "num_input_tokens_seen": 321801344, "step": 5744 }, { "epoch": 12.79510022271715, "grad_norm": 29.273391723632812, "learning_rate": 1e-06, "loss": 0.4766, "num_input_tokens_seen": 321857176, "step": 5745 }, { "epoch": 12.79510022271715, "loss": 0.6940093040466309, "loss_ce": 0.0001615973305888474, "loss_iou": 0.30078125, "loss_num": 0.0184326171875, "loss_xval": 0.6953125, "num_input_tokens_seen": 321857176, "step": 5745 }, { "epoch": 12.797327394209354, "grad_norm": 15.866458892822266, "learning_rate": 1e-06, "loss": 0.4327, "num_input_tokens_seen": 321913700, "step": 5746 }, { "epoch": 12.797327394209354, "loss": 0.3660312294960022, "loss_ce": 0.00012545731442514807, "loss_iou": 0.162109375, "loss_num": 0.00836181640625, "loss_xval": 0.365234375, "num_input_tokens_seen": 321913700, "step": 5746 }, { "epoch": 12.799554565701559, "grad_norm": 22.117265701293945, "learning_rate": 1e-06, "loss": 0.4373, "num_input_tokens_seen": 321968160, "step": 5747 }, { "epoch": 12.799554565701559, "loss": 0.40611228346824646, "loss_ce": 0.00010643507994245738, "loss_iou": 0.177734375, "loss_num": 0.0101318359375, "loss_xval": 0.40625, "num_input_tokens_seen": 321968160, "step": 5747 }, { "epoch": 12.801781737193764, "grad_norm": 17.11176300048828, "learning_rate": 1e-06, "loss": 0.6027, "num_input_tokens_seen": 322023584, "step": 5748 }, { "epoch": 12.801781737193764, "loss": 0.6447978615760803, "loss_ce": 0.00014456806820817292, "loss_iou": 0.2578125, "loss_num": 0.0252685546875, "loss_xval": 0.64453125, "num_input_tokens_seen": 322023584, "step": 5748 }, { "epoch": 12.804008908685969, "grad_norm": 31.402372360229492, "learning_rate": 1e-06, "loss": 0.4811, "num_input_tokens_seen": 322082440, "step": 5749 }, { "epoch": 12.804008908685969, "loss": 0.39622175693511963, "loss_ce": 0.00010357976134400815, "loss_iou": 0.1708984375, "loss_num": 0.010986328125, "loss_xval": 0.396484375, "num_input_tokens_seen": 322082440, "step": 5749 }, { "epoch": 12.806236080178174, "grad_norm": 15.063833236694336, "learning_rate": 1e-06, "loss": 0.4572, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "eval_seeclick_web_CIoU": 0.5856587886810303, "eval_seeclick_web_GIoU": 0.583136647939682, "eval_seeclick_web_IoU": 0.6040867269039154, "eval_seeclick_web_MAE_all": 0.0155873978510499, "eval_seeclick_web_MAE_h": 0.007719088811427355, "eval_seeclick_web_MAE_w": 0.015769829973578453, "eval_seeclick_web_MAE_x_boxes": 0.009643178898841143, "eval_seeclick_web_MAE_y_boxes": 0.021339073311537504, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9100306630134583, "eval_seeclick_web_loss_ce": 0.00018124158668797463, "eval_seeclick_web_loss_iou": 0.417724609375, "eval_seeclick_web_loss_num": 0.012420654296875, "eval_seeclick_web_loss_xval": 0.8966064453125, "eval_seeclick_web_runtime": 22.4296, "eval_seeclick_web_samples_per_second": 2.229, "eval_seeclick_web_steps_per_second": 0.089, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "eval_icons_CIoU": 0.29134996235370636, "eval_icons_GIoU": 0.31351229548454285, "eval_icons_IoU": 0.3615979105234146, "eval_icons_MAE_all": 0.05436762422323227, "eval_icons_MAE_h": 0.036054016556590796, "eval_icons_MAE_w": 0.045310807414352894, "eval_icons_MAE_x_boxes": 0.05722554586827755, "eval_icons_MAE_y_boxes": 0.03668802231550217, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.6602177619934082, "eval_icons_loss_ce": 0.00021313664910849184, "eval_icons_loss_iou": 0.668701171875, "eval_icons_loss_num": 0.053501129150390625, "eval_icons_loss_xval": 1.60546875, "eval_icons_runtime": 18.5773, "eval_icons_samples_per_second": 2.691, "eval_icons_steps_per_second": 0.108, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "eval_screenspot_CIoU": 0.35812679926554364, "eval_screenspot_GIoU": 0.37103012204170227, "eval_screenspot_IoU": 0.43642645080884296, "eval_screenspot_MAE_all": 0.05802058925231298, "eval_screenspot_MAE_h": 0.03933714081843694, "eval_screenspot_MAE_w": 0.06783675154050191, "eval_screenspot_MAE_x_boxes": 0.07193685260911782, "eval_screenspot_MAE_y_boxes": 0.03876749984920025, "eval_screenspot_inside_bbox": 0.6966666579246521, "eval_screenspot_loss": 1.6010042428970337, "eval_screenspot_loss_ce": 0.00024160779624556503, "eval_screenspot_loss_iou": 0.664306640625, "eval_screenspot_loss_num": 0.06610488891601562, "eval_screenspot_loss_xval": 1.6591796875, "eval_screenspot_runtime": 32.6846, "eval_screenspot_samples_per_second": 2.723, "eval_screenspot_steps_per_second": 0.092, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "eval_compot_CIoU": 0.3403613865375519, "eval_compot_GIoU": 0.3508555740118027, "eval_compot_IoU": 0.3995762914419174, "eval_compot_MAE_all": 0.018100187182426453, "eval_compot_MAE_h": 0.009705259930342436, "eval_compot_MAE_w": 0.02139892429113388, "eval_compot_MAE_x_boxes": 0.0300004780292511, "eval_compot_MAE_y_boxes": 0.007140443893149495, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.413923740386963, "eval_compot_loss_ce": 0.00017305201618000865, "eval_compot_loss_iou": 0.6578369140625, "eval_compot_loss_num": 0.016866683959960938, "eval_compot_loss_xval": 1.399169921875, "eval_compot_runtime": 19.9376, "eval_compot_samples_per_second": 2.508, "eval_compot_steps_per_second": 0.1, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "eval_custom_ui_val_CIoU": 0.47460226383474136, "eval_custom_ui_val_GIoU": 0.4856240898370743, "eval_custom_ui_val_IoU": 0.5360172192255656, "eval_custom_ui_val_MAE_all": 0.028572862967848778, "eval_custom_ui_val_MAE_h": 0.016446984217812616, "eval_custom_ui_val_MAE_w": 0.03571122243172593, "eval_custom_ui_val_MAE_x_boxes": 0.03412022265709109, "eval_custom_ui_val_MAE_y_boxes": 0.014356489810678694, "eval_custom_ui_val_inside_bbox": 0.7685185207260979, "eval_custom_ui_val_loss": 1.1905332803726196, "eval_custom_ui_val_loss_ce": 0.00019961439506409483, "eval_custom_ui_val_loss_iou": 0.5112169053819444, "eval_custom_ui_val_loss_num": 0.026023017035590276, "eval_custom_ui_val_loss_xval": 1.1525065104166667, "eval_custom_ui_val_runtime": 60.1958, "eval_custom_ui_val_samples_per_second": 4.402, "eval_custom_ui_val_steps_per_second": 0.15, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.806236080178174, "loss": 0.9020213484764099, "loss_ce": 0.00016588816652074456, "loss_iou": 0.400390625, "loss_num": 0.0206298828125, "loss_xval": 0.90234375, "num_input_tokens_seen": 322138808, "step": 5750 }, { "epoch": 12.808463251670378, "grad_norm": 38.99467086791992, "learning_rate": 1e-06, "loss": 0.7047, "num_input_tokens_seen": 322196804, "step": 5751 }, { "epoch": 12.808463251670378, "loss": 0.594732940196991, "loss_ce": 0.0001284430327359587, "loss_iou": 0.26171875, "loss_num": 0.013671875, "loss_xval": 0.59375, "num_input_tokens_seen": 322196804, "step": 5751 }, { "epoch": 12.810690423162583, "grad_norm": 18.36638832092285, "learning_rate": 1e-06, "loss": 0.3216, "num_input_tokens_seen": 322252400, "step": 5752 }, { "epoch": 12.810690423162583, "loss": 0.3923182189464569, "loss_ce": 0.00010630811448208988, "loss_iou": 0.1708984375, "loss_num": 0.010009765625, "loss_xval": 0.392578125, "num_input_tokens_seen": 322252400, "step": 5752 }, { "epoch": 12.812917594654788, "grad_norm": 29.22452163696289, "learning_rate": 1e-06, "loss": 0.6287, "num_input_tokens_seen": 322308460, "step": 5753 }, { "epoch": 12.812917594654788, "loss": 0.6383031606674194, "loss_ce": 0.00011960881238337606, "loss_iou": 0.2578125, "loss_num": 0.0244140625, "loss_xval": 0.63671875, "num_input_tokens_seen": 322308460, "step": 5753 }, { "epoch": 12.815144766146993, "grad_norm": 21.79752540588379, "learning_rate": 1e-06, "loss": 0.4675, "num_input_tokens_seen": 322362036, "step": 5754 }, { "epoch": 12.815144766146993, "loss": 0.5063232183456421, "loss_ce": 9.764191054273397e-05, "loss_iou": 0.1904296875, "loss_num": 0.025146484375, "loss_xval": 0.5078125, "num_input_tokens_seen": 322362036, "step": 5754 }, { "epoch": 12.817371937639198, "grad_norm": 16.744384765625, "learning_rate": 1e-06, "loss": 0.6904, "num_input_tokens_seen": 322418496, "step": 5755 }, { "epoch": 12.817371937639198, "loss": 0.6048610806465149, "loss_ce": 0.000124765865621157, "loss_iou": 0.251953125, "loss_num": 0.0206298828125, "loss_xval": 0.60546875, "num_input_tokens_seen": 322418496, "step": 5755 }, { "epoch": 12.819599109131403, "grad_norm": 17.073532104492188, "learning_rate": 1e-06, "loss": 0.5932, "num_input_tokens_seen": 322473040, "step": 5756 }, { "epoch": 12.819599109131403, "loss": 0.733528733253479, "loss_ce": 0.00013029485126025975, "loss_iou": 0.3125, "loss_num": 0.0211181640625, "loss_xval": 0.734375, "num_input_tokens_seen": 322473040, "step": 5756 }, { "epoch": 12.821826280623608, "grad_norm": 20.584901809692383, "learning_rate": 1e-06, "loss": 0.7059, "num_input_tokens_seen": 322528604, "step": 5757 }, { "epoch": 12.821826280623608, "loss": 0.8982992768287659, "loss_ce": 0.00010590371675789356, "loss_iou": 0.38671875, "loss_num": 0.025146484375, "loss_xval": 0.8984375, "num_input_tokens_seen": 322528604, "step": 5757 }, { "epoch": 12.824053452115812, "grad_norm": 20.415708541870117, "learning_rate": 1e-06, "loss": 0.5264, "num_input_tokens_seen": 322583604, "step": 5758 }, { "epoch": 12.824053452115812, "loss": 0.4630866050720215, "loss_ce": 0.00013495869643520564, "loss_iou": 0.1962890625, "loss_num": 0.01409912109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 322583604, "step": 5758 }, { "epoch": 12.826280623608017, "grad_norm": 15.553330421447754, "learning_rate": 1e-06, "loss": 0.3762, "num_input_tokens_seen": 322642080, "step": 5759 }, { "epoch": 12.826280623608017, "loss": 0.4416653513908386, "loss_ce": 0.0002591132652014494, "loss_iou": 0.2041015625, "loss_num": 0.006622314453125, "loss_xval": 0.44140625, "num_input_tokens_seen": 322642080, "step": 5759 }, { "epoch": 12.828507795100222, "grad_norm": 14.952034950256348, "learning_rate": 1e-06, "loss": 0.5567, "num_input_tokens_seen": 322697788, "step": 5760 }, { "epoch": 12.828507795100222, "loss": 0.548295259475708, "loss_ce": 0.00019955809693783522, "loss_iou": 0.2373046875, "loss_num": 0.0146484375, "loss_xval": 0.546875, "num_input_tokens_seen": 322697788, "step": 5760 }, { "epoch": 12.830734966592427, "grad_norm": 25.951000213623047, "learning_rate": 1e-06, "loss": 0.4949, "num_input_tokens_seen": 322752844, "step": 5761 }, { "epoch": 12.830734966592427, "loss": 0.4571736454963684, "loss_ce": 0.00014240591553971171, "loss_iou": 0.20703125, "loss_num": 0.0087890625, "loss_xval": 0.45703125, "num_input_tokens_seen": 322752844, "step": 5761 }, { "epoch": 12.832962138084632, "grad_norm": 43.291751861572266, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 322808240, "step": 5762 }, { "epoch": 12.832962138084632, "loss": 0.598831832408905, "loss_ce": 0.0001380076282657683, "loss_iou": 0.248046875, "loss_num": 0.0206298828125, "loss_xval": 0.59765625, "num_input_tokens_seen": 322808240, "step": 5762 }, { "epoch": 12.835189309576837, "grad_norm": 21.269405364990234, "learning_rate": 1e-06, "loss": 0.5252, "num_input_tokens_seen": 322863788, "step": 5763 }, { "epoch": 12.835189309576837, "loss": 0.5071128010749817, "loss_ce": 0.00015480615547858179, "loss_iou": 0.2373046875, "loss_num": 0.006744384765625, "loss_xval": 0.5078125, "num_input_tokens_seen": 322863788, "step": 5763 }, { "epoch": 12.837416481069042, "grad_norm": 25.716829299926758, "learning_rate": 1e-06, "loss": 0.5679, "num_input_tokens_seen": 322917120, "step": 5764 }, { "epoch": 12.837416481069042, "loss": 0.6708798408508301, "loss_ce": 0.0002255703293485567, "loss_iou": 0.306640625, "loss_num": 0.01141357421875, "loss_xval": 0.671875, "num_input_tokens_seen": 322917120, "step": 5764 }, { "epoch": 12.839643652561247, "grad_norm": 26.497522354125977, "learning_rate": 1e-06, "loss": 0.7658, "num_input_tokens_seen": 322972616, "step": 5765 }, { "epoch": 12.839643652561247, "loss": 0.7098448872566223, "loss_ce": 0.0001281315489904955, "loss_iou": 0.283203125, "loss_num": 0.0284423828125, "loss_xval": 0.7109375, "num_input_tokens_seen": 322972616, "step": 5765 }, { "epoch": 12.841870824053451, "grad_norm": 76.6500244140625, "learning_rate": 1e-06, "loss": 0.6196, "num_input_tokens_seen": 323026852, "step": 5766 }, { "epoch": 12.841870824053451, "loss": 0.8302084803581238, "loss_ce": 0.00013035557640250772, "loss_iou": 0.337890625, "loss_num": 0.03125, "loss_xval": 0.828125, "num_input_tokens_seen": 323026852, "step": 5766 }, { "epoch": 12.844097995545656, "grad_norm": 62.04082107543945, "learning_rate": 1e-06, "loss": 0.6541, "num_input_tokens_seen": 323081780, "step": 5767 }, { "epoch": 12.844097995545656, "loss": 0.5926365256309509, "loss_ce": 0.00010719904821598902, "loss_iou": 0.2734375, "loss_num": 0.00933837890625, "loss_xval": 0.59375, "num_input_tokens_seen": 323081780, "step": 5767 }, { "epoch": 12.846325167037861, "grad_norm": 15.04085636138916, "learning_rate": 1e-06, "loss": 0.3717, "num_input_tokens_seen": 323138412, "step": 5768 }, { "epoch": 12.846325167037861, "loss": 0.3711279630661011, "loss_ce": 0.00015629868721589446, "loss_iou": 0.162109375, "loss_num": 0.00921630859375, "loss_xval": 0.37109375, "num_input_tokens_seen": 323138412, "step": 5768 }, { "epoch": 12.848552338530066, "grad_norm": 16.58966064453125, "learning_rate": 1e-06, "loss": 0.468, "num_input_tokens_seen": 323194440, "step": 5769 }, { "epoch": 12.848552338530066, "loss": 0.47240063548088074, "loss_ce": 0.00011059406824642792, "loss_iou": 0.201171875, "loss_num": 0.013916015625, "loss_xval": 0.47265625, "num_input_tokens_seen": 323194440, "step": 5769 }, { "epoch": 12.85077951002227, "grad_norm": 15.409582138061523, "learning_rate": 1e-06, "loss": 0.6628, "num_input_tokens_seen": 323250432, "step": 5770 }, { "epoch": 12.85077951002227, "loss": 0.5492154955863953, "loss_ce": 0.0001432021672371775, "loss_iou": 0.2470703125, "loss_num": 0.0108642578125, "loss_xval": 0.55078125, "num_input_tokens_seen": 323250432, "step": 5770 }, { "epoch": 12.853006681514476, "grad_norm": 17.768123626708984, "learning_rate": 1e-06, "loss": 0.467, "num_input_tokens_seen": 323305916, "step": 5771 }, { "epoch": 12.853006681514476, "loss": 0.3561987280845642, "loss_ce": 0.00011960781557718292, "loss_iou": 0.1611328125, "loss_num": 0.0067138671875, "loss_xval": 0.35546875, "num_input_tokens_seen": 323305916, "step": 5771 }, { "epoch": 12.855233853006682, "grad_norm": 26.621932983398438, "learning_rate": 1e-06, "loss": 0.6905, "num_input_tokens_seen": 323363612, "step": 5772 }, { "epoch": 12.855233853006682, "loss": 0.621519923210144, "loss_ce": 0.000304146931739524, "loss_iou": 0.287109375, "loss_num": 0.0093994140625, "loss_xval": 0.62109375, "num_input_tokens_seen": 323363612, "step": 5772 }, { "epoch": 12.857461024498887, "grad_norm": 36.93680953979492, "learning_rate": 1e-06, "loss": 0.4199, "num_input_tokens_seen": 323418516, "step": 5773 }, { "epoch": 12.857461024498887, "loss": 0.4533904790878296, "loss_ce": 0.0001434021396562457, "loss_iou": 0.1923828125, "loss_num": 0.01385498046875, "loss_xval": 0.453125, "num_input_tokens_seen": 323418516, "step": 5773 }, { "epoch": 12.859688195991092, "grad_norm": 12.92249870300293, "learning_rate": 1e-06, "loss": 0.3578, "num_input_tokens_seen": 323475544, "step": 5774 }, { "epoch": 12.859688195991092, "loss": 0.3216310143470764, "loss_ce": 9.778769162949175e-05, "loss_iou": 0.1474609375, "loss_num": 0.005126953125, "loss_xval": 0.322265625, "num_input_tokens_seen": 323475544, "step": 5774 }, { "epoch": 12.861915367483297, "grad_norm": 15.084147453308105, "learning_rate": 1e-06, "loss": 0.5663, "num_input_tokens_seen": 323531332, "step": 5775 }, { "epoch": 12.861915367483297, "loss": 0.5575112104415894, "loss_ce": 0.0001381381880491972, "loss_iou": 0.255859375, "loss_num": 0.00933837890625, "loss_xval": 0.55859375, "num_input_tokens_seen": 323531332, "step": 5775 }, { "epoch": 12.864142538975502, "grad_norm": 23.849468231201172, "learning_rate": 1e-06, "loss": 0.4872, "num_input_tokens_seen": 323586272, "step": 5776 }, { "epoch": 12.864142538975502, "loss": 0.3724247217178345, "loss_ce": 0.00011026560969185084, "loss_iou": 0.16015625, "loss_num": 0.010498046875, "loss_xval": 0.373046875, "num_input_tokens_seen": 323586272, "step": 5776 }, { "epoch": 12.866369710467707, "grad_norm": 16.385759353637695, "learning_rate": 1e-06, "loss": 0.4698, "num_input_tokens_seen": 323642804, "step": 5777 }, { "epoch": 12.866369710467707, "loss": 0.6077675819396973, "loss_ce": 0.00010155436029890552, "loss_iou": 0.26953125, "loss_num": 0.01348876953125, "loss_xval": 0.609375, "num_input_tokens_seen": 323642804, "step": 5777 }, { "epoch": 12.868596881959911, "grad_norm": 13.454336166381836, "learning_rate": 1e-06, "loss": 0.3738, "num_input_tokens_seen": 323699508, "step": 5778 }, { "epoch": 12.868596881959911, "loss": 0.3336215317249298, "loss_ce": 0.00012544200581032783, "loss_iou": 0.1552734375, "loss_num": 0.004486083984375, "loss_xval": 0.333984375, "num_input_tokens_seen": 323699508, "step": 5778 }, { "epoch": 12.870824053452116, "grad_norm": 32.175724029541016, "learning_rate": 1e-06, "loss": 0.3957, "num_input_tokens_seen": 323753524, "step": 5779 }, { "epoch": 12.870824053452116, "loss": 0.38816550374031067, "loss_ce": 0.00010398947779322043, "loss_iou": 0.1787109375, "loss_num": 0.006317138671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 323753524, "step": 5779 }, { "epoch": 12.873051224944321, "grad_norm": 16.53754234313965, "learning_rate": 1e-06, "loss": 0.3905, "num_input_tokens_seen": 323807584, "step": 5780 }, { "epoch": 12.873051224944321, "loss": 0.460315078496933, "loss_ce": 0.00011000905215041712, "loss_iou": 0.189453125, "loss_num": 0.0162353515625, "loss_xval": 0.4609375, "num_input_tokens_seen": 323807584, "step": 5780 }, { "epoch": 12.875278396436526, "grad_norm": 33.93476486206055, "learning_rate": 1e-06, "loss": 0.5997, "num_input_tokens_seen": 323865248, "step": 5781 }, { "epoch": 12.875278396436526, "loss": 0.8404668569564819, "loss_ce": 0.00013486588431987911, "loss_iou": 0.365234375, "loss_num": 0.021728515625, "loss_xval": 0.83984375, "num_input_tokens_seen": 323865248, "step": 5781 }, { "epoch": 12.877505567928731, "grad_norm": 24.43963623046875, "learning_rate": 1e-06, "loss": 0.4762, "num_input_tokens_seen": 323921064, "step": 5782 }, { "epoch": 12.877505567928731, "loss": 0.5513740181922913, "loss_ce": 0.00010449648834764957, "loss_iou": 0.2138671875, "loss_num": 0.0247802734375, "loss_xval": 0.55078125, "num_input_tokens_seen": 323921064, "step": 5782 }, { "epoch": 12.879732739420936, "grad_norm": 19.904743194580078, "learning_rate": 1e-06, "loss": 0.4667, "num_input_tokens_seen": 323979316, "step": 5783 }, { "epoch": 12.879732739420936, "loss": 0.4628022313117981, "loss_ce": 0.00015576645091641694, "loss_iou": 0.1923828125, "loss_num": 0.015625, "loss_xval": 0.462890625, "num_input_tokens_seen": 323979316, "step": 5783 }, { "epoch": 12.88195991091314, "grad_norm": 13.64018440246582, "learning_rate": 1e-06, "loss": 0.3038, "num_input_tokens_seen": 324037372, "step": 5784 }, { "epoch": 12.88195991091314, "loss": 0.361078143119812, "loss_ce": 0.00011623941827565432, "loss_iou": 0.15234375, "loss_num": 0.01116943359375, "loss_xval": 0.361328125, "num_input_tokens_seen": 324037372, "step": 5784 }, { "epoch": 12.884187082405345, "grad_norm": 75.058349609375, "learning_rate": 1e-06, "loss": 0.6451, "num_input_tokens_seen": 324095976, "step": 5785 }, { "epoch": 12.884187082405345, "loss": 0.6566053628921509, "loss_ce": 0.0011488739401102066, "loss_iou": 0.267578125, "loss_num": 0.023681640625, "loss_xval": 0.65625, "num_input_tokens_seen": 324095976, "step": 5785 }, { "epoch": 12.88641425389755, "grad_norm": 13.707096099853516, "learning_rate": 1e-06, "loss": 0.5832, "num_input_tokens_seen": 324150696, "step": 5786 }, { "epoch": 12.88641425389755, "loss": 0.3526431918144226, "loss_ce": 0.00010411690163891762, "loss_iou": 0.154296875, "loss_num": 0.00860595703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 324150696, "step": 5786 }, { "epoch": 12.888641425389755, "grad_norm": 18.37203025817871, "learning_rate": 1e-06, "loss": 0.5096, "num_input_tokens_seen": 324204900, "step": 5787 }, { "epoch": 12.888641425389755, "loss": 0.5298916697502136, "loss_ce": 0.00010648852912709117, "loss_iou": 0.228515625, "loss_num": 0.01434326171875, "loss_xval": 0.53125, "num_input_tokens_seen": 324204900, "step": 5787 }, { "epoch": 12.89086859688196, "grad_norm": 18.70199203491211, "learning_rate": 1e-06, "loss": 0.4261, "num_input_tokens_seen": 324262452, "step": 5788 }, { "epoch": 12.89086859688196, "loss": 0.3846321105957031, "loss_ce": 0.00011065039871027693, "loss_iou": 0.1787109375, "loss_num": 0.005279541015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 324262452, "step": 5788 }, { "epoch": 12.893095768374165, "grad_norm": 22.604270935058594, "learning_rate": 1e-06, "loss": 0.5116, "num_input_tokens_seen": 324313472, "step": 5789 }, { "epoch": 12.893095768374165, "loss": 0.5472476482391357, "loss_ce": 0.00012846475874539465, "loss_iou": 0.240234375, "loss_num": 0.01336669921875, "loss_xval": 0.546875, "num_input_tokens_seen": 324313472, "step": 5789 }, { "epoch": 12.89532293986637, "grad_norm": 19.753644943237305, "learning_rate": 1e-06, "loss": 0.4183, "num_input_tokens_seen": 324369044, "step": 5790 }, { "epoch": 12.89532293986637, "loss": 0.5045735836029053, "loss_ce": 0.00011799067578976974, "loss_iou": 0.21875, "loss_num": 0.01348876953125, "loss_xval": 0.50390625, "num_input_tokens_seen": 324369044, "step": 5790 }, { "epoch": 12.897550111358575, "grad_norm": 31.045259475708008, "learning_rate": 1e-06, "loss": 0.6084, "num_input_tokens_seen": 324424920, "step": 5791 }, { "epoch": 12.897550111358575, "loss": 0.7539187669754028, "loss_ce": 0.0001345592609141022, "loss_iou": 0.326171875, "loss_num": 0.0205078125, "loss_xval": 0.75390625, "num_input_tokens_seen": 324424920, "step": 5791 }, { "epoch": 12.89977728285078, "grad_norm": 23.0539493560791, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 324481128, "step": 5792 }, { "epoch": 12.89977728285078, "loss": 0.44552081823349, "loss_ce": 0.00014727559755556285, "loss_iou": 0.185546875, "loss_num": 0.0147705078125, "loss_xval": 0.4453125, "num_input_tokens_seen": 324481128, "step": 5792 }, { "epoch": 12.902004454342984, "grad_norm": 20.192691802978516, "learning_rate": 1e-06, "loss": 0.4366, "num_input_tokens_seen": 324537512, "step": 5793 }, { "epoch": 12.902004454342984, "loss": 0.4317473769187927, "loss_ce": 0.00010673434007912874, "loss_iou": 0.1875, "loss_num": 0.0111083984375, "loss_xval": 0.431640625, "num_input_tokens_seen": 324537512, "step": 5793 }, { "epoch": 12.90423162583519, "grad_norm": 23.165922164916992, "learning_rate": 1e-06, "loss": 0.5654, "num_input_tokens_seen": 324591436, "step": 5794 }, { "epoch": 12.90423162583519, "loss": 0.7123044729232788, "loss_ce": 0.000146272053825669, "loss_iou": 0.2890625, "loss_num": 0.026611328125, "loss_xval": 0.7109375, "num_input_tokens_seen": 324591436, "step": 5794 }, { "epoch": 12.906458797327394, "grad_norm": 19.26278305053711, "learning_rate": 1e-06, "loss": 0.6626, "num_input_tokens_seen": 324646868, "step": 5795 }, { "epoch": 12.906458797327394, "loss": 0.9051787257194519, "loss_ce": 0.0001494084281148389, "loss_iou": 0.373046875, "loss_num": 0.031494140625, "loss_xval": 0.90625, "num_input_tokens_seen": 324646868, "step": 5795 }, { "epoch": 12.908685968819599, "grad_norm": 18.52065658569336, "learning_rate": 1e-06, "loss": 0.4669, "num_input_tokens_seen": 324704652, "step": 5796 }, { "epoch": 12.908685968819599, "loss": 0.4736798405647278, "loss_ce": 0.0001691254146862775, "loss_iou": 0.18359375, "loss_num": 0.02099609375, "loss_xval": 0.47265625, "num_input_tokens_seen": 324704652, "step": 5796 }, { "epoch": 12.910913140311804, "grad_norm": 22.1650333404541, "learning_rate": 1e-06, "loss": 0.5009, "num_input_tokens_seen": 324762108, "step": 5797 }, { "epoch": 12.910913140311804, "loss": 0.44127357006073, "loss_ce": 0.00011143650044687092, "loss_iou": 0.1943359375, "loss_num": 0.010498046875, "loss_xval": 0.44140625, "num_input_tokens_seen": 324762108, "step": 5797 }, { "epoch": 12.913140311804009, "grad_norm": 18.897441864013672, "learning_rate": 1e-06, "loss": 0.4502, "num_input_tokens_seen": 324817916, "step": 5798 }, { "epoch": 12.913140311804009, "loss": 0.480578750371933, "loss_ce": 0.00010997521167155355, "loss_iou": 0.2158203125, "loss_num": 0.00958251953125, "loss_xval": 0.48046875, "num_input_tokens_seen": 324817916, "step": 5798 }, { "epoch": 12.915367483296214, "grad_norm": 13.311334609985352, "learning_rate": 1e-06, "loss": 0.5007, "num_input_tokens_seen": 324872960, "step": 5799 }, { "epoch": 12.915367483296214, "loss": 0.5006000995635986, "loss_ce": 0.00011186262418050319, "loss_iou": 0.2158203125, "loss_num": 0.013671875, "loss_xval": 0.5, "num_input_tokens_seen": 324872960, "step": 5799 }, { "epoch": 12.917594654788418, "grad_norm": 12.26301097869873, "learning_rate": 1e-06, "loss": 0.4789, "num_input_tokens_seen": 324927908, "step": 5800 }, { "epoch": 12.917594654788418, "loss": 0.47144681215286255, "loss_ce": 0.00013335124822333455, "loss_iou": 0.2109375, "loss_num": 0.0096435546875, "loss_xval": 0.470703125, "num_input_tokens_seen": 324927908, "step": 5800 }, { "epoch": 12.919821826280623, "grad_norm": 16.52623176574707, "learning_rate": 1e-06, "loss": 0.6662, "num_input_tokens_seen": 324986040, "step": 5801 }, { "epoch": 12.919821826280623, "loss": 0.5938632488250732, "loss_ce": 0.0012118464801460505, "loss_iou": 0.263671875, "loss_num": 0.01275634765625, "loss_xval": 0.59375, "num_input_tokens_seen": 324986040, "step": 5801 }, { "epoch": 12.922048997772828, "grad_norm": 29.222322463989258, "learning_rate": 1e-06, "loss": 0.4547, "num_input_tokens_seen": 325043160, "step": 5802 }, { "epoch": 12.922048997772828, "loss": 0.4256219267845154, "loss_ce": 8.482815610477701e-05, "loss_iou": 0.1787109375, "loss_num": 0.0135498046875, "loss_xval": 0.42578125, "num_input_tokens_seen": 325043160, "step": 5802 }, { "epoch": 12.924276169265033, "grad_norm": 19.258743286132812, "learning_rate": 1e-06, "loss": 0.3797, "num_input_tokens_seen": 325099556, "step": 5803 }, { "epoch": 12.924276169265033, "loss": 0.4342288076877594, "loss_ce": 0.00014675800048280507, "loss_iou": 0.1904296875, "loss_num": 0.01068115234375, "loss_xval": 0.43359375, "num_input_tokens_seen": 325099556, "step": 5803 }, { "epoch": 12.926503340757238, "grad_norm": 18.667795181274414, "learning_rate": 1e-06, "loss": 0.5903, "num_input_tokens_seen": 325154616, "step": 5804 }, { "epoch": 12.926503340757238, "loss": 0.5471016764640808, "loss_ce": 0.00016564295219723135, "loss_iou": 0.255859375, "loss_num": 0.007232666015625, "loss_xval": 0.546875, "num_input_tokens_seen": 325154616, "step": 5804 }, { "epoch": 12.928730512249443, "grad_norm": 19.413196563720703, "learning_rate": 1e-06, "loss": 0.6071, "num_input_tokens_seen": 325208228, "step": 5805 }, { "epoch": 12.928730512249443, "loss": 0.6254149079322815, "loss_ce": 0.00010969527647830546, "loss_iou": 0.25390625, "loss_num": 0.0235595703125, "loss_xval": 0.625, "num_input_tokens_seen": 325208228, "step": 5805 }, { "epoch": 12.930957683741648, "grad_norm": 16.120576858520508, "learning_rate": 1e-06, "loss": 0.5249, "num_input_tokens_seen": 325261784, "step": 5806 }, { "epoch": 12.930957683741648, "loss": 0.5421345829963684, "loss_ce": 0.0001423685171175748, "loss_iou": 0.234375, "loss_num": 0.01470947265625, "loss_xval": 0.54296875, "num_input_tokens_seen": 325261784, "step": 5806 }, { "epoch": 12.933184855233852, "grad_norm": 14.40259075164795, "learning_rate": 1e-06, "loss": 0.4978, "num_input_tokens_seen": 325318096, "step": 5807 }, { "epoch": 12.933184855233852, "loss": 0.33383339643478394, "loss_ce": 0.00015419043484143913, "loss_iou": 0.1494140625, "loss_num": 0.0069580078125, "loss_xval": 0.333984375, "num_input_tokens_seen": 325318096, "step": 5807 }, { "epoch": 12.935412026726057, "grad_norm": 29.450130462646484, "learning_rate": 1e-06, "loss": 0.5965, "num_input_tokens_seen": 325372384, "step": 5808 }, { "epoch": 12.935412026726057, "loss": 0.4921649694442749, "loss_ce": 9.954207052942365e-05, "loss_iou": 0.2158203125, "loss_num": 0.01190185546875, "loss_xval": 0.4921875, "num_input_tokens_seen": 325372384, "step": 5808 }, { "epoch": 12.937639198218262, "grad_norm": 14.617000579833984, "learning_rate": 1e-06, "loss": 0.3659, "num_input_tokens_seen": 325427388, "step": 5809 }, { "epoch": 12.937639198218262, "loss": 0.4301881492137909, "loss_ce": 0.0001344462507404387, "loss_iou": 0.1689453125, "loss_num": 0.0184326171875, "loss_xval": 0.4296875, "num_input_tokens_seen": 325427388, "step": 5809 }, { "epoch": 12.939866369710467, "grad_norm": 24.749897003173828, "learning_rate": 1e-06, "loss": 0.655, "num_input_tokens_seen": 325481232, "step": 5810 }, { "epoch": 12.939866369710467, "loss": 0.7332806587219238, "loss_ce": 0.0001263371086679399, "loss_iou": 0.314453125, "loss_num": 0.0208740234375, "loss_xval": 0.734375, "num_input_tokens_seen": 325481232, "step": 5810 }, { "epoch": 12.942093541202672, "grad_norm": 17.5683650970459, "learning_rate": 1e-06, "loss": 0.4979, "num_input_tokens_seen": 325535700, "step": 5811 }, { "epoch": 12.942093541202672, "loss": 0.6848030090332031, "loss_ce": 0.00011064937280025333, "loss_iou": 0.310546875, "loss_num": 0.01275634765625, "loss_xval": 0.68359375, "num_input_tokens_seen": 325535700, "step": 5811 }, { "epoch": 12.944320712694877, "grad_norm": 25.585346221923828, "learning_rate": 1e-06, "loss": 0.4126, "num_input_tokens_seen": 325594156, "step": 5812 }, { "epoch": 12.944320712694877, "loss": 0.37169232964515686, "loss_ce": 0.00011027594155166298, "loss_iou": 0.1513671875, "loss_num": 0.013671875, "loss_xval": 0.37109375, "num_input_tokens_seen": 325594156, "step": 5812 }, { "epoch": 12.946547884187082, "grad_norm": 18.788799285888672, "learning_rate": 1e-06, "loss": 0.6043, "num_input_tokens_seen": 325648944, "step": 5813 }, { "epoch": 12.946547884187082, "loss": 0.6141434907913208, "loss_ce": 0.0001298107672482729, "loss_iou": 0.25390625, "loss_num": 0.021240234375, "loss_xval": 0.61328125, "num_input_tokens_seen": 325648944, "step": 5813 }, { "epoch": 12.948775055679288, "grad_norm": 21.269174575805664, "learning_rate": 1e-06, "loss": 0.8081, "num_input_tokens_seen": 325706188, "step": 5814 }, { "epoch": 12.948775055679288, "loss": 0.8276509046554565, "loss_ce": 0.0002583618916105479, "loss_iou": 0.326171875, "loss_num": 0.034912109375, "loss_xval": 0.828125, "num_input_tokens_seen": 325706188, "step": 5814 }, { "epoch": 12.951002227171493, "grad_norm": 26.132108688354492, "learning_rate": 1e-06, "loss": 0.5818, "num_input_tokens_seen": 325759016, "step": 5815 }, { "epoch": 12.951002227171493, "loss": 0.7224629521369934, "loss_ce": 0.0004475700843613595, "loss_iou": 0.296875, "loss_num": 0.0257568359375, "loss_xval": 0.72265625, "num_input_tokens_seen": 325759016, "step": 5815 }, { "epoch": 12.953229398663698, "grad_norm": 19.72740364074707, "learning_rate": 1e-06, "loss": 0.3361, "num_input_tokens_seen": 325815532, "step": 5816 }, { "epoch": 12.953229398663698, "loss": 0.43498772382736206, "loss_ce": 0.00011223182082176208, "loss_iou": 0.166015625, "loss_num": 0.0205078125, "loss_xval": 0.435546875, "num_input_tokens_seen": 325815532, "step": 5816 }, { "epoch": 12.955456570155903, "grad_norm": 34.82518005371094, "learning_rate": 1e-06, "loss": 0.6078, "num_input_tokens_seen": 325871972, "step": 5817 }, { "epoch": 12.955456570155903, "loss": 0.743614912033081, "loss_ce": 0.00020671662059612572, "loss_iou": 0.29296875, "loss_num": 0.031494140625, "loss_xval": 0.7421875, "num_input_tokens_seen": 325871972, "step": 5817 }, { "epoch": 12.957683741648108, "grad_norm": 101.52168273925781, "learning_rate": 1e-06, "loss": 0.5968, "num_input_tokens_seen": 325928992, "step": 5818 }, { "epoch": 12.957683741648108, "loss": 0.7953479886054993, "loss_ce": 0.00012093935220036656, "loss_iou": 0.330078125, "loss_num": 0.027099609375, "loss_xval": 0.796875, "num_input_tokens_seen": 325928992, "step": 5818 }, { "epoch": 12.959910913140313, "grad_norm": 19.320589065551758, "learning_rate": 1e-06, "loss": 0.8007, "num_input_tokens_seen": 325984976, "step": 5819 }, { "epoch": 12.959910913140313, "loss": 0.8446329236030579, "loss_ce": 0.00015049244393594563, "loss_iou": 0.341796875, "loss_num": 0.0322265625, "loss_xval": 0.84375, "num_input_tokens_seen": 325984976, "step": 5819 }, { "epoch": 12.962138084632517, "grad_norm": 17.67026710510254, "learning_rate": 1e-06, "loss": 0.4272, "num_input_tokens_seen": 326041104, "step": 5820 }, { "epoch": 12.962138084632517, "loss": 0.3929480314254761, "loss_ce": 0.0001257585099665448, "loss_iou": 0.1748046875, "loss_num": 0.0086669921875, "loss_xval": 0.392578125, "num_input_tokens_seen": 326041104, "step": 5820 }, { "epoch": 12.964365256124722, "grad_norm": 17.830753326416016, "learning_rate": 1e-06, "loss": 0.4385, "num_input_tokens_seen": 326094916, "step": 5821 }, { "epoch": 12.964365256124722, "loss": 0.38365253806114197, "loss_ce": 0.00010762730380520225, "loss_iou": 0.1640625, "loss_num": 0.01104736328125, "loss_xval": 0.3828125, "num_input_tokens_seen": 326094916, "step": 5821 }, { "epoch": 12.966592427616927, "grad_norm": 28.51872444152832, "learning_rate": 1e-06, "loss": 0.495, "num_input_tokens_seen": 326151036, "step": 5822 }, { "epoch": 12.966592427616927, "loss": 0.4512762427330017, "loss_ce": 0.0007147122523747385, "loss_iou": 0.1904296875, "loss_num": 0.0140380859375, "loss_xval": 0.451171875, "num_input_tokens_seen": 326151036, "step": 5822 }, { "epoch": 12.968819599109132, "grad_norm": 19.198762893676758, "learning_rate": 1e-06, "loss": 0.7932, "num_input_tokens_seen": 326205816, "step": 5823 }, { "epoch": 12.968819599109132, "loss": 0.7585012316703796, "loss_ce": 0.00020045909332111478, "loss_iou": 0.3125, "loss_num": 0.02685546875, "loss_xval": 0.7578125, "num_input_tokens_seen": 326205816, "step": 5823 }, { "epoch": 12.971046770601337, "grad_norm": 18.618919372558594, "learning_rate": 1e-06, "loss": 0.5058, "num_input_tokens_seen": 326260604, "step": 5824 }, { "epoch": 12.971046770601337, "loss": 0.6246432662010193, "loss_ce": 0.00013153106556273997, "loss_iou": 0.279296875, "loss_num": 0.013427734375, "loss_xval": 0.625, "num_input_tokens_seen": 326260604, "step": 5824 }, { "epoch": 12.973273942093542, "grad_norm": 14.253881454467773, "learning_rate": 1e-06, "loss": 0.4857, "num_input_tokens_seen": 326318360, "step": 5825 }, { "epoch": 12.973273942093542, "loss": 0.4617213010787964, "loss_ce": 0.00017345792730338871, "loss_iou": 0.1875, "loss_num": 0.0172119140625, "loss_xval": 0.4609375, "num_input_tokens_seen": 326318360, "step": 5825 }, { "epoch": 12.975501113585747, "grad_norm": 21.891193389892578, "learning_rate": 1e-06, "loss": 0.5982, "num_input_tokens_seen": 326375376, "step": 5826 }, { "epoch": 12.975501113585747, "loss": 0.6038088202476501, "loss_ce": 0.00017113759531639516, "loss_iou": 0.236328125, "loss_num": 0.026123046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 326375376, "step": 5826 }, { "epoch": 12.977728285077951, "grad_norm": 24.07076072692871, "learning_rate": 1e-06, "loss": 0.412, "num_input_tokens_seen": 326430360, "step": 5827 }, { "epoch": 12.977728285077951, "loss": 0.5371109247207642, "loss_ce": 0.00012366127339191735, "loss_iou": 0.2421875, "loss_num": 0.01055908203125, "loss_xval": 0.53515625, "num_input_tokens_seen": 326430360, "step": 5827 }, { "epoch": 12.979955456570156, "grad_norm": 24.092050552368164, "learning_rate": 1e-06, "loss": 0.5159, "num_input_tokens_seen": 326485148, "step": 5828 }, { "epoch": 12.979955456570156, "loss": 0.6072977781295776, "loss_ce": 0.00012003826850559562, "loss_iou": 0.26171875, "loss_num": 0.0167236328125, "loss_xval": 0.60546875, "num_input_tokens_seen": 326485148, "step": 5828 }, { "epoch": 12.982182628062361, "grad_norm": 15.061753273010254, "learning_rate": 1e-06, "loss": 0.4169, "num_input_tokens_seen": 326541436, "step": 5829 }, { "epoch": 12.982182628062361, "loss": 0.37990421056747437, "loss_ce": 0.00014345324598252773, "loss_iou": 0.15625, "loss_num": 0.01324462890625, "loss_xval": 0.37890625, "num_input_tokens_seen": 326541436, "step": 5829 }, { "epoch": 12.984409799554566, "grad_norm": 17.364686965942383, "learning_rate": 1e-06, "loss": 0.5024, "num_input_tokens_seen": 326598392, "step": 5830 }, { "epoch": 12.984409799554566, "loss": 0.47910645604133606, "loss_ce": 0.00010255920642521232, "loss_iou": 0.2041015625, "loss_num": 0.01416015625, "loss_xval": 0.478515625, "num_input_tokens_seen": 326598392, "step": 5830 }, { "epoch": 12.98663697104677, "grad_norm": 18.025129318237305, "learning_rate": 1e-06, "loss": 0.4841, "num_input_tokens_seen": 326654788, "step": 5831 }, { "epoch": 12.98663697104677, "loss": 0.39598119258880615, "loss_ce": 0.0001071398874046281, "loss_iou": 0.1787109375, "loss_num": 0.007781982421875, "loss_xval": 0.396484375, "num_input_tokens_seen": 326654788, "step": 5831 }, { "epoch": 12.988864142538976, "grad_norm": 43.578460693359375, "learning_rate": 1e-06, "loss": 0.5461, "num_input_tokens_seen": 326710960, "step": 5832 }, { "epoch": 12.988864142538976, "loss": 0.588495135307312, "loss_ce": 0.00011619819269981235, "loss_iou": 0.2392578125, "loss_num": 0.0218505859375, "loss_xval": 0.58984375, "num_input_tokens_seen": 326710960, "step": 5832 }, { "epoch": 12.99109131403118, "grad_norm": 16.58260154724121, "learning_rate": 1e-06, "loss": 0.5595, "num_input_tokens_seen": 326767560, "step": 5833 }, { "epoch": 12.99109131403118, "loss": 0.6758525371551514, "loss_ce": 0.00019332932424731553, "loss_iou": 0.265625, "loss_num": 0.029296875, "loss_xval": 0.67578125, "num_input_tokens_seen": 326767560, "step": 5833 }, { "epoch": 12.993318485523385, "grad_norm": 100.04027557373047, "learning_rate": 1e-06, "loss": 0.4254, "num_input_tokens_seen": 326824960, "step": 5834 }, { "epoch": 12.993318485523385, "loss": 0.4192938804626465, "loss_ce": 0.00010442556231282651, "loss_iou": 0.17578125, "loss_num": 0.01348876953125, "loss_xval": 0.419921875, "num_input_tokens_seen": 326824960, "step": 5834 }, { "epoch": 12.99554565701559, "grad_norm": 18.884227752685547, "learning_rate": 1e-06, "loss": 0.4109, "num_input_tokens_seen": 326882800, "step": 5835 }, { "epoch": 12.99554565701559, "loss": 0.41633379459381104, "loss_ce": 0.00013507826952263713, "loss_iou": 0.193359375, "loss_num": 0.00604248046875, "loss_xval": 0.416015625, "num_input_tokens_seen": 326882800, "step": 5835 }, { "epoch": 12.997772828507795, "grad_norm": 20.262374877929688, "learning_rate": 1e-06, "loss": 0.4506, "num_input_tokens_seen": 326940800, "step": 5836 }, { "epoch": 12.997772828507795, "loss": 0.48572030663490295, "loss_ce": 0.00012461420556064695, "loss_iou": 0.2099609375, "loss_num": 0.01300048828125, "loss_xval": 0.486328125, "num_input_tokens_seen": 326940800, "step": 5836 }, { "epoch": 13.0, "grad_norm": 17.950050354003906, "learning_rate": 1e-06, "loss": 0.4372, "num_input_tokens_seen": 326994644, "step": 5837 }, { "epoch": 13.0, "loss": 0.47044771909713745, "loss_ce": 0.00011081169941462576, "loss_iou": 0.181640625, "loss_num": 0.021728515625, "loss_xval": 0.470703125, "num_input_tokens_seen": 326994644, "step": 5837 }, { "epoch": 13.002227171492205, "grad_norm": 18.163402557373047, "learning_rate": 1e-06, "loss": 0.3952, "num_input_tokens_seen": 327051472, "step": 5838 }, { "epoch": 13.002227171492205, "loss": 0.45926615595817566, "loss_ce": 0.0001597180962562561, "loss_iou": 0.18359375, "loss_num": 0.0185546875, "loss_xval": 0.458984375, "num_input_tokens_seen": 327051472, "step": 5838 }, { "epoch": 13.00445434298441, "grad_norm": 17.97021484375, "learning_rate": 1e-06, "loss": 0.4858, "num_input_tokens_seen": 327107924, "step": 5839 }, { "epoch": 13.00445434298441, "loss": 0.6263518929481506, "loss_ce": 0.00013117733760736883, "loss_iou": 0.27734375, "loss_num": 0.014404296875, "loss_xval": 0.625, "num_input_tokens_seen": 327107924, "step": 5839 }, { "epoch": 13.006681514476615, "grad_norm": 28.137418746948242, "learning_rate": 1e-06, "loss": 0.4594, "num_input_tokens_seen": 327166408, "step": 5840 }, { "epoch": 13.006681514476615, "loss": 0.4613088369369507, "loss_ce": 0.0001271882065339014, "loss_iou": 0.2138671875, "loss_num": 0.0067138671875, "loss_xval": 0.4609375, "num_input_tokens_seen": 327166408, "step": 5840 }, { "epoch": 13.00890868596882, "grad_norm": 19.74036979675293, "learning_rate": 1e-06, "loss": 0.5521, "num_input_tokens_seen": 327223808, "step": 5841 }, { "epoch": 13.00890868596882, "loss": 0.39747354388237, "loss_ce": 0.00013468455290421844, "loss_iou": 0.173828125, "loss_num": 0.00970458984375, "loss_xval": 0.396484375, "num_input_tokens_seen": 327223808, "step": 5841 }, { "epoch": 13.011135857461024, "grad_norm": 40.84404754638672, "learning_rate": 1e-06, "loss": 0.5209, "num_input_tokens_seen": 327280344, "step": 5842 }, { "epoch": 13.011135857461024, "loss": 0.3576486110687256, "loss_ce": 0.0001046471661538817, "loss_iou": 0.162109375, "loss_num": 0.006500244140625, "loss_xval": 0.357421875, "num_input_tokens_seen": 327280344, "step": 5842 }, { "epoch": 13.01336302895323, "grad_norm": 25.01186180114746, "learning_rate": 1e-06, "loss": 0.4771, "num_input_tokens_seen": 327332880, "step": 5843 }, { "epoch": 13.01336302895323, "loss": 0.4530574679374695, "loss_ce": 0.00011555968376342207, "loss_iou": 0.201171875, "loss_num": 0.0101318359375, "loss_xval": 0.453125, "num_input_tokens_seen": 327332880, "step": 5843 }, { "epoch": 13.015590200445434, "grad_norm": 17.630598068237305, "learning_rate": 1e-06, "loss": 0.5489, "num_input_tokens_seen": 327390252, "step": 5844 }, { "epoch": 13.015590200445434, "loss": 0.5149204730987549, "loss_ce": 0.00011946188897127286, "loss_iou": 0.216796875, "loss_num": 0.0164794921875, "loss_xval": 0.515625, "num_input_tokens_seen": 327390252, "step": 5844 }, { "epoch": 13.017817371937639, "grad_norm": 16.314720153808594, "learning_rate": 1e-06, "loss": 0.4371, "num_input_tokens_seen": 327445668, "step": 5845 }, { "epoch": 13.017817371937639, "loss": 0.34429049491882324, "loss_ce": 0.0001132564211729914, "loss_iou": 0.142578125, "loss_num": 0.01190185546875, "loss_xval": 0.34375, "num_input_tokens_seen": 327445668, "step": 5845 }, { "epoch": 13.020044543429844, "grad_norm": 20.126976013183594, "learning_rate": 1e-06, "loss": 0.3745, "num_input_tokens_seen": 327502572, "step": 5846 }, { "epoch": 13.020044543429844, "loss": 0.3755548298358917, "loss_ce": 0.00012758253433275968, "loss_iou": 0.1787109375, "loss_num": 0.0035247802734375, "loss_xval": 0.375, "num_input_tokens_seen": 327502572, "step": 5846 }, { "epoch": 13.022271714922049, "grad_norm": 18.101938247680664, "learning_rate": 1e-06, "loss": 0.4179, "num_input_tokens_seen": 327560524, "step": 5847 }, { "epoch": 13.022271714922049, "loss": 0.4644816517829895, "loss_ce": 0.00012616706953849643, "loss_iou": 0.208984375, "loss_num": 0.0093994140625, "loss_xval": 0.46484375, "num_input_tokens_seen": 327560524, "step": 5847 }, { "epoch": 13.024498886414253, "grad_norm": 22.762983322143555, "learning_rate": 1e-06, "loss": 0.5214, "num_input_tokens_seen": 327615932, "step": 5848 }, { "epoch": 13.024498886414253, "loss": 0.6657699346542358, "loss_ce": 0.00012048571079503745, "loss_iou": 0.2890625, "loss_num": 0.0174560546875, "loss_xval": 0.6640625, "num_input_tokens_seen": 327615932, "step": 5848 }, { "epoch": 13.026726057906458, "grad_norm": 38.849609375, "learning_rate": 1e-06, "loss": 0.5811, "num_input_tokens_seen": 327669496, "step": 5849 }, { "epoch": 13.026726057906458, "loss": 0.3934794068336487, "loss_ce": 0.00010783905599964783, "loss_iou": 0.158203125, "loss_num": 0.0155029296875, "loss_xval": 0.392578125, "num_input_tokens_seen": 327669496, "step": 5849 }, { "epoch": 13.028953229398663, "grad_norm": 13.994296073913574, "learning_rate": 1e-06, "loss": 0.573, "num_input_tokens_seen": 327726764, "step": 5850 }, { "epoch": 13.028953229398663, "loss": 0.5137879252433777, "loss_ce": 0.00011606388579821214, "loss_iou": 0.22265625, "loss_num": 0.0137939453125, "loss_xval": 0.515625, "num_input_tokens_seen": 327726764, "step": 5850 }, { "epoch": 13.031180400890868, "grad_norm": 29.13859748840332, "learning_rate": 1e-06, "loss": 0.7501, "num_input_tokens_seen": 327783880, "step": 5851 }, { "epoch": 13.031180400890868, "loss": 0.9725739359855652, "loss_ce": 0.00016182669787667692, "loss_iou": 0.3828125, "loss_num": 0.041748046875, "loss_xval": 0.97265625, "num_input_tokens_seen": 327783880, "step": 5851 }, { "epoch": 13.033407572383073, "grad_norm": 15.132269859313965, "learning_rate": 1e-06, "loss": 0.4839, "num_input_tokens_seen": 327842228, "step": 5852 }, { "epoch": 13.033407572383073, "loss": 0.5506539344787598, "loss_ce": 0.00011678160808514804, "loss_iou": 0.240234375, "loss_num": 0.01416015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 327842228, "step": 5852 }, { "epoch": 13.035634743875278, "grad_norm": 21.988449096679688, "learning_rate": 1e-06, "loss": 0.4047, "num_input_tokens_seen": 327899700, "step": 5853 }, { "epoch": 13.035634743875278, "loss": 0.37145259976387024, "loss_ce": 0.00011470088065834716, "loss_iou": 0.1689453125, "loss_num": 0.00653076171875, "loss_xval": 0.37109375, "num_input_tokens_seen": 327899700, "step": 5853 }, { "epoch": 13.037861915367483, "grad_norm": 18.117429733276367, "learning_rate": 1e-06, "loss": 0.5186, "num_input_tokens_seen": 327957944, "step": 5854 }, { "epoch": 13.037861915367483, "loss": 0.3974757194519043, "loss_ce": 0.00013685536396224052, "loss_iou": 0.1767578125, "loss_num": 0.00897216796875, "loss_xval": 0.396484375, "num_input_tokens_seen": 327957944, "step": 5854 }, { "epoch": 13.040089086859687, "grad_norm": 20.6132869720459, "learning_rate": 1e-06, "loss": 0.3864, "num_input_tokens_seen": 328014804, "step": 5855 }, { "epoch": 13.040089086859687, "loss": 0.411630779504776, "loss_ce": 0.00013175193453207612, "loss_iou": 0.189453125, "loss_num": 0.00665283203125, "loss_xval": 0.412109375, "num_input_tokens_seen": 328014804, "step": 5855 }, { "epoch": 13.042316258351892, "grad_norm": 15.854263305664062, "learning_rate": 1e-06, "loss": 0.5165, "num_input_tokens_seen": 328072992, "step": 5856 }, { "epoch": 13.042316258351892, "loss": 0.5808364748954773, "loss_ce": 0.00014802644727751613, "loss_iou": 0.24609375, "loss_num": 0.017578125, "loss_xval": 0.58203125, "num_input_tokens_seen": 328072992, "step": 5856 }, { "epoch": 13.044543429844097, "grad_norm": 14.205804824829102, "learning_rate": 1e-06, "loss": 0.4901, "num_input_tokens_seen": 328130500, "step": 5857 }, { "epoch": 13.044543429844097, "loss": 0.5825508832931519, "loss_ce": 0.00015347708540502936, "loss_iou": 0.2578125, "loss_num": 0.012939453125, "loss_xval": 0.58203125, "num_input_tokens_seen": 328130500, "step": 5857 }, { "epoch": 13.046770601336302, "grad_norm": 18.12480354309082, "learning_rate": 1e-06, "loss": 0.4782, "num_input_tokens_seen": 328185140, "step": 5858 }, { "epoch": 13.046770601336302, "loss": 0.5846756100654602, "loss_ce": 0.0002029853785643354, "loss_iou": 0.2470703125, "loss_num": 0.0179443359375, "loss_xval": 0.5859375, "num_input_tokens_seen": 328185140, "step": 5858 }, { "epoch": 13.048997772828507, "grad_norm": 18.047836303710938, "learning_rate": 1e-06, "loss": 0.4255, "num_input_tokens_seen": 328239304, "step": 5859 }, { "epoch": 13.048997772828507, "loss": 0.5025643110275269, "loss_ce": 0.00012291455641388893, "loss_iou": 0.224609375, "loss_num": 0.01068115234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 328239304, "step": 5859 }, { "epoch": 13.051224944320714, "grad_norm": 19.500625610351562, "learning_rate": 1e-06, "loss": 0.5162, "num_input_tokens_seen": 328295948, "step": 5860 }, { "epoch": 13.051224944320714, "loss": 0.47790929675102234, "loss_ce": 0.0001260775316040963, "loss_iou": 0.212890625, "loss_num": 0.01031494140625, "loss_xval": 0.478515625, "num_input_tokens_seen": 328295948, "step": 5860 }, { "epoch": 13.053452115812918, "grad_norm": 30.18915557861328, "learning_rate": 1e-06, "loss": 0.7097, "num_input_tokens_seen": 328348972, "step": 5861 }, { "epoch": 13.053452115812918, "loss": 0.7575015425682068, "loss_ce": 0.0001773214025888592, "loss_iou": 0.330078125, "loss_num": 0.0194091796875, "loss_xval": 0.7578125, "num_input_tokens_seen": 328348972, "step": 5861 }, { "epoch": 13.055679287305123, "grad_norm": 15.668800354003906, "learning_rate": 1e-06, "loss": 0.3318, "num_input_tokens_seen": 328403404, "step": 5862 }, { "epoch": 13.055679287305123, "loss": 0.3245905935764313, "loss_ce": 0.0001277004776056856, "loss_iou": 0.138671875, "loss_num": 0.0093994140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 328403404, "step": 5862 }, { "epoch": 13.057906458797328, "grad_norm": 15.234992027282715, "learning_rate": 1e-06, "loss": 0.4268, "num_input_tokens_seen": 328459968, "step": 5863 }, { "epoch": 13.057906458797328, "loss": 0.5279508829116821, "loss_ce": 0.00011885567801073194, "loss_iou": 0.2265625, "loss_num": 0.01483154296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 328459968, "step": 5863 }, { "epoch": 13.060133630289533, "grad_norm": 19.412704467773438, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 328516632, "step": 5864 }, { "epoch": 13.060133630289533, "loss": 0.6012612581253052, "loss_ce": 0.0001870518026407808, "loss_iou": 0.2333984375, "loss_num": 0.0267333984375, "loss_xval": 0.6015625, "num_input_tokens_seen": 328516632, "step": 5864 }, { "epoch": 13.062360801781738, "grad_norm": 14.758916854858398, "learning_rate": 1e-06, "loss": 0.4313, "num_input_tokens_seen": 328571604, "step": 5865 }, { "epoch": 13.062360801781738, "loss": 0.37966063618659973, "loss_ce": 0.00014401419321075082, "loss_iou": 0.1708984375, "loss_num": 0.0074462890625, "loss_xval": 0.37890625, "num_input_tokens_seen": 328571604, "step": 5865 }, { "epoch": 13.064587973273943, "grad_norm": 35.47437286376953, "learning_rate": 1e-06, "loss": 0.5209, "num_input_tokens_seen": 328627528, "step": 5866 }, { "epoch": 13.064587973273943, "loss": 0.5552887916564941, "loss_ce": 0.00011297700984869152, "loss_iou": 0.2119140625, "loss_num": 0.0264892578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 328627528, "step": 5866 }, { "epoch": 13.066815144766148, "grad_norm": 24.737062454223633, "learning_rate": 1e-06, "loss": 0.3764, "num_input_tokens_seen": 328684780, "step": 5867 }, { "epoch": 13.066815144766148, "loss": 0.33116066455841064, "loss_ce": 0.00010598442167975008, "loss_iou": 0.1572265625, "loss_num": 0.0034942626953125, "loss_xval": 0.33203125, "num_input_tokens_seen": 328684780, "step": 5867 }, { "epoch": 13.069042316258352, "grad_norm": 32.23481750488281, "learning_rate": 1e-06, "loss": 0.5629, "num_input_tokens_seen": 328738920, "step": 5868 }, { "epoch": 13.069042316258352, "loss": 0.42393577098846436, "loss_ce": 0.0001076542102964595, "loss_iou": 0.193359375, "loss_num": 0.007232666015625, "loss_xval": 0.423828125, "num_input_tokens_seen": 328738920, "step": 5868 }, { "epoch": 13.071269487750557, "grad_norm": 15.290202140808105, "learning_rate": 1e-06, "loss": 0.3362, "num_input_tokens_seen": 328796456, "step": 5869 }, { "epoch": 13.071269487750557, "loss": 0.33213841915130615, "loss_ce": 0.00022925705707166344, "loss_iou": 0.125, "loss_num": 0.0162353515625, "loss_xval": 0.33203125, "num_input_tokens_seen": 328796456, "step": 5869 }, { "epoch": 13.073496659242762, "grad_norm": 20.73664093017578, "learning_rate": 1e-06, "loss": 0.5255, "num_input_tokens_seen": 328851748, "step": 5870 }, { "epoch": 13.073496659242762, "loss": 0.41393500566482544, "loss_ce": 0.0001166543661383912, "loss_iou": 0.1796875, "loss_num": 0.0108642578125, "loss_xval": 0.4140625, "num_input_tokens_seen": 328851748, "step": 5870 }, { "epoch": 13.075723830734967, "grad_norm": 19.62030601501465, "learning_rate": 1e-06, "loss": 0.4958, "num_input_tokens_seen": 328907888, "step": 5871 }, { "epoch": 13.075723830734967, "loss": 0.5278200507164001, "loss_ce": 0.00011009951413143426, "loss_iou": 0.228515625, "loss_num": 0.0142822265625, "loss_xval": 0.52734375, "num_input_tokens_seen": 328907888, "step": 5871 }, { "epoch": 13.077951002227172, "grad_norm": 24.643142700195312, "learning_rate": 1e-06, "loss": 0.5101, "num_input_tokens_seen": 328964040, "step": 5872 }, { "epoch": 13.077951002227172, "loss": 0.5145541429519653, "loss_ce": 0.0001498450292274356, "loss_iou": 0.2294921875, "loss_num": 0.0111083984375, "loss_xval": 0.515625, "num_input_tokens_seen": 328964040, "step": 5872 }, { "epoch": 13.080178173719377, "grad_norm": 18.29912567138672, "learning_rate": 1e-06, "loss": 0.4219, "num_input_tokens_seen": 329019624, "step": 5873 }, { "epoch": 13.080178173719377, "loss": 0.4937739372253418, "loss_ce": 0.00012156983575550839, "loss_iou": 0.21484375, "loss_num": 0.01263427734375, "loss_xval": 0.494140625, "num_input_tokens_seen": 329019624, "step": 5873 }, { "epoch": 13.082405345211582, "grad_norm": 16.88625717163086, "learning_rate": 1e-06, "loss": 0.4574, "num_input_tokens_seen": 329077484, "step": 5874 }, { "epoch": 13.082405345211582, "loss": 0.3620465397834778, "loss_ce": 0.00010803519398905337, "loss_iou": 0.1611328125, "loss_num": 0.008056640625, "loss_xval": 0.361328125, "num_input_tokens_seen": 329077484, "step": 5874 }, { "epoch": 13.084632516703786, "grad_norm": 13.229454040527344, "learning_rate": 1e-06, "loss": 0.5683, "num_input_tokens_seen": 329135212, "step": 5875 }, { "epoch": 13.084632516703786, "loss": 0.6875127553939819, "loss_ce": 0.0001348142686765641, "loss_iou": 0.271484375, "loss_num": 0.02880859375, "loss_xval": 0.6875, "num_input_tokens_seen": 329135212, "step": 5875 }, { "epoch": 13.086859688195991, "grad_norm": 30.31863021850586, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 329190660, "step": 5876 }, { "epoch": 13.086859688195991, "loss": 0.5497363805770874, "loss_ce": 0.0001758422004058957, "loss_iou": 0.2353515625, "loss_num": 0.0157470703125, "loss_xval": 0.55078125, "num_input_tokens_seen": 329190660, "step": 5876 }, { "epoch": 13.089086859688196, "grad_norm": 16.870037078857422, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 329247060, "step": 5877 }, { "epoch": 13.089086859688196, "loss": 0.36009788513183594, "loss_ce": 0.00011254477431066334, "loss_iou": 0.162109375, "loss_num": 0.007293701171875, "loss_xval": 0.359375, "num_input_tokens_seen": 329247060, "step": 5877 }, { "epoch": 13.091314031180401, "grad_norm": 21.952919006347656, "learning_rate": 1e-06, "loss": 0.4696, "num_input_tokens_seen": 329301984, "step": 5878 }, { "epoch": 13.091314031180401, "loss": 0.4436010420322418, "loss_ce": 0.00011958822869928554, "loss_iou": 0.2001953125, "loss_num": 0.0087890625, "loss_xval": 0.443359375, "num_input_tokens_seen": 329301984, "step": 5878 }, { "epoch": 13.093541202672606, "grad_norm": 18.792381286621094, "learning_rate": 1e-06, "loss": 0.4403, "num_input_tokens_seen": 329360572, "step": 5879 }, { "epoch": 13.093541202672606, "loss": 0.5003446340560913, "loss_ce": 0.00010050551645690575, "loss_iou": 0.21484375, "loss_num": 0.01397705078125, "loss_xval": 0.5, "num_input_tokens_seen": 329360572, "step": 5879 }, { "epoch": 13.09576837416481, "grad_norm": 18.393442153930664, "learning_rate": 1e-06, "loss": 0.6429, "num_input_tokens_seen": 329417288, "step": 5880 }, { "epoch": 13.09576837416481, "loss": 0.7582250833511353, "loss_ce": 0.00016841033357195556, "loss_iou": 0.314453125, "loss_num": 0.0257568359375, "loss_xval": 0.7578125, "num_input_tokens_seen": 329417288, "step": 5880 }, { "epoch": 13.097995545657016, "grad_norm": 13.813261985778809, "learning_rate": 1e-06, "loss": 0.4882, "num_input_tokens_seen": 329473344, "step": 5881 }, { "epoch": 13.097995545657016, "loss": 0.6629891395568848, "loss_ce": 0.0001473267620895058, "loss_iou": 0.271484375, "loss_num": 0.0235595703125, "loss_xval": 0.6640625, "num_input_tokens_seen": 329473344, "step": 5881 }, { "epoch": 13.10022271714922, "grad_norm": 15.445968627929688, "learning_rate": 1e-06, "loss": 0.4329, "num_input_tokens_seen": 329531204, "step": 5882 }, { "epoch": 13.10022271714922, "loss": 0.43495747447013855, "loss_ce": 0.00014300504699349403, "loss_iou": 0.197265625, "loss_num": 0.008056640625, "loss_xval": 0.435546875, "num_input_tokens_seen": 329531204, "step": 5882 }, { "epoch": 13.102449888641425, "grad_norm": 14.054932594299316, "learning_rate": 1e-06, "loss": 0.4633, "num_input_tokens_seen": 329586744, "step": 5883 }, { "epoch": 13.102449888641425, "loss": 0.5273807644844055, "loss_ce": 0.0005252888076938689, "loss_iou": 0.2353515625, "loss_num": 0.01129150390625, "loss_xval": 0.52734375, "num_input_tokens_seen": 329586744, "step": 5883 }, { "epoch": 13.10467706013363, "grad_norm": 16.611223220825195, "learning_rate": 1e-06, "loss": 0.397, "num_input_tokens_seen": 329642216, "step": 5884 }, { "epoch": 13.10467706013363, "loss": 0.4468822479248047, "loss_ce": 0.00010488741099834442, "loss_iou": 0.19921875, "loss_num": 0.00946044921875, "loss_xval": 0.447265625, "num_input_tokens_seen": 329642216, "step": 5884 }, { "epoch": 13.106904231625835, "grad_norm": 18.158891677856445, "learning_rate": 1e-06, "loss": 0.324, "num_input_tokens_seen": 329698692, "step": 5885 }, { "epoch": 13.106904231625835, "loss": 0.3763335049152374, "loss_ce": 0.00011280621401965618, "loss_iou": 0.1630859375, "loss_num": 0.0101318359375, "loss_xval": 0.376953125, "num_input_tokens_seen": 329698692, "step": 5885 }, { "epoch": 13.10913140311804, "grad_norm": 20.2602481842041, "learning_rate": 1e-06, "loss": 0.4628, "num_input_tokens_seen": 329753408, "step": 5886 }, { "epoch": 13.10913140311804, "loss": 0.590545117855072, "loss_ce": 0.0005792796146124601, "loss_iou": 0.232421875, "loss_num": 0.0247802734375, "loss_xval": 0.58984375, "num_input_tokens_seen": 329753408, "step": 5886 }, { "epoch": 13.111358574610245, "grad_norm": 17.374492645263672, "learning_rate": 1e-06, "loss": 0.5085, "num_input_tokens_seen": 329811252, "step": 5887 }, { "epoch": 13.111358574610245, "loss": 0.6364624500274658, "loss_ce": 0.00010987733548972756, "loss_iou": 0.28125, "loss_num": 0.0150146484375, "loss_xval": 0.63671875, "num_input_tokens_seen": 329811252, "step": 5887 }, { "epoch": 13.11358574610245, "grad_norm": 24.767221450805664, "learning_rate": 1e-06, "loss": 0.6877, "num_input_tokens_seen": 329864808, "step": 5888 }, { "epoch": 13.11358574610245, "loss": 0.9313529133796692, "loss_ce": 0.0002005502174142748, "loss_iou": 0.384765625, "loss_num": 0.0322265625, "loss_xval": 0.9296875, "num_input_tokens_seen": 329864808, "step": 5888 }, { "epoch": 13.115812917594655, "grad_norm": 18.444570541381836, "learning_rate": 1e-06, "loss": 0.3468, "num_input_tokens_seen": 329923220, "step": 5889 }, { "epoch": 13.115812917594655, "loss": 0.38743722438812256, "loss_ce": 0.00010810734238475561, "loss_iou": 0.1806640625, "loss_num": 0.0052490234375, "loss_xval": 0.38671875, "num_input_tokens_seen": 329923220, "step": 5889 }, { "epoch": 13.11804008908686, "grad_norm": 14.813982009887695, "learning_rate": 1e-06, "loss": 0.5645, "num_input_tokens_seen": 329982272, "step": 5890 }, { "epoch": 13.11804008908686, "loss": 0.7087510824203491, "loss_ce": 0.00013293002848513424, "loss_iou": 0.296875, "loss_num": 0.02294921875, "loss_xval": 0.70703125, "num_input_tokens_seen": 329982272, "step": 5890 }, { "epoch": 13.120267260579064, "grad_norm": 17.98593521118164, "learning_rate": 1e-06, "loss": 0.6082, "num_input_tokens_seen": 330037796, "step": 5891 }, { "epoch": 13.120267260579064, "loss": 0.6543379426002502, "loss_ce": 0.00031572196166962385, "loss_iou": 0.265625, "loss_num": 0.0247802734375, "loss_xval": 0.65234375, "num_input_tokens_seen": 330037796, "step": 5891 }, { "epoch": 13.122494432071269, "grad_norm": 16.788930892944336, "learning_rate": 1e-06, "loss": 0.4808, "num_input_tokens_seen": 330092396, "step": 5892 }, { "epoch": 13.122494432071269, "loss": 0.5958355665206909, "loss_ce": 0.00013241068518254906, "loss_iou": 0.23828125, "loss_num": 0.0238037109375, "loss_xval": 0.59375, "num_input_tokens_seen": 330092396, "step": 5892 }, { "epoch": 13.124721603563474, "grad_norm": 17.42109489440918, "learning_rate": 1e-06, "loss": 0.405, "num_input_tokens_seen": 330147804, "step": 5893 }, { "epoch": 13.124721603563474, "loss": 0.4320144057273865, "loss_ce": 0.000129641528474167, "loss_iou": 0.193359375, "loss_num": 0.00933837890625, "loss_xval": 0.431640625, "num_input_tokens_seen": 330147804, "step": 5893 }, { "epoch": 13.126948775055679, "grad_norm": 17.75126838684082, "learning_rate": 1e-06, "loss": 0.5447, "num_input_tokens_seen": 330205316, "step": 5894 }, { "epoch": 13.126948775055679, "loss": 0.6002463698387146, "loss_ce": 0.00014872349856887013, "loss_iou": 0.2470703125, "loss_num": 0.0213623046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 330205316, "step": 5894 }, { "epoch": 13.129175946547884, "grad_norm": 22.149547576904297, "learning_rate": 1e-06, "loss": 0.4957, "num_input_tokens_seen": 330261848, "step": 5895 }, { "epoch": 13.129175946547884, "loss": 0.6235384941101074, "loss_ce": 0.00012537876318674535, "loss_iou": 0.275390625, "loss_num": 0.0146484375, "loss_xval": 0.625, "num_input_tokens_seen": 330261848, "step": 5895 }, { "epoch": 13.131403118040089, "grad_norm": 18.989896774291992, "learning_rate": 1e-06, "loss": 0.5355, "num_input_tokens_seen": 330318236, "step": 5896 }, { "epoch": 13.131403118040089, "loss": 0.4923059344291687, "loss_ce": 0.00011842740059364587, "loss_iou": 0.208984375, "loss_num": 0.01495361328125, "loss_xval": 0.4921875, "num_input_tokens_seen": 330318236, "step": 5896 }, { "epoch": 13.133630289532293, "grad_norm": 15.263077735900879, "learning_rate": 1e-06, "loss": 0.486, "num_input_tokens_seen": 330377552, "step": 5897 }, { "epoch": 13.133630289532293, "loss": 0.4600605368614197, "loss_ce": 9.961408795788884e-05, "loss_iou": 0.1796875, "loss_num": 0.0201416015625, "loss_xval": 0.4609375, "num_input_tokens_seen": 330377552, "step": 5897 }, { "epoch": 13.135857461024498, "grad_norm": 17.812576293945312, "learning_rate": 1e-06, "loss": 0.5491, "num_input_tokens_seen": 330433484, "step": 5898 }, { "epoch": 13.135857461024498, "loss": 0.5521707534790039, "loss_ce": 0.00010780214506667107, "loss_iou": 0.234375, "loss_num": 0.0164794921875, "loss_xval": 0.55078125, "num_input_tokens_seen": 330433484, "step": 5898 }, { "epoch": 13.138084632516703, "grad_norm": 28.198577880859375, "learning_rate": 1e-06, "loss": 0.6077, "num_input_tokens_seen": 330489352, "step": 5899 }, { "epoch": 13.138084632516703, "loss": 0.6835901737213135, "loss_ce": 0.0001184626089525409, "loss_iou": 0.2890625, "loss_num": 0.0205078125, "loss_xval": 0.68359375, "num_input_tokens_seen": 330489352, "step": 5899 }, { "epoch": 13.140311804008908, "grad_norm": 16.04581069946289, "learning_rate": 1e-06, "loss": 0.5163, "num_input_tokens_seen": 330544888, "step": 5900 }, { "epoch": 13.140311804008908, "loss": 0.5014405846595764, "loss_ce": 0.00012069322110619396, "loss_iou": 0.21875, "loss_num": 0.01275634765625, "loss_xval": 0.5, "num_input_tokens_seen": 330544888, "step": 5900 }, { "epoch": 13.142538975501113, "grad_norm": 18.238014221191406, "learning_rate": 1e-06, "loss": 0.3431, "num_input_tokens_seen": 330602552, "step": 5901 }, { "epoch": 13.142538975501113, "loss": 0.3762207627296448, "loss_ce": 0.0001221080165123567, "loss_iou": 0.1708984375, "loss_num": 0.006927490234375, "loss_xval": 0.376953125, "num_input_tokens_seen": 330602552, "step": 5901 }, { "epoch": 13.144766146993318, "grad_norm": 32.46430206298828, "learning_rate": 1e-06, "loss": 0.5533, "num_input_tokens_seen": 330657020, "step": 5902 }, { "epoch": 13.144766146993318, "loss": 0.6620012521743774, "loss_ce": 0.00013605058484245092, "loss_iou": 0.27734375, "loss_num": 0.0216064453125, "loss_xval": 0.66015625, "num_input_tokens_seen": 330657020, "step": 5902 }, { "epoch": 13.146993318485523, "grad_norm": 30.36663246154785, "learning_rate": 1e-06, "loss": 0.4032, "num_input_tokens_seen": 330713536, "step": 5903 }, { "epoch": 13.146993318485523, "loss": 0.36765336990356445, "loss_ce": 0.0005879499949514866, "loss_iou": 0.146484375, "loss_num": 0.0147705078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 330713536, "step": 5903 }, { "epoch": 13.14922048997773, "grad_norm": 15.999008178710938, "learning_rate": 1e-06, "loss": 0.4386, "num_input_tokens_seen": 330767572, "step": 5904 }, { "epoch": 13.14922048997773, "loss": 0.5433182716369629, "loss_ce": 0.00010540579387452453, "loss_iou": 0.2265625, "loss_num": 0.0181884765625, "loss_xval": 0.54296875, "num_input_tokens_seen": 330767572, "step": 5904 }, { "epoch": 13.151447661469934, "grad_norm": 17.65342903137207, "learning_rate": 1e-06, "loss": 0.3948, "num_input_tokens_seen": 330825640, "step": 5905 }, { "epoch": 13.151447661469934, "loss": 0.3161456882953644, "loss_ce": 0.000105641498521436, "loss_iou": 0.1396484375, "loss_num": 0.0074462890625, "loss_xval": 0.31640625, "num_input_tokens_seen": 330825640, "step": 5905 }, { "epoch": 13.153674832962139, "grad_norm": 31.29542350769043, "learning_rate": 1e-06, "loss": 0.7188, "num_input_tokens_seen": 330884276, "step": 5906 }, { "epoch": 13.153674832962139, "loss": 0.7189792990684509, "loss_ce": 0.00010728358756750822, "loss_iou": 0.31640625, "loss_num": 0.0174560546875, "loss_xval": 0.71875, "num_input_tokens_seen": 330884276, "step": 5906 }, { "epoch": 13.155902004454344, "grad_norm": 18.346426010131836, "learning_rate": 1e-06, "loss": 0.5431, "num_input_tokens_seen": 330938632, "step": 5907 }, { "epoch": 13.155902004454344, "loss": 0.4295383095741272, "loss_ce": 9.494097321294248e-05, "loss_iou": 0.169921875, "loss_num": 0.0179443359375, "loss_xval": 0.4296875, "num_input_tokens_seen": 330938632, "step": 5907 }, { "epoch": 13.158129175946549, "grad_norm": 22.875947952270508, "learning_rate": 1e-06, "loss": 0.6056, "num_input_tokens_seen": 330995628, "step": 5908 }, { "epoch": 13.158129175946549, "loss": 0.5698830485343933, "loss_ce": 0.00011983538570348173, "loss_iou": 0.23046875, "loss_num": 0.021728515625, "loss_xval": 0.5703125, "num_input_tokens_seen": 330995628, "step": 5908 }, { "epoch": 13.160356347438753, "grad_norm": 15.61497974395752, "learning_rate": 1e-06, "loss": 0.4997, "num_input_tokens_seen": 331049196, "step": 5909 }, { "epoch": 13.160356347438753, "loss": 0.37679868936538696, "loss_ce": 8.96861165529117e-05, "loss_iou": 0.1513671875, "loss_num": 0.0147705078125, "loss_xval": 0.376953125, "num_input_tokens_seen": 331049196, "step": 5909 }, { "epoch": 13.162583518930958, "grad_norm": 15.534133911132812, "learning_rate": 1e-06, "loss": 0.6097, "num_input_tokens_seen": 331104396, "step": 5910 }, { "epoch": 13.162583518930958, "loss": 0.8099589347839355, "loss_ce": 0.0001444965455448255, "loss_iou": 0.33984375, "loss_num": 0.0262451171875, "loss_xval": 0.80859375, "num_input_tokens_seen": 331104396, "step": 5910 }, { "epoch": 13.164810690423163, "grad_norm": 41.32310104370117, "learning_rate": 1e-06, "loss": 0.581, "num_input_tokens_seen": 331159788, "step": 5911 }, { "epoch": 13.164810690423163, "loss": 0.4186583161354065, "loss_ce": 0.00014023938274476677, "loss_iou": 0.181640625, "loss_num": 0.010986328125, "loss_xval": 0.41796875, "num_input_tokens_seen": 331159788, "step": 5911 }, { "epoch": 13.167037861915368, "grad_norm": 17.732685089111328, "learning_rate": 1e-06, "loss": 0.5119, "num_input_tokens_seen": 331216032, "step": 5912 }, { "epoch": 13.167037861915368, "loss": 0.588236391544342, "loss_ce": 0.00010163354454562068, "loss_iou": 0.2578125, "loss_num": 0.014892578125, "loss_xval": 0.58984375, "num_input_tokens_seen": 331216032, "step": 5912 }, { "epoch": 13.169265033407573, "grad_norm": 20.534914016723633, "learning_rate": 1e-06, "loss": 0.3891, "num_input_tokens_seen": 331273480, "step": 5913 }, { "epoch": 13.169265033407573, "loss": 0.3801114559173584, "loss_ce": 0.00010655220103217289, "loss_iou": 0.169921875, "loss_num": 0.0081787109375, "loss_xval": 0.380859375, "num_input_tokens_seen": 331273480, "step": 5913 }, { "epoch": 13.171492204899778, "grad_norm": 45.9957389831543, "learning_rate": 1e-06, "loss": 0.5419, "num_input_tokens_seen": 331328248, "step": 5914 }, { "epoch": 13.171492204899778, "loss": 0.45751655101776123, "loss_ce": 0.00011909787281183526, "loss_iou": 0.208984375, "loss_num": 0.0076904296875, "loss_xval": 0.45703125, "num_input_tokens_seen": 331328248, "step": 5914 }, { "epoch": 13.173719376391983, "grad_norm": 21.617551803588867, "learning_rate": 1e-06, "loss": 0.4976, "num_input_tokens_seen": 331383188, "step": 5915 }, { "epoch": 13.173719376391983, "loss": 0.5962991118431091, "loss_ce": 0.0010842570336535573, "loss_iou": 0.26171875, "loss_num": 0.01397705078125, "loss_xval": 0.59375, "num_input_tokens_seen": 331383188, "step": 5915 }, { "epoch": 13.175946547884188, "grad_norm": 18.615833282470703, "learning_rate": 1e-06, "loss": 0.442, "num_input_tokens_seen": 331439448, "step": 5916 }, { "epoch": 13.175946547884188, "loss": 0.6015082001686096, "loss_ce": 0.00022037216695025563, "loss_iou": 0.236328125, "loss_num": 0.02587890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 331439448, "step": 5916 }, { "epoch": 13.178173719376392, "grad_norm": 26.171741485595703, "learning_rate": 1e-06, "loss": 0.5615, "num_input_tokens_seen": 331492752, "step": 5917 }, { "epoch": 13.178173719376392, "loss": 0.47891539335250854, "loss_ce": 0.00015565170906484127, "loss_iou": 0.224609375, "loss_num": 0.006011962890625, "loss_xval": 0.478515625, "num_input_tokens_seen": 331492752, "step": 5917 }, { "epoch": 13.180400890868597, "grad_norm": 29.380762100219727, "learning_rate": 1e-06, "loss": 0.4608, "num_input_tokens_seen": 331548328, "step": 5918 }, { "epoch": 13.180400890868597, "loss": 0.37973326444625854, "loss_ce": 9.459797001909465e-05, "loss_iou": 0.154296875, "loss_num": 0.01409912109375, "loss_xval": 0.37890625, "num_input_tokens_seen": 331548328, "step": 5918 }, { "epoch": 13.182628062360802, "grad_norm": 19.07701301574707, "learning_rate": 1e-06, "loss": 0.5202, "num_input_tokens_seen": 331603848, "step": 5919 }, { "epoch": 13.182628062360802, "loss": 0.5613878965377808, "loss_ce": 0.00010859225585591048, "loss_iou": 0.24609375, "loss_num": 0.013671875, "loss_xval": 0.5625, "num_input_tokens_seen": 331603848, "step": 5919 }, { "epoch": 13.184855233853007, "grad_norm": 23.228954315185547, "learning_rate": 1e-06, "loss": 0.4674, "num_input_tokens_seen": 331663044, "step": 5920 }, { "epoch": 13.184855233853007, "loss": 0.6362162828445435, "loss_ce": 0.00010789166844915599, "loss_iou": 0.27734375, "loss_num": 0.0167236328125, "loss_xval": 0.63671875, "num_input_tokens_seen": 331663044, "step": 5920 }, { "epoch": 13.187082405345212, "grad_norm": 24.631132125854492, "learning_rate": 1e-06, "loss": 0.7198, "num_input_tokens_seen": 331717048, "step": 5921 }, { "epoch": 13.187082405345212, "loss": 0.5893706679344177, "loss_ce": 0.00013725095777772367, "loss_iou": 0.2578125, "loss_num": 0.01470947265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 331717048, "step": 5921 }, { "epoch": 13.189309576837417, "grad_norm": 27.25778579711914, "learning_rate": 1e-06, "loss": 0.4561, "num_input_tokens_seen": 331770924, "step": 5922 }, { "epoch": 13.189309576837417, "loss": 0.3740364909172058, "loss_ce": 0.00013512761506717652, "loss_iou": 0.1572265625, "loss_num": 0.0120849609375, "loss_xval": 0.373046875, "num_input_tokens_seen": 331770924, "step": 5922 }, { "epoch": 13.191536748329622, "grad_norm": 13.549928665161133, "learning_rate": 1e-06, "loss": 0.5417, "num_input_tokens_seen": 331826796, "step": 5923 }, { "epoch": 13.191536748329622, "loss": 0.3583794832229614, "loss_ce": 0.00010311156802345067, "loss_iou": 0.16015625, "loss_num": 0.00738525390625, "loss_xval": 0.357421875, "num_input_tokens_seen": 331826796, "step": 5923 }, { "epoch": 13.193763919821826, "grad_norm": 19.371231079101562, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 331882152, "step": 5924 }, { "epoch": 13.193763919821826, "loss": 0.6329140067100525, "loss_ce": 0.00010151312744710594, "loss_iou": 0.291015625, "loss_num": 0.01019287109375, "loss_xval": 0.6328125, "num_input_tokens_seen": 331882152, "step": 5924 }, { "epoch": 13.195991091314031, "grad_norm": 20.2581787109375, "learning_rate": 1e-06, "loss": 0.4575, "num_input_tokens_seen": 331939604, "step": 5925 }, { "epoch": 13.195991091314031, "loss": 0.3990633487701416, "loss_ce": 0.0001375791907776147, "loss_iou": 0.1708984375, "loss_num": 0.011474609375, "loss_xval": 0.3984375, "num_input_tokens_seen": 331939604, "step": 5925 }, { "epoch": 13.198218262806236, "grad_norm": 86.76207733154297, "learning_rate": 1e-06, "loss": 0.5783, "num_input_tokens_seen": 331993812, "step": 5926 }, { "epoch": 13.198218262806236, "loss": 0.5379533171653748, "loss_ce": 0.00011149346391903237, "loss_iou": 0.2392578125, "loss_num": 0.0118408203125, "loss_xval": 0.5390625, "num_input_tokens_seen": 331993812, "step": 5926 }, { "epoch": 13.200445434298441, "grad_norm": 19.799470901489258, "learning_rate": 1e-06, "loss": 0.5504, "num_input_tokens_seen": 332050244, "step": 5927 }, { "epoch": 13.200445434298441, "loss": 0.5327932834625244, "loss_ce": 0.00020055694039911032, "loss_iou": 0.23828125, "loss_num": 0.01129150390625, "loss_xval": 0.53125, "num_input_tokens_seen": 332050244, "step": 5927 }, { "epoch": 13.202672605790646, "grad_norm": 21.825603485107422, "learning_rate": 1e-06, "loss": 0.5413, "num_input_tokens_seen": 332107696, "step": 5928 }, { "epoch": 13.202672605790646, "loss": 0.4124048948287964, "loss_ce": 0.00011244384222663939, "loss_iou": 0.1845703125, "loss_num": 0.00885009765625, "loss_xval": 0.412109375, "num_input_tokens_seen": 332107696, "step": 5928 }, { "epoch": 13.20489977728285, "grad_norm": 22.338329315185547, "learning_rate": 1e-06, "loss": 0.4693, "num_input_tokens_seen": 332164076, "step": 5929 }, { "epoch": 13.20489977728285, "loss": 0.2473558485507965, "loss_ce": 0.00010242719145026058, "loss_iou": 0.10595703125, "loss_num": 0.007110595703125, "loss_xval": 0.2470703125, "num_input_tokens_seen": 332164076, "step": 5929 }, { "epoch": 13.207126948775056, "grad_norm": 13.758119583129883, "learning_rate": 1e-06, "loss": 0.3369, "num_input_tokens_seen": 332222060, "step": 5930 }, { "epoch": 13.207126948775056, "loss": 0.3643418550491333, "loss_ce": 0.00032818166073411703, "loss_iou": 0.140625, "loss_num": 0.0167236328125, "loss_xval": 0.36328125, "num_input_tokens_seen": 332222060, "step": 5930 }, { "epoch": 13.20935412026726, "grad_norm": 20.242292404174805, "learning_rate": 1e-06, "loss": 0.4062, "num_input_tokens_seen": 332280184, "step": 5931 }, { "epoch": 13.20935412026726, "loss": 0.3549681603908539, "loss_ce": 0.00023182205040939152, "loss_iou": 0.16015625, "loss_num": 0.007080078125, "loss_xval": 0.35546875, "num_input_tokens_seen": 332280184, "step": 5931 }, { "epoch": 13.211581291759465, "grad_norm": 57.8745002746582, "learning_rate": 1e-06, "loss": 0.5825, "num_input_tokens_seen": 332335648, "step": 5932 }, { "epoch": 13.211581291759465, "loss": 0.5452816486358643, "loss_ce": 0.00011567381443455815, "loss_iou": 0.216796875, "loss_num": 0.0224609375, "loss_xval": 0.546875, "num_input_tokens_seen": 332335648, "step": 5932 }, { "epoch": 13.21380846325167, "grad_norm": 23.37744140625, "learning_rate": 1e-06, "loss": 0.5968, "num_input_tokens_seen": 332391416, "step": 5933 }, { "epoch": 13.21380846325167, "loss": 0.5802435278892517, "loss_ce": 0.00034849648363888264, "loss_iou": 0.240234375, "loss_num": 0.0198974609375, "loss_xval": 0.578125, "num_input_tokens_seen": 332391416, "step": 5933 }, { "epoch": 13.216035634743875, "grad_norm": 24.591421127319336, "learning_rate": 1e-06, "loss": 0.4096, "num_input_tokens_seen": 332448788, "step": 5934 }, { "epoch": 13.216035634743875, "loss": 0.5153904557228088, "loss_ce": 0.00013165900600142777, "loss_iou": 0.23828125, "loss_num": 0.00787353515625, "loss_xval": 0.515625, "num_input_tokens_seen": 332448788, "step": 5934 }, { "epoch": 13.21826280623608, "grad_norm": 23.88517951965332, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 332505524, "step": 5935 }, { "epoch": 13.21826280623608, "loss": 0.4439461827278137, "loss_ce": 9.8556381999515e-05, "loss_iou": 0.1826171875, "loss_num": 0.01556396484375, "loss_xval": 0.443359375, "num_input_tokens_seen": 332505524, "step": 5935 }, { "epoch": 13.220489977728285, "grad_norm": 18.264699935913086, "learning_rate": 1e-06, "loss": 0.4795, "num_input_tokens_seen": 332562732, "step": 5936 }, { "epoch": 13.220489977728285, "loss": 0.5457608699798584, "loss_ce": 0.00010662610293366015, "loss_iou": 0.228515625, "loss_num": 0.017822265625, "loss_xval": 0.546875, "num_input_tokens_seen": 332562732, "step": 5936 }, { "epoch": 13.22271714922049, "grad_norm": 20.514163970947266, "learning_rate": 1e-06, "loss": 0.5394, "num_input_tokens_seen": 332617164, "step": 5937 }, { "epoch": 13.22271714922049, "loss": 0.5808427333831787, "loss_ce": 0.00015426846221089363, "loss_iou": 0.25390625, "loss_num": 0.0147705078125, "loss_xval": 0.58203125, "num_input_tokens_seen": 332617164, "step": 5937 }, { "epoch": 13.224944320712694, "grad_norm": 19.999271392822266, "learning_rate": 1e-06, "loss": 0.4511, "num_input_tokens_seen": 332672196, "step": 5938 }, { "epoch": 13.224944320712694, "loss": 0.5069501996040344, "loss_ce": 0.00017530655895825475, "loss_iou": 0.208984375, "loss_num": 0.0179443359375, "loss_xval": 0.5078125, "num_input_tokens_seen": 332672196, "step": 5938 }, { "epoch": 13.2271714922049, "grad_norm": 19.271041870117188, "learning_rate": 1e-06, "loss": 0.5313, "num_input_tokens_seen": 332729132, "step": 5939 }, { "epoch": 13.2271714922049, "loss": 0.5557813048362732, "loss_ce": 0.00011723280476871878, "loss_iou": 0.255859375, "loss_num": 0.00860595703125, "loss_xval": 0.5546875, "num_input_tokens_seen": 332729132, "step": 5939 }, { "epoch": 13.229398663697104, "grad_norm": 22.940664291381836, "learning_rate": 1e-06, "loss": 0.7251, "num_input_tokens_seen": 332783480, "step": 5940 }, { "epoch": 13.229398663697104, "loss": 0.8985084891319275, "loss_ce": 0.00019303051522001624, "loss_iou": 0.330078125, "loss_num": 0.0478515625, "loss_xval": 0.8984375, "num_input_tokens_seen": 332783480, "step": 5940 }, { "epoch": 13.231625835189309, "grad_norm": 17.761545181274414, "learning_rate": 1e-06, "loss": 0.4187, "num_input_tokens_seen": 332840084, "step": 5941 }, { "epoch": 13.231625835189309, "loss": 0.5540998578071594, "loss_ce": 0.00014480168465524912, "loss_iou": 0.244140625, "loss_num": 0.0133056640625, "loss_xval": 0.5546875, "num_input_tokens_seen": 332840084, "step": 5941 }, { "epoch": 13.233853006681514, "grad_norm": 13.681570053100586, "learning_rate": 1e-06, "loss": 0.36, "num_input_tokens_seen": 332896372, "step": 5942 }, { "epoch": 13.233853006681514, "loss": 0.34540247917175293, "loss_ce": 0.00018764848937280476, "loss_iou": 0.1494140625, "loss_num": 0.0091552734375, "loss_xval": 0.345703125, "num_input_tokens_seen": 332896372, "step": 5942 }, { "epoch": 13.236080178173719, "grad_norm": 17.690418243408203, "learning_rate": 1e-06, "loss": 0.5782, "num_input_tokens_seen": 332953536, "step": 5943 }, { "epoch": 13.236080178173719, "loss": 0.43799030780792236, "loss_ce": 0.00012410686758812517, "loss_iou": 0.20703125, "loss_num": 0.004608154296875, "loss_xval": 0.4375, "num_input_tokens_seen": 332953536, "step": 5943 }, { "epoch": 13.238307349665924, "grad_norm": 25.94896125793457, "learning_rate": 1e-06, "loss": 0.5025, "num_input_tokens_seen": 333008228, "step": 5944 }, { "epoch": 13.238307349665924, "loss": 0.5409551858901978, "loss_ce": 0.00012263350072316825, "loss_iou": 0.2314453125, "loss_num": 0.0157470703125, "loss_xval": 0.5390625, "num_input_tokens_seen": 333008228, "step": 5944 }, { "epoch": 13.240534521158128, "grad_norm": 20.952259063720703, "learning_rate": 1e-06, "loss": 0.4657, "num_input_tokens_seen": 333065924, "step": 5945 }, { "epoch": 13.240534521158128, "loss": 0.2911282777786255, "loss_ce": 0.00011265826469752938, "loss_iou": 0.1328125, "loss_num": 0.004913330078125, "loss_xval": 0.291015625, "num_input_tokens_seen": 333065924, "step": 5945 }, { "epoch": 13.242761692650333, "grad_norm": 117.26244354248047, "learning_rate": 1e-06, "loss": 0.6348, "num_input_tokens_seen": 333117748, "step": 5946 }, { "epoch": 13.242761692650333, "loss": 0.9416841864585876, "loss_ce": 0.00015579743194393814, "loss_iou": 0.421875, "loss_num": 0.0196533203125, "loss_xval": 0.94140625, "num_input_tokens_seen": 333117748, "step": 5946 }, { "epoch": 13.244988864142538, "grad_norm": 20.61402130126953, "learning_rate": 1e-06, "loss": 0.5212, "num_input_tokens_seen": 333175660, "step": 5947 }, { "epoch": 13.244988864142538, "loss": 0.5732554793357849, "loss_ce": 0.00025745650054886937, "loss_iou": 0.2470703125, "loss_num": 0.0157470703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 333175660, "step": 5947 }, { "epoch": 13.247216035634743, "grad_norm": 18.954225540161133, "learning_rate": 1e-06, "loss": 0.621, "num_input_tokens_seen": 333231676, "step": 5948 }, { "epoch": 13.247216035634743, "loss": 0.6287169456481934, "loss_ce": 0.0002990136854350567, "loss_iou": 0.271484375, "loss_num": 0.016845703125, "loss_xval": 0.62890625, "num_input_tokens_seen": 333231676, "step": 5948 }, { "epoch": 13.249443207126948, "grad_norm": 16.13975715637207, "learning_rate": 1e-06, "loss": 0.5298, "num_input_tokens_seen": 333287440, "step": 5949 }, { "epoch": 13.249443207126948, "loss": 0.5134612917900085, "loss_ce": 0.00015562049520667642, "loss_iou": 0.224609375, "loss_num": 0.01275634765625, "loss_xval": 0.51171875, "num_input_tokens_seen": 333287440, "step": 5949 }, { "epoch": 13.251670378619155, "grad_norm": 21.270713806152344, "learning_rate": 1e-06, "loss": 0.4151, "num_input_tokens_seen": 333340480, "step": 5950 }, { "epoch": 13.251670378619155, "loss": 0.4000067412853241, "loss_ce": 0.00010438874596729875, "loss_iou": 0.1728515625, "loss_num": 0.01080322265625, "loss_xval": 0.400390625, "num_input_tokens_seen": 333340480, "step": 5950 }, { "epoch": 13.25389755011136, "grad_norm": 12.74732780456543, "learning_rate": 1e-06, "loss": 0.3358, "num_input_tokens_seen": 333396808, "step": 5951 }, { "epoch": 13.25389755011136, "loss": 0.2938316762447357, "loss_ce": 0.00013049585686530918, "loss_iou": 0.109375, "loss_num": 0.01507568359375, "loss_xval": 0.29296875, "num_input_tokens_seen": 333396808, "step": 5951 }, { "epoch": 13.256124721603564, "grad_norm": 16.515743255615234, "learning_rate": 1e-06, "loss": 0.4615, "num_input_tokens_seen": 333450628, "step": 5952 }, { "epoch": 13.256124721603564, "loss": 0.48923662304878235, "loss_ce": 0.00010087803093483672, "loss_iou": 0.201171875, "loss_num": 0.0172119140625, "loss_xval": 0.48828125, "num_input_tokens_seen": 333450628, "step": 5952 }, { "epoch": 13.25835189309577, "grad_norm": 29.306018829345703, "learning_rate": 1e-06, "loss": 0.3888, "num_input_tokens_seen": 333507188, "step": 5953 }, { "epoch": 13.25835189309577, "loss": 0.2866702973842621, "loss_ce": 0.00011024670675396919, "loss_iou": 0.1259765625, "loss_num": 0.006805419921875, "loss_xval": 0.287109375, "num_input_tokens_seen": 333507188, "step": 5953 }, { "epoch": 13.260579064587974, "grad_norm": 17.44719696044922, "learning_rate": 1e-06, "loss": 0.5703, "num_input_tokens_seen": 333560676, "step": 5954 }, { "epoch": 13.260579064587974, "loss": 0.6639593839645386, "loss_ce": 0.00014105206355452538, "loss_iou": 0.302734375, "loss_num": 0.01153564453125, "loss_xval": 0.6640625, "num_input_tokens_seen": 333560676, "step": 5954 }, { "epoch": 13.262806236080179, "grad_norm": 128.22251892089844, "learning_rate": 1e-06, "loss": 0.5567, "num_input_tokens_seen": 333618720, "step": 5955 }, { "epoch": 13.262806236080179, "loss": 0.6405112743377686, "loss_ce": 0.00013040687190368772, "loss_iou": 0.263671875, "loss_num": 0.02294921875, "loss_xval": 0.640625, "num_input_tokens_seen": 333618720, "step": 5955 }, { "epoch": 13.265033407572384, "grad_norm": 18.05571746826172, "learning_rate": 1e-06, "loss": 0.4476, "num_input_tokens_seen": 333676704, "step": 5956 }, { "epoch": 13.265033407572384, "loss": 0.4810963273048401, "loss_ce": 0.00013927766121923923, "loss_iou": 0.19921875, "loss_num": 0.0164794921875, "loss_xval": 0.48046875, "num_input_tokens_seen": 333676704, "step": 5956 }, { "epoch": 13.267260579064589, "grad_norm": 17.209970474243164, "learning_rate": 1e-06, "loss": 0.4548, "num_input_tokens_seen": 333732432, "step": 5957 }, { "epoch": 13.267260579064589, "loss": 0.4918234944343567, "loss_ce": 0.00012429816706571728, "loss_iou": 0.2041015625, "loss_num": 0.016845703125, "loss_xval": 0.4921875, "num_input_tokens_seen": 333732432, "step": 5957 }, { "epoch": 13.269487750556793, "grad_norm": 24.414487838745117, "learning_rate": 1e-06, "loss": 0.3929, "num_input_tokens_seen": 333790064, "step": 5958 }, { "epoch": 13.269487750556793, "loss": 0.2766364514827728, "loss_ce": 0.00014718460442963988, "loss_iou": 0.1171875, "loss_num": 0.00836181640625, "loss_xval": 0.27734375, "num_input_tokens_seen": 333790064, "step": 5958 }, { "epoch": 13.271714922048998, "grad_norm": 24.38360023498535, "learning_rate": 1e-06, "loss": 0.3817, "num_input_tokens_seen": 333845108, "step": 5959 }, { "epoch": 13.271714922048998, "loss": 0.34324610233306885, "loss_ce": 0.00047269114293158054, "loss_iou": 0.1572265625, "loss_num": 0.0057373046875, "loss_xval": 0.34375, "num_input_tokens_seen": 333845108, "step": 5959 }, { "epoch": 13.273942093541203, "grad_norm": 28.868574142456055, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 333899952, "step": 5960 }, { "epoch": 13.273942093541203, "loss": 0.45695704221725464, "loss_ce": 0.00010890320118051022, "loss_iou": 0.19140625, "loss_num": 0.0150146484375, "loss_xval": 0.45703125, "num_input_tokens_seen": 333899952, "step": 5960 }, { "epoch": 13.276169265033408, "grad_norm": 17.488313674926758, "learning_rate": 1e-06, "loss": 0.5148, "num_input_tokens_seen": 333955864, "step": 5961 }, { "epoch": 13.276169265033408, "loss": 0.64747554063797, "loss_ce": 0.00013665850565303117, "loss_iou": 0.275390625, "loss_num": 0.019287109375, "loss_xval": 0.6484375, "num_input_tokens_seen": 333955864, "step": 5961 }, { "epoch": 13.278396436525613, "grad_norm": 14.644098281860352, "learning_rate": 1e-06, "loss": 0.3249, "num_input_tokens_seen": 334012288, "step": 5962 }, { "epoch": 13.278396436525613, "loss": 0.32973378896713257, "loss_ce": 0.00014394470781553537, "loss_iou": 0.14453125, "loss_num": 0.00811767578125, "loss_xval": 0.330078125, "num_input_tokens_seen": 334012288, "step": 5962 }, { "epoch": 13.280623608017818, "grad_norm": 14.35937213897705, "learning_rate": 1e-06, "loss": 0.4478, "num_input_tokens_seen": 334070048, "step": 5963 }, { "epoch": 13.280623608017818, "loss": 0.5133100152015686, "loss_ce": 0.00012641007197089493, "loss_iou": 0.2236328125, "loss_num": 0.01318359375, "loss_xval": 0.51171875, "num_input_tokens_seen": 334070048, "step": 5963 }, { "epoch": 13.282850779510023, "grad_norm": 20.978622436523438, "learning_rate": 1e-06, "loss": 0.6486, "num_input_tokens_seen": 334125576, "step": 5964 }, { "epoch": 13.282850779510023, "loss": 0.6014399528503418, "loss_ce": 0.00012159476318629459, "loss_iou": 0.26171875, "loss_num": 0.015869140625, "loss_xval": 0.6015625, "num_input_tokens_seen": 334125576, "step": 5964 }, { "epoch": 13.285077951002227, "grad_norm": 17.045791625976562, "learning_rate": 1e-06, "loss": 0.4465, "num_input_tokens_seen": 334179024, "step": 5965 }, { "epoch": 13.285077951002227, "loss": 0.40493443608283997, "loss_ce": 0.00014927683514542878, "loss_iou": 0.18359375, "loss_num": 0.007415771484375, "loss_xval": 0.404296875, "num_input_tokens_seen": 334179024, "step": 5965 }, { "epoch": 13.287305122494432, "grad_norm": 17.58751678466797, "learning_rate": 1e-06, "loss": 0.5238, "num_input_tokens_seen": 334233464, "step": 5966 }, { "epoch": 13.287305122494432, "loss": 0.4632442593574524, "loss_ce": 0.00010949977149721235, "loss_iou": 0.2138671875, "loss_num": 0.007049560546875, "loss_xval": 0.462890625, "num_input_tokens_seen": 334233464, "step": 5966 }, { "epoch": 13.289532293986637, "grad_norm": 17.390804290771484, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 334288260, "step": 5967 }, { "epoch": 13.289532293986637, "loss": 0.6280704140663147, "loss_ce": 0.00014069479948375374, "loss_iou": 0.267578125, "loss_num": 0.018798828125, "loss_xval": 0.62890625, "num_input_tokens_seen": 334288260, "step": 5967 }, { "epoch": 13.291759465478842, "grad_norm": 20.9576358795166, "learning_rate": 1e-06, "loss": 0.3652, "num_input_tokens_seen": 334345852, "step": 5968 }, { "epoch": 13.291759465478842, "loss": 0.3794044554233551, "loss_ce": 0.0001319996954407543, "loss_iou": 0.1650390625, "loss_num": 0.0098876953125, "loss_xval": 0.37890625, "num_input_tokens_seen": 334345852, "step": 5968 }, { "epoch": 13.293986636971047, "grad_norm": 20.519437789916992, "learning_rate": 1e-06, "loss": 0.6077, "num_input_tokens_seen": 334402480, "step": 5969 }, { "epoch": 13.293986636971047, "loss": 0.8281785249710083, "loss_ce": 0.00011449880548752844, "loss_iou": 0.345703125, "loss_num": 0.02783203125, "loss_xval": 0.828125, "num_input_tokens_seen": 334402480, "step": 5969 }, { "epoch": 13.296213808463252, "grad_norm": 17.099870681762695, "learning_rate": 1e-06, "loss": 0.4134, "num_input_tokens_seen": 334459196, "step": 5970 }, { "epoch": 13.296213808463252, "loss": 0.35288766026496887, "loss_ce": 0.0001044573073158972, "loss_iou": 0.1533203125, "loss_num": 0.00921630859375, "loss_xval": 0.353515625, "num_input_tokens_seen": 334459196, "step": 5970 }, { "epoch": 13.298440979955457, "grad_norm": 16.155277252197266, "learning_rate": 1e-06, "loss": 0.4195, "num_input_tokens_seen": 334514964, "step": 5971 }, { "epoch": 13.298440979955457, "loss": 0.4192987084388733, "loss_ce": 0.00010926800314337015, "loss_iou": 0.1865234375, "loss_num": 0.00909423828125, "loss_xval": 0.419921875, "num_input_tokens_seen": 334514964, "step": 5971 }, { "epoch": 13.300668151447661, "grad_norm": 22.117937088012695, "learning_rate": 1e-06, "loss": 0.5437, "num_input_tokens_seen": 334570392, "step": 5972 }, { "epoch": 13.300668151447661, "loss": 0.524844765663147, "loss_ce": 0.0005832784809172153, "loss_iou": 0.205078125, "loss_num": 0.022705078125, "loss_xval": 0.5234375, "num_input_tokens_seen": 334570392, "step": 5972 }, { "epoch": 13.302895322939866, "grad_norm": 52.279701232910156, "learning_rate": 1e-06, "loss": 0.588, "num_input_tokens_seen": 334628400, "step": 5973 }, { "epoch": 13.302895322939866, "loss": 0.4728614389896393, "loss_ce": 0.001250399393029511, "loss_iou": 0.212890625, "loss_num": 0.00927734375, "loss_xval": 0.470703125, "num_input_tokens_seen": 334628400, "step": 5973 }, { "epoch": 13.305122494432071, "grad_norm": 42.509918212890625, "learning_rate": 1e-06, "loss": 0.7125, "num_input_tokens_seen": 334682692, "step": 5974 }, { "epoch": 13.305122494432071, "loss": 1.0085320472717285, "loss_ce": 0.0012078466825187206, "loss_iou": 0.41015625, "loss_num": 0.037109375, "loss_xval": 1.0078125, "num_input_tokens_seen": 334682692, "step": 5974 }, { "epoch": 13.307349665924276, "grad_norm": 23.17171287536621, "learning_rate": 1e-06, "loss": 0.5796, "num_input_tokens_seen": 334738240, "step": 5975 }, { "epoch": 13.307349665924276, "loss": 0.5287770628929138, "loss_ce": 9.056585986400023e-05, "loss_iou": 0.2060546875, "loss_num": 0.0230712890625, "loss_xval": 0.52734375, "num_input_tokens_seen": 334738240, "step": 5975 }, { "epoch": 13.309576837416481, "grad_norm": 15.882946014404297, "learning_rate": 1e-06, "loss": 0.449, "num_input_tokens_seen": 334795800, "step": 5976 }, { "epoch": 13.309576837416481, "loss": 0.51024329662323, "loss_ce": 0.00011146925680804998, "loss_iou": 0.224609375, "loss_num": 0.0123291015625, "loss_xval": 0.51171875, "num_input_tokens_seen": 334795800, "step": 5976 }, { "epoch": 13.311804008908686, "grad_norm": 16.047292709350586, "learning_rate": 1e-06, "loss": 0.525, "num_input_tokens_seen": 334853320, "step": 5977 }, { "epoch": 13.311804008908686, "loss": 0.42943665385246277, "loss_ce": 0.0001153927732957527, "loss_iou": 0.19140625, "loss_num": 0.0091552734375, "loss_xval": 0.4296875, "num_input_tokens_seen": 334853320, "step": 5977 }, { "epoch": 13.31403118040089, "grad_norm": 18.631088256835938, "learning_rate": 1e-06, "loss": 0.4337, "num_input_tokens_seen": 334908868, "step": 5978 }, { "epoch": 13.31403118040089, "loss": 0.3484327793121338, "loss_ce": 0.00010514883615542203, "loss_iou": 0.154296875, "loss_num": 0.0078125, "loss_xval": 0.34765625, "num_input_tokens_seen": 334908868, "step": 5978 }, { "epoch": 13.316258351893095, "grad_norm": 21.7668514251709, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 334966060, "step": 5979 }, { "epoch": 13.316258351893095, "loss": 0.5759820938110352, "loss_ce": 0.00011536551028257236, "loss_iou": 0.265625, "loss_num": 0.0091552734375, "loss_xval": 0.57421875, "num_input_tokens_seen": 334966060, "step": 5979 }, { "epoch": 13.3184855233853, "grad_norm": 18.8905029296875, "learning_rate": 1e-06, "loss": 0.3987, "num_input_tokens_seen": 335019804, "step": 5980 }, { "epoch": 13.3184855233853, "loss": 0.3916875720024109, "loss_ce": 0.00020808231784030795, "loss_iou": 0.1630859375, "loss_num": 0.01318359375, "loss_xval": 0.390625, "num_input_tokens_seen": 335019804, "step": 5980 }, { "epoch": 13.320712694877505, "grad_norm": 33.502071380615234, "learning_rate": 1e-06, "loss": 0.5049, "num_input_tokens_seen": 335077580, "step": 5981 }, { "epoch": 13.320712694877505, "loss": 0.5615602731704712, "loss_ce": 0.00015890991198830307, "loss_iou": 0.2265625, "loss_num": 0.021728515625, "loss_xval": 0.5625, "num_input_tokens_seen": 335077580, "step": 5981 }, { "epoch": 13.32293986636971, "grad_norm": 48.20892333984375, "learning_rate": 1e-06, "loss": 0.4792, "num_input_tokens_seen": 335131592, "step": 5982 }, { "epoch": 13.32293986636971, "loss": 0.5993452668190002, "loss_ce": 0.00010209815809503198, "loss_iou": 0.255859375, "loss_num": 0.01708984375, "loss_xval": 0.59765625, "num_input_tokens_seen": 335131592, "step": 5982 }, { "epoch": 13.325167037861915, "grad_norm": 14.08517837524414, "learning_rate": 1e-06, "loss": 0.4242, "num_input_tokens_seen": 335186972, "step": 5983 }, { "epoch": 13.325167037861915, "loss": 0.5378490090370178, "loss_ce": 0.0010219014948233962, "loss_iou": 0.203125, "loss_num": 0.0263671875, "loss_xval": 0.53515625, "num_input_tokens_seen": 335186972, "step": 5983 }, { "epoch": 13.32739420935412, "grad_norm": 15.391449928283691, "learning_rate": 1e-06, "loss": 0.6247, "num_input_tokens_seen": 335240660, "step": 5984 }, { "epoch": 13.32739420935412, "loss": 0.46067914366722107, "loss_ce": 0.00010786119673866779, "loss_iou": 0.201171875, "loss_num": 0.01177978515625, "loss_xval": 0.4609375, "num_input_tokens_seen": 335240660, "step": 5984 }, { "epoch": 13.329621380846325, "grad_norm": 25.606029510498047, "learning_rate": 1e-06, "loss": 0.4295, "num_input_tokens_seen": 335297500, "step": 5985 }, { "epoch": 13.329621380846325, "loss": 0.4411547780036926, "loss_ce": 0.00023681171296630055, "loss_iou": 0.1962890625, "loss_num": 0.00970458984375, "loss_xval": 0.44140625, "num_input_tokens_seen": 335297500, "step": 5985 }, { "epoch": 13.33184855233853, "grad_norm": 31.844512939453125, "learning_rate": 1e-06, "loss": 0.4698, "num_input_tokens_seen": 335350156, "step": 5986 }, { "epoch": 13.33184855233853, "loss": 0.41549986600875854, "loss_ce": 9.457894338993356e-05, "loss_iou": 0.166015625, "loss_num": 0.0167236328125, "loss_xval": 0.416015625, "num_input_tokens_seen": 335350156, "step": 5986 }, { "epoch": 13.334075723830734, "grad_norm": 24.63311195373535, "learning_rate": 1e-06, "loss": 0.5548, "num_input_tokens_seen": 335405788, "step": 5987 }, { "epoch": 13.334075723830734, "loss": 0.36372703313827515, "loss_ce": 0.00014061132969800383, "loss_iou": 0.1728515625, "loss_num": 0.0037994384765625, "loss_xval": 0.36328125, "num_input_tokens_seen": 335405788, "step": 5987 }, { "epoch": 13.33630289532294, "grad_norm": 13.776124000549316, "learning_rate": 1e-06, "loss": 0.4612, "num_input_tokens_seen": 335460976, "step": 5988 }, { "epoch": 13.33630289532294, "loss": 0.6514391899108887, "loss_ce": 0.00013307490735314786, "loss_iou": 0.29296875, "loss_num": 0.01263427734375, "loss_xval": 0.65234375, "num_input_tokens_seen": 335460976, "step": 5988 }, { "epoch": 13.338530066815144, "grad_norm": 15.759697914123535, "learning_rate": 1e-06, "loss": 0.5365, "num_input_tokens_seen": 335516704, "step": 5989 }, { "epoch": 13.338530066815144, "loss": 0.3756389021873474, "loss_ce": 0.00015063249156810343, "loss_iou": 0.138671875, "loss_num": 0.0196533203125, "loss_xval": 0.375, "num_input_tokens_seen": 335516704, "step": 5989 }, { "epoch": 13.340757238307349, "grad_norm": 14.048306465148926, "learning_rate": 1e-06, "loss": 0.2585, "num_input_tokens_seen": 335573700, "step": 5990 }, { "epoch": 13.340757238307349, "loss": 0.2530948221683502, "loss_ce": 0.00010410351387690753, "loss_iou": 0.11181640625, "loss_num": 0.00579833984375, "loss_xval": 0.25390625, "num_input_tokens_seen": 335573700, "step": 5990 }, { "epoch": 13.342984409799554, "grad_norm": 45.206886291503906, "learning_rate": 1e-06, "loss": 0.4845, "num_input_tokens_seen": 335628184, "step": 5991 }, { "epoch": 13.342984409799554, "loss": 0.6019352078437805, "loss_ce": 0.0001285744656343013, "loss_iou": 0.267578125, "loss_num": 0.01324462890625, "loss_xval": 0.6015625, "num_input_tokens_seen": 335628184, "step": 5991 }, { "epoch": 13.345211581291759, "grad_norm": 99.1800765991211, "learning_rate": 1e-06, "loss": 0.5862, "num_input_tokens_seen": 335684580, "step": 5992 }, { "epoch": 13.345211581291759, "loss": 0.6407464146614075, "loss_ce": 0.00012141239130869508, "loss_iou": 0.2890625, "loss_num": 0.0123291015625, "loss_xval": 0.640625, "num_input_tokens_seen": 335684580, "step": 5992 }, { "epoch": 13.347438752783964, "grad_norm": 16.95970916748047, "learning_rate": 1e-06, "loss": 0.4526, "num_input_tokens_seen": 335741296, "step": 5993 }, { "epoch": 13.347438752783964, "loss": 0.32951620221138, "loss_ce": 0.00010945653048111126, "loss_iou": 0.142578125, "loss_num": 0.0087890625, "loss_xval": 0.330078125, "num_input_tokens_seen": 335741296, "step": 5993 }, { "epoch": 13.34966592427617, "grad_norm": 40.8525505065918, "learning_rate": 1e-06, "loss": 0.4401, "num_input_tokens_seen": 335797804, "step": 5994 }, { "epoch": 13.34966592427617, "loss": 0.46965697407722473, "loss_ce": 0.00017455026682000607, "loss_iou": 0.1748046875, "loss_num": 0.0240478515625, "loss_xval": 0.46875, "num_input_tokens_seen": 335797804, "step": 5994 }, { "epoch": 13.351893095768375, "grad_norm": 23.179540634155273, "learning_rate": 1e-06, "loss": 0.6922, "num_input_tokens_seen": 335855320, "step": 5995 }, { "epoch": 13.351893095768375, "loss": 0.5909324288368225, "loss_ce": 0.00011209775402676314, "loss_iou": 0.271484375, "loss_num": 0.009521484375, "loss_xval": 0.58984375, "num_input_tokens_seen": 335855320, "step": 5995 }, { "epoch": 13.35412026726058, "grad_norm": 28.706586837768555, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 335911324, "step": 5996 }, { "epoch": 13.35412026726058, "loss": 0.4113472104072571, "loss_ce": 0.00021440400450956076, "loss_iou": 0.1904296875, "loss_num": 0.006195068359375, "loss_xval": 0.41015625, "num_input_tokens_seen": 335911324, "step": 5996 }, { "epoch": 13.356347438752785, "grad_norm": 14.33905029296875, "learning_rate": 1e-06, "loss": 0.3891, "num_input_tokens_seen": 335964808, "step": 5997 }, { "epoch": 13.356347438752785, "loss": 0.41698315739631653, "loss_ce": 0.00011303767678327858, "loss_iou": 0.1640625, "loss_num": 0.017578125, "loss_xval": 0.416015625, "num_input_tokens_seen": 335964808, "step": 5997 }, { "epoch": 13.35857461024499, "grad_norm": 15.864407539367676, "learning_rate": 1e-06, "loss": 0.3952, "num_input_tokens_seen": 336021256, "step": 5998 }, { "epoch": 13.35857461024499, "loss": 0.4110654890537262, "loss_ce": 0.00017680224846117198, "loss_iou": 0.1796875, "loss_num": 0.01025390625, "loss_xval": 0.41015625, "num_input_tokens_seen": 336021256, "step": 5998 }, { "epoch": 13.360801781737194, "grad_norm": 28.089126586914062, "learning_rate": 1e-06, "loss": 0.4279, "num_input_tokens_seen": 336074836, "step": 5999 }, { "epoch": 13.360801781737194, "loss": 0.4389985203742981, "loss_ce": 0.00017103503341786563, "loss_iou": 0.1689453125, "loss_num": 0.02001953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 336074836, "step": 5999 }, { "epoch": 13.3630289532294, "grad_norm": 16.476625442504883, "learning_rate": 1e-06, "loss": 0.5683, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "eval_seeclick_web_CIoU": 0.589046448469162, "eval_seeclick_web_GIoU": 0.5870300531387329, "eval_seeclick_web_IoU": 0.6074443459510803, "eval_seeclick_web_MAE_all": 0.015537765808403492, "eval_seeclick_web_MAE_h": 0.007694335887208581, "eval_seeclick_web_MAE_w": 0.015699473209679127, "eval_seeclick_web_MAE_x_boxes": 0.009341908851638436, "eval_seeclick_web_MAE_y_boxes": 0.021628314396366477, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.888150691986084, "eval_seeclick_web_loss_ce": 0.0001769300361047499, "eval_seeclick_web_loss_iou": 0.40771484375, "eval_seeclick_web_loss_num": 0.0123291015625, "eval_seeclick_web_loss_xval": 0.8765869140625, "eval_seeclick_web_runtime": 19.879, "eval_seeclick_web_samples_per_second": 2.515, "eval_seeclick_web_steps_per_second": 0.101, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "eval_icons_CIoU": 0.2701048105955124, "eval_icons_GIoU": 0.294067844748497, "eval_icons_IoU": 0.3528982400894165, "eval_icons_MAE_all": 0.06405339390039444, "eval_icons_MAE_h": 0.03851390350610018, "eval_icons_MAE_w": 0.06734280101954937, "eval_icons_MAE_x_boxes": 0.06041870452463627, "eval_icons_MAE_y_boxes": 0.03873829450458288, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.748345971107483, "eval_icons_loss_ce": 0.00021618494793074206, "eval_icons_loss_iou": 0.6826171875, "eval_icons_loss_num": 0.061618804931640625, "eval_icons_loss_xval": 1.673828125, "eval_icons_runtime": 18.4453, "eval_icons_samples_per_second": 2.711, "eval_icons_steps_per_second": 0.108, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "eval_screenspot_CIoU": 0.35929131507873535, "eval_screenspot_GIoU": 0.3761854072411855, "eval_screenspot_IoU": 0.43523843089739483, "eval_screenspot_MAE_all": 0.05693357313672701, "eval_screenspot_MAE_h": 0.039716811850667, "eval_screenspot_MAE_w": 0.06415350238482158, "eval_screenspot_MAE_x_boxes": 0.069387707238396, "eval_screenspot_MAE_y_boxes": 0.03772336399803559, "eval_screenspot_inside_bbox": 0.6966666579246521, "eval_screenspot_loss": 1.5850136280059814, "eval_screenspot_loss_ce": 0.00022644254689415297, "eval_screenspot_loss_iou": 0.6593424479166666, "eval_screenspot_loss_num": 0.0646069844563802, "eval_screenspot_loss_xval": 1.640625, "eval_screenspot_runtime": 29.6001, "eval_screenspot_samples_per_second": 3.007, "eval_screenspot_steps_per_second": 0.101, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "eval_compot_CIoU": 0.3427671641111374, "eval_compot_GIoU": 0.3569800406694412, "eval_compot_IoU": 0.4003662168979645, "eval_compot_MAE_all": 0.019289949908852577, "eval_compot_MAE_h": 0.012674622237682343, "eval_compot_MAE_w": 0.021172930486500263, "eval_compot_MAE_x_boxes": 0.02997487783432007, "eval_compot_MAE_y_boxes": 0.007259466219693422, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4314215183258057, "eval_compot_loss_ce": 0.00017105540609918535, "eval_compot_loss_iou": 0.6610107421875, "eval_compot_loss_num": 0.018083572387695312, "eval_compot_loss_xval": 1.413330078125, "eval_compot_runtime": 19.5253, "eval_compot_samples_per_second": 2.561, "eval_compot_steps_per_second": 0.102, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "eval_custom_ui_val_CIoU": 0.48122422645489377, "eval_custom_ui_val_GIoU": 0.4910411420795653, "eval_custom_ui_val_IoU": 0.5414565900961558, "eval_custom_ui_val_MAE_all": 0.027185753505263064, "eval_custom_ui_val_MAE_h": 0.014914580983006291, "eval_custom_ui_val_MAE_w": 0.03390147609429227, "eval_custom_ui_val_MAE_x_boxes": 0.03341104726617535, "eval_custom_ui_val_MAE_y_boxes": 0.013283828376895852, "eval_custom_ui_val_inside_bbox": 0.7754629651705424, "eval_custom_ui_val_loss": 1.1734305620193481, "eval_custom_ui_val_loss_ce": 0.0001952857144513271, "eval_custom_ui_val_loss_iou": 0.5054389105902778, "eval_custom_ui_val_loss_num": 0.024320814344618056, "eval_custom_ui_val_loss_xval": 1.1326497395833333, "eval_custom_ui_val_runtime": 56.7538, "eval_custom_ui_val_samples_per_second": 4.669, "eval_custom_ui_val_steps_per_second": 0.159, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.3630289532294, "loss": 0.8641664385795593, "loss_ce": 0.00015277693455573171, "loss_iou": 0.392578125, "loss_num": 0.015869140625, "loss_xval": 0.86328125, "num_input_tokens_seen": 336130500, "step": 6000 }, { "epoch": 13.365256124721604, "grad_norm": 16.492589950561523, "learning_rate": 1e-06, "loss": 0.3624, "num_input_tokens_seen": 336187644, "step": 6001 }, { "epoch": 13.365256124721604, "loss": 0.38099753856658936, "loss_ce": 0.00026025049737654626, "loss_iou": 0.1728515625, "loss_num": 0.00701904296875, "loss_xval": 0.380859375, "num_input_tokens_seen": 336187644, "step": 6001 }, { "epoch": 13.367483296213809, "grad_norm": 16.520002365112305, "learning_rate": 1e-06, "loss": 0.4096, "num_input_tokens_seen": 336244060, "step": 6002 }, { "epoch": 13.367483296213809, "loss": 0.2786784768104553, "loss_ce": 0.00011401639494579285, "loss_iou": 0.11181640625, "loss_num": 0.010986328125, "loss_xval": 0.279296875, "num_input_tokens_seen": 336244060, "step": 6002 }, { "epoch": 13.369710467706014, "grad_norm": 27.00398826599121, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 336298968, "step": 6003 }, { "epoch": 13.369710467706014, "loss": 0.3439924120903015, "loss_ce": 0.00012036073167109862, "loss_iou": 0.146484375, "loss_num": 0.0103759765625, "loss_xval": 0.34375, "num_input_tokens_seen": 336298968, "step": 6003 }, { "epoch": 13.371937639198219, "grad_norm": 14.05265998840332, "learning_rate": 1e-06, "loss": 0.3699, "num_input_tokens_seen": 336354760, "step": 6004 }, { "epoch": 13.371937639198219, "loss": 0.3934824466705322, "loss_ce": 0.0001108648139052093, "loss_iou": 0.1767578125, "loss_num": 0.0079345703125, "loss_xval": 0.392578125, "num_input_tokens_seen": 336354760, "step": 6004 }, { "epoch": 13.374164810690424, "grad_norm": 24.29564094543457, "learning_rate": 1e-06, "loss": 0.5934, "num_input_tokens_seen": 336412268, "step": 6005 }, { "epoch": 13.374164810690424, "loss": 0.372583270072937, "loss_ce": 0.00011621671728789806, "loss_iou": 0.166015625, "loss_num": 0.008056640625, "loss_xval": 0.373046875, "num_input_tokens_seen": 336412268, "step": 6005 }, { "epoch": 13.376391982182628, "grad_norm": 18.128660202026367, "learning_rate": 1e-06, "loss": 0.4611, "num_input_tokens_seen": 336467380, "step": 6006 }, { "epoch": 13.376391982182628, "loss": 0.4521748423576355, "loss_ce": 0.00014848702994640917, "loss_iou": 0.20703125, "loss_num": 0.007781982421875, "loss_xval": 0.451171875, "num_input_tokens_seen": 336467380, "step": 6006 }, { "epoch": 13.378619153674833, "grad_norm": 17.15667152404785, "learning_rate": 1e-06, "loss": 0.4673, "num_input_tokens_seen": 336522912, "step": 6007 }, { "epoch": 13.378619153674833, "loss": 0.4950922429561615, "loss_ce": 9.710421727504581e-05, "loss_iou": 0.2158203125, "loss_num": 0.012939453125, "loss_xval": 0.494140625, "num_input_tokens_seen": 336522912, "step": 6007 }, { "epoch": 13.380846325167038, "grad_norm": 36.65664291381836, "learning_rate": 1e-06, "loss": 0.4762, "num_input_tokens_seen": 336578132, "step": 6008 }, { "epoch": 13.380846325167038, "loss": 0.4739997982978821, "loss_ce": 0.0002449120220262557, "loss_iou": 0.1728515625, "loss_num": 0.025634765625, "loss_xval": 0.474609375, "num_input_tokens_seen": 336578132, "step": 6008 }, { "epoch": 13.383073496659243, "grad_norm": 17.556377410888672, "learning_rate": 1e-06, "loss": 0.4995, "num_input_tokens_seen": 336636856, "step": 6009 }, { "epoch": 13.383073496659243, "loss": 0.5950038433074951, "loss_ce": 0.0001551837776787579, "loss_iou": 0.25390625, "loss_num": 0.017578125, "loss_xval": 0.59375, "num_input_tokens_seen": 336636856, "step": 6009 }, { "epoch": 13.385300668151448, "grad_norm": 30.719839096069336, "learning_rate": 1e-06, "loss": 0.5576, "num_input_tokens_seen": 336694384, "step": 6010 }, { "epoch": 13.385300668151448, "loss": 0.5820613503456116, "loss_ce": 0.00015218451153486967, "loss_iou": 0.24609375, "loss_num": 0.0179443359375, "loss_xval": 0.58203125, "num_input_tokens_seen": 336694384, "step": 6010 }, { "epoch": 13.387527839643653, "grad_norm": 16.480695724487305, "learning_rate": 1e-06, "loss": 0.5837, "num_input_tokens_seen": 336751420, "step": 6011 }, { "epoch": 13.387527839643653, "loss": 0.4113236367702484, "loss_ce": 0.00012982796761207283, "loss_iou": 0.1796875, "loss_num": 0.010498046875, "loss_xval": 0.412109375, "num_input_tokens_seen": 336751420, "step": 6011 }, { "epoch": 13.389755011135858, "grad_norm": 27.67957305908203, "learning_rate": 1e-06, "loss": 0.4264, "num_input_tokens_seen": 336804964, "step": 6012 }, { "epoch": 13.389755011135858, "loss": 0.5694878101348877, "loss_ce": 0.00015187214012257755, "loss_iou": 0.265625, "loss_num": 0.00732421875, "loss_xval": 0.5703125, "num_input_tokens_seen": 336804964, "step": 6012 }, { "epoch": 13.391982182628063, "grad_norm": 15.864426612854004, "learning_rate": 1e-06, "loss": 0.6295, "num_input_tokens_seen": 336862960, "step": 6013 }, { "epoch": 13.391982182628063, "loss": 0.425285279750824, "loss_ce": 0.00011438117508077994, "loss_iou": 0.189453125, "loss_num": 0.00927734375, "loss_xval": 0.42578125, "num_input_tokens_seen": 336862960, "step": 6013 }, { "epoch": 13.394209354120267, "grad_norm": 13.584254264831543, "learning_rate": 1e-06, "loss": 0.6117, "num_input_tokens_seen": 336920076, "step": 6014 }, { "epoch": 13.394209354120267, "loss": 0.4187964200973511, "loss_ce": 9.524912456981838e-05, "loss_iou": 0.1865234375, "loss_num": 0.00927734375, "loss_xval": 0.41796875, "num_input_tokens_seen": 336920076, "step": 6014 }, { "epoch": 13.396436525612472, "grad_norm": 15.942584037780762, "learning_rate": 1e-06, "loss": 0.4851, "num_input_tokens_seen": 336975628, "step": 6015 }, { "epoch": 13.396436525612472, "loss": 0.5123189687728882, "loss_ce": 0.00011192913370905444, "loss_iou": 0.2294921875, "loss_num": 0.0107421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 336975628, "step": 6015 }, { "epoch": 13.398663697104677, "grad_norm": 29.960575103759766, "learning_rate": 1e-06, "loss": 0.4581, "num_input_tokens_seen": 337031400, "step": 6016 }, { "epoch": 13.398663697104677, "loss": 0.4230641722679138, "loss_ce": 9.053543908521533e-05, "loss_iou": 0.1728515625, "loss_num": 0.015625, "loss_xval": 0.423828125, "num_input_tokens_seen": 337031400, "step": 6016 }, { "epoch": 13.400890868596882, "grad_norm": 17.33017921447754, "learning_rate": 1e-06, "loss": 0.3594, "num_input_tokens_seen": 337086372, "step": 6017 }, { "epoch": 13.400890868596882, "loss": 0.46412086486816406, "loss_ce": 0.00013159040827304125, "loss_iou": 0.2138671875, "loss_num": 0.0072021484375, "loss_xval": 0.46484375, "num_input_tokens_seen": 337086372, "step": 6017 }, { "epoch": 13.403118040089087, "grad_norm": 24.617557525634766, "learning_rate": 1e-06, "loss": 0.6371, "num_input_tokens_seen": 337141460, "step": 6018 }, { "epoch": 13.403118040089087, "loss": 0.4511684775352478, "loss_ce": 0.00011867978173540905, "loss_iou": 0.201171875, "loss_num": 0.0098876953125, "loss_xval": 0.451171875, "num_input_tokens_seen": 337141460, "step": 6018 }, { "epoch": 13.405345211581292, "grad_norm": 22.865720748901367, "learning_rate": 1e-06, "loss": 0.4544, "num_input_tokens_seen": 337199884, "step": 6019 }, { "epoch": 13.405345211581292, "loss": 0.4763756990432739, "loss_ce": 0.00011837005149573088, "loss_iou": 0.216796875, "loss_num": 0.0084228515625, "loss_xval": 0.4765625, "num_input_tokens_seen": 337199884, "step": 6019 }, { "epoch": 13.407572383073497, "grad_norm": 30.050506591796875, "learning_rate": 1e-06, "loss": 0.3933, "num_input_tokens_seen": 337255584, "step": 6020 }, { "epoch": 13.407572383073497, "loss": 0.5196518301963806, "loss_ce": 0.00012056018749717623, "loss_iou": 0.203125, "loss_num": 0.022705078125, "loss_xval": 0.51953125, "num_input_tokens_seen": 337255584, "step": 6020 }, { "epoch": 13.409799554565701, "grad_norm": 16.1778564453125, "learning_rate": 1e-06, "loss": 0.4133, "num_input_tokens_seen": 337314328, "step": 6021 }, { "epoch": 13.409799554565701, "loss": 0.4304283857345581, "loss_ce": 0.00013054994633421302, "loss_iou": 0.19921875, "loss_num": 0.006317138671875, "loss_xval": 0.4296875, "num_input_tokens_seen": 337314328, "step": 6021 }, { "epoch": 13.412026726057906, "grad_norm": 19.43562126159668, "learning_rate": 1e-06, "loss": 0.5453, "num_input_tokens_seen": 337371160, "step": 6022 }, { "epoch": 13.412026726057906, "loss": 0.8006750345230103, "loss_ce": 0.00013796251732856035, "loss_iou": 0.3125, "loss_num": 0.03515625, "loss_xval": 0.80078125, "num_input_tokens_seen": 337371160, "step": 6022 }, { "epoch": 13.414253897550111, "grad_norm": 20.63788414001465, "learning_rate": 1e-06, "loss": 0.3787, "num_input_tokens_seen": 337427584, "step": 6023 }, { "epoch": 13.414253897550111, "loss": 0.45666825771331787, "loss_ce": 0.00012530997628346086, "loss_iou": 0.2001953125, "loss_num": 0.0113525390625, "loss_xval": 0.45703125, "num_input_tokens_seen": 337427584, "step": 6023 }, { "epoch": 13.416481069042316, "grad_norm": 14.40611457824707, "learning_rate": 1e-06, "loss": 0.4986, "num_input_tokens_seen": 337486896, "step": 6024 }, { "epoch": 13.416481069042316, "loss": 0.4070853590965271, "loss_ce": 0.00010296083928551525, "loss_iou": 0.1875, "loss_num": 0.00653076171875, "loss_xval": 0.40625, "num_input_tokens_seen": 337486896, "step": 6024 }, { "epoch": 13.41870824053452, "grad_norm": 17.12459945678711, "learning_rate": 1e-06, "loss": 0.3375, "num_input_tokens_seen": 337544700, "step": 6025 }, { "epoch": 13.41870824053452, "loss": 0.28037208318710327, "loss_ce": 9.865299216471612e-05, "loss_iou": 0.12109375, "loss_num": 0.007476806640625, "loss_xval": 0.28125, "num_input_tokens_seen": 337544700, "step": 6025 }, { "epoch": 13.420935412026726, "grad_norm": 17.389301300048828, "learning_rate": 1e-06, "loss": 0.5866, "num_input_tokens_seen": 337602792, "step": 6026 }, { "epoch": 13.420935412026726, "loss": 0.34812402725219727, "loss_ce": 0.00010155315976589918, "loss_iou": 0.15234375, "loss_num": 0.00872802734375, "loss_xval": 0.34765625, "num_input_tokens_seen": 337602792, "step": 6026 }, { "epoch": 13.42316258351893, "grad_norm": 14.135052680969238, "learning_rate": 1e-06, "loss": 0.6344, "num_input_tokens_seen": 337657324, "step": 6027 }, { "epoch": 13.42316258351893, "loss": 0.9954932332038879, "loss_ce": 0.00013188435696065426, "loss_iou": 0.419921875, "loss_num": 0.031494140625, "loss_xval": 0.99609375, "num_input_tokens_seen": 337657324, "step": 6027 }, { "epoch": 13.425389755011135, "grad_norm": 17.855140686035156, "learning_rate": 1e-06, "loss": 0.4056, "num_input_tokens_seen": 337713028, "step": 6028 }, { "epoch": 13.425389755011135, "loss": 0.30981168150901794, "loss_ce": 0.00011929747415706515, "loss_iou": 0.1318359375, "loss_num": 0.00921630859375, "loss_xval": 0.310546875, "num_input_tokens_seen": 337713028, "step": 6028 }, { "epoch": 13.42761692650334, "grad_norm": 19.777183532714844, "learning_rate": 1e-06, "loss": 0.4961, "num_input_tokens_seen": 337767148, "step": 6029 }, { "epoch": 13.42761692650334, "loss": 0.45299914479255676, "loss_ce": 0.00011829864524770528, "loss_iou": 0.19921875, "loss_num": 0.01104736328125, "loss_xval": 0.453125, "num_input_tokens_seen": 337767148, "step": 6029 }, { "epoch": 13.429844097995545, "grad_norm": 28.941667556762695, "learning_rate": 1e-06, "loss": 0.3823, "num_input_tokens_seen": 337824252, "step": 6030 }, { "epoch": 13.429844097995545, "loss": 0.4544570744037628, "loss_ce": 0.00011135141539853066, "loss_iou": 0.2001953125, "loss_num": 0.0108642578125, "loss_xval": 0.455078125, "num_input_tokens_seen": 337824252, "step": 6030 }, { "epoch": 13.43207126948775, "grad_norm": 20.595233917236328, "learning_rate": 1e-06, "loss": 0.7976, "num_input_tokens_seen": 337879876, "step": 6031 }, { "epoch": 13.43207126948775, "loss": 0.8424028754234314, "loss_ce": 0.0001787315122783184, "loss_iou": 0.34765625, "loss_num": 0.029296875, "loss_xval": 0.84375, "num_input_tokens_seen": 337879876, "step": 6031 }, { "epoch": 13.434298440979955, "grad_norm": 37.23560333251953, "learning_rate": 1e-06, "loss": 0.5585, "num_input_tokens_seen": 337936932, "step": 6032 }, { "epoch": 13.434298440979955, "loss": 0.4548329710960388, "loss_ce": 0.00012105993664590642, "loss_iou": 0.181640625, "loss_num": 0.018310546875, "loss_xval": 0.455078125, "num_input_tokens_seen": 337936932, "step": 6032 }, { "epoch": 13.43652561247216, "grad_norm": 20.569580078125, "learning_rate": 1e-06, "loss": 0.4455, "num_input_tokens_seen": 337993396, "step": 6033 }, { "epoch": 13.43652561247216, "loss": 0.6414711475372314, "loss_ce": 0.00011370141146471724, "loss_iou": 0.2294921875, "loss_num": 0.036376953125, "loss_xval": 0.640625, "num_input_tokens_seen": 337993396, "step": 6033 }, { "epoch": 13.438752783964365, "grad_norm": 21.35686492919922, "learning_rate": 1e-06, "loss": 0.5028, "num_input_tokens_seen": 338048168, "step": 6034 }, { "epoch": 13.438752783964365, "loss": 0.27768754959106445, "loss_ce": 0.0001606793375685811, "loss_iou": 0.1279296875, "loss_num": 0.004180908203125, "loss_xval": 0.27734375, "num_input_tokens_seen": 338048168, "step": 6034 }, { "epoch": 13.44097995545657, "grad_norm": 14.87231159210205, "learning_rate": 1e-06, "loss": 0.6545, "num_input_tokens_seen": 338104776, "step": 6035 }, { "epoch": 13.44097995545657, "loss": 0.5546998381614685, "loss_ce": 0.00013442571798805147, "loss_iou": 0.2294921875, "loss_num": 0.0191650390625, "loss_xval": 0.5546875, "num_input_tokens_seen": 338104776, "step": 6035 }, { "epoch": 13.443207126948774, "grad_norm": 17.35746955871582, "learning_rate": 1e-06, "loss": 0.3714, "num_input_tokens_seen": 338160688, "step": 6036 }, { "epoch": 13.443207126948774, "loss": 0.2007206380367279, "loss_ce": 8.280624751932919e-05, "loss_iou": 0.080078125, "loss_num": 0.00799560546875, "loss_xval": 0.2001953125, "num_input_tokens_seen": 338160688, "step": 6036 }, { "epoch": 13.44543429844098, "grad_norm": 32.42356872558594, "learning_rate": 1e-06, "loss": 0.5975, "num_input_tokens_seen": 338216708, "step": 6037 }, { "epoch": 13.44543429844098, "loss": 0.4356258511543274, "loss_ce": 0.00020103095448575914, "loss_iou": 0.171875, "loss_num": 0.0185546875, "loss_xval": 0.435546875, "num_input_tokens_seen": 338216708, "step": 6037 }, { "epoch": 13.447661469933184, "grad_norm": 16.385221481323242, "learning_rate": 1e-06, "loss": 0.3499, "num_input_tokens_seen": 338273736, "step": 6038 }, { "epoch": 13.447661469933184, "loss": 0.3894416391849518, "loss_ce": 0.00015943063772283494, "loss_iou": 0.16015625, "loss_num": 0.01361083984375, "loss_xval": 0.388671875, "num_input_tokens_seen": 338273736, "step": 6038 }, { "epoch": 13.449888641425389, "grad_norm": 16.43487548828125, "learning_rate": 1e-06, "loss": 0.4453, "num_input_tokens_seen": 338329848, "step": 6039 }, { "epoch": 13.449888641425389, "loss": 0.4355180859565735, "loss_ce": 9.329354361398146e-05, "loss_iou": 0.18359375, "loss_num": 0.01373291015625, "loss_xval": 0.435546875, "num_input_tokens_seen": 338329848, "step": 6039 }, { "epoch": 13.452115812917596, "grad_norm": 10.888897895812988, "learning_rate": 1e-06, "loss": 0.2805, "num_input_tokens_seen": 338384060, "step": 6040 }, { "epoch": 13.452115812917596, "loss": 0.24022263288497925, "loss_ce": 0.0001103131435229443, "loss_iou": 0.09912109375, "loss_num": 0.0084228515625, "loss_xval": 0.240234375, "num_input_tokens_seen": 338384060, "step": 6040 }, { "epoch": 13.4543429844098, "grad_norm": 16.131216049194336, "learning_rate": 1e-06, "loss": 0.3434, "num_input_tokens_seen": 338439568, "step": 6041 }, { "epoch": 13.4543429844098, "loss": 0.44243180751800537, "loss_ce": 0.00011001640814356506, "loss_iou": 0.1884765625, "loss_num": 0.012939453125, "loss_xval": 0.44140625, "num_input_tokens_seen": 338439568, "step": 6041 }, { "epoch": 13.456570155902005, "grad_norm": 24.778518676757812, "learning_rate": 1e-06, "loss": 0.3777, "num_input_tokens_seen": 338495172, "step": 6042 }, { "epoch": 13.456570155902005, "loss": 0.5041686296463013, "loss_ce": 0.00035395825398154557, "loss_iou": 0.2197265625, "loss_num": 0.01287841796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 338495172, "step": 6042 }, { "epoch": 13.45879732739421, "grad_norm": 25.524629592895508, "learning_rate": 1e-06, "loss": 0.5037, "num_input_tokens_seen": 338550088, "step": 6043 }, { "epoch": 13.45879732739421, "loss": 0.5563878417015076, "loss_ce": 0.00011343901860527694, "loss_iou": 0.248046875, "loss_num": 0.01202392578125, "loss_xval": 0.5546875, "num_input_tokens_seen": 338550088, "step": 6043 }, { "epoch": 13.461024498886415, "grad_norm": 19.04424285888672, "learning_rate": 1e-06, "loss": 0.583, "num_input_tokens_seen": 338605712, "step": 6044 }, { "epoch": 13.461024498886415, "loss": 0.6645296812057495, "loss_ce": 0.00022306920436676592, "loss_iou": 0.2578125, "loss_num": 0.0301513671875, "loss_xval": 0.6640625, "num_input_tokens_seen": 338605712, "step": 6044 }, { "epoch": 13.46325167037862, "grad_norm": 14.406312942504883, "learning_rate": 1e-06, "loss": 0.379, "num_input_tokens_seen": 338662752, "step": 6045 }, { "epoch": 13.46325167037862, "loss": 0.3758947253227234, "loss_ce": 0.00010126342385774478, "loss_iou": 0.1533203125, "loss_num": 0.013671875, "loss_xval": 0.375, "num_input_tokens_seen": 338662752, "step": 6045 }, { "epoch": 13.465478841870825, "grad_norm": 40.62894821166992, "learning_rate": 1e-06, "loss": 0.5142, "num_input_tokens_seen": 338716672, "step": 6046 }, { "epoch": 13.465478841870825, "loss": 0.471059650182724, "loss_ce": 0.00011239292507525533, "loss_iou": 0.2099609375, "loss_num": 0.01025390625, "loss_xval": 0.470703125, "num_input_tokens_seen": 338716672, "step": 6046 }, { "epoch": 13.46770601336303, "grad_norm": 22.999338150024414, "learning_rate": 1e-06, "loss": 0.6799, "num_input_tokens_seen": 338773908, "step": 6047 }, { "epoch": 13.46770601336303, "loss": 0.46885955333709717, "loss_ce": 0.00010957221093121916, "loss_iou": 0.2119140625, "loss_num": 0.00885009765625, "loss_xval": 0.46875, "num_input_tokens_seen": 338773908, "step": 6047 }, { "epoch": 13.469933184855234, "grad_norm": 17.78192138671875, "learning_rate": 1e-06, "loss": 0.4976, "num_input_tokens_seen": 338831676, "step": 6048 }, { "epoch": 13.469933184855234, "loss": 0.47100013494491577, "loss_ce": 0.00017495593056082726, "loss_iou": 0.1953125, "loss_num": 0.0159912109375, "loss_xval": 0.470703125, "num_input_tokens_seen": 338831676, "step": 6048 }, { "epoch": 13.47216035634744, "grad_norm": 31.006885528564453, "learning_rate": 1e-06, "loss": 0.5294, "num_input_tokens_seen": 338886208, "step": 6049 }, { "epoch": 13.47216035634744, "loss": 0.49864301085472107, "loss_ce": 0.00010783917969092727, "loss_iou": 0.2275390625, "loss_num": 0.00872802734375, "loss_xval": 0.498046875, "num_input_tokens_seen": 338886208, "step": 6049 }, { "epoch": 13.474387527839644, "grad_norm": 17.400957107543945, "learning_rate": 1e-06, "loss": 0.5978, "num_input_tokens_seen": 338941916, "step": 6050 }, { "epoch": 13.474387527839644, "loss": 0.5912978649139404, "loss_ce": 0.00017235292762052268, "loss_iou": 0.263671875, "loss_num": 0.01263427734375, "loss_xval": 0.58984375, "num_input_tokens_seen": 338941916, "step": 6050 }, { "epoch": 13.476614699331849, "grad_norm": 17.695465087890625, "learning_rate": 1e-06, "loss": 0.4803, "num_input_tokens_seen": 338998860, "step": 6051 }, { "epoch": 13.476614699331849, "loss": 0.4964655339717865, "loss_ce": 0.0001276445691473782, "loss_iou": 0.21875, "loss_num": 0.01190185546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 338998860, "step": 6051 }, { "epoch": 13.478841870824054, "grad_norm": 21.691905975341797, "learning_rate": 1e-06, "loss": 0.6199, "num_input_tokens_seen": 339051660, "step": 6052 }, { "epoch": 13.478841870824054, "loss": 0.5180391073226929, "loss_ce": 9.476025297772139e-05, "loss_iou": 0.2197265625, "loss_num": 0.015869140625, "loss_xval": 0.51953125, "num_input_tokens_seen": 339051660, "step": 6052 }, { "epoch": 13.481069042316259, "grad_norm": 17.7266845703125, "learning_rate": 1e-06, "loss": 0.5722, "num_input_tokens_seen": 339107488, "step": 6053 }, { "epoch": 13.481069042316259, "loss": 0.4541052579879761, "loss_ce": 0.00012577215966302902, "loss_iou": 0.205078125, "loss_num": 0.0087890625, "loss_xval": 0.453125, "num_input_tokens_seen": 339107488, "step": 6053 }, { "epoch": 13.483296213808464, "grad_norm": 15.927001953125, "learning_rate": 1e-06, "loss": 0.4988, "num_input_tokens_seen": 339162448, "step": 6054 }, { "epoch": 13.483296213808464, "loss": 0.6007080078125, "loss_ce": 0.00012205771054141223, "loss_iou": 0.26171875, "loss_num": 0.0157470703125, "loss_xval": 0.6015625, "num_input_tokens_seen": 339162448, "step": 6054 }, { "epoch": 13.485523385300668, "grad_norm": 23.987932205200195, "learning_rate": 1e-06, "loss": 0.6223, "num_input_tokens_seen": 339219908, "step": 6055 }, { "epoch": 13.485523385300668, "loss": 0.4472610354423523, "loss_ce": 0.00011747775715775788, "loss_iou": 0.1953125, "loss_num": 0.011474609375, "loss_xval": 0.447265625, "num_input_tokens_seen": 339219908, "step": 6055 }, { "epoch": 13.487750556792873, "grad_norm": 26.649721145629883, "learning_rate": 1e-06, "loss": 0.3933, "num_input_tokens_seen": 339274728, "step": 6056 }, { "epoch": 13.487750556792873, "loss": 0.37924766540527344, "loss_ce": 9.728018630994484e-05, "loss_iou": 0.1640625, "loss_num": 0.0101318359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 339274728, "step": 6056 }, { "epoch": 13.489977728285078, "grad_norm": 11.94345760345459, "learning_rate": 1e-06, "loss": 0.3739, "num_input_tokens_seen": 339332392, "step": 6057 }, { "epoch": 13.489977728285078, "loss": 0.36925819516181946, "loss_ce": 0.00011755910963984206, "loss_iou": 0.1728515625, "loss_num": 0.004791259765625, "loss_xval": 0.369140625, "num_input_tokens_seen": 339332392, "step": 6057 }, { "epoch": 13.492204899777283, "grad_norm": 19.75101089477539, "learning_rate": 1e-06, "loss": 0.5577, "num_input_tokens_seen": 339389392, "step": 6058 }, { "epoch": 13.492204899777283, "loss": 0.7065274715423584, "loss_ce": 0.00010661823762347922, "loss_iou": 0.279296875, "loss_num": 0.0294189453125, "loss_xval": 0.70703125, "num_input_tokens_seen": 339389392, "step": 6058 }, { "epoch": 13.494432071269488, "grad_norm": 15.584753036499023, "learning_rate": 1e-06, "loss": 0.3834, "num_input_tokens_seen": 339446776, "step": 6059 }, { "epoch": 13.494432071269488, "loss": 0.4769487977027893, "loss_ce": 0.00011162673763465136, "loss_iou": 0.1884765625, "loss_num": 0.02001953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 339446776, "step": 6059 }, { "epoch": 13.496659242761693, "grad_norm": 17.79234504699707, "learning_rate": 1e-06, "loss": 0.504, "num_input_tokens_seen": 339503364, "step": 6060 }, { "epoch": 13.496659242761693, "loss": 0.5225712060928345, "loss_ce": 0.00011029178131138906, "loss_iou": 0.2001953125, "loss_num": 0.0242919921875, "loss_xval": 0.5234375, "num_input_tokens_seen": 339503364, "step": 6060 }, { "epoch": 13.498886414253898, "grad_norm": 16.18769073486328, "learning_rate": 1e-06, "loss": 0.4275, "num_input_tokens_seen": 339561052, "step": 6061 }, { "epoch": 13.498886414253898, "loss": 0.40653717517852783, "loss_ce": 0.00013457259046845138, "loss_iou": 0.166015625, "loss_num": 0.0150146484375, "loss_xval": 0.40625, "num_input_tokens_seen": 339561052, "step": 6061 }, { "epoch": 13.501113585746102, "grad_norm": 47.07794952392578, "learning_rate": 1e-06, "loss": 0.4221, "num_input_tokens_seen": 339615028, "step": 6062 }, { "epoch": 13.501113585746102, "loss": 0.5137805342674255, "loss_ce": 0.00010863743227673694, "loss_iou": 0.220703125, "loss_num": 0.0145263671875, "loss_xval": 0.515625, "num_input_tokens_seen": 339615028, "step": 6062 }, { "epoch": 13.503340757238307, "grad_norm": 23.532276153564453, "learning_rate": 1e-06, "loss": 0.5707, "num_input_tokens_seen": 339671324, "step": 6063 }, { "epoch": 13.503340757238307, "loss": 0.5221333503723145, "loss_ce": 0.00016070085985120386, "loss_iou": 0.2314453125, "loss_num": 0.01171875, "loss_xval": 0.5234375, "num_input_tokens_seen": 339671324, "step": 6063 }, { "epoch": 13.505567928730512, "grad_norm": 15.250123977661133, "learning_rate": 1e-06, "loss": 0.3295, "num_input_tokens_seen": 339726244, "step": 6064 }, { "epoch": 13.505567928730512, "loss": 0.3152201473712921, "loss_ce": 9.563060302753001e-05, "loss_iou": 0.12451171875, "loss_num": 0.01318359375, "loss_xval": 0.314453125, "num_input_tokens_seen": 339726244, "step": 6064 }, { "epoch": 13.507795100222717, "grad_norm": 30.90220832824707, "learning_rate": 1e-06, "loss": 0.6724, "num_input_tokens_seen": 339782572, "step": 6065 }, { "epoch": 13.507795100222717, "loss": 0.5396183729171753, "loss_ce": 0.00012867330224253237, "loss_iou": 0.234375, "loss_num": 0.0142822265625, "loss_xval": 0.5390625, "num_input_tokens_seen": 339782572, "step": 6065 }, { "epoch": 13.510022271714922, "grad_norm": 23.081607818603516, "learning_rate": 1e-06, "loss": 0.4508, "num_input_tokens_seen": 339836684, "step": 6066 }, { "epoch": 13.510022271714922, "loss": 0.37926536798477173, "loss_ce": 0.00011500762775540352, "loss_iou": 0.1611328125, "loss_num": 0.01123046875, "loss_xval": 0.37890625, "num_input_tokens_seen": 339836684, "step": 6066 }, { "epoch": 13.512249443207127, "grad_norm": 16.77637481689453, "learning_rate": 1e-06, "loss": 0.4764, "num_input_tokens_seen": 339895292, "step": 6067 }, { "epoch": 13.512249443207127, "loss": 0.5568765997886658, "loss_ce": 0.00011388568964321166, "loss_iou": 0.2451171875, "loss_num": 0.01318359375, "loss_xval": 0.55859375, "num_input_tokens_seen": 339895292, "step": 6067 }, { "epoch": 13.514476614699332, "grad_norm": 14.442283630371094, "learning_rate": 1e-06, "loss": 0.3909, "num_input_tokens_seen": 339952572, "step": 6068 }, { "epoch": 13.514476614699332, "loss": 0.33204570412635803, "loss_ce": 0.000136529139126651, "loss_iou": 0.13671875, "loss_num": 0.011474609375, "loss_xval": 0.33203125, "num_input_tokens_seen": 339952572, "step": 6068 }, { "epoch": 13.516703786191536, "grad_norm": 28.08944320678711, "learning_rate": 1e-06, "loss": 0.4879, "num_input_tokens_seen": 340010084, "step": 6069 }, { "epoch": 13.516703786191536, "loss": 0.3379353880882263, "loss_ce": 0.00010579422814771533, "loss_iou": 0.142578125, "loss_num": 0.01055908203125, "loss_xval": 0.337890625, "num_input_tokens_seen": 340010084, "step": 6069 }, { "epoch": 13.518930957683741, "grad_norm": 17.983135223388672, "learning_rate": 1e-06, "loss": 0.4794, "num_input_tokens_seen": 340068108, "step": 6070 }, { "epoch": 13.518930957683741, "loss": 0.3784530460834503, "loss_ce": 9.612538269720972e-05, "loss_iou": 0.1552734375, "loss_num": 0.01373291015625, "loss_xval": 0.37890625, "num_input_tokens_seen": 340068108, "step": 6070 }, { "epoch": 13.521158129175946, "grad_norm": 31.96721076965332, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 340125068, "step": 6071 }, { "epoch": 13.521158129175946, "loss": 0.4451667070388794, "loss_ce": 9.836716344580054e-05, "loss_iou": 0.1875, "loss_num": 0.0140380859375, "loss_xval": 0.4453125, "num_input_tokens_seen": 340125068, "step": 6071 }, { "epoch": 13.523385300668151, "grad_norm": 13.225302696228027, "learning_rate": 1e-06, "loss": 0.4289, "num_input_tokens_seen": 340183096, "step": 6072 }, { "epoch": 13.523385300668151, "loss": 0.4312567710876465, "loss_ce": 0.00010440793994348496, "loss_iou": 0.19140625, "loss_num": 0.00970458984375, "loss_xval": 0.431640625, "num_input_tokens_seen": 340183096, "step": 6072 }, { "epoch": 13.525612472160356, "grad_norm": 11.266600608825684, "learning_rate": 1e-06, "loss": 0.2725, "num_input_tokens_seen": 340239536, "step": 6073 }, { "epoch": 13.525612472160356, "loss": 0.353848934173584, "loss_ce": 0.00011968802573392168, "loss_iou": 0.1572265625, "loss_num": 0.00787353515625, "loss_xval": 0.353515625, "num_input_tokens_seen": 340239536, "step": 6073 }, { "epoch": 13.52783964365256, "grad_norm": 17.064437866210938, "learning_rate": 1e-06, "loss": 0.4341, "num_input_tokens_seen": 340295956, "step": 6074 }, { "epoch": 13.52783964365256, "loss": 0.6365495920181274, "loss_ce": 0.00013600349484477192, "loss_iou": 0.25390625, "loss_num": 0.0255126953125, "loss_xval": 0.63671875, "num_input_tokens_seen": 340295956, "step": 6074 }, { "epoch": 13.530066815144766, "grad_norm": 22.03349494934082, "learning_rate": 1e-06, "loss": 0.4328, "num_input_tokens_seen": 340353864, "step": 6075 }, { "epoch": 13.530066815144766, "loss": 0.44873154163360596, "loss_ce": 0.00012314703781157732, "loss_iou": 0.2119140625, "loss_num": 0.005126953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 340353864, "step": 6075 }, { "epoch": 13.53229398663697, "grad_norm": 19.127866744995117, "learning_rate": 1e-06, "loss": 0.6748, "num_input_tokens_seen": 340407388, "step": 6076 }, { "epoch": 13.53229398663697, "loss": 0.7257640361785889, "loss_ce": 0.0001475829049013555, "loss_iou": 0.26953125, "loss_num": 0.03759765625, "loss_xval": 0.7265625, "num_input_tokens_seen": 340407388, "step": 6076 }, { "epoch": 13.534521158129175, "grad_norm": 21.562355041503906, "learning_rate": 1e-06, "loss": 0.5317, "num_input_tokens_seen": 340463944, "step": 6077 }, { "epoch": 13.534521158129175, "loss": 0.5379794836044312, "loss_ce": 0.00013768361532129347, "loss_iou": 0.2255859375, "loss_num": 0.017333984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 340463944, "step": 6077 }, { "epoch": 13.53674832962138, "grad_norm": 18.547332763671875, "learning_rate": 1e-06, "loss": 0.4863, "num_input_tokens_seen": 340516692, "step": 6078 }, { "epoch": 13.53674832962138, "loss": 0.3476347327232361, "loss_ce": 0.0001005365265882574, "loss_iou": 0.16015625, "loss_num": 0.005523681640625, "loss_xval": 0.34765625, "num_input_tokens_seen": 340516692, "step": 6078 }, { "epoch": 13.538975501113585, "grad_norm": 29.104293823242188, "learning_rate": 1e-06, "loss": 0.5956, "num_input_tokens_seen": 340571300, "step": 6079 }, { "epoch": 13.538975501113585, "loss": 0.6296223402023315, "loss_ce": 0.00010570493031991646, "loss_iou": 0.2578125, "loss_num": 0.02294921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 340571300, "step": 6079 }, { "epoch": 13.54120267260579, "grad_norm": 21.421527862548828, "learning_rate": 1e-06, "loss": 0.4915, "num_input_tokens_seen": 340628576, "step": 6080 }, { "epoch": 13.54120267260579, "loss": 0.39269521832466125, "loss_ce": 0.00011710192484315485, "loss_iou": 0.1826171875, "loss_num": 0.00543212890625, "loss_xval": 0.392578125, "num_input_tokens_seen": 340628576, "step": 6080 }, { "epoch": 13.543429844097995, "grad_norm": 20.787569046020508, "learning_rate": 1e-06, "loss": 0.5235, "num_input_tokens_seen": 340685408, "step": 6081 }, { "epoch": 13.543429844097995, "loss": 0.7552393078804016, "loss_ce": 0.0008448001462966204, "loss_iou": 0.30859375, "loss_num": 0.027099609375, "loss_xval": 0.75390625, "num_input_tokens_seen": 340685408, "step": 6081 }, { "epoch": 13.5456570155902, "grad_norm": 18.21969985961914, "learning_rate": 1e-06, "loss": 0.5217, "num_input_tokens_seen": 340741140, "step": 6082 }, { "epoch": 13.5456570155902, "loss": 0.579943060874939, "loss_ce": 0.00010910046694334596, "loss_iou": 0.25390625, "loss_num": 0.01416015625, "loss_xval": 0.578125, "num_input_tokens_seen": 340741140, "step": 6082 }, { "epoch": 13.547884187082406, "grad_norm": 22.53654670715332, "learning_rate": 1e-06, "loss": 0.4789, "num_input_tokens_seen": 340796904, "step": 6083 }, { "epoch": 13.547884187082406, "loss": 0.3791908621788025, "loss_ce": 0.00010148352885153145, "loss_iou": 0.171875, "loss_num": 0.006927490234375, "loss_xval": 0.37890625, "num_input_tokens_seen": 340796904, "step": 6083 }, { "epoch": 13.550111358574611, "grad_norm": 25.32193946838379, "learning_rate": 1e-06, "loss": 0.3296, "num_input_tokens_seen": 340852732, "step": 6084 }, { "epoch": 13.550111358574611, "loss": 0.39512139558792114, "loss_ce": 0.00010186532745137811, "loss_iou": 0.1689453125, "loss_num": 0.0115966796875, "loss_xval": 0.39453125, "num_input_tokens_seen": 340852732, "step": 6084 }, { "epoch": 13.552338530066816, "grad_norm": 16.27466583251953, "learning_rate": 1e-06, "loss": 0.3523, "num_input_tokens_seen": 340909556, "step": 6085 }, { "epoch": 13.552338530066816, "loss": 0.38939177989959717, "loss_ce": 0.00010956230107694864, "loss_iou": 0.1806640625, "loss_num": 0.005706787109375, "loss_xval": 0.388671875, "num_input_tokens_seen": 340909556, "step": 6085 }, { "epoch": 13.55456570155902, "grad_norm": 82.14557647705078, "learning_rate": 1e-06, "loss": 0.5106, "num_input_tokens_seen": 340967244, "step": 6086 }, { "epoch": 13.55456570155902, "loss": 0.40943825244903564, "loss_ce": 0.00013651512563228607, "loss_iou": 0.1728515625, "loss_num": 0.0126953125, "loss_xval": 0.41015625, "num_input_tokens_seen": 340967244, "step": 6086 }, { "epoch": 13.556792873051226, "grad_norm": 24.278966903686523, "learning_rate": 1e-06, "loss": 0.3729, "num_input_tokens_seen": 341022336, "step": 6087 }, { "epoch": 13.556792873051226, "loss": 0.3812282681465149, "loss_ce": 0.00012474997492972761, "loss_iou": 0.1689453125, "loss_num": 0.0086669921875, "loss_xval": 0.380859375, "num_input_tokens_seen": 341022336, "step": 6087 }, { "epoch": 13.55902004454343, "grad_norm": 17.74454689025879, "learning_rate": 1e-06, "loss": 0.5962, "num_input_tokens_seen": 341078164, "step": 6088 }, { "epoch": 13.55902004454343, "loss": 0.34824979305267334, "loss_ce": 0.00010525665857130662, "loss_iou": 0.150390625, "loss_num": 0.0096435546875, "loss_xval": 0.34765625, "num_input_tokens_seen": 341078164, "step": 6088 }, { "epoch": 13.561247216035635, "grad_norm": 15.242683410644531, "learning_rate": 1e-06, "loss": 0.3475, "num_input_tokens_seen": 341135916, "step": 6089 }, { "epoch": 13.561247216035635, "loss": 0.34324419498443604, "loss_ce": 0.00010453617142047733, "loss_iou": 0.158203125, "loss_num": 0.005462646484375, "loss_xval": 0.34375, "num_input_tokens_seen": 341135916, "step": 6089 }, { "epoch": 13.56347438752784, "grad_norm": 19.24591827392578, "learning_rate": 1e-06, "loss": 0.5041, "num_input_tokens_seen": 341190420, "step": 6090 }, { "epoch": 13.56347438752784, "loss": 0.36112260818481445, "loss_ce": 0.00016069506818894297, "loss_iou": 0.1708984375, "loss_num": 0.003875732421875, "loss_xval": 0.361328125, "num_input_tokens_seen": 341190420, "step": 6090 }, { "epoch": 13.565701559020045, "grad_norm": 20.001699447631836, "learning_rate": 1e-06, "loss": 0.7154, "num_input_tokens_seen": 341245680, "step": 6091 }, { "epoch": 13.565701559020045, "loss": 0.670586109161377, "loss_ce": 0.00017597324040252715, "loss_iou": 0.2890625, "loss_num": 0.0184326171875, "loss_xval": 0.671875, "num_input_tokens_seen": 341245680, "step": 6091 }, { "epoch": 13.56792873051225, "grad_norm": 21.868173599243164, "learning_rate": 1e-06, "loss": 0.4888, "num_input_tokens_seen": 341299816, "step": 6092 }, { "epoch": 13.56792873051225, "loss": 0.49984219670295715, "loss_ce": 0.00011683723278110847, "loss_iou": 0.21484375, "loss_num": 0.01397705078125, "loss_xval": 0.5, "num_input_tokens_seen": 341299816, "step": 6092 }, { "epoch": 13.570155902004455, "grad_norm": 26.928741455078125, "learning_rate": 1e-06, "loss": 0.4256, "num_input_tokens_seen": 341351004, "step": 6093 }, { "epoch": 13.570155902004455, "loss": 0.5890183448791504, "loss_ce": 0.00015114745474420488, "loss_iou": 0.2392578125, "loss_num": 0.0220947265625, "loss_xval": 0.58984375, "num_input_tokens_seen": 341351004, "step": 6093 }, { "epoch": 13.57238307349666, "grad_norm": 15.756858825683594, "learning_rate": 1e-06, "loss": 0.4628, "num_input_tokens_seen": 341406828, "step": 6094 }, { "epoch": 13.57238307349666, "loss": 0.608989953994751, "loss_ce": 0.00010319190187146887, "loss_iou": 0.259765625, "loss_num": 0.01806640625, "loss_xval": 0.609375, "num_input_tokens_seen": 341406828, "step": 6094 }, { "epoch": 13.574610244988865, "grad_norm": 16.31437110900879, "learning_rate": 1e-06, "loss": 0.5845, "num_input_tokens_seen": 341463276, "step": 6095 }, { "epoch": 13.574610244988865, "loss": 0.5452686548233032, "loss_ce": 0.0001026621539494954, "loss_iou": 0.2412109375, "loss_num": 0.01263427734375, "loss_xval": 0.546875, "num_input_tokens_seen": 341463276, "step": 6095 }, { "epoch": 13.57683741648107, "grad_norm": 22.215185165405273, "learning_rate": 1e-06, "loss": 0.4455, "num_input_tokens_seen": 341520420, "step": 6096 }, { "epoch": 13.57683741648107, "loss": 0.4676268398761749, "loss_ce": 9.754978236742318e-05, "loss_iou": 0.208984375, "loss_num": 0.00994873046875, "loss_xval": 0.466796875, "num_input_tokens_seen": 341520420, "step": 6096 }, { "epoch": 13.579064587973274, "grad_norm": 15.833555221557617, "learning_rate": 1e-06, "loss": 0.5093, "num_input_tokens_seen": 341577448, "step": 6097 }, { "epoch": 13.579064587973274, "loss": 0.5223226547241211, "loss_ce": 0.00010583880066405982, "loss_iou": 0.2314453125, "loss_num": 0.0118408203125, "loss_xval": 0.5234375, "num_input_tokens_seen": 341577448, "step": 6097 }, { "epoch": 13.58129175946548, "grad_norm": 22.064420700073242, "learning_rate": 1e-06, "loss": 0.4894, "num_input_tokens_seen": 341634480, "step": 6098 }, { "epoch": 13.58129175946548, "loss": 0.40562325716018677, "loss_ce": 0.00010569434380158782, "loss_iou": 0.1796875, "loss_num": 0.00909423828125, "loss_xval": 0.40625, "num_input_tokens_seen": 341634480, "step": 6098 }, { "epoch": 13.583518930957684, "grad_norm": 16.516891479492188, "learning_rate": 1e-06, "loss": 0.4845, "num_input_tokens_seen": 341690908, "step": 6099 }, { "epoch": 13.583518930957684, "loss": 0.36192750930786133, "loss_ce": 0.00011110490595456213, "loss_iou": 0.1591796875, "loss_num": 0.00885009765625, "loss_xval": 0.361328125, "num_input_tokens_seen": 341690908, "step": 6099 }, { "epoch": 13.585746102449889, "grad_norm": 30.61893081665039, "learning_rate": 1e-06, "loss": 0.4235, "num_input_tokens_seen": 341744072, "step": 6100 }, { "epoch": 13.585746102449889, "loss": 0.38622716069221497, "loss_ce": 0.0001187745074275881, "loss_iou": 0.1728515625, "loss_num": 0.008056640625, "loss_xval": 0.38671875, "num_input_tokens_seen": 341744072, "step": 6100 }, { "epoch": 13.587973273942094, "grad_norm": 14.904489517211914, "learning_rate": 1e-06, "loss": 0.3583, "num_input_tokens_seen": 341802688, "step": 6101 }, { "epoch": 13.587973273942094, "loss": 0.3524148464202881, "loss_ce": 0.00011993409134447575, "loss_iou": 0.1640625, "loss_num": 0.004791259765625, "loss_xval": 0.3515625, "num_input_tokens_seen": 341802688, "step": 6101 }, { "epoch": 13.590200445434299, "grad_norm": 24.03605842590332, "learning_rate": 1e-06, "loss": 0.4686, "num_input_tokens_seen": 341857712, "step": 6102 }, { "epoch": 13.590200445434299, "loss": 0.44786059856414795, "loss_ce": 0.00022875834838487208, "loss_iou": 0.1904296875, "loss_num": 0.0133056640625, "loss_xval": 0.447265625, "num_input_tokens_seen": 341857712, "step": 6102 }, { "epoch": 13.592427616926503, "grad_norm": 41.318817138671875, "learning_rate": 1e-06, "loss": 0.4597, "num_input_tokens_seen": 341911152, "step": 6103 }, { "epoch": 13.592427616926503, "loss": 0.3626706302165985, "loss_ce": 0.0001218135585077107, "loss_iou": 0.1650390625, "loss_num": 0.006591796875, "loss_xval": 0.36328125, "num_input_tokens_seen": 341911152, "step": 6103 }, { "epoch": 13.594654788418708, "grad_norm": 27.05071449279785, "learning_rate": 1e-06, "loss": 0.4711, "num_input_tokens_seen": 341965840, "step": 6104 }, { "epoch": 13.594654788418708, "loss": 0.4232081174850464, "loss_ce": 0.00011239905143156648, "loss_iou": 0.181640625, "loss_num": 0.0118408203125, "loss_xval": 0.423828125, "num_input_tokens_seen": 341965840, "step": 6104 }, { "epoch": 13.596881959910913, "grad_norm": 16.308061599731445, "learning_rate": 1e-06, "loss": 0.4512, "num_input_tokens_seen": 342024032, "step": 6105 }, { "epoch": 13.596881959910913, "loss": 0.41770923137664795, "loss_ce": 0.0001066841505235061, "loss_iou": 0.1845703125, "loss_num": 0.00958251953125, "loss_xval": 0.41796875, "num_input_tokens_seen": 342024032, "step": 6105 }, { "epoch": 13.599109131403118, "grad_norm": 15.531981468200684, "learning_rate": 1e-06, "loss": 0.3986, "num_input_tokens_seen": 342078468, "step": 6106 }, { "epoch": 13.599109131403118, "loss": 0.4702088534832001, "loss_ce": 0.00011607841588556767, "loss_iou": 0.20703125, "loss_num": 0.01104736328125, "loss_xval": 0.470703125, "num_input_tokens_seen": 342078468, "step": 6106 }, { "epoch": 13.601336302895323, "grad_norm": 21.346141815185547, "learning_rate": 1e-06, "loss": 0.4758, "num_input_tokens_seen": 342134352, "step": 6107 }, { "epoch": 13.601336302895323, "loss": 0.43420565128326416, "loss_ce": 0.00012361952394712716, "loss_iou": 0.1923828125, "loss_num": 0.009765625, "loss_xval": 0.43359375, "num_input_tokens_seen": 342134352, "step": 6107 }, { "epoch": 13.603563474387528, "grad_norm": 18.531774520874023, "learning_rate": 1e-06, "loss": 0.4012, "num_input_tokens_seen": 342192804, "step": 6108 }, { "epoch": 13.603563474387528, "loss": 0.3070923388004303, "loss_ce": 8.548818004783243e-05, "loss_iou": 0.13671875, "loss_num": 0.00689697265625, "loss_xval": 0.306640625, "num_input_tokens_seen": 342192804, "step": 6108 }, { "epoch": 13.605790645879733, "grad_norm": 26.71335792541504, "learning_rate": 1e-06, "loss": 0.5715, "num_input_tokens_seen": 342249992, "step": 6109 }, { "epoch": 13.605790645879733, "loss": 0.5036748051643372, "loss_ce": 0.00013476383173838258, "loss_iou": 0.203125, "loss_num": 0.0194091796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 342249992, "step": 6109 }, { "epoch": 13.608017817371937, "grad_norm": 18.72128677368164, "learning_rate": 1e-06, "loss": 0.4366, "num_input_tokens_seen": 342306096, "step": 6110 }, { "epoch": 13.608017817371937, "loss": 0.39169156551361084, "loss_ce": 0.0001129057418438606, "loss_iou": 0.1787109375, "loss_num": 0.00665283203125, "loss_xval": 0.390625, "num_input_tokens_seen": 342306096, "step": 6110 }, { "epoch": 13.610244988864142, "grad_norm": 19.031667709350586, "learning_rate": 1e-06, "loss": 0.4382, "num_input_tokens_seen": 342360516, "step": 6111 }, { "epoch": 13.610244988864142, "loss": 0.417463481426239, "loss_ce": 0.00010505890531931072, "loss_iou": 0.181640625, "loss_num": 0.0108642578125, "loss_xval": 0.41796875, "num_input_tokens_seen": 342360516, "step": 6111 }, { "epoch": 13.612472160356347, "grad_norm": 16.581167221069336, "learning_rate": 1e-06, "loss": 0.4032, "num_input_tokens_seen": 342419136, "step": 6112 }, { "epoch": 13.612472160356347, "loss": 0.46885186433792114, "loss_ce": 0.00010187575389863923, "loss_iou": 0.19140625, "loss_num": 0.0174560546875, "loss_xval": 0.46875, "num_input_tokens_seen": 342419136, "step": 6112 }, { "epoch": 13.614699331848552, "grad_norm": 21.586606979370117, "learning_rate": 1e-06, "loss": 0.4784, "num_input_tokens_seen": 342474436, "step": 6113 }, { "epoch": 13.614699331848552, "loss": 0.6273195743560791, "loss_ce": 0.00024438908440060914, "loss_iou": 0.26171875, "loss_num": 0.02099609375, "loss_xval": 0.62890625, "num_input_tokens_seen": 342474436, "step": 6113 }, { "epoch": 13.616926503340757, "grad_norm": 19.1909122467041, "learning_rate": 1e-06, "loss": 0.4076, "num_input_tokens_seen": 342531496, "step": 6114 }, { "epoch": 13.616926503340757, "loss": 0.5540522933006287, "loss_ce": 9.722121467348188e-05, "loss_iou": 0.2451171875, "loss_num": 0.0125732421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 342531496, "step": 6114 }, { "epoch": 13.619153674832962, "grad_norm": 15.85007381439209, "learning_rate": 1e-06, "loss": 0.4735, "num_input_tokens_seen": 342587152, "step": 6115 }, { "epoch": 13.619153674832962, "loss": 0.47263315320014954, "loss_ce": 0.000709333224222064, "loss_iou": 0.19921875, "loss_num": 0.014892578125, "loss_xval": 0.47265625, "num_input_tokens_seen": 342587152, "step": 6115 }, { "epoch": 13.621380846325167, "grad_norm": 18.876895904541016, "learning_rate": 1e-06, "loss": 0.4915, "num_input_tokens_seen": 342642656, "step": 6116 }, { "epoch": 13.621380846325167, "loss": 0.5616579651832581, "loss_ce": 0.0001345211494481191, "loss_iou": 0.2392578125, "loss_num": 0.016845703125, "loss_xval": 0.5625, "num_input_tokens_seen": 342642656, "step": 6116 }, { "epoch": 13.623608017817372, "grad_norm": 18.534420013427734, "learning_rate": 1e-06, "loss": 0.4426, "num_input_tokens_seen": 342696908, "step": 6117 }, { "epoch": 13.623608017817372, "loss": 0.55345219373703, "loss_ce": 0.00010746420593932271, "loss_iou": 0.2421875, "loss_num": 0.01373291015625, "loss_xval": 0.5546875, "num_input_tokens_seen": 342696908, "step": 6117 }, { "epoch": 13.625835189309576, "grad_norm": 13.372831344604492, "learning_rate": 1e-06, "loss": 0.4674, "num_input_tokens_seen": 342754328, "step": 6118 }, { "epoch": 13.625835189309576, "loss": 0.5626158714294434, "loss_ce": 0.00011584434105316177, "loss_iou": 0.216796875, "loss_num": 0.0257568359375, "loss_xval": 0.5625, "num_input_tokens_seen": 342754328, "step": 6118 }, { "epoch": 13.628062360801781, "grad_norm": 34.681114196777344, "learning_rate": 1e-06, "loss": 0.5469, "num_input_tokens_seen": 342812480, "step": 6119 }, { "epoch": 13.628062360801781, "loss": 0.5858219861984253, "loss_ce": 0.000128652696730569, "loss_iou": 0.259765625, "loss_num": 0.01300048828125, "loss_xval": 0.5859375, "num_input_tokens_seen": 342812480, "step": 6119 }, { "epoch": 13.630289532293986, "grad_norm": 20.8580265045166, "learning_rate": 1e-06, "loss": 0.5921, "num_input_tokens_seen": 342868916, "step": 6120 }, { "epoch": 13.630289532293986, "loss": 0.49220043420791626, "loss_ce": 0.00013499979104381055, "loss_iou": 0.2275390625, "loss_num": 0.00750732421875, "loss_xval": 0.4921875, "num_input_tokens_seen": 342868916, "step": 6120 }, { "epoch": 13.632516703786191, "grad_norm": 28.065649032592773, "learning_rate": 1e-06, "loss": 0.4325, "num_input_tokens_seen": 342923592, "step": 6121 }, { "epoch": 13.632516703786191, "loss": 0.41245564818382263, "loss_ce": 0.00010210397886112332, "loss_iou": 0.189453125, "loss_num": 0.006622314453125, "loss_xval": 0.412109375, "num_input_tokens_seen": 342923592, "step": 6121 }, { "epoch": 13.634743875278396, "grad_norm": 21.294170379638672, "learning_rate": 1e-06, "loss": 0.4094, "num_input_tokens_seen": 342978364, "step": 6122 }, { "epoch": 13.634743875278396, "loss": 0.43907153606414795, "loss_ce": 0.00010670385381672531, "loss_iou": 0.19921875, "loss_num": 0.00823974609375, "loss_xval": 0.439453125, "num_input_tokens_seen": 342978364, "step": 6122 }, { "epoch": 13.6369710467706, "grad_norm": 15.803836822509766, "learning_rate": 1e-06, "loss": 0.6057, "num_input_tokens_seen": 343034688, "step": 6123 }, { "epoch": 13.6369710467706, "loss": 0.6668506264686584, "loss_ce": 0.00010255983943352476, "loss_iou": 0.302734375, "loss_num": 0.0120849609375, "loss_xval": 0.66796875, "num_input_tokens_seen": 343034688, "step": 6123 }, { "epoch": 13.639198218262806, "grad_norm": 16.880170822143555, "learning_rate": 1e-06, "loss": 0.5102, "num_input_tokens_seen": 343091224, "step": 6124 }, { "epoch": 13.639198218262806, "loss": 0.4283756613731384, "loss_ce": 0.0001530120789539069, "loss_iou": 0.19140625, "loss_num": 0.009033203125, "loss_xval": 0.427734375, "num_input_tokens_seen": 343091224, "step": 6124 }, { "epoch": 13.64142538975501, "grad_norm": 22.758419036865234, "learning_rate": 1e-06, "loss": 0.5235, "num_input_tokens_seen": 343147108, "step": 6125 }, { "epoch": 13.64142538975501, "loss": 0.44180482625961304, "loss_ce": 0.00015441025607287884, "loss_iou": 0.1845703125, "loss_num": 0.01446533203125, "loss_xval": 0.44140625, "num_input_tokens_seen": 343147108, "step": 6125 }, { "epoch": 13.643652561247215, "grad_norm": 24.595998764038086, "learning_rate": 1e-06, "loss": 0.5212, "num_input_tokens_seen": 343202528, "step": 6126 }, { "epoch": 13.643652561247215, "loss": 0.6394776701927185, "loss_ce": 0.0001954361068783328, "loss_iou": 0.279296875, "loss_num": 0.0164794921875, "loss_xval": 0.640625, "num_input_tokens_seen": 343202528, "step": 6126 }, { "epoch": 13.64587973273942, "grad_norm": 13.369487762451172, "learning_rate": 1e-06, "loss": 0.383, "num_input_tokens_seen": 343259188, "step": 6127 }, { "epoch": 13.64587973273942, "loss": 0.451729953289032, "loss_ce": 0.0004359965678304434, "loss_iou": 0.1826171875, "loss_num": 0.01708984375, "loss_xval": 0.451171875, "num_input_tokens_seen": 343259188, "step": 6127 }, { "epoch": 13.648106904231625, "grad_norm": 20.52755355834961, "learning_rate": 1e-06, "loss": 0.4196, "num_input_tokens_seen": 343316188, "step": 6128 }, { "epoch": 13.648106904231625, "loss": 0.3270159065723419, "loss_ce": 0.00011160006397403777, "loss_iou": 0.154296875, "loss_num": 0.003692626953125, "loss_xval": 0.326171875, "num_input_tokens_seen": 343316188, "step": 6128 }, { "epoch": 13.65033407572383, "grad_norm": 21.22062873840332, "learning_rate": 1e-06, "loss": 0.4893, "num_input_tokens_seen": 343369164, "step": 6129 }, { "epoch": 13.65033407572383, "loss": 0.4859585165977478, "loss_ce": 0.00011869698209920898, "loss_iou": 0.2158203125, "loss_num": 0.01104736328125, "loss_xval": 0.486328125, "num_input_tokens_seen": 343369164, "step": 6129 }, { "epoch": 13.652561247216035, "grad_norm": 21.014507293701172, "learning_rate": 1e-06, "loss": 0.5389, "num_input_tokens_seen": 343425304, "step": 6130 }, { "epoch": 13.652561247216035, "loss": 0.4401944875717163, "loss_ce": 0.00013100114301778376, "loss_iou": 0.19921875, "loss_num": 0.0084228515625, "loss_xval": 0.439453125, "num_input_tokens_seen": 343425304, "step": 6130 }, { "epoch": 13.654788418708241, "grad_norm": 25.746784210205078, "learning_rate": 1e-06, "loss": 0.5463, "num_input_tokens_seen": 343483376, "step": 6131 }, { "epoch": 13.654788418708241, "loss": 0.7010675668716431, "loss_ce": 0.00013978403876535594, "loss_iou": 0.302734375, "loss_num": 0.0189208984375, "loss_xval": 0.69921875, "num_input_tokens_seen": 343483376, "step": 6131 }, { "epoch": 13.657015590200446, "grad_norm": 13.484420776367188, "learning_rate": 1e-06, "loss": 0.4436, "num_input_tokens_seen": 343540884, "step": 6132 }, { "epoch": 13.657015590200446, "loss": 0.44479644298553467, "loss_ce": 9.430477803107351e-05, "loss_iou": 0.201171875, "loss_num": 0.008544921875, "loss_xval": 0.4453125, "num_input_tokens_seen": 343540884, "step": 6132 }, { "epoch": 13.659242761692651, "grad_norm": 27.06049346923828, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 343592412, "step": 6133 }, { "epoch": 13.659242761692651, "loss": 0.40442535281181335, "loss_ce": 0.00012847778270952404, "loss_iou": 0.173828125, "loss_num": 0.0113525390625, "loss_xval": 0.404296875, "num_input_tokens_seen": 343592412, "step": 6133 }, { "epoch": 13.661469933184856, "grad_norm": 17.517444610595703, "learning_rate": 1e-06, "loss": 0.5275, "num_input_tokens_seen": 343646720, "step": 6134 }, { "epoch": 13.661469933184856, "loss": 0.44858014583587646, "loss_ce": 0.00012434981181286275, "loss_iou": 0.201171875, "loss_num": 0.009033203125, "loss_xval": 0.44921875, "num_input_tokens_seen": 343646720, "step": 6134 }, { "epoch": 13.66369710467706, "grad_norm": 13.671886444091797, "learning_rate": 1e-06, "loss": 0.4997, "num_input_tokens_seen": 343703044, "step": 6135 }, { "epoch": 13.66369710467706, "loss": 0.3881835341453552, "loss_ce": 0.00015255186008289456, "loss_iou": 0.173828125, "loss_num": 0.00799560546875, "loss_xval": 0.388671875, "num_input_tokens_seen": 343703044, "step": 6135 }, { "epoch": 13.665924276169266, "grad_norm": 28.485546112060547, "learning_rate": 1e-06, "loss": 0.4675, "num_input_tokens_seen": 343759188, "step": 6136 }, { "epoch": 13.665924276169266, "loss": 0.4605761170387268, "loss_ce": 0.00012687427806667984, "loss_iou": 0.1904296875, "loss_num": 0.0159912109375, "loss_xval": 0.4609375, "num_input_tokens_seen": 343759188, "step": 6136 }, { "epoch": 13.66815144766147, "grad_norm": 23.070680618286133, "learning_rate": 1e-06, "loss": 0.3344, "num_input_tokens_seen": 343815236, "step": 6137 }, { "epoch": 13.66815144766147, "loss": 0.29162734746932983, "loss_ce": 0.0001234151714015752, "loss_iou": 0.1103515625, "loss_num": 0.0140380859375, "loss_xval": 0.291015625, "num_input_tokens_seen": 343815236, "step": 6137 }, { "epoch": 13.670378619153675, "grad_norm": 19.009531021118164, "learning_rate": 1e-06, "loss": 0.6555, "num_input_tokens_seen": 343872764, "step": 6138 }, { "epoch": 13.670378619153675, "loss": 0.7084763050079346, "loss_ce": 0.00010225173900835216, "loss_iou": 0.27734375, "loss_num": 0.0303955078125, "loss_xval": 0.70703125, "num_input_tokens_seen": 343872764, "step": 6138 }, { "epoch": 13.67260579064588, "grad_norm": 15.859116554260254, "learning_rate": 1e-06, "loss": 0.5408, "num_input_tokens_seen": 343930516, "step": 6139 }, { "epoch": 13.67260579064588, "loss": 0.6228233575820923, "loss_ce": 0.0001426705566700548, "loss_iou": 0.26953125, "loss_num": 0.0164794921875, "loss_xval": 0.62109375, "num_input_tokens_seen": 343930516, "step": 6139 }, { "epoch": 13.674832962138085, "grad_norm": 16.675167083740234, "learning_rate": 1e-06, "loss": 0.6865, "num_input_tokens_seen": 343987948, "step": 6140 }, { "epoch": 13.674832962138085, "loss": 0.7435585856437683, "loss_ce": 0.0013711238279938698, "loss_iou": 0.296875, "loss_num": 0.0296630859375, "loss_xval": 0.7421875, "num_input_tokens_seen": 343987948, "step": 6140 }, { "epoch": 13.67706013363029, "grad_norm": 18.57615852355957, "learning_rate": 1e-06, "loss": 0.5157, "num_input_tokens_seen": 344043668, "step": 6141 }, { "epoch": 13.67706013363029, "loss": 0.5919273495674133, "loss_ce": 0.0001304733450524509, "loss_iou": 0.2470703125, "loss_num": 0.01953125, "loss_xval": 0.59375, "num_input_tokens_seen": 344043668, "step": 6141 }, { "epoch": 13.679287305122495, "grad_norm": 17.68915367126465, "learning_rate": 1e-06, "loss": 0.3544, "num_input_tokens_seen": 344098800, "step": 6142 }, { "epoch": 13.679287305122495, "loss": 0.3864895701408386, "loss_ce": 0.00013704363664146513, "loss_iou": 0.1748046875, "loss_num": 0.00738525390625, "loss_xval": 0.38671875, "num_input_tokens_seen": 344098800, "step": 6142 }, { "epoch": 13.6815144766147, "grad_norm": 25.44866371154785, "learning_rate": 1e-06, "loss": 0.3647, "num_input_tokens_seen": 344156784, "step": 6143 }, { "epoch": 13.6815144766147, "loss": 0.3855780065059662, "loss_ce": 0.00011047557200072333, "loss_iou": 0.171875, "loss_num": 0.00848388671875, "loss_xval": 0.384765625, "num_input_tokens_seen": 344156784, "step": 6143 }, { "epoch": 13.683741648106905, "grad_norm": 15.047554969787598, "learning_rate": 1e-06, "loss": 0.3801, "num_input_tokens_seen": 344215212, "step": 6144 }, { "epoch": 13.683741648106905, "loss": 0.2978753447532654, "loss_ce": 8.48290219437331e-05, "loss_iou": 0.1298828125, "loss_num": 0.007568359375, "loss_xval": 0.296875, "num_input_tokens_seen": 344215212, "step": 6144 }, { "epoch": 13.68596881959911, "grad_norm": 48.20288848876953, "learning_rate": 1e-06, "loss": 0.4122, "num_input_tokens_seen": 344271784, "step": 6145 }, { "epoch": 13.68596881959911, "loss": 0.40208396315574646, "loss_ce": 0.00010644205030985177, "loss_iou": 0.185546875, "loss_num": 0.006317138671875, "loss_xval": 0.40234375, "num_input_tokens_seen": 344271784, "step": 6145 }, { "epoch": 13.688195991091314, "grad_norm": 71.81108093261719, "learning_rate": 1e-06, "loss": 0.576, "num_input_tokens_seen": 344329204, "step": 6146 }, { "epoch": 13.688195991091314, "loss": 0.5167222023010254, "loss_ce": 0.00012068171781720594, "loss_iou": 0.2236328125, "loss_num": 0.0140380859375, "loss_xval": 0.515625, "num_input_tokens_seen": 344329204, "step": 6146 }, { "epoch": 13.690423162583519, "grad_norm": 12.681405067443848, "learning_rate": 1e-06, "loss": 0.4676, "num_input_tokens_seen": 344385616, "step": 6147 }, { "epoch": 13.690423162583519, "loss": 0.5067310929298401, "loss_ce": 0.0005054936627857387, "loss_iou": 0.1962890625, "loss_num": 0.0230712890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 344385616, "step": 6147 }, { "epoch": 13.692650334075724, "grad_norm": 25.468963623046875, "learning_rate": 1e-06, "loss": 0.3891, "num_input_tokens_seen": 344442500, "step": 6148 }, { "epoch": 13.692650334075724, "loss": 0.443470299243927, "loss_ce": 0.0001109380682464689, "loss_iou": 0.1962890625, "loss_num": 0.0101318359375, "loss_xval": 0.443359375, "num_input_tokens_seen": 344442500, "step": 6148 }, { "epoch": 13.694877505567929, "grad_norm": 29.40053367614746, "learning_rate": 1e-06, "loss": 0.5509, "num_input_tokens_seen": 344499324, "step": 6149 }, { "epoch": 13.694877505567929, "loss": 0.5093941688537598, "loss_ce": 0.00011685908248182386, "loss_iou": 0.23046875, "loss_num": 0.0096435546875, "loss_xval": 0.5078125, "num_input_tokens_seen": 344499324, "step": 6149 }, { "epoch": 13.697104677060134, "grad_norm": 21.182100296020508, "learning_rate": 1e-06, "loss": 0.4615, "num_input_tokens_seen": 344556944, "step": 6150 }, { "epoch": 13.697104677060134, "loss": 0.568960428237915, "loss_ce": 0.00011279522004770115, "loss_iou": 0.23828125, "loss_num": 0.0185546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 344556944, "step": 6150 }, { "epoch": 13.699331848552339, "grad_norm": 34.760746002197266, "learning_rate": 1e-06, "loss": 0.4418, "num_input_tokens_seen": 344613508, "step": 6151 }, { "epoch": 13.699331848552339, "loss": 0.49267372488975525, "loss_ce": 0.00012003096344415098, "loss_iou": 0.2138671875, "loss_num": 0.01312255859375, "loss_xval": 0.4921875, "num_input_tokens_seen": 344613508, "step": 6151 }, { "epoch": 13.701559020044543, "grad_norm": 34.04802322387695, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 344669656, "step": 6152 }, { "epoch": 13.701559020044543, "loss": 0.38548383116722107, "loss_ce": 0.00010786794882733375, "loss_iou": 0.1796875, "loss_num": 0.00506591796875, "loss_xval": 0.384765625, "num_input_tokens_seen": 344669656, "step": 6152 }, { "epoch": 13.703786191536748, "grad_norm": 19.769567489624023, "learning_rate": 1e-06, "loss": 0.4088, "num_input_tokens_seen": 344725372, "step": 6153 }, { "epoch": 13.703786191536748, "loss": 0.49809765815734863, "loss_ce": 0.00017283624038100243, "loss_iou": 0.216796875, "loss_num": 0.01318359375, "loss_xval": 0.498046875, "num_input_tokens_seen": 344725372, "step": 6153 }, { "epoch": 13.706013363028953, "grad_norm": 23.97338104248047, "learning_rate": 1e-06, "loss": 0.7272, "num_input_tokens_seen": 344781164, "step": 6154 }, { "epoch": 13.706013363028953, "loss": 0.5699473023414612, "loss_ce": 0.0001230589987244457, "loss_iou": 0.23828125, "loss_num": 0.0186767578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 344781164, "step": 6154 }, { "epoch": 13.708240534521158, "grad_norm": 39.72063064575195, "learning_rate": 1e-06, "loss": 0.4243, "num_input_tokens_seen": 344836304, "step": 6155 }, { "epoch": 13.708240534521158, "loss": 0.37404966354370117, "loss_ce": 0.00014831856242381036, "loss_iou": 0.1630859375, "loss_num": 0.00970458984375, "loss_xval": 0.373046875, "num_input_tokens_seen": 344836304, "step": 6155 }, { "epoch": 13.710467706013363, "grad_norm": 23.376707077026367, "learning_rate": 1e-06, "loss": 0.5827, "num_input_tokens_seen": 344894388, "step": 6156 }, { "epoch": 13.710467706013363, "loss": 0.42637595534324646, "loss_ce": 0.00010641853441484272, "loss_iou": 0.189453125, "loss_num": 0.00933837890625, "loss_xval": 0.42578125, "num_input_tokens_seen": 344894388, "step": 6156 }, { "epoch": 13.712694877505568, "grad_norm": 13.803214073181152, "learning_rate": 1e-06, "loss": 0.3646, "num_input_tokens_seen": 344951236, "step": 6157 }, { "epoch": 13.712694877505568, "loss": 0.3642551600933075, "loss_ce": 0.00011941190314246342, "loss_iou": 0.1669921875, "loss_num": 0.006134033203125, "loss_xval": 0.36328125, "num_input_tokens_seen": 344951236, "step": 6157 }, { "epoch": 13.714922048997773, "grad_norm": 22.018762588500977, "learning_rate": 1e-06, "loss": 0.6011, "num_input_tokens_seen": 345007564, "step": 6158 }, { "epoch": 13.714922048997773, "loss": 0.6203541159629822, "loss_ce": 0.00011484955030027777, "loss_iou": 0.265625, "loss_num": 0.0179443359375, "loss_xval": 0.62109375, "num_input_tokens_seen": 345007564, "step": 6158 }, { "epoch": 13.717149220489977, "grad_norm": 17.11638069152832, "learning_rate": 1e-06, "loss": 0.4277, "num_input_tokens_seen": 345064000, "step": 6159 }, { "epoch": 13.717149220489977, "loss": 0.5495654344558716, "loss_ce": 0.00012692378368228674, "loss_iou": 0.2470703125, "loss_num": 0.01129150390625, "loss_xval": 0.55078125, "num_input_tokens_seen": 345064000, "step": 6159 }, { "epoch": 13.719376391982182, "grad_norm": 20.523475646972656, "learning_rate": 1e-06, "loss": 0.3536, "num_input_tokens_seen": 345120072, "step": 6160 }, { "epoch": 13.719376391982182, "loss": 0.37014520168304443, "loss_ce": 0.00027214642614126205, "loss_iou": 0.1650390625, "loss_num": 0.0081787109375, "loss_xval": 0.369140625, "num_input_tokens_seen": 345120072, "step": 6160 }, { "epoch": 13.721603563474387, "grad_norm": 32.90972900390625, "learning_rate": 1e-06, "loss": 0.5949, "num_input_tokens_seen": 345176544, "step": 6161 }, { "epoch": 13.721603563474387, "loss": 0.44534897804260254, "loss_ce": 9.752592450240627e-05, "loss_iou": 0.18359375, "loss_num": 0.0157470703125, "loss_xval": 0.4453125, "num_input_tokens_seen": 345176544, "step": 6161 }, { "epoch": 13.723830734966592, "grad_norm": 18.18287467956543, "learning_rate": 1e-06, "loss": 0.379, "num_input_tokens_seen": 345233380, "step": 6162 }, { "epoch": 13.723830734966592, "loss": 0.2666533589363098, "loss_ce": 8.995502139441669e-05, "loss_iou": 0.11279296875, "loss_num": 0.00830078125, "loss_xval": 0.265625, "num_input_tokens_seen": 345233380, "step": 6162 }, { "epoch": 13.726057906458797, "grad_norm": 15.033989906311035, "learning_rate": 1e-06, "loss": 0.6433, "num_input_tokens_seen": 345289992, "step": 6163 }, { "epoch": 13.726057906458797, "loss": 0.7077064514160156, "loss_ce": 0.0009193975711241364, "loss_iou": 0.3046875, "loss_num": 0.0194091796875, "loss_xval": 0.70703125, "num_input_tokens_seen": 345289992, "step": 6163 }, { "epoch": 13.728285077951002, "grad_norm": 12.950138092041016, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 345345848, "step": 6164 }, { "epoch": 13.728285077951002, "loss": 0.27292484045028687, "loss_ce": 9.76905066636391e-05, "loss_iou": 0.11669921875, "loss_num": 0.00787353515625, "loss_xval": 0.2734375, "num_input_tokens_seen": 345345848, "step": 6164 }, { "epoch": 13.730512249443207, "grad_norm": 17.856874465942383, "learning_rate": 1e-06, "loss": 0.4661, "num_input_tokens_seen": 345402492, "step": 6165 }, { "epoch": 13.730512249443207, "loss": 0.5341655015945435, "loss_ce": 0.00022992276353761554, "loss_iou": 0.2109375, "loss_num": 0.0225830078125, "loss_xval": 0.53515625, "num_input_tokens_seen": 345402492, "step": 6165 }, { "epoch": 13.732739420935411, "grad_norm": 17.676965713500977, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 345459516, "step": 6166 }, { "epoch": 13.732739420935411, "loss": 0.5320949554443359, "loss_ce": 0.00011253212142037228, "loss_iou": 0.2373046875, "loss_num": 0.0113525390625, "loss_xval": 0.53125, "num_input_tokens_seen": 345459516, "step": 6166 }, { "epoch": 13.734966592427616, "grad_norm": 19.01355743408203, "learning_rate": 1e-06, "loss": 0.4474, "num_input_tokens_seen": 345514608, "step": 6167 }, { "epoch": 13.734966592427616, "loss": 0.4031931757926941, "loss_ce": 0.0001169660608866252, "loss_iou": 0.1845703125, "loss_num": 0.006866455078125, "loss_xval": 0.40234375, "num_input_tokens_seen": 345514608, "step": 6167 }, { "epoch": 13.737193763919821, "grad_norm": 19.29640769958496, "learning_rate": 1e-06, "loss": 0.5236, "num_input_tokens_seen": 345571524, "step": 6168 }, { "epoch": 13.737193763919821, "loss": 0.491804838180542, "loss_ce": 0.00010560073133092374, "loss_iou": 0.2177734375, "loss_num": 0.01123046875, "loss_xval": 0.4921875, "num_input_tokens_seen": 345571524, "step": 6168 }, { "epoch": 13.739420935412026, "grad_norm": 21.25169563293457, "learning_rate": 1e-06, "loss": 0.4618, "num_input_tokens_seen": 345627688, "step": 6169 }, { "epoch": 13.739420935412026, "loss": 0.4485178589820862, "loss_ce": 0.00012308621080592275, "loss_iou": 0.1923828125, "loss_num": 0.0126953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 345627688, "step": 6169 }, { "epoch": 13.74164810690423, "grad_norm": 16.865291595458984, "learning_rate": 1e-06, "loss": 0.5896, "num_input_tokens_seen": 345684380, "step": 6170 }, { "epoch": 13.74164810690423, "loss": 0.5811623334884644, "loss_ce": 0.00010766902414616197, "loss_iou": 0.26171875, "loss_num": 0.01123046875, "loss_xval": 0.58203125, "num_input_tokens_seen": 345684380, "step": 6170 }, { "epoch": 13.743875278396436, "grad_norm": 20.563676834106445, "learning_rate": 1e-06, "loss": 0.5899, "num_input_tokens_seen": 345740580, "step": 6171 }, { "epoch": 13.743875278396436, "loss": 0.44517427682876587, "loss_ce": 0.00010591248428681865, "loss_iou": 0.1923828125, "loss_num": 0.01202392578125, "loss_xval": 0.4453125, "num_input_tokens_seen": 345740580, "step": 6171 }, { "epoch": 13.74610244988864, "grad_norm": 16.448043823242188, "learning_rate": 1e-06, "loss": 0.5441, "num_input_tokens_seen": 345799396, "step": 6172 }, { "epoch": 13.74610244988864, "loss": 0.6715624332427979, "loss_ce": 0.00011470924073364586, "loss_iou": 0.2890625, "loss_num": 0.0181884765625, "loss_xval": 0.671875, "num_input_tokens_seen": 345799396, "step": 6172 }, { "epoch": 13.748329621380847, "grad_norm": 47.44057846069336, "learning_rate": 1e-06, "loss": 0.5772, "num_input_tokens_seen": 345855724, "step": 6173 }, { "epoch": 13.748329621380847, "loss": 0.7166056632995605, "loss_ce": 0.00017500200192444026, "loss_iou": 0.322265625, "loss_num": 0.0147705078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 345855724, "step": 6173 }, { "epoch": 13.750556792873052, "grad_norm": 16.256362915039062, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 345912732, "step": 6174 }, { "epoch": 13.750556792873052, "loss": 0.4795916676521301, "loss_ce": 9.949602099368349e-05, "loss_iou": 0.1923828125, "loss_num": 0.01904296875, "loss_xval": 0.48046875, "num_input_tokens_seen": 345912732, "step": 6174 }, { "epoch": 13.752783964365257, "grad_norm": 21.9121036529541, "learning_rate": 1e-06, "loss": 0.495, "num_input_tokens_seen": 345968416, "step": 6175 }, { "epoch": 13.752783964365257, "loss": 0.3780425786972046, "loss_ce": 0.00011291421833448112, "loss_iou": 0.169921875, "loss_num": 0.007476806640625, "loss_xval": 0.37890625, "num_input_tokens_seen": 345968416, "step": 6175 }, { "epoch": 13.755011135857462, "grad_norm": 15.832091331481934, "learning_rate": 1e-06, "loss": 0.6457, "num_input_tokens_seen": 346026396, "step": 6176 }, { "epoch": 13.755011135857462, "loss": 0.4686300754547119, "loss_ce": 0.00012421910651028156, "loss_iou": 0.2138671875, "loss_num": 0.00830078125, "loss_xval": 0.46875, "num_input_tokens_seen": 346026396, "step": 6176 }, { "epoch": 13.757238307349667, "grad_norm": 21.13070297241211, "learning_rate": 1e-06, "loss": 0.5723, "num_input_tokens_seen": 346080884, "step": 6177 }, { "epoch": 13.757238307349667, "loss": 0.5670015811920166, "loss_ce": 0.00010699567792471498, "loss_iou": 0.244140625, "loss_num": 0.0159912109375, "loss_xval": 0.56640625, "num_input_tokens_seen": 346080884, "step": 6177 }, { "epoch": 13.759465478841872, "grad_norm": 23.448190689086914, "learning_rate": 1e-06, "loss": 0.5021, "num_input_tokens_seen": 346134556, "step": 6178 }, { "epoch": 13.759465478841872, "loss": 0.47252950072288513, "loss_ce": 0.00011740469199139625, "loss_iou": 0.1923828125, "loss_num": 0.017333984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 346134556, "step": 6178 }, { "epoch": 13.761692650334076, "grad_norm": 18.187257766723633, "learning_rate": 1e-06, "loss": 0.3635, "num_input_tokens_seen": 346188980, "step": 6179 }, { "epoch": 13.761692650334076, "loss": 0.35398566722869873, "loss_ce": 0.00010384367487858981, "loss_iou": 0.1630859375, "loss_num": 0.005615234375, "loss_xval": 0.353515625, "num_input_tokens_seen": 346188980, "step": 6179 }, { "epoch": 13.763919821826281, "grad_norm": 31.72901153564453, "learning_rate": 1e-06, "loss": 0.3924, "num_input_tokens_seen": 346245600, "step": 6180 }, { "epoch": 13.763919821826281, "loss": 0.3832623362541199, "loss_ce": 8.365388202946633e-05, "loss_iou": 0.173828125, "loss_num": 0.007110595703125, "loss_xval": 0.3828125, "num_input_tokens_seen": 346245600, "step": 6180 }, { "epoch": 13.766146993318486, "grad_norm": 21.675914764404297, "learning_rate": 1e-06, "loss": 0.4688, "num_input_tokens_seen": 346299216, "step": 6181 }, { "epoch": 13.766146993318486, "loss": 0.5464923977851868, "loss_ce": 0.0001056903856806457, "loss_iou": 0.216796875, "loss_num": 0.0225830078125, "loss_xval": 0.546875, "num_input_tokens_seen": 346299216, "step": 6181 }, { "epoch": 13.768374164810691, "grad_norm": 24.515111923217773, "learning_rate": 1e-06, "loss": 0.5284, "num_input_tokens_seen": 346353404, "step": 6182 }, { "epoch": 13.768374164810691, "loss": 0.47796687483787537, "loss_ce": 0.00018368265591561794, "loss_iou": 0.197265625, "loss_num": 0.0164794921875, "loss_xval": 0.478515625, "num_input_tokens_seen": 346353404, "step": 6182 }, { "epoch": 13.770601336302896, "grad_norm": 13.62939453125, "learning_rate": 1e-06, "loss": 0.4113, "num_input_tokens_seen": 346409016, "step": 6183 }, { "epoch": 13.770601336302896, "loss": 0.395562082529068, "loss_ce": 0.00011531692871358246, "loss_iou": 0.1708984375, "loss_num": 0.0106201171875, "loss_xval": 0.39453125, "num_input_tokens_seen": 346409016, "step": 6183 }, { "epoch": 13.7728285077951, "grad_norm": 14.132533073425293, "learning_rate": 1e-06, "loss": 0.418, "num_input_tokens_seen": 346464900, "step": 6184 }, { "epoch": 13.7728285077951, "loss": 0.416792631149292, "loss_ce": 0.00016667114687152207, "loss_iou": 0.1923828125, "loss_num": 0.006439208984375, "loss_xval": 0.416015625, "num_input_tokens_seen": 346464900, "step": 6184 }, { "epoch": 13.775055679287306, "grad_norm": 78.39266967773438, "learning_rate": 1e-06, "loss": 0.6477, "num_input_tokens_seen": 346521172, "step": 6185 }, { "epoch": 13.775055679287306, "loss": 0.45030081272125244, "loss_ce": 0.00010552178719080985, "loss_iou": 0.197265625, "loss_num": 0.01116943359375, "loss_xval": 0.44921875, "num_input_tokens_seen": 346521172, "step": 6185 }, { "epoch": 13.77728285077951, "grad_norm": 23.97130584716797, "learning_rate": 1e-06, "loss": 0.5591, "num_input_tokens_seen": 346576376, "step": 6186 }, { "epoch": 13.77728285077951, "loss": 0.7201170921325684, "loss_ce": 0.00014645657211076468, "loss_iou": 0.322265625, "loss_num": 0.0155029296875, "loss_xval": 0.71875, "num_input_tokens_seen": 346576376, "step": 6186 }, { "epoch": 13.779510022271715, "grad_norm": 16.92420196533203, "learning_rate": 1e-06, "loss": 0.4977, "num_input_tokens_seen": 346633420, "step": 6187 }, { "epoch": 13.779510022271715, "loss": 0.633420467376709, "loss_ce": 0.00011968903709203005, "loss_iou": 0.26171875, "loss_num": 0.022216796875, "loss_xval": 0.6328125, "num_input_tokens_seen": 346633420, "step": 6187 }, { "epoch": 13.78173719376392, "grad_norm": 18.442733764648438, "learning_rate": 1e-06, "loss": 0.5796, "num_input_tokens_seen": 346689692, "step": 6188 }, { "epoch": 13.78173719376392, "loss": 0.7934539318084717, "loss_ce": 0.00011891472968272865, "loss_iou": 0.345703125, "loss_num": 0.020263671875, "loss_xval": 0.79296875, "num_input_tokens_seen": 346689692, "step": 6188 }, { "epoch": 13.783964365256125, "grad_norm": 21.14725685119629, "learning_rate": 1e-06, "loss": 0.3824, "num_input_tokens_seen": 346745212, "step": 6189 }, { "epoch": 13.783964365256125, "loss": 0.3489500880241394, "loss_ce": 0.00013415844296105206, "loss_iou": 0.15625, "loss_num": 0.007232666015625, "loss_xval": 0.349609375, "num_input_tokens_seen": 346745212, "step": 6189 }, { "epoch": 13.78619153674833, "grad_norm": 23.13970375061035, "learning_rate": 1e-06, "loss": 0.3716, "num_input_tokens_seen": 346803220, "step": 6190 }, { "epoch": 13.78619153674833, "loss": 0.43285852670669556, "loss_ce": 0.00011927761079277843, "loss_iou": 0.205078125, "loss_num": 0.00439453125, "loss_xval": 0.43359375, "num_input_tokens_seen": 346803220, "step": 6190 }, { "epoch": 13.788418708240535, "grad_norm": 17.715566635131836, "learning_rate": 1e-06, "loss": 0.49, "num_input_tokens_seen": 346858040, "step": 6191 }, { "epoch": 13.788418708240535, "loss": 0.29737383127212524, "loss_ce": 0.0001326015335507691, "loss_iou": 0.1328125, "loss_num": 0.0062255859375, "loss_xval": 0.296875, "num_input_tokens_seen": 346858040, "step": 6191 }, { "epoch": 13.79064587973274, "grad_norm": 25.321876525878906, "learning_rate": 1e-06, "loss": 0.5248, "num_input_tokens_seen": 346914080, "step": 6192 }, { "epoch": 13.79064587973274, "loss": 0.6016849279403687, "loss_ce": 0.00010718886915128678, "loss_iou": 0.25390625, "loss_num": 0.0185546875, "loss_xval": 0.6015625, "num_input_tokens_seen": 346914080, "step": 6192 }, { "epoch": 13.792873051224944, "grad_norm": 15.98505973815918, "learning_rate": 1e-06, "loss": 0.5626, "num_input_tokens_seen": 346967296, "step": 6193 }, { "epoch": 13.792873051224944, "loss": 0.677720844745636, "loss_ce": 0.0001085417898138985, "loss_iou": 0.30078125, "loss_num": 0.01507568359375, "loss_xval": 0.67578125, "num_input_tokens_seen": 346967296, "step": 6193 }, { "epoch": 13.79510022271715, "grad_norm": 16.50602149963379, "learning_rate": 1e-06, "loss": 0.6727, "num_input_tokens_seen": 347023416, "step": 6194 }, { "epoch": 13.79510022271715, "loss": 0.8356806039810181, "loss_ce": 0.00010931311408057809, "loss_iou": 0.33203125, "loss_num": 0.033935546875, "loss_xval": 0.8359375, "num_input_tokens_seen": 347023416, "step": 6194 }, { "epoch": 13.797327394209354, "grad_norm": 25.20470428466797, "learning_rate": 1e-06, "loss": 0.4481, "num_input_tokens_seen": 347078308, "step": 6195 }, { "epoch": 13.797327394209354, "loss": 0.39737868309020996, "loss_ce": 0.00016190031601581722, "loss_iou": 0.1787109375, "loss_num": 0.00799560546875, "loss_xval": 0.396484375, "num_input_tokens_seen": 347078308, "step": 6195 }, { "epoch": 13.799554565701559, "grad_norm": 19.322734832763672, "learning_rate": 1e-06, "loss": 0.5349, "num_input_tokens_seen": 347134956, "step": 6196 }, { "epoch": 13.799554565701559, "loss": 0.510852038860321, "loss_ce": 0.0007202195120044053, "loss_iou": 0.2236328125, "loss_num": 0.01239013671875, "loss_xval": 0.51171875, "num_input_tokens_seen": 347134956, "step": 6196 }, { "epoch": 13.801781737193764, "grad_norm": 25.328693389892578, "learning_rate": 1e-06, "loss": 0.7473, "num_input_tokens_seen": 347193060, "step": 6197 }, { "epoch": 13.801781737193764, "loss": 0.9841349720954895, "loss_ce": 0.00012613809667527676, "loss_iou": 0.40234375, "loss_num": 0.03564453125, "loss_xval": 0.984375, "num_input_tokens_seen": 347193060, "step": 6197 }, { "epoch": 13.804008908685969, "grad_norm": 17.876684188842773, "learning_rate": 1e-06, "loss": 0.4347, "num_input_tokens_seen": 347247788, "step": 6198 }, { "epoch": 13.804008908685969, "loss": 0.5318405628204346, "loss_ce": 0.00010229815234197304, "loss_iou": 0.2255859375, "loss_num": 0.01611328125, "loss_xval": 0.53125, "num_input_tokens_seen": 347247788, "step": 6198 }, { "epoch": 13.806236080178174, "grad_norm": 21.041126251220703, "learning_rate": 1e-06, "loss": 0.6075, "num_input_tokens_seen": 347302380, "step": 6199 }, { "epoch": 13.806236080178174, "loss": 0.7548277378082275, "loss_ce": 0.00025013202684931457, "loss_iou": 0.314453125, "loss_num": 0.0257568359375, "loss_xval": 0.75390625, "num_input_tokens_seen": 347302380, "step": 6199 }, { "epoch": 13.808463251670378, "grad_norm": 14.308113098144531, "learning_rate": 1e-06, "loss": 0.5091, "num_input_tokens_seen": 347355180, "step": 6200 }, { "epoch": 13.808463251670378, "loss": 0.6481593251228333, "loss_ce": 0.00014905582065694034, "loss_iou": 0.255859375, "loss_num": 0.027099609375, "loss_xval": 0.6484375, "num_input_tokens_seen": 347355180, "step": 6200 }, { "epoch": 13.810690423162583, "grad_norm": 24.352041244506836, "learning_rate": 1e-06, "loss": 0.4425, "num_input_tokens_seen": 347410936, "step": 6201 }, { "epoch": 13.810690423162583, "loss": 0.40238019824028015, "loss_ce": 9.74750000750646e-05, "loss_iou": 0.17578125, "loss_num": 0.01025390625, "loss_xval": 0.40234375, "num_input_tokens_seen": 347410936, "step": 6201 }, { "epoch": 13.812917594654788, "grad_norm": 12.734814643859863, "learning_rate": 1e-06, "loss": 0.4761, "num_input_tokens_seen": 347466652, "step": 6202 }, { "epoch": 13.812917594654788, "loss": 0.5395181179046631, "loss_ce": 0.00010463084618095309, "loss_iou": 0.23046875, "loss_num": 0.015380859375, "loss_xval": 0.5390625, "num_input_tokens_seen": 347466652, "step": 6202 }, { "epoch": 13.815144766146993, "grad_norm": 19.63954734802246, "learning_rate": 1e-06, "loss": 0.5178, "num_input_tokens_seen": 347523340, "step": 6203 }, { "epoch": 13.815144766146993, "loss": 0.6680713295936584, "loss_ce": 0.00010255551023874432, "loss_iou": 0.294921875, "loss_num": 0.015380859375, "loss_xval": 0.66796875, "num_input_tokens_seen": 347523340, "step": 6203 }, { "epoch": 13.817371937639198, "grad_norm": 27.84977149963379, "learning_rate": 1e-06, "loss": 0.4513, "num_input_tokens_seen": 347581752, "step": 6204 }, { "epoch": 13.817371937639198, "loss": 0.509273886680603, "loss_ce": 0.00011864411499118432, "loss_iou": 0.220703125, "loss_num": 0.0135498046875, "loss_xval": 0.5078125, "num_input_tokens_seen": 347581752, "step": 6204 }, { "epoch": 13.819599109131403, "grad_norm": 18.612722396850586, "learning_rate": 1e-06, "loss": 0.5813, "num_input_tokens_seen": 347641196, "step": 6205 }, { "epoch": 13.819599109131403, "loss": 0.6570059061050415, "loss_ce": 0.00011501011613290757, "loss_iou": 0.275390625, "loss_num": 0.0213623046875, "loss_xval": 0.65625, "num_input_tokens_seen": 347641196, "step": 6205 }, { "epoch": 13.821826280623608, "grad_norm": 17.84321403503418, "learning_rate": 1e-06, "loss": 0.3995, "num_input_tokens_seen": 347699640, "step": 6206 }, { "epoch": 13.821826280623608, "loss": 0.42600324749946594, "loss_ce": 9.991762635763735e-05, "loss_iou": 0.1884765625, "loss_num": 0.00982666015625, "loss_xval": 0.42578125, "num_input_tokens_seen": 347699640, "step": 6206 }, { "epoch": 13.824053452115812, "grad_norm": 17.020217895507812, "learning_rate": 1e-06, "loss": 0.5993, "num_input_tokens_seen": 347755124, "step": 6207 }, { "epoch": 13.824053452115812, "loss": 0.4133188724517822, "loss_ce": 0.00011088576866313815, "loss_iou": 0.1748046875, "loss_num": 0.0126953125, "loss_xval": 0.4140625, "num_input_tokens_seen": 347755124, "step": 6207 }, { "epoch": 13.826280623608017, "grad_norm": 16.579845428466797, "learning_rate": 1e-06, "loss": 0.5781, "num_input_tokens_seen": 347809184, "step": 6208 }, { "epoch": 13.826280623608017, "loss": 0.5595046281814575, "loss_ce": 0.00017845621914602816, "loss_iou": 0.2265625, "loss_num": 0.0213623046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 347809184, "step": 6208 }, { "epoch": 13.828507795100222, "grad_norm": 21.396059036254883, "learning_rate": 1e-06, "loss": 0.5244, "num_input_tokens_seen": 347865520, "step": 6209 }, { "epoch": 13.828507795100222, "loss": 0.5646365880966187, "loss_ce": 0.00012240419164299965, "loss_iou": 0.2255859375, "loss_num": 0.02294921875, "loss_xval": 0.56640625, "num_input_tokens_seen": 347865520, "step": 6209 }, { "epoch": 13.830734966592427, "grad_norm": 12.559648513793945, "learning_rate": 1e-06, "loss": 0.4268, "num_input_tokens_seen": 347922004, "step": 6210 }, { "epoch": 13.830734966592427, "loss": 0.5307496786117554, "loss_ce": 0.00011002724932041019, "loss_iou": 0.2314453125, "loss_num": 0.01348876953125, "loss_xval": 0.53125, "num_input_tokens_seen": 347922004, "step": 6210 }, { "epoch": 13.832962138084632, "grad_norm": 20.28154945373535, "learning_rate": 1e-06, "loss": 0.5987, "num_input_tokens_seen": 347977316, "step": 6211 }, { "epoch": 13.832962138084632, "loss": 0.8992745876312256, "loss_ce": 0.00010463996295584366, "loss_iou": 0.357421875, "loss_num": 0.036865234375, "loss_xval": 0.8984375, "num_input_tokens_seen": 347977316, "step": 6211 }, { "epoch": 13.835189309576837, "grad_norm": 21.434589385986328, "learning_rate": 1e-06, "loss": 0.5093, "num_input_tokens_seen": 348031884, "step": 6212 }, { "epoch": 13.835189309576837, "loss": 0.5445865392684937, "loss_ce": 0.0001529275468783453, "loss_iou": 0.23046875, "loss_num": 0.0164794921875, "loss_xval": 0.54296875, "num_input_tokens_seen": 348031884, "step": 6212 }, { "epoch": 13.837416481069042, "grad_norm": 13.820694923400879, "learning_rate": 1e-06, "loss": 0.5881, "num_input_tokens_seen": 348088760, "step": 6213 }, { "epoch": 13.837416481069042, "loss": 0.5043725371360779, "loss_ce": 0.0001000510819721967, "loss_iou": 0.203125, "loss_num": 0.0196533203125, "loss_xval": 0.50390625, "num_input_tokens_seen": 348088760, "step": 6213 }, { "epoch": 13.839643652561247, "grad_norm": 16.25212287902832, "learning_rate": 1e-06, "loss": 0.4709, "num_input_tokens_seen": 348145232, "step": 6214 }, { "epoch": 13.839643652561247, "loss": 0.5301461219787598, "loss_ce": 0.0001167980080936104, "loss_iou": 0.23828125, "loss_num": 0.0106201171875, "loss_xval": 0.53125, "num_input_tokens_seen": 348145232, "step": 6214 }, { "epoch": 13.841870824053451, "grad_norm": 13.156241416931152, "learning_rate": 1e-06, "loss": 0.6487, "num_input_tokens_seen": 348203452, "step": 6215 }, { "epoch": 13.841870824053451, "loss": 0.9121133089065552, "loss_ce": 0.00012593530118465424, "loss_iou": 0.396484375, "loss_num": 0.02392578125, "loss_xval": 0.91015625, "num_input_tokens_seen": 348203452, "step": 6215 }, { "epoch": 13.844097995545656, "grad_norm": 19.261072158813477, "learning_rate": 1e-06, "loss": 0.5774, "num_input_tokens_seen": 348261088, "step": 6216 }, { "epoch": 13.844097995545656, "loss": 0.3862478733062744, "loss_ce": 7.846013613743708e-05, "loss_iou": 0.1708984375, "loss_num": 0.0087890625, "loss_xval": 0.38671875, "num_input_tokens_seen": 348261088, "step": 6216 }, { "epoch": 13.846325167037861, "grad_norm": 14.543188095092773, "learning_rate": 1e-06, "loss": 0.3715, "num_input_tokens_seen": 348316648, "step": 6217 }, { "epoch": 13.846325167037861, "loss": 0.398532897233963, "loss_ce": 9.540202154312283e-05, "loss_iou": 0.171875, "loss_num": 0.0108642578125, "loss_xval": 0.3984375, "num_input_tokens_seen": 348316648, "step": 6217 }, { "epoch": 13.848552338530066, "grad_norm": 16.337297439575195, "learning_rate": 1e-06, "loss": 0.6434, "num_input_tokens_seen": 348376184, "step": 6218 }, { "epoch": 13.848552338530066, "loss": 0.41022244095802307, "loss_ce": 0.00012721509847324342, "loss_iou": 0.177734375, "loss_num": 0.01080322265625, "loss_xval": 0.41015625, "num_input_tokens_seen": 348376184, "step": 6218 }, { "epoch": 13.85077951002227, "grad_norm": 34.131065368652344, "learning_rate": 1e-06, "loss": 0.6145, "num_input_tokens_seen": 348429572, "step": 6219 }, { "epoch": 13.85077951002227, "loss": 0.7063025236129761, "loss_ce": 0.0001257747644558549, "loss_iou": 0.306640625, "loss_num": 0.01904296875, "loss_xval": 0.70703125, "num_input_tokens_seen": 348429572, "step": 6219 }, { "epoch": 13.853006681514476, "grad_norm": 17.740169525146484, "learning_rate": 1e-06, "loss": 0.4083, "num_input_tokens_seen": 348487176, "step": 6220 }, { "epoch": 13.853006681514476, "loss": 0.4807821214199066, "loss_ce": 0.00013025107909925282, "loss_iou": 0.2041015625, "loss_num": 0.01458740234375, "loss_xval": 0.48046875, "num_input_tokens_seen": 348487176, "step": 6220 }, { "epoch": 13.855233853006682, "grad_norm": 25.03499984741211, "learning_rate": 1e-06, "loss": 0.5935, "num_input_tokens_seen": 348542216, "step": 6221 }, { "epoch": 13.855233853006682, "loss": 0.7307603359222412, "loss_ce": 0.0001694800885161385, "loss_iou": 0.30859375, "loss_num": 0.0228271484375, "loss_xval": 0.73046875, "num_input_tokens_seen": 348542216, "step": 6221 }, { "epoch": 13.857461024498887, "grad_norm": 22.183853149414062, "learning_rate": 1e-06, "loss": 0.4923, "num_input_tokens_seen": 348599416, "step": 6222 }, { "epoch": 13.857461024498887, "loss": 0.5290480852127075, "loss_ce": 0.00011743127834051847, "loss_iou": 0.24609375, "loss_num": 0.0074462890625, "loss_xval": 0.52734375, "num_input_tokens_seen": 348599416, "step": 6222 }, { "epoch": 13.859688195991092, "grad_norm": 16.716617584228516, "learning_rate": 1e-06, "loss": 0.4975, "num_input_tokens_seen": 348657108, "step": 6223 }, { "epoch": 13.859688195991092, "loss": 0.5516940355300903, "loss_ce": 0.00011933541827602312, "loss_iou": 0.2431640625, "loss_num": 0.012939453125, "loss_xval": 0.55078125, "num_input_tokens_seen": 348657108, "step": 6223 }, { "epoch": 13.861915367483297, "grad_norm": 24.67982292175293, "learning_rate": 1e-06, "loss": 0.4872, "num_input_tokens_seen": 348711952, "step": 6224 }, { "epoch": 13.861915367483297, "loss": 0.36566367745399475, "loss_ce": 0.0001241198042407632, "loss_iou": 0.158203125, "loss_num": 0.009765625, "loss_xval": 0.365234375, "num_input_tokens_seen": 348711952, "step": 6224 }, { "epoch": 13.864142538975502, "grad_norm": 23.012540817260742, "learning_rate": 1e-06, "loss": 0.4581, "num_input_tokens_seen": 348767728, "step": 6225 }, { "epoch": 13.864142538975502, "loss": 0.5795168280601501, "loss_ce": 0.00011010624439222738, "loss_iou": 0.263671875, "loss_num": 0.0103759765625, "loss_xval": 0.578125, "num_input_tokens_seen": 348767728, "step": 6225 }, { "epoch": 13.866369710467707, "grad_norm": 26.270526885986328, "learning_rate": 1e-06, "loss": 0.3405, "num_input_tokens_seen": 348821880, "step": 6226 }, { "epoch": 13.866369710467707, "loss": 0.3175492286682129, "loss_ce": 0.00010535772162256762, "loss_iou": 0.1259765625, "loss_num": 0.0130615234375, "loss_xval": 0.318359375, "num_input_tokens_seen": 348821880, "step": 6226 }, { "epoch": 13.868596881959911, "grad_norm": 19.418540954589844, "learning_rate": 1e-06, "loss": 0.3874, "num_input_tokens_seen": 348875740, "step": 6227 }, { "epoch": 13.868596881959911, "loss": 0.3931885063648224, "loss_ce": 0.00012211257126182318, "loss_iou": 0.173828125, "loss_num": 0.0089111328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 348875740, "step": 6227 }, { "epoch": 13.870824053452116, "grad_norm": 19.871246337890625, "learning_rate": 1e-06, "loss": 0.6113, "num_input_tokens_seen": 348931196, "step": 6228 }, { "epoch": 13.870824053452116, "loss": 0.4289402961730957, "loss_ce": 0.000107303996628616, "loss_iou": 0.1982421875, "loss_num": 0.00634765625, "loss_xval": 0.4296875, "num_input_tokens_seen": 348931196, "step": 6228 }, { "epoch": 13.873051224944321, "grad_norm": 37.94038772583008, "learning_rate": 1e-06, "loss": 0.4865, "num_input_tokens_seen": 348988736, "step": 6229 }, { "epoch": 13.873051224944321, "loss": 0.48054730892181396, "loss_ce": 0.00013959786156192422, "loss_iou": 0.20703125, "loss_num": 0.01348876953125, "loss_xval": 0.48046875, "num_input_tokens_seen": 348988736, "step": 6229 }, { "epoch": 13.875278396436526, "grad_norm": 17.617862701416016, "learning_rate": 1e-06, "loss": 0.3775, "num_input_tokens_seen": 349044504, "step": 6230 }, { "epoch": 13.875278396436526, "loss": 0.3631332814693451, "loss_ce": 9.617961768526584e-05, "loss_iou": 0.1484375, "loss_num": 0.01336669921875, "loss_xval": 0.36328125, "num_input_tokens_seen": 349044504, "step": 6230 }, { "epoch": 13.877505567928731, "grad_norm": 19.312604904174805, "learning_rate": 1e-06, "loss": 0.533, "num_input_tokens_seen": 349101952, "step": 6231 }, { "epoch": 13.877505567928731, "loss": 0.4969325065612793, "loss_ce": 0.00010634450882207602, "loss_iou": 0.220703125, "loss_num": 0.01104736328125, "loss_xval": 0.49609375, "num_input_tokens_seen": 349101952, "step": 6231 }, { "epoch": 13.879732739420936, "grad_norm": 13.717912673950195, "learning_rate": 1e-06, "loss": 0.3243, "num_input_tokens_seen": 349155880, "step": 6232 }, { "epoch": 13.879732739420936, "loss": 0.3837384581565857, "loss_ce": 0.0001325202756561339, "loss_iou": 0.171875, "loss_num": 0.00787353515625, "loss_xval": 0.3828125, "num_input_tokens_seen": 349155880, "step": 6232 }, { "epoch": 13.88195991091314, "grad_norm": 22.239849090576172, "learning_rate": 1e-06, "loss": 0.4274, "num_input_tokens_seen": 349208756, "step": 6233 }, { "epoch": 13.88195991091314, "loss": 0.47713950276374817, "loss_ce": 8.873137267073616e-05, "loss_iou": 0.2177734375, "loss_num": 0.0081787109375, "loss_xval": 0.4765625, "num_input_tokens_seen": 349208756, "step": 6233 }, { "epoch": 13.884187082405345, "grad_norm": 17.712202072143555, "learning_rate": 1e-06, "loss": 0.4714, "num_input_tokens_seen": 349265684, "step": 6234 }, { "epoch": 13.884187082405345, "loss": 0.5098845958709717, "loss_ce": 0.00011895706120412797, "loss_iou": 0.2265625, "loss_num": 0.01129150390625, "loss_xval": 0.5078125, "num_input_tokens_seen": 349265684, "step": 6234 }, { "epoch": 13.88641425389755, "grad_norm": 20.694042205810547, "learning_rate": 1e-06, "loss": 0.5065, "num_input_tokens_seen": 349323304, "step": 6235 }, { "epoch": 13.88641425389755, "loss": 0.505508542060852, "loss_ce": 0.00013747680350206792, "loss_iou": 0.203125, "loss_num": 0.0196533203125, "loss_xval": 0.50390625, "num_input_tokens_seen": 349323304, "step": 6235 }, { "epoch": 13.888641425389755, "grad_norm": 17.659744262695312, "learning_rate": 1e-06, "loss": 0.6468, "num_input_tokens_seen": 349378928, "step": 6236 }, { "epoch": 13.888641425389755, "loss": 0.7527920603752136, "loss_ce": 0.00010650817421264946, "loss_iou": 0.326171875, "loss_num": 0.0205078125, "loss_xval": 0.75390625, "num_input_tokens_seen": 349378928, "step": 6236 }, { "epoch": 13.89086859688196, "grad_norm": 13.622602462768555, "learning_rate": 1e-06, "loss": 0.3588, "num_input_tokens_seen": 349436664, "step": 6237 }, { "epoch": 13.89086859688196, "loss": 0.42641109228134155, "loss_ce": 0.00014156656106933951, "loss_iou": 0.169921875, "loss_num": 0.01708984375, "loss_xval": 0.42578125, "num_input_tokens_seen": 349436664, "step": 6237 }, { "epoch": 13.893095768374165, "grad_norm": 16.968406677246094, "learning_rate": 1e-06, "loss": 0.4017, "num_input_tokens_seen": 349492432, "step": 6238 }, { "epoch": 13.893095768374165, "loss": 0.4099974036216736, "loss_ce": 8.531904313713312e-05, "loss_iou": 0.181640625, "loss_num": 0.00946044921875, "loss_xval": 0.41015625, "num_input_tokens_seen": 349492432, "step": 6238 }, { "epoch": 13.89532293986637, "grad_norm": 28.478656768798828, "learning_rate": 1e-06, "loss": 0.4145, "num_input_tokens_seen": 349550060, "step": 6239 }, { "epoch": 13.89532293986637, "loss": 0.37417882680892944, "loss_ce": 0.000277488463325426, "loss_iou": 0.1630859375, "loss_num": 0.0093994140625, "loss_xval": 0.373046875, "num_input_tokens_seen": 349550060, "step": 6239 }, { "epoch": 13.897550111358575, "grad_norm": 23.06316375732422, "learning_rate": 1e-06, "loss": 0.4668, "num_input_tokens_seen": 349603676, "step": 6240 }, { "epoch": 13.897550111358575, "loss": 0.5238842964172363, "loss_ce": 0.0001416070736013353, "loss_iou": 0.2236328125, "loss_num": 0.0152587890625, "loss_xval": 0.5234375, "num_input_tokens_seen": 349603676, "step": 6240 }, { "epoch": 13.89977728285078, "grad_norm": 15.570908546447754, "learning_rate": 1e-06, "loss": 0.6621, "num_input_tokens_seen": 349655680, "step": 6241 }, { "epoch": 13.89977728285078, "loss": 0.6655749678611755, "loss_ce": 0.00010867592936847359, "loss_iou": 0.263671875, "loss_num": 0.0272216796875, "loss_xval": 0.6640625, "num_input_tokens_seen": 349655680, "step": 6241 }, { "epoch": 13.902004454342984, "grad_norm": 20.07220458984375, "learning_rate": 1e-06, "loss": 0.4852, "num_input_tokens_seen": 349710520, "step": 6242 }, { "epoch": 13.902004454342984, "loss": 0.48412150144577026, "loss_ce": 0.00011269759124843404, "loss_iou": 0.22265625, "loss_num": 0.00787353515625, "loss_xval": 0.484375, "num_input_tokens_seen": 349710520, "step": 6242 }, { "epoch": 13.90423162583519, "grad_norm": 19.768949508666992, "learning_rate": 1e-06, "loss": 0.4714, "num_input_tokens_seen": 349766024, "step": 6243 }, { "epoch": 13.90423162583519, "loss": 0.5210692882537842, "loss_ce": 0.0003172849537804723, "loss_iou": 0.212890625, "loss_num": 0.0191650390625, "loss_xval": 0.51953125, "num_input_tokens_seen": 349766024, "step": 6243 }, { "epoch": 13.906458797327394, "grad_norm": 22.428667068481445, "learning_rate": 1e-06, "loss": 0.5109, "num_input_tokens_seen": 349821500, "step": 6244 }, { "epoch": 13.906458797327394, "loss": 0.5417461395263672, "loss_ce": 0.0001201537816086784, "loss_iou": 0.2236328125, "loss_num": 0.0189208984375, "loss_xval": 0.54296875, "num_input_tokens_seen": 349821500, "step": 6244 }, { "epoch": 13.908685968819599, "grad_norm": 14.843369483947754, "learning_rate": 1e-06, "loss": 0.4807, "num_input_tokens_seen": 349877108, "step": 6245 }, { "epoch": 13.908685968819599, "loss": 0.6390482187271118, "loss_ce": 0.0001321482559433207, "loss_iou": 0.263671875, "loss_num": 0.0223388671875, "loss_xval": 0.640625, "num_input_tokens_seen": 349877108, "step": 6245 }, { "epoch": 13.910913140311804, "grad_norm": 77.71258544921875, "learning_rate": 1e-06, "loss": 0.5519, "num_input_tokens_seen": 349930668, "step": 6246 }, { "epoch": 13.910913140311804, "loss": 0.7169502377510071, "loss_ce": 0.00015332532348111272, "loss_iou": 0.298828125, "loss_num": 0.024169921875, "loss_xval": 0.71875, "num_input_tokens_seen": 349930668, "step": 6246 }, { "epoch": 13.913140311804009, "grad_norm": 16.5616512298584, "learning_rate": 1e-06, "loss": 0.4834, "num_input_tokens_seen": 349988348, "step": 6247 }, { "epoch": 13.913140311804009, "loss": 0.39076149463653564, "loss_ce": 0.00013649219181388617, "loss_iou": 0.1640625, "loss_num": 0.0126953125, "loss_xval": 0.390625, "num_input_tokens_seen": 349988348, "step": 6247 }, { "epoch": 13.915367483296214, "grad_norm": 20.010635375976562, "learning_rate": 1e-06, "loss": 0.5611, "num_input_tokens_seen": 350046812, "step": 6248 }, { "epoch": 13.915367483296214, "loss": 0.6575692892074585, "loss_ce": 9.861437138170004e-05, "loss_iou": 0.296875, "loss_num": 0.01251220703125, "loss_xval": 0.65625, "num_input_tokens_seen": 350046812, "step": 6248 }, { "epoch": 13.917594654788418, "grad_norm": 15.879632949829102, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 350104192, "step": 6249 }, { "epoch": 13.917594654788418, "loss": 0.38620513677597046, "loss_ce": 9.672513988334686e-05, "loss_iou": 0.14453125, "loss_num": 0.0194091796875, "loss_xval": 0.38671875, "num_input_tokens_seen": 350104192, "step": 6249 }, { "epoch": 13.919821826280623, "grad_norm": 13.887146949768066, "learning_rate": 1e-06, "loss": 0.4196, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "eval_seeclick_web_CIoU": 0.5887904167175293, "eval_seeclick_web_GIoU": 0.5874381363391876, "eval_seeclick_web_IoU": 0.6068101227283478, "eval_seeclick_web_MAE_all": 0.015451115556061268, "eval_seeclick_web_MAE_h": 0.007658603135496378, "eval_seeclick_web_MAE_w": 0.015746516175568104, "eval_seeclick_web_MAE_x_boxes": 0.009071735199540854, "eval_seeclick_web_MAE_y_boxes": 0.02139524114318192, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.8951788544654846, "eval_seeclick_web_loss_ce": 0.00017347045650240034, "eval_seeclick_web_loss_iou": 0.41064453125, "eval_seeclick_web_loss_num": 0.01229095458984375, "eval_seeclick_web_loss_xval": 0.88232421875, "eval_seeclick_web_runtime": 21.1304, "eval_seeclick_web_samples_per_second": 2.366, "eval_seeclick_web_steps_per_second": 0.095, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "eval_icons_CIoU": 0.2605983316898346, "eval_icons_GIoU": 0.29324978590011597, "eval_icons_IoU": 0.3456629067659378, "eval_icons_MAE_all": 0.06302645802497864, "eval_icons_MAE_h": 0.033296503126621246, "eval_icons_MAE_w": 0.07835287041962147, "eval_icons_MAE_x_boxes": 0.05176934972405434, "eval_icons_MAE_y_boxes": 0.03694954700767994, "eval_icons_inside_bbox": 0.6059027910232544, "eval_icons_loss": 1.7142837047576904, "eval_icons_loss_ce": 0.00022611367603531107, "eval_icons_loss_iou": 0.67041015625, "eval_icons_loss_num": 0.06027984619140625, "eval_icons_loss_xval": 1.642822265625, "eval_icons_runtime": 19.5905, "eval_icons_samples_per_second": 2.552, "eval_icons_steps_per_second": 0.102, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "eval_screenspot_CIoU": 0.38164886832237244, "eval_screenspot_GIoU": 0.39806386828422546, "eval_screenspot_IoU": 0.4498288830121358, "eval_screenspot_MAE_all": 0.05408271153767904, "eval_screenspot_MAE_h": 0.03916273762782415, "eval_screenspot_MAE_w": 0.056641269475221634, "eval_screenspot_MAE_x_boxes": 0.06717804819345474, "eval_screenspot_MAE_y_boxes": 0.036980644799768925, "eval_screenspot_inside_bbox": 0.7070833245913187, "eval_screenspot_loss": 1.5276081562042236, "eval_screenspot_loss_ce": 0.00023197308473754674, "eval_screenspot_loss_iou": 0.6381022135416666, "eval_screenspot_loss_num": 0.061542510986328125, "eval_screenspot_loss_xval": 1.5841471354166667, "eval_screenspot_runtime": 34.0345, "eval_screenspot_samples_per_second": 2.615, "eval_screenspot_steps_per_second": 0.088, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "eval_compot_CIoU": 0.3389701098203659, "eval_compot_GIoU": 0.3561352342367172, "eval_compot_IoU": 0.39824149012565613, "eval_compot_MAE_all": 0.019840517081320286, "eval_compot_MAE_h": 0.013703062664717436, "eval_compot_MAE_w": 0.021303851157426834, "eval_compot_MAE_x_boxes": 0.030402760952711105, "eval_compot_MAE_y_boxes": 0.006665045628324151, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4226690530776978, "eval_compot_loss_ce": 0.0001683719819993712, "eval_compot_loss_iou": 0.655029296875, "eval_compot_loss_num": 0.018640518188476562, "eval_compot_loss_xval": 1.403564453125, "eval_compot_runtime": 20.5568, "eval_compot_samples_per_second": 2.432, "eval_compot_steps_per_second": 0.097, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "eval_custom_ui_val_CIoU": 0.4785856149262852, "eval_custom_ui_val_GIoU": 0.4871194263299306, "eval_custom_ui_val_IoU": 0.5391448040803274, "eval_custom_ui_val_MAE_all": 0.027749659959226847, "eval_custom_ui_val_MAE_h": 0.015134843920047084, "eval_custom_ui_val_MAE_w": 0.03520229996906386, "eval_custom_ui_val_MAE_x_boxes": 0.03391206937117709, "eval_custom_ui_val_MAE_y_boxes": 0.01371948312347134, "eval_custom_ui_val_inside_bbox": 0.7650462985038757, "eval_custom_ui_val_loss": 1.1787360906600952, "eval_custom_ui_val_loss_ce": 0.00019364922385041913, "eval_custom_ui_val_loss_iou": 0.5048014322916666, "eval_custom_ui_val_loss_num": 0.024472342597113714, "eval_custom_ui_val_loss_xval": 1.1316189236111112, "eval_custom_ui_val_runtime": 61.6566, "eval_custom_ui_val_samples_per_second": 4.298, "eval_custom_ui_val_steps_per_second": 0.146, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.919821826280623, "loss": 0.8202172517776489, "loss_ce": 0.00014893364277668297, "loss_iou": 0.369140625, "loss_num": 0.016357421875, "loss_xval": 0.8203125, "num_input_tokens_seen": 350161060, "step": 6250 }, { "epoch": 13.922048997772828, "grad_norm": 18.40821647644043, "learning_rate": 1e-06, "loss": 0.5081, "num_input_tokens_seen": 350218528, "step": 6251 }, { "epoch": 13.922048997772828, "loss": 0.4476110637187958, "loss_ce": 0.00010129214206244797, "loss_iou": 0.19140625, "loss_num": 0.01275634765625, "loss_xval": 0.447265625, "num_input_tokens_seen": 350218528, "step": 6251 }, { "epoch": 13.924276169265033, "grad_norm": 28.36144256591797, "learning_rate": 1e-06, "loss": 0.4074, "num_input_tokens_seen": 350276052, "step": 6252 }, { "epoch": 13.924276169265033, "loss": 0.3698493242263794, "loss_ce": 9.837304969551042e-05, "loss_iou": 0.171875, "loss_num": 0.005462646484375, "loss_xval": 0.369140625, "num_input_tokens_seen": 350276052, "step": 6252 }, { "epoch": 13.926503340757238, "grad_norm": 21.219388961791992, "learning_rate": 1e-06, "loss": 0.4852, "num_input_tokens_seen": 350333528, "step": 6253 }, { "epoch": 13.926503340757238, "loss": 0.43346133828163147, "loss_ce": 0.00011172999802511185, "loss_iou": 0.197265625, "loss_num": 0.00775146484375, "loss_xval": 0.43359375, "num_input_tokens_seen": 350333528, "step": 6253 }, { "epoch": 13.928730512249443, "grad_norm": 12.669755935668945, "learning_rate": 1e-06, "loss": 0.3754, "num_input_tokens_seen": 350391076, "step": 6254 }, { "epoch": 13.928730512249443, "loss": 0.30187392234802246, "loss_ce": 0.00011611805530264974, "loss_iou": 0.140625, "loss_num": 0.0042724609375, "loss_xval": 0.30078125, "num_input_tokens_seen": 350391076, "step": 6254 }, { "epoch": 13.930957683741648, "grad_norm": 13.667628288269043, "learning_rate": 1e-06, "loss": 0.2749, "num_input_tokens_seen": 350447892, "step": 6255 }, { "epoch": 13.930957683741648, "loss": 0.21279628574848175, "loss_ce": 8.877179789124057e-05, "loss_iou": 0.0859375, "loss_num": 0.00823974609375, "loss_xval": 0.212890625, "num_input_tokens_seen": 350447892, "step": 6255 }, { "epoch": 13.933184855233852, "grad_norm": 28.72197723388672, "learning_rate": 1e-06, "loss": 0.47, "num_input_tokens_seen": 350503796, "step": 6256 }, { "epoch": 13.933184855233852, "loss": 0.46128642559051514, "loss_ce": 0.00010479921184014529, "loss_iou": 0.212890625, "loss_num": 0.006866455078125, "loss_xval": 0.4609375, "num_input_tokens_seen": 350503796, "step": 6256 }, { "epoch": 13.935412026726057, "grad_norm": 21.46579933166504, "learning_rate": 1e-06, "loss": 0.5377, "num_input_tokens_seen": 350558404, "step": 6257 }, { "epoch": 13.935412026726057, "loss": 0.32118260860443115, "loss_ce": 0.00010715379903558642, "loss_iou": 0.1357421875, "loss_num": 0.010009765625, "loss_xval": 0.3203125, "num_input_tokens_seen": 350558404, "step": 6257 }, { "epoch": 13.937639198218262, "grad_norm": 21.574478149414062, "learning_rate": 1e-06, "loss": 0.5635, "num_input_tokens_seen": 350617276, "step": 6258 }, { "epoch": 13.937639198218262, "loss": 0.4228344261646271, "loss_ce": 0.00010491947614355013, "loss_iou": 0.1875, "loss_num": 0.00958251953125, "loss_xval": 0.421875, "num_input_tokens_seen": 350617276, "step": 6258 }, { "epoch": 13.939866369710467, "grad_norm": 23.370849609375, "learning_rate": 1e-06, "loss": 0.392, "num_input_tokens_seen": 350674816, "step": 6259 }, { "epoch": 13.939866369710467, "loss": 0.5362622737884521, "loss_ce": 0.0001294276735279709, "loss_iou": 0.2392578125, "loss_num": 0.0115966796875, "loss_xval": 0.53515625, "num_input_tokens_seen": 350674816, "step": 6259 }, { "epoch": 13.942093541202672, "grad_norm": 27.103878021240234, "learning_rate": 1e-06, "loss": 0.4583, "num_input_tokens_seen": 350729952, "step": 6260 }, { "epoch": 13.942093541202672, "loss": 0.41460520029067993, "loss_ce": 0.00011545630695763975, "loss_iou": 0.1884765625, "loss_num": 0.00750732421875, "loss_xval": 0.4140625, "num_input_tokens_seen": 350729952, "step": 6260 }, { "epoch": 13.944320712694877, "grad_norm": 17.797826766967773, "learning_rate": 1e-06, "loss": 0.3609, "num_input_tokens_seen": 350784792, "step": 6261 }, { "epoch": 13.944320712694877, "loss": 0.33117377758026123, "loss_ce": 0.00011909526074305177, "loss_iou": 0.1552734375, "loss_num": 0.00408935546875, "loss_xval": 0.33203125, "num_input_tokens_seen": 350784792, "step": 6261 }, { "epoch": 13.946547884187082, "grad_norm": 19.662729263305664, "learning_rate": 1e-06, "loss": 0.5463, "num_input_tokens_seen": 350838324, "step": 6262 }, { "epoch": 13.946547884187082, "loss": 0.5496599674224854, "loss_ce": 9.940941527020186e-05, "loss_iou": 0.2421875, "loss_num": 0.01318359375, "loss_xval": 0.55078125, "num_input_tokens_seen": 350838324, "step": 6262 }, { "epoch": 13.948775055679288, "grad_norm": 15.954992294311523, "learning_rate": 1e-06, "loss": 0.3793, "num_input_tokens_seen": 350897484, "step": 6263 }, { "epoch": 13.948775055679288, "loss": 0.46910548210144043, "loss_ce": 0.00011134070518892258, "loss_iou": 0.212890625, "loss_num": 0.00860595703125, "loss_xval": 0.46875, "num_input_tokens_seen": 350897484, "step": 6263 }, { "epoch": 13.951002227171493, "grad_norm": 28.188735961914062, "learning_rate": 1e-06, "loss": 0.606, "num_input_tokens_seen": 350953472, "step": 6264 }, { "epoch": 13.951002227171493, "loss": 0.5297653079032898, "loss_ce": 0.00010221092088613659, "loss_iou": 0.203125, "loss_num": 0.0247802734375, "loss_xval": 0.53125, "num_input_tokens_seen": 350953472, "step": 6264 }, { "epoch": 13.953229398663698, "grad_norm": 18.34393310546875, "learning_rate": 1e-06, "loss": 0.4006, "num_input_tokens_seen": 351008120, "step": 6265 }, { "epoch": 13.953229398663698, "loss": 0.5186876058578491, "loss_ce": 0.00013288386981002986, "loss_iou": 0.224609375, "loss_num": 0.013671875, "loss_xval": 0.51953125, "num_input_tokens_seen": 351008120, "step": 6265 }, { "epoch": 13.955456570155903, "grad_norm": 19.615419387817383, "learning_rate": 1e-06, "loss": 0.5483, "num_input_tokens_seen": 351064248, "step": 6266 }, { "epoch": 13.955456570155903, "loss": 0.7774511575698853, "loss_ce": 0.00010741624282673001, "loss_iou": 0.328125, "loss_num": 0.0242919921875, "loss_xval": 0.77734375, "num_input_tokens_seen": 351064248, "step": 6266 }, { "epoch": 13.957683741648108, "grad_norm": 25.06759262084961, "learning_rate": 1e-06, "loss": 0.5834, "num_input_tokens_seen": 351117008, "step": 6267 }, { "epoch": 13.957683741648108, "loss": 0.4678989052772522, "loss_ce": 0.00012549315579235554, "loss_iou": 0.20703125, "loss_num": 0.0108642578125, "loss_xval": 0.46875, "num_input_tokens_seen": 351117008, "step": 6267 }, { "epoch": 13.959910913140313, "grad_norm": 14.953537940979004, "learning_rate": 1e-06, "loss": 0.3766, "num_input_tokens_seen": 351171192, "step": 6268 }, { "epoch": 13.959910913140313, "loss": 0.3148655891418457, "loss_ce": 0.00010729426867328584, "loss_iou": 0.140625, "loss_num": 0.006866455078125, "loss_xval": 0.314453125, "num_input_tokens_seen": 351171192, "step": 6268 }, { "epoch": 13.962138084632517, "grad_norm": 17.59319305419922, "learning_rate": 1e-06, "loss": 0.4396, "num_input_tokens_seen": 351224944, "step": 6269 }, { "epoch": 13.962138084632517, "loss": 0.5811617374420166, "loss_ce": 0.00016810203669592738, "loss_iou": 0.267578125, "loss_num": 0.00897216796875, "loss_xval": 0.58203125, "num_input_tokens_seen": 351224944, "step": 6269 }, { "epoch": 13.964365256124722, "grad_norm": 12.526680946350098, "learning_rate": 1e-06, "loss": 0.4156, "num_input_tokens_seen": 351278692, "step": 6270 }, { "epoch": 13.964365256124722, "loss": 0.4158197343349457, "loss_ce": 0.0001092795209842734, "loss_iou": 0.177734375, "loss_num": 0.01190185546875, "loss_xval": 0.416015625, "num_input_tokens_seen": 351278692, "step": 6270 }, { "epoch": 13.966592427616927, "grad_norm": 15.219315528869629, "learning_rate": 1e-06, "loss": 0.4471, "num_input_tokens_seen": 351330732, "step": 6271 }, { "epoch": 13.966592427616927, "loss": 0.4622631371021271, "loss_ce": 0.0001049246930051595, "loss_iou": 0.2119140625, "loss_num": 0.0078125, "loss_xval": 0.462890625, "num_input_tokens_seen": 351330732, "step": 6271 }, { "epoch": 13.968819599109132, "grad_norm": 16.425418853759766, "learning_rate": 1e-06, "loss": 0.6101, "num_input_tokens_seen": 351389508, "step": 6272 }, { "epoch": 13.968819599109132, "loss": 0.4642532467842102, "loss_ce": 0.0001418821921106428, "loss_iou": 0.1708984375, "loss_num": 0.0245361328125, "loss_xval": 0.46484375, "num_input_tokens_seen": 351389508, "step": 6272 }, { "epoch": 13.971046770601337, "grad_norm": 15.943306922912598, "learning_rate": 1e-06, "loss": 0.5201, "num_input_tokens_seen": 351447368, "step": 6273 }, { "epoch": 13.971046770601337, "loss": 0.5003509521484375, "loss_ce": 0.00010678636317607015, "loss_iou": 0.2177734375, "loss_num": 0.01287841796875, "loss_xval": 0.5, "num_input_tokens_seen": 351447368, "step": 6273 }, { "epoch": 13.973273942093542, "grad_norm": 21.748048782348633, "learning_rate": 1e-06, "loss": 0.4717, "num_input_tokens_seen": 351506568, "step": 6274 }, { "epoch": 13.973273942093542, "loss": 0.5563892126083374, "loss_ce": 0.00011485861614346504, "loss_iou": 0.228515625, "loss_num": 0.0198974609375, "loss_xval": 0.5546875, "num_input_tokens_seen": 351506568, "step": 6274 }, { "epoch": 13.975501113585747, "grad_norm": 23.033185958862305, "learning_rate": 1e-06, "loss": 0.4948, "num_input_tokens_seen": 351562164, "step": 6275 }, { "epoch": 13.975501113585747, "loss": 0.662088930606842, "loss_ce": 0.00010161636600969359, "loss_iou": 0.294921875, "loss_num": 0.014404296875, "loss_xval": 0.66015625, "num_input_tokens_seen": 351562164, "step": 6275 }, { "epoch": 13.977728285077951, "grad_norm": 14.771820068359375, "learning_rate": 1e-06, "loss": 0.3768, "num_input_tokens_seen": 351616064, "step": 6276 }, { "epoch": 13.977728285077951, "loss": 0.5282983779907227, "loss_ce": 0.00022217544028535485, "loss_iou": 0.2353515625, "loss_num": 0.01153564453125, "loss_xval": 0.52734375, "num_input_tokens_seen": 351616064, "step": 6276 }, { "epoch": 13.979955456570156, "grad_norm": 15.470437049865723, "learning_rate": 1e-06, "loss": 0.4431, "num_input_tokens_seen": 351671208, "step": 6277 }, { "epoch": 13.979955456570156, "loss": 0.4118441939353943, "loss_ce": 0.00010104169632541016, "loss_iou": 0.1865234375, "loss_num": 0.0076904296875, "loss_xval": 0.412109375, "num_input_tokens_seen": 351671208, "step": 6277 }, { "epoch": 13.982182628062361, "grad_norm": 19.21230697631836, "learning_rate": 1e-06, "loss": 0.4367, "num_input_tokens_seen": 351725684, "step": 6278 }, { "epoch": 13.982182628062361, "loss": 0.27793729305267334, "loss_ce": 0.00010527193080633879, "loss_iou": 0.11669921875, "loss_num": 0.0089111328125, "loss_xval": 0.27734375, "num_input_tokens_seen": 351725684, "step": 6278 }, { "epoch": 13.984409799554566, "grad_norm": 21.042078018188477, "learning_rate": 1e-06, "loss": 0.3497, "num_input_tokens_seen": 351782280, "step": 6279 }, { "epoch": 13.984409799554566, "loss": 0.2832984924316406, "loss_ce": 9.53472190303728e-05, "loss_iou": 0.12451171875, "loss_num": 0.00677490234375, "loss_xval": 0.283203125, "num_input_tokens_seen": 351782280, "step": 6279 }, { "epoch": 13.98663697104677, "grad_norm": 19.898780822753906, "learning_rate": 1e-06, "loss": 0.6716, "num_input_tokens_seen": 351838828, "step": 6280 }, { "epoch": 13.98663697104677, "loss": 0.9190574288368225, "loss_ce": 0.00011211737000849098, "loss_iou": 0.36328125, "loss_num": 0.038818359375, "loss_xval": 0.91796875, "num_input_tokens_seen": 351838828, "step": 6280 }, { "epoch": 13.988864142538976, "grad_norm": 20.307798385620117, "learning_rate": 1e-06, "loss": 0.4632, "num_input_tokens_seen": 351894124, "step": 6281 }, { "epoch": 13.988864142538976, "loss": 0.3406025767326355, "loss_ce": 0.00014847918646410108, "loss_iou": 0.15234375, "loss_num": 0.006988525390625, "loss_xval": 0.33984375, "num_input_tokens_seen": 351894124, "step": 6281 }, { "epoch": 13.99109131403118, "grad_norm": 16.50027084350586, "learning_rate": 1e-06, "loss": 0.4563, "num_input_tokens_seen": 351949824, "step": 6282 }, { "epoch": 13.99109131403118, "loss": 0.38493967056274414, "loss_ce": 0.0001129888987634331, "loss_iou": 0.1806640625, "loss_num": 0.004547119140625, "loss_xval": 0.384765625, "num_input_tokens_seen": 351949824, "step": 6282 }, { "epoch": 13.993318485523385, "grad_norm": 30.10491180419922, "learning_rate": 1e-06, "loss": 0.5799, "num_input_tokens_seen": 352004840, "step": 6283 }, { "epoch": 13.993318485523385, "loss": 0.3842771351337433, "loss_ce": 0.00012185641389805824, "loss_iou": 0.16796875, "loss_num": 0.00982666015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 352004840, "step": 6283 }, { "epoch": 13.99554565701559, "grad_norm": 26.573183059692383, "learning_rate": 1e-06, "loss": 0.6592, "num_input_tokens_seen": 352060908, "step": 6284 }, { "epoch": 13.99554565701559, "loss": 0.8157883882522583, "loss_ce": 0.00011452929174993187, "loss_iou": 0.33203125, "loss_num": 0.030029296875, "loss_xval": 0.81640625, "num_input_tokens_seen": 352060908, "step": 6284 }, { "epoch": 13.997772828507795, "grad_norm": 21.5324649810791, "learning_rate": 1e-06, "loss": 0.5067, "num_input_tokens_seen": 352115772, "step": 6285 }, { "epoch": 13.997772828507795, "loss": 0.6693138480186462, "loss_ce": 0.00012438424164429307, "loss_iou": 0.271484375, "loss_num": 0.0252685546875, "loss_xval": 0.66796875, "num_input_tokens_seen": 352115772, "step": 6285 }, { "epoch": 14.0, "grad_norm": 16.17325210571289, "learning_rate": 1e-06, "loss": 0.3972, "num_input_tokens_seen": 352173692, "step": 6286 }, { "epoch": 14.0, "loss": 0.43904364109039307, "loss_ce": 0.00010934725287370384, "loss_iou": 0.1845703125, "loss_num": 0.01409912109375, "loss_xval": 0.439453125, "num_input_tokens_seen": 352173692, "step": 6286 }, { "epoch": 14.002227171492205, "grad_norm": 22.501575469970703, "learning_rate": 1e-06, "loss": 0.4806, "num_input_tokens_seen": 352229596, "step": 6287 }, { "epoch": 14.002227171492205, "loss": 0.4168241024017334, "loss_ce": 0.00010654539801180363, "loss_iou": 0.1845703125, "loss_num": 0.00946044921875, "loss_xval": 0.416015625, "num_input_tokens_seen": 352229596, "step": 6287 }, { "epoch": 14.00445434298441, "grad_norm": 19.57029151916504, "learning_rate": 1e-06, "loss": 0.5103, "num_input_tokens_seen": 352285008, "step": 6288 }, { "epoch": 14.00445434298441, "loss": 0.5710501670837402, "loss_ce": 0.0008597684209235013, "loss_iou": 0.2470703125, "loss_num": 0.01519775390625, "loss_xval": 0.5703125, "num_input_tokens_seen": 352285008, "step": 6288 }, { "epoch": 14.006681514476615, "grad_norm": 13.75488567352295, "learning_rate": 1e-06, "loss": 0.3481, "num_input_tokens_seen": 352342884, "step": 6289 }, { "epoch": 14.006681514476615, "loss": 0.4669305682182312, "loss_ce": 0.0001337063149549067, "loss_iou": 0.2158203125, "loss_num": 0.0069580078125, "loss_xval": 0.466796875, "num_input_tokens_seen": 352342884, "step": 6289 }, { "epoch": 14.00890868596882, "grad_norm": 19.051847457885742, "learning_rate": 1e-06, "loss": 0.4405, "num_input_tokens_seen": 352397744, "step": 6290 }, { "epoch": 14.00890868596882, "loss": 0.5650573968887329, "loss_ce": 0.00011596856347750872, "loss_iou": 0.255859375, "loss_num": 0.01068115234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 352397744, "step": 6290 }, { "epoch": 14.011135857461024, "grad_norm": 15.41951847076416, "learning_rate": 1e-06, "loss": 0.4502, "num_input_tokens_seen": 352454612, "step": 6291 }, { "epoch": 14.011135857461024, "loss": 0.5639560222625732, "loss_ce": 0.00011326879757689312, "loss_iou": 0.2431640625, "loss_num": 0.0155029296875, "loss_xval": 0.5625, "num_input_tokens_seen": 352454612, "step": 6291 }, { "epoch": 14.01336302895323, "grad_norm": 12.051553726196289, "learning_rate": 1e-06, "loss": 0.4497, "num_input_tokens_seen": 352511128, "step": 6292 }, { "epoch": 14.01336302895323, "loss": 0.2919390797615051, "loss_ce": 0.00013000219769310206, "loss_iou": 0.12060546875, "loss_num": 0.0101318359375, "loss_xval": 0.291015625, "num_input_tokens_seen": 352511128, "step": 6292 }, { "epoch": 14.015590200445434, "grad_norm": 22.342031478881836, "learning_rate": 1e-06, "loss": 0.3669, "num_input_tokens_seen": 352567572, "step": 6293 }, { "epoch": 14.015590200445434, "loss": 0.4516177773475647, "loss_ce": 0.00014069155440665781, "loss_iou": 0.1962890625, "loss_num": 0.01165771484375, "loss_xval": 0.451171875, "num_input_tokens_seen": 352567572, "step": 6293 }, { "epoch": 14.017817371937639, "grad_norm": 21.999927520751953, "learning_rate": 1e-06, "loss": 0.495, "num_input_tokens_seen": 352623720, "step": 6294 }, { "epoch": 14.017817371937639, "loss": 0.3385887145996094, "loss_ce": 0.00010300398571416736, "loss_iou": 0.15625, "loss_num": 0.005340576171875, "loss_xval": 0.337890625, "num_input_tokens_seen": 352623720, "step": 6294 }, { "epoch": 14.020044543429844, "grad_norm": 18.128568649291992, "learning_rate": 1e-06, "loss": 0.5133, "num_input_tokens_seen": 352679200, "step": 6295 }, { "epoch": 14.020044543429844, "loss": 0.4763103127479553, "loss_ce": 0.0001140405802289024, "loss_iou": 0.216796875, "loss_num": 0.00848388671875, "loss_xval": 0.4765625, "num_input_tokens_seen": 352679200, "step": 6295 }, { "epoch": 14.022271714922049, "grad_norm": 22.718978881835938, "learning_rate": 1e-06, "loss": 0.4883, "num_input_tokens_seen": 352737092, "step": 6296 }, { "epoch": 14.022271714922049, "loss": 0.6851996183395386, "loss_ce": 0.00014106131857261062, "loss_iou": 0.30859375, "loss_num": 0.013427734375, "loss_xval": 0.68359375, "num_input_tokens_seen": 352737092, "step": 6296 }, { "epoch": 14.024498886414253, "grad_norm": 23.775402069091797, "learning_rate": 1e-06, "loss": 0.5388, "num_input_tokens_seen": 352791712, "step": 6297 }, { "epoch": 14.024498886414253, "loss": 0.728790819644928, "loss_ce": 9.21202008612454e-05, "loss_iou": 0.3203125, "loss_num": 0.017578125, "loss_xval": 0.73046875, "num_input_tokens_seen": 352791712, "step": 6297 }, { "epoch": 14.026726057906458, "grad_norm": 27.953927993774414, "learning_rate": 1e-06, "loss": 0.4414, "num_input_tokens_seen": 352846396, "step": 6298 }, { "epoch": 14.026726057906458, "loss": 0.41728514432907104, "loss_ce": 0.00010984927939716727, "loss_iou": 0.1845703125, "loss_num": 0.0096435546875, "loss_xval": 0.41796875, "num_input_tokens_seen": 352846396, "step": 6298 }, { "epoch": 14.028953229398663, "grad_norm": 14.124860763549805, "learning_rate": 1e-06, "loss": 0.573, "num_input_tokens_seen": 352902036, "step": 6299 }, { "epoch": 14.028953229398663, "loss": 0.3383561968803406, "loss_ce": 9.933360706781968e-05, "loss_iou": 0.1591796875, "loss_num": 0.0040283203125, "loss_xval": 0.337890625, "num_input_tokens_seen": 352902036, "step": 6299 }, { "epoch": 14.031180400890868, "grad_norm": 22.924598693847656, "learning_rate": 1e-06, "loss": 0.4167, "num_input_tokens_seen": 352956388, "step": 6300 }, { "epoch": 14.031180400890868, "loss": 0.41771095991134644, "loss_ce": 0.00010839892638614401, "loss_iou": 0.1943359375, "loss_num": 0.0059814453125, "loss_xval": 0.41796875, "num_input_tokens_seen": 352956388, "step": 6300 }, { "epoch": 14.033407572383073, "grad_norm": 16.806791305541992, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 353012648, "step": 6301 }, { "epoch": 14.033407572383073, "loss": 0.6352797150611877, "loss_ce": 0.0001478988560847938, "loss_iou": 0.259765625, "loss_num": 0.0230712890625, "loss_xval": 0.63671875, "num_input_tokens_seen": 353012648, "step": 6301 }, { "epoch": 14.035634743875278, "grad_norm": 16.72489356994629, "learning_rate": 1e-06, "loss": 0.4935, "num_input_tokens_seen": 353071536, "step": 6302 }, { "epoch": 14.035634743875278, "loss": 0.5169607996940613, "loss_ce": 0.0001150843090726994, "loss_iou": 0.205078125, "loss_num": 0.0211181640625, "loss_xval": 0.515625, "num_input_tokens_seen": 353071536, "step": 6302 }, { "epoch": 14.037861915367483, "grad_norm": 19.627099990844727, "learning_rate": 1e-06, "loss": 0.4313, "num_input_tokens_seen": 353128896, "step": 6303 }, { "epoch": 14.037861915367483, "loss": 0.3792652487754822, "loss_ce": 0.00011487019946798682, "loss_iou": 0.1728515625, "loss_num": 0.006805419921875, "loss_xval": 0.37890625, "num_input_tokens_seen": 353128896, "step": 6303 }, { "epoch": 14.040089086859687, "grad_norm": 18.96303367614746, "learning_rate": 1e-06, "loss": 0.5243, "num_input_tokens_seen": 353185372, "step": 6304 }, { "epoch": 14.040089086859687, "loss": 0.5035980939865112, "loss_ce": 0.00011909975728485733, "loss_iou": 0.21875, "loss_num": 0.01324462890625, "loss_xval": 0.50390625, "num_input_tokens_seen": 353185372, "step": 6304 }, { "epoch": 14.042316258351892, "grad_norm": 24.597492218017578, "learning_rate": 1e-06, "loss": 0.5827, "num_input_tokens_seen": 353240952, "step": 6305 }, { "epoch": 14.042316258351892, "loss": 0.3702905774116516, "loss_ce": 0.0002954796073026955, "loss_iou": 0.1630859375, "loss_num": 0.0086669921875, "loss_xval": 0.369140625, "num_input_tokens_seen": 353240952, "step": 6305 }, { "epoch": 14.044543429844097, "grad_norm": 25.436721801757812, "learning_rate": 1e-06, "loss": 0.6157, "num_input_tokens_seen": 353297520, "step": 6306 }, { "epoch": 14.044543429844097, "loss": 0.5884321928024292, "loss_ce": 0.0001753760880092159, "loss_iou": 0.248046875, "loss_num": 0.0185546875, "loss_xval": 0.58984375, "num_input_tokens_seen": 353297520, "step": 6306 }, { "epoch": 14.046770601336302, "grad_norm": 18.7614688873291, "learning_rate": 1e-06, "loss": 0.5636, "num_input_tokens_seen": 353352068, "step": 6307 }, { "epoch": 14.046770601336302, "loss": 0.7366272807121277, "loss_ce": 0.00011604969040490687, "loss_iou": 0.28125, "loss_num": 0.034912109375, "loss_xval": 0.73828125, "num_input_tokens_seen": 353352068, "step": 6307 }, { "epoch": 14.048997772828507, "grad_norm": 15.662527084350586, "learning_rate": 1e-06, "loss": 0.4108, "num_input_tokens_seen": 353409180, "step": 6308 }, { "epoch": 14.048997772828507, "loss": 0.4494924545288086, "loss_ce": 9.055372356669977e-05, "loss_iou": 0.201171875, "loss_num": 0.00946044921875, "loss_xval": 0.44921875, "num_input_tokens_seen": 353409180, "step": 6308 }, { "epoch": 14.051224944320714, "grad_norm": 16.739437103271484, "learning_rate": 1e-06, "loss": 0.3383, "num_input_tokens_seen": 353465760, "step": 6309 }, { "epoch": 14.051224944320714, "loss": 0.4493185877799988, "loss_ce": 9.985115320887417e-05, "loss_iou": 0.1884765625, "loss_num": 0.01446533203125, "loss_xval": 0.44921875, "num_input_tokens_seen": 353465760, "step": 6309 }, { "epoch": 14.053452115812918, "grad_norm": 15.760394096374512, "learning_rate": 1e-06, "loss": 0.3149, "num_input_tokens_seen": 353519740, "step": 6310 }, { "epoch": 14.053452115812918, "loss": 0.28842777013778687, "loss_ce": 9.770273754838854e-05, "loss_iou": 0.1279296875, "loss_num": 0.006500244140625, "loss_xval": 0.2890625, "num_input_tokens_seen": 353519740, "step": 6310 }, { "epoch": 14.055679287305123, "grad_norm": 22.58936882019043, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 353575876, "step": 6311 }, { "epoch": 14.055679287305123, "loss": 0.4683777093887329, "loss_ce": 0.00011599110439419746, "loss_iou": 0.220703125, "loss_num": 0.00555419921875, "loss_xval": 0.46875, "num_input_tokens_seen": 353575876, "step": 6311 }, { "epoch": 14.057906458797328, "grad_norm": 15.249661445617676, "learning_rate": 1e-06, "loss": 0.4432, "num_input_tokens_seen": 353631208, "step": 6312 }, { "epoch": 14.057906458797328, "loss": 0.462121844291687, "loss_ce": 8.568944758735597e-05, "loss_iou": 0.205078125, "loss_num": 0.01025390625, "loss_xval": 0.462890625, "num_input_tokens_seen": 353631208, "step": 6312 }, { "epoch": 14.060133630289533, "grad_norm": 14.119874954223633, "learning_rate": 1e-06, "loss": 0.365, "num_input_tokens_seen": 353687380, "step": 6313 }, { "epoch": 14.060133630289533, "loss": 0.3032883107662201, "loss_ce": 9.618209878681228e-05, "loss_iou": 0.130859375, "loss_num": 0.00836181640625, "loss_xval": 0.302734375, "num_input_tokens_seen": 353687380, "step": 6313 }, { "epoch": 14.062360801781738, "grad_norm": 21.891582489013672, "learning_rate": 1e-06, "loss": 0.7123, "num_input_tokens_seen": 353742548, "step": 6314 }, { "epoch": 14.062360801781738, "loss": 0.7177752256393433, "loss_ce": 0.00012388010509312153, "loss_iou": 0.318359375, "loss_num": 0.0162353515625, "loss_xval": 0.71875, "num_input_tokens_seen": 353742548, "step": 6314 }, { "epoch": 14.064587973273943, "grad_norm": 18.22274398803711, "learning_rate": 1e-06, "loss": 0.4084, "num_input_tokens_seen": 353796396, "step": 6315 }, { "epoch": 14.064587973273943, "loss": 0.4653054475784302, "loss_ce": 9.54567440203391e-05, "loss_iou": 0.2060546875, "loss_num": 0.010498046875, "loss_xval": 0.46484375, "num_input_tokens_seen": 353796396, "step": 6315 }, { "epoch": 14.066815144766148, "grad_norm": 14.07210922241211, "learning_rate": 1e-06, "loss": 0.3755, "num_input_tokens_seen": 353852356, "step": 6316 }, { "epoch": 14.066815144766148, "loss": 0.3321226239204407, "loss_ce": 9.135504660662264e-05, "loss_iou": 0.142578125, "loss_num": 0.00958251953125, "loss_xval": 0.33203125, "num_input_tokens_seen": 353852356, "step": 6316 }, { "epoch": 14.069042316258352, "grad_norm": 12.254171371459961, "learning_rate": 1e-06, "loss": 0.3489, "num_input_tokens_seen": 353908400, "step": 6317 }, { "epoch": 14.069042316258352, "loss": 0.37034958600997925, "loss_ce": 0.00011033388000214472, "loss_iou": 0.171875, "loss_num": 0.00543212890625, "loss_xval": 0.37109375, "num_input_tokens_seen": 353908400, "step": 6317 }, { "epoch": 14.071269487750557, "grad_norm": 19.02251625061035, "learning_rate": 1e-06, "loss": 0.3671, "num_input_tokens_seen": 353963924, "step": 6318 }, { "epoch": 14.071269487750557, "loss": 0.32162177562713623, "loss_ce": 8.858899673214182e-05, "loss_iou": 0.138671875, "loss_num": 0.0087890625, "loss_xval": 0.322265625, "num_input_tokens_seen": 353963924, "step": 6318 }, { "epoch": 14.073496659242762, "grad_norm": 21.90892791748047, "learning_rate": 1e-06, "loss": 0.5865, "num_input_tokens_seen": 354021696, "step": 6319 }, { "epoch": 14.073496659242762, "loss": 0.7010501623153687, "loss_ce": 0.00012240943033248186, "loss_iou": 0.283203125, "loss_num": 0.027099609375, "loss_xval": 0.69921875, "num_input_tokens_seen": 354021696, "step": 6319 }, { "epoch": 14.075723830734967, "grad_norm": 22.8824520111084, "learning_rate": 1e-06, "loss": 0.3748, "num_input_tokens_seen": 354076836, "step": 6320 }, { "epoch": 14.075723830734967, "loss": 0.43087825179100037, "loss_ce": 9.210931602865458e-05, "loss_iou": 0.1767578125, "loss_num": 0.015380859375, "loss_xval": 0.431640625, "num_input_tokens_seen": 354076836, "step": 6320 }, { "epoch": 14.077951002227172, "grad_norm": 18.361591339111328, "learning_rate": 1e-06, "loss": 0.6328, "num_input_tokens_seen": 354131804, "step": 6321 }, { "epoch": 14.077951002227172, "loss": 0.9000133275985718, "loss_ce": 0.00011099971743533388, "loss_iou": 0.359375, "loss_num": 0.03662109375, "loss_xval": 0.8984375, "num_input_tokens_seen": 354131804, "step": 6321 }, { "epoch": 14.080178173719377, "grad_norm": 41.84800338745117, "learning_rate": 1e-06, "loss": 0.4503, "num_input_tokens_seen": 354187764, "step": 6322 }, { "epoch": 14.080178173719377, "loss": 0.4067497253417969, "loss_ce": 0.00013353029498830438, "loss_iou": 0.1650390625, "loss_num": 0.015380859375, "loss_xval": 0.40625, "num_input_tokens_seen": 354187764, "step": 6322 }, { "epoch": 14.082405345211582, "grad_norm": 24.2533016204834, "learning_rate": 1e-06, "loss": 0.5849, "num_input_tokens_seen": 354243268, "step": 6323 }, { "epoch": 14.082405345211582, "loss": 0.5544114708900452, "loss_ce": 9.021456935442984e-05, "loss_iou": 0.23828125, "loss_num": 0.0152587890625, "loss_xval": 0.5546875, "num_input_tokens_seen": 354243268, "step": 6323 }, { "epoch": 14.084632516703786, "grad_norm": 18.090354919433594, "learning_rate": 1e-06, "loss": 0.4398, "num_input_tokens_seen": 354301500, "step": 6324 }, { "epoch": 14.084632516703786, "loss": 0.4318804442882538, "loss_ce": 0.00011774900485761464, "loss_iou": 0.189453125, "loss_num": 0.01055908203125, "loss_xval": 0.431640625, "num_input_tokens_seen": 354301500, "step": 6324 }, { "epoch": 14.086859688195991, "grad_norm": 14.694714546203613, "learning_rate": 1e-06, "loss": 0.547, "num_input_tokens_seen": 354357332, "step": 6325 }, { "epoch": 14.086859688195991, "loss": 0.42843663692474365, "loss_ce": 9.19099838938564e-05, "loss_iou": 0.189453125, "loss_num": 0.009765625, "loss_xval": 0.427734375, "num_input_tokens_seen": 354357332, "step": 6325 }, { "epoch": 14.089086859688196, "grad_norm": 23.925561904907227, "learning_rate": 1e-06, "loss": 0.3976, "num_input_tokens_seen": 354411896, "step": 6326 }, { "epoch": 14.089086859688196, "loss": 0.47738510370254517, "loss_ce": 9.019082790473476e-05, "loss_iou": 0.1953125, "loss_num": 0.0174560546875, "loss_xval": 0.4765625, "num_input_tokens_seen": 354411896, "step": 6326 }, { "epoch": 14.091314031180401, "grad_norm": 17.225788116455078, "learning_rate": 1e-06, "loss": 0.5451, "num_input_tokens_seen": 354468544, "step": 6327 }, { "epoch": 14.091314031180401, "loss": 0.47155094146728516, "loss_ce": 0.00011540517152752727, "loss_iou": 0.2041015625, "loss_num": 0.01275634765625, "loss_xval": 0.470703125, "num_input_tokens_seen": 354468544, "step": 6327 }, { "epoch": 14.093541202672606, "grad_norm": 37.0084342956543, "learning_rate": 1e-06, "loss": 0.3675, "num_input_tokens_seen": 354527544, "step": 6328 }, { "epoch": 14.093541202672606, "loss": 0.3508872985839844, "loss_ce": 0.00011826444824691862, "loss_iou": 0.150390625, "loss_num": 0.0101318359375, "loss_xval": 0.3515625, "num_input_tokens_seen": 354527544, "step": 6328 }, { "epoch": 14.09576837416481, "grad_norm": 20.719806671142578, "learning_rate": 1e-06, "loss": 0.3541, "num_input_tokens_seen": 354581636, "step": 6329 }, { "epoch": 14.09576837416481, "loss": 0.2988535761833191, "loss_ce": 8.648804941913113e-05, "loss_iou": 0.134765625, "loss_num": 0.00592041015625, "loss_xval": 0.298828125, "num_input_tokens_seen": 354581636, "step": 6329 }, { "epoch": 14.097995545657016, "grad_norm": 23.697895050048828, "learning_rate": 1e-06, "loss": 0.4439, "num_input_tokens_seen": 354638068, "step": 6330 }, { "epoch": 14.097995545657016, "loss": 0.3387835621833801, "loss_ce": 9.947673243004829e-05, "loss_iou": 0.1474609375, "loss_num": 0.0086669921875, "loss_xval": 0.337890625, "num_input_tokens_seen": 354638068, "step": 6330 }, { "epoch": 14.10022271714922, "grad_norm": 13.052238464355469, "learning_rate": 1e-06, "loss": 0.3469, "num_input_tokens_seen": 354695576, "step": 6331 }, { "epoch": 14.10022271714922, "loss": 0.3428958058357239, "loss_ce": 0.00012233792222104967, "loss_iou": 0.138671875, "loss_num": 0.01287841796875, "loss_xval": 0.34375, "num_input_tokens_seen": 354695576, "step": 6331 }, { "epoch": 14.102449888641425, "grad_norm": 12.137063980102539, "learning_rate": 1e-06, "loss": 0.4448, "num_input_tokens_seen": 354750020, "step": 6332 }, { "epoch": 14.102449888641425, "loss": 0.5491877198219299, "loss_ce": 0.00011541788990143687, "loss_iou": 0.255859375, "loss_num": 0.007354736328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 354750020, "step": 6332 }, { "epoch": 14.10467706013363, "grad_norm": 18.966337203979492, "learning_rate": 1e-06, "loss": 0.3768, "num_input_tokens_seen": 354808800, "step": 6333 }, { "epoch": 14.10467706013363, "loss": 0.30045419931411743, "loss_ce": 0.00010020330955740064, "loss_iou": 0.130859375, "loss_num": 0.007537841796875, "loss_xval": 0.30078125, "num_input_tokens_seen": 354808800, "step": 6333 }, { "epoch": 14.106904231625835, "grad_norm": 69.14574432373047, "learning_rate": 1e-06, "loss": 0.5943, "num_input_tokens_seen": 354863896, "step": 6334 }, { "epoch": 14.106904231625835, "loss": 0.6161885261535645, "loss_ce": 9.962108015315607e-05, "loss_iou": 0.248046875, "loss_num": 0.0240478515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 354863896, "step": 6334 }, { "epoch": 14.10913140311804, "grad_norm": 29.898832321166992, "learning_rate": 1e-06, "loss": 0.5497, "num_input_tokens_seen": 354920516, "step": 6335 }, { "epoch": 14.10913140311804, "loss": 0.5754295587539673, "loss_ce": 0.00011214042024221271, "loss_iou": 0.271484375, "loss_num": 0.00689697265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 354920516, "step": 6335 }, { "epoch": 14.111358574610245, "grad_norm": 17.41396141052246, "learning_rate": 1e-06, "loss": 0.2817, "num_input_tokens_seen": 354976696, "step": 6336 }, { "epoch": 14.111358574610245, "loss": 0.3508448898792267, "loss_ce": 0.0001368697703583166, "loss_iou": 0.1416015625, "loss_num": 0.0135498046875, "loss_xval": 0.3515625, "num_input_tokens_seen": 354976696, "step": 6336 }, { "epoch": 14.11358574610245, "grad_norm": 41.86484146118164, "learning_rate": 1e-06, "loss": 0.5174, "num_input_tokens_seen": 355034820, "step": 6337 }, { "epoch": 14.11358574610245, "loss": 0.38658392429351807, "loss_ce": 0.00010932501754723489, "loss_iou": 0.171875, "loss_num": 0.0084228515625, "loss_xval": 0.38671875, "num_input_tokens_seen": 355034820, "step": 6337 }, { "epoch": 14.115812917594655, "grad_norm": 22.32383918762207, "learning_rate": 1e-06, "loss": 0.6725, "num_input_tokens_seen": 355092456, "step": 6338 }, { "epoch": 14.115812917594655, "loss": 0.47444137930870056, "loss_ce": 0.00013718288391828537, "loss_iou": 0.19140625, "loss_num": 0.0184326171875, "loss_xval": 0.474609375, "num_input_tokens_seen": 355092456, "step": 6338 }, { "epoch": 14.11804008908686, "grad_norm": 15.70544147491455, "learning_rate": 1e-06, "loss": 0.3347, "num_input_tokens_seen": 355146944, "step": 6339 }, { "epoch": 14.11804008908686, "loss": 0.33699989318847656, "loss_ce": 8.58691637404263e-05, "loss_iou": 0.1396484375, "loss_num": 0.01171875, "loss_xval": 0.3359375, "num_input_tokens_seen": 355146944, "step": 6339 }, { "epoch": 14.120267260579064, "grad_norm": 16.518766403198242, "learning_rate": 1e-06, "loss": 0.4509, "num_input_tokens_seen": 355204304, "step": 6340 }, { "epoch": 14.120267260579064, "loss": 0.626310408115387, "loss_ce": 8.97101781447418e-05, "loss_iou": 0.2578125, "loss_num": 0.0224609375, "loss_xval": 0.625, "num_input_tokens_seen": 355204304, "step": 6340 }, { "epoch": 14.122494432071269, "grad_norm": 18.224716186523438, "learning_rate": 1e-06, "loss": 0.4472, "num_input_tokens_seen": 355260784, "step": 6341 }, { "epoch": 14.122494432071269, "loss": 0.504758894443512, "loss_ce": 0.00012020649592159316, "loss_iou": 0.2353515625, "loss_num": 0.00677490234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 355260784, "step": 6341 }, { "epoch": 14.124721603563474, "grad_norm": 16.273900985717773, "learning_rate": 1e-06, "loss": 0.47, "num_input_tokens_seen": 355315728, "step": 6342 }, { "epoch": 14.124721603563474, "loss": 0.4144068956375122, "loss_ce": 0.0001002842909656465, "loss_iou": 0.19140625, "loss_num": 0.006256103515625, "loss_xval": 0.4140625, "num_input_tokens_seen": 355315728, "step": 6342 }, { "epoch": 14.126948775055679, "grad_norm": 13.019576072692871, "learning_rate": 1e-06, "loss": 0.3871, "num_input_tokens_seen": 355369944, "step": 6343 }, { "epoch": 14.126948775055679, "loss": 0.40841740369796753, "loss_ce": 9.22168546821922e-05, "loss_iou": 0.1728515625, "loss_num": 0.01275634765625, "loss_xval": 0.408203125, "num_input_tokens_seen": 355369944, "step": 6343 }, { "epoch": 14.129175946547884, "grad_norm": 20.190988540649414, "learning_rate": 1e-06, "loss": 0.4851, "num_input_tokens_seen": 355423788, "step": 6344 }, { "epoch": 14.129175946547884, "loss": 0.4641231596469879, "loss_ce": 0.00013389563537202775, "loss_iou": 0.21875, "loss_num": 0.00518798828125, "loss_xval": 0.46484375, "num_input_tokens_seen": 355423788, "step": 6344 }, { "epoch": 14.131403118040089, "grad_norm": 18.983137130737305, "learning_rate": 1e-06, "loss": 0.2741, "num_input_tokens_seen": 355483244, "step": 6345 }, { "epoch": 14.131403118040089, "loss": 0.33998721837997437, "loss_ce": 0.0001434668229194358, "loss_iou": 0.1494140625, "loss_num": 0.0081787109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 355483244, "step": 6345 }, { "epoch": 14.133630289532293, "grad_norm": 15.164654731750488, "learning_rate": 1e-06, "loss": 0.4301, "num_input_tokens_seen": 355538616, "step": 6346 }, { "epoch": 14.133630289532293, "loss": 0.669284462928772, "loss_ce": 0.00015608461399096996, "loss_iou": 0.291015625, "loss_num": 0.0177001953125, "loss_xval": 0.66796875, "num_input_tokens_seen": 355538616, "step": 6346 }, { "epoch": 14.135857461024498, "grad_norm": 35.34605026245117, "learning_rate": 1e-06, "loss": 0.5471, "num_input_tokens_seen": 355594156, "step": 6347 }, { "epoch": 14.135857461024498, "loss": 0.6321955323219299, "loss_ce": 0.00011544384324224666, "loss_iou": 0.2734375, "loss_num": 0.0167236328125, "loss_xval": 0.6328125, "num_input_tokens_seen": 355594156, "step": 6347 }, { "epoch": 14.138084632516703, "grad_norm": 20.306251525878906, "learning_rate": 1e-06, "loss": 0.3952, "num_input_tokens_seen": 355649844, "step": 6348 }, { "epoch": 14.138084632516703, "loss": 0.3479854166507721, "loss_ce": 8.503998105879873e-05, "loss_iou": 0.1533203125, "loss_num": 0.00830078125, "loss_xval": 0.34765625, "num_input_tokens_seen": 355649844, "step": 6348 }, { "epoch": 14.140311804008908, "grad_norm": 15.569748878479004, "learning_rate": 1e-06, "loss": 0.4208, "num_input_tokens_seen": 355703872, "step": 6349 }, { "epoch": 14.140311804008908, "loss": 0.37485039234161377, "loss_ce": 9.451658843318e-05, "loss_iou": 0.162109375, "loss_num": 0.01007080078125, "loss_xval": 0.375, "num_input_tokens_seen": 355703872, "step": 6349 }, { "epoch": 14.142538975501113, "grad_norm": 26.381547927856445, "learning_rate": 1e-06, "loss": 0.561, "num_input_tokens_seen": 355758976, "step": 6350 }, { "epoch": 14.142538975501113, "loss": 0.6393318176269531, "loss_ce": 0.0001717099512461573, "loss_iou": 0.28515625, "loss_num": 0.01361083984375, "loss_xval": 0.640625, "num_input_tokens_seen": 355758976, "step": 6350 }, { "epoch": 14.144766146993318, "grad_norm": 18.737266540527344, "learning_rate": 1e-06, "loss": 0.5725, "num_input_tokens_seen": 355810544, "step": 6351 }, { "epoch": 14.144766146993318, "loss": 0.4250592589378357, "loss_ce": 0.00013248772302176803, "loss_iou": 0.1875, "loss_num": 0.0098876953125, "loss_xval": 0.42578125, "num_input_tokens_seen": 355810544, "step": 6351 }, { "epoch": 14.146993318485523, "grad_norm": 20.644193649291992, "learning_rate": 1e-06, "loss": 0.4436, "num_input_tokens_seen": 355865660, "step": 6352 }, { "epoch": 14.146993318485523, "loss": 0.3755820393562317, "loss_ce": 9.376133675687015e-05, "loss_iou": 0.1689453125, "loss_num": 0.0074462890625, "loss_xval": 0.375, "num_input_tokens_seen": 355865660, "step": 6352 }, { "epoch": 14.14922048997773, "grad_norm": 23.84221839904785, "learning_rate": 1e-06, "loss": 0.3666, "num_input_tokens_seen": 355921244, "step": 6353 }, { "epoch": 14.14922048997773, "loss": 0.3702180087566376, "loss_ce": 0.00010083305824082345, "loss_iou": 0.1689453125, "loss_num": 0.006500244140625, "loss_xval": 0.37109375, "num_input_tokens_seen": 355921244, "step": 6353 }, { "epoch": 14.151447661469934, "grad_norm": 28.195049285888672, "learning_rate": 1e-06, "loss": 0.4026, "num_input_tokens_seen": 355978880, "step": 6354 }, { "epoch": 14.151447661469934, "loss": 0.48697197437286377, "loss_ce": 0.00015556240396108478, "loss_iou": 0.2021484375, "loss_num": 0.016357421875, "loss_xval": 0.486328125, "num_input_tokens_seen": 355978880, "step": 6354 }, { "epoch": 14.153674832962139, "grad_norm": 19.111854553222656, "learning_rate": 1e-06, "loss": 0.529, "num_input_tokens_seen": 356033656, "step": 6355 }, { "epoch": 14.153674832962139, "loss": 0.5453453063964844, "loss_ce": 0.00011828625429188833, "loss_iou": 0.240234375, "loss_num": 0.01324462890625, "loss_xval": 0.546875, "num_input_tokens_seen": 356033656, "step": 6355 }, { "epoch": 14.155902004454344, "grad_norm": 17.10947036743164, "learning_rate": 1e-06, "loss": 0.4607, "num_input_tokens_seen": 356092412, "step": 6356 }, { "epoch": 14.155902004454344, "loss": 0.3630419373512268, "loss_ce": 0.00012691874871961772, "loss_iou": 0.1650390625, "loss_num": 0.0064697265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 356092412, "step": 6356 }, { "epoch": 14.158129175946549, "grad_norm": 21.90672492980957, "learning_rate": 1e-06, "loss": 0.3746, "num_input_tokens_seen": 356149732, "step": 6357 }, { "epoch": 14.158129175946549, "loss": 0.35400235652923584, "loss_ce": 0.0002426167920930311, "loss_iou": 0.158203125, "loss_num": 0.007476806640625, "loss_xval": 0.353515625, "num_input_tokens_seen": 356149732, "step": 6357 }, { "epoch": 14.160356347438753, "grad_norm": 31.798358917236328, "learning_rate": 1e-06, "loss": 0.4167, "num_input_tokens_seen": 356204552, "step": 6358 }, { "epoch": 14.160356347438753, "loss": 0.5162287950515747, "loss_ce": 0.00011550244380487129, "loss_iou": 0.234375, "loss_num": 0.00933837890625, "loss_xval": 0.515625, "num_input_tokens_seen": 356204552, "step": 6358 }, { "epoch": 14.162583518930958, "grad_norm": 15.809988021850586, "learning_rate": 1e-06, "loss": 0.3727, "num_input_tokens_seen": 356262836, "step": 6359 }, { "epoch": 14.162583518930958, "loss": 0.4119860827922821, "loss_ce": 0.00012085679190931842, "loss_iou": 0.1875, "loss_num": 0.00726318359375, "loss_xval": 0.412109375, "num_input_tokens_seen": 356262836, "step": 6359 }, { "epoch": 14.164810690423163, "grad_norm": 14.865153312683105, "learning_rate": 1e-06, "loss": 0.4879, "num_input_tokens_seen": 356319092, "step": 6360 }, { "epoch": 14.164810690423163, "loss": 0.34744900465011597, "loss_ce": 9.792236960493028e-05, "loss_iou": 0.16015625, "loss_num": 0.005279541015625, "loss_xval": 0.34765625, "num_input_tokens_seen": 356319092, "step": 6360 }, { "epoch": 14.167037861915368, "grad_norm": 21.529083251953125, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 356375520, "step": 6361 }, { "epoch": 14.167037861915368, "loss": 0.3479962944984436, "loss_ce": 9.590189438313246e-05, "loss_iou": 0.1494140625, "loss_num": 0.010009765625, "loss_xval": 0.34765625, "num_input_tokens_seen": 356375520, "step": 6361 }, { "epoch": 14.169265033407573, "grad_norm": 15.021240234375, "learning_rate": 1e-06, "loss": 0.2992, "num_input_tokens_seen": 356434312, "step": 6362 }, { "epoch": 14.169265033407573, "loss": 0.27756401896476746, "loss_ce": 9.818993567023426e-05, "loss_iou": 0.11572265625, "loss_num": 0.00921630859375, "loss_xval": 0.27734375, "num_input_tokens_seen": 356434312, "step": 6362 }, { "epoch": 14.171492204899778, "grad_norm": 51.64773941040039, "learning_rate": 1e-06, "loss": 0.3894, "num_input_tokens_seen": 356489392, "step": 6363 }, { "epoch": 14.171492204899778, "loss": 0.5095750689506531, "loss_ce": 0.0002977301483042538, "loss_iou": 0.2001953125, "loss_num": 0.0218505859375, "loss_xval": 0.5078125, "num_input_tokens_seen": 356489392, "step": 6363 }, { "epoch": 14.173719376391983, "grad_norm": 16.491901397705078, "learning_rate": 1e-06, "loss": 0.371, "num_input_tokens_seen": 356548064, "step": 6364 }, { "epoch": 14.173719376391983, "loss": 0.3434831500053406, "loss_ce": 9.938179573509842e-05, "loss_iou": 0.15234375, "loss_num": 0.00762939453125, "loss_xval": 0.34375, "num_input_tokens_seen": 356548064, "step": 6364 }, { "epoch": 14.175946547884188, "grad_norm": 12.178879737854004, "learning_rate": 1e-06, "loss": 0.4276, "num_input_tokens_seen": 356606084, "step": 6365 }, { "epoch": 14.175946547884188, "loss": 0.39342576265335083, "loss_ce": 0.00011522185377543792, "loss_iou": 0.173828125, "loss_num": 0.0091552734375, "loss_xval": 0.392578125, "num_input_tokens_seen": 356606084, "step": 6365 }, { "epoch": 14.178173719376392, "grad_norm": 31.509164810180664, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 356662456, "step": 6366 }, { "epoch": 14.178173719376392, "loss": 0.522935152053833, "loss_ce": 0.00010799485608004034, "loss_iou": 0.232421875, "loss_num": 0.011474609375, "loss_xval": 0.5234375, "num_input_tokens_seen": 356662456, "step": 6366 }, { "epoch": 14.180400890868597, "grad_norm": 15.755391120910645, "learning_rate": 1e-06, "loss": 0.478, "num_input_tokens_seen": 356718496, "step": 6367 }, { "epoch": 14.180400890868597, "loss": 0.46932005882263184, "loss_ce": 8.176272240234539e-05, "loss_iou": 0.1962890625, "loss_num": 0.01513671875, "loss_xval": 0.46875, "num_input_tokens_seen": 356718496, "step": 6367 }, { "epoch": 14.182628062360802, "grad_norm": 20.124523162841797, "learning_rate": 1e-06, "loss": 0.443, "num_input_tokens_seen": 356772880, "step": 6368 }, { "epoch": 14.182628062360802, "loss": 0.4469994902610779, "loss_ce": 0.00010007787204813212, "loss_iou": 0.1787109375, "loss_num": 0.0179443359375, "loss_xval": 0.447265625, "num_input_tokens_seen": 356772880, "step": 6368 }, { "epoch": 14.184855233853007, "grad_norm": 14.184540748596191, "learning_rate": 1e-06, "loss": 0.364, "num_input_tokens_seen": 356829648, "step": 6369 }, { "epoch": 14.184855233853007, "loss": 0.49447864294052124, "loss_ce": 9.389698243467137e-05, "loss_iou": 0.2177734375, "loss_num": 0.01171875, "loss_xval": 0.494140625, "num_input_tokens_seen": 356829648, "step": 6369 }, { "epoch": 14.187082405345212, "grad_norm": 17.749513626098633, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 356886340, "step": 6370 }, { "epoch": 14.187082405345212, "loss": 0.38717857003211975, "loss_ce": 9.360718831885606e-05, "loss_iou": 0.171875, "loss_num": 0.008544921875, "loss_xval": 0.38671875, "num_input_tokens_seen": 356886340, "step": 6370 }, { "epoch": 14.189309576837417, "grad_norm": 49.11314392089844, "learning_rate": 1e-06, "loss": 0.6898, "num_input_tokens_seen": 356942864, "step": 6371 }, { "epoch": 14.189309576837417, "loss": 0.91033935546875, "loss_ce": 0.00018309304141439497, "loss_iou": 0.3984375, "loss_num": 0.0225830078125, "loss_xval": 0.91015625, "num_input_tokens_seen": 356942864, "step": 6371 }, { "epoch": 14.191536748329622, "grad_norm": 14.434630393981934, "learning_rate": 1e-06, "loss": 0.4182, "num_input_tokens_seen": 356998272, "step": 6372 }, { "epoch": 14.191536748329622, "loss": 0.3537047207355499, "loss_ce": 0.00012805727601516992, "loss_iou": 0.1611328125, "loss_num": 0.006195068359375, "loss_xval": 0.353515625, "num_input_tokens_seen": 356998272, "step": 6372 }, { "epoch": 14.193763919821826, "grad_norm": 26.62126350402832, "learning_rate": 1e-06, "loss": 0.3928, "num_input_tokens_seen": 357056092, "step": 6373 }, { "epoch": 14.193763919821826, "loss": 0.3926212787628174, "loss_ce": 0.00010417943849461153, "loss_iou": 0.1640625, "loss_num": 0.0130615234375, "loss_xval": 0.392578125, "num_input_tokens_seen": 357056092, "step": 6373 }, { "epoch": 14.195991091314031, "grad_norm": 19.854206085205078, "learning_rate": 1e-06, "loss": 0.4684, "num_input_tokens_seen": 357114944, "step": 6374 }, { "epoch": 14.195991091314031, "loss": 0.5363778471946716, "loss_ce": 0.00012294600310269743, "loss_iou": 0.2353515625, "loss_num": 0.01318359375, "loss_xval": 0.53515625, "num_input_tokens_seen": 357114944, "step": 6374 }, { "epoch": 14.198218262806236, "grad_norm": 17.966840744018555, "learning_rate": 1e-06, "loss": 0.4877, "num_input_tokens_seen": 357170380, "step": 6375 }, { "epoch": 14.198218262806236, "loss": 0.5723801851272583, "loss_ce": 0.00017562352877575904, "loss_iou": 0.2333984375, "loss_num": 0.0208740234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 357170380, "step": 6375 }, { "epoch": 14.200445434298441, "grad_norm": 20.891889572143555, "learning_rate": 1e-06, "loss": 0.5619, "num_input_tokens_seen": 357227228, "step": 6376 }, { "epoch": 14.200445434298441, "loss": 0.6446791887283325, "loss_ce": 0.00011739273031707853, "loss_iou": 0.267578125, "loss_num": 0.021728515625, "loss_xval": 0.64453125, "num_input_tokens_seen": 357227228, "step": 6376 }, { "epoch": 14.202672605790646, "grad_norm": 18.76251792907715, "learning_rate": 1e-06, "loss": 0.3435, "num_input_tokens_seen": 357285012, "step": 6377 }, { "epoch": 14.202672605790646, "loss": 0.3950054347515106, "loss_ce": 0.00010797058348543942, "loss_iou": 0.173828125, "loss_num": 0.00927734375, "loss_xval": 0.39453125, "num_input_tokens_seen": 357285012, "step": 6377 }, { "epoch": 14.20489977728285, "grad_norm": 13.638835906982422, "learning_rate": 1e-06, "loss": 0.3753, "num_input_tokens_seen": 357341596, "step": 6378 }, { "epoch": 14.20489977728285, "loss": 0.4419354498386383, "loss_ce": 0.00010194515925832093, "loss_iou": 0.185546875, "loss_num": 0.01409912109375, "loss_xval": 0.44140625, "num_input_tokens_seen": 357341596, "step": 6378 }, { "epoch": 14.207126948775056, "grad_norm": 17.007076263427734, "learning_rate": 1e-06, "loss": 0.5073, "num_input_tokens_seen": 357397024, "step": 6379 }, { "epoch": 14.207126948775056, "loss": 0.2852746248245239, "loss_ce": 0.00011835141776828095, "loss_iou": 0.1240234375, "loss_num": 0.007415771484375, "loss_xval": 0.28515625, "num_input_tokens_seen": 357397024, "step": 6379 }, { "epoch": 14.20935412026726, "grad_norm": 15.55772590637207, "learning_rate": 1e-06, "loss": 0.4203, "num_input_tokens_seen": 357453348, "step": 6380 }, { "epoch": 14.20935412026726, "loss": 0.3796178698539734, "loss_ce": 0.00010125982225872576, "loss_iou": 0.17578125, "loss_num": 0.00567626953125, "loss_xval": 0.37890625, "num_input_tokens_seen": 357453348, "step": 6380 }, { "epoch": 14.211581291759465, "grad_norm": 21.045106887817383, "learning_rate": 1e-06, "loss": 0.4578, "num_input_tokens_seen": 357508568, "step": 6381 }, { "epoch": 14.211581291759465, "loss": 0.40396052598953247, "loss_ce": 0.00015192307182587683, "loss_iou": 0.16796875, "loss_num": 0.013671875, "loss_xval": 0.404296875, "num_input_tokens_seen": 357508568, "step": 6381 }, { "epoch": 14.21380846325167, "grad_norm": 27.14858627319336, "learning_rate": 1e-06, "loss": 0.4897, "num_input_tokens_seen": 357561732, "step": 6382 }, { "epoch": 14.21380846325167, "loss": 0.5612715482711792, "loss_ce": 0.00023641872394364327, "loss_iou": 0.24609375, "loss_num": 0.01385498046875, "loss_xval": 0.5625, "num_input_tokens_seen": 357561732, "step": 6382 }, { "epoch": 14.216035634743875, "grad_norm": 17.41556167602539, "learning_rate": 1e-06, "loss": 0.4793, "num_input_tokens_seen": 357618236, "step": 6383 }, { "epoch": 14.216035634743875, "loss": 0.5158074498176575, "loss_ce": 0.00012142492778366432, "loss_iou": 0.2138671875, "loss_num": 0.0177001953125, "loss_xval": 0.515625, "num_input_tokens_seen": 357618236, "step": 6383 }, { "epoch": 14.21826280623608, "grad_norm": 15.721260070800781, "learning_rate": 1e-06, "loss": 0.3657, "num_input_tokens_seen": 357676752, "step": 6384 }, { "epoch": 14.21826280623608, "loss": 0.4123426675796509, "loss_ce": 0.0001112363679567352, "loss_iou": 0.1826171875, "loss_num": 0.00927734375, "loss_xval": 0.412109375, "num_input_tokens_seen": 357676752, "step": 6384 }, { "epoch": 14.220489977728285, "grad_norm": 26.944326400756836, "learning_rate": 1e-06, "loss": 0.4681, "num_input_tokens_seen": 357734280, "step": 6385 }, { "epoch": 14.220489977728285, "loss": 0.46214836835861206, "loss_ce": 0.00011222571629332379, "loss_iou": 0.197265625, "loss_num": 0.013427734375, "loss_xval": 0.462890625, "num_input_tokens_seen": 357734280, "step": 6385 }, { "epoch": 14.22271714922049, "grad_norm": 19.070985794067383, "learning_rate": 1e-06, "loss": 0.3808, "num_input_tokens_seen": 357788536, "step": 6386 }, { "epoch": 14.22271714922049, "loss": 0.32470569014549255, "loss_ce": 0.00012072586105205119, "loss_iou": 0.1416015625, "loss_num": 0.00836181640625, "loss_xval": 0.32421875, "num_input_tokens_seen": 357788536, "step": 6386 }, { "epoch": 14.224944320712694, "grad_norm": 12.709535598754883, "learning_rate": 1e-06, "loss": 0.5202, "num_input_tokens_seen": 357844904, "step": 6387 }, { "epoch": 14.224944320712694, "loss": 0.35143405199050903, "loss_ce": 0.00011568302579689771, "loss_iou": 0.1611328125, "loss_num": 0.005706787109375, "loss_xval": 0.3515625, "num_input_tokens_seen": 357844904, "step": 6387 }, { "epoch": 14.2271714922049, "grad_norm": 15.428078651428223, "learning_rate": 1e-06, "loss": 0.4241, "num_input_tokens_seen": 357902220, "step": 6388 }, { "epoch": 14.2271714922049, "loss": 0.5605834126472473, "loss_ce": 0.00015861910651437938, "loss_iou": 0.26171875, "loss_num": 0.007354736328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 357902220, "step": 6388 }, { "epoch": 14.229398663697104, "grad_norm": 25.415952682495117, "learning_rate": 1e-06, "loss": 0.4713, "num_input_tokens_seen": 357958080, "step": 6389 }, { "epoch": 14.229398663697104, "loss": 0.47960424423217773, "loss_ce": 0.00011205507325939834, "loss_iou": 0.224609375, "loss_num": 0.00604248046875, "loss_xval": 0.48046875, "num_input_tokens_seen": 357958080, "step": 6389 }, { "epoch": 14.231625835189309, "grad_norm": 14.51382827758789, "learning_rate": 1e-06, "loss": 0.4541, "num_input_tokens_seen": 358015476, "step": 6390 }, { "epoch": 14.231625835189309, "loss": 0.3873499631881714, "loss_ce": 0.00038707145722582936, "loss_iou": 0.1796875, "loss_num": 0.00543212890625, "loss_xval": 0.38671875, "num_input_tokens_seen": 358015476, "step": 6390 }, { "epoch": 14.233853006681514, "grad_norm": 20.579307556152344, "learning_rate": 1e-06, "loss": 0.592, "num_input_tokens_seen": 358073920, "step": 6391 }, { "epoch": 14.233853006681514, "loss": 0.554311215877533, "loss_ce": 0.00011194508260814473, "loss_iou": 0.234375, "loss_num": 0.016845703125, "loss_xval": 0.5546875, "num_input_tokens_seen": 358073920, "step": 6391 }, { "epoch": 14.236080178173719, "grad_norm": 18.920732498168945, "learning_rate": 1e-06, "loss": 0.4462, "num_input_tokens_seen": 358130704, "step": 6392 }, { "epoch": 14.236080178173719, "loss": 0.5425920486450195, "loss_ce": 0.00011164323223056272, "loss_iou": 0.2412109375, "loss_num": 0.01202392578125, "loss_xval": 0.54296875, "num_input_tokens_seen": 358130704, "step": 6392 }, { "epoch": 14.238307349665924, "grad_norm": 16.76801872253418, "learning_rate": 1e-06, "loss": 0.3899, "num_input_tokens_seen": 358189660, "step": 6393 }, { "epoch": 14.238307349665924, "loss": 0.345196008682251, "loss_ce": 0.00010322515299776569, "loss_iou": 0.150390625, "loss_num": 0.0087890625, "loss_xval": 0.345703125, "num_input_tokens_seen": 358189660, "step": 6393 }, { "epoch": 14.240534521158128, "grad_norm": 20.114219665527344, "learning_rate": 1e-06, "loss": 0.3621, "num_input_tokens_seen": 358247428, "step": 6394 }, { "epoch": 14.240534521158128, "loss": 0.3770490884780884, "loss_ce": 9.597234020475298e-05, "loss_iou": 0.17578125, "loss_num": 0.005096435546875, "loss_xval": 0.376953125, "num_input_tokens_seen": 358247428, "step": 6394 }, { "epoch": 14.242761692650333, "grad_norm": 20.959352493286133, "learning_rate": 1e-06, "loss": 0.5468, "num_input_tokens_seen": 358304388, "step": 6395 }, { "epoch": 14.242761692650333, "loss": 0.6914682388305664, "loss_ce": 0.00030615812283940613, "loss_iou": 0.298828125, "loss_num": 0.0185546875, "loss_xval": 0.69140625, "num_input_tokens_seen": 358304388, "step": 6395 }, { "epoch": 14.244988864142538, "grad_norm": 26.55893325805664, "learning_rate": 1e-06, "loss": 0.5794, "num_input_tokens_seen": 358360252, "step": 6396 }, { "epoch": 14.244988864142538, "loss": 0.46212613582611084, "loss_ce": 8.996979158837348e-05, "loss_iou": 0.2021484375, "loss_num": 0.01141357421875, "loss_xval": 0.462890625, "num_input_tokens_seen": 358360252, "step": 6396 }, { "epoch": 14.247216035634743, "grad_norm": 18.9809627532959, "learning_rate": 1e-06, "loss": 0.4802, "num_input_tokens_seen": 358418356, "step": 6397 }, { "epoch": 14.247216035634743, "loss": 0.3323664665222168, "loss_ce": 9.106392099056393e-05, "loss_iou": 0.1494140625, "loss_num": 0.006561279296875, "loss_xval": 0.33203125, "num_input_tokens_seen": 358418356, "step": 6397 }, { "epoch": 14.249443207126948, "grad_norm": 16.457611083984375, "learning_rate": 1e-06, "loss": 0.4579, "num_input_tokens_seen": 358475404, "step": 6398 }, { "epoch": 14.249443207126948, "loss": 0.6108676791191101, "loss_ce": 0.00014991118223406374, "loss_iou": 0.232421875, "loss_num": 0.029296875, "loss_xval": 0.609375, "num_input_tokens_seen": 358475404, "step": 6398 }, { "epoch": 14.251670378619155, "grad_norm": 19.050983428955078, "learning_rate": 1e-06, "loss": 0.4976, "num_input_tokens_seen": 358528924, "step": 6399 }, { "epoch": 14.251670378619155, "loss": 0.6439430713653564, "loss_ce": 0.0001442273351131007, "loss_iou": 0.2890625, "loss_num": 0.01312255859375, "loss_xval": 0.64453125, "num_input_tokens_seen": 358528924, "step": 6399 }, { "epoch": 14.25389755011136, "grad_norm": 14.289558410644531, "learning_rate": 1e-06, "loss": 0.46, "num_input_tokens_seen": 358585368, "step": 6400 }, { "epoch": 14.25389755011136, "loss": 0.6162996292114258, "loss_ce": 8.872315811458975e-05, "loss_iou": 0.255859375, "loss_num": 0.0206298828125, "loss_xval": 0.6171875, "num_input_tokens_seen": 358585368, "step": 6400 }, { "epoch": 14.256124721603564, "grad_norm": 22.632549285888672, "learning_rate": 1e-06, "loss": 0.4443, "num_input_tokens_seen": 358642216, "step": 6401 }, { "epoch": 14.256124721603564, "loss": 0.39599111676216125, "loss_ce": 0.00011710106628015637, "loss_iou": 0.1708984375, "loss_num": 0.01068115234375, "loss_xval": 0.396484375, "num_input_tokens_seen": 358642216, "step": 6401 }, { "epoch": 14.25835189309577, "grad_norm": 15.929319381713867, "learning_rate": 1e-06, "loss": 0.4255, "num_input_tokens_seen": 358698924, "step": 6402 }, { "epoch": 14.25835189309577, "loss": 0.35032692551612854, "loss_ce": 0.00010718655539676547, "loss_iou": 0.1533203125, "loss_num": 0.00860595703125, "loss_xval": 0.349609375, "num_input_tokens_seen": 358698924, "step": 6402 }, { "epoch": 14.260579064587974, "grad_norm": 13.306114196777344, "learning_rate": 1e-06, "loss": 0.3793, "num_input_tokens_seen": 358755212, "step": 6403 }, { "epoch": 14.260579064587974, "loss": 0.2552165389060974, "loss_ce": 8.956858073361218e-05, "loss_iou": 0.11181640625, "loss_num": 0.0062255859375, "loss_xval": 0.255859375, "num_input_tokens_seen": 358755212, "step": 6403 }, { "epoch": 14.262806236080179, "grad_norm": 17.36546516418457, "learning_rate": 1e-06, "loss": 0.4423, "num_input_tokens_seen": 358814232, "step": 6404 }, { "epoch": 14.262806236080179, "loss": 0.4097899794578552, "loss_ce": 0.00012203957885503769, "loss_iou": 0.1806640625, "loss_num": 0.0096435546875, "loss_xval": 0.41015625, "num_input_tokens_seen": 358814232, "step": 6404 }, { "epoch": 14.265033407572384, "grad_norm": 29.2259464263916, "learning_rate": 1e-06, "loss": 0.5395, "num_input_tokens_seen": 358871184, "step": 6405 }, { "epoch": 14.265033407572384, "loss": 0.682025671005249, "loss_ce": 0.00014085797010920942, "loss_iou": 0.28125, "loss_num": 0.0235595703125, "loss_xval": 0.68359375, "num_input_tokens_seen": 358871184, "step": 6405 }, { "epoch": 14.267260579064589, "grad_norm": 12.634069442749023, "learning_rate": 1e-06, "loss": 0.2714, "num_input_tokens_seen": 358928648, "step": 6406 }, { "epoch": 14.267260579064589, "loss": 0.2787477970123291, "loss_ce": 9.179921471513808e-05, "loss_iou": 0.1123046875, "loss_num": 0.01092529296875, "loss_xval": 0.279296875, "num_input_tokens_seen": 358928648, "step": 6406 }, { "epoch": 14.269487750556793, "grad_norm": 25.011402130126953, "learning_rate": 1e-06, "loss": 0.5663, "num_input_tokens_seen": 358987088, "step": 6407 }, { "epoch": 14.269487750556793, "loss": 0.4417472183704376, "loss_ce": 9.685405530035496e-05, "loss_iou": 0.1845703125, "loss_num": 0.01470947265625, "loss_xval": 0.44140625, "num_input_tokens_seen": 358987088, "step": 6407 }, { "epoch": 14.271714922048998, "grad_norm": 14.262114524841309, "learning_rate": 1e-06, "loss": 0.4572, "num_input_tokens_seen": 359043584, "step": 6408 }, { "epoch": 14.271714922048998, "loss": 0.40514233708381653, "loss_ce": 0.00011304439249215648, "loss_iou": 0.1669921875, "loss_num": 0.01446533203125, "loss_xval": 0.404296875, "num_input_tokens_seen": 359043584, "step": 6408 }, { "epoch": 14.273942093541203, "grad_norm": 19.64780616760254, "learning_rate": 1e-06, "loss": 0.3768, "num_input_tokens_seen": 359097740, "step": 6409 }, { "epoch": 14.273942093541203, "loss": 0.39305636286735535, "loss_ce": 0.00011201576853636652, "loss_iou": 0.1845703125, "loss_num": 0.004852294921875, "loss_xval": 0.392578125, "num_input_tokens_seen": 359097740, "step": 6409 }, { "epoch": 14.276169265033408, "grad_norm": 16.076786041259766, "learning_rate": 1e-06, "loss": 0.5821, "num_input_tokens_seen": 359153996, "step": 6410 }, { "epoch": 14.276169265033408, "loss": 0.5809260606765747, "loss_ce": 0.00011550600902410224, "loss_iou": 0.2470703125, "loss_num": 0.0174560546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 359153996, "step": 6410 }, { "epoch": 14.278396436525613, "grad_norm": 14.925246238708496, "learning_rate": 1e-06, "loss": 0.3981, "num_input_tokens_seen": 359209560, "step": 6411 }, { "epoch": 14.278396436525613, "loss": 0.4395759403705597, "loss_ce": 0.00012282837997190654, "loss_iou": 0.19921875, "loss_num": 0.00830078125, "loss_xval": 0.439453125, "num_input_tokens_seen": 359209560, "step": 6411 }, { "epoch": 14.280623608017818, "grad_norm": 20.60141372680664, "learning_rate": 1e-06, "loss": 0.6134, "num_input_tokens_seen": 359263596, "step": 6412 }, { "epoch": 14.280623608017818, "loss": 0.5152615308761597, "loss_ce": 0.00012483607861213386, "loss_iou": 0.220703125, "loss_num": 0.01483154296875, "loss_xval": 0.515625, "num_input_tokens_seen": 359263596, "step": 6412 }, { "epoch": 14.282850779510023, "grad_norm": 43.90515899658203, "learning_rate": 1e-06, "loss": 0.6257, "num_input_tokens_seen": 359322224, "step": 6413 }, { "epoch": 14.282850779510023, "loss": 0.6065540313720703, "loss_ce": 0.00010870952974073589, "loss_iou": 0.26953125, "loss_num": 0.0135498046875, "loss_xval": 0.60546875, "num_input_tokens_seen": 359322224, "step": 6413 }, { "epoch": 14.285077951002227, "grad_norm": 22.319034576416016, "learning_rate": 1e-06, "loss": 0.3332, "num_input_tokens_seen": 359377716, "step": 6414 }, { "epoch": 14.285077951002227, "loss": 0.2222072184085846, "loss_ce": 0.00010028109682025388, "loss_iou": 0.091796875, "loss_num": 0.007568359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 359377716, "step": 6414 }, { "epoch": 14.287305122494432, "grad_norm": 16.046680450439453, "learning_rate": 1e-06, "loss": 0.4248, "num_input_tokens_seen": 359432444, "step": 6415 }, { "epoch": 14.287305122494432, "loss": 0.37368685007095337, "loss_ce": 9.067119390238076e-05, "loss_iou": 0.16015625, "loss_num": 0.01080322265625, "loss_xval": 0.373046875, "num_input_tokens_seen": 359432444, "step": 6415 }, { "epoch": 14.289532293986637, "grad_norm": 14.897786140441895, "learning_rate": 1e-06, "loss": 0.3186, "num_input_tokens_seen": 359489464, "step": 6416 }, { "epoch": 14.289532293986637, "loss": 0.36753690242767334, "loss_ce": 0.00010526920959819108, "loss_iou": 0.15625, "loss_num": 0.010986328125, "loss_xval": 0.3671875, "num_input_tokens_seen": 359489464, "step": 6416 }, { "epoch": 14.291759465478842, "grad_norm": 17.839426040649414, "learning_rate": 1e-06, "loss": 0.455, "num_input_tokens_seen": 359543156, "step": 6417 }, { "epoch": 14.291759465478842, "loss": 0.5617268085479736, "loss_ce": 0.0001423186477040872, "loss_iou": 0.2294921875, "loss_num": 0.0205078125, "loss_xval": 0.5625, "num_input_tokens_seen": 359543156, "step": 6417 }, { "epoch": 14.293986636971047, "grad_norm": 18.264699935913086, "learning_rate": 1e-06, "loss": 0.5459, "num_input_tokens_seen": 359597988, "step": 6418 }, { "epoch": 14.293986636971047, "loss": 0.5761609077453613, "loss_ce": 0.00011114442168036476, "loss_iou": 0.21484375, "loss_num": 0.0289306640625, "loss_xval": 0.57421875, "num_input_tokens_seen": 359597988, "step": 6418 }, { "epoch": 14.296213808463252, "grad_norm": 110.11897277832031, "learning_rate": 1e-06, "loss": 0.4713, "num_input_tokens_seen": 359655404, "step": 6419 }, { "epoch": 14.296213808463252, "loss": 0.3378644287586212, "loss_ce": 0.00015691184671595693, "loss_iou": 0.150390625, "loss_num": 0.007476806640625, "loss_xval": 0.337890625, "num_input_tokens_seen": 359655404, "step": 6419 }, { "epoch": 14.298440979955457, "grad_norm": 15.50516128540039, "learning_rate": 1e-06, "loss": 0.3962, "num_input_tokens_seen": 359712096, "step": 6420 }, { "epoch": 14.298440979955457, "loss": 0.3304358124732971, "loss_ce": 0.00011355809692759067, "loss_iou": 0.14453125, "loss_num": 0.00823974609375, "loss_xval": 0.330078125, "num_input_tokens_seen": 359712096, "step": 6420 }, { "epoch": 14.300668151447661, "grad_norm": 23.723712921142578, "learning_rate": 1e-06, "loss": 0.5297, "num_input_tokens_seen": 359769160, "step": 6421 }, { "epoch": 14.300668151447661, "loss": 0.6636959910392761, "loss_ce": 0.00012177543976576999, "loss_iou": 0.279296875, "loss_num": 0.021240234375, "loss_xval": 0.6640625, "num_input_tokens_seen": 359769160, "step": 6421 }, { "epoch": 14.302895322939866, "grad_norm": 13.204316139221191, "learning_rate": 1e-06, "loss": 0.7653, "num_input_tokens_seen": 359824584, "step": 6422 }, { "epoch": 14.302895322939866, "loss": 0.5646822452545166, "loss_ce": 0.00010707967157941312, "loss_iou": 0.2177734375, "loss_num": 0.02587890625, "loss_xval": 0.56640625, "num_input_tokens_seen": 359824584, "step": 6422 }, { "epoch": 14.305122494432071, "grad_norm": 25.105751037597656, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 359877240, "step": 6423 }, { "epoch": 14.305122494432071, "loss": 0.6058588027954102, "loss_ce": 0.00026802660431712866, "loss_iou": 0.25, "loss_num": 0.020751953125, "loss_xval": 0.60546875, "num_input_tokens_seen": 359877240, "step": 6423 }, { "epoch": 14.307349665924276, "grad_norm": 20.69352149963379, "learning_rate": 1e-06, "loss": 0.579, "num_input_tokens_seen": 359932336, "step": 6424 }, { "epoch": 14.307349665924276, "loss": 0.5356358289718628, "loss_ce": 0.00011333586007822305, "loss_iou": 0.248046875, "loss_num": 0.00811767578125, "loss_xval": 0.53515625, "num_input_tokens_seen": 359932336, "step": 6424 }, { "epoch": 14.309576837416481, "grad_norm": 21.304393768310547, "learning_rate": 1e-06, "loss": 0.3508, "num_input_tokens_seen": 359991588, "step": 6425 }, { "epoch": 14.309576837416481, "loss": 0.3918312191963196, "loss_ce": 0.0001076057887985371, "loss_iou": 0.173828125, "loss_num": 0.0089111328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 359991588, "step": 6425 }, { "epoch": 14.311804008908686, "grad_norm": 16.4875545501709, "learning_rate": 1e-06, "loss": 0.3818, "num_input_tokens_seen": 360046080, "step": 6426 }, { "epoch": 14.311804008908686, "loss": 0.42865312099456787, "loss_ce": 9.47633889154531e-05, "loss_iou": 0.1845703125, "loss_num": 0.01165771484375, "loss_xval": 0.427734375, "num_input_tokens_seen": 360046080, "step": 6426 }, { "epoch": 14.31403118040089, "grad_norm": 27.263931274414062, "learning_rate": 1e-06, "loss": 0.4138, "num_input_tokens_seen": 360101588, "step": 6427 }, { "epoch": 14.31403118040089, "loss": 0.4932384490966797, "loss_ce": 0.00013543458771891892, "loss_iou": 0.212890625, "loss_num": 0.013427734375, "loss_xval": 0.4921875, "num_input_tokens_seen": 360101588, "step": 6427 }, { "epoch": 14.316258351893095, "grad_norm": 15.546148300170898, "learning_rate": 1e-06, "loss": 0.5737, "num_input_tokens_seen": 360157696, "step": 6428 }, { "epoch": 14.316258351893095, "loss": 0.774878740310669, "loss_ce": 0.00034262199187651277, "loss_iou": 0.291015625, "loss_num": 0.0380859375, "loss_xval": 0.7734375, "num_input_tokens_seen": 360157696, "step": 6428 }, { "epoch": 14.3184855233853, "grad_norm": 17.817882537841797, "learning_rate": 1e-06, "loss": 0.5475, "num_input_tokens_seen": 360211328, "step": 6429 }, { "epoch": 14.3184855233853, "loss": 0.4766749143600464, "loss_ce": 0.00011241542233619839, "loss_iou": 0.205078125, "loss_num": 0.013427734375, "loss_xval": 0.4765625, "num_input_tokens_seen": 360211328, "step": 6429 }, { "epoch": 14.320712694877505, "grad_norm": 26.077342987060547, "learning_rate": 1e-06, "loss": 0.5338, "num_input_tokens_seen": 360266996, "step": 6430 }, { "epoch": 14.320712694877505, "loss": 0.5293970704078674, "loss_ce": 0.00010019890760304406, "loss_iou": 0.224609375, "loss_num": 0.01611328125, "loss_xval": 0.53125, "num_input_tokens_seen": 360266996, "step": 6430 }, { "epoch": 14.32293986636971, "grad_norm": 21.88715362548828, "learning_rate": 1e-06, "loss": 0.5877, "num_input_tokens_seen": 360324620, "step": 6431 }, { "epoch": 14.32293986636971, "loss": 0.5899729132652283, "loss_ce": 0.00012914868420921266, "loss_iou": 0.2470703125, "loss_num": 0.0191650390625, "loss_xval": 0.58984375, "num_input_tokens_seen": 360324620, "step": 6431 }, { "epoch": 14.325167037861915, "grad_norm": 21.561594009399414, "learning_rate": 1e-06, "loss": 0.519, "num_input_tokens_seen": 360381216, "step": 6432 }, { "epoch": 14.325167037861915, "loss": 0.6034383773803711, "loss_ce": 0.00016689574113115668, "loss_iou": 0.2734375, "loss_num": 0.01116943359375, "loss_xval": 0.6015625, "num_input_tokens_seen": 360381216, "step": 6432 }, { "epoch": 14.32739420935412, "grad_norm": 34.43752670288086, "learning_rate": 1e-06, "loss": 0.8583, "num_input_tokens_seen": 360434988, "step": 6433 }, { "epoch": 14.32739420935412, "loss": 0.6987577080726624, "loss_ce": 0.00014932786871213466, "loss_iou": 0.275390625, "loss_num": 0.030029296875, "loss_xval": 0.69921875, "num_input_tokens_seen": 360434988, "step": 6433 }, { "epoch": 14.329621380846325, "grad_norm": 14.277132034301758, "learning_rate": 1e-06, "loss": 0.653, "num_input_tokens_seen": 360490608, "step": 6434 }, { "epoch": 14.329621380846325, "loss": 0.697412371635437, "loss_ce": 0.0001467484253225848, "loss_iou": 0.283203125, "loss_num": 0.026611328125, "loss_xval": 0.6953125, "num_input_tokens_seen": 360490608, "step": 6434 }, { "epoch": 14.33184855233853, "grad_norm": 18.22037696838379, "learning_rate": 1e-06, "loss": 0.4797, "num_input_tokens_seen": 360547056, "step": 6435 }, { "epoch": 14.33184855233853, "loss": 0.4767254590988159, "loss_ce": 0.00010193933121627197, "loss_iou": 0.2119140625, "loss_num": 0.0106201171875, "loss_xval": 0.4765625, "num_input_tokens_seen": 360547056, "step": 6435 }, { "epoch": 14.334075723830734, "grad_norm": 16.72195816040039, "learning_rate": 1e-06, "loss": 0.3839, "num_input_tokens_seen": 360604288, "step": 6436 }, { "epoch": 14.334075723830734, "loss": 0.38419631123542786, "loss_ce": 0.00010207582090515643, "loss_iou": 0.158203125, "loss_num": 0.01373291015625, "loss_xval": 0.384765625, "num_input_tokens_seen": 360604288, "step": 6436 }, { "epoch": 14.33630289532294, "grad_norm": 16.082204818725586, "learning_rate": 1e-06, "loss": 0.5829, "num_input_tokens_seen": 360662528, "step": 6437 }, { "epoch": 14.33630289532294, "loss": 0.446768581867218, "loss_ce": 0.00011331496352795511, "loss_iou": 0.19140625, "loss_num": 0.01275634765625, "loss_xval": 0.447265625, "num_input_tokens_seen": 360662528, "step": 6437 }, { "epoch": 14.338530066815144, "grad_norm": 19.101377487182617, "learning_rate": 1e-06, "loss": 0.3889, "num_input_tokens_seen": 360717448, "step": 6438 }, { "epoch": 14.338530066815144, "loss": 0.5185633897781372, "loss_ce": 0.00013074232265353203, "loss_iou": 0.216796875, "loss_num": 0.016845703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 360717448, "step": 6438 }, { "epoch": 14.340757238307349, "grad_norm": 21.411943435668945, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 360773220, "step": 6439 }, { "epoch": 14.340757238307349, "loss": 0.4258883595466614, "loss_ce": 0.00010708505578804761, "loss_iou": 0.197265625, "loss_num": 0.006378173828125, "loss_xval": 0.42578125, "num_input_tokens_seen": 360773220, "step": 6439 }, { "epoch": 14.342984409799554, "grad_norm": 21.08489418029785, "learning_rate": 1e-06, "loss": 0.4344, "num_input_tokens_seen": 360829380, "step": 6440 }, { "epoch": 14.342984409799554, "loss": 0.45614272356033325, "loss_ce": 8.802305092103779e-05, "loss_iou": 0.185546875, "loss_num": 0.0169677734375, "loss_xval": 0.45703125, "num_input_tokens_seen": 360829380, "step": 6440 }, { "epoch": 14.345211581291759, "grad_norm": 19.268587112426758, "learning_rate": 1e-06, "loss": 0.4262, "num_input_tokens_seen": 360885632, "step": 6441 }, { "epoch": 14.345211581291759, "loss": 0.561874270439148, "loss_ce": 0.00010666967136785388, "loss_iou": 0.220703125, "loss_num": 0.0240478515625, "loss_xval": 0.5625, "num_input_tokens_seen": 360885632, "step": 6441 }, { "epoch": 14.347438752783964, "grad_norm": 16.14497947692871, "learning_rate": 1e-06, "loss": 0.4857, "num_input_tokens_seen": 360941352, "step": 6442 }, { "epoch": 14.347438752783964, "loss": 0.6506056189537048, "loss_ce": 9.296346252085641e-05, "loss_iou": 0.294921875, "loss_num": 0.01239013671875, "loss_xval": 0.65234375, "num_input_tokens_seen": 360941352, "step": 6442 }, { "epoch": 14.34966592427617, "grad_norm": 16.787885665893555, "learning_rate": 1e-06, "loss": 0.3944, "num_input_tokens_seen": 360996836, "step": 6443 }, { "epoch": 14.34966592427617, "loss": 0.44542229175567627, "loss_ce": 0.00010977737110806629, "loss_iou": 0.20703125, "loss_num": 0.005950927734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 360996836, "step": 6443 }, { "epoch": 14.351893095768375, "grad_norm": 18.984052658081055, "learning_rate": 1e-06, "loss": 0.7604, "num_input_tokens_seen": 361050092, "step": 6444 }, { "epoch": 14.351893095768375, "loss": 0.9335750341415405, "loss_ce": 0.00010336375271435827, "loss_iou": 0.392578125, "loss_num": 0.0294189453125, "loss_xval": 0.93359375, "num_input_tokens_seen": 361050092, "step": 6444 }, { "epoch": 14.35412026726058, "grad_norm": 16.68486213684082, "learning_rate": 1e-06, "loss": 0.3219, "num_input_tokens_seen": 361106880, "step": 6445 }, { "epoch": 14.35412026726058, "loss": 0.3571700155735016, "loss_ce": 0.00011435095075285062, "loss_iou": 0.1630859375, "loss_num": 0.00628662109375, "loss_xval": 0.357421875, "num_input_tokens_seen": 361106880, "step": 6445 }, { "epoch": 14.356347438752785, "grad_norm": 31.47307014465332, "learning_rate": 1e-06, "loss": 0.4125, "num_input_tokens_seen": 361161316, "step": 6446 }, { "epoch": 14.356347438752785, "loss": 0.3598456084728241, "loss_ce": 0.00010440165351610631, "loss_iou": 0.1572265625, "loss_num": 0.00921630859375, "loss_xval": 0.359375, "num_input_tokens_seen": 361161316, "step": 6446 }, { "epoch": 14.35857461024499, "grad_norm": 20.589462280273438, "learning_rate": 1e-06, "loss": 0.425, "num_input_tokens_seen": 361216012, "step": 6447 }, { "epoch": 14.35857461024499, "loss": 0.4665713906288147, "loss_ce": 0.0001407517702318728, "loss_iou": 0.201171875, "loss_num": 0.0126953125, "loss_xval": 0.466796875, "num_input_tokens_seen": 361216012, "step": 6447 }, { "epoch": 14.360801781737194, "grad_norm": 31.11367416381836, "learning_rate": 1e-06, "loss": 0.5042, "num_input_tokens_seen": 361273420, "step": 6448 }, { "epoch": 14.360801781737194, "loss": 0.5230991244316101, "loss_ce": 0.00014988121984060854, "loss_iou": 0.2373046875, "loss_num": 0.00958251953125, "loss_xval": 0.5234375, "num_input_tokens_seen": 361273420, "step": 6448 }, { "epoch": 14.3630289532294, "grad_norm": 21.973804473876953, "learning_rate": 1e-06, "loss": 0.5315, "num_input_tokens_seen": 361327552, "step": 6449 }, { "epoch": 14.3630289532294, "loss": 0.5520008206367493, "loss_ce": 0.00012092564429622144, "loss_iou": 0.2490234375, "loss_num": 0.0107421875, "loss_xval": 0.55078125, "num_input_tokens_seen": 361327552, "step": 6449 }, { "epoch": 14.365256124721604, "grad_norm": 16.959115982055664, "learning_rate": 1e-06, "loss": 0.5215, "num_input_tokens_seen": 361384308, "step": 6450 }, { "epoch": 14.365256124721604, "loss": 0.5186026096343994, "loss_ce": 0.0001089318175218068, "loss_iou": 0.2158203125, "loss_num": 0.0172119140625, "loss_xval": 0.51953125, "num_input_tokens_seen": 361384308, "step": 6450 }, { "epoch": 14.367483296213809, "grad_norm": 18.288415908813477, "learning_rate": 1e-06, "loss": 0.3716, "num_input_tokens_seen": 361441164, "step": 6451 }, { "epoch": 14.367483296213809, "loss": 0.2685241103172302, "loss_ce": 9.149497782345861e-05, "loss_iou": 0.11181640625, "loss_num": 0.0089111328125, "loss_xval": 0.267578125, "num_input_tokens_seen": 361441164, "step": 6451 }, { "epoch": 14.369710467706014, "grad_norm": 22.0161190032959, "learning_rate": 1e-06, "loss": 0.4447, "num_input_tokens_seen": 361500032, "step": 6452 }, { "epoch": 14.369710467706014, "loss": 0.5349976420402527, "loss_ce": 0.0001465780078433454, "loss_iou": 0.2373046875, "loss_num": 0.01190185546875, "loss_xval": 0.53515625, "num_input_tokens_seen": 361500032, "step": 6452 }, { "epoch": 14.371937639198219, "grad_norm": 22.7355899810791, "learning_rate": 1e-06, "loss": 0.4947, "num_input_tokens_seen": 361555872, "step": 6453 }, { "epoch": 14.371937639198219, "loss": 0.46275314688682556, "loss_ce": 0.00010668374306987971, "loss_iou": 0.2021484375, "loss_num": 0.01171875, "loss_xval": 0.462890625, "num_input_tokens_seen": 361555872, "step": 6453 }, { "epoch": 14.374164810690424, "grad_norm": 25.038970947265625, "learning_rate": 1e-06, "loss": 0.4937, "num_input_tokens_seen": 361614468, "step": 6454 }, { "epoch": 14.374164810690424, "loss": 0.5680195093154907, "loss_ce": 8.73721728567034e-05, "loss_iou": 0.232421875, "loss_num": 0.0208740234375, "loss_xval": 0.56640625, "num_input_tokens_seen": 361614468, "step": 6454 }, { "epoch": 14.376391982182628, "grad_norm": 18.690160751342773, "learning_rate": 1e-06, "loss": 0.6027, "num_input_tokens_seen": 361670736, "step": 6455 }, { "epoch": 14.376391982182628, "loss": 0.7412841320037842, "loss_ce": 0.00043940101750195026, "loss_iou": 0.306640625, "loss_num": 0.0257568359375, "loss_xval": 0.7421875, "num_input_tokens_seen": 361670736, "step": 6455 }, { "epoch": 14.378619153674833, "grad_norm": 13.359357833862305, "learning_rate": 1e-06, "loss": 0.4412, "num_input_tokens_seen": 361725704, "step": 6456 }, { "epoch": 14.378619153674833, "loss": 0.3059990406036377, "loss_ce": 9.084792691282928e-05, "loss_iou": 0.134765625, "loss_num": 0.00738525390625, "loss_xval": 0.306640625, "num_input_tokens_seen": 361725704, "step": 6456 }, { "epoch": 14.380846325167038, "grad_norm": 18.930578231811523, "learning_rate": 1e-06, "loss": 0.4059, "num_input_tokens_seen": 361782364, "step": 6457 }, { "epoch": 14.380846325167038, "loss": 0.4163687527179718, "loss_ce": 0.00010898528853431344, "loss_iou": 0.19140625, "loss_num": 0.0068359375, "loss_xval": 0.416015625, "num_input_tokens_seen": 361782364, "step": 6457 }, { "epoch": 14.383073496659243, "grad_norm": 14.719660758972168, "learning_rate": 1e-06, "loss": 0.4225, "num_input_tokens_seen": 361836700, "step": 6458 }, { "epoch": 14.383073496659243, "loss": 0.41648274660110474, "loss_ce": 0.0001009054685709998, "loss_iou": 0.181640625, "loss_num": 0.0107421875, "loss_xval": 0.416015625, "num_input_tokens_seen": 361836700, "step": 6458 }, { "epoch": 14.385300668151448, "grad_norm": 17.509685516357422, "learning_rate": 1e-06, "loss": 0.4016, "num_input_tokens_seen": 361894212, "step": 6459 }, { "epoch": 14.385300668151448, "loss": 0.42406773567199707, "loss_ce": 0.0001785791973816231, "loss_iou": 0.16015625, "loss_num": 0.0206298828125, "loss_xval": 0.423828125, "num_input_tokens_seen": 361894212, "step": 6459 }, { "epoch": 14.387527839643653, "grad_norm": 21.18560218811035, "learning_rate": 1e-06, "loss": 0.6036, "num_input_tokens_seen": 361949500, "step": 6460 }, { "epoch": 14.387527839643653, "loss": 0.5647926926612854, "loss_ce": 9.54304778133519e-05, "loss_iou": 0.25390625, "loss_num": 0.01104736328125, "loss_xval": 0.56640625, "num_input_tokens_seen": 361949500, "step": 6460 }, { "epoch": 14.389755011135858, "grad_norm": 18.161174774169922, "learning_rate": 1e-06, "loss": 0.4029, "num_input_tokens_seen": 362002964, "step": 6461 }, { "epoch": 14.389755011135858, "loss": 0.3434833586215973, "loss_ce": 9.955540008377284e-05, "loss_iou": 0.138671875, "loss_num": 0.01336669921875, "loss_xval": 0.34375, "num_input_tokens_seen": 362002964, "step": 6461 }, { "epoch": 14.391982182628063, "grad_norm": 18.891141891479492, "learning_rate": 1e-06, "loss": 0.4224, "num_input_tokens_seen": 362056564, "step": 6462 }, { "epoch": 14.391982182628063, "loss": 0.3922403156757355, "loss_ce": 0.00015046796761453152, "loss_iou": 0.16796875, "loss_num": 0.0111083984375, "loss_xval": 0.392578125, "num_input_tokens_seen": 362056564, "step": 6462 }, { "epoch": 14.394209354120267, "grad_norm": 42.18777847290039, "learning_rate": 1e-06, "loss": 0.5473, "num_input_tokens_seen": 362113504, "step": 6463 }, { "epoch": 14.394209354120267, "loss": 0.6558715105056763, "loss_ce": 0.00010980549268424511, "loss_iou": 0.30078125, "loss_num": 0.01080322265625, "loss_xval": 0.65625, "num_input_tokens_seen": 362113504, "step": 6463 }, { "epoch": 14.396436525612472, "grad_norm": 17.04292869567871, "learning_rate": 1e-06, "loss": 0.5568, "num_input_tokens_seen": 362168292, "step": 6464 }, { "epoch": 14.396436525612472, "loss": 0.5546635985374451, "loss_ce": 9.817900718189776e-05, "loss_iou": 0.2490234375, "loss_num": 0.01104736328125, "loss_xval": 0.5546875, "num_input_tokens_seen": 362168292, "step": 6464 }, { "epoch": 14.398663697104677, "grad_norm": 18.7966251373291, "learning_rate": 1e-06, "loss": 0.4122, "num_input_tokens_seen": 362226676, "step": 6465 }, { "epoch": 14.398663697104677, "loss": 0.5053236484527588, "loss_ce": 0.0001357014843961224, "loss_iou": 0.2099609375, "loss_num": 0.01708984375, "loss_xval": 0.50390625, "num_input_tokens_seen": 362226676, "step": 6465 }, { "epoch": 14.400890868596882, "grad_norm": 17.50356674194336, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 362285324, "step": 6466 }, { "epoch": 14.400890868596882, "loss": 0.6701542735099792, "loss_ce": 0.00011035045463358983, "loss_iou": 0.283203125, "loss_num": 0.02099609375, "loss_xval": 0.671875, "num_input_tokens_seen": 362285324, "step": 6466 }, { "epoch": 14.403118040089087, "grad_norm": 18.688034057617188, "learning_rate": 1e-06, "loss": 0.4448, "num_input_tokens_seen": 362343140, "step": 6467 }, { "epoch": 14.403118040089087, "loss": 0.6092594861984253, "loss_ce": 0.00012863794108852744, "loss_iou": 0.26171875, "loss_num": 0.0172119140625, "loss_xval": 0.609375, "num_input_tokens_seen": 362343140, "step": 6467 }, { "epoch": 14.405345211581292, "grad_norm": 25.48725700378418, "learning_rate": 1e-06, "loss": 0.5325, "num_input_tokens_seen": 362401224, "step": 6468 }, { "epoch": 14.405345211581292, "loss": 0.5204082131385803, "loss_ce": 0.0001445315283490345, "loss_iou": 0.236328125, "loss_num": 0.009765625, "loss_xval": 0.51953125, "num_input_tokens_seen": 362401224, "step": 6468 }, { "epoch": 14.407572383073497, "grad_norm": 19.30179786682129, "learning_rate": 1e-06, "loss": 0.4949, "num_input_tokens_seen": 362459744, "step": 6469 }, { "epoch": 14.407572383073497, "loss": 0.3543410301208496, "loss_ce": 9.295364725403488e-05, "loss_iou": 0.15234375, "loss_num": 0.00994873046875, "loss_xval": 0.353515625, "num_input_tokens_seen": 362459744, "step": 6469 }, { "epoch": 14.409799554565701, "grad_norm": 19.212997436523438, "learning_rate": 1e-06, "loss": 0.5026, "num_input_tokens_seen": 362516148, "step": 6470 }, { "epoch": 14.409799554565701, "loss": 0.34019219875335693, "loss_ce": 0.00010428522364236414, "loss_iou": 0.1494140625, "loss_num": 0.0084228515625, "loss_xval": 0.33984375, "num_input_tokens_seen": 362516148, "step": 6470 }, { "epoch": 14.412026726057906, "grad_norm": 20.393836975097656, "learning_rate": 1e-06, "loss": 0.5051, "num_input_tokens_seen": 362571256, "step": 6471 }, { "epoch": 14.412026726057906, "loss": 0.49153202772140503, "loss_ce": 0.0003210945869795978, "loss_iou": 0.1923828125, "loss_num": 0.021240234375, "loss_xval": 0.4921875, "num_input_tokens_seen": 362571256, "step": 6471 }, { "epoch": 14.414253897550111, "grad_norm": 48.49162292480469, "learning_rate": 1e-06, "loss": 0.4618, "num_input_tokens_seen": 362627180, "step": 6472 }, { "epoch": 14.414253897550111, "loss": 0.5423309206962585, "loss_ce": 9.457490523345768e-05, "loss_iou": 0.2255859375, "loss_num": 0.0181884765625, "loss_xval": 0.54296875, "num_input_tokens_seen": 362627180, "step": 6472 }, { "epoch": 14.416481069042316, "grad_norm": 38.27447509765625, "learning_rate": 1e-06, "loss": 0.6114, "num_input_tokens_seen": 362681880, "step": 6473 }, { "epoch": 14.416481069042316, "loss": 0.6459391117095947, "loss_ce": 0.00012608422548510134, "loss_iou": 0.2734375, "loss_num": 0.020263671875, "loss_xval": 0.64453125, "num_input_tokens_seen": 362681880, "step": 6473 }, { "epoch": 14.41870824053452, "grad_norm": 25.192468643188477, "learning_rate": 1e-06, "loss": 0.5313, "num_input_tokens_seen": 362738364, "step": 6474 }, { "epoch": 14.41870824053452, "loss": 0.44662946462631226, "loss_ce": 9.624061931390315e-05, "loss_iou": 0.2060546875, "loss_num": 0.006927490234375, "loss_xval": 0.447265625, "num_input_tokens_seen": 362738364, "step": 6474 }, { "epoch": 14.420935412026726, "grad_norm": 19.721994400024414, "learning_rate": 1e-06, "loss": 0.5204, "num_input_tokens_seen": 362793976, "step": 6475 }, { "epoch": 14.420935412026726, "loss": 0.530852198600769, "loss_ce": 9.049595973920077e-05, "loss_iou": 0.208984375, "loss_num": 0.022705078125, "loss_xval": 0.53125, "num_input_tokens_seen": 362793976, "step": 6475 }, { "epoch": 14.42316258351893, "grad_norm": 25.936969757080078, "learning_rate": 1e-06, "loss": 0.5892, "num_input_tokens_seen": 362849184, "step": 6476 }, { "epoch": 14.42316258351893, "loss": 0.6352825164794922, "loss_ce": 0.00015069330402184278, "loss_iou": 0.27734375, "loss_num": 0.016357421875, "loss_xval": 0.63671875, "num_input_tokens_seen": 362849184, "step": 6476 }, { "epoch": 14.425389755011135, "grad_norm": 14.354238510131836, "learning_rate": 1e-06, "loss": 0.3588, "num_input_tokens_seen": 362902516, "step": 6477 }, { "epoch": 14.425389755011135, "loss": 0.4610298275947571, "loss_ce": 9.230234718415886e-05, "loss_iou": 0.1728515625, "loss_num": 0.02294921875, "loss_xval": 0.4609375, "num_input_tokens_seen": 362902516, "step": 6477 }, { "epoch": 14.42761692650334, "grad_norm": 16.179262161254883, "learning_rate": 1e-06, "loss": 0.4084, "num_input_tokens_seen": 362954364, "step": 6478 }, { "epoch": 14.42761692650334, "loss": 0.2978529930114746, "loss_ce": 9.299164230469614e-05, "loss_iou": 0.130859375, "loss_num": 0.00726318359375, "loss_xval": 0.296875, "num_input_tokens_seen": 362954364, "step": 6478 }, { "epoch": 14.429844097995545, "grad_norm": 38.88999557495117, "learning_rate": 1e-06, "loss": 0.4598, "num_input_tokens_seen": 363011612, "step": 6479 }, { "epoch": 14.429844097995545, "loss": 0.5600452423095703, "loss_ce": 0.00016974634490907192, "loss_iou": 0.26171875, "loss_num": 0.007354736328125, "loss_xval": 0.55859375, "num_input_tokens_seen": 363011612, "step": 6479 }, { "epoch": 14.43207126948775, "grad_norm": 34.526512145996094, "learning_rate": 1e-06, "loss": 0.4824, "num_input_tokens_seen": 363069500, "step": 6480 }, { "epoch": 14.43207126948775, "loss": 0.4800405502319336, "loss_ce": 0.00012108102964702994, "loss_iou": 0.193359375, "loss_num": 0.0184326171875, "loss_xval": 0.48046875, "num_input_tokens_seen": 363069500, "step": 6480 }, { "epoch": 14.434298440979955, "grad_norm": 24.97442626953125, "learning_rate": 1e-06, "loss": 0.5423, "num_input_tokens_seen": 363125684, "step": 6481 }, { "epoch": 14.434298440979955, "loss": 0.3962355852127075, "loss_ce": 0.00011745323718059808, "loss_iou": 0.1728515625, "loss_num": 0.0103759765625, "loss_xval": 0.396484375, "num_input_tokens_seen": 363125684, "step": 6481 }, { "epoch": 14.43652561247216, "grad_norm": 13.889195442199707, "learning_rate": 1e-06, "loss": 0.4004, "num_input_tokens_seen": 363181616, "step": 6482 }, { "epoch": 14.43652561247216, "loss": 0.4475998878479004, "loss_ce": 0.0001511801965534687, "loss_iou": 0.1943359375, "loss_num": 0.01190185546875, "loss_xval": 0.447265625, "num_input_tokens_seen": 363181616, "step": 6482 }, { "epoch": 14.438752783964365, "grad_norm": 16.7701358795166, "learning_rate": 1e-06, "loss": 0.3514, "num_input_tokens_seen": 363238068, "step": 6483 }, { "epoch": 14.438752783964365, "loss": 0.38326865434646606, "loss_ce": 8.997365512186661e-05, "loss_iou": 0.162109375, "loss_num": 0.0118408203125, "loss_xval": 0.3828125, "num_input_tokens_seen": 363238068, "step": 6483 }, { "epoch": 14.44097995545657, "grad_norm": 16.830076217651367, "learning_rate": 1e-06, "loss": 0.4811, "num_input_tokens_seen": 363293032, "step": 6484 }, { "epoch": 14.44097995545657, "loss": 0.4167139530181885, "loss_ce": 8.8007356680464e-05, "loss_iou": 0.1806640625, "loss_num": 0.01092529296875, "loss_xval": 0.416015625, "num_input_tokens_seen": 363293032, "step": 6484 }, { "epoch": 14.443207126948774, "grad_norm": 13.648219108581543, "learning_rate": 1e-06, "loss": 0.543, "num_input_tokens_seen": 363349520, "step": 6485 }, { "epoch": 14.443207126948774, "loss": 0.47776174545288086, "loss_ce": 0.0002226911747129634, "loss_iou": 0.1865234375, "loss_num": 0.0206298828125, "loss_xval": 0.4765625, "num_input_tokens_seen": 363349520, "step": 6485 }, { "epoch": 14.44543429844098, "grad_norm": 14.256153106689453, "learning_rate": 1e-06, "loss": 0.4144, "num_input_tokens_seen": 363407380, "step": 6486 }, { "epoch": 14.44543429844098, "loss": 0.21311235427856445, "loss_ce": 9.964955097530037e-05, "loss_iou": 0.0927734375, "loss_num": 0.00543212890625, "loss_xval": 0.212890625, "num_input_tokens_seen": 363407380, "step": 6486 }, { "epoch": 14.447661469933184, "grad_norm": 20.257829666137695, "learning_rate": 1e-06, "loss": 0.4631, "num_input_tokens_seen": 363463888, "step": 6487 }, { "epoch": 14.447661469933184, "loss": 0.4675000011920929, "loss_ce": 9.276315540773794e-05, "loss_iou": 0.193359375, "loss_num": 0.01611328125, "loss_xval": 0.466796875, "num_input_tokens_seen": 363463888, "step": 6487 }, { "epoch": 14.449888641425389, "grad_norm": 15.722443580627441, "learning_rate": 1e-06, "loss": 0.401, "num_input_tokens_seen": 363520880, "step": 6488 }, { "epoch": 14.449888641425389, "loss": 0.3268738389015198, "loss_ce": 9.159540786640719e-05, "loss_iou": 0.142578125, "loss_num": 0.0084228515625, "loss_xval": 0.326171875, "num_input_tokens_seen": 363520880, "step": 6488 }, { "epoch": 14.452115812917596, "grad_norm": 24.610681533813477, "learning_rate": 1e-06, "loss": 0.5481, "num_input_tokens_seen": 363575452, "step": 6489 }, { "epoch": 14.452115812917596, "loss": 0.42991262674331665, "loss_ce": 0.00010304449824616313, "loss_iou": 0.19140625, "loss_num": 0.00933837890625, "loss_xval": 0.4296875, "num_input_tokens_seen": 363575452, "step": 6489 }, { "epoch": 14.4543429844098, "grad_norm": 15.40800952911377, "learning_rate": 1e-06, "loss": 0.32, "num_input_tokens_seen": 363630124, "step": 6490 }, { "epoch": 14.4543429844098, "loss": 0.35164231061935425, "loss_ce": 9.507540380582213e-05, "loss_iou": 0.15625, "loss_num": 0.007659912109375, "loss_xval": 0.3515625, "num_input_tokens_seen": 363630124, "step": 6490 }, { "epoch": 14.456570155902005, "grad_norm": 27.49700927734375, "learning_rate": 1e-06, "loss": 0.4365, "num_input_tokens_seen": 363683792, "step": 6491 }, { "epoch": 14.456570155902005, "loss": 0.5234502553939819, "loss_ce": 0.00013481616042554379, "loss_iou": 0.2412109375, "loss_num": 0.00823974609375, "loss_xval": 0.5234375, "num_input_tokens_seen": 363683792, "step": 6491 }, { "epoch": 14.45879732739421, "grad_norm": 16.735675811767578, "learning_rate": 1e-06, "loss": 0.359, "num_input_tokens_seen": 363740708, "step": 6492 }, { "epoch": 14.45879732739421, "loss": 0.35196787118911743, "loss_ce": 0.00010018555622082204, "loss_iou": 0.13671875, "loss_num": 0.0157470703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 363740708, "step": 6492 }, { "epoch": 14.461024498886415, "grad_norm": 27.95579719543457, "learning_rate": 1e-06, "loss": 0.5228, "num_input_tokens_seen": 363795664, "step": 6493 }, { "epoch": 14.461024498886415, "loss": 0.5091565847396851, "loss_ce": 0.00012336287181824446, "loss_iou": 0.2333984375, "loss_num": 0.00836181640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 363795664, "step": 6493 }, { "epoch": 14.46325167037862, "grad_norm": 20.272802352905273, "learning_rate": 1e-06, "loss": 0.4311, "num_input_tokens_seen": 363853956, "step": 6494 }, { "epoch": 14.46325167037862, "loss": 0.4811999201774597, "loss_ce": 0.00012079046427970752, "loss_iou": 0.220703125, "loss_num": 0.00799560546875, "loss_xval": 0.48046875, "num_input_tokens_seen": 363853956, "step": 6494 }, { "epoch": 14.465478841870825, "grad_norm": 18.344619750976562, "learning_rate": 1e-06, "loss": 0.2613, "num_input_tokens_seen": 363911696, "step": 6495 }, { "epoch": 14.465478841870825, "loss": 0.237228661775589, "loss_ce": 0.00010707815818022937, "loss_iou": 0.1044921875, "loss_num": 0.005523681640625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 363911696, "step": 6495 }, { "epoch": 14.46770601336303, "grad_norm": 35.97431564331055, "learning_rate": 1e-06, "loss": 0.8532, "num_input_tokens_seen": 363966144, "step": 6496 }, { "epoch": 14.46770601336303, "loss": 0.8004045486450195, "loss_ce": 0.00011163462477270514, "loss_iou": 0.34375, "loss_num": 0.0223388671875, "loss_xval": 0.80078125, "num_input_tokens_seen": 363966144, "step": 6496 }, { "epoch": 14.469933184855234, "grad_norm": 33.640872955322266, "learning_rate": 1e-06, "loss": 0.5273, "num_input_tokens_seen": 364021896, "step": 6497 }, { "epoch": 14.469933184855234, "loss": 0.3864748775959015, "loss_ce": 0.00012233102461323142, "loss_iou": 0.1669921875, "loss_num": 0.01055908203125, "loss_xval": 0.38671875, "num_input_tokens_seen": 364021896, "step": 6497 }, { "epoch": 14.47216035634744, "grad_norm": 18.709012985229492, "learning_rate": 1e-06, "loss": 0.4962, "num_input_tokens_seen": 364079360, "step": 6498 }, { "epoch": 14.47216035634744, "loss": 0.40141546726226807, "loss_ce": 0.00010935450700344518, "loss_iou": 0.1708984375, "loss_num": 0.0118408203125, "loss_xval": 0.400390625, "num_input_tokens_seen": 364079360, "step": 6498 }, { "epoch": 14.474387527839644, "grad_norm": 18.93471908569336, "learning_rate": 1e-06, "loss": 0.3134, "num_input_tokens_seen": 364136176, "step": 6499 }, { "epoch": 14.474387527839644, "loss": 0.36948075890541077, "loss_ce": 0.00021804316202178597, "loss_iou": 0.1591796875, "loss_num": 0.01007080078125, "loss_xval": 0.369140625, "num_input_tokens_seen": 364136176, "step": 6499 }, { "epoch": 14.476614699331849, "grad_norm": 21.971736907958984, "learning_rate": 1e-06, "loss": 0.4228, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "eval_seeclick_web_CIoU": 0.5850892961025238, "eval_seeclick_web_GIoU": 0.5826087892055511, "eval_seeclick_web_IoU": 0.6035565435886383, "eval_seeclick_web_MAE_all": 0.015261294320225716, "eval_seeclick_web_MAE_h": 0.007648690138012171, "eval_seeclick_web_MAE_w": 0.015442864038050175, "eval_seeclick_web_MAE_x_boxes": 0.009242635453119874, "eval_seeclick_web_MAE_y_boxes": 0.021023853914812207, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9148587584495544, "eval_seeclick_web_loss_ce": 0.0001663194634602405, "eval_seeclick_web_loss_iou": 0.4219970703125, "eval_seeclick_web_loss_num": 0.01227569580078125, "eval_seeclick_web_loss_xval": 0.9052734375, "eval_seeclick_web_runtime": 22.0534, "eval_seeclick_web_samples_per_second": 2.267, "eval_seeclick_web_steps_per_second": 0.091, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "eval_icons_CIoU": 0.294839546084404, "eval_icons_GIoU": 0.3122626394033432, "eval_icons_IoU": 0.36477692425251007, "eval_icons_MAE_all": 0.05375087633728981, "eval_icons_MAE_h": 0.032996498979628086, "eval_icons_MAE_w": 0.051369220949709415, "eval_icons_MAE_x_boxes": 0.05044420249760151, "eval_icons_MAE_y_boxes": 0.03664529975503683, "eval_icons_inside_bbox": 0.6649305522441864, "eval_icons_loss": 1.6395889520645142, "eval_icons_loss_ce": 0.00020623258751584217, "eval_icons_loss_iou": 0.659423828125, "eval_icons_loss_num": 0.05293846130371094, "eval_icons_loss_xval": 1.583984375, "eval_icons_runtime": 19.0398, "eval_icons_samples_per_second": 2.626, "eval_icons_steps_per_second": 0.105, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "eval_screenspot_CIoU": 0.37580615282058716, "eval_screenspot_GIoU": 0.3901708126068115, "eval_screenspot_IoU": 0.44902459780375165, "eval_screenspot_MAE_all": 0.054527596880992256, "eval_screenspot_MAE_h": 0.03962646176417669, "eval_screenspot_MAE_w": 0.061993442475795746, "eval_screenspot_MAE_x_boxes": 0.07007026796539624, "eval_screenspot_MAE_y_boxes": 0.037376622669398785, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.547979474067688, "eval_screenspot_loss_ce": 0.00023582103798010698, "eval_screenspot_loss_iou": 0.646484375, "eval_screenspot_loss_num": 0.06189727783203125, "eval_screenspot_loss_xval": 1.60205078125, "eval_screenspot_runtime": 35.3553, "eval_screenspot_samples_per_second": 2.517, "eval_screenspot_steps_per_second": 0.085, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "eval_compot_CIoU": 0.3404065817594528, "eval_compot_GIoU": 0.35687774419784546, "eval_compot_IoU": 0.40112267434597015, "eval_compot_MAE_all": 0.01965143345296383, "eval_compot_MAE_h": 0.013854971155524254, "eval_compot_MAE_w": 0.02131012175232172, "eval_compot_MAE_x_boxes": 0.030210323631763458, "eval_compot_MAE_y_boxes": 0.006537551758810878, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4148516654968262, "eval_compot_loss_ce": 0.00016382582543883473, "eval_compot_loss_iou": 0.649169921875, "eval_compot_loss_num": 0.018445968627929688, "eval_compot_loss_xval": 1.390380859375, "eval_compot_runtime": 21.0154, "eval_compot_samples_per_second": 2.379, "eval_compot_steps_per_second": 0.095, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "eval_custom_ui_val_CIoU": 0.4791228680147065, "eval_custom_ui_val_GIoU": 0.4861221942636702, "eval_custom_ui_val_IoU": 0.5396713250213199, "eval_custom_ui_val_MAE_all": 0.02759691560640931, "eval_custom_ui_val_MAE_h": 0.015171076895462142, "eval_custom_ui_val_MAE_w": 0.03542729518893692, "eval_custom_ui_val_MAE_x_boxes": 0.03368757442674703, "eval_custom_ui_val_MAE_y_boxes": 0.013593513725532426, "eval_custom_ui_val_inside_bbox": 0.7789351873927646, "eval_custom_ui_val_loss": 1.167110800743103, "eval_custom_ui_val_loss_ce": 0.00018443458328773786, "eval_custom_ui_val_loss_iou": 0.5000135633680556, "eval_custom_ui_val_loss_num": 0.024277369181315105, "eval_custom_ui_val_loss_xval": 1.1207139756944444, "eval_custom_ui_val_runtime": 62.376, "eval_custom_ui_val_samples_per_second": 4.248, "eval_custom_ui_val_steps_per_second": 0.144, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.476614699331849, "loss": 0.8343714475631714, "loss_ce": 0.00014291857951320708, "loss_iou": 0.375, "loss_num": 0.0164794921875, "loss_xval": 0.8359375, "num_input_tokens_seen": 364192972, "step": 6500 }, { "epoch": 14.478841870824054, "grad_norm": 12.816341400146484, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 364250380, "step": 6501 }, { "epoch": 14.478841870824054, "loss": 0.26279568672180176, "loss_ce": 0.00010036412277258933, "loss_iou": 0.109375, "loss_num": 0.00872802734375, "loss_xval": 0.26171875, "num_input_tokens_seen": 364250380, "step": 6501 }, { "epoch": 14.481069042316259, "grad_norm": 12.928550720214844, "learning_rate": 1e-06, "loss": 0.4519, "num_input_tokens_seen": 364306508, "step": 6502 }, { "epoch": 14.481069042316259, "loss": 0.47788378596305847, "loss_ce": 0.00010056734754471108, "loss_iou": 0.2080078125, "loss_num": 0.01220703125, "loss_xval": 0.478515625, "num_input_tokens_seen": 364306508, "step": 6502 }, { "epoch": 14.483296213808464, "grad_norm": 17.1734619140625, "learning_rate": 1e-06, "loss": 0.4719, "num_input_tokens_seen": 364362888, "step": 6503 }, { "epoch": 14.483296213808464, "loss": 0.48692283034324646, "loss_ce": 0.00010643218411132693, "loss_iou": 0.20703125, "loss_num": 0.014404296875, "loss_xval": 0.486328125, "num_input_tokens_seen": 364362888, "step": 6503 }, { "epoch": 14.485523385300668, "grad_norm": 24.356948852539062, "learning_rate": 1e-06, "loss": 0.5933, "num_input_tokens_seen": 364418476, "step": 6504 }, { "epoch": 14.485523385300668, "loss": 0.5144957900047302, "loss_ce": 9.150611003860831e-05, "loss_iou": 0.2275390625, "loss_num": 0.01202392578125, "loss_xval": 0.515625, "num_input_tokens_seen": 364418476, "step": 6504 }, { "epoch": 14.487750556792873, "grad_norm": 21.26868438720703, "learning_rate": 1e-06, "loss": 0.4377, "num_input_tokens_seen": 364474968, "step": 6505 }, { "epoch": 14.487750556792873, "loss": 0.3293471932411194, "loss_ce": 0.00012356144725345075, "loss_iou": 0.1376953125, "loss_num": 0.0106201171875, "loss_xval": 0.330078125, "num_input_tokens_seen": 364474968, "step": 6505 }, { "epoch": 14.489977728285078, "grad_norm": 12.99026107788086, "learning_rate": 1e-06, "loss": 0.4491, "num_input_tokens_seen": 364532716, "step": 6506 }, { "epoch": 14.489977728285078, "loss": 0.25777584314346313, "loss_ce": 8.539756527170539e-05, "loss_iou": 0.10595703125, "loss_num": 0.0091552734375, "loss_xval": 0.2578125, "num_input_tokens_seen": 364532716, "step": 6506 }, { "epoch": 14.492204899777283, "grad_norm": 15.61511516571045, "learning_rate": 1e-06, "loss": 0.4898, "num_input_tokens_seen": 364589292, "step": 6507 }, { "epoch": 14.492204899777283, "loss": 0.5754222869873047, "loss_ce": 0.00010490816930541769, "loss_iou": 0.2451171875, "loss_num": 0.0172119140625, "loss_xval": 0.57421875, "num_input_tokens_seen": 364589292, "step": 6507 }, { "epoch": 14.494432071269488, "grad_norm": 17.655942916870117, "learning_rate": 1e-06, "loss": 0.4318, "num_input_tokens_seen": 364644880, "step": 6508 }, { "epoch": 14.494432071269488, "loss": 0.3868103623390198, "loss_ce": 9.161405614577234e-05, "loss_iou": 0.1689453125, "loss_num": 0.00994873046875, "loss_xval": 0.38671875, "num_input_tokens_seen": 364644880, "step": 6508 }, { "epoch": 14.496659242761693, "grad_norm": 20.9801082611084, "learning_rate": 1e-06, "loss": 0.4926, "num_input_tokens_seen": 364702920, "step": 6509 }, { "epoch": 14.496659242761693, "loss": 0.48010796308517456, "loss_ce": 0.00012747629079967737, "loss_iou": 0.2255859375, "loss_num": 0.005859375, "loss_xval": 0.48046875, "num_input_tokens_seen": 364702920, "step": 6509 }, { "epoch": 14.498886414253898, "grad_norm": 20.405927658081055, "learning_rate": 1e-06, "loss": 0.439, "num_input_tokens_seen": 364757992, "step": 6510 }, { "epoch": 14.498886414253898, "loss": 0.642722487449646, "loss_ce": 0.00014438082871492952, "loss_iou": 0.255859375, "loss_num": 0.0257568359375, "loss_xval": 0.640625, "num_input_tokens_seen": 364757992, "step": 6510 }, { "epoch": 14.501113585746102, "grad_norm": 22.799161911010742, "learning_rate": 1e-06, "loss": 0.3945, "num_input_tokens_seen": 364815876, "step": 6511 }, { "epoch": 14.501113585746102, "loss": 0.5448106527328491, "loss_ce": 0.00013291001960169524, "loss_iou": 0.244140625, "loss_num": 0.01123046875, "loss_xval": 0.54296875, "num_input_tokens_seen": 364815876, "step": 6511 }, { "epoch": 14.503340757238307, "grad_norm": 18.602733612060547, "learning_rate": 1e-06, "loss": 0.533, "num_input_tokens_seen": 364871856, "step": 6512 }, { "epoch": 14.503340757238307, "loss": 0.3779085874557495, "loss_ce": 0.00010096091136801988, "loss_iou": 0.142578125, "loss_num": 0.018310546875, "loss_xval": 0.376953125, "num_input_tokens_seen": 364871856, "step": 6512 }, { "epoch": 14.505567928730512, "grad_norm": 35.201637268066406, "learning_rate": 1e-06, "loss": 0.6026, "num_input_tokens_seen": 364927504, "step": 6513 }, { "epoch": 14.505567928730512, "loss": 0.4659165143966675, "loss_ce": 9.620728815207258e-05, "loss_iou": 0.2109375, "loss_num": 0.00872802734375, "loss_xval": 0.46484375, "num_input_tokens_seen": 364927504, "step": 6513 }, { "epoch": 14.507795100222717, "grad_norm": 19.808265686035156, "learning_rate": 1e-06, "loss": 0.5087, "num_input_tokens_seen": 364985060, "step": 6514 }, { "epoch": 14.507795100222717, "loss": 0.5555931329727173, "loss_ce": 0.00011220310989301652, "loss_iou": 0.2314453125, "loss_num": 0.0184326171875, "loss_xval": 0.5546875, "num_input_tokens_seen": 364985060, "step": 6514 }, { "epoch": 14.510022271714922, "grad_norm": 14.76258373260498, "learning_rate": 1e-06, "loss": 0.3267, "num_input_tokens_seen": 365042232, "step": 6515 }, { "epoch": 14.510022271714922, "loss": 0.3308001160621643, "loss_ce": 0.00011161710426677018, "loss_iou": 0.154296875, "loss_num": 0.00445556640625, "loss_xval": 0.330078125, "num_input_tokens_seen": 365042232, "step": 6515 }, { "epoch": 14.512249443207127, "grad_norm": 15.700284004211426, "learning_rate": 1e-06, "loss": 0.5323, "num_input_tokens_seen": 365094928, "step": 6516 }, { "epoch": 14.512249443207127, "loss": 0.48436519503593445, "loss_ce": 0.00011227864888496697, "loss_iou": 0.1943359375, "loss_num": 0.019287109375, "loss_xval": 0.484375, "num_input_tokens_seen": 365094928, "step": 6516 }, { "epoch": 14.514476614699332, "grad_norm": 12.311739921569824, "learning_rate": 1e-06, "loss": 0.3459, "num_input_tokens_seen": 365152328, "step": 6517 }, { "epoch": 14.514476614699332, "loss": 0.41318339109420776, "loss_ce": 9.744486305862665e-05, "loss_iou": 0.177734375, "loss_num": 0.01177978515625, "loss_xval": 0.4140625, "num_input_tokens_seen": 365152328, "step": 6517 }, { "epoch": 14.516703786191536, "grad_norm": 16.08889389038086, "learning_rate": 1e-06, "loss": 0.4272, "num_input_tokens_seen": 365211004, "step": 6518 }, { "epoch": 14.516703786191536, "loss": 0.40454918146133423, "loss_ce": 0.0002522985450923443, "loss_iou": 0.1669921875, "loss_num": 0.01416015625, "loss_xval": 0.404296875, "num_input_tokens_seen": 365211004, "step": 6518 }, { "epoch": 14.518930957683741, "grad_norm": 15.88845157623291, "learning_rate": 1e-06, "loss": 0.474, "num_input_tokens_seen": 365267152, "step": 6519 }, { "epoch": 14.518930957683741, "loss": 0.4582583010196686, "loss_ce": 0.0001284035388380289, "loss_iou": 0.203125, "loss_num": 0.01043701171875, "loss_xval": 0.458984375, "num_input_tokens_seen": 365267152, "step": 6519 }, { "epoch": 14.521158129175946, "grad_norm": 22.944534301757812, "learning_rate": 1e-06, "loss": 0.3661, "num_input_tokens_seen": 365322816, "step": 6520 }, { "epoch": 14.521158129175946, "loss": 0.3829045295715332, "loss_ce": 0.00021408403699751943, "loss_iou": 0.166015625, "loss_num": 0.0101318359375, "loss_xval": 0.3828125, "num_input_tokens_seen": 365322816, "step": 6520 }, { "epoch": 14.523385300668151, "grad_norm": 15.910293579101562, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 365377560, "step": 6521 }, { "epoch": 14.523385300668151, "loss": 0.6288875937461853, "loss_ce": 0.0001033980370266363, "loss_iou": 0.27734375, "loss_num": 0.014404296875, "loss_xval": 0.62890625, "num_input_tokens_seen": 365377560, "step": 6521 }, { "epoch": 14.525612472160356, "grad_norm": 31.319820404052734, "learning_rate": 1e-06, "loss": 0.5413, "num_input_tokens_seen": 365431876, "step": 6522 }, { "epoch": 14.525612472160356, "loss": 0.45200419425964355, "loss_ce": 9.987234807340428e-05, "loss_iou": 0.201171875, "loss_num": 0.010009765625, "loss_xval": 0.451171875, "num_input_tokens_seen": 365431876, "step": 6522 }, { "epoch": 14.52783964365256, "grad_norm": 33.108680725097656, "learning_rate": 1e-06, "loss": 0.5389, "num_input_tokens_seen": 365487292, "step": 6523 }, { "epoch": 14.52783964365256, "loss": 0.4871612787246704, "loss_ce": 0.0001007293612929061, "loss_iou": 0.2138671875, "loss_num": 0.01202392578125, "loss_xval": 0.486328125, "num_input_tokens_seen": 365487292, "step": 6523 }, { "epoch": 14.530066815144766, "grad_norm": 26.078786849975586, "learning_rate": 1e-06, "loss": 0.3983, "num_input_tokens_seen": 365545116, "step": 6524 }, { "epoch": 14.530066815144766, "loss": 0.5119673013687134, "loss_ce": 0.00012648927804548293, "loss_iou": 0.21875, "loss_num": 0.01495361328125, "loss_xval": 0.51171875, "num_input_tokens_seen": 365545116, "step": 6524 }, { "epoch": 14.53229398663697, "grad_norm": 21.211854934692383, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 365603000, "step": 6525 }, { "epoch": 14.53229398663697, "loss": 0.5028167963027954, "loss_ce": 0.00013121790834702551, "loss_iou": 0.21484375, "loss_num": 0.014404296875, "loss_xval": 0.50390625, "num_input_tokens_seen": 365603000, "step": 6525 }, { "epoch": 14.534521158129175, "grad_norm": 13.118301391601562, "learning_rate": 1e-06, "loss": 0.3706, "num_input_tokens_seen": 365658648, "step": 6526 }, { "epoch": 14.534521158129175, "loss": 0.37536337971687317, "loss_ce": 0.00011921751865884289, "loss_iou": 0.16796875, "loss_num": 0.0079345703125, "loss_xval": 0.375, "num_input_tokens_seen": 365658648, "step": 6526 }, { "epoch": 14.53674832962138, "grad_norm": 12.883088111877441, "learning_rate": 1e-06, "loss": 0.4133, "num_input_tokens_seen": 365713996, "step": 6527 }, { "epoch": 14.53674832962138, "loss": 0.4605136811733246, "loss_ce": 0.0004306669579818845, "loss_iou": 0.2021484375, "loss_num": 0.01104736328125, "loss_xval": 0.4609375, "num_input_tokens_seen": 365713996, "step": 6527 }, { "epoch": 14.538975501113585, "grad_norm": 21.47534942626953, "learning_rate": 1e-06, "loss": 0.3516, "num_input_tokens_seen": 365769532, "step": 6528 }, { "epoch": 14.538975501113585, "loss": 0.3355334401130676, "loss_ce": 8.420874655712396e-05, "loss_iou": 0.150390625, "loss_num": 0.007110595703125, "loss_xval": 0.3359375, "num_input_tokens_seen": 365769532, "step": 6528 }, { "epoch": 14.54120267260579, "grad_norm": 17.586118698120117, "learning_rate": 1e-06, "loss": 0.549, "num_input_tokens_seen": 365826440, "step": 6529 }, { "epoch": 14.54120267260579, "loss": 0.5860310792922974, "loss_ce": 9.35560601647012e-05, "loss_iou": 0.2353515625, "loss_num": 0.02294921875, "loss_xval": 0.5859375, "num_input_tokens_seen": 365826440, "step": 6529 }, { "epoch": 14.543429844097995, "grad_norm": 19.080286026000977, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 365882108, "step": 6530 }, { "epoch": 14.543429844097995, "loss": 0.4774046540260315, "loss_ce": 0.00010971432493533939, "loss_iou": 0.21484375, "loss_num": 0.00958251953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 365882108, "step": 6530 }, { "epoch": 14.5456570155902, "grad_norm": 18.724021911621094, "learning_rate": 1e-06, "loss": 0.5758, "num_input_tokens_seen": 365940436, "step": 6531 }, { "epoch": 14.5456570155902, "loss": 0.7265654802322388, "loss_ce": 0.00012504393816925585, "loss_iou": 0.30078125, "loss_num": 0.0247802734375, "loss_xval": 0.7265625, "num_input_tokens_seen": 365940436, "step": 6531 }, { "epoch": 14.547884187082406, "grad_norm": 16.194499969482422, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 365996348, "step": 6532 }, { "epoch": 14.547884187082406, "loss": 0.36508145928382874, "loss_ce": 9.123167546931654e-05, "loss_iou": 0.169921875, "loss_num": 0.004791259765625, "loss_xval": 0.365234375, "num_input_tokens_seen": 365996348, "step": 6532 }, { "epoch": 14.550111358574611, "grad_norm": 14.120255470275879, "learning_rate": 1e-06, "loss": 0.4066, "num_input_tokens_seen": 366050788, "step": 6533 }, { "epoch": 14.550111358574611, "loss": 0.380734384059906, "loss_ce": 0.0003022679884452373, "loss_iou": 0.1669921875, "loss_num": 0.00921630859375, "loss_xval": 0.380859375, "num_input_tokens_seen": 366050788, "step": 6533 }, { "epoch": 14.552338530066816, "grad_norm": 16.832948684692383, "learning_rate": 1e-06, "loss": 0.5344, "num_input_tokens_seen": 366103204, "step": 6534 }, { "epoch": 14.552338530066816, "loss": 0.49608922004699707, "loss_ce": 0.00011752717546187341, "loss_iou": 0.2138671875, "loss_num": 0.0135498046875, "loss_xval": 0.49609375, "num_input_tokens_seen": 366103204, "step": 6534 }, { "epoch": 14.55456570155902, "grad_norm": 20.420133590698242, "learning_rate": 1e-06, "loss": 0.4178, "num_input_tokens_seen": 366161640, "step": 6535 }, { "epoch": 14.55456570155902, "loss": 0.4828900098800659, "loss_ce": 0.00010191020555794239, "loss_iou": 0.2041015625, "loss_num": 0.01470947265625, "loss_xval": 0.482421875, "num_input_tokens_seen": 366161640, "step": 6535 }, { "epoch": 14.556792873051226, "grad_norm": 19.563575744628906, "learning_rate": 1e-06, "loss": 0.3978, "num_input_tokens_seen": 366216184, "step": 6536 }, { "epoch": 14.556792873051226, "loss": 0.5175689458847046, "loss_ce": 0.0001129416050389409, "loss_iou": 0.23046875, "loss_num": 0.01141357421875, "loss_xval": 0.515625, "num_input_tokens_seen": 366216184, "step": 6536 }, { "epoch": 14.55902004454343, "grad_norm": 14.907690048217773, "learning_rate": 1e-06, "loss": 0.5119, "num_input_tokens_seen": 366273176, "step": 6537 }, { "epoch": 14.55902004454343, "loss": 0.4903450906276703, "loss_ce": 0.00011072470806539059, "loss_iou": 0.212890625, "loss_num": 0.01318359375, "loss_xval": 0.490234375, "num_input_tokens_seen": 366273176, "step": 6537 }, { "epoch": 14.561247216035635, "grad_norm": 21.058177947998047, "learning_rate": 1e-06, "loss": 0.4864, "num_input_tokens_seen": 366329652, "step": 6538 }, { "epoch": 14.561247216035635, "loss": 0.38425058126449585, "loss_ce": 9.530353418085724e-05, "loss_iou": 0.1591796875, "loss_num": 0.01312255859375, "loss_xval": 0.384765625, "num_input_tokens_seen": 366329652, "step": 6538 }, { "epoch": 14.56347438752784, "grad_norm": 76.75809478759766, "learning_rate": 1e-06, "loss": 0.6636, "num_input_tokens_seen": 366388276, "step": 6539 }, { "epoch": 14.56347438752784, "loss": 0.8201836347579956, "loss_ce": 0.00011528450704645365, "loss_iou": 0.330078125, "loss_num": 0.03173828125, "loss_xval": 0.8203125, "num_input_tokens_seen": 366388276, "step": 6539 }, { "epoch": 14.565701559020045, "grad_norm": 18.305295944213867, "learning_rate": 1e-06, "loss": 0.5901, "num_input_tokens_seen": 366444384, "step": 6540 }, { "epoch": 14.565701559020045, "loss": 0.5785954594612122, "loss_ce": 0.00010423710045870394, "loss_iou": 0.25390625, "loss_num": 0.01385498046875, "loss_xval": 0.578125, "num_input_tokens_seen": 366444384, "step": 6540 }, { "epoch": 14.56792873051225, "grad_norm": 16.77396583557129, "learning_rate": 1e-06, "loss": 0.5024, "num_input_tokens_seen": 366500264, "step": 6541 }, { "epoch": 14.56792873051225, "loss": 0.47824224829673767, "loss_ce": 9.282668179366738e-05, "loss_iou": 0.1748046875, "loss_num": 0.025634765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 366500264, "step": 6541 }, { "epoch": 14.570155902004455, "grad_norm": 22.724868774414062, "learning_rate": 1e-06, "loss": 0.4041, "num_input_tokens_seen": 366556940, "step": 6542 }, { "epoch": 14.570155902004455, "loss": 0.45686769485473633, "loss_ce": 8.05618183221668e-05, "loss_iou": 0.193359375, "loss_num": 0.0140380859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 366556940, "step": 6542 }, { "epoch": 14.57238307349666, "grad_norm": 20.71229362487793, "learning_rate": 1e-06, "loss": 0.5195, "num_input_tokens_seen": 366610572, "step": 6543 }, { "epoch": 14.57238307349666, "loss": 0.5313507318496704, "loss_ce": 0.0001007560349535197, "loss_iou": 0.2119140625, "loss_num": 0.0213623046875, "loss_xval": 0.53125, "num_input_tokens_seen": 366610572, "step": 6543 }, { "epoch": 14.574610244988865, "grad_norm": 20.400714874267578, "learning_rate": 1e-06, "loss": 0.5844, "num_input_tokens_seen": 366665496, "step": 6544 }, { "epoch": 14.574610244988865, "loss": 0.46006056666374207, "loss_ce": 9.960292663890868e-05, "loss_iou": 0.2216796875, "loss_num": 0.0036163330078125, "loss_xval": 0.4609375, "num_input_tokens_seen": 366665496, "step": 6544 }, { "epoch": 14.57683741648107, "grad_norm": 16.26686668395996, "learning_rate": 1e-06, "loss": 0.6016, "num_input_tokens_seen": 366720024, "step": 6545 }, { "epoch": 14.57683741648107, "loss": 0.622795820236206, "loss_ce": 0.00011514931975398213, "loss_iou": 0.2578125, "loss_num": 0.021240234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 366720024, "step": 6545 }, { "epoch": 14.579064587973274, "grad_norm": 14.509425163269043, "learning_rate": 1e-06, "loss": 0.4275, "num_input_tokens_seen": 366778568, "step": 6546 }, { "epoch": 14.579064587973274, "loss": 0.36485838890075684, "loss_ce": 0.00011229590745642781, "loss_iou": 0.169921875, "loss_num": 0.0050048828125, "loss_xval": 0.365234375, "num_input_tokens_seen": 366778568, "step": 6546 }, { "epoch": 14.58129175946548, "grad_norm": 26.61733627319336, "learning_rate": 1e-06, "loss": 0.5409, "num_input_tokens_seen": 366834208, "step": 6547 }, { "epoch": 14.58129175946548, "loss": 0.6287715435028076, "loss_ce": 0.0001094555773306638, "loss_iou": 0.28125, "loss_num": 0.01336669921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 366834208, "step": 6547 }, { "epoch": 14.583518930957684, "grad_norm": 21.295969009399414, "learning_rate": 1e-06, "loss": 0.4005, "num_input_tokens_seen": 366889968, "step": 6548 }, { "epoch": 14.583518930957684, "loss": 0.4716094136238098, "loss_ce": 0.00011283693311270326, "loss_iou": 0.21484375, "loss_num": 0.00836181640625, "loss_xval": 0.470703125, "num_input_tokens_seen": 366889968, "step": 6548 }, { "epoch": 14.585746102449889, "grad_norm": 30.666751861572266, "learning_rate": 1e-06, "loss": 0.4064, "num_input_tokens_seen": 366945348, "step": 6549 }, { "epoch": 14.585746102449889, "loss": 0.4583427906036377, "loss_ce": 9.083442273549736e-05, "loss_iou": 0.1875, "loss_num": 0.0166015625, "loss_xval": 0.458984375, "num_input_tokens_seen": 366945348, "step": 6549 }, { "epoch": 14.587973273942094, "grad_norm": 36.7827262878418, "learning_rate": 1e-06, "loss": 0.5327, "num_input_tokens_seen": 367000992, "step": 6550 }, { "epoch": 14.587973273942094, "loss": 0.6207618713378906, "loss_ce": 0.0001563684199936688, "loss_iou": 0.28125, "loss_num": 0.01202392578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 367000992, "step": 6550 }, { "epoch": 14.590200445434299, "grad_norm": 13.286869049072266, "learning_rate": 1e-06, "loss": 0.4203, "num_input_tokens_seen": 367057428, "step": 6551 }, { "epoch": 14.590200445434299, "loss": 0.3902452886104584, "loss_ce": 0.00010857303277589381, "loss_iou": 0.171875, "loss_num": 0.00933837890625, "loss_xval": 0.390625, "num_input_tokens_seen": 367057428, "step": 6551 }, { "epoch": 14.592427616926503, "grad_norm": 25.443744659423828, "learning_rate": 1e-06, "loss": 0.486, "num_input_tokens_seen": 367110136, "step": 6552 }, { "epoch": 14.592427616926503, "loss": 0.612711489200592, "loss_ce": 0.0016885376535356045, "loss_iou": 0.267578125, "loss_num": 0.015625, "loss_xval": 0.609375, "num_input_tokens_seen": 367110136, "step": 6552 }, { "epoch": 14.594654788418708, "grad_norm": 25.125425338745117, "learning_rate": 1e-06, "loss": 0.4382, "num_input_tokens_seen": 367167548, "step": 6553 }, { "epoch": 14.594654788418708, "loss": 0.3994516134262085, "loss_ce": 9.859049168881029e-05, "loss_iou": 0.1767578125, "loss_num": 0.009033203125, "loss_xval": 0.3984375, "num_input_tokens_seen": 367167548, "step": 6553 }, { "epoch": 14.596881959910913, "grad_norm": 16.59556770324707, "learning_rate": 1e-06, "loss": 0.4072, "num_input_tokens_seen": 367223212, "step": 6554 }, { "epoch": 14.596881959910913, "loss": 0.3758706748485565, "loss_ce": 0.00010771742381621152, "loss_iou": 0.16796875, "loss_num": 0.008056640625, "loss_xval": 0.375, "num_input_tokens_seen": 367223212, "step": 6554 }, { "epoch": 14.599109131403118, "grad_norm": 12.429368019104004, "learning_rate": 1e-06, "loss": 0.4022, "num_input_tokens_seen": 367279056, "step": 6555 }, { "epoch": 14.599109131403118, "loss": 0.5174868702888489, "loss_ce": 9.187131217913702e-05, "loss_iou": 0.236328125, "loss_num": 0.00909423828125, "loss_xval": 0.515625, "num_input_tokens_seen": 367279056, "step": 6555 }, { "epoch": 14.601336302895323, "grad_norm": 18.785852432250977, "learning_rate": 1e-06, "loss": 0.4512, "num_input_tokens_seen": 367336108, "step": 6556 }, { "epoch": 14.601336302895323, "loss": 0.5070604085922241, "loss_ce": 0.00010236204252578318, "loss_iou": 0.21484375, "loss_num": 0.015380859375, "loss_xval": 0.5078125, "num_input_tokens_seen": 367336108, "step": 6556 }, { "epoch": 14.603563474387528, "grad_norm": 15.28122329711914, "learning_rate": 1e-06, "loss": 0.3388, "num_input_tokens_seen": 367393664, "step": 6557 }, { "epoch": 14.603563474387528, "loss": 0.4066086709499359, "loss_ce": 0.00011453506886027753, "loss_iou": 0.17578125, "loss_num": 0.01080322265625, "loss_xval": 0.40625, "num_input_tokens_seen": 367393664, "step": 6557 }, { "epoch": 14.605790645879733, "grad_norm": 12.419171333312988, "learning_rate": 1e-06, "loss": 0.4405, "num_input_tokens_seen": 367451044, "step": 6558 }, { "epoch": 14.605790645879733, "loss": 0.4510771334171295, "loss_ce": 0.00014940105029381812, "loss_iou": 0.1923828125, "loss_num": 0.0133056640625, "loss_xval": 0.451171875, "num_input_tokens_seen": 367451044, "step": 6558 }, { "epoch": 14.608017817371937, "grad_norm": 14.014966011047363, "learning_rate": 1e-06, "loss": 0.5277, "num_input_tokens_seen": 367508844, "step": 6559 }, { "epoch": 14.608017817371937, "loss": 0.5670410990715027, "loss_ce": 0.00014657452993560582, "loss_iou": 0.21875, "loss_num": 0.025634765625, "loss_xval": 0.56640625, "num_input_tokens_seen": 367508844, "step": 6559 }, { "epoch": 14.610244988864142, "grad_norm": 15.982457160949707, "learning_rate": 1e-06, "loss": 0.3984, "num_input_tokens_seen": 367566664, "step": 6560 }, { "epoch": 14.610244988864142, "loss": 0.400331974029541, "loss_ce": 0.0001854914880823344, "loss_iou": 0.1728515625, "loss_num": 0.0108642578125, "loss_xval": 0.400390625, "num_input_tokens_seen": 367566664, "step": 6560 }, { "epoch": 14.612472160356347, "grad_norm": 20.686161041259766, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 367619140, "step": 6561 }, { "epoch": 14.612472160356347, "loss": 0.6208482384681702, "loss_ce": 0.00012072353274561465, "loss_iou": 0.259765625, "loss_num": 0.02001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 367619140, "step": 6561 }, { "epoch": 14.614699331848552, "grad_norm": 22.73474884033203, "learning_rate": 1e-06, "loss": 0.4558, "num_input_tokens_seen": 367675996, "step": 6562 }, { "epoch": 14.614699331848552, "loss": 0.4799794554710388, "loss_ce": 0.00012105887435609475, "loss_iou": 0.1845703125, "loss_num": 0.0220947265625, "loss_xval": 0.48046875, "num_input_tokens_seen": 367675996, "step": 6562 }, { "epoch": 14.616926503340757, "grad_norm": 19.15255355834961, "learning_rate": 1e-06, "loss": 0.4012, "num_input_tokens_seen": 367733188, "step": 6563 }, { "epoch": 14.616926503340757, "loss": 0.38610512018203735, "loss_ce": 0.00011879668454639614, "loss_iou": 0.1689453125, "loss_num": 0.009521484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 367733188, "step": 6563 }, { "epoch": 14.619153674832962, "grad_norm": 19.479084014892578, "learning_rate": 1e-06, "loss": 0.5865, "num_input_tokens_seen": 367791252, "step": 6564 }, { "epoch": 14.619153674832962, "loss": 0.6352872848510742, "loss_ce": 0.0001554101036163047, "loss_iou": 0.28125, "loss_num": 0.0147705078125, "loss_xval": 0.63671875, "num_input_tokens_seen": 367791252, "step": 6564 }, { "epoch": 14.621380846325167, "grad_norm": 33.37403869628906, "learning_rate": 1e-06, "loss": 0.494, "num_input_tokens_seen": 367845888, "step": 6565 }, { "epoch": 14.621380846325167, "loss": 0.3272702097892761, "loss_ce": 0.00012174884614069015, "loss_iou": 0.1416015625, "loss_num": 0.00885009765625, "loss_xval": 0.328125, "num_input_tokens_seen": 367845888, "step": 6565 }, { "epoch": 14.623608017817372, "grad_norm": 14.532173156738281, "learning_rate": 1e-06, "loss": 0.5322, "num_input_tokens_seen": 367902024, "step": 6566 }, { "epoch": 14.623608017817372, "loss": 0.6220463514328003, "loss_ce": 9.808540198719129e-05, "loss_iou": 0.259765625, "loss_num": 0.0208740234375, "loss_xval": 0.62109375, "num_input_tokens_seen": 367902024, "step": 6566 }, { "epoch": 14.625835189309576, "grad_norm": 25.17264747619629, "learning_rate": 1e-06, "loss": 0.4745, "num_input_tokens_seen": 367956996, "step": 6567 }, { "epoch": 14.625835189309576, "loss": 0.46531087160110474, "loss_ce": 0.00010092130105476826, "loss_iou": 0.193359375, "loss_num": 0.0157470703125, "loss_xval": 0.46484375, "num_input_tokens_seen": 367956996, "step": 6567 }, { "epoch": 14.628062360801781, "grad_norm": 21.86960792541504, "learning_rate": 1e-06, "loss": 0.3833, "num_input_tokens_seen": 368011628, "step": 6568 }, { "epoch": 14.628062360801781, "loss": 0.41635486483573914, "loss_ce": 9.509771916782483e-05, "loss_iou": 0.19140625, "loss_num": 0.006561279296875, "loss_xval": 0.416015625, "num_input_tokens_seen": 368011628, "step": 6568 }, { "epoch": 14.630289532293986, "grad_norm": 20.430923461914062, "learning_rate": 1e-06, "loss": 0.7204, "num_input_tokens_seen": 368065324, "step": 6569 }, { "epoch": 14.630289532293986, "loss": 0.7128270268440247, "loss_ce": 0.0001805600186344236, "loss_iou": 0.306640625, "loss_num": 0.0198974609375, "loss_xval": 0.7109375, "num_input_tokens_seen": 368065324, "step": 6569 }, { "epoch": 14.632516703786191, "grad_norm": 23.498016357421875, "learning_rate": 1e-06, "loss": 0.5559, "num_input_tokens_seen": 368120640, "step": 6570 }, { "epoch": 14.632516703786191, "loss": 0.6478054523468018, "loss_ce": 0.00010038249456556514, "loss_iou": 0.263671875, "loss_num": 0.0242919921875, "loss_xval": 0.6484375, "num_input_tokens_seen": 368120640, "step": 6570 }, { "epoch": 14.634743875278396, "grad_norm": 20.486783981323242, "learning_rate": 1e-06, "loss": 0.5641, "num_input_tokens_seen": 368176708, "step": 6571 }, { "epoch": 14.634743875278396, "loss": 0.49949222803115845, "loss_ce": 0.00010254726657876745, "loss_iou": 0.208984375, "loss_num": 0.0162353515625, "loss_xval": 0.5, "num_input_tokens_seen": 368176708, "step": 6571 }, { "epoch": 14.6369710467706, "grad_norm": 18.985647201538086, "learning_rate": 1e-06, "loss": 0.5617, "num_input_tokens_seen": 368231812, "step": 6572 }, { "epoch": 14.6369710467706, "loss": 0.6410832405090332, "loss_ce": 9.200733620673418e-05, "loss_iou": 0.298828125, "loss_num": 0.00897216796875, "loss_xval": 0.640625, "num_input_tokens_seen": 368231812, "step": 6572 }, { "epoch": 14.639198218262806, "grad_norm": 14.87316608428955, "learning_rate": 1e-06, "loss": 0.6231, "num_input_tokens_seen": 368285028, "step": 6573 }, { "epoch": 14.639198218262806, "loss": 0.7178068161010742, "loss_ce": 0.00015544812777079642, "loss_iou": 0.302734375, "loss_num": 0.0224609375, "loss_xval": 0.71875, "num_input_tokens_seen": 368285028, "step": 6573 }, { "epoch": 14.64142538975501, "grad_norm": 18.456830978393555, "learning_rate": 1e-06, "loss": 0.4465, "num_input_tokens_seen": 368341996, "step": 6574 }, { "epoch": 14.64142538975501, "loss": 0.5098215937614441, "loss_ce": 0.0015208279946818948, "loss_iou": 0.2265625, "loss_num": 0.01104736328125, "loss_xval": 0.5078125, "num_input_tokens_seen": 368341996, "step": 6574 }, { "epoch": 14.643652561247215, "grad_norm": 15.073392868041992, "learning_rate": 1e-06, "loss": 0.5345, "num_input_tokens_seen": 368397584, "step": 6575 }, { "epoch": 14.643652561247215, "loss": 0.49510324001312256, "loss_ce": 0.00010811796528287232, "loss_iou": 0.208984375, "loss_num": 0.01556396484375, "loss_xval": 0.494140625, "num_input_tokens_seen": 368397584, "step": 6575 }, { "epoch": 14.64587973273942, "grad_norm": 22.879812240600586, "learning_rate": 1e-06, "loss": 0.4901, "num_input_tokens_seen": 368452996, "step": 6576 }, { "epoch": 14.64587973273942, "loss": 0.6450592279434204, "loss_ce": 0.00016175321070477366, "loss_iou": 0.2578125, "loss_num": 0.0257568359375, "loss_xval": 0.64453125, "num_input_tokens_seen": 368452996, "step": 6576 }, { "epoch": 14.648106904231625, "grad_norm": 36.055259704589844, "learning_rate": 1e-06, "loss": 0.4346, "num_input_tokens_seen": 368511036, "step": 6577 }, { "epoch": 14.648106904231625, "loss": 0.457261860370636, "loss_ce": 0.00010854535503312945, "loss_iou": 0.1953125, "loss_num": 0.01318359375, "loss_xval": 0.45703125, "num_input_tokens_seen": 368511036, "step": 6577 }, { "epoch": 14.65033407572383, "grad_norm": 22.31118392944336, "learning_rate": 1e-06, "loss": 0.4259, "num_input_tokens_seen": 368564932, "step": 6578 }, { "epoch": 14.65033407572383, "loss": 0.39772558212280273, "loss_ce": 0.00014260000898502767, "loss_iou": 0.162109375, "loss_num": 0.0146484375, "loss_xval": 0.3984375, "num_input_tokens_seen": 368564932, "step": 6578 }, { "epoch": 14.652561247216035, "grad_norm": 17.83094596862793, "learning_rate": 1e-06, "loss": 0.5154, "num_input_tokens_seen": 368621580, "step": 6579 }, { "epoch": 14.652561247216035, "loss": 0.5228110551834106, "loss_ce": 0.0002280529006384313, "loss_iou": 0.21875, "loss_num": 0.0172119140625, "loss_xval": 0.5234375, "num_input_tokens_seen": 368621580, "step": 6579 }, { "epoch": 14.654788418708241, "grad_norm": 24.0405216217041, "learning_rate": 1e-06, "loss": 0.4541, "num_input_tokens_seen": 368674988, "step": 6580 }, { "epoch": 14.654788418708241, "loss": 0.4888764023780823, "loss_ce": 0.00010689307237043977, "loss_iou": 0.2158203125, "loss_num": 0.0113525390625, "loss_xval": 0.48828125, "num_input_tokens_seen": 368674988, "step": 6580 }, { "epoch": 14.657015590200446, "grad_norm": 17.233251571655273, "learning_rate": 1e-06, "loss": 0.3594, "num_input_tokens_seen": 368731232, "step": 6581 }, { "epoch": 14.657015590200446, "loss": 0.319793164730072, "loss_ce": 9.099852468352765e-05, "loss_iou": 0.138671875, "loss_num": 0.00848388671875, "loss_xval": 0.3203125, "num_input_tokens_seen": 368731232, "step": 6581 }, { "epoch": 14.659242761692651, "grad_norm": 31.06590461730957, "learning_rate": 1e-06, "loss": 0.6744, "num_input_tokens_seen": 368787912, "step": 6582 }, { "epoch": 14.659242761692651, "loss": 0.6104459762573242, "loss_ce": 9.442644659429789e-05, "loss_iou": 0.23828125, "loss_num": 0.0269775390625, "loss_xval": 0.609375, "num_input_tokens_seen": 368787912, "step": 6582 }, { "epoch": 14.661469933184856, "grad_norm": 16.3990535736084, "learning_rate": 1e-06, "loss": 0.3871, "num_input_tokens_seen": 368844564, "step": 6583 }, { "epoch": 14.661469933184856, "loss": 0.4364955723285675, "loss_ce": 9.420434071216732e-05, "loss_iou": 0.1953125, "loss_num": 0.00921630859375, "loss_xval": 0.435546875, "num_input_tokens_seen": 368844564, "step": 6583 }, { "epoch": 14.66369710467706, "grad_norm": 16.442039489746094, "learning_rate": 1e-06, "loss": 0.5207, "num_input_tokens_seen": 368903620, "step": 6584 }, { "epoch": 14.66369710467706, "loss": 0.8181473612785339, "loss_ce": 0.00015422608703374863, "loss_iou": 0.33203125, "loss_num": 0.0308837890625, "loss_xval": 0.81640625, "num_input_tokens_seen": 368903620, "step": 6584 }, { "epoch": 14.665924276169266, "grad_norm": 18.313331604003906, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 368961612, "step": 6585 }, { "epoch": 14.665924276169266, "loss": 0.6265621185302734, "loss_ce": 9.728968143463135e-05, "loss_iou": 0.26953125, "loss_num": 0.017578125, "loss_xval": 0.625, "num_input_tokens_seen": 368961612, "step": 6585 }, { "epoch": 14.66815144766147, "grad_norm": 27.344846725463867, "learning_rate": 1e-06, "loss": 0.4549, "num_input_tokens_seen": 369014408, "step": 6586 }, { "epoch": 14.66815144766147, "loss": 0.5841131806373596, "loss_ce": 0.00012881754082627594, "loss_iou": 0.234375, "loss_num": 0.023193359375, "loss_xval": 0.5859375, "num_input_tokens_seen": 369014408, "step": 6586 }, { "epoch": 14.670378619153675, "grad_norm": 19.41844367980957, "learning_rate": 1e-06, "loss": 0.4142, "num_input_tokens_seen": 369068716, "step": 6587 }, { "epoch": 14.670378619153675, "loss": 0.4551857113838196, "loss_ce": 0.00010757872951216996, "loss_iou": 0.2080078125, "loss_num": 0.0078125, "loss_xval": 0.455078125, "num_input_tokens_seen": 369068716, "step": 6587 }, { "epoch": 14.67260579064588, "grad_norm": 17.168302536010742, "learning_rate": 1e-06, "loss": 0.5873, "num_input_tokens_seen": 369125348, "step": 6588 }, { "epoch": 14.67260579064588, "loss": 0.46933096647262573, "loss_ce": 9.269404108636081e-05, "loss_iou": 0.20703125, "loss_num": 0.010986328125, "loss_xval": 0.46875, "num_input_tokens_seen": 369125348, "step": 6588 }, { "epoch": 14.674832962138085, "grad_norm": 24.008495330810547, "learning_rate": 1e-06, "loss": 0.5045, "num_input_tokens_seen": 369181304, "step": 6589 }, { "epoch": 14.674832962138085, "loss": 0.46079450845718384, "loss_ce": 0.00010113501048181206, "loss_iou": 0.216796875, "loss_num": 0.00567626953125, "loss_xval": 0.4609375, "num_input_tokens_seen": 369181304, "step": 6589 }, { "epoch": 14.67706013363029, "grad_norm": 15.958958625793457, "learning_rate": 1e-06, "loss": 0.365, "num_input_tokens_seen": 369237872, "step": 6590 }, { "epoch": 14.67706013363029, "loss": 0.4678671360015869, "loss_ce": 9.370467159897089e-05, "loss_iou": 0.2001953125, "loss_num": 0.01361083984375, "loss_xval": 0.46875, "num_input_tokens_seen": 369237872, "step": 6590 }, { "epoch": 14.679287305122495, "grad_norm": 14.867953300476074, "learning_rate": 1e-06, "loss": 0.364, "num_input_tokens_seen": 369294824, "step": 6591 }, { "epoch": 14.679287305122495, "loss": 0.4817339777946472, "loss_ce": 0.0001055731117958203, "loss_iou": 0.2197265625, "loss_num": 0.0084228515625, "loss_xval": 0.482421875, "num_input_tokens_seen": 369294824, "step": 6591 }, { "epoch": 14.6815144766147, "grad_norm": 30.02640724182129, "learning_rate": 1e-06, "loss": 0.4666, "num_input_tokens_seen": 369352304, "step": 6592 }, { "epoch": 14.6815144766147, "loss": 0.5130610466003418, "loss_ce": 0.00012160430196672678, "loss_iou": 0.21875, "loss_num": 0.01531982421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 369352304, "step": 6592 }, { "epoch": 14.683741648106905, "grad_norm": 29.63843536376953, "learning_rate": 1e-06, "loss": 0.4834, "num_input_tokens_seen": 369407840, "step": 6593 }, { "epoch": 14.683741648106905, "loss": 0.5572299957275391, "loss_ce": 0.00010112335439771414, "loss_iou": 0.2197265625, "loss_num": 0.0234375, "loss_xval": 0.55859375, "num_input_tokens_seen": 369407840, "step": 6593 }, { "epoch": 14.68596881959911, "grad_norm": 21.299360275268555, "learning_rate": 1e-06, "loss": 0.5773, "num_input_tokens_seen": 369461952, "step": 6594 }, { "epoch": 14.68596881959911, "loss": 0.6562404632568359, "loss_ce": 0.0003566770756151527, "loss_iou": 0.2890625, "loss_num": 0.0155029296875, "loss_xval": 0.65625, "num_input_tokens_seen": 369461952, "step": 6594 }, { "epoch": 14.688195991091314, "grad_norm": 14.860562324523926, "learning_rate": 1e-06, "loss": 0.6268, "num_input_tokens_seen": 369517888, "step": 6595 }, { "epoch": 14.688195991091314, "loss": 0.745347797870636, "loss_ce": 0.00010857224697247148, "loss_iou": 0.333984375, "loss_num": 0.01556396484375, "loss_xval": 0.74609375, "num_input_tokens_seen": 369517888, "step": 6595 }, { "epoch": 14.690423162583519, "grad_norm": 18.80927085876465, "learning_rate": 1e-06, "loss": 0.4626, "num_input_tokens_seen": 369575536, "step": 6596 }, { "epoch": 14.690423162583519, "loss": 0.5963727831840515, "loss_ce": 0.00012036593398079276, "loss_iou": 0.263671875, "loss_num": 0.01397705078125, "loss_xval": 0.59765625, "num_input_tokens_seen": 369575536, "step": 6596 }, { "epoch": 14.692650334075724, "grad_norm": 19.051189422607422, "learning_rate": 1e-06, "loss": 0.3497, "num_input_tokens_seen": 369633808, "step": 6597 }, { "epoch": 14.692650334075724, "loss": 0.4147190749645233, "loss_ce": 0.0002903687418438494, "loss_iou": 0.1767578125, "loss_num": 0.0120849609375, "loss_xval": 0.4140625, "num_input_tokens_seen": 369633808, "step": 6597 }, { "epoch": 14.694877505567929, "grad_norm": 18.837825775146484, "learning_rate": 1e-06, "loss": 0.4103, "num_input_tokens_seen": 369691260, "step": 6598 }, { "epoch": 14.694877505567929, "loss": 0.49033933877944946, "loss_ce": 0.00010496500908629969, "loss_iou": 0.2001953125, "loss_num": 0.0179443359375, "loss_xval": 0.490234375, "num_input_tokens_seen": 369691260, "step": 6598 }, { "epoch": 14.697104677060134, "grad_norm": 21.672534942626953, "learning_rate": 1e-06, "loss": 0.4892, "num_input_tokens_seen": 369748488, "step": 6599 }, { "epoch": 14.697104677060134, "loss": 0.5086407661437988, "loss_ce": 9.587158274371177e-05, "loss_iou": 0.224609375, "loss_num": 0.01177978515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 369748488, "step": 6599 }, { "epoch": 14.699331848552339, "grad_norm": 17.431072235107422, "learning_rate": 1e-06, "loss": 0.369, "num_input_tokens_seen": 369805384, "step": 6600 }, { "epoch": 14.699331848552339, "loss": 0.3214370012283325, "loss_ce": 8.691436960361898e-05, "loss_iou": 0.1494140625, "loss_num": 0.004364013671875, "loss_xval": 0.322265625, "num_input_tokens_seen": 369805384, "step": 6600 }, { "epoch": 14.701559020044543, "grad_norm": 18.672565460205078, "learning_rate": 1e-06, "loss": 0.4749, "num_input_tokens_seen": 369860176, "step": 6601 }, { "epoch": 14.701559020044543, "loss": 0.5711730718612671, "loss_ce": 0.0001281223667319864, "loss_iou": 0.2392578125, "loss_num": 0.0185546875, "loss_xval": 0.5703125, "num_input_tokens_seen": 369860176, "step": 6601 }, { "epoch": 14.703786191536748, "grad_norm": 17.336938858032227, "learning_rate": 1e-06, "loss": 0.4992, "num_input_tokens_seen": 369916752, "step": 6602 }, { "epoch": 14.703786191536748, "loss": 0.290561705827713, "loss_ce": 9.540806786390021e-05, "loss_iou": 0.1337890625, "loss_num": 0.004669189453125, "loss_xval": 0.291015625, "num_input_tokens_seen": 369916752, "step": 6602 }, { "epoch": 14.706013363028953, "grad_norm": 19.08681297302246, "learning_rate": 1e-06, "loss": 0.417, "num_input_tokens_seen": 369973148, "step": 6603 }, { "epoch": 14.706013363028953, "loss": 0.4227031469345093, "loss_ce": 9.572567796567455e-05, "loss_iou": 0.1845703125, "loss_num": 0.0106201171875, "loss_xval": 0.421875, "num_input_tokens_seen": 369973148, "step": 6603 }, { "epoch": 14.708240534521158, "grad_norm": 19.571962356567383, "learning_rate": 1e-06, "loss": 0.3897, "num_input_tokens_seen": 370030248, "step": 6604 }, { "epoch": 14.708240534521158, "loss": 0.3606414496898651, "loss_ce": 0.00022884068312123418, "loss_iou": 0.154296875, "loss_num": 0.01043701171875, "loss_xval": 0.361328125, "num_input_tokens_seen": 370030248, "step": 6604 }, { "epoch": 14.710467706013363, "grad_norm": 25.896663665771484, "learning_rate": 1e-06, "loss": 0.444, "num_input_tokens_seen": 370086860, "step": 6605 }, { "epoch": 14.710467706013363, "loss": 0.4243563711643219, "loss_ce": 0.00010100057988893241, "loss_iou": 0.193359375, "loss_num": 0.00762939453125, "loss_xval": 0.423828125, "num_input_tokens_seen": 370086860, "step": 6605 }, { "epoch": 14.712694877505568, "grad_norm": 11.470439910888672, "learning_rate": 1e-06, "loss": 0.4257, "num_input_tokens_seen": 370140792, "step": 6606 }, { "epoch": 14.712694877505568, "loss": 0.4308934211730957, "loss_ce": 0.00010726226173574105, "loss_iou": 0.1865234375, "loss_num": 0.0115966796875, "loss_xval": 0.431640625, "num_input_tokens_seen": 370140792, "step": 6606 }, { "epoch": 14.714922048997773, "grad_norm": 37.93767547607422, "learning_rate": 1e-06, "loss": 0.4366, "num_input_tokens_seen": 370198188, "step": 6607 }, { "epoch": 14.714922048997773, "loss": 0.3961656987667084, "loss_ce": 0.000108551379526034, "loss_iou": 0.15625, "loss_num": 0.0167236328125, "loss_xval": 0.396484375, "num_input_tokens_seen": 370198188, "step": 6607 }, { "epoch": 14.717149220489977, "grad_norm": 19.189268112182617, "learning_rate": 1e-06, "loss": 0.5345, "num_input_tokens_seen": 370256068, "step": 6608 }, { "epoch": 14.717149220489977, "loss": 0.5142604112625122, "loss_ce": 0.00010023896174971014, "loss_iou": 0.2373046875, "loss_num": 0.008056640625, "loss_xval": 0.515625, "num_input_tokens_seen": 370256068, "step": 6608 }, { "epoch": 14.719376391982182, "grad_norm": 27.949832916259766, "learning_rate": 1e-06, "loss": 0.4502, "num_input_tokens_seen": 370310132, "step": 6609 }, { "epoch": 14.719376391982182, "loss": 0.41979140043258667, "loss_ce": 0.000540890556294471, "loss_iou": 0.177734375, "loss_num": 0.01287841796875, "loss_xval": 0.419921875, "num_input_tokens_seen": 370310132, "step": 6609 }, { "epoch": 14.721603563474387, "grad_norm": 19.57186508178711, "learning_rate": 1e-06, "loss": 0.4912, "num_input_tokens_seen": 370365772, "step": 6610 }, { "epoch": 14.721603563474387, "loss": 0.4778703451156616, "loss_ce": 8.710511610843241e-05, "loss_iou": 0.2109375, "loss_num": 0.01129150390625, "loss_xval": 0.478515625, "num_input_tokens_seen": 370365772, "step": 6610 }, { "epoch": 14.723830734966592, "grad_norm": 22.238262176513672, "learning_rate": 1e-06, "loss": 0.4615, "num_input_tokens_seen": 370422604, "step": 6611 }, { "epoch": 14.723830734966592, "loss": 0.536961019039154, "loss_ce": 0.00021785832359455526, "loss_iou": 0.236328125, "loss_num": 0.01275634765625, "loss_xval": 0.53515625, "num_input_tokens_seen": 370422604, "step": 6611 }, { "epoch": 14.726057906458797, "grad_norm": 19.965530395507812, "learning_rate": 1e-06, "loss": 0.4523, "num_input_tokens_seen": 370477032, "step": 6612 }, { "epoch": 14.726057906458797, "loss": 0.44715508818626404, "loss_ce": 0.00013360095908865333, "loss_iou": 0.20703125, "loss_num": 0.00640869140625, "loss_xval": 0.447265625, "num_input_tokens_seen": 370477032, "step": 6612 }, { "epoch": 14.728285077951002, "grad_norm": 18.242788314819336, "learning_rate": 1e-06, "loss": 0.5108, "num_input_tokens_seen": 370532400, "step": 6613 }, { "epoch": 14.728285077951002, "loss": 0.606315553188324, "loss_ce": 0.00011437687498982996, "loss_iou": 0.265625, "loss_num": 0.01507568359375, "loss_xval": 0.60546875, "num_input_tokens_seen": 370532400, "step": 6613 }, { "epoch": 14.730512249443207, "grad_norm": 14.153312683105469, "learning_rate": 1e-06, "loss": 0.484, "num_input_tokens_seen": 370589516, "step": 6614 }, { "epoch": 14.730512249443207, "loss": 0.6295005083084106, "loss_ce": 0.0001059654459822923, "loss_iou": 0.251953125, "loss_num": 0.0252685546875, "loss_xval": 0.62890625, "num_input_tokens_seen": 370589516, "step": 6614 }, { "epoch": 14.732739420935411, "grad_norm": 19.10258674621582, "learning_rate": 1e-06, "loss": 0.465, "num_input_tokens_seen": 370647412, "step": 6615 }, { "epoch": 14.732739420935411, "loss": 0.6935760974884033, "loss_ce": 9.462784510105848e-05, "loss_iou": 0.30859375, "loss_num": 0.014892578125, "loss_xval": 0.6953125, "num_input_tokens_seen": 370647412, "step": 6615 }, { "epoch": 14.734966592427616, "grad_norm": 14.81308650970459, "learning_rate": 1e-06, "loss": 0.4206, "num_input_tokens_seen": 370706064, "step": 6616 }, { "epoch": 14.734966592427616, "loss": 0.5853002071380615, "loss_ce": 9.509964729659259e-05, "loss_iou": 0.240234375, "loss_num": 0.0208740234375, "loss_xval": 0.5859375, "num_input_tokens_seen": 370706064, "step": 6616 }, { "epoch": 14.737193763919821, "grad_norm": 44.200828552246094, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 370762332, "step": 6617 }, { "epoch": 14.737193763919821, "loss": 0.5105452537536621, "loss_ce": 0.00016923280782066286, "loss_iou": 0.2197265625, "loss_num": 0.01409912109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 370762332, "step": 6617 }, { "epoch": 14.739420935412026, "grad_norm": 15.740882873535156, "learning_rate": 1e-06, "loss": 0.3973, "num_input_tokens_seen": 370817860, "step": 6618 }, { "epoch": 14.739420935412026, "loss": 0.43589556217193604, "loss_ce": 0.00010457850294187665, "loss_iou": 0.18359375, "loss_num": 0.01373291015625, "loss_xval": 0.435546875, "num_input_tokens_seen": 370817860, "step": 6618 }, { "epoch": 14.74164810690423, "grad_norm": 18.04197120666504, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 370872024, "step": 6619 }, { "epoch": 14.74164810690423, "loss": 0.29858967661857605, "loss_ce": 9.725069685373455e-05, "loss_iou": 0.1259765625, "loss_num": 0.00921630859375, "loss_xval": 0.298828125, "num_input_tokens_seen": 370872024, "step": 6619 }, { "epoch": 14.743875278396436, "grad_norm": 17.093969345092773, "learning_rate": 1e-06, "loss": 0.4089, "num_input_tokens_seen": 370928616, "step": 6620 }, { "epoch": 14.743875278396436, "loss": 0.37071430683135986, "loss_ce": 0.00010881570779019967, "loss_iou": 0.1728515625, "loss_num": 0.005035400390625, "loss_xval": 0.37109375, "num_input_tokens_seen": 370928616, "step": 6620 }, { "epoch": 14.74610244988864, "grad_norm": 19.94365882873535, "learning_rate": 1e-06, "loss": 0.5362, "num_input_tokens_seen": 370984480, "step": 6621 }, { "epoch": 14.74610244988864, "loss": 0.7782934904098511, "loss_ce": 9.5245341071859e-05, "loss_iou": 0.35546875, "loss_num": 0.01385498046875, "loss_xval": 0.77734375, "num_input_tokens_seen": 370984480, "step": 6621 }, { "epoch": 14.748329621380847, "grad_norm": 19.63141632080078, "learning_rate": 1e-06, "loss": 0.451, "num_input_tokens_seen": 371042128, "step": 6622 }, { "epoch": 14.748329621380847, "loss": 0.46288466453552246, "loss_ce": 0.00011610893852775916, "loss_iou": 0.1962890625, "loss_num": 0.01416015625, "loss_xval": 0.462890625, "num_input_tokens_seen": 371042128, "step": 6622 }, { "epoch": 14.750556792873052, "grad_norm": 23.052513122558594, "learning_rate": 1e-06, "loss": 0.4397, "num_input_tokens_seen": 371099580, "step": 6623 }, { "epoch": 14.750556792873052, "loss": 0.5583330392837524, "loss_ce": 0.00010552619642112404, "loss_iou": 0.255859375, "loss_num": 0.00970458984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 371099580, "step": 6623 }, { "epoch": 14.752783964365257, "grad_norm": 12.574701309204102, "learning_rate": 1e-06, "loss": 0.2786, "num_input_tokens_seen": 371154964, "step": 6624 }, { "epoch": 14.752783964365257, "loss": 0.29406648874282837, "loss_ce": 9.068727376870811e-05, "loss_iou": 0.1298828125, "loss_num": 0.00689697265625, "loss_xval": 0.294921875, "num_input_tokens_seen": 371154964, "step": 6624 }, { "epoch": 14.755011135857462, "grad_norm": 16.99627113342285, "learning_rate": 1e-06, "loss": 0.5441, "num_input_tokens_seen": 371211736, "step": 6625 }, { "epoch": 14.755011135857462, "loss": 0.5980323553085327, "loss_ce": 0.00013196782674640417, "loss_iou": 0.2373046875, "loss_num": 0.0245361328125, "loss_xval": 0.59765625, "num_input_tokens_seen": 371211736, "step": 6625 }, { "epoch": 14.757238307349667, "grad_norm": 25.345653533935547, "learning_rate": 1e-06, "loss": 0.6329, "num_input_tokens_seen": 371267888, "step": 6626 }, { "epoch": 14.757238307349667, "loss": 0.5716487765312195, "loss_ce": 0.00011558398546185344, "loss_iou": 0.2578125, "loss_num": 0.0113525390625, "loss_xval": 0.5703125, "num_input_tokens_seen": 371267888, "step": 6626 }, { "epoch": 14.759465478841872, "grad_norm": 35.48616027832031, "learning_rate": 1e-06, "loss": 0.5655, "num_input_tokens_seen": 371324636, "step": 6627 }, { "epoch": 14.759465478841872, "loss": 0.6881007552146912, "loss_ce": 0.00011247480870224535, "loss_iou": 0.279296875, "loss_num": 0.025634765625, "loss_xval": 0.6875, "num_input_tokens_seen": 371324636, "step": 6627 }, { "epoch": 14.761692650334076, "grad_norm": 17.21990394592285, "learning_rate": 1e-06, "loss": 0.7006, "num_input_tokens_seen": 371381504, "step": 6628 }, { "epoch": 14.761692650334076, "loss": 0.7638993263244629, "loss_ce": 0.00010532997839618474, "loss_iou": 0.302734375, "loss_num": 0.03125, "loss_xval": 0.765625, "num_input_tokens_seen": 371381504, "step": 6628 }, { "epoch": 14.763919821826281, "grad_norm": 14.376852035522461, "learning_rate": 1e-06, "loss": 0.3699, "num_input_tokens_seen": 371435912, "step": 6629 }, { "epoch": 14.763919821826281, "loss": 0.43231716752052307, "loss_ce": 0.00012721640814561397, "loss_iou": 0.1767578125, "loss_num": 0.0155029296875, "loss_xval": 0.431640625, "num_input_tokens_seen": 371435912, "step": 6629 }, { "epoch": 14.766146993318486, "grad_norm": 18.436325073242188, "learning_rate": 1e-06, "loss": 0.3864, "num_input_tokens_seen": 371491144, "step": 6630 }, { "epoch": 14.766146993318486, "loss": 0.47628775238990784, "loss_ce": 9.145887452177703e-05, "loss_iou": 0.2138671875, "loss_num": 0.009765625, "loss_xval": 0.4765625, "num_input_tokens_seen": 371491144, "step": 6630 }, { "epoch": 14.768374164810691, "grad_norm": 19.47183609008789, "learning_rate": 1e-06, "loss": 0.4151, "num_input_tokens_seen": 371548080, "step": 6631 }, { "epoch": 14.768374164810691, "loss": 0.4063809812068939, "loss_ce": 0.0001309568469878286, "loss_iou": 0.17578125, "loss_num": 0.01092529296875, "loss_xval": 0.40625, "num_input_tokens_seen": 371548080, "step": 6631 }, { "epoch": 14.770601336302896, "grad_norm": 14.947174072265625, "learning_rate": 1e-06, "loss": 0.5903, "num_input_tokens_seen": 371605028, "step": 6632 }, { "epoch": 14.770601336302896, "loss": 0.6663827896118164, "loss_ce": 0.00012304184201639146, "loss_iou": 0.27734375, "loss_num": 0.0223388671875, "loss_xval": 0.66796875, "num_input_tokens_seen": 371605028, "step": 6632 }, { "epoch": 14.7728285077951, "grad_norm": 13.995532989501953, "learning_rate": 1e-06, "loss": 0.387, "num_input_tokens_seen": 371660708, "step": 6633 }, { "epoch": 14.7728285077951, "loss": 0.29860347509384155, "loss_ce": 8.051018812693655e-05, "loss_iou": 0.1259765625, "loss_num": 0.00927734375, "loss_xval": 0.298828125, "num_input_tokens_seen": 371660708, "step": 6633 }, { "epoch": 14.775055679287306, "grad_norm": 20.504301071166992, "learning_rate": 1e-06, "loss": 0.5561, "num_input_tokens_seen": 371717676, "step": 6634 }, { "epoch": 14.775055679287306, "loss": 0.6223191022872925, "loss_ce": 0.0002488004101905972, "loss_iou": 0.271484375, "loss_num": 0.015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 371717676, "step": 6634 }, { "epoch": 14.77728285077951, "grad_norm": 14.648569107055664, "learning_rate": 1e-06, "loss": 0.3508, "num_input_tokens_seen": 371772420, "step": 6635 }, { "epoch": 14.77728285077951, "loss": 0.4088403880596161, "loss_ce": 0.000148980543599464, "loss_iou": 0.166015625, "loss_num": 0.01556396484375, "loss_xval": 0.408203125, "num_input_tokens_seen": 371772420, "step": 6635 }, { "epoch": 14.779510022271715, "grad_norm": 17.389978408813477, "learning_rate": 1e-06, "loss": 0.6203, "num_input_tokens_seen": 371827592, "step": 6636 }, { "epoch": 14.779510022271715, "loss": 0.5807891488075256, "loss_ce": 0.00010066662798635662, "loss_iou": 0.2578125, "loss_num": 0.0130615234375, "loss_xval": 0.58203125, "num_input_tokens_seen": 371827592, "step": 6636 }, { "epoch": 14.78173719376392, "grad_norm": 15.41741943359375, "learning_rate": 1e-06, "loss": 0.3496, "num_input_tokens_seen": 371883112, "step": 6637 }, { "epoch": 14.78173719376392, "loss": 0.36627358198165894, "loss_ce": 0.0001541778037790209, "loss_iou": 0.1513671875, "loss_num": 0.0125732421875, "loss_xval": 0.365234375, "num_input_tokens_seen": 371883112, "step": 6637 }, { "epoch": 14.783964365256125, "grad_norm": 11.838160514831543, "learning_rate": 1e-06, "loss": 0.4474, "num_input_tokens_seen": 371939696, "step": 6638 }, { "epoch": 14.783964365256125, "loss": 0.49210840463638306, "loss_ce": 0.0001039962880895473, "loss_iou": 0.203125, "loss_num": 0.0174560546875, "loss_xval": 0.4921875, "num_input_tokens_seen": 371939696, "step": 6638 }, { "epoch": 14.78619153674833, "grad_norm": 20.356592178344727, "learning_rate": 1e-06, "loss": 0.3329, "num_input_tokens_seen": 371998200, "step": 6639 }, { "epoch": 14.78619153674833, "loss": 0.32651287317276, "loss_ce": 9.684590622782707e-05, "loss_iou": 0.1474609375, "loss_num": 0.00616455078125, "loss_xval": 0.326171875, "num_input_tokens_seen": 371998200, "step": 6639 }, { "epoch": 14.788418708240535, "grad_norm": 16.34804344177246, "learning_rate": 1e-06, "loss": 0.4732, "num_input_tokens_seen": 372054520, "step": 6640 }, { "epoch": 14.788418708240535, "loss": 0.386675626039505, "loss_ce": 7.893913425505161e-05, "loss_iou": 0.158203125, "loss_num": 0.01409912109375, "loss_xval": 0.38671875, "num_input_tokens_seen": 372054520, "step": 6640 }, { "epoch": 14.79064587973274, "grad_norm": 31.840667724609375, "learning_rate": 1e-06, "loss": 0.5544, "num_input_tokens_seen": 372109948, "step": 6641 }, { "epoch": 14.79064587973274, "loss": 0.5972850322723389, "loss_ce": 0.00011702888878062367, "loss_iou": 0.25, "loss_num": 0.019287109375, "loss_xval": 0.59765625, "num_input_tokens_seen": 372109948, "step": 6641 }, { "epoch": 14.792873051224944, "grad_norm": 16.975744247436523, "learning_rate": 1e-06, "loss": 0.4895, "num_input_tokens_seen": 372165020, "step": 6642 }, { "epoch": 14.792873051224944, "loss": 0.4539491832256317, "loss_ce": 9.175058221444488e-05, "loss_iou": 0.2041015625, "loss_num": 0.0091552734375, "loss_xval": 0.453125, "num_input_tokens_seen": 372165020, "step": 6642 }, { "epoch": 14.79510022271715, "grad_norm": 17.83343505859375, "learning_rate": 1e-06, "loss": 0.7496, "num_input_tokens_seen": 372220692, "step": 6643 }, { "epoch": 14.79510022271715, "loss": 0.9950196743011475, "loss_ce": 0.0001466473040636629, "loss_iou": 0.412109375, "loss_num": 0.034423828125, "loss_xval": 0.99609375, "num_input_tokens_seen": 372220692, "step": 6643 }, { "epoch": 14.797327394209354, "grad_norm": 13.327498435974121, "learning_rate": 1e-06, "loss": 0.4146, "num_input_tokens_seen": 372275868, "step": 6644 }, { "epoch": 14.797327394209354, "loss": 0.3324364423751831, "loss_ce": 0.00010003681381931528, "loss_iou": 0.1533203125, "loss_num": 0.005035400390625, "loss_xval": 0.33203125, "num_input_tokens_seen": 372275868, "step": 6644 }, { "epoch": 14.799554565701559, "grad_norm": 20.737871170043945, "learning_rate": 1e-06, "loss": 0.5179, "num_input_tokens_seen": 372327744, "step": 6645 }, { "epoch": 14.799554565701559, "loss": 0.3846180737018585, "loss_ce": 9.659097850089893e-05, "loss_iou": 0.171875, "loss_num": 0.008056640625, "loss_xval": 0.384765625, "num_input_tokens_seen": 372327744, "step": 6645 }, { "epoch": 14.801781737193764, "grad_norm": 19.62179946899414, "learning_rate": 1e-06, "loss": 0.5129, "num_input_tokens_seen": 372384104, "step": 6646 }, { "epoch": 14.801781737193764, "loss": 0.31222450733184814, "loss_ce": 9.073612454812974e-05, "loss_iou": 0.11669921875, "loss_num": 0.0157470703125, "loss_xval": 0.3125, "num_input_tokens_seen": 372384104, "step": 6646 }, { "epoch": 14.804008908685969, "grad_norm": 19.51956558227539, "learning_rate": 1e-06, "loss": 0.4317, "num_input_tokens_seen": 372440792, "step": 6647 }, { "epoch": 14.804008908685969, "loss": 0.40453433990478516, "loss_ce": 0.00011538183025550097, "loss_iou": 0.1806640625, "loss_num": 0.0084228515625, "loss_xval": 0.404296875, "num_input_tokens_seen": 372440792, "step": 6647 }, { "epoch": 14.806236080178174, "grad_norm": 18.005043029785156, "learning_rate": 1e-06, "loss": 0.5158, "num_input_tokens_seen": 372496276, "step": 6648 }, { "epoch": 14.806236080178174, "loss": 0.7478663921356201, "loss_ce": 0.00030782315297983587, "loss_iou": 0.333984375, "loss_num": 0.0159912109375, "loss_xval": 0.74609375, "num_input_tokens_seen": 372496276, "step": 6648 }, { "epoch": 14.808463251670378, "grad_norm": 15.16850757598877, "learning_rate": 1e-06, "loss": 0.6658, "num_input_tokens_seen": 372554104, "step": 6649 }, { "epoch": 14.808463251670378, "loss": 0.7384415864944458, "loss_ce": 0.00016030135157052428, "loss_iou": 0.26171875, "loss_num": 0.043212890625, "loss_xval": 0.73828125, "num_input_tokens_seen": 372554104, "step": 6649 }, { "epoch": 14.810690423162583, "grad_norm": 15.439560890197754, "learning_rate": 1e-06, "loss": 0.4836, "num_input_tokens_seen": 372609652, "step": 6650 }, { "epoch": 14.810690423162583, "loss": 0.499762624502182, "loss_ce": 0.0002508952165953815, "loss_iou": 0.201171875, "loss_num": 0.0191650390625, "loss_xval": 0.5, "num_input_tokens_seen": 372609652, "step": 6650 }, { "epoch": 14.812917594654788, "grad_norm": 17.865459442138672, "learning_rate": 1e-06, "loss": 0.5121, "num_input_tokens_seen": 372665168, "step": 6651 }, { "epoch": 14.812917594654788, "loss": 0.5034142136573792, "loss_ce": 0.00011833346798084676, "loss_iou": 0.2099609375, "loss_num": 0.016845703125, "loss_xval": 0.50390625, "num_input_tokens_seen": 372665168, "step": 6651 }, { "epoch": 14.815144766146993, "grad_norm": 24.064001083374023, "learning_rate": 1e-06, "loss": 0.4675, "num_input_tokens_seen": 372719608, "step": 6652 }, { "epoch": 14.815144766146993, "loss": 0.550445020198822, "loss_ce": 0.00015209712728392333, "loss_iou": 0.255859375, "loss_num": 0.00732421875, "loss_xval": 0.55078125, "num_input_tokens_seen": 372719608, "step": 6652 }, { "epoch": 14.817371937639198, "grad_norm": 19.275604248046875, "learning_rate": 1e-06, "loss": 0.3122, "num_input_tokens_seen": 372777240, "step": 6653 }, { "epoch": 14.817371937639198, "loss": 0.3434959650039673, "loss_ce": 0.00011215943231945857, "loss_iou": 0.140625, "loss_num": 0.0125732421875, "loss_xval": 0.34375, "num_input_tokens_seen": 372777240, "step": 6653 }, { "epoch": 14.819599109131403, "grad_norm": 24.118017196655273, "learning_rate": 1e-06, "loss": 0.5053, "num_input_tokens_seen": 372828860, "step": 6654 }, { "epoch": 14.819599109131403, "loss": 0.48031556606292725, "loss_ce": 9.092504478758201e-05, "loss_iou": 0.189453125, "loss_num": 0.0201416015625, "loss_xval": 0.48046875, "num_input_tokens_seen": 372828860, "step": 6654 }, { "epoch": 14.821826280623608, "grad_norm": 15.35105037689209, "learning_rate": 1e-06, "loss": 0.4081, "num_input_tokens_seen": 372886876, "step": 6655 }, { "epoch": 14.821826280623608, "loss": 0.3949987590312958, "loss_ce": 0.00010130574082722887, "loss_iou": 0.1650390625, "loss_num": 0.01287841796875, "loss_xval": 0.39453125, "num_input_tokens_seen": 372886876, "step": 6655 }, { "epoch": 14.824053452115812, "grad_norm": 22.87786293029785, "learning_rate": 1e-06, "loss": 0.5704, "num_input_tokens_seen": 372943852, "step": 6656 }, { "epoch": 14.824053452115812, "loss": 0.5047581195831299, "loss_ce": 0.00011940376134589314, "loss_iou": 0.203125, "loss_num": 0.01953125, "loss_xval": 0.50390625, "num_input_tokens_seen": 372943852, "step": 6656 }, { "epoch": 14.826280623608017, "grad_norm": 14.802397727966309, "learning_rate": 1e-06, "loss": 0.2924, "num_input_tokens_seen": 373000588, "step": 6657 }, { "epoch": 14.826280623608017, "loss": 0.2866537570953369, "loss_ce": 9.368563769385219e-05, "loss_iou": 0.12890625, "loss_num": 0.00555419921875, "loss_xval": 0.287109375, "num_input_tokens_seen": 373000588, "step": 6657 }, { "epoch": 14.828507795100222, "grad_norm": 24.41092872619629, "learning_rate": 1e-06, "loss": 0.4099, "num_input_tokens_seen": 373058656, "step": 6658 }, { "epoch": 14.828507795100222, "loss": 0.4437221884727478, "loss_ce": 0.00011863959662150592, "loss_iou": 0.1923828125, "loss_num": 0.01190185546875, "loss_xval": 0.443359375, "num_input_tokens_seen": 373058656, "step": 6658 }, { "epoch": 14.830734966592427, "grad_norm": 30.71437644958496, "learning_rate": 1e-06, "loss": 0.4298, "num_input_tokens_seen": 373112252, "step": 6659 }, { "epoch": 14.830734966592427, "loss": 0.3821251690387726, "loss_ce": 0.00010613477934384719, "loss_iou": 0.166015625, "loss_num": 0.010009765625, "loss_xval": 0.3828125, "num_input_tokens_seen": 373112252, "step": 6659 }, { "epoch": 14.832962138084632, "grad_norm": 14.227256774902344, "learning_rate": 1e-06, "loss": 0.4838, "num_input_tokens_seen": 373169236, "step": 6660 }, { "epoch": 14.832962138084632, "loss": 0.4358832836151123, "loss_ce": 9.228321141563356e-05, "loss_iou": 0.1953125, "loss_num": 0.00909423828125, "loss_xval": 0.435546875, "num_input_tokens_seen": 373169236, "step": 6660 }, { "epoch": 14.835189309576837, "grad_norm": 17.108369827270508, "learning_rate": 1e-06, "loss": 0.6556, "num_input_tokens_seen": 373227608, "step": 6661 }, { "epoch": 14.835189309576837, "loss": 0.8442061543464661, "loss_ce": 0.00015093988622538745, "loss_iou": 0.333984375, "loss_num": 0.034912109375, "loss_xval": 0.84375, "num_input_tokens_seen": 373227608, "step": 6661 }, { "epoch": 14.837416481069042, "grad_norm": 15.365765571594238, "learning_rate": 1e-06, "loss": 0.5749, "num_input_tokens_seen": 373280432, "step": 6662 }, { "epoch": 14.837416481069042, "loss": 0.6416932940483093, "loss_ce": 9.174088336294517e-05, "loss_iou": 0.265625, "loss_num": 0.021728515625, "loss_xval": 0.640625, "num_input_tokens_seen": 373280432, "step": 6662 }, { "epoch": 14.839643652561247, "grad_norm": 20.231231689453125, "learning_rate": 1e-06, "loss": 0.5829, "num_input_tokens_seen": 373335292, "step": 6663 }, { "epoch": 14.839643652561247, "loss": 0.5822228193283081, "loss_ce": 0.00013056171883363277, "loss_iou": 0.248046875, "loss_num": 0.01708984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 373335292, "step": 6663 }, { "epoch": 14.841870824053451, "grad_norm": 14.344491004943848, "learning_rate": 1e-06, "loss": 0.3425, "num_input_tokens_seen": 373391224, "step": 6664 }, { "epoch": 14.841870824053451, "loss": 0.4869232773780823, "loss_ce": 0.00010687689064070582, "loss_iou": 0.201171875, "loss_num": 0.0167236328125, "loss_xval": 0.486328125, "num_input_tokens_seen": 373391224, "step": 6664 }, { "epoch": 14.844097995545656, "grad_norm": 18.94646644592285, "learning_rate": 1e-06, "loss": 0.4103, "num_input_tokens_seen": 373448636, "step": 6665 }, { "epoch": 14.844097995545656, "loss": 0.34946849942207336, "loss_ce": 0.00010326325718779117, "loss_iou": 0.1640625, "loss_num": 0.004180908203125, "loss_xval": 0.349609375, "num_input_tokens_seen": 373448636, "step": 6665 }, { "epoch": 14.846325167037861, "grad_norm": 26.247119903564453, "learning_rate": 1e-06, "loss": 0.4845, "num_input_tokens_seen": 373505896, "step": 6666 }, { "epoch": 14.846325167037861, "loss": 0.44346243143081665, "loss_ce": 0.0001030644925776869, "loss_iou": 0.18359375, "loss_num": 0.01513671875, "loss_xval": 0.443359375, "num_input_tokens_seen": 373505896, "step": 6666 }, { "epoch": 14.848552338530066, "grad_norm": 16.330686569213867, "learning_rate": 1e-06, "loss": 0.3428, "num_input_tokens_seen": 373562748, "step": 6667 }, { "epoch": 14.848552338530066, "loss": 0.3223639130592346, "loss_ce": 9.82833735179156e-05, "loss_iou": 0.1337890625, "loss_num": 0.01080322265625, "loss_xval": 0.322265625, "num_input_tokens_seen": 373562748, "step": 6667 }, { "epoch": 14.85077951002227, "grad_norm": 17.0013484954834, "learning_rate": 1e-06, "loss": 0.4217, "num_input_tokens_seen": 373620644, "step": 6668 }, { "epoch": 14.85077951002227, "loss": 0.4391821026802063, "loss_ce": 9.516206046100706e-05, "loss_iou": 0.1962890625, "loss_num": 0.00927734375, "loss_xval": 0.439453125, "num_input_tokens_seen": 373620644, "step": 6668 }, { "epoch": 14.853006681514476, "grad_norm": 18.03797721862793, "learning_rate": 1e-06, "loss": 0.5505, "num_input_tokens_seen": 373675496, "step": 6669 }, { "epoch": 14.853006681514476, "loss": 0.6275442242622375, "loss_ce": 0.00010283813753630966, "loss_iou": 0.279296875, "loss_num": 0.013427734375, "loss_xval": 0.62890625, "num_input_tokens_seen": 373675496, "step": 6669 }, { "epoch": 14.855233853006682, "grad_norm": 18.613082885742188, "learning_rate": 1e-06, "loss": 0.4396, "num_input_tokens_seen": 373729180, "step": 6670 }, { "epoch": 14.855233853006682, "loss": 0.28043514490127563, "loss_ce": 0.00010065691458294168, "loss_iou": 0.119140625, "loss_num": 0.00830078125, "loss_xval": 0.28125, "num_input_tokens_seen": 373729180, "step": 6670 }, { "epoch": 14.857461024498887, "grad_norm": 15.190927505493164, "learning_rate": 1e-06, "loss": 0.632, "num_input_tokens_seen": 373785848, "step": 6671 }, { "epoch": 14.857461024498887, "loss": 0.4553018808364868, "loss_ce": 0.00010172194743063301, "loss_iou": 0.1884765625, "loss_num": 0.01556396484375, "loss_xval": 0.455078125, "num_input_tokens_seen": 373785848, "step": 6671 }, { "epoch": 14.859688195991092, "grad_norm": 14.331643104553223, "learning_rate": 1e-06, "loss": 0.5239, "num_input_tokens_seen": 373840668, "step": 6672 }, { "epoch": 14.859688195991092, "loss": 0.386011004447937, "loss_ce": 8.569139026803896e-05, "loss_iou": 0.1494140625, "loss_num": 0.017333984375, "loss_xval": 0.38671875, "num_input_tokens_seen": 373840668, "step": 6672 }, { "epoch": 14.861915367483297, "grad_norm": 14.409646987915039, "learning_rate": 1e-06, "loss": 0.5363, "num_input_tokens_seen": 373895844, "step": 6673 }, { "epoch": 14.861915367483297, "loss": 0.6987284421920776, "loss_ce": 0.00012002349831163883, "loss_iou": 0.3203125, "loss_num": 0.0118408203125, "loss_xval": 0.69921875, "num_input_tokens_seen": 373895844, "step": 6673 }, { "epoch": 14.864142538975502, "grad_norm": 18.301753997802734, "learning_rate": 1e-06, "loss": 0.4477, "num_input_tokens_seen": 373949004, "step": 6674 }, { "epoch": 14.864142538975502, "loss": 0.45737865567207336, "loss_ce": 0.00010326381016056985, "loss_iou": 0.1982421875, "loss_num": 0.01220703125, "loss_xval": 0.45703125, "num_input_tokens_seen": 373949004, "step": 6674 }, { "epoch": 14.866369710467707, "grad_norm": 12.914517402648926, "learning_rate": 1e-06, "loss": 0.4054, "num_input_tokens_seen": 374005364, "step": 6675 }, { "epoch": 14.866369710467707, "loss": 0.3800061047077179, "loss_ce": 0.0001232977374456823, "loss_iou": 0.1748046875, "loss_num": 0.006072998046875, "loss_xval": 0.37890625, "num_input_tokens_seen": 374005364, "step": 6675 }, { "epoch": 14.868596881959911, "grad_norm": 17.27826690673828, "learning_rate": 1e-06, "loss": 0.3994, "num_input_tokens_seen": 374060220, "step": 6676 }, { "epoch": 14.868596881959911, "loss": 0.3738711476325989, "loss_ce": 9.184792725136504e-05, "loss_iou": 0.1611328125, "loss_num": 0.01025390625, "loss_xval": 0.373046875, "num_input_tokens_seen": 374060220, "step": 6676 }, { "epoch": 14.870824053452116, "grad_norm": 19.53494644165039, "learning_rate": 1e-06, "loss": 0.2948, "num_input_tokens_seen": 374117248, "step": 6677 }, { "epoch": 14.870824053452116, "loss": 0.31820207834243774, "loss_ce": 8.682158659212291e-05, "loss_iou": 0.134765625, "loss_num": 0.009521484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 374117248, "step": 6677 }, { "epoch": 14.873051224944321, "grad_norm": 25.144716262817383, "learning_rate": 1e-06, "loss": 0.5595, "num_input_tokens_seen": 374170888, "step": 6678 }, { "epoch": 14.873051224944321, "loss": 0.7696267366409302, "loss_ce": 9.549126116326079e-05, "loss_iou": 0.287109375, "loss_num": 0.038818359375, "loss_xval": 0.76953125, "num_input_tokens_seen": 374170888, "step": 6678 }, { "epoch": 14.875278396436526, "grad_norm": 26.434158325195312, "learning_rate": 1e-06, "loss": 0.3463, "num_input_tokens_seen": 374229660, "step": 6679 }, { "epoch": 14.875278396436526, "loss": 0.5142629146575928, "loss_ce": 0.00010273464431520551, "loss_iou": 0.236328125, "loss_num": 0.00823974609375, "loss_xval": 0.515625, "num_input_tokens_seen": 374229660, "step": 6679 }, { "epoch": 14.877505567928731, "grad_norm": 18.97011947631836, "learning_rate": 1e-06, "loss": 0.6984, "num_input_tokens_seen": 374285568, "step": 6680 }, { "epoch": 14.877505567928731, "loss": 0.852816641330719, "loss_ce": 0.00015551211254205555, "loss_iou": 0.33984375, "loss_num": 0.0341796875, "loss_xval": 0.8515625, "num_input_tokens_seen": 374285568, "step": 6680 }, { "epoch": 14.879732739420936, "grad_norm": 23.420482635498047, "learning_rate": 1e-06, "loss": 0.5408, "num_input_tokens_seen": 374343228, "step": 6681 }, { "epoch": 14.879732739420936, "loss": 0.7148338556289673, "loss_ce": 0.0004784108605235815, "loss_iou": 0.296875, "loss_num": 0.02392578125, "loss_xval": 0.71484375, "num_input_tokens_seen": 374343228, "step": 6681 }, { "epoch": 14.88195991091314, "grad_norm": 19.06096076965332, "learning_rate": 1e-06, "loss": 0.4187, "num_input_tokens_seen": 374399024, "step": 6682 }, { "epoch": 14.88195991091314, "loss": 0.29825353622436523, "loss_ce": 9.67753876466304e-05, "loss_iou": 0.1337890625, "loss_num": 0.0059814453125, "loss_xval": 0.298828125, "num_input_tokens_seen": 374399024, "step": 6682 }, { "epoch": 14.884187082405345, "grad_norm": 25.65113639831543, "learning_rate": 1e-06, "loss": 0.5878, "num_input_tokens_seen": 374456632, "step": 6683 }, { "epoch": 14.884187082405345, "loss": 0.6034575700759888, "loss_ce": 9.450462675886229e-05, "loss_iou": 0.26953125, "loss_num": 0.01287841796875, "loss_xval": 0.6015625, "num_input_tokens_seen": 374456632, "step": 6683 }, { "epoch": 14.88641425389755, "grad_norm": 18.01433753967285, "learning_rate": 1e-06, "loss": 0.4982, "num_input_tokens_seen": 374511092, "step": 6684 }, { "epoch": 14.88641425389755, "loss": 0.5666952133178711, "loss_ce": 0.00010584105621092021, "loss_iou": 0.232421875, "loss_num": 0.020263671875, "loss_xval": 0.56640625, "num_input_tokens_seen": 374511092, "step": 6684 }, { "epoch": 14.888641425389755, "grad_norm": 19.60915756225586, "learning_rate": 1e-06, "loss": 0.4411, "num_input_tokens_seen": 374566352, "step": 6685 }, { "epoch": 14.888641425389755, "loss": 0.5250272154808044, "loss_ce": 0.00012483444879762828, "loss_iou": 0.240234375, "loss_num": 0.009033203125, "loss_xval": 0.5234375, "num_input_tokens_seen": 374566352, "step": 6685 }, { "epoch": 14.89086859688196, "grad_norm": 22.95705795288086, "learning_rate": 1e-06, "loss": 0.454, "num_input_tokens_seen": 374624096, "step": 6686 }, { "epoch": 14.89086859688196, "loss": 0.3750418722629547, "loss_ce": 0.00010290060890838504, "loss_iou": 0.15625, "loss_num": 0.01251220703125, "loss_xval": 0.375, "num_input_tokens_seen": 374624096, "step": 6686 }, { "epoch": 14.893095768374165, "grad_norm": 21.94954490661621, "learning_rate": 1e-06, "loss": 0.5847, "num_input_tokens_seen": 374678944, "step": 6687 }, { "epoch": 14.893095768374165, "loss": 0.6209622621536255, "loss_ce": 0.0001126552015193738, "loss_iou": 0.263671875, "loss_num": 0.0186767578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 374678944, "step": 6687 }, { "epoch": 14.89532293986637, "grad_norm": 21.50327491760254, "learning_rate": 1e-06, "loss": 0.5005, "num_input_tokens_seen": 374736156, "step": 6688 }, { "epoch": 14.89532293986637, "loss": 0.420645534992218, "loss_ce": 0.00011328914115438238, "loss_iou": 0.201171875, "loss_num": 0.0038299560546875, "loss_xval": 0.419921875, "num_input_tokens_seen": 374736156, "step": 6688 }, { "epoch": 14.897550111358575, "grad_norm": 25.155494689941406, "learning_rate": 1e-06, "loss": 0.4771, "num_input_tokens_seen": 374792696, "step": 6689 }, { "epoch": 14.897550111358575, "loss": 0.4023246765136719, "loss_ce": 0.00010299000859959051, "loss_iou": 0.173828125, "loss_num": 0.01116943359375, "loss_xval": 0.40234375, "num_input_tokens_seen": 374792696, "step": 6689 }, { "epoch": 14.89977728285078, "grad_norm": 15.196331024169922, "learning_rate": 1e-06, "loss": 0.2636, "num_input_tokens_seen": 374849908, "step": 6690 }, { "epoch": 14.89977728285078, "loss": 0.24606722593307495, "loss_ce": 0.0009500437881797552, "loss_iou": 0.1123046875, "loss_num": 0.003997802734375, "loss_xval": 0.2451171875, "num_input_tokens_seen": 374849908, "step": 6690 }, { "epoch": 14.902004454342984, "grad_norm": 26.573570251464844, "learning_rate": 1e-06, "loss": 0.4118, "num_input_tokens_seen": 374904924, "step": 6691 }, { "epoch": 14.902004454342984, "loss": 0.3819239139556885, "loss_ce": 8.79746803548187e-05, "loss_iou": 0.1611328125, "loss_num": 0.0118408203125, "loss_xval": 0.3828125, "num_input_tokens_seen": 374904924, "step": 6691 }, { "epoch": 14.90423162583519, "grad_norm": 29.359655380249023, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 374959508, "step": 6692 }, { "epoch": 14.90423162583519, "loss": 0.28562378883361816, "loss_ce": 0.00010133428440894932, "loss_iou": 0.11474609375, "loss_num": 0.01116943359375, "loss_xval": 0.28515625, "num_input_tokens_seen": 374959508, "step": 6692 }, { "epoch": 14.906458797327394, "grad_norm": 16.882619857788086, "learning_rate": 1e-06, "loss": 0.5222, "num_input_tokens_seen": 375016108, "step": 6693 }, { "epoch": 14.906458797327394, "loss": 0.4062420725822449, "loss_ce": 0.00011414792970754206, "loss_iou": 0.18359375, "loss_num": 0.007781982421875, "loss_xval": 0.40625, "num_input_tokens_seen": 375016108, "step": 6693 }, { "epoch": 14.908685968819599, "grad_norm": 15.009581565856934, "learning_rate": 1e-06, "loss": 0.4768, "num_input_tokens_seen": 375073380, "step": 6694 }, { "epoch": 14.908685968819599, "loss": 0.5022239685058594, "loss_ce": 0.00014879369700793177, "loss_iou": 0.2255859375, "loss_num": 0.01025390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 375073380, "step": 6694 }, { "epoch": 14.910913140311804, "grad_norm": 16.225059509277344, "learning_rate": 1e-06, "loss": 0.3288, "num_input_tokens_seen": 375129260, "step": 6695 }, { "epoch": 14.910913140311804, "loss": 0.23318275809288025, "loss_ce": 8.948949835030362e-05, "loss_iou": 0.10009765625, "loss_num": 0.006500244140625, "loss_xval": 0.2333984375, "num_input_tokens_seen": 375129260, "step": 6695 }, { "epoch": 14.913140311804009, "grad_norm": 18.727340698242188, "learning_rate": 1e-06, "loss": 0.4627, "num_input_tokens_seen": 375184036, "step": 6696 }, { "epoch": 14.913140311804009, "loss": 0.5411036610603333, "loss_ce": 8.803869422990829e-05, "loss_iou": 0.2158203125, "loss_num": 0.0218505859375, "loss_xval": 0.5390625, "num_input_tokens_seen": 375184036, "step": 6696 }, { "epoch": 14.915367483296214, "grad_norm": 14.84618854522705, "learning_rate": 1e-06, "loss": 0.3687, "num_input_tokens_seen": 375240860, "step": 6697 }, { "epoch": 14.915367483296214, "loss": 0.3764420747756958, "loss_ce": 9.930084343068302e-05, "loss_iou": 0.1748046875, "loss_num": 0.00543212890625, "loss_xval": 0.376953125, "num_input_tokens_seen": 375240860, "step": 6697 }, { "epoch": 14.917594654788418, "grad_norm": 14.689440727233887, "learning_rate": 1e-06, "loss": 0.5437, "num_input_tokens_seen": 375297676, "step": 6698 }, { "epoch": 14.917594654788418, "loss": 0.5835915803909302, "loss_ce": 9.553229756420478e-05, "loss_iou": 0.2578125, "loss_num": 0.0133056640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 375297676, "step": 6698 }, { "epoch": 14.919821826280623, "grad_norm": 17.56962013244629, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 375353512, "step": 6699 }, { "epoch": 14.919821826280623, "loss": 0.5415977239608765, "loss_ce": 9.384811710333452e-05, "loss_iou": 0.2177734375, "loss_num": 0.021240234375, "loss_xval": 0.54296875, "num_input_tokens_seen": 375353512, "step": 6699 }, { "epoch": 14.922048997772828, "grad_norm": 17.380096435546875, "learning_rate": 1e-06, "loss": 0.329, "num_input_tokens_seen": 375408140, "step": 6700 }, { "epoch": 14.922048997772828, "loss": 0.3042938709259033, "loss_ce": 9.468305506743491e-05, "loss_iou": 0.126953125, "loss_num": 0.00994873046875, "loss_xval": 0.3046875, "num_input_tokens_seen": 375408140, "step": 6700 }, { "epoch": 14.924276169265033, "grad_norm": 19.394245147705078, "learning_rate": 1e-06, "loss": 0.3533, "num_input_tokens_seen": 375462496, "step": 6701 }, { "epoch": 14.924276169265033, "loss": 0.3009084463119507, "loss_ce": 9.669142309576273e-05, "loss_iou": 0.12890625, "loss_num": 0.00836181640625, "loss_xval": 0.30078125, "num_input_tokens_seen": 375462496, "step": 6701 }, { "epoch": 14.926503340757238, "grad_norm": 46.145076751708984, "learning_rate": 1e-06, "loss": 0.5536, "num_input_tokens_seen": 375515212, "step": 6702 }, { "epoch": 14.926503340757238, "loss": 0.44150978326797485, "loss_ce": 0.00010355835547670722, "loss_iou": 0.1826171875, "loss_num": 0.01519775390625, "loss_xval": 0.44140625, "num_input_tokens_seen": 375515212, "step": 6702 }, { "epoch": 14.928730512249443, "grad_norm": 17.22191619873047, "learning_rate": 1e-06, "loss": 0.6082, "num_input_tokens_seen": 375573600, "step": 6703 }, { "epoch": 14.928730512249443, "loss": 0.49972373247146606, "loss_ce": 8.993092342279851e-05, "loss_iou": 0.2314453125, "loss_num": 0.00738525390625, "loss_xval": 0.5, "num_input_tokens_seen": 375573600, "step": 6703 }, { "epoch": 14.930957683741648, "grad_norm": 17.4095458984375, "learning_rate": 1e-06, "loss": 0.4707, "num_input_tokens_seen": 375627936, "step": 6704 }, { "epoch": 14.930957683741648, "loss": 0.35287582874298096, "loss_ce": 9.260718070436269e-05, "loss_iou": 0.1474609375, "loss_num": 0.0115966796875, "loss_xval": 0.353515625, "num_input_tokens_seen": 375627936, "step": 6704 }, { "epoch": 14.933184855233852, "grad_norm": 14.413276672363281, "learning_rate": 1e-06, "loss": 0.502, "num_input_tokens_seen": 375684652, "step": 6705 }, { "epoch": 14.933184855233852, "loss": 0.6157550811767578, "loss_ce": 9.349231550004333e-05, "loss_iou": 0.267578125, "loss_num": 0.0162353515625, "loss_xval": 0.6171875, "num_input_tokens_seen": 375684652, "step": 6705 }, { "epoch": 14.935412026726057, "grad_norm": 28.362884521484375, "learning_rate": 1e-06, "loss": 0.4897, "num_input_tokens_seen": 375741700, "step": 6706 }, { "epoch": 14.935412026726057, "loss": 0.3510599732398987, "loss_ce": 0.00010782700701383874, "loss_iou": 0.15625, "loss_num": 0.00787353515625, "loss_xval": 0.3515625, "num_input_tokens_seen": 375741700, "step": 6706 }, { "epoch": 14.937639198218262, "grad_norm": 24.491363525390625, "learning_rate": 1e-06, "loss": 0.4178, "num_input_tokens_seen": 375797580, "step": 6707 }, { "epoch": 14.937639198218262, "loss": 0.2931881546974182, "loss_ce": 9.731938189361244e-05, "loss_iou": 0.1298828125, "loss_num": 0.006805419921875, "loss_xval": 0.29296875, "num_input_tokens_seen": 375797580, "step": 6707 }, { "epoch": 14.939866369710467, "grad_norm": 18.060317993164062, "learning_rate": 1e-06, "loss": 0.5446, "num_input_tokens_seen": 375851716, "step": 6708 }, { "epoch": 14.939866369710467, "loss": 0.40395689010620117, "loss_ce": 0.00014827345148660243, "loss_iou": 0.185546875, "loss_num": 0.006622314453125, "loss_xval": 0.404296875, "num_input_tokens_seen": 375851716, "step": 6708 }, { "epoch": 14.942093541202672, "grad_norm": 29.22158432006836, "learning_rate": 1e-06, "loss": 0.4788, "num_input_tokens_seen": 375907252, "step": 6709 }, { "epoch": 14.942093541202672, "loss": 0.43601399660110474, "loss_ce": 0.00010091814328916371, "loss_iou": 0.19921875, "loss_num": 0.007659912109375, "loss_xval": 0.435546875, "num_input_tokens_seen": 375907252, "step": 6709 }, { "epoch": 14.944320712694877, "grad_norm": 23.212482452392578, "learning_rate": 1e-06, "loss": 0.5383, "num_input_tokens_seen": 375962688, "step": 6710 }, { "epoch": 14.944320712694877, "loss": 0.6079156398773193, "loss_ce": 0.0001275775139220059, "loss_iou": 0.263671875, "loss_num": 0.0162353515625, "loss_xval": 0.609375, "num_input_tokens_seen": 375962688, "step": 6710 }, { "epoch": 14.946547884187082, "grad_norm": 17.981061935424805, "learning_rate": 1e-06, "loss": 0.3841, "num_input_tokens_seen": 376018616, "step": 6711 }, { "epoch": 14.946547884187082, "loss": 0.2717045545578003, "loss_ce": 9.8096948931925e-05, "loss_iou": 0.11572265625, "loss_num": 0.00799560546875, "loss_xval": 0.271484375, "num_input_tokens_seen": 376018616, "step": 6711 }, { "epoch": 14.948775055679288, "grad_norm": 19.030765533447266, "learning_rate": 1e-06, "loss": 0.3394, "num_input_tokens_seen": 376074472, "step": 6712 }, { "epoch": 14.948775055679288, "loss": 0.31912869215011597, "loss_ce": 9.793009667191654e-05, "loss_iou": 0.1484375, "loss_num": 0.004241943359375, "loss_xval": 0.318359375, "num_input_tokens_seen": 376074472, "step": 6712 }, { "epoch": 14.951002227171493, "grad_norm": 18.102794647216797, "learning_rate": 1e-06, "loss": 0.5757, "num_input_tokens_seen": 376129896, "step": 6713 }, { "epoch": 14.951002227171493, "loss": 0.7676899433135986, "loss_ce": 0.00011178333807038143, "loss_iou": 0.294921875, "loss_num": 0.03515625, "loss_xval": 0.765625, "num_input_tokens_seen": 376129896, "step": 6713 }, { "epoch": 14.953229398663698, "grad_norm": 28.493032455444336, "learning_rate": 1e-06, "loss": 0.5445, "num_input_tokens_seen": 376188248, "step": 6714 }, { "epoch": 14.953229398663698, "loss": 0.6533517837524414, "loss_ce": 0.00015353792696259916, "loss_iou": 0.267578125, "loss_num": 0.023681640625, "loss_xval": 0.65234375, "num_input_tokens_seen": 376188248, "step": 6714 }, { "epoch": 14.955456570155903, "grad_norm": 36.27149200439453, "learning_rate": 1e-06, "loss": 0.5764, "num_input_tokens_seen": 376241592, "step": 6715 }, { "epoch": 14.955456570155903, "loss": 0.3418195843696594, "loss_ce": 0.00014476115757133812, "loss_iou": 0.1484375, "loss_num": 0.00909423828125, "loss_xval": 0.341796875, "num_input_tokens_seen": 376241592, "step": 6715 }, { "epoch": 14.957683741648108, "grad_norm": 16.67228889465332, "learning_rate": 1e-06, "loss": 0.4621, "num_input_tokens_seen": 376298924, "step": 6716 }, { "epoch": 14.957683741648108, "loss": 0.6529318690299988, "loss_ce": 9.987308294512331e-05, "loss_iou": 0.267578125, "loss_num": 0.0233154296875, "loss_xval": 0.65234375, "num_input_tokens_seen": 376298924, "step": 6716 }, { "epoch": 14.959910913140313, "grad_norm": 14.509023666381836, "learning_rate": 1e-06, "loss": 0.4821, "num_input_tokens_seen": 376356404, "step": 6717 }, { "epoch": 14.959910913140313, "loss": 0.4666660726070404, "loss_ce": 0.00011332825670251623, "loss_iou": 0.208984375, "loss_num": 0.0096435546875, "loss_xval": 0.466796875, "num_input_tokens_seen": 376356404, "step": 6717 }, { "epoch": 14.962138084632517, "grad_norm": 18.535402297973633, "learning_rate": 1e-06, "loss": 0.5056, "num_input_tokens_seen": 376413404, "step": 6718 }, { "epoch": 14.962138084632517, "loss": 0.4525294899940491, "loss_ce": 0.0013728843769058585, "loss_iou": 0.171875, "loss_num": 0.0213623046875, "loss_xval": 0.451171875, "num_input_tokens_seen": 376413404, "step": 6718 }, { "epoch": 14.964365256124722, "grad_norm": 23.715373992919922, "learning_rate": 1e-06, "loss": 0.3654, "num_input_tokens_seen": 376471520, "step": 6719 }, { "epoch": 14.964365256124722, "loss": 0.3640136122703552, "loss_ce": 0.00012200840865261853, "loss_iou": 0.16015625, "loss_num": 0.0087890625, "loss_xval": 0.36328125, "num_input_tokens_seen": 376471520, "step": 6719 }, { "epoch": 14.966592427616927, "grad_norm": 22.593597412109375, "learning_rate": 1e-06, "loss": 0.5501, "num_input_tokens_seen": 376527260, "step": 6720 }, { "epoch": 14.966592427616927, "loss": 0.5731148719787598, "loss_ce": 0.00011685446224873886, "loss_iou": 0.2294921875, "loss_num": 0.022705078125, "loss_xval": 0.57421875, "num_input_tokens_seen": 376527260, "step": 6720 }, { "epoch": 14.968819599109132, "grad_norm": 14.375116348266602, "learning_rate": 1e-06, "loss": 0.5558, "num_input_tokens_seen": 376585680, "step": 6721 }, { "epoch": 14.968819599109132, "loss": 0.45606058835983276, "loss_ce": 0.0006162732024677098, "loss_iou": 0.1982421875, "loss_num": 0.01165771484375, "loss_xval": 0.455078125, "num_input_tokens_seen": 376585680, "step": 6721 }, { "epoch": 14.971046770601337, "grad_norm": 15.256864547729492, "learning_rate": 1e-06, "loss": 0.4053, "num_input_tokens_seen": 376641292, "step": 6722 }, { "epoch": 14.971046770601337, "loss": 0.44297972321510315, "loss_ce": 0.00010863250645343214, "loss_iou": 0.181640625, "loss_num": 0.0159912109375, "loss_xval": 0.443359375, "num_input_tokens_seen": 376641292, "step": 6722 }, { "epoch": 14.973273942093542, "grad_norm": 20.12420082092285, "learning_rate": 1e-06, "loss": 0.5, "num_input_tokens_seen": 376696188, "step": 6723 }, { "epoch": 14.973273942093542, "loss": 0.42197349667549133, "loss_ce": 9.847040200838819e-05, "loss_iou": 0.171875, "loss_num": 0.0157470703125, "loss_xval": 0.421875, "num_input_tokens_seen": 376696188, "step": 6723 }, { "epoch": 14.975501113585747, "grad_norm": 21.817781448364258, "learning_rate": 1e-06, "loss": 0.5414, "num_input_tokens_seen": 376752988, "step": 6724 }, { "epoch": 14.975501113585747, "loss": 0.5460734367370605, "loss_ce": 0.00017501445836387575, "loss_iou": 0.22265625, "loss_num": 0.0205078125, "loss_xval": 0.546875, "num_input_tokens_seen": 376752988, "step": 6724 }, { "epoch": 14.977728285077951, "grad_norm": 16.294130325317383, "learning_rate": 1e-06, "loss": 0.3279, "num_input_tokens_seen": 376810460, "step": 6725 }, { "epoch": 14.977728285077951, "loss": 0.28637370467185974, "loss_ce": 0.00011881387035828084, "loss_iou": 0.109375, "loss_num": 0.01348876953125, "loss_xval": 0.287109375, "num_input_tokens_seen": 376810460, "step": 6725 }, { "epoch": 14.979955456570156, "grad_norm": 17.604066848754883, "learning_rate": 1e-06, "loss": 0.4209, "num_input_tokens_seen": 376866340, "step": 6726 }, { "epoch": 14.979955456570156, "loss": 0.4271067678928375, "loss_ce": 0.00010479907359695062, "loss_iou": 0.193359375, "loss_num": 0.00823974609375, "loss_xval": 0.427734375, "num_input_tokens_seen": 376866340, "step": 6726 }, { "epoch": 14.982182628062361, "grad_norm": 18.081811904907227, "learning_rate": 1e-06, "loss": 0.4397, "num_input_tokens_seen": 376921200, "step": 6727 }, { "epoch": 14.982182628062361, "loss": 0.43722379207611084, "loss_ce": 9.000849240692332e-05, "loss_iou": 0.1826171875, "loss_num": 0.01446533203125, "loss_xval": 0.4375, "num_input_tokens_seen": 376921200, "step": 6727 }, { "epoch": 14.984409799554566, "grad_norm": 18.38675308227539, "learning_rate": 1e-06, "loss": 0.3883, "num_input_tokens_seen": 376979648, "step": 6728 }, { "epoch": 14.984409799554566, "loss": 0.36479222774505615, "loss_ce": 0.00010718008707044646, "loss_iou": 0.146484375, "loss_num": 0.0145263671875, "loss_xval": 0.365234375, "num_input_tokens_seen": 376979648, "step": 6728 }, { "epoch": 14.98663697104677, "grad_norm": 24.232728958129883, "learning_rate": 1e-06, "loss": 0.521, "num_input_tokens_seen": 377035696, "step": 6729 }, { "epoch": 14.98663697104677, "loss": 0.47929060459136963, "loss_ce": 0.00016459994367323816, "loss_iou": 0.1923828125, "loss_num": 0.01904296875, "loss_xval": 0.478515625, "num_input_tokens_seen": 377035696, "step": 6729 }, { "epoch": 14.988864142538976, "grad_norm": 16.893199920654297, "learning_rate": 1e-06, "loss": 0.4381, "num_input_tokens_seen": 377094444, "step": 6730 }, { "epoch": 14.988864142538976, "loss": 0.32279184460639954, "loss_ce": 0.00016000941104721278, "loss_iou": 0.150390625, "loss_num": 0.00457763671875, "loss_xval": 0.322265625, "num_input_tokens_seen": 377094444, "step": 6730 }, { "epoch": 14.99109131403118, "grad_norm": 19.161134719848633, "learning_rate": 1e-06, "loss": 0.3922, "num_input_tokens_seen": 377154056, "step": 6731 }, { "epoch": 14.99109131403118, "loss": 0.3056414723396301, "loss_ce": 9.948984370566905e-05, "loss_iou": 0.140625, "loss_num": 0.0048828125, "loss_xval": 0.3046875, "num_input_tokens_seen": 377154056, "step": 6731 }, { "epoch": 14.993318485523385, "grad_norm": 19.205812454223633, "learning_rate": 1e-06, "loss": 0.3231, "num_input_tokens_seen": 377211084, "step": 6732 }, { "epoch": 14.993318485523385, "loss": 0.34961456060409546, "loss_ce": 0.00012724896077997983, "loss_iou": 0.150390625, "loss_num": 0.0096435546875, "loss_xval": 0.349609375, "num_input_tokens_seen": 377211084, "step": 6732 }, { "epoch": 14.99554565701559, "grad_norm": 19.172298431396484, "learning_rate": 1e-06, "loss": 0.5644, "num_input_tokens_seen": 377269112, "step": 6733 }, { "epoch": 14.99554565701559, "loss": 0.5044995546340942, "loss_ce": 0.00010500279313419014, "loss_iou": 0.21875, "loss_num": 0.0133056640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 377269112, "step": 6733 }, { "epoch": 14.997772828507795, "grad_norm": 18.66171646118164, "learning_rate": 1e-06, "loss": 0.4576, "num_input_tokens_seen": 377325276, "step": 6734 }, { "epoch": 14.997772828507795, "loss": 0.29831087589263916, "loss_ce": 9.311985195381567e-05, "loss_iou": 0.1220703125, "loss_num": 0.01080322265625, "loss_xval": 0.298828125, "num_input_tokens_seen": 377325276, "step": 6734 }, { "epoch": 15.0, "grad_norm": 23.2895565032959, "learning_rate": 1e-06, "loss": 0.3809, "num_input_tokens_seen": 377382228, "step": 6735 }, { "epoch": 15.0, "loss": 0.4358910620212555, "loss_ce": 0.00010004001524066553, "loss_iou": 0.1953125, "loss_num": 0.0089111328125, "loss_xval": 0.435546875, "num_input_tokens_seen": 377382228, "step": 6735 }, { "epoch": 15.002227171492205, "grad_norm": 15.336739540100098, "learning_rate": 1e-06, "loss": 0.4672, "num_input_tokens_seen": 377438264, "step": 6736 }, { "epoch": 15.002227171492205, "loss": 0.25816991925239563, "loss_ce": 0.00011327323591103777, "loss_iou": 0.11474609375, "loss_num": 0.00579833984375, "loss_xval": 0.2578125, "num_input_tokens_seen": 377438264, "step": 6736 }, { "epoch": 15.00445434298441, "grad_norm": 16.66543197631836, "learning_rate": 1e-06, "loss": 0.5834, "num_input_tokens_seen": 377490272, "step": 6737 }, { "epoch": 15.00445434298441, "loss": 0.7062318325042725, "loss_ce": 0.0001771746901795268, "loss_iou": 0.3046875, "loss_num": 0.019775390625, "loss_xval": 0.70703125, "num_input_tokens_seen": 377490272, "step": 6737 }, { "epoch": 15.006681514476615, "grad_norm": 16.386180877685547, "learning_rate": 1e-06, "loss": 0.3941, "num_input_tokens_seen": 377547544, "step": 6738 }, { "epoch": 15.006681514476615, "loss": 0.4180727005004883, "loss_ce": 8.869110752129927e-05, "loss_iou": 0.169921875, "loss_num": 0.015625, "loss_xval": 0.41796875, "num_input_tokens_seen": 377547544, "step": 6738 }, { "epoch": 15.00890868596882, "grad_norm": 13.663156509399414, "learning_rate": 1e-06, "loss": 0.3605, "num_input_tokens_seen": 377605960, "step": 6739 }, { "epoch": 15.00890868596882, "loss": 0.29490476846694946, "loss_ce": 0.00010495680180611089, "loss_iou": 0.130859375, "loss_num": 0.00653076171875, "loss_xval": 0.294921875, "num_input_tokens_seen": 377605960, "step": 6739 }, { "epoch": 15.011135857461024, "grad_norm": 20.333576202392578, "learning_rate": 1e-06, "loss": 0.3858, "num_input_tokens_seen": 377663176, "step": 6740 }, { "epoch": 15.011135857461024, "loss": 0.34868597984313965, "loss_ce": 0.00011420654482208192, "loss_iou": 0.15625, "loss_num": 0.00726318359375, "loss_xval": 0.34765625, "num_input_tokens_seen": 377663176, "step": 6740 }, { "epoch": 15.01336302895323, "grad_norm": 13.948286056518555, "learning_rate": 1e-06, "loss": 0.3959, "num_input_tokens_seen": 377717832, "step": 6741 }, { "epoch": 15.01336302895323, "loss": 0.42416131496429443, "loss_ce": 8.905168215278536e-05, "loss_iou": 0.173828125, "loss_num": 0.0155029296875, "loss_xval": 0.423828125, "num_input_tokens_seen": 377717832, "step": 6741 }, { "epoch": 15.015590200445434, "grad_norm": 19.52324867248535, "learning_rate": 1e-06, "loss": 0.3894, "num_input_tokens_seen": 377775816, "step": 6742 }, { "epoch": 15.015590200445434, "loss": 0.43786442279815674, "loss_ce": 0.00012025667092530057, "loss_iou": 0.177734375, "loss_num": 0.0162353515625, "loss_xval": 0.4375, "num_input_tokens_seen": 377775816, "step": 6742 }, { "epoch": 15.017817371937639, "grad_norm": 26.31039047241211, "learning_rate": 1e-06, "loss": 0.4081, "num_input_tokens_seen": 377834092, "step": 6743 }, { "epoch": 15.017817371937639, "loss": 0.4304569363594055, "loss_ce": 9.805826266529039e-05, "loss_iou": 0.189453125, "loss_num": 0.01025390625, "loss_xval": 0.4296875, "num_input_tokens_seen": 377834092, "step": 6743 }, { "epoch": 15.020044543429844, "grad_norm": 13.118270874023438, "learning_rate": 1e-06, "loss": 0.6506, "num_input_tokens_seen": 377890548, "step": 6744 }, { "epoch": 15.020044543429844, "loss": 0.8540452718734741, "loss_ce": 0.0001634818036109209, "loss_iou": 0.349609375, "loss_num": 0.03125, "loss_xval": 0.85546875, "num_input_tokens_seen": 377890548, "step": 6744 }, { "epoch": 15.022271714922049, "grad_norm": 16.432491302490234, "learning_rate": 1e-06, "loss": 0.394, "num_input_tokens_seen": 377948476, "step": 6745 }, { "epoch": 15.022271714922049, "loss": 0.39914602041244507, "loss_ce": 9.820661216508597e-05, "loss_iou": 0.181640625, "loss_num": 0.00738525390625, "loss_xval": 0.3984375, "num_input_tokens_seen": 377948476, "step": 6745 }, { "epoch": 15.024498886414253, "grad_norm": 17.178516387939453, "learning_rate": 1e-06, "loss": 0.435, "num_input_tokens_seen": 378003020, "step": 6746 }, { "epoch": 15.024498886414253, "loss": 0.5235254168510437, "loss_ce": 0.0004541404196061194, "loss_iou": 0.232421875, "loss_num": 0.01171875, "loss_xval": 0.5234375, "num_input_tokens_seen": 378003020, "step": 6746 }, { "epoch": 15.026726057906458, "grad_norm": 28.950069427490234, "learning_rate": 1e-06, "loss": 0.327, "num_input_tokens_seen": 378060880, "step": 6747 }, { "epoch": 15.026726057906458, "loss": 0.35339534282684326, "loss_ce": 0.0001238558324985206, "loss_iou": 0.16015625, "loss_num": 0.006622314453125, "loss_xval": 0.353515625, "num_input_tokens_seen": 378060880, "step": 6747 }, { "epoch": 15.028953229398663, "grad_norm": 30.91240882873535, "learning_rate": 1e-06, "loss": 0.6332, "num_input_tokens_seen": 378113388, "step": 6748 }, { "epoch": 15.028953229398663, "loss": 0.6445518136024475, "loss_ce": 0.0016074487939476967, "loss_iou": 0.283203125, "loss_num": 0.0150146484375, "loss_xval": 0.64453125, "num_input_tokens_seen": 378113388, "step": 6748 }, { "epoch": 15.031180400890868, "grad_norm": 110.00398254394531, "learning_rate": 1e-06, "loss": 0.3879, "num_input_tokens_seen": 378168524, "step": 6749 }, { "epoch": 15.031180400890868, "loss": 0.41330039501190186, "loss_ce": 9.242070518666878e-05, "loss_iou": 0.1904296875, "loss_num": 0.006500244140625, "loss_xval": 0.4140625, "num_input_tokens_seen": 378168524, "step": 6749 }, { "epoch": 15.033407572383073, "grad_norm": 36.06410217285156, "learning_rate": 1e-06, "loss": 0.6565, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "eval_seeclick_web_CIoU": 0.5852765142917633, "eval_seeclick_web_GIoU": 0.5835680663585663, "eval_seeclick_web_IoU": 0.6037326455116272, "eval_seeclick_web_MAE_all": 0.015391120221465826, "eval_seeclick_web_MAE_h": 0.007827216759324074, "eval_seeclick_web_MAE_w": 0.015470336191356182, "eval_seeclick_web_MAE_x_boxes": 0.008828274440020323, "eval_seeclick_web_MAE_y_boxes": 0.021092181792482734, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9029772877693176, "eval_seeclick_web_loss_ce": 0.00016041052003856748, "eval_seeclick_web_loss_iou": 0.4168701171875, "eval_seeclick_web_loss_num": 0.012361526489257812, "eval_seeclick_web_loss_xval": 0.895263671875, "eval_seeclick_web_runtime": 25.9811, "eval_seeclick_web_samples_per_second": 1.924, "eval_seeclick_web_steps_per_second": 0.077, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "eval_icons_CIoU": 0.28028304874897003, "eval_icons_GIoU": 0.30072087049484253, "eval_icons_IoU": 0.3571899086236954, "eval_icons_MAE_all": 0.057889632880687714, "eval_icons_MAE_h": 0.03522232733666897, "eval_icons_MAE_w": 0.05322147347033024, "eval_icons_MAE_x_boxes": 0.05888655222952366, "eval_icons_MAE_y_boxes": 0.03719859570264816, "eval_icons_inside_bbox": 0.6371527910232544, "eval_icons_loss": 1.6711426973342896, "eval_icons_loss_ce": 0.00020551962370518595, "eval_icons_loss_iou": 0.650146484375, "eval_icons_loss_num": 0.04911231994628906, "eval_icons_loss_xval": 1.546142578125, "eval_icons_runtime": 25.2226, "eval_icons_samples_per_second": 1.982, "eval_icons_steps_per_second": 0.079, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "eval_screenspot_CIoU": 0.37860554456710815, "eval_screenspot_GIoU": 0.39325101176897687, "eval_screenspot_IoU": 0.4482241968313853, "eval_screenspot_MAE_all": 0.05375574777523676, "eval_screenspot_MAE_h": 0.03931415639817715, "eval_screenspot_MAE_w": 0.05822847535212835, "eval_screenspot_MAE_x_boxes": 0.06611844276388486, "eval_screenspot_MAE_y_boxes": 0.036580439967413746, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.5432301759719849, "eval_screenspot_loss_ce": 0.00022206837699438134, "eval_screenspot_loss_iou": 0.644775390625, "eval_screenspot_loss_num": 0.061370849609375, "eval_screenspot_loss_xval": 1.5965169270833333, "eval_screenspot_runtime": 42.0263, "eval_screenspot_samples_per_second": 2.118, "eval_screenspot_steps_per_second": 0.071, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "eval_compot_CIoU": 0.34727177023887634, "eval_compot_GIoU": 0.3595842719078064, "eval_compot_IoU": 0.4042260944843292, "eval_compot_MAE_all": 0.018605078104883432, "eval_compot_MAE_h": 0.011235271580517292, "eval_compot_MAE_w": 0.020991048775613308, "eval_compot_MAE_x_boxes": 0.030081474222242832, "eval_compot_MAE_y_boxes": 0.006614114856347442, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4067184925079346, "eval_compot_loss_ce": 0.0001557795621920377, "eval_compot_loss_iou": 0.650146484375, "eval_compot_loss_num": 0.0174102783203125, "eval_compot_loss_xval": 1.3876953125, "eval_compot_runtime": 25.4621, "eval_compot_samples_per_second": 1.964, "eval_compot_steps_per_second": 0.079, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "eval_custom_ui_val_CIoU": 0.48026987661918, "eval_custom_ui_val_GIoU": 0.48731139302253723, "eval_custom_ui_val_IoU": 0.5409456855720944, "eval_custom_ui_val_MAE_all": 0.027578827666325703, "eval_custom_ui_val_MAE_h": 0.015306917821160622, "eval_custom_ui_val_MAE_w": 0.03556989893938104, "eval_custom_ui_val_MAE_x_boxes": 0.033483781981178455, "eval_custom_ui_val_MAE_y_boxes": 0.013460160000249743, "eval_custom_ui_val_inside_bbox": 0.7719907429483202, "eval_custom_ui_val_loss": 1.1606297492980957, "eval_custom_ui_val_loss_ce": 0.0001816005743522611, "eval_custom_ui_val_loss_iou": 0.4960530598958333, "eval_custom_ui_val_loss_num": 0.02418337927924262, "eval_custom_ui_val_loss_xval": 1.1128472222222223, "eval_custom_ui_val_runtime": 78.0431, "eval_custom_ui_val_samples_per_second": 3.396, "eval_custom_ui_val_steps_per_second": 0.115, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.033407572383073, "loss": 0.8121492862701416, "loss_ce": 0.00013749845675192773, "loss_iou": 0.365234375, "loss_num": 0.0166015625, "loss_xval": 0.8125, "num_input_tokens_seen": 378223964, "step": 6750 }, { "epoch": 15.035634743875278, "grad_norm": 23.943416595458984, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 378281124, "step": 6751 }, { "epoch": 15.035634743875278, "loss": 0.33616477251052856, "loss_ce": 0.00010521589138079435, "loss_iou": 0.1474609375, "loss_num": 0.00811767578125, "loss_xval": 0.3359375, "num_input_tokens_seen": 378281124, "step": 6751 }, { "epoch": 15.037861915367483, "grad_norm": 16.794620513916016, "learning_rate": 1e-06, "loss": 0.5123, "num_input_tokens_seen": 378335736, "step": 6752 }, { "epoch": 15.037861915367483, "loss": 0.42880141735076904, "loss_ce": 9.046800551004708e-05, "loss_iou": 0.1923828125, "loss_num": 0.0089111328125, "loss_xval": 0.4296875, "num_input_tokens_seen": 378335736, "step": 6752 }, { "epoch": 15.040089086859687, "grad_norm": 23.620935440063477, "learning_rate": 1e-06, "loss": 0.6279, "num_input_tokens_seen": 378391368, "step": 6753 }, { "epoch": 15.040089086859687, "loss": 0.624180793762207, "loss_ce": 0.00015735081979073584, "loss_iou": 0.279296875, "loss_num": 0.01312255859375, "loss_xval": 0.625, "num_input_tokens_seen": 378391368, "step": 6753 }, { "epoch": 15.042316258351892, "grad_norm": 16.139259338378906, "learning_rate": 1e-06, "loss": 0.4466, "num_input_tokens_seen": 378446784, "step": 6754 }, { "epoch": 15.042316258351892, "loss": 0.5067760348320007, "loss_ce": 9.269756264984608e-05, "loss_iou": 0.20703125, "loss_num": 0.018310546875, "loss_xval": 0.5078125, "num_input_tokens_seen": 378446784, "step": 6754 }, { "epoch": 15.044543429844097, "grad_norm": 17.31475257873535, "learning_rate": 1e-06, "loss": 0.3974, "num_input_tokens_seen": 378504372, "step": 6755 }, { "epoch": 15.044543429844097, "loss": 0.44474291801452637, "loss_ce": 0.00022385823831427842, "loss_iou": 0.177734375, "loss_num": 0.0177001953125, "loss_xval": 0.4453125, "num_input_tokens_seen": 378504372, "step": 6755 }, { "epoch": 15.046770601336302, "grad_norm": 22.26161766052246, "learning_rate": 1e-06, "loss": 0.6522, "num_input_tokens_seen": 378560752, "step": 6756 }, { "epoch": 15.046770601336302, "loss": 0.6529574990272522, "loss_ce": 0.00036961075966246426, "loss_iou": 0.28515625, "loss_num": 0.0162353515625, "loss_xval": 0.65234375, "num_input_tokens_seen": 378560752, "step": 6756 }, { "epoch": 15.048997772828507, "grad_norm": 18.25050163269043, "learning_rate": 1e-06, "loss": 0.5268, "num_input_tokens_seen": 378619176, "step": 6757 }, { "epoch": 15.048997772828507, "loss": 0.43017441034317017, "loss_ce": 0.00012070426600985229, "loss_iou": 0.18359375, "loss_num": 0.0125732421875, "loss_xval": 0.4296875, "num_input_tokens_seen": 378619176, "step": 6757 }, { "epoch": 15.051224944320714, "grad_norm": 16.298351287841797, "learning_rate": 1e-06, "loss": 0.3852, "num_input_tokens_seen": 378673764, "step": 6758 }, { "epoch": 15.051224944320714, "loss": 0.3430963158607483, "loss_ce": 0.00012450873327907175, "loss_iou": 0.1376953125, "loss_num": 0.0137939453125, "loss_xval": 0.34375, "num_input_tokens_seen": 378673764, "step": 6758 }, { "epoch": 15.053452115812918, "grad_norm": 15.573384284973145, "learning_rate": 1e-06, "loss": 0.3511, "num_input_tokens_seen": 378729620, "step": 6759 }, { "epoch": 15.053452115812918, "loss": 0.36082327365875244, "loss_ce": 0.00010551903687883168, "loss_iou": 0.166015625, "loss_num": 0.00579833984375, "loss_xval": 0.361328125, "num_input_tokens_seen": 378729620, "step": 6759 }, { "epoch": 15.055679287305123, "grad_norm": 16.886507034301758, "learning_rate": 1e-06, "loss": 0.5778, "num_input_tokens_seen": 378784440, "step": 6760 }, { "epoch": 15.055679287305123, "loss": 0.5877401828765869, "loss_ce": 9.368160681333393e-05, "loss_iou": 0.2275390625, "loss_num": 0.0264892578125, "loss_xval": 0.5859375, "num_input_tokens_seen": 378784440, "step": 6760 }, { "epoch": 15.057906458797328, "grad_norm": 18.894807815551758, "learning_rate": 1e-06, "loss": 0.4008, "num_input_tokens_seen": 378839964, "step": 6761 }, { "epoch": 15.057906458797328, "loss": 0.3778371512889862, "loss_ce": 9.055679402081296e-05, "loss_iou": 0.169921875, "loss_num": 0.007598876953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 378839964, "step": 6761 }, { "epoch": 15.060133630289533, "grad_norm": 22.449024200439453, "learning_rate": 1e-06, "loss": 0.5987, "num_input_tokens_seen": 378897408, "step": 6762 }, { "epoch": 15.060133630289533, "loss": 0.6189790368080139, "loss_ce": 8.254876593127847e-05, "loss_iou": 0.255859375, "loss_num": 0.0216064453125, "loss_xval": 0.6171875, "num_input_tokens_seen": 378897408, "step": 6762 }, { "epoch": 15.062360801781738, "grad_norm": 21.661102294921875, "learning_rate": 1e-06, "loss": 0.4368, "num_input_tokens_seen": 378951352, "step": 6763 }, { "epoch": 15.062360801781738, "loss": 0.39707666635513306, "loss_ce": 0.00010398250014986843, "loss_iou": 0.1826171875, "loss_num": 0.006439208984375, "loss_xval": 0.396484375, "num_input_tokens_seen": 378951352, "step": 6763 }, { "epoch": 15.064587973273943, "grad_norm": 14.383728981018066, "learning_rate": 1e-06, "loss": 0.5996, "num_input_tokens_seen": 379007704, "step": 6764 }, { "epoch": 15.064587973273943, "loss": 0.6301254034042358, "loss_ce": 0.00012049202632624656, "loss_iou": 0.265625, "loss_num": 0.0194091796875, "loss_xval": 0.62890625, "num_input_tokens_seen": 379007704, "step": 6764 }, { "epoch": 15.066815144766148, "grad_norm": 16.69063377380371, "learning_rate": 1e-06, "loss": 0.5299, "num_input_tokens_seen": 379060260, "step": 6765 }, { "epoch": 15.066815144766148, "loss": 0.49555858969688416, "loss_ce": 0.00019726283790078014, "loss_iou": 0.1953125, "loss_num": 0.0211181640625, "loss_xval": 0.49609375, "num_input_tokens_seen": 379060260, "step": 6765 }, { "epoch": 15.069042316258352, "grad_norm": 18.389291763305664, "learning_rate": 1e-06, "loss": 0.6429, "num_input_tokens_seen": 379115936, "step": 6766 }, { "epoch": 15.069042316258352, "loss": 0.9386324882507324, "loss_ce": 0.00015596779121551663, "loss_iou": 0.41015625, "loss_num": 0.0240478515625, "loss_xval": 0.9375, "num_input_tokens_seen": 379115936, "step": 6766 }, { "epoch": 15.071269487750557, "grad_norm": 33.009193420410156, "learning_rate": 1e-06, "loss": 0.4666, "num_input_tokens_seen": 379171576, "step": 6767 }, { "epoch": 15.071269487750557, "loss": 0.5097314119338989, "loss_ce": 8.78519203979522e-05, "loss_iou": 0.2275390625, "loss_num": 0.0108642578125, "loss_xval": 0.5078125, "num_input_tokens_seen": 379171576, "step": 6767 }, { "epoch": 15.073496659242762, "grad_norm": 20.735618591308594, "learning_rate": 1e-06, "loss": 0.3335, "num_input_tokens_seen": 379231148, "step": 6768 }, { "epoch": 15.073496659242762, "loss": 0.4110013246536255, "loss_ce": 0.00011265121429460123, "loss_iou": 0.1962890625, "loss_num": 0.0038299560546875, "loss_xval": 0.41015625, "num_input_tokens_seen": 379231148, "step": 6768 }, { "epoch": 15.075723830734967, "grad_norm": 13.399028778076172, "learning_rate": 1e-06, "loss": 0.4072, "num_input_tokens_seen": 379288424, "step": 6769 }, { "epoch": 15.075723830734967, "loss": 0.2551080882549286, "loss_ce": 0.00010320404544472694, "loss_iou": 0.11474609375, "loss_num": 0.00506591796875, "loss_xval": 0.255859375, "num_input_tokens_seen": 379288424, "step": 6769 }, { "epoch": 15.077951002227172, "grad_norm": 15.024955749511719, "learning_rate": 1e-06, "loss": 0.5097, "num_input_tokens_seen": 379341648, "step": 6770 }, { "epoch": 15.077951002227172, "loss": 0.5870286226272583, "loss_ce": 0.00011455308413133025, "loss_iou": 0.25, "loss_num": 0.0172119140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 379341648, "step": 6770 }, { "epoch": 15.080178173719377, "grad_norm": 57.792747497558594, "learning_rate": 1e-06, "loss": 0.5396, "num_input_tokens_seen": 379396880, "step": 6771 }, { "epoch": 15.080178173719377, "loss": 0.6382882595062256, "loss_ce": 0.00010471623681951314, "loss_iou": 0.251953125, "loss_num": 0.02685546875, "loss_xval": 0.63671875, "num_input_tokens_seen": 379396880, "step": 6771 }, { "epoch": 15.082405345211582, "grad_norm": 17.591594696044922, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 379451464, "step": 6772 }, { "epoch": 15.082405345211582, "loss": 0.41464707255363464, "loss_ce": 9.628412954043597e-05, "loss_iou": 0.1884765625, "loss_num": 0.00762939453125, "loss_xval": 0.4140625, "num_input_tokens_seen": 379451464, "step": 6772 }, { "epoch": 15.084632516703786, "grad_norm": 19.806339263916016, "learning_rate": 1e-06, "loss": 0.3308, "num_input_tokens_seen": 379507000, "step": 6773 }, { "epoch": 15.084632516703786, "loss": 0.32612937688827515, "loss_ce": 7.959181675687432e-05, "loss_iou": 0.12890625, "loss_num": 0.01348876953125, "loss_xval": 0.326171875, "num_input_tokens_seen": 379507000, "step": 6773 }, { "epoch": 15.086859688195991, "grad_norm": 17.055814743041992, "learning_rate": 1e-06, "loss": 0.3506, "num_input_tokens_seen": 379562540, "step": 6774 }, { "epoch": 15.086859688195991, "loss": 0.31467652320861816, "loss_ce": 0.00010132987517863512, "loss_iou": 0.13671875, "loss_num": 0.00799560546875, "loss_xval": 0.314453125, "num_input_tokens_seen": 379562540, "step": 6774 }, { "epoch": 15.089086859688196, "grad_norm": 24.606420516967773, "learning_rate": 1e-06, "loss": 0.401, "num_input_tokens_seen": 379620760, "step": 6775 }, { "epoch": 15.089086859688196, "loss": 0.3719760477542877, "loss_ce": 8.884338603820652e-05, "loss_iou": 0.169921875, "loss_num": 0.006256103515625, "loss_xval": 0.37109375, "num_input_tokens_seen": 379620760, "step": 6775 }, { "epoch": 15.091314031180401, "grad_norm": 20.017690658569336, "learning_rate": 1e-06, "loss": 0.4856, "num_input_tokens_seen": 379677336, "step": 6776 }, { "epoch": 15.091314031180401, "loss": 0.45812952518463135, "loss_ce": 0.00012173528375569731, "loss_iou": 0.18359375, "loss_num": 0.018310546875, "loss_xval": 0.45703125, "num_input_tokens_seen": 379677336, "step": 6776 }, { "epoch": 15.093541202672606, "grad_norm": 16.497709274291992, "learning_rate": 1e-06, "loss": 0.4482, "num_input_tokens_seen": 379734720, "step": 6777 }, { "epoch": 15.093541202672606, "loss": 0.34024685621261597, "loss_ce": 9.794162178877741e-05, "loss_iou": 0.15234375, "loss_num": 0.007080078125, "loss_xval": 0.33984375, "num_input_tokens_seen": 379734720, "step": 6777 }, { "epoch": 15.09576837416481, "grad_norm": 15.530109405517578, "learning_rate": 1e-06, "loss": 0.4104, "num_input_tokens_seen": 379790532, "step": 6778 }, { "epoch": 15.09576837416481, "loss": 0.30709517002105713, "loss_ce": 8.832212188281119e-05, "loss_iou": 0.13671875, "loss_num": 0.006805419921875, "loss_xval": 0.306640625, "num_input_tokens_seen": 379790532, "step": 6778 }, { "epoch": 15.097995545657016, "grad_norm": 16.189306259155273, "learning_rate": 1e-06, "loss": 0.4835, "num_input_tokens_seen": 379847060, "step": 6779 }, { "epoch": 15.097995545657016, "loss": 0.5428781509399414, "loss_ce": 0.0001535568735562265, "loss_iou": 0.224609375, "loss_num": 0.0186767578125, "loss_xval": 0.54296875, "num_input_tokens_seen": 379847060, "step": 6779 }, { "epoch": 15.10022271714922, "grad_norm": 15.896251678466797, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 379902864, "step": 6780 }, { "epoch": 15.10022271714922, "loss": 0.38522452116012573, "loss_ce": 9.271127782994881e-05, "loss_iou": 0.1748046875, "loss_num": 0.0069580078125, "loss_xval": 0.384765625, "num_input_tokens_seen": 379902864, "step": 6780 }, { "epoch": 15.102449888641425, "grad_norm": 52.78928756713867, "learning_rate": 1e-06, "loss": 0.3965, "num_input_tokens_seen": 379959096, "step": 6781 }, { "epoch": 15.102449888641425, "loss": 0.4485187530517578, "loss_ce": 0.0002765837707556784, "loss_iou": 0.19140625, "loss_num": 0.01318359375, "loss_xval": 0.44921875, "num_input_tokens_seen": 379959096, "step": 6781 }, { "epoch": 15.10467706013363, "grad_norm": 25.405174255371094, "learning_rate": 1e-06, "loss": 0.362, "num_input_tokens_seen": 380015168, "step": 6782 }, { "epoch": 15.10467706013363, "loss": 0.42562615871429443, "loss_ce": 8.907825394999236e-05, "loss_iou": 0.185546875, "loss_num": 0.01104736328125, "loss_xval": 0.42578125, "num_input_tokens_seen": 380015168, "step": 6782 }, { "epoch": 15.106904231625835, "grad_norm": 11.471501350402832, "learning_rate": 1e-06, "loss": 0.4449, "num_input_tokens_seen": 380071884, "step": 6783 }, { "epoch": 15.106904231625835, "loss": 0.4200718402862549, "loss_ce": 9.654099267208949e-05, "loss_iou": 0.1953125, "loss_num": 0.00592041015625, "loss_xval": 0.419921875, "num_input_tokens_seen": 380071884, "step": 6783 }, { "epoch": 15.10913140311804, "grad_norm": 15.025222778320312, "learning_rate": 1e-06, "loss": 0.4441, "num_input_tokens_seen": 380129560, "step": 6784 }, { "epoch": 15.10913140311804, "loss": 0.4893530011177063, "loss_ce": 9.517707803752273e-05, "loss_iou": 0.203125, "loss_num": 0.0166015625, "loss_xval": 0.48828125, "num_input_tokens_seen": 380129560, "step": 6784 }, { "epoch": 15.111358574610245, "grad_norm": 29.484661102294922, "learning_rate": 1e-06, "loss": 0.4307, "num_input_tokens_seen": 380185012, "step": 6785 }, { "epoch": 15.111358574610245, "loss": 0.314787894487381, "loss_ce": 9.060885349754244e-05, "loss_iou": 0.140625, "loss_num": 0.00677490234375, "loss_xval": 0.314453125, "num_input_tokens_seen": 380185012, "step": 6785 }, { "epoch": 15.11358574610245, "grad_norm": 14.522575378417969, "learning_rate": 1e-06, "loss": 0.5448, "num_input_tokens_seen": 380241104, "step": 6786 }, { "epoch": 15.11358574610245, "loss": 0.576589822769165, "loss_ce": 0.00011273389827692881, "loss_iou": 0.2490234375, "loss_num": 0.015625, "loss_xval": 0.578125, "num_input_tokens_seen": 380241104, "step": 6786 }, { "epoch": 15.115812917594655, "grad_norm": 13.87673568725586, "learning_rate": 1e-06, "loss": 0.5374, "num_input_tokens_seen": 380297004, "step": 6787 }, { "epoch": 15.115812917594655, "loss": 0.30384352803230286, "loss_ce": 0.0001325808116234839, "loss_iou": 0.1376953125, "loss_num": 0.005615234375, "loss_xval": 0.3046875, "num_input_tokens_seen": 380297004, "step": 6787 }, { "epoch": 15.11804008908686, "grad_norm": 23.66053581237793, "learning_rate": 1e-06, "loss": 0.5607, "num_input_tokens_seen": 380352836, "step": 6788 }, { "epoch": 15.11804008908686, "loss": 0.36867833137512207, "loss_ce": 8.703065395820886e-05, "loss_iou": 0.166015625, "loss_num": 0.00738525390625, "loss_xval": 0.369140625, "num_input_tokens_seen": 380352836, "step": 6788 }, { "epoch": 15.120267260579064, "grad_norm": 17.455827713012695, "learning_rate": 1e-06, "loss": 0.3954, "num_input_tokens_seen": 380406124, "step": 6789 }, { "epoch": 15.120267260579064, "loss": 0.3701331913471222, "loss_ce": 7.704219751758501e-05, "loss_iou": 0.15234375, "loss_num": 0.01312255859375, "loss_xval": 0.369140625, "num_input_tokens_seen": 380406124, "step": 6789 }, { "epoch": 15.122494432071269, "grad_norm": 33.90388870239258, "learning_rate": 1e-06, "loss": 0.5768, "num_input_tokens_seen": 380459684, "step": 6790 }, { "epoch": 15.122494432071269, "loss": 0.4020087718963623, "loss_ce": 9.228185808751732e-05, "loss_iou": 0.166015625, "loss_num": 0.01416015625, "loss_xval": 0.40234375, "num_input_tokens_seen": 380459684, "step": 6790 }, { "epoch": 15.124721603563474, "grad_norm": 16.799598693847656, "learning_rate": 1e-06, "loss": 0.4023, "num_input_tokens_seen": 380516820, "step": 6791 }, { "epoch": 15.124721603563474, "loss": 0.44541066884994507, "loss_ce": 9.819894330576062e-05, "loss_iou": 0.1943359375, "loss_num": 0.0113525390625, "loss_xval": 0.4453125, "num_input_tokens_seen": 380516820, "step": 6791 }, { "epoch": 15.126948775055679, "grad_norm": 15.393778800964355, "learning_rate": 1e-06, "loss": 0.6106, "num_input_tokens_seen": 380575020, "step": 6792 }, { "epoch": 15.126948775055679, "loss": 0.702007532119751, "loss_ce": 0.00010328319331165403, "loss_iou": 0.2890625, "loss_num": 0.0250244140625, "loss_xval": 0.703125, "num_input_tokens_seen": 380575020, "step": 6792 }, { "epoch": 15.129175946547884, "grad_norm": 12.515711784362793, "learning_rate": 1e-06, "loss": 0.6247, "num_input_tokens_seen": 380631440, "step": 6793 }, { "epoch": 15.129175946547884, "loss": 0.8331481218338013, "loss_ce": 0.00014029248268343508, "loss_iou": 0.31640625, "loss_num": 0.0400390625, "loss_xval": 0.83203125, "num_input_tokens_seen": 380631440, "step": 6793 }, { "epoch": 15.131403118040089, "grad_norm": 14.25368595123291, "learning_rate": 1e-06, "loss": 0.2824, "num_input_tokens_seen": 380690028, "step": 6794 }, { "epoch": 15.131403118040089, "loss": 0.3566052317619324, "loss_ce": 0.00015993951819837093, "loss_iou": 0.1552734375, "loss_num": 0.00927734375, "loss_xval": 0.35546875, "num_input_tokens_seen": 380690028, "step": 6794 }, { "epoch": 15.133630289532293, "grad_norm": 12.242840766906738, "learning_rate": 1e-06, "loss": 0.4051, "num_input_tokens_seen": 380747684, "step": 6795 }, { "epoch": 15.133630289532293, "loss": 0.33505433797836304, "loss_ce": 9.341866825707257e-05, "loss_iou": 0.158203125, "loss_num": 0.00372314453125, "loss_xval": 0.3359375, "num_input_tokens_seen": 380747684, "step": 6795 }, { "epoch": 15.135857461024498, "grad_norm": 15.275101661682129, "learning_rate": 1e-06, "loss": 0.3874, "num_input_tokens_seen": 380803156, "step": 6796 }, { "epoch": 15.135857461024498, "loss": 0.4023858308792114, "loss_ce": 0.0001641570997890085, "loss_iou": 0.17578125, "loss_num": 0.0103759765625, "loss_xval": 0.40234375, "num_input_tokens_seen": 380803156, "step": 6796 }, { "epoch": 15.138084632516703, "grad_norm": 16.61083221435547, "learning_rate": 1e-06, "loss": 0.5824, "num_input_tokens_seen": 380860000, "step": 6797 }, { "epoch": 15.138084632516703, "loss": 0.5763627290725708, "loss_ce": 0.00012979336315765977, "loss_iou": 0.251953125, "loss_num": 0.014404296875, "loss_xval": 0.578125, "num_input_tokens_seen": 380860000, "step": 6797 }, { "epoch": 15.140311804008908, "grad_norm": 17.85312271118164, "learning_rate": 1e-06, "loss": 0.3495, "num_input_tokens_seen": 380914548, "step": 6798 }, { "epoch": 15.140311804008908, "loss": 0.2694869935512543, "loss_ce": 7.780264422763139e-05, "loss_iou": 0.11669921875, "loss_num": 0.00714111328125, "loss_xval": 0.26953125, "num_input_tokens_seen": 380914548, "step": 6798 }, { "epoch": 15.142538975501113, "grad_norm": 26.615489959716797, "learning_rate": 1e-06, "loss": 0.4413, "num_input_tokens_seen": 380972508, "step": 6799 }, { "epoch": 15.142538975501113, "loss": 0.4014506936073303, "loss_ce": 8.351253200089559e-05, "loss_iou": 0.162109375, "loss_num": 0.01531982421875, "loss_xval": 0.40234375, "num_input_tokens_seen": 380972508, "step": 6799 }, { "epoch": 15.144766146993318, "grad_norm": 15.07624626159668, "learning_rate": 1e-06, "loss": 0.3529, "num_input_tokens_seen": 381027844, "step": 6800 }, { "epoch": 15.144766146993318, "loss": 0.41293323040008545, "loss_ce": 9.14369520614855e-05, "loss_iou": 0.1875, "loss_num": 0.00775146484375, "loss_xval": 0.412109375, "num_input_tokens_seen": 381027844, "step": 6800 }, { "epoch": 15.146993318485523, "grad_norm": 18.466833114624023, "learning_rate": 1e-06, "loss": 0.489, "num_input_tokens_seen": 381084744, "step": 6801 }, { "epoch": 15.146993318485523, "loss": 0.37544137239456177, "loss_ce": 7.517053745687008e-05, "loss_iou": 0.1689453125, "loss_num": 0.007598876953125, "loss_xval": 0.375, "num_input_tokens_seen": 381084744, "step": 6801 }, { "epoch": 15.14922048997773, "grad_norm": 18.854787826538086, "learning_rate": 1e-06, "loss": 0.3958, "num_input_tokens_seen": 381139260, "step": 6802 }, { "epoch": 15.14922048997773, "loss": 0.386452853679657, "loss_ce": 0.00010031522833742201, "loss_iou": 0.1689453125, "loss_num": 0.009521484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 381139260, "step": 6802 }, { "epoch": 15.151447661469934, "grad_norm": 28.44632339477539, "learning_rate": 1e-06, "loss": 0.3858, "num_input_tokens_seen": 381197604, "step": 6803 }, { "epoch": 15.151447661469934, "loss": 0.4399263262748718, "loss_ce": 0.0002290659467689693, "loss_iou": 0.1962890625, "loss_num": 0.0096435546875, "loss_xval": 0.439453125, "num_input_tokens_seen": 381197604, "step": 6803 }, { "epoch": 15.153674832962139, "grad_norm": 36.481109619140625, "learning_rate": 1e-06, "loss": 0.4761, "num_input_tokens_seen": 381253820, "step": 6804 }, { "epoch": 15.153674832962139, "loss": 0.38608303666114807, "loss_ce": 9.673433669377118e-05, "loss_iou": 0.1689453125, "loss_num": 0.00982666015625, "loss_xval": 0.38671875, "num_input_tokens_seen": 381253820, "step": 6804 }, { "epoch": 15.155902004454344, "grad_norm": 98.84033203125, "learning_rate": 1e-06, "loss": 0.3979, "num_input_tokens_seen": 381309320, "step": 6805 }, { "epoch": 15.155902004454344, "loss": 0.35694801807403564, "loss_ce": 0.00016701644926797599, "loss_iou": 0.1552734375, "loss_num": 0.00921630859375, "loss_xval": 0.357421875, "num_input_tokens_seen": 381309320, "step": 6805 }, { "epoch": 15.158129175946549, "grad_norm": 18.540637969970703, "learning_rate": 1e-06, "loss": 0.3822, "num_input_tokens_seen": 381365092, "step": 6806 }, { "epoch": 15.158129175946549, "loss": 0.40906059741973877, "loss_ce": 0.00012505166523624212, "loss_iou": 0.181640625, "loss_num": 0.00909423828125, "loss_xval": 0.408203125, "num_input_tokens_seen": 381365092, "step": 6806 }, { "epoch": 15.160356347438753, "grad_norm": 25.038162231445312, "learning_rate": 1e-06, "loss": 0.6632, "num_input_tokens_seen": 381421476, "step": 6807 }, { "epoch": 15.160356347438753, "loss": 0.6734679937362671, "loss_ce": 0.00012814889487344772, "loss_iou": 0.30078125, "loss_num": 0.014404296875, "loss_xval": 0.671875, "num_input_tokens_seen": 381421476, "step": 6807 }, { "epoch": 15.162583518930958, "grad_norm": 19.82026481628418, "learning_rate": 1e-06, "loss": 0.538, "num_input_tokens_seen": 381479976, "step": 6808 }, { "epoch": 15.162583518930958, "loss": 0.40109795331954956, "loss_ce": 9.697194036561996e-05, "loss_iou": 0.1875, "loss_num": 0.0052490234375, "loss_xval": 0.400390625, "num_input_tokens_seen": 381479976, "step": 6808 }, { "epoch": 15.164810690423163, "grad_norm": 23.628137588500977, "learning_rate": 1e-06, "loss": 0.448, "num_input_tokens_seen": 381534776, "step": 6809 }, { "epoch": 15.164810690423163, "loss": 0.5230967402458191, "loss_ce": 0.0001475035387557, "loss_iou": 0.216796875, "loss_num": 0.01806640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 381534776, "step": 6809 }, { "epoch": 15.167037861915368, "grad_norm": 21.607646942138672, "learning_rate": 1e-06, "loss": 0.504, "num_input_tokens_seen": 381592192, "step": 6810 }, { "epoch": 15.167037861915368, "loss": 0.5715093612670898, "loss_ce": 9.827417670749128e-05, "loss_iou": 0.22265625, "loss_num": 0.0255126953125, "loss_xval": 0.5703125, "num_input_tokens_seen": 381592192, "step": 6810 }, { "epoch": 15.169265033407573, "grad_norm": 17.767423629760742, "learning_rate": 1e-06, "loss": 0.4062, "num_input_tokens_seen": 381650596, "step": 6811 }, { "epoch": 15.169265033407573, "loss": 0.3915339708328247, "loss_ce": 0.00011550119234016165, "loss_iou": 0.1787109375, "loss_num": 0.006805419921875, "loss_xval": 0.390625, "num_input_tokens_seen": 381650596, "step": 6811 }, { "epoch": 15.171492204899778, "grad_norm": 13.659879684448242, "learning_rate": 1e-06, "loss": 0.4005, "num_input_tokens_seen": 381706044, "step": 6812 }, { "epoch": 15.171492204899778, "loss": 0.37923288345336914, "loss_ce": 8.25146198621951e-05, "loss_iou": 0.1572265625, "loss_num": 0.0128173828125, "loss_xval": 0.37890625, "num_input_tokens_seen": 381706044, "step": 6812 }, { "epoch": 15.173719376391983, "grad_norm": 14.41109561920166, "learning_rate": 1e-06, "loss": 0.4001, "num_input_tokens_seen": 381759412, "step": 6813 }, { "epoch": 15.173719376391983, "loss": 0.38645416498184204, "loss_ce": 0.00010162763646803796, "loss_iou": 0.166015625, "loss_num": 0.01080322265625, "loss_xval": 0.38671875, "num_input_tokens_seen": 381759412, "step": 6813 }, { "epoch": 15.175946547884188, "grad_norm": 15.983702659606934, "learning_rate": 1e-06, "loss": 0.4464, "num_input_tokens_seen": 381817464, "step": 6814 }, { "epoch": 15.175946547884188, "loss": 0.45959311723709106, "loss_ce": 0.00012045353651046753, "loss_iou": 0.20703125, "loss_num": 0.00921630859375, "loss_xval": 0.458984375, "num_input_tokens_seen": 381817464, "step": 6814 }, { "epoch": 15.178173719376392, "grad_norm": 22.976959228515625, "learning_rate": 1e-06, "loss": 0.5359, "num_input_tokens_seen": 381870696, "step": 6815 }, { "epoch": 15.178173719376392, "loss": 0.3837730288505554, "loss_ce": 0.0001670640049269423, "loss_iou": 0.16015625, "loss_num": 0.01275634765625, "loss_xval": 0.3828125, "num_input_tokens_seen": 381870696, "step": 6815 }, { "epoch": 15.180400890868597, "grad_norm": 23.866098403930664, "learning_rate": 1e-06, "loss": 0.5447, "num_input_tokens_seen": 381925464, "step": 6816 }, { "epoch": 15.180400890868597, "loss": 0.3768770694732666, "loss_ce": 0.0001070558573701419, "loss_iou": 0.1767578125, "loss_num": 0.0047607421875, "loss_xval": 0.376953125, "num_input_tokens_seen": 381925464, "step": 6816 }, { "epoch": 15.182628062360802, "grad_norm": 21.5544490814209, "learning_rate": 1e-06, "loss": 0.5746, "num_input_tokens_seen": 381979292, "step": 6817 }, { "epoch": 15.182628062360802, "loss": 0.6533318758010864, "loss_ce": 0.0001337053836323321, "loss_iou": 0.283203125, "loss_num": 0.017822265625, "loss_xval": 0.65234375, "num_input_tokens_seen": 381979292, "step": 6817 }, { "epoch": 15.184855233853007, "grad_norm": 21.73903465270996, "learning_rate": 1e-06, "loss": 0.4286, "num_input_tokens_seen": 382034532, "step": 6818 }, { "epoch": 15.184855233853007, "loss": 0.4159244894981384, "loss_ce": 0.00015300727682188153, "loss_iou": 0.1806640625, "loss_num": 0.0111083984375, "loss_xval": 0.416015625, "num_input_tokens_seen": 382034532, "step": 6818 }, { "epoch": 15.187082405345212, "grad_norm": 14.082677841186523, "learning_rate": 1e-06, "loss": 0.345, "num_input_tokens_seen": 382089388, "step": 6819 }, { "epoch": 15.187082405345212, "loss": 0.3454124331474304, "loss_ce": 7.550232112407684e-05, "loss_iou": 0.14453125, "loss_num": 0.0111083984375, "loss_xval": 0.345703125, "num_input_tokens_seen": 382089388, "step": 6819 }, { "epoch": 15.189309576837417, "grad_norm": 13.859386444091797, "learning_rate": 1e-06, "loss": 0.3842, "num_input_tokens_seen": 382148896, "step": 6820 }, { "epoch": 15.189309576837417, "loss": 0.33224329352378845, "loss_ce": 8.99658989510499e-05, "loss_iou": 0.1484375, "loss_num": 0.0069580078125, "loss_xval": 0.33203125, "num_input_tokens_seen": 382148896, "step": 6820 }, { "epoch": 15.191536748329622, "grad_norm": 21.40119171142578, "learning_rate": 1e-06, "loss": 0.5285, "num_input_tokens_seen": 382204260, "step": 6821 }, { "epoch": 15.191536748329622, "loss": 0.38690587878227234, "loss_ce": 9.558402234688401e-05, "loss_iou": 0.171875, "loss_num": 0.00848388671875, "loss_xval": 0.38671875, "num_input_tokens_seen": 382204260, "step": 6821 }, { "epoch": 15.193763919821826, "grad_norm": 23.956090927124023, "learning_rate": 1e-06, "loss": 0.2846, "num_input_tokens_seen": 382261104, "step": 6822 }, { "epoch": 15.193763919821826, "loss": 0.33018404245376587, "loss_ce": 0.0001059087153407745, "loss_iou": 0.1533203125, "loss_num": 0.00469970703125, "loss_xval": 0.330078125, "num_input_tokens_seen": 382261104, "step": 6822 }, { "epoch": 15.195991091314031, "grad_norm": 29.63949203491211, "learning_rate": 1e-06, "loss": 0.4216, "num_input_tokens_seen": 382316244, "step": 6823 }, { "epoch": 15.195991091314031, "loss": 0.27267590165138245, "loss_ce": 9.288682485930622e-05, "loss_iou": 0.125, "loss_num": 0.00439453125, "loss_xval": 0.2734375, "num_input_tokens_seen": 382316244, "step": 6823 }, { "epoch": 15.198218262806236, "grad_norm": 25.926671981811523, "learning_rate": 1e-06, "loss": 0.5995, "num_input_tokens_seen": 382367780, "step": 6824 }, { "epoch": 15.198218262806236, "loss": 0.7526465654373169, "loss_ce": 0.0001441580825485289, "loss_iou": 0.3203125, "loss_num": 0.0225830078125, "loss_xval": 0.75390625, "num_input_tokens_seen": 382367780, "step": 6824 }, { "epoch": 15.200445434298441, "grad_norm": 32.10568618774414, "learning_rate": 1e-06, "loss": 0.5171, "num_input_tokens_seen": 382423572, "step": 6825 }, { "epoch": 15.200445434298441, "loss": 0.5426996946334839, "loss_ce": 9.716699423734099e-05, "loss_iou": 0.2392578125, "loss_num": 0.0128173828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 382423572, "step": 6825 }, { "epoch": 15.202672605790646, "grad_norm": 30.0792293548584, "learning_rate": 1e-06, "loss": 0.4723, "num_input_tokens_seen": 382476324, "step": 6826 }, { "epoch": 15.202672605790646, "loss": 0.47158297896385193, "loss_ce": 0.0001474320306442678, "loss_iou": 0.2060546875, "loss_num": 0.01165771484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 382476324, "step": 6826 }, { "epoch": 15.20489977728285, "grad_norm": 25.993913650512695, "learning_rate": 1e-06, "loss": 0.3837, "num_input_tokens_seen": 382531860, "step": 6827 }, { "epoch": 15.20489977728285, "loss": 0.37444430589675903, "loss_ce": 0.00011569406342459843, "loss_iou": 0.15625, "loss_num": 0.012451171875, "loss_xval": 0.375, "num_input_tokens_seen": 382531860, "step": 6827 }, { "epoch": 15.207126948775056, "grad_norm": 20.49637794494629, "learning_rate": 1e-06, "loss": 0.5971, "num_input_tokens_seen": 382587412, "step": 6828 }, { "epoch": 15.207126948775056, "loss": 0.459941565990448, "loss_ce": 0.00010270022903569043, "loss_iou": 0.2041015625, "loss_num": 0.010498046875, "loss_xval": 0.458984375, "num_input_tokens_seen": 382587412, "step": 6828 }, { "epoch": 15.20935412026726, "grad_norm": 16.51508331298828, "learning_rate": 1e-06, "loss": 0.4748, "num_input_tokens_seen": 382642572, "step": 6829 }, { "epoch": 15.20935412026726, "loss": 0.5448847413063049, "loss_ce": 8.491726475767791e-05, "loss_iou": 0.2333984375, "loss_num": 0.01556396484375, "loss_xval": 0.54296875, "num_input_tokens_seen": 382642572, "step": 6829 }, { "epoch": 15.211581291759465, "grad_norm": 17.06146240234375, "learning_rate": 1e-06, "loss": 0.4023, "num_input_tokens_seen": 382699556, "step": 6830 }, { "epoch": 15.211581291759465, "loss": 0.424904465675354, "loss_ce": 0.00022184928820934147, "loss_iou": 0.1904296875, "loss_num": 0.0086669921875, "loss_xval": 0.423828125, "num_input_tokens_seen": 382699556, "step": 6830 }, { "epoch": 15.21380846325167, "grad_norm": 18.930295944213867, "learning_rate": 1e-06, "loss": 0.4568, "num_input_tokens_seen": 382757356, "step": 6831 }, { "epoch": 15.21380846325167, "loss": 0.5095734596252441, "loss_ce": 0.0004182119155302644, "loss_iou": 0.201171875, "loss_num": 0.0211181640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 382757356, "step": 6831 }, { "epoch": 15.216035634743875, "grad_norm": 21.91002655029297, "learning_rate": 1e-06, "loss": 0.375, "num_input_tokens_seen": 382811564, "step": 6832 }, { "epoch": 15.216035634743875, "loss": 0.40269356966018677, "loss_ce": 0.00010566625132923946, "loss_iou": 0.1826171875, "loss_num": 0.0076904296875, "loss_xval": 0.40234375, "num_input_tokens_seen": 382811564, "step": 6832 }, { "epoch": 15.21826280623608, "grad_norm": 26.341665267944336, "learning_rate": 1e-06, "loss": 0.4079, "num_input_tokens_seen": 382865496, "step": 6833 }, { "epoch": 15.21826280623608, "loss": 0.4928325414657593, "loss_ce": 9.574595605954528e-05, "loss_iou": 0.1962890625, "loss_num": 0.02001953125, "loss_xval": 0.4921875, "num_input_tokens_seen": 382865496, "step": 6833 }, { "epoch": 15.220489977728285, "grad_norm": 21.50320816040039, "learning_rate": 1e-06, "loss": 0.355, "num_input_tokens_seen": 382922332, "step": 6834 }, { "epoch": 15.220489977728285, "loss": 0.38630521297454834, "loss_ce": 0.0007308792555704713, "loss_iou": 0.162109375, "loss_num": 0.01226806640625, "loss_xval": 0.384765625, "num_input_tokens_seen": 382922332, "step": 6834 }, { "epoch": 15.22271714922049, "grad_norm": 22.518569946289062, "learning_rate": 1e-06, "loss": 0.4254, "num_input_tokens_seen": 382980300, "step": 6835 }, { "epoch": 15.22271714922049, "loss": 0.3738847076892853, "loss_ce": 0.00010540773655520752, "loss_iou": 0.16796875, "loss_num": 0.00750732421875, "loss_xval": 0.373046875, "num_input_tokens_seen": 382980300, "step": 6835 }, { "epoch": 15.224944320712694, "grad_norm": 20.747737884521484, "learning_rate": 1e-06, "loss": 0.4773, "num_input_tokens_seen": 383038560, "step": 6836 }, { "epoch": 15.224944320712694, "loss": 0.6503742337226868, "loss_ce": 0.000105653190985322, "loss_iou": 0.267578125, "loss_num": 0.0228271484375, "loss_xval": 0.6484375, "num_input_tokens_seen": 383038560, "step": 6836 }, { "epoch": 15.2271714922049, "grad_norm": 31.612144470214844, "learning_rate": 1e-06, "loss": 0.3559, "num_input_tokens_seen": 383093332, "step": 6837 }, { "epoch": 15.2271714922049, "loss": 0.3483448326587677, "loss_ce": 7.823983469279483e-05, "loss_iou": 0.1494140625, "loss_num": 0.010009765625, "loss_xval": 0.34765625, "num_input_tokens_seen": 383093332, "step": 6837 }, { "epoch": 15.229398663697104, "grad_norm": 27.15004539489746, "learning_rate": 1e-06, "loss": 0.3923, "num_input_tokens_seen": 383150088, "step": 6838 }, { "epoch": 15.229398663697104, "loss": 0.3999464511871338, "loss_ce": 0.00028826179914176464, "loss_iou": 0.1787109375, "loss_num": 0.00836181640625, "loss_xval": 0.400390625, "num_input_tokens_seen": 383150088, "step": 6838 }, { "epoch": 15.231625835189309, "grad_norm": 20.663650512695312, "learning_rate": 1e-06, "loss": 0.5478, "num_input_tokens_seen": 383208636, "step": 6839 }, { "epoch": 15.231625835189309, "loss": 0.5868840217590332, "loss_ce": 9.200301428791136e-05, "loss_iou": 0.2392578125, "loss_num": 0.021728515625, "loss_xval": 0.5859375, "num_input_tokens_seen": 383208636, "step": 6839 }, { "epoch": 15.233853006681514, "grad_norm": 15.867166519165039, "learning_rate": 1e-06, "loss": 0.4572, "num_input_tokens_seen": 383262456, "step": 6840 }, { "epoch": 15.233853006681514, "loss": 0.5546283721923828, "loss_ce": 0.0018329141894355416, "loss_iou": 0.244140625, "loss_num": 0.01251220703125, "loss_xval": 0.5546875, "num_input_tokens_seen": 383262456, "step": 6840 }, { "epoch": 15.236080178173719, "grad_norm": 22.131988525390625, "learning_rate": 1e-06, "loss": 0.3841, "num_input_tokens_seen": 383319500, "step": 6841 }, { "epoch": 15.236080178173719, "loss": 0.3393481373786926, "loss_ce": 0.00011472964251879603, "loss_iou": 0.1396484375, "loss_num": 0.0120849609375, "loss_xval": 0.33984375, "num_input_tokens_seen": 383319500, "step": 6841 }, { "epoch": 15.238307349665924, "grad_norm": 17.503231048583984, "learning_rate": 1e-06, "loss": 0.5277, "num_input_tokens_seen": 383373476, "step": 6842 }, { "epoch": 15.238307349665924, "loss": 0.6524415612220764, "loss_ce": 9.780684194993228e-05, "loss_iou": 0.267578125, "loss_num": 0.023681640625, "loss_xval": 0.65234375, "num_input_tokens_seen": 383373476, "step": 6842 }, { "epoch": 15.240534521158128, "grad_norm": 27.421920776367188, "learning_rate": 1e-06, "loss": 0.3908, "num_input_tokens_seen": 383430396, "step": 6843 }, { "epoch": 15.240534521158128, "loss": 0.2873254120349884, "loss_ce": 9.397394023835659e-05, "loss_iou": 0.1279296875, "loss_num": 0.0064697265625, "loss_xval": 0.287109375, "num_input_tokens_seen": 383430396, "step": 6843 }, { "epoch": 15.242761692650333, "grad_norm": 18.538227081298828, "learning_rate": 1e-06, "loss": 0.8737, "num_input_tokens_seen": 383486732, "step": 6844 }, { "epoch": 15.242761692650333, "loss": 0.9400348663330078, "loss_ce": 9.340968244941905e-05, "loss_iou": 0.3984375, "loss_num": 0.028564453125, "loss_xval": 0.94140625, "num_input_tokens_seen": 383486732, "step": 6844 }, { "epoch": 15.244988864142538, "grad_norm": 18.570964813232422, "learning_rate": 1e-06, "loss": 0.4564, "num_input_tokens_seen": 383544740, "step": 6845 }, { "epoch": 15.244988864142538, "loss": 0.507795512676239, "loss_ce": 0.00010509089042898268, "loss_iou": 0.1962890625, "loss_num": 0.0230712890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 383544740, "step": 6845 }, { "epoch": 15.247216035634743, "grad_norm": 25.994709014892578, "learning_rate": 1e-06, "loss": 0.3808, "num_input_tokens_seen": 383601352, "step": 6846 }, { "epoch": 15.247216035634743, "loss": 0.4001128673553467, "loss_ce": 8.847533172229305e-05, "loss_iou": 0.1806640625, "loss_num": 0.0076904296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 383601352, "step": 6846 }, { "epoch": 15.249443207126948, "grad_norm": 30.125736236572266, "learning_rate": 1e-06, "loss": 0.3879, "num_input_tokens_seen": 383656492, "step": 6847 }, { "epoch": 15.249443207126948, "loss": 0.33361074328422546, "loss_ce": 0.00011466215801192448, "loss_iou": 0.1494140625, "loss_num": 0.00677490234375, "loss_xval": 0.333984375, "num_input_tokens_seen": 383656492, "step": 6847 }, { "epoch": 15.251670378619155, "grad_norm": 16.290237426757812, "learning_rate": 1e-06, "loss": 0.5262, "num_input_tokens_seen": 383713968, "step": 6848 }, { "epoch": 15.251670378619155, "loss": 0.4467582702636719, "loss_ce": 0.00010298557754140347, "loss_iou": 0.1904296875, "loss_num": 0.01300048828125, "loss_xval": 0.447265625, "num_input_tokens_seen": 383713968, "step": 6848 }, { "epoch": 15.25389755011136, "grad_norm": 32.09817886352539, "learning_rate": 1e-06, "loss": 0.5307, "num_input_tokens_seen": 383769468, "step": 6849 }, { "epoch": 15.25389755011136, "loss": 0.5073274970054626, "loss_ce": 0.0001253713999176398, "loss_iou": 0.2021484375, "loss_num": 0.0203857421875, "loss_xval": 0.5078125, "num_input_tokens_seen": 383769468, "step": 6849 }, { "epoch": 15.256124721603564, "grad_norm": 19.521888732910156, "learning_rate": 1e-06, "loss": 0.4386, "num_input_tokens_seen": 383826068, "step": 6850 }, { "epoch": 15.256124721603564, "loss": 0.3947462737560272, "loss_ce": 9.295322524849325e-05, "loss_iou": 0.1494140625, "loss_num": 0.0191650390625, "loss_xval": 0.39453125, "num_input_tokens_seen": 383826068, "step": 6850 }, { "epoch": 15.25835189309577, "grad_norm": 18.782377243041992, "learning_rate": 1e-06, "loss": 0.2575, "num_input_tokens_seen": 383883012, "step": 6851 }, { "epoch": 15.25835189309577, "loss": 0.2647382318973541, "loss_ce": 8.9788663899526e-05, "loss_iou": 0.109375, "loss_num": 0.009033203125, "loss_xval": 0.265625, "num_input_tokens_seen": 383883012, "step": 6851 }, { "epoch": 15.260579064587974, "grad_norm": 21.034971237182617, "learning_rate": 1e-06, "loss": 0.4045, "num_input_tokens_seen": 383936956, "step": 6852 }, { "epoch": 15.260579064587974, "loss": 0.4515724182128906, "loss_ce": 9.537780715618283e-05, "loss_iou": 0.185546875, "loss_num": 0.015869140625, "loss_xval": 0.451171875, "num_input_tokens_seen": 383936956, "step": 6852 }, { "epoch": 15.262806236080179, "grad_norm": 32.299102783203125, "learning_rate": 1e-06, "loss": 0.3529, "num_input_tokens_seen": 383993404, "step": 6853 }, { "epoch": 15.262806236080179, "loss": 0.31591275334358215, "loss_ce": 0.00011685363278957084, "loss_iou": 0.142578125, "loss_num": 0.00628662109375, "loss_xval": 0.31640625, "num_input_tokens_seen": 383993404, "step": 6853 }, { "epoch": 15.265033407572384, "grad_norm": 17.330768585205078, "learning_rate": 1e-06, "loss": 0.485, "num_input_tokens_seen": 384048856, "step": 6854 }, { "epoch": 15.265033407572384, "loss": 0.5928666591644287, "loss_ce": 0.00021531574020627886, "loss_iou": 0.2255859375, "loss_num": 0.0281982421875, "loss_xval": 0.59375, "num_input_tokens_seen": 384048856, "step": 6854 }, { "epoch": 15.267260579064589, "grad_norm": 19.941410064697266, "learning_rate": 1e-06, "loss": 0.4413, "num_input_tokens_seen": 384104048, "step": 6855 }, { "epoch": 15.267260579064589, "loss": 0.40443968772888184, "loss_ce": 8.178211282938719e-05, "loss_iou": 0.1748046875, "loss_num": 0.0111083984375, "loss_xval": 0.404296875, "num_input_tokens_seen": 384104048, "step": 6855 }, { "epoch": 15.269487750556793, "grad_norm": 19.013338088989258, "learning_rate": 1e-06, "loss": 0.52, "num_input_tokens_seen": 384155284, "step": 6856 }, { "epoch": 15.269487750556793, "loss": 0.3980572819709778, "loss_ce": 0.00010806175851030275, "loss_iou": 0.169921875, "loss_num": 0.01177978515625, "loss_xval": 0.3984375, "num_input_tokens_seen": 384155284, "step": 6856 }, { "epoch": 15.271714922048998, "grad_norm": 17.63640594482422, "learning_rate": 1e-06, "loss": 0.4304, "num_input_tokens_seen": 384212084, "step": 6857 }, { "epoch": 15.271714922048998, "loss": 0.46271538734436035, "loss_ce": 0.00025199330411851406, "loss_iou": 0.1845703125, "loss_num": 0.0184326171875, "loss_xval": 0.462890625, "num_input_tokens_seen": 384212084, "step": 6857 }, { "epoch": 15.273942093541203, "grad_norm": 17.39361572265625, "learning_rate": 1e-06, "loss": 0.5069, "num_input_tokens_seen": 384266828, "step": 6858 }, { "epoch": 15.273942093541203, "loss": 0.4388231039047241, "loss_ce": 0.0001023877048282884, "loss_iou": 0.18359375, "loss_num": 0.01422119140625, "loss_xval": 0.439453125, "num_input_tokens_seen": 384266828, "step": 6858 }, { "epoch": 15.276169265033408, "grad_norm": 20.657392501831055, "learning_rate": 1e-06, "loss": 0.3784, "num_input_tokens_seen": 384320760, "step": 6859 }, { "epoch": 15.276169265033408, "loss": 0.3058285713195801, "loss_ce": 0.00010348795331083238, "loss_iou": 0.140625, "loss_num": 0.004791259765625, "loss_xval": 0.306640625, "num_input_tokens_seen": 384320760, "step": 6859 }, { "epoch": 15.278396436525613, "grad_norm": 17.122583389282227, "learning_rate": 1e-06, "loss": 0.3612, "num_input_tokens_seen": 384377020, "step": 6860 }, { "epoch": 15.278396436525613, "loss": 0.33143696188926697, "loss_ce": 0.0001076041953638196, "loss_iou": 0.138671875, "loss_num": 0.0107421875, "loss_xval": 0.33203125, "num_input_tokens_seen": 384377020, "step": 6860 }, { "epoch": 15.280623608017818, "grad_norm": 24.28658676147461, "learning_rate": 1e-06, "loss": 0.4031, "num_input_tokens_seen": 384431080, "step": 6861 }, { "epoch": 15.280623608017818, "loss": 0.518648624420166, "loss_ce": 9.391328057972714e-05, "loss_iou": 0.21875, "loss_num": 0.0162353515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 384431080, "step": 6861 }, { "epoch": 15.282850779510023, "grad_norm": 11.215580940246582, "learning_rate": 1e-06, "loss": 0.304, "num_input_tokens_seen": 384487584, "step": 6862 }, { "epoch": 15.282850779510023, "loss": 0.31601682305336, "loss_ce": 9.885276085697114e-05, "loss_iou": 0.1357421875, "loss_num": 0.00885009765625, "loss_xval": 0.31640625, "num_input_tokens_seen": 384487584, "step": 6862 }, { "epoch": 15.285077951002227, "grad_norm": 22.595476150512695, "learning_rate": 1e-06, "loss": 0.3841, "num_input_tokens_seen": 384544196, "step": 6863 }, { "epoch": 15.285077951002227, "loss": 0.49600616097450256, "loss_ce": 0.00015653966693207622, "loss_iou": 0.2021484375, "loss_num": 0.018310546875, "loss_xval": 0.49609375, "num_input_tokens_seen": 384544196, "step": 6863 }, { "epoch": 15.287305122494432, "grad_norm": 65.94528198242188, "learning_rate": 1e-06, "loss": 0.4559, "num_input_tokens_seen": 384598184, "step": 6864 }, { "epoch": 15.287305122494432, "loss": 0.5419918894767761, "loss_ce": 0.00012176702875876799, "loss_iou": 0.248046875, "loss_num": 0.009033203125, "loss_xval": 0.54296875, "num_input_tokens_seen": 384598184, "step": 6864 }, { "epoch": 15.289532293986637, "grad_norm": 18.129741668701172, "learning_rate": 1e-06, "loss": 0.4641, "num_input_tokens_seen": 384655212, "step": 6865 }, { "epoch": 15.289532293986637, "loss": 0.5154638290405273, "loss_ce": 8.296032319776714e-05, "loss_iou": 0.2138671875, "loss_num": 0.0177001953125, "loss_xval": 0.515625, "num_input_tokens_seen": 384655212, "step": 6865 }, { "epoch": 15.291759465478842, "grad_norm": 33.346805572509766, "learning_rate": 1e-06, "loss": 0.4791, "num_input_tokens_seen": 384711224, "step": 6866 }, { "epoch": 15.291759465478842, "loss": 0.5016932487487793, "loss_ce": 0.00010639546962920576, "loss_iou": 0.2294921875, "loss_num": 0.0086669921875, "loss_xval": 0.5, "num_input_tokens_seen": 384711224, "step": 6866 }, { "epoch": 15.293986636971047, "grad_norm": 17.753189086914062, "learning_rate": 1e-06, "loss": 0.507, "num_input_tokens_seen": 384770584, "step": 6867 }, { "epoch": 15.293986636971047, "loss": 0.47503072023391724, "loss_ce": 0.00011617955169640481, "loss_iou": 0.1953125, "loss_num": 0.0169677734375, "loss_xval": 0.474609375, "num_input_tokens_seen": 384770584, "step": 6867 }, { "epoch": 15.296213808463252, "grad_norm": 21.835477828979492, "learning_rate": 1e-06, "loss": 0.3904, "num_input_tokens_seen": 384827512, "step": 6868 }, { "epoch": 15.296213808463252, "loss": 0.3441203534603119, "loss_ce": 8.04224400781095e-05, "loss_iou": 0.142578125, "loss_num": 0.0118408203125, "loss_xval": 0.34375, "num_input_tokens_seen": 384827512, "step": 6868 }, { "epoch": 15.298440979955457, "grad_norm": 11.53410816192627, "learning_rate": 1e-06, "loss": 0.3595, "num_input_tokens_seen": 384884920, "step": 6869 }, { "epoch": 15.298440979955457, "loss": 0.42830324172973633, "loss_ce": 8.056573278736323e-05, "loss_iou": 0.189453125, "loss_num": 0.01007080078125, "loss_xval": 0.427734375, "num_input_tokens_seen": 384884920, "step": 6869 }, { "epoch": 15.300668151447661, "grad_norm": 24.92241668701172, "learning_rate": 1e-06, "loss": 0.4735, "num_input_tokens_seen": 384940092, "step": 6870 }, { "epoch": 15.300668151447661, "loss": 0.30489498376846313, "loss_ce": 8.541756687918678e-05, "loss_iou": 0.1328125, "loss_num": 0.0078125, "loss_xval": 0.3046875, "num_input_tokens_seen": 384940092, "step": 6870 }, { "epoch": 15.302895322939866, "grad_norm": 24.968305587768555, "learning_rate": 1e-06, "loss": 0.48, "num_input_tokens_seen": 384996776, "step": 6871 }, { "epoch": 15.302895322939866, "loss": 0.4336788058280945, "loss_ce": 8.504305151291192e-05, "loss_iou": 0.181640625, "loss_num": 0.01385498046875, "loss_xval": 0.43359375, "num_input_tokens_seen": 384996776, "step": 6871 }, { "epoch": 15.305122494432071, "grad_norm": 15.246042251586914, "learning_rate": 1e-06, "loss": 0.4207, "num_input_tokens_seen": 385056728, "step": 6872 }, { "epoch": 15.305122494432071, "loss": 0.35528939962387085, "loss_ce": 9.531193063594401e-05, "loss_iou": 0.150390625, "loss_num": 0.01080322265625, "loss_xval": 0.35546875, "num_input_tokens_seen": 385056728, "step": 6872 }, { "epoch": 15.307349665924276, "grad_norm": 18.476036071777344, "learning_rate": 1e-06, "loss": 0.3138, "num_input_tokens_seen": 385113780, "step": 6873 }, { "epoch": 15.307349665924276, "loss": 0.3595203459262848, "loss_ce": 0.00011481612455099821, "loss_iou": 0.1640625, "loss_num": 0.006439208984375, "loss_xval": 0.359375, "num_input_tokens_seen": 385113780, "step": 6873 }, { "epoch": 15.309576837416481, "grad_norm": 17.357139587402344, "learning_rate": 1e-06, "loss": 0.3486, "num_input_tokens_seen": 385169428, "step": 6874 }, { "epoch": 15.309576837416481, "loss": 0.45047512650489807, "loss_ce": 9.672968735685572e-05, "loss_iou": 0.205078125, "loss_num": 0.0079345703125, "loss_xval": 0.451171875, "num_input_tokens_seen": 385169428, "step": 6874 }, { "epoch": 15.311804008908686, "grad_norm": 16.00808334350586, "learning_rate": 1e-06, "loss": 0.4884, "num_input_tokens_seen": 385226272, "step": 6875 }, { "epoch": 15.311804008908686, "loss": 0.48656389117240906, "loss_ce": 0.00011366438411641866, "loss_iou": 0.2109375, "loss_num": 0.01300048828125, "loss_xval": 0.486328125, "num_input_tokens_seen": 385226272, "step": 6875 }, { "epoch": 15.31403118040089, "grad_norm": 22.285694122314453, "learning_rate": 1e-06, "loss": 0.3616, "num_input_tokens_seen": 385281308, "step": 6876 }, { "epoch": 15.31403118040089, "loss": 0.30913233757019043, "loss_ce": 8.081403211690485e-05, "loss_iou": 0.11767578125, "loss_num": 0.01470947265625, "loss_xval": 0.30859375, "num_input_tokens_seen": 385281308, "step": 6876 }, { "epoch": 15.316258351893095, "grad_norm": 406.5437927246094, "learning_rate": 1e-06, "loss": 0.4264, "num_input_tokens_seen": 385338752, "step": 6877 }, { "epoch": 15.316258351893095, "loss": 0.3918250799179077, "loss_ce": 0.00010145184933207929, "loss_iou": 0.17578125, "loss_num": 0.007781982421875, "loss_xval": 0.392578125, "num_input_tokens_seen": 385338752, "step": 6877 }, { "epoch": 15.3184855233853, "grad_norm": 17.710416793823242, "learning_rate": 1e-06, "loss": 0.3374, "num_input_tokens_seen": 385396576, "step": 6878 }, { "epoch": 15.3184855233853, "loss": 0.35166865587234497, "loss_ce": 0.00010614388156682253, "loss_iou": 0.1552734375, "loss_num": 0.00836181640625, "loss_xval": 0.3515625, "num_input_tokens_seen": 385396576, "step": 6878 }, { "epoch": 15.320712694877505, "grad_norm": 16.165138244628906, "learning_rate": 1e-06, "loss": 0.4187, "num_input_tokens_seen": 385454284, "step": 6879 }, { "epoch": 15.320712694877505, "loss": 0.4211636483669281, "loss_ce": 0.00014315356384031475, "loss_iou": 0.185546875, "loss_num": 0.0098876953125, "loss_xval": 0.421875, "num_input_tokens_seen": 385454284, "step": 6879 }, { "epoch": 15.32293986636971, "grad_norm": 19.196983337402344, "learning_rate": 1e-06, "loss": 0.4114, "num_input_tokens_seen": 385511284, "step": 6880 }, { "epoch": 15.32293986636971, "loss": 0.42685467004776, "loss_ce": 9.687192505225539e-05, "loss_iou": 0.181640625, "loss_num": 0.01263427734375, "loss_xval": 0.42578125, "num_input_tokens_seen": 385511284, "step": 6880 }, { "epoch": 15.325167037861915, "grad_norm": 25.625106811523438, "learning_rate": 1e-06, "loss": 0.3899, "num_input_tokens_seen": 385565236, "step": 6881 }, { "epoch": 15.325167037861915, "loss": 0.3682980537414551, "loss_ce": 0.00013401404430624098, "loss_iou": 0.1640625, "loss_num": 0.0078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 385565236, "step": 6881 }, { "epoch": 15.32739420935412, "grad_norm": 21.3586368560791, "learning_rate": 1e-06, "loss": 0.4805, "num_input_tokens_seen": 385619852, "step": 6882 }, { "epoch": 15.32739420935412, "loss": 0.5712690353393555, "loss_ce": 0.00010209472384303808, "loss_iou": 0.26953125, "loss_num": 0.006378173828125, "loss_xval": 0.5703125, "num_input_tokens_seen": 385619852, "step": 6882 }, { "epoch": 15.329621380846325, "grad_norm": 20.074663162231445, "learning_rate": 1e-06, "loss": 0.4422, "num_input_tokens_seen": 385675444, "step": 6883 }, { "epoch": 15.329621380846325, "loss": 0.44748926162719727, "loss_ce": 0.00010156280768569559, "loss_iou": 0.1943359375, "loss_num": 0.0115966796875, "loss_xval": 0.447265625, "num_input_tokens_seen": 385675444, "step": 6883 }, { "epoch": 15.33184855233853, "grad_norm": 19.188108444213867, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 385731136, "step": 6884 }, { "epoch": 15.33184855233853, "loss": 0.269458532333374, "loss_ce": 0.0001714447425911203, "loss_iou": 0.123046875, "loss_num": 0.004638671875, "loss_xval": 0.26953125, "num_input_tokens_seen": 385731136, "step": 6884 }, { "epoch": 15.334075723830734, "grad_norm": 16.712444305419922, "learning_rate": 1e-06, "loss": 0.4189, "num_input_tokens_seen": 385788012, "step": 6885 }, { "epoch": 15.334075723830734, "loss": 0.4538283050060272, "loss_ce": 9.298422082792968e-05, "loss_iou": 0.2021484375, "loss_num": 0.00982666015625, "loss_xval": 0.453125, "num_input_tokens_seen": 385788012, "step": 6885 }, { "epoch": 15.33630289532294, "grad_norm": 20.65199089050293, "learning_rate": 1e-06, "loss": 0.5626, "num_input_tokens_seen": 385846652, "step": 6886 }, { "epoch": 15.33630289532294, "loss": 0.44176751375198364, "loss_ce": 0.00011710204125847667, "loss_iou": 0.2001953125, "loss_num": 0.00823974609375, "loss_xval": 0.44140625, "num_input_tokens_seen": 385846652, "step": 6886 }, { "epoch": 15.338530066815144, "grad_norm": 36.08283996582031, "learning_rate": 1e-06, "loss": 0.4439, "num_input_tokens_seen": 385903884, "step": 6887 }, { "epoch": 15.338530066815144, "loss": 0.3505267798900604, "loss_ce": 0.00018498269491828978, "loss_iou": 0.1513671875, "loss_num": 0.0093994140625, "loss_xval": 0.349609375, "num_input_tokens_seen": 385903884, "step": 6887 }, { "epoch": 15.340757238307349, "grad_norm": 27.132768630981445, "learning_rate": 1e-06, "loss": 0.4734, "num_input_tokens_seen": 385958580, "step": 6888 }, { "epoch": 15.340757238307349, "loss": 0.42540186643600464, "loss_ce": 0.00010891577403526753, "loss_iou": 0.19140625, "loss_num": 0.00848388671875, "loss_xval": 0.42578125, "num_input_tokens_seen": 385958580, "step": 6888 }, { "epoch": 15.342984409799554, "grad_norm": 18.235618591308594, "learning_rate": 1e-06, "loss": 0.4809, "num_input_tokens_seen": 386016000, "step": 6889 }, { "epoch": 15.342984409799554, "loss": 0.4169900715351105, "loss_ce": 0.00011995389650110155, "loss_iou": 0.1982421875, "loss_num": 0.004058837890625, "loss_xval": 0.416015625, "num_input_tokens_seen": 386016000, "step": 6889 }, { "epoch": 15.345211581291759, "grad_norm": 19.752094268798828, "learning_rate": 1e-06, "loss": 0.6642, "num_input_tokens_seen": 386071424, "step": 6890 }, { "epoch": 15.345211581291759, "loss": 0.9156758189201355, "loss_ce": 0.00014846479461994022, "loss_iou": 0.380859375, "loss_num": 0.03076171875, "loss_xval": 0.9140625, "num_input_tokens_seen": 386071424, "step": 6890 }, { "epoch": 15.347438752783964, "grad_norm": 20.173500061035156, "learning_rate": 1e-06, "loss": 0.5695, "num_input_tokens_seen": 386121956, "step": 6891 }, { "epoch": 15.347438752783964, "loss": 0.2851613163948059, "loss_ce": 9.661800868343562e-05, "loss_iou": 0.126953125, "loss_num": 0.006134033203125, "loss_xval": 0.28515625, "num_input_tokens_seen": 386121956, "step": 6891 }, { "epoch": 15.34966592427617, "grad_norm": 42.235107421875, "learning_rate": 1e-06, "loss": 0.5524, "num_input_tokens_seen": 386174436, "step": 6892 }, { "epoch": 15.34966592427617, "loss": 0.5490976572036743, "loss_ce": 0.00014746160013601184, "loss_iou": 0.236328125, "loss_num": 0.01519775390625, "loss_xval": 0.55078125, "num_input_tokens_seen": 386174436, "step": 6892 }, { "epoch": 15.351893095768375, "grad_norm": 62.30870056152344, "learning_rate": 1e-06, "loss": 0.4912, "num_input_tokens_seen": 386229148, "step": 6893 }, { "epoch": 15.351893095768375, "loss": 0.4774854779243469, "loss_ce": 0.00012952039833180606, "loss_iou": 0.203125, "loss_num": 0.014404296875, "loss_xval": 0.4765625, "num_input_tokens_seen": 386229148, "step": 6893 }, { "epoch": 15.35412026726058, "grad_norm": 16.55988883972168, "learning_rate": 1e-06, "loss": 0.3568, "num_input_tokens_seen": 386284964, "step": 6894 }, { "epoch": 15.35412026726058, "loss": 0.4656769633293152, "loss_ce": 9.315234638052061e-05, "loss_iou": 0.185546875, "loss_num": 0.018798828125, "loss_xval": 0.46484375, "num_input_tokens_seen": 386284964, "step": 6894 }, { "epoch": 15.356347438752785, "grad_norm": 13.955078125, "learning_rate": 1e-06, "loss": 0.3558, "num_input_tokens_seen": 386342464, "step": 6895 }, { "epoch": 15.356347438752785, "loss": 0.3424103856086731, "loss_ce": 0.00012523065379355103, "loss_iou": 0.154296875, "loss_num": 0.00665283203125, "loss_xval": 0.341796875, "num_input_tokens_seen": 386342464, "step": 6895 }, { "epoch": 15.35857461024499, "grad_norm": 17.421804428100586, "learning_rate": 1e-06, "loss": 0.707, "num_input_tokens_seen": 386396108, "step": 6896 }, { "epoch": 15.35857461024499, "loss": 0.33755582571029663, "loss_ce": 9.246024274034426e-05, "loss_iou": 0.150390625, "loss_num": 0.00732421875, "loss_xval": 0.337890625, "num_input_tokens_seen": 386396108, "step": 6896 }, { "epoch": 15.360801781737194, "grad_norm": 20.638612747192383, "learning_rate": 1e-06, "loss": 0.3819, "num_input_tokens_seen": 386452760, "step": 6897 }, { "epoch": 15.360801781737194, "loss": 0.267736554145813, "loss_ce": 9.742352995090187e-05, "loss_iou": 0.11962890625, "loss_num": 0.00567626953125, "loss_xval": 0.267578125, "num_input_tokens_seen": 386452760, "step": 6897 }, { "epoch": 15.3630289532294, "grad_norm": 17.11157989501953, "learning_rate": 1e-06, "loss": 0.4, "num_input_tokens_seen": 386511592, "step": 6898 }, { "epoch": 15.3630289532294, "loss": 0.3749087154865265, "loss_ce": 9.182207577396184e-05, "loss_iou": 0.154296875, "loss_num": 0.0130615234375, "loss_xval": 0.375, "num_input_tokens_seen": 386511592, "step": 6898 }, { "epoch": 15.365256124721604, "grad_norm": 12.453452110290527, "learning_rate": 1e-06, "loss": 0.3186, "num_input_tokens_seen": 386568560, "step": 6899 }, { "epoch": 15.365256124721604, "loss": 0.2840937376022339, "loss_ce": 9.713226609164849e-05, "loss_iou": 0.130859375, "loss_num": 0.00445556640625, "loss_xval": 0.283203125, "num_input_tokens_seen": 386568560, "step": 6899 }, { "epoch": 15.367483296213809, "grad_norm": 19.97893524169922, "learning_rate": 1e-06, "loss": 0.3577, "num_input_tokens_seen": 386623992, "step": 6900 }, { "epoch": 15.367483296213809, "loss": 0.3777737617492676, "loss_ce": 8.821256778901443e-05, "loss_iou": 0.16796875, "loss_num": 0.00799560546875, "loss_xval": 0.376953125, "num_input_tokens_seen": 386623992, "step": 6900 }, { "epoch": 15.369710467706014, "grad_norm": 22.45269775390625, "learning_rate": 1e-06, "loss": 0.3661, "num_input_tokens_seen": 386681452, "step": 6901 }, { "epoch": 15.369710467706014, "loss": 0.2848760187625885, "loss_ce": 8.596775296609849e-05, "loss_iou": 0.1240234375, "loss_num": 0.00726318359375, "loss_xval": 0.28515625, "num_input_tokens_seen": 386681452, "step": 6901 }, { "epoch": 15.371937639198219, "grad_norm": 14.801541328430176, "learning_rate": 1e-06, "loss": 0.4713, "num_input_tokens_seen": 386737060, "step": 6902 }, { "epoch": 15.371937639198219, "loss": 0.5325612425804138, "loss_ce": 9.053810936165974e-05, "loss_iou": 0.232421875, "loss_num": 0.0133056640625, "loss_xval": 0.53125, "num_input_tokens_seen": 386737060, "step": 6902 }, { "epoch": 15.374164810690424, "grad_norm": 41.93655776977539, "learning_rate": 1e-06, "loss": 0.3774, "num_input_tokens_seen": 386792788, "step": 6903 }, { "epoch": 15.374164810690424, "loss": 0.4628770351409912, "loss_ce": 0.00010845393990166485, "loss_iou": 0.2177734375, "loss_num": 0.00543212890625, "loss_xval": 0.462890625, "num_input_tokens_seen": 386792788, "step": 6903 }, { "epoch": 15.376391982182628, "grad_norm": 16.000699996948242, "learning_rate": 1e-06, "loss": 0.3095, "num_input_tokens_seen": 386851464, "step": 6904 }, { "epoch": 15.376391982182628, "loss": 0.2996518611907959, "loss_ce": 9.130668331636116e-05, "loss_iou": 0.1337890625, "loss_num": 0.006317138671875, "loss_xval": 0.298828125, "num_input_tokens_seen": 386851464, "step": 6904 }, { "epoch": 15.378619153674833, "grad_norm": 19.54730987548828, "learning_rate": 1e-06, "loss": 0.36, "num_input_tokens_seen": 386903512, "step": 6905 }, { "epoch": 15.378619153674833, "loss": 0.5413779020309448, "loss_ce": 0.0004843665228690952, "loss_iou": 0.203125, "loss_num": 0.0269775390625, "loss_xval": 0.5390625, "num_input_tokens_seen": 386903512, "step": 6905 }, { "epoch": 15.380846325167038, "grad_norm": 14.333488464355469, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 386958488, "step": 6906 }, { "epoch": 15.380846325167038, "loss": 0.4710484445095062, "loss_ce": 0.0001011955610010773, "loss_iou": 0.197265625, "loss_num": 0.01519775390625, "loss_xval": 0.470703125, "num_input_tokens_seen": 386958488, "step": 6906 }, { "epoch": 15.383073496659243, "grad_norm": 32.25919723510742, "learning_rate": 1e-06, "loss": 0.3692, "num_input_tokens_seen": 387014204, "step": 6907 }, { "epoch": 15.383073496659243, "loss": 0.31186944246292114, "loss_ce": 0.0001018614784698002, "loss_iou": 0.140625, "loss_num": 0.00616455078125, "loss_xval": 0.3125, "num_input_tokens_seen": 387014204, "step": 6907 }, { "epoch": 15.385300668151448, "grad_norm": 22.024864196777344, "learning_rate": 1e-06, "loss": 0.307, "num_input_tokens_seen": 387067160, "step": 6908 }, { "epoch": 15.385300668151448, "loss": 0.329712450504303, "loss_ce": 9.209779818775132e-05, "loss_iou": 0.1533203125, "loss_num": 0.004852294921875, "loss_xval": 0.330078125, "num_input_tokens_seen": 387067160, "step": 6908 }, { "epoch": 15.387527839643653, "grad_norm": 19.91106414794922, "learning_rate": 1e-06, "loss": 0.4446, "num_input_tokens_seen": 387124480, "step": 6909 }, { "epoch": 15.387527839643653, "loss": 0.3715466856956482, "loss_ce": 8.674498531036079e-05, "loss_iou": 0.1708984375, "loss_num": 0.005767822265625, "loss_xval": 0.37109375, "num_input_tokens_seen": 387124480, "step": 6909 }, { "epoch": 15.389755011135858, "grad_norm": 23.40995979309082, "learning_rate": 1e-06, "loss": 0.349, "num_input_tokens_seen": 387183660, "step": 6910 }, { "epoch": 15.389755011135858, "loss": 0.41477489471435547, "loss_ce": 0.00010205684520769864, "loss_iou": 0.1943359375, "loss_num": 0.00518798828125, "loss_xval": 0.4140625, "num_input_tokens_seen": 387183660, "step": 6910 }, { "epoch": 15.391982182628063, "grad_norm": 20.379980087280273, "learning_rate": 1e-06, "loss": 0.4204, "num_input_tokens_seen": 387243208, "step": 6911 }, { "epoch": 15.391982182628063, "loss": 0.4162381589412689, "loss_ce": 0.0001004426449071616, "loss_iou": 0.1953125, "loss_num": 0.00494384765625, "loss_xval": 0.416015625, "num_input_tokens_seen": 387243208, "step": 6911 }, { "epoch": 15.394209354120267, "grad_norm": 13.906294822692871, "learning_rate": 1e-06, "loss": 0.3458, "num_input_tokens_seen": 387298652, "step": 6912 }, { "epoch": 15.394209354120267, "loss": 0.3168086111545563, "loss_ce": 9.718221554066986e-05, "loss_iou": 0.1474609375, "loss_num": 0.004180908203125, "loss_xval": 0.31640625, "num_input_tokens_seen": 387298652, "step": 6912 }, { "epoch": 15.396436525612472, "grad_norm": 18.725072860717773, "learning_rate": 1e-06, "loss": 0.5747, "num_input_tokens_seen": 387353036, "step": 6913 }, { "epoch": 15.396436525612472, "loss": 0.6614874601364136, "loss_ce": 0.00011045205610571429, "loss_iou": 0.28515625, "loss_num": 0.01806640625, "loss_xval": 0.66015625, "num_input_tokens_seen": 387353036, "step": 6913 }, { "epoch": 15.398663697104677, "grad_norm": 17.18587875366211, "learning_rate": 1e-06, "loss": 0.3727, "num_input_tokens_seen": 387408840, "step": 6914 }, { "epoch": 15.398663697104677, "loss": 0.35583341121673584, "loss_ce": 0.00012054783292114735, "loss_iou": 0.16796875, "loss_num": 0.00390625, "loss_xval": 0.35546875, "num_input_tokens_seen": 387408840, "step": 6914 }, { "epoch": 15.400890868596882, "grad_norm": 33.79663848876953, "learning_rate": 1e-06, "loss": 0.4239, "num_input_tokens_seen": 387464996, "step": 6915 }, { "epoch": 15.400890868596882, "loss": 0.4479781687259674, "loss_ce": 0.00010222237324342132, "loss_iou": 0.2041015625, "loss_num": 0.007781982421875, "loss_xval": 0.447265625, "num_input_tokens_seen": 387464996, "step": 6915 }, { "epoch": 15.403118040089087, "grad_norm": 28.106069564819336, "learning_rate": 1e-06, "loss": 0.6411, "num_input_tokens_seen": 387520852, "step": 6916 }, { "epoch": 15.403118040089087, "loss": 0.5481947660446167, "loss_ce": 9.903394675347954e-05, "loss_iou": 0.234375, "loss_num": 0.015869140625, "loss_xval": 0.546875, "num_input_tokens_seen": 387520852, "step": 6916 }, { "epoch": 15.405345211581292, "grad_norm": 14.857207298278809, "learning_rate": 1e-06, "loss": 0.4853, "num_input_tokens_seen": 387573816, "step": 6917 }, { "epoch": 15.405345211581292, "loss": 0.49757710099220276, "loss_ce": 0.00011006787826772779, "loss_iou": 0.2138671875, "loss_num": 0.01385498046875, "loss_xval": 0.498046875, "num_input_tokens_seen": 387573816, "step": 6917 }, { "epoch": 15.407572383073497, "grad_norm": 25.532848358154297, "learning_rate": 1e-06, "loss": 0.3972, "num_input_tokens_seen": 387631328, "step": 6918 }, { "epoch": 15.407572383073497, "loss": 0.32285118103027344, "loss_ce": 9.72603156697005e-05, "loss_iou": 0.146484375, "loss_num": 0.00604248046875, "loss_xval": 0.322265625, "num_input_tokens_seen": 387631328, "step": 6918 }, { "epoch": 15.409799554565701, "grad_norm": 32.86465072631836, "learning_rate": 1e-06, "loss": 0.5478, "num_input_tokens_seen": 387688500, "step": 6919 }, { "epoch": 15.409799554565701, "loss": 0.7613215446472168, "loss_ce": 9.104014316108078e-05, "loss_iou": 0.314453125, "loss_num": 0.0262451171875, "loss_xval": 0.76171875, "num_input_tokens_seen": 387688500, "step": 6919 }, { "epoch": 15.412026726057906, "grad_norm": 30.64493179321289, "learning_rate": 1e-06, "loss": 0.4451, "num_input_tokens_seen": 387743892, "step": 6920 }, { "epoch": 15.412026726057906, "loss": 0.4576044976711273, "loss_ce": 8.494246867485344e-05, "loss_iou": 0.1904296875, "loss_num": 0.015380859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 387743892, "step": 6920 }, { "epoch": 15.414253897550111, "grad_norm": 21.50493621826172, "learning_rate": 1e-06, "loss": 0.6023, "num_input_tokens_seen": 387800900, "step": 6921 }, { "epoch": 15.414253897550111, "loss": 0.6438992023468018, "loss_ce": 0.00010035550076281652, "loss_iou": 0.28125, "loss_num": 0.0166015625, "loss_xval": 0.64453125, "num_input_tokens_seen": 387800900, "step": 6921 }, { "epoch": 15.416481069042316, "grad_norm": 20.963998794555664, "learning_rate": 1e-06, "loss": 0.3538, "num_input_tokens_seen": 387858872, "step": 6922 }, { "epoch": 15.416481069042316, "loss": 0.4122142493724823, "loss_ce": 0.00010487253894098103, "loss_iou": 0.1796875, "loss_num": 0.01068115234375, "loss_xval": 0.412109375, "num_input_tokens_seen": 387858872, "step": 6922 }, { "epoch": 15.41870824053452, "grad_norm": 20.77920913696289, "learning_rate": 1e-06, "loss": 0.4124, "num_input_tokens_seen": 387913556, "step": 6923 }, { "epoch": 15.41870824053452, "loss": 0.2547425329685211, "loss_ce": 0.00022592084133066237, "loss_iou": 0.11181640625, "loss_num": 0.006134033203125, "loss_xval": 0.25390625, "num_input_tokens_seen": 387913556, "step": 6923 }, { "epoch": 15.420935412026726, "grad_norm": 14.842677116394043, "learning_rate": 1e-06, "loss": 0.3154, "num_input_tokens_seen": 387967992, "step": 6924 }, { "epoch": 15.420935412026726, "loss": 0.35966259241104126, "loss_ce": 0.00010450358968228102, "loss_iou": 0.16015625, "loss_num": 0.0078125, "loss_xval": 0.359375, "num_input_tokens_seen": 387967992, "step": 6924 }, { "epoch": 15.42316258351893, "grad_norm": 19.513763427734375, "learning_rate": 1e-06, "loss": 0.449, "num_input_tokens_seen": 388025440, "step": 6925 }, { "epoch": 15.42316258351893, "loss": 0.4793444275856018, "loss_ce": 9.635625610826537e-05, "loss_iou": 0.203125, "loss_num": 0.0147705078125, "loss_xval": 0.478515625, "num_input_tokens_seen": 388025440, "step": 6925 }, { "epoch": 15.425389755011135, "grad_norm": 14.279714584350586, "learning_rate": 1e-06, "loss": 0.4925, "num_input_tokens_seen": 388082980, "step": 6926 }, { "epoch": 15.425389755011135, "loss": 0.5041942596435547, "loss_ce": 0.00041012922883965075, "loss_iou": 0.2041015625, "loss_num": 0.0189208984375, "loss_xval": 0.50390625, "num_input_tokens_seen": 388082980, "step": 6926 }, { "epoch": 15.42761692650334, "grad_norm": 44.215213775634766, "learning_rate": 1e-06, "loss": 0.4468, "num_input_tokens_seen": 388138776, "step": 6927 }, { "epoch": 15.42761692650334, "loss": 0.4352813959121704, "loss_ce": 0.00010072036093333736, "loss_iou": 0.1884765625, "loss_num": 0.01153564453125, "loss_xval": 0.435546875, "num_input_tokens_seen": 388138776, "step": 6927 }, { "epoch": 15.429844097995545, "grad_norm": 14.542059898376465, "learning_rate": 1e-06, "loss": 0.428, "num_input_tokens_seen": 388193644, "step": 6928 }, { "epoch": 15.429844097995545, "loss": 0.5707679986953735, "loss_ce": 8.928313036449254e-05, "loss_iou": 0.2421875, "loss_num": 0.01708984375, "loss_xval": 0.5703125, "num_input_tokens_seen": 388193644, "step": 6928 }, { "epoch": 15.43207126948775, "grad_norm": 21.284255981445312, "learning_rate": 1e-06, "loss": 0.5289, "num_input_tokens_seen": 388250780, "step": 6929 }, { "epoch": 15.43207126948775, "loss": 0.6608084440231323, "loss_ce": 0.0001029052073135972, "loss_iou": 0.287109375, "loss_num": 0.0172119140625, "loss_xval": 0.66015625, "num_input_tokens_seen": 388250780, "step": 6929 }, { "epoch": 15.434298440979955, "grad_norm": 11.952805519104004, "learning_rate": 1e-06, "loss": 0.3343, "num_input_tokens_seen": 388305896, "step": 6930 }, { "epoch": 15.434298440979955, "loss": 0.4233115315437317, "loss_ce": 9.375169611303136e-05, "loss_iou": 0.1650390625, "loss_num": 0.018310546875, "loss_xval": 0.423828125, "num_input_tokens_seen": 388305896, "step": 6930 }, { "epoch": 15.43652561247216, "grad_norm": 21.607667922973633, "learning_rate": 1e-06, "loss": 0.4612, "num_input_tokens_seen": 388362324, "step": 6931 }, { "epoch": 15.43652561247216, "loss": 0.3633829951286316, "loss_ce": 0.0001628020836506039, "loss_iou": 0.1650390625, "loss_num": 0.006500244140625, "loss_xval": 0.36328125, "num_input_tokens_seen": 388362324, "step": 6931 }, { "epoch": 15.438752783964365, "grad_norm": 26.418800354003906, "learning_rate": 1e-06, "loss": 0.4414, "num_input_tokens_seen": 388417860, "step": 6932 }, { "epoch": 15.438752783964365, "loss": 0.38741356134414673, "loss_ce": 8.447450818493962e-05, "loss_iou": 0.1728515625, "loss_num": 0.00848388671875, "loss_xval": 0.38671875, "num_input_tokens_seen": 388417860, "step": 6932 }, { "epoch": 15.44097995545657, "grad_norm": 21.265588760375977, "learning_rate": 1e-06, "loss": 0.4136, "num_input_tokens_seen": 388472732, "step": 6933 }, { "epoch": 15.44097995545657, "loss": 0.42595887184143066, "loss_ce": 0.00011659698066068813, "loss_iou": 0.201171875, "loss_num": 0.004638671875, "loss_xval": 0.42578125, "num_input_tokens_seen": 388472732, "step": 6933 }, { "epoch": 15.443207126948774, "grad_norm": 18.719799041748047, "learning_rate": 1e-06, "loss": 0.4502, "num_input_tokens_seen": 388531568, "step": 6934 }, { "epoch": 15.443207126948774, "loss": 0.4275781214237213, "loss_ce": 8.788651030045003e-05, "loss_iou": 0.181640625, "loss_num": 0.01263427734375, "loss_xval": 0.427734375, "num_input_tokens_seen": 388531568, "step": 6934 }, { "epoch": 15.44543429844098, "grad_norm": 13.221081733703613, "learning_rate": 1e-06, "loss": 0.4555, "num_input_tokens_seen": 388587640, "step": 6935 }, { "epoch": 15.44543429844098, "loss": 0.4827903211116791, "loss_ce": 0.0001243370061274618, "loss_iou": 0.2119140625, "loss_num": 0.011962890625, "loss_xval": 0.482421875, "num_input_tokens_seen": 388587640, "step": 6935 }, { "epoch": 15.447661469933184, "grad_norm": 15.271649360656738, "learning_rate": 1e-06, "loss": 0.3251, "num_input_tokens_seen": 388645528, "step": 6936 }, { "epoch": 15.447661469933184, "loss": 0.35192036628723145, "loss_ce": 8.32095684017986e-05, "loss_iou": 0.154296875, "loss_num": 0.0087890625, "loss_xval": 0.3515625, "num_input_tokens_seen": 388645528, "step": 6936 }, { "epoch": 15.449888641425389, "grad_norm": 18.44422149658203, "learning_rate": 1e-06, "loss": 0.5482, "num_input_tokens_seen": 388698432, "step": 6937 }, { "epoch": 15.449888641425389, "loss": 0.569794774055481, "loss_ce": 9.262157982448116e-05, "loss_iou": 0.2578125, "loss_num": 0.010986328125, "loss_xval": 0.5703125, "num_input_tokens_seen": 388698432, "step": 6937 }, { "epoch": 15.452115812917596, "grad_norm": 23.565710067749023, "learning_rate": 1e-06, "loss": 0.3753, "num_input_tokens_seen": 388757224, "step": 6938 }, { "epoch": 15.452115812917596, "loss": 0.30654728412628174, "loss_ce": 8.977011020760983e-05, "loss_iou": 0.140625, "loss_num": 0.00518798828125, "loss_xval": 0.306640625, "num_input_tokens_seen": 388757224, "step": 6938 }, { "epoch": 15.4543429844098, "grad_norm": 13.924105644226074, "learning_rate": 1e-06, "loss": 0.3849, "num_input_tokens_seen": 388812656, "step": 6939 }, { "epoch": 15.4543429844098, "loss": 0.21841883659362793, "loss_ce": 9.608666005078703e-05, "loss_iou": 0.09619140625, "loss_num": 0.005157470703125, "loss_xval": 0.21875, "num_input_tokens_seen": 388812656, "step": 6939 }, { "epoch": 15.456570155902005, "grad_norm": 21.45891571044922, "learning_rate": 1e-06, "loss": 0.432, "num_input_tokens_seen": 388867552, "step": 6940 }, { "epoch": 15.456570155902005, "loss": 0.4227088689804077, "loss_ce": 0.00010145263513550162, "loss_iou": 0.1767578125, "loss_num": 0.013671875, "loss_xval": 0.421875, "num_input_tokens_seen": 388867552, "step": 6940 }, { "epoch": 15.45879732739421, "grad_norm": 19.371652603149414, "learning_rate": 1e-06, "loss": 0.631, "num_input_tokens_seen": 388923044, "step": 6941 }, { "epoch": 15.45879732739421, "loss": 0.6530581712722778, "loss_ce": 0.00010402440966572613, "loss_iou": 0.29296875, "loss_num": 0.01324462890625, "loss_xval": 0.65234375, "num_input_tokens_seen": 388923044, "step": 6941 }, { "epoch": 15.461024498886415, "grad_norm": 24.440471649169922, "learning_rate": 1e-06, "loss": 0.425, "num_input_tokens_seen": 388976632, "step": 6942 }, { "epoch": 15.461024498886415, "loss": 0.4324702322483063, "loss_ce": 9.719458466861397e-05, "loss_iou": 0.1904296875, "loss_num": 0.010498046875, "loss_xval": 0.431640625, "num_input_tokens_seen": 388976632, "step": 6942 }, { "epoch": 15.46325167037862, "grad_norm": 18.36454963684082, "learning_rate": 1e-06, "loss": 0.3942, "num_input_tokens_seen": 389031268, "step": 6943 }, { "epoch": 15.46325167037862, "loss": 0.39659687876701355, "loss_ce": 0.00011250589159317315, "loss_iou": 0.177734375, "loss_num": 0.00823974609375, "loss_xval": 0.396484375, "num_input_tokens_seen": 389031268, "step": 6943 }, { "epoch": 15.465478841870825, "grad_norm": 17.621511459350586, "learning_rate": 1e-06, "loss": 0.4569, "num_input_tokens_seen": 389089160, "step": 6944 }, { "epoch": 15.465478841870825, "loss": 0.33285778760910034, "loss_ce": 9.410581697011366e-05, "loss_iou": 0.15625, "loss_num": 0.00421142578125, "loss_xval": 0.33203125, "num_input_tokens_seen": 389089160, "step": 6944 }, { "epoch": 15.46770601336303, "grad_norm": 16.278667449951172, "learning_rate": 1e-06, "loss": 0.3584, "num_input_tokens_seen": 389144868, "step": 6945 }, { "epoch": 15.46770601336303, "loss": 0.3341924548149109, "loss_ce": 8.60134678077884e-05, "loss_iou": 0.14453125, "loss_num": 0.009033203125, "loss_xval": 0.333984375, "num_input_tokens_seen": 389144868, "step": 6945 }, { "epoch": 15.469933184855234, "grad_norm": 14.054006576538086, "learning_rate": 1e-06, "loss": 0.4937, "num_input_tokens_seen": 389202724, "step": 6946 }, { "epoch": 15.469933184855234, "loss": 0.4199259281158447, "loss_ce": 0.00012615637388080359, "loss_iou": 0.1689453125, "loss_num": 0.016357421875, "loss_xval": 0.419921875, "num_input_tokens_seen": 389202724, "step": 6946 }, { "epoch": 15.47216035634744, "grad_norm": 16.913305282592773, "learning_rate": 1e-06, "loss": 0.5997, "num_input_tokens_seen": 389260720, "step": 6947 }, { "epoch": 15.47216035634744, "loss": 0.46518251299858093, "loss_ce": 9.461917215958238e-05, "loss_iou": 0.203125, "loss_num": 0.0115966796875, "loss_xval": 0.46484375, "num_input_tokens_seen": 389260720, "step": 6947 }, { "epoch": 15.474387527839644, "grad_norm": 18.17692756652832, "learning_rate": 1e-06, "loss": 0.5238, "num_input_tokens_seen": 389315804, "step": 6948 }, { "epoch": 15.474387527839644, "loss": 0.5708150863647461, "loss_ce": 7.536027260357514e-05, "loss_iou": 0.244140625, "loss_num": 0.0166015625, "loss_xval": 0.5703125, "num_input_tokens_seen": 389315804, "step": 6948 }, { "epoch": 15.476614699331849, "grad_norm": 30.645221710205078, "learning_rate": 1e-06, "loss": 0.393, "num_input_tokens_seen": 389373028, "step": 6949 }, { "epoch": 15.476614699331849, "loss": 0.37821051478385925, "loss_ce": 9.771598706720397e-05, "loss_iou": 0.162109375, "loss_num": 0.01080322265625, "loss_xval": 0.37890625, "num_input_tokens_seen": 389373028, "step": 6949 }, { "epoch": 15.478841870824054, "grad_norm": 23.542387008666992, "learning_rate": 1e-06, "loss": 0.5772, "num_input_tokens_seen": 389429224, "step": 6950 }, { "epoch": 15.478841870824054, "loss": 0.7504802942276001, "loss_ce": 0.001334758591838181, "loss_iou": 0.306640625, "loss_num": 0.0272216796875, "loss_xval": 0.75, "num_input_tokens_seen": 389429224, "step": 6950 }, { "epoch": 15.481069042316259, "grad_norm": 27.925491333007812, "learning_rate": 1e-06, "loss": 0.3852, "num_input_tokens_seen": 389486864, "step": 6951 }, { "epoch": 15.481069042316259, "loss": 0.48157191276550293, "loss_ce": 0.00012661112123169005, "loss_iou": 0.166015625, "loss_num": 0.029541015625, "loss_xval": 0.48046875, "num_input_tokens_seen": 389486864, "step": 6951 }, { "epoch": 15.483296213808464, "grad_norm": 15.1088285446167, "learning_rate": 1e-06, "loss": 0.3681, "num_input_tokens_seen": 389542096, "step": 6952 }, { "epoch": 15.483296213808464, "loss": 0.41208964586257935, "loss_ce": 0.00010235629451926798, "loss_iou": 0.1943359375, "loss_num": 0.004608154296875, "loss_xval": 0.412109375, "num_input_tokens_seen": 389542096, "step": 6952 }, { "epoch": 15.485523385300668, "grad_norm": 20.11035919189453, "learning_rate": 1e-06, "loss": 0.5138, "num_input_tokens_seen": 389596724, "step": 6953 }, { "epoch": 15.485523385300668, "loss": 0.37680959701538086, "loss_ce": 0.00010061028297059238, "loss_iou": 0.1669921875, "loss_num": 0.0086669921875, "loss_xval": 0.376953125, "num_input_tokens_seen": 389596724, "step": 6953 }, { "epoch": 15.487750556792873, "grad_norm": 13.750123977661133, "learning_rate": 1e-06, "loss": 0.5883, "num_input_tokens_seen": 389652700, "step": 6954 }, { "epoch": 15.487750556792873, "loss": 0.6634094715118408, "loss_ce": 0.0001404019130859524, "loss_iou": 0.26171875, "loss_num": 0.028076171875, "loss_xval": 0.6640625, "num_input_tokens_seen": 389652700, "step": 6954 }, { "epoch": 15.489977728285078, "grad_norm": 13.668987274169922, "learning_rate": 1e-06, "loss": 0.3403, "num_input_tokens_seen": 389710304, "step": 6955 }, { "epoch": 15.489977728285078, "loss": 0.33821815252304077, "loss_ce": 8.338829502463341e-05, "loss_iou": 0.1484375, "loss_num": 0.00811767578125, "loss_xval": 0.337890625, "num_input_tokens_seen": 389710304, "step": 6955 }, { "epoch": 15.492204899777283, "grad_norm": 20.135528564453125, "learning_rate": 1e-06, "loss": 0.4608, "num_input_tokens_seen": 389764472, "step": 6956 }, { "epoch": 15.492204899777283, "loss": 0.46713533997535706, "loss_ce": 9.433674858883023e-05, "loss_iou": 0.208984375, "loss_num": 0.01007080078125, "loss_xval": 0.466796875, "num_input_tokens_seen": 389764472, "step": 6956 }, { "epoch": 15.494432071269488, "grad_norm": 19.5330867767334, "learning_rate": 1e-06, "loss": 0.3886, "num_input_tokens_seen": 389820184, "step": 6957 }, { "epoch": 15.494432071269488, "loss": 0.46810320019721985, "loss_ce": 8.56061524245888e-05, "loss_iou": 0.2099609375, "loss_num": 0.00958251953125, "loss_xval": 0.46875, "num_input_tokens_seen": 389820184, "step": 6957 }, { "epoch": 15.496659242761693, "grad_norm": 15.649205207824707, "learning_rate": 1e-06, "loss": 0.3468, "num_input_tokens_seen": 389875816, "step": 6958 }, { "epoch": 15.496659242761693, "loss": 0.3824237287044525, "loss_ce": 9.9498953204602e-05, "loss_iou": 0.1787109375, "loss_num": 0.005157470703125, "loss_xval": 0.3828125, "num_input_tokens_seen": 389875816, "step": 6958 }, { "epoch": 15.498886414253898, "grad_norm": 17.48607063293457, "learning_rate": 1e-06, "loss": 0.4428, "num_input_tokens_seen": 389934296, "step": 6959 }, { "epoch": 15.498886414253898, "loss": 0.45505648851394653, "loss_ce": 0.00010044968803413212, "loss_iou": 0.1953125, "loss_num": 0.0126953125, "loss_xval": 0.455078125, "num_input_tokens_seen": 389934296, "step": 6959 }, { "epoch": 15.501113585746102, "grad_norm": 23.723533630371094, "learning_rate": 1e-06, "loss": 0.3997, "num_input_tokens_seen": 389988784, "step": 6960 }, { "epoch": 15.501113585746102, "loss": 0.5000880360603333, "loss_ce": 8.804388926364481e-05, "loss_iou": 0.224609375, "loss_num": 0.01025390625, "loss_xval": 0.5, "num_input_tokens_seen": 389988784, "step": 6960 }, { "epoch": 15.503340757238307, "grad_norm": 24.47084617614746, "learning_rate": 1e-06, "loss": 0.3908, "num_input_tokens_seen": 390042176, "step": 6961 }, { "epoch": 15.503340757238307, "loss": 0.3538658022880554, "loss_ce": 0.00010604046110529453, "loss_iou": 0.1650390625, "loss_num": 0.00494384765625, "loss_xval": 0.353515625, "num_input_tokens_seen": 390042176, "step": 6961 }, { "epoch": 15.505567928730512, "grad_norm": 19.05753517150879, "learning_rate": 1e-06, "loss": 0.3267, "num_input_tokens_seen": 390098484, "step": 6962 }, { "epoch": 15.505567928730512, "loss": 0.3786473274230957, "loss_ce": 0.00010729986388469115, "loss_iou": 0.166015625, "loss_num": 0.00921630859375, "loss_xval": 0.37890625, "num_input_tokens_seen": 390098484, "step": 6962 }, { "epoch": 15.507795100222717, "grad_norm": 10.61585807800293, "learning_rate": 1e-06, "loss": 0.3648, "num_input_tokens_seen": 390156300, "step": 6963 }, { "epoch": 15.507795100222717, "loss": 0.378567099571228, "loss_ce": 8.807641279418021e-05, "loss_iou": 0.16015625, "loss_num": 0.01165771484375, "loss_xval": 0.37890625, "num_input_tokens_seen": 390156300, "step": 6963 }, { "epoch": 15.510022271714922, "grad_norm": 25.818944931030273, "learning_rate": 1e-06, "loss": 0.3777, "num_input_tokens_seen": 390210808, "step": 6964 }, { "epoch": 15.510022271714922, "loss": 0.42782527208328247, "loss_ce": 9.0899906354025e-05, "loss_iou": 0.1708984375, "loss_num": 0.017333984375, "loss_xval": 0.427734375, "num_input_tokens_seen": 390210808, "step": 6964 }, { "epoch": 15.512249443207127, "grad_norm": 17.772178649902344, "learning_rate": 1e-06, "loss": 0.3301, "num_input_tokens_seen": 390266472, "step": 6965 }, { "epoch": 15.512249443207127, "loss": 0.40292441844940186, "loss_ce": 9.23790066735819e-05, "loss_iou": 0.1865234375, "loss_num": 0.006011962890625, "loss_xval": 0.40234375, "num_input_tokens_seen": 390266472, "step": 6965 }, { "epoch": 15.514476614699332, "grad_norm": 29.70581817626953, "learning_rate": 1e-06, "loss": 0.4923, "num_input_tokens_seen": 390323712, "step": 6966 }, { "epoch": 15.514476614699332, "loss": 0.4616038203239441, "loss_ce": 0.00011701375478878617, "loss_iou": 0.208984375, "loss_num": 0.008544921875, "loss_xval": 0.4609375, "num_input_tokens_seen": 390323712, "step": 6966 }, { "epoch": 15.516703786191536, "grad_norm": 50.45325469970703, "learning_rate": 1e-06, "loss": 0.3747, "num_input_tokens_seen": 390380440, "step": 6967 }, { "epoch": 15.516703786191536, "loss": 0.3414144814014435, "loss_ce": 0.00010588267468847334, "loss_iou": 0.150390625, "loss_num": 0.00811767578125, "loss_xval": 0.341796875, "num_input_tokens_seen": 390380440, "step": 6967 }, { "epoch": 15.518930957683741, "grad_norm": 18.55625343322754, "learning_rate": 1e-06, "loss": 0.4821, "num_input_tokens_seen": 390437412, "step": 6968 }, { "epoch": 15.518930957683741, "loss": 0.48657822608947754, "loss_ce": 0.00012801533739548177, "loss_iou": 0.2265625, "loss_num": 0.006378173828125, "loss_xval": 0.486328125, "num_input_tokens_seen": 390437412, "step": 6968 }, { "epoch": 15.521158129175946, "grad_norm": 25.324066162109375, "learning_rate": 1e-06, "loss": 0.4971, "num_input_tokens_seen": 390491840, "step": 6969 }, { "epoch": 15.521158129175946, "loss": 0.536230206489563, "loss_ce": 9.739406232256442e-05, "loss_iou": 0.23046875, "loss_num": 0.01483154296875, "loss_xval": 0.53515625, "num_input_tokens_seen": 390491840, "step": 6969 }, { "epoch": 15.523385300668151, "grad_norm": 18.16643714904785, "learning_rate": 1e-06, "loss": 0.4849, "num_input_tokens_seen": 390546924, "step": 6970 }, { "epoch": 15.523385300668151, "loss": 0.54595947265625, "loss_ce": 9.153402788797393e-05, "loss_iou": 0.2431640625, "loss_num": 0.01153564453125, "loss_xval": 0.546875, "num_input_tokens_seen": 390546924, "step": 6970 }, { "epoch": 15.525612472160356, "grad_norm": 15.818894386291504, "learning_rate": 1e-06, "loss": 0.5592, "num_input_tokens_seen": 390603308, "step": 6971 }, { "epoch": 15.525612472160356, "loss": 0.597541093826294, "loss_ce": 0.00012892311497125775, "loss_iou": 0.265625, "loss_num": 0.0135498046875, "loss_xval": 0.59765625, "num_input_tokens_seen": 390603308, "step": 6971 }, { "epoch": 15.52783964365256, "grad_norm": 30.140596389770508, "learning_rate": 1e-06, "loss": 0.5127, "num_input_tokens_seen": 390657280, "step": 6972 }, { "epoch": 15.52783964365256, "loss": 0.6534337401390076, "loss_ce": 0.00011343492224114016, "loss_iou": 0.294921875, "loss_num": 0.01251220703125, "loss_xval": 0.65234375, "num_input_tokens_seen": 390657280, "step": 6972 }, { "epoch": 15.530066815144766, "grad_norm": 19.85612678527832, "learning_rate": 1e-06, "loss": 0.5059, "num_input_tokens_seen": 390714956, "step": 6973 }, { "epoch": 15.530066815144766, "loss": 0.764450192451477, "loss_ce": 0.00010690266208257526, "loss_iou": 0.34765625, "loss_num": 0.01385498046875, "loss_xval": 0.765625, "num_input_tokens_seen": 390714956, "step": 6973 }, { "epoch": 15.53229398663697, "grad_norm": 12.298739433288574, "learning_rate": 1e-06, "loss": 0.5395, "num_input_tokens_seen": 390772920, "step": 6974 }, { "epoch": 15.53229398663697, "loss": 0.7317103147506714, "loss_ce": 0.00026501319371163845, "loss_iou": 0.296875, "loss_num": 0.0277099609375, "loss_xval": 0.73046875, "num_input_tokens_seen": 390772920, "step": 6974 }, { "epoch": 15.534521158129175, "grad_norm": 22.31592559814453, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 390826668, "step": 6975 }, { "epoch": 15.534521158129175, "loss": 0.5942932367324829, "loss_ce": 0.00017704666242934763, "loss_iou": 0.267578125, "loss_num": 0.01165771484375, "loss_xval": 0.59375, "num_input_tokens_seen": 390826668, "step": 6975 }, { "epoch": 15.53674832962138, "grad_norm": 18.439233779907227, "learning_rate": 1e-06, "loss": 0.4188, "num_input_tokens_seen": 390882348, "step": 6976 }, { "epoch": 15.53674832962138, "loss": 0.2939029932022095, "loss_ce": 9.499942825641483e-05, "loss_iou": 0.1318359375, "loss_num": 0.006103515625, "loss_xval": 0.29296875, "num_input_tokens_seen": 390882348, "step": 6976 }, { "epoch": 15.538975501113585, "grad_norm": 16.87866973876953, "learning_rate": 1e-06, "loss": 0.3277, "num_input_tokens_seen": 390939420, "step": 6977 }, { "epoch": 15.538975501113585, "loss": 0.36519938707351685, "loss_ce": 8.707845699973404e-05, "loss_iou": 0.15234375, "loss_num": 0.01214599609375, "loss_xval": 0.365234375, "num_input_tokens_seen": 390939420, "step": 6977 }, { "epoch": 15.54120267260579, "grad_norm": 14.497117042541504, "learning_rate": 1e-06, "loss": 0.3234, "num_input_tokens_seen": 390992964, "step": 6978 }, { "epoch": 15.54120267260579, "loss": 0.36044928431510925, "loss_ce": 9.772789780981839e-05, "loss_iou": 0.171875, "loss_num": 0.00347900390625, "loss_xval": 0.359375, "num_input_tokens_seen": 390992964, "step": 6978 }, { "epoch": 15.543429844097995, "grad_norm": 18.7347412109375, "learning_rate": 1e-06, "loss": 0.3939, "num_input_tokens_seen": 391049216, "step": 6979 }, { "epoch": 15.543429844097995, "loss": 0.4501742124557495, "loss_ce": 0.00010095632751472294, "loss_iou": 0.19921875, "loss_num": 0.01043701171875, "loss_xval": 0.44921875, "num_input_tokens_seen": 391049216, "step": 6979 }, { "epoch": 15.5456570155902, "grad_norm": 18.551483154296875, "learning_rate": 1e-06, "loss": 0.4831, "num_input_tokens_seen": 391105972, "step": 6980 }, { "epoch": 15.5456570155902, "loss": 0.5650371313095093, "loss_ce": 9.57687952904962e-05, "loss_iou": 0.255859375, "loss_num": 0.0106201171875, "loss_xval": 0.56640625, "num_input_tokens_seen": 391105972, "step": 6980 }, { "epoch": 15.547884187082406, "grad_norm": 18.95163345336914, "learning_rate": 1e-06, "loss": 0.4954, "num_input_tokens_seen": 391159136, "step": 6981 }, { "epoch": 15.547884187082406, "loss": 0.5984832048416138, "loss_ce": 9.453899110667408e-05, "loss_iou": 0.2734375, "loss_num": 0.01025390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 391159136, "step": 6981 }, { "epoch": 15.550111358574611, "grad_norm": 15.434839248657227, "learning_rate": 1e-06, "loss": 0.3884, "num_input_tokens_seen": 391215408, "step": 6982 }, { "epoch": 15.550111358574611, "loss": 0.46562230587005615, "loss_ce": 0.00010716939868871123, "loss_iou": 0.208984375, "loss_num": 0.00946044921875, "loss_xval": 0.46484375, "num_input_tokens_seen": 391215408, "step": 6982 }, { "epoch": 15.552338530066816, "grad_norm": 56.02677536010742, "learning_rate": 1e-06, "loss": 0.5136, "num_input_tokens_seen": 391270184, "step": 6983 }, { "epoch": 15.552338530066816, "loss": 0.5048550367355347, "loss_ce": 9.426505130250007e-05, "loss_iou": 0.2294921875, "loss_num": 0.00921630859375, "loss_xval": 0.50390625, "num_input_tokens_seen": 391270184, "step": 6983 }, { "epoch": 15.55456570155902, "grad_norm": 37.09487533569336, "learning_rate": 1e-06, "loss": 0.4122, "num_input_tokens_seen": 391323132, "step": 6984 }, { "epoch": 15.55456570155902, "loss": 0.43672704696655273, "loss_ce": 8.155644172802567e-05, "loss_iou": 0.19140625, "loss_num": 0.01055908203125, "loss_xval": 0.4375, "num_input_tokens_seen": 391323132, "step": 6984 }, { "epoch": 15.556792873051226, "grad_norm": 29.41912078857422, "learning_rate": 1e-06, "loss": 0.4368, "num_input_tokens_seen": 391378804, "step": 6985 }, { "epoch": 15.556792873051226, "loss": 0.45505693554878235, "loss_ce": 0.00010087570990435779, "loss_iou": 0.2021484375, "loss_num": 0.00994873046875, "loss_xval": 0.455078125, "num_input_tokens_seen": 391378804, "step": 6985 }, { "epoch": 15.55902004454343, "grad_norm": 22.739253997802734, "learning_rate": 1e-06, "loss": 0.5131, "num_input_tokens_seen": 391434260, "step": 6986 }, { "epoch": 15.55902004454343, "loss": 0.5328000783920288, "loss_ce": 8.528250327799469e-05, "loss_iou": 0.232421875, "loss_num": 0.013671875, "loss_xval": 0.53125, "num_input_tokens_seen": 391434260, "step": 6986 }, { "epoch": 15.561247216035635, "grad_norm": 15.653422355651855, "learning_rate": 1e-06, "loss": 0.5016, "num_input_tokens_seen": 391493312, "step": 6987 }, { "epoch": 15.561247216035635, "loss": 0.4914650619029999, "loss_ce": 0.0001320439187111333, "loss_iou": 0.22265625, "loss_num": 0.0093994140625, "loss_xval": 0.4921875, "num_input_tokens_seen": 391493312, "step": 6987 }, { "epoch": 15.56347438752784, "grad_norm": 21.306421279907227, "learning_rate": 1e-06, "loss": 0.5295, "num_input_tokens_seen": 391548072, "step": 6988 }, { "epoch": 15.56347438752784, "loss": 0.6857566833496094, "loss_ce": 8.777156472206116e-05, "loss_iou": 0.287109375, "loss_num": 0.0225830078125, "loss_xval": 0.6875, "num_input_tokens_seen": 391548072, "step": 6988 }, { "epoch": 15.565701559020045, "grad_norm": 18.79554557800293, "learning_rate": 1e-06, "loss": 0.5781, "num_input_tokens_seen": 391602092, "step": 6989 }, { "epoch": 15.565701559020045, "loss": 0.6327000856399536, "loss_ce": 0.00013169870362617075, "loss_iou": 0.279296875, "loss_num": 0.01470947265625, "loss_xval": 0.6328125, "num_input_tokens_seen": 391602092, "step": 6989 }, { "epoch": 15.56792873051225, "grad_norm": 19.208559036254883, "learning_rate": 1e-06, "loss": 0.4444, "num_input_tokens_seen": 391659220, "step": 6990 }, { "epoch": 15.56792873051225, "loss": 0.40756696462631226, "loss_ce": 9.625191160012037e-05, "loss_iou": 0.1767578125, "loss_num": 0.01092529296875, "loss_xval": 0.408203125, "num_input_tokens_seen": 391659220, "step": 6990 }, { "epoch": 15.570155902004455, "grad_norm": 25.114105224609375, "learning_rate": 1e-06, "loss": 0.5278, "num_input_tokens_seen": 391713976, "step": 6991 }, { "epoch": 15.570155902004455, "loss": 0.760166347026825, "loss_ce": 0.0001565346319694072, "loss_iou": 0.29296875, "loss_num": 0.034912109375, "loss_xval": 0.76171875, "num_input_tokens_seen": 391713976, "step": 6991 }, { "epoch": 15.57238307349666, "grad_norm": 19.96893310546875, "learning_rate": 1e-06, "loss": 0.4927, "num_input_tokens_seen": 391768912, "step": 6992 }, { "epoch": 15.57238307349666, "loss": 0.5373374223709106, "loss_ce": 0.00010592768376227468, "loss_iou": 0.244140625, "loss_num": 0.00994873046875, "loss_xval": 0.5390625, "num_input_tokens_seen": 391768912, "step": 6992 }, { "epoch": 15.574610244988865, "grad_norm": 16.1031494140625, "learning_rate": 1e-06, "loss": 0.6124, "num_input_tokens_seen": 391825536, "step": 6993 }, { "epoch": 15.574610244988865, "loss": 0.762206494808197, "loss_ce": 0.00012153427815064788, "loss_iou": 0.296875, "loss_num": 0.0341796875, "loss_xval": 0.76171875, "num_input_tokens_seen": 391825536, "step": 6993 }, { "epoch": 15.57683741648107, "grad_norm": 20.991905212402344, "learning_rate": 1e-06, "loss": 0.6052, "num_input_tokens_seen": 391879656, "step": 6994 }, { "epoch": 15.57683741648107, "loss": 0.7651537656784058, "loss_ce": 0.00020015102927573025, "loss_iou": 0.283203125, "loss_num": 0.039794921875, "loss_xval": 0.765625, "num_input_tokens_seen": 391879656, "step": 6994 }, { "epoch": 15.579064587973274, "grad_norm": 16.341785430908203, "learning_rate": 1e-06, "loss": 0.4781, "num_input_tokens_seen": 391935488, "step": 6995 }, { "epoch": 15.579064587973274, "loss": 0.5291576385498047, "loss_ce": 0.00010491380817256868, "loss_iou": 0.220703125, "loss_num": 0.0174560546875, "loss_xval": 0.52734375, "num_input_tokens_seen": 391935488, "step": 6995 }, { "epoch": 15.58129175946548, "grad_norm": 19.71142578125, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 391991892, "step": 6996 }, { "epoch": 15.58129175946548, "loss": 0.44407549500465393, "loss_ce": 0.00010574980115052313, "loss_iou": 0.203125, "loss_num": 0.00750732421875, "loss_xval": 0.443359375, "num_input_tokens_seen": 391991892, "step": 6996 }, { "epoch": 15.583518930957684, "grad_norm": 22.070716857910156, "learning_rate": 1e-06, "loss": 0.3774, "num_input_tokens_seen": 392046924, "step": 6997 }, { "epoch": 15.583518930957684, "loss": 0.2757876515388489, "loss_ce": 9.184512600768358e-05, "loss_iou": 0.1162109375, "loss_num": 0.0086669921875, "loss_xval": 0.275390625, "num_input_tokens_seen": 392046924, "step": 6997 }, { "epoch": 15.585746102449889, "grad_norm": 20.057531356811523, "learning_rate": 1e-06, "loss": 0.4721, "num_input_tokens_seen": 392104208, "step": 6998 }, { "epoch": 15.585746102449889, "loss": 0.5038794279098511, "loss_ce": 9.52341069933027e-05, "loss_iou": 0.2138671875, "loss_num": 0.0150146484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 392104208, "step": 6998 }, { "epoch": 15.587973273942094, "grad_norm": 25.237815856933594, "learning_rate": 1e-06, "loss": 0.5194, "num_input_tokens_seen": 392161208, "step": 6999 }, { "epoch": 15.587973273942094, "loss": 0.46581047773361206, "loss_ce": 0.00011221040040254593, "loss_iou": 0.2060546875, "loss_num": 0.01080322265625, "loss_xval": 0.46484375, "num_input_tokens_seen": 392161208, "step": 6999 }, { "epoch": 15.590200445434299, "grad_norm": 15.100346565246582, "learning_rate": 1e-06, "loss": 0.4739, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "eval_seeclick_web_CIoU": 0.5901551246643066, "eval_seeclick_web_GIoU": 0.5888436436653137, "eval_seeclick_web_IoU": 0.6081466972827911, "eval_seeclick_web_MAE_all": 0.015473631210625172, "eval_seeclick_web_MAE_h": 0.0076336238998919725, "eval_seeclick_web_MAE_w": 0.015680983662605286, "eval_seeclick_web_MAE_x_boxes": 0.008724939078092575, "eval_seeclick_web_MAE_y_boxes": 0.021282089641317725, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.907034695148468, "eval_seeclick_web_loss_ce": 0.00015554412675555795, "eval_seeclick_web_loss_iou": 0.4166259765625, "eval_seeclick_web_loss_num": 0.01241302490234375, "eval_seeclick_web_loss_xval": 0.895263671875, "eval_seeclick_web_runtime": 24.1637, "eval_seeclick_web_samples_per_second": 2.069, "eval_seeclick_web_steps_per_second": 0.083, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "eval_icons_CIoU": 0.2777545750141144, "eval_icons_GIoU": 0.30234697461128235, "eval_icons_IoU": 0.3519442528486252, "eval_icons_MAE_all": 0.05883444473147392, "eval_icons_MAE_h": 0.031461546663194895, "eval_icons_MAE_w": 0.058418434113264084, "eval_icons_MAE_x_boxes": 0.06006164848804474, "eval_icons_MAE_y_boxes": 0.03738272096961737, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7067599296569824, "eval_icons_loss_ce": 0.00019056371820624918, "eval_icons_loss_iou": 0.668701171875, "eval_icons_loss_num": 0.05805206298828125, "eval_icons_loss_xval": 1.628662109375, "eval_icons_runtime": 19.373, "eval_icons_samples_per_second": 2.581, "eval_icons_steps_per_second": 0.103, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "eval_screenspot_CIoU": 0.371360719203949, "eval_screenspot_GIoU": 0.3886072834332784, "eval_screenspot_IoU": 0.4512639542420705, "eval_screenspot_MAE_all": 0.055713951587677, "eval_screenspot_MAE_h": 0.039317984133958817, "eval_screenspot_MAE_w": 0.06650510802865028, "eval_screenspot_MAE_x_boxes": 0.07384055045743783, "eval_screenspot_MAE_y_boxes": 0.03706817328929901, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 1.5692362785339355, "eval_screenspot_loss_ce": 0.00024274400008531907, "eval_screenspot_loss_iou": 0.6532389322916666, "eval_screenspot_loss_num": 0.06329091389973958, "eval_screenspot_loss_xval": 1.6232096354166667, "eval_screenspot_runtime": 37.4679, "eval_screenspot_samples_per_second": 2.375, "eval_screenspot_steps_per_second": 0.08, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "eval_compot_CIoU": 0.3413173258304596, "eval_compot_GIoU": 0.35517509281635284, "eval_compot_IoU": 0.401169016957283, "eval_compot_MAE_all": 0.019693247973918915, "eval_compot_MAE_h": 0.013937031384557486, "eval_compot_MAE_w": 0.021615090779960155, "eval_compot_MAE_x_boxes": 0.02992706559598446, "eval_compot_MAE_y_boxes": 0.0066331722773611546, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4276500940322876, "eval_compot_loss_ce": 0.00015015306416898966, "eval_compot_loss_iou": 0.6561279296875, "eval_compot_loss_num": 0.018407821655273438, "eval_compot_loss_xval": 1.404296875, "eval_compot_runtime": 24.1014, "eval_compot_samples_per_second": 2.075, "eval_compot_steps_per_second": 0.083, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "eval_custom_ui_val_CIoU": 0.4745243142048518, "eval_custom_ui_val_GIoU": 0.4812171955903371, "eval_custom_ui_val_IoU": 0.535526971022288, "eval_custom_ui_val_MAE_all": 0.027974836269600525, "eval_custom_ui_val_MAE_h": 0.015499611799087789, "eval_custom_ui_val_MAE_w": 0.03617638742758168, "eval_custom_ui_val_MAE_x_boxes": 0.033723721021993294, "eval_custom_ui_val_MAE_y_boxes": 0.013699741387325857, "eval_custom_ui_val_inside_bbox": 0.7685185207260979, "eval_custom_ui_val_loss": 1.1788626909255981, "eval_custom_ui_val_loss_ce": 0.00017328551944552196, "eval_custom_ui_val_loss_iou": 0.5056423611111112, "eval_custom_ui_val_loss_num": 0.02455192142062717, "eval_custom_ui_val_loss_xval": 1.1339518229166667, "eval_custom_ui_val_runtime": 72.2545, "eval_custom_ui_val_samples_per_second": 3.668, "eval_custom_ui_val_steps_per_second": 0.125, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.590200445434299, "loss": 0.8509615063667297, "loss_ce": 0.0001314119144808501, "loss_iou": 0.3828125, "loss_num": 0.01708984375, "loss_xval": 0.8515625, "num_input_tokens_seen": 392214772, "step": 7000 }, { "epoch": 15.592427616926503, "grad_norm": 15.370309829711914, "learning_rate": 1e-06, "loss": 0.5216, "num_input_tokens_seen": 392269336, "step": 7001 }, { "epoch": 15.592427616926503, "loss": 0.47645553946495056, "loss_ce": 0.00013718298578169197, "loss_iou": 0.2099609375, "loss_num": 0.01123046875, "loss_xval": 0.4765625, "num_input_tokens_seen": 392269336, "step": 7001 }, { "epoch": 15.594654788418708, "grad_norm": 14.073122024536133, "learning_rate": 1e-06, "loss": 0.4764, "num_input_tokens_seen": 392324300, "step": 7002 }, { "epoch": 15.594654788418708, "loss": 0.3416048288345337, "loss_ce": 0.00011310819536447525, "loss_iou": 0.1552734375, "loss_num": 0.006195068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 392324300, "step": 7002 }, { "epoch": 15.596881959910913, "grad_norm": 23.41605567932129, "learning_rate": 1e-06, "loss": 0.4436, "num_input_tokens_seen": 392381052, "step": 7003 }, { "epoch": 15.596881959910913, "loss": 0.4482382535934448, "loss_ce": 0.00036228023236617446, "loss_iou": 0.19921875, "loss_num": 0.0098876953125, "loss_xval": 0.447265625, "num_input_tokens_seen": 392381052, "step": 7003 }, { "epoch": 15.599109131403118, "grad_norm": 18.33258628845215, "learning_rate": 1e-06, "loss": 0.3708, "num_input_tokens_seen": 392438500, "step": 7004 }, { "epoch": 15.599109131403118, "loss": 0.49753135442733765, "loss_ce": 9.481675806455314e-05, "loss_iou": 0.2001953125, "loss_num": 0.019287109375, "loss_xval": 0.498046875, "num_input_tokens_seen": 392438500, "step": 7004 }, { "epoch": 15.601336302895323, "grad_norm": 40.55979537963867, "learning_rate": 1e-06, "loss": 0.4622, "num_input_tokens_seen": 392491684, "step": 7005 }, { "epoch": 15.601336302895323, "loss": 0.2954270541667938, "loss_ce": 0.00013897480675950646, "loss_iou": 0.1318359375, "loss_num": 0.006195068359375, "loss_xval": 0.294921875, "num_input_tokens_seen": 392491684, "step": 7005 }, { "epoch": 15.603563474387528, "grad_norm": 10.46609115600586, "learning_rate": 1e-06, "loss": 0.4256, "num_input_tokens_seen": 392550124, "step": 7006 }, { "epoch": 15.603563474387528, "loss": 0.4542039632797241, "loss_ce": 0.00010239450784865767, "loss_iou": 0.2001953125, "loss_num": 0.01080322265625, "loss_xval": 0.453125, "num_input_tokens_seen": 392550124, "step": 7006 }, { "epoch": 15.605790645879733, "grad_norm": 15.158596992492676, "learning_rate": 1e-06, "loss": 0.3997, "num_input_tokens_seen": 392606256, "step": 7007 }, { "epoch": 15.605790645879733, "loss": 0.473002552986145, "loss_ce": 0.00010216711962129921, "loss_iou": 0.1865234375, "loss_num": 0.02001953125, "loss_xval": 0.47265625, "num_input_tokens_seen": 392606256, "step": 7007 }, { "epoch": 15.608017817371937, "grad_norm": 23.731393814086914, "learning_rate": 1e-06, "loss": 0.4986, "num_input_tokens_seen": 392661192, "step": 7008 }, { "epoch": 15.608017817371937, "loss": 0.4049649238586426, "loss_ce": 8.820135553833097e-05, "loss_iou": 0.1845703125, "loss_num": 0.00732421875, "loss_xval": 0.404296875, "num_input_tokens_seen": 392661192, "step": 7008 }, { "epoch": 15.610244988864142, "grad_norm": 15.854848861694336, "learning_rate": 1e-06, "loss": 0.4375, "num_input_tokens_seen": 392716600, "step": 7009 }, { "epoch": 15.610244988864142, "loss": 0.5096275806427002, "loss_ce": 0.00010606721480144188, "loss_iou": 0.20703125, "loss_num": 0.01904296875, "loss_xval": 0.5078125, "num_input_tokens_seen": 392716600, "step": 7009 }, { "epoch": 15.612472160356347, "grad_norm": 13.901849746704102, "learning_rate": 1e-06, "loss": 0.4194, "num_input_tokens_seen": 392774900, "step": 7010 }, { "epoch": 15.612472160356347, "loss": 0.4252671003341675, "loss_ce": 9.621331264497712e-05, "loss_iou": 0.19140625, "loss_num": 0.0084228515625, "loss_xval": 0.42578125, "num_input_tokens_seen": 392774900, "step": 7010 }, { "epoch": 15.614699331848552, "grad_norm": 13.334481239318848, "learning_rate": 1e-06, "loss": 0.4398, "num_input_tokens_seen": 392830348, "step": 7011 }, { "epoch": 15.614699331848552, "loss": 0.6181551218032837, "loss_ce": 0.00011313124559819698, "loss_iou": 0.244140625, "loss_num": 0.02587890625, "loss_xval": 0.6171875, "num_input_tokens_seen": 392830348, "step": 7011 }, { "epoch": 15.616926503340757, "grad_norm": 17.49168586730957, "learning_rate": 1e-06, "loss": 0.6052, "num_input_tokens_seen": 392887200, "step": 7012 }, { "epoch": 15.616926503340757, "loss": 0.7159167528152466, "loss_ce": 9.638856136007234e-05, "loss_iou": 0.310546875, "loss_num": 0.0186767578125, "loss_xval": 0.71484375, "num_input_tokens_seen": 392887200, "step": 7012 }, { "epoch": 15.619153674832962, "grad_norm": 18.257720947265625, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 392945796, "step": 7013 }, { "epoch": 15.619153674832962, "loss": 0.4207872748374939, "loss_ce": 0.000132970220874995, "loss_iou": 0.1796875, "loss_num": 0.012451171875, "loss_xval": 0.419921875, "num_input_tokens_seen": 392945796, "step": 7013 }, { "epoch": 15.621380846325167, "grad_norm": 21.57740020751953, "learning_rate": 1e-06, "loss": 0.3268, "num_input_tokens_seen": 393004392, "step": 7014 }, { "epoch": 15.621380846325167, "loss": 0.26535099744796753, "loss_ce": 9.219862113241106e-05, "loss_iou": 0.12109375, "loss_num": 0.00457763671875, "loss_xval": 0.265625, "num_input_tokens_seen": 393004392, "step": 7014 }, { "epoch": 15.623608017817372, "grad_norm": 17.83563232421875, "learning_rate": 1e-06, "loss": 0.3994, "num_input_tokens_seen": 393060176, "step": 7015 }, { "epoch": 15.623608017817372, "loss": 0.3122119903564453, "loss_ce": 7.820721657481045e-05, "loss_iou": 0.1357421875, "loss_num": 0.00799560546875, "loss_xval": 0.3125, "num_input_tokens_seen": 393060176, "step": 7015 }, { "epoch": 15.625835189309576, "grad_norm": 29.299760818481445, "learning_rate": 1e-06, "loss": 0.5213, "num_input_tokens_seen": 393118056, "step": 7016 }, { "epoch": 15.625835189309576, "loss": 0.37060514092445374, "loss_ce": 0.00012174376752227545, "loss_iou": 0.16796875, "loss_num": 0.006927490234375, "loss_xval": 0.37109375, "num_input_tokens_seen": 393118056, "step": 7016 }, { "epoch": 15.628062360801781, "grad_norm": 12.648885726928711, "learning_rate": 1e-06, "loss": 0.5216, "num_input_tokens_seen": 393177148, "step": 7017 }, { "epoch": 15.628062360801781, "loss": 0.40695855021476746, "loss_ce": 9.820509876590222e-05, "loss_iou": 0.189453125, "loss_num": 0.00543212890625, "loss_xval": 0.40625, "num_input_tokens_seen": 393177148, "step": 7017 }, { "epoch": 15.630289532293986, "grad_norm": 24.881315231323242, "learning_rate": 1e-06, "loss": 0.3718, "num_input_tokens_seen": 393231232, "step": 7018 }, { "epoch": 15.630289532293986, "loss": 0.41211479902267456, "loss_ce": 0.00012749881716445088, "loss_iou": 0.1689453125, "loss_num": 0.0147705078125, "loss_xval": 0.412109375, "num_input_tokens_seen": 393231232, "step": 7018 }, { "epoch": 15.632516703786191, "grad_norm": 21.462350845336914, "learning_rate": 1e-06, "loss": 0.5006, "num_input_tokens_seen": 393285416, "step": 7019 }, { "epoch": 15.632516703786191, "loss": 0.383044958114624, "loss_ce": 0.00023245607735589147, "loss_iou": 0.1767578125, "loss_num": 0.005767822265625, "loss_xval": 0.3828125, "num_input_tokens_seen": 393285416, "step": 7019 }, { "epoch": 15.634743875278396, "grad_norm": 18.3259220123291, "learning_rate": 1e-06, "loss": 0.5313, "num_input_tokens_seen": 393342488, "step": 7020 }, { "epoch": 15.634743875278396, "loss": 0.6249858140945435, "loss_ce": 0.00016893941210582852, "loss_iou": 0.259765625, "loss_num": 0.02099609375, "loss_xval": 0.625, "num_input_tokens_seen": 393342488, "step": 7020 }, { "epoch": 15.6369710467706, "grad_norm": 33.112945556640625, "learning_rate": 1e-06, "loss": 0.5863, "num_input_tokens_seen": 393400080, "step": 7021 }, { "epoch": 15.6369710467706, "loss": 0.4414959251880646, "loss_ce": 8.966858149506152e-05, "loss_iou": 0.1953125, "loss_num": 0.01025390625, "loss_xval": 0.44140625, "num_input_tokens_seen": 393400080, "step": 7021 }, { "epoch": 15.639198218262806, "grad_norm": 16.2619571685791, "learning_rate": 1e-06, "loss": 0.3958, "num_input_tokens_seen": 393456744, "step": 7022 }, { "epoch": 15.639198218262806, "loss": 0.37460190057754517, "loss_ce": 9.020163997774944e-05, "loss_iou": 0.16796875, "loss_num": 0.007720947265625, "loss_xval": 0.375, "num_input_tokens_seen": 393456744, "step": 7022 }, { "epoch": 15.64142538975501, "grad_norm": 20.06168556213379, "learning_rate": 1e-06, "loss": 0.3711, "num_input_tokens_seen": 393511708, "step": 7023 }, { "epoch": 15.64142538975501, "loss": 0.4522230625152588, "loss_ce": 7.460695633199066e-05, "loss_iou": 0.1875, "loss_num": 0.0155029296875, "loss_xval": 0.453125, "num_input_tokens_seen": 393511708, "step": 7023 }, { "epoch": 15.643652561247215, "grad_norm": 13.850775718688965, "learning_rate": 1e-06, "loss": 0.4639, "num_input_tokens_seen": 393569980, "step": 7024 }, { "epoch": 15.643652561247215, "loss": 0.4485822916030884, "loss_ce": 9.598617180017754e-05, "loss_iou": 0.1904296875, "loss_num": 0.01348876953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 393569980, "step": 7024 }, { "epoch": 15.64587973273942, "grad_norm": 24.194141387939453, "learning_rate": 1e-06, "loss": 0.6029, "num_input_tokens_seen": 393623688, "step": 7025 }, { "epoch": 15.64587973273942, "loss": 0.7536381483078003, "loss_ce": 9.815217345021665e-05, "loss_iou": 0.283203125, "loss_num": 0.037353515625, "loss_xval": 0.75390625, "num_input_tokens_seen": 393623688, "step": 7025 }, { "epoch": 15.648106904231625, "grad_norm": 149.88299560546875, "learning_rate": 1e-06, "loss": 0.4324, "num_input_tokens_seen": 393678112, "step": 7026 }, { "epoch": 15.648106904231625, "loss": 0.4286256432533264, "loss_ce": 9.779322135727853e-05, "loss_iou": 0.1748046875, "loss_num": 0.0157470703125, "loss_xval": 0.427734375, "num_input_tokens_seen": 393678112, "step": 7026 }, { "epoch": 15.65033407572383, "grad_norm": 18.4648494720459, "learning_rate": 1e-06, "loss": 0.5376, "num_input_tokens_seen": 393735444, "step": 7027 }, { "epoch": 15.65033407572383, "loss": 0.5531010627746582, "loss_ce": 0.00012253447494003922, "loss_iou": 0.2158203125, "loss_num": 0.0245361328125, "loss_xval": 0.5546875, "num_input_tokens_seen": 393735444, "step": 7027 }, { "epoch": 15.652561247216035, "grad_norm": 23.74418830871582, "learning_rate": 1e-06, "loss": 0.4233, "num_input_tokens_seen": 393789508, "step": 7028 }, { "epoch": 15.652561247216035, "loss": 0.3642292618751526, "loss_ce": 9.35211282921955e-05, "loss_iou": 0.1552734375, "loss_num": 0.01080322265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 393789508, "step": 7028 }, { "epoch": 15.654788418708241, "grad_norm": 18.24901580810547, "learning_rate": 1e-06, "loss": 0.4294, "num_input_tokens_seen": 393845576, "step": 7029 }, { "epoch": 15.654788418708241, "loss": 0.337370365858078, "loss_ce": 9.009480709210038e-05, "loss_iou": 0.150390625, "loss_num": 0.007293701171875, "loss_xval": 0.337890625, "num_input_tokens_seen": 393845576, "step": 7029 }, { "epoch": 15.657015590200446, "grad_norm": 18.35692596435547, "learning_rate": 1e-06, "loss": 0.3768, "num_input_tokens_seen": 393901840, "step": 7030 }, { "epoch": 15.657015590200446, "loss": 0.3451303243637085, "loss_ce": 0.00015963747864589095, "loss_iou": 0.158203125, "loss_num": 0.005462646484375, "loss_xval": 0.345703125, "num_input_tokens_seen": 393901840, "step": 7030 }, { "epoch": 15.659242761692651, "grad_norm": 19.633066177368164, "learning_rate": 1e-06, "loss": 0.3587, "num_input_tokens_seen": 393956624, "step": 7031 }, { "epoch": 15.659242761692651, "loss": 0.23722299933433533, "loss_ce": 0.00010140843369299546, "loss_iou": 0.1083984375, "loss_num": 0.004058837890625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 393956624, "step": 7031 }, { "epoch": 15.661469933184856, "grad_norm": 13.26457691192627, "learning_rate": 1e-06, "loss": 0.4878, "num_input_tokens_seen": 394015648, "step": 7032 }, { "epoch": 15.661469933184856, "loss": 0.5062072277069092, "loss_ce": 0.0003479141159914434, "loss_iou": 0.2041015625, "loss_num": 0.01953125, "loss_xval": 0.5078125, "num_input_tokens_seen": 394015648, "step": 7032 }, { "epoch": 15.66369710467706, "grad_norm": 15.56800651550293, "learning_rate": 1e-06, "loss": 0.4825, "num_input_tokens_seen": 394068676, "step": 7033 }, { "epoch": 15.66369710467706, "loss": 0.38187873363494873, "loss_ce": 0.0001038167392835021, "loss_iou": 0.1552734375, "loss_num": 0.01409912109375, "loss_xval": 0.380859375, "num_input_tokens_seen": 394068676, "step": 7033 }, { "epoch": 15.665924276169266, "grad_norm": 13.747230529785156, "learning_rate": 1e-06, "loss": 0.502, "num_input_tokens_seen": 394125676, "step": 7034 }, { "epoch": 15.665924276169266, "loss": 0.47450119256973267, "loss_ce": 0.0001359964517178014, "loss_iou": 0.208984375, "loss_num": 0.01129150390625, "loss_xval": 0.474609375, "num_input_tokens_seen": 394125676, "step": 7034 }, { "epoch": 15.66815144766147, "grad_norm": 13.67151165008545, "learning_rate": 1e-06, "loss": 0.3227, "num_input_tokens_seen": 394182648, "step": 7035 }, { "epoch": 15.66815144766147, "loss": 0.3714517056941986, "loss_ce": 0.0001138186635216698, "loss_iou": 0.1689453125, "loss_num": 0.006683349609375, "loss_xval": 0.37109375, "num_input_tokens_seen": 394182648, "step": 7035 }, { "epoch": 15.670378619153675, "grad_norm": 27.722789764404297, "learning_rate": 1e-06, "loss": 0.4209, "num_input_tokens_seen": 394236828, "step": 7036 }, { "epoch": 15.670378619153675, "loss": 0.4359535276889801, "loss_ce": 0.00010151089372811839, "loss_iou": 0.189453125, "loss_num": 0.011474609375, "loss_xval": 0.435546875, "num_input_tokens_seen": 394236828, "step": 7036 }, { "epoch": 15.67260579064588, "grad_norm": 18.144771575927734, "learning_rate": 1e-06, "loss": 0.4676, "num_input_tokens_seen": 394291552, "step": 7037 }, { "epoch": 15.67260579064588, "loss": 0.4619390070438385, "loss_ce": 8.598440763307735e-05, "loss_iou": 0.201171875, "loss_num": 0.011962890625, "loss_xval": 0.4609375, "num_input_tokens_seen": 394291552, "step": 7037 }, { "epoch": 15.674832962138085, "grad_norm": 15.883332252502441, "learning_rate": 1e-06, "loss": 0.3619, "num_input_tokens_seen": 394349988, "step": 7038 }, { "epoch": 15.674832962138085, "loss": 0.3775450587272644, "loss_ce": 0.00010363116598455235, "loss_iou": 0.166015625, "loss_num": 0.00897216796875, "loss_xval": 0.376953125, "num_input_tokens_seen": 394349988, "step": 7038 }, { "epoch": 15.67706013363029, "grad_norm": 18.735139846801758, "learning_rate": 1e-06, "loss": 0.3232, "num_input_tokens_seen": 394405816, "step": 7039 }, { "epoch": 15.67706013363029, "loss": 0.30405598878860474, "loss_ce": 0.0001009251645882614, "loss_iou": 0.12890625, "loss_num": 0.0093994140625, "loss_xval": 0.3046875, "num_input_tokens_seen": 394405816, "step": 7039 }, { "epoch": 15.679287305122495, "grad_norm": 32.7242546081543, "learning_rate": 1e-06, "loss": 0.5001, "num_input_tokens_seen": 394458988, "step": 7040 }, { "epoch": 15.679287305122495, "loss": 0.6007954478263855, "loss_ce": 8.743096987018362e-05, "loss_iou": 0.265625, "loss_num": 0.0137939453125, "loss_xval": 0.6015625, "num_input_tokens_seen": 394458988, "step": 7040 }, { "epoch": 15.6815144766147, "grad_norm": 17.01080322265625, "learning_rate": 1e-06, "loss": 0.4883, "num_input_tokens_seen": 394517044, "step": 7041 }, { "epoch": 15.6815144766147, "loss": 0.6097177267074585, "loss_ce": 9.863986633718014e-05, "loss_iou": 0.26953125, "loss_num": 0.014404296875, "loss_xval": 0.609375, "num_input_tokens_seen": 394517044, "step": 7041 }, { "epoch": 15.683741648106905, "grad_norm": 23.347501754760742, "learning_rate": 1e-06, "loss": 0.4004, "num_input_tokens_seen": 394575972, "step": 7042 }, { "epoch": 15.683741648106905, "loss": 0.424541175365448, "loss_ce": 0.00010270069469697773, "loss_iou": 0.193359375, "loss_num": 0.007659912109375, "loss_xval": 0.423828125, "num_input_tokens_seen": 394575972, "step": 7042 }, { "epoch": 15.68596881959911, "grad_norm": 17.75396156311035, "learning_rate": 1e-06, "loss": 0.4391, "num_input_tokens_seen": 394631332, "step": 7043 }, { "epoch": 15.68596881959911, "loss": 0.5316604375839233, "loss_ce": 0.0010207871673628688, "loss_iou": 0.2275390625, "loss_num": 0.01519775390625, "loss_xval": 0.53125, "num_input_tokens_seen": 394631332, "step": 7043 }, { "epoch": 15.688195991091314, "grad_norm": 20.462841033935547, "learning_rate": 1e-06, "loss": 0.2736, "num_input_tokens_seen": 394687344, "step": 7044 }, { "epoch": 15.688195991091314, "loss": 0.30839377641677856, "loss_ce": 0.0001052148436428979, "loss_iou": 0.12890625, "loss_num": 0.0101318359375, "loss_xval": 0.30859375, "num_input_tokens_seen": 394687344, "step": 7044 }, { "epoch": 15.690423162583519, "grad_norm": 17.69515037536621, "learning_rate": 1e-06, "loss": 0.5623, "num_input_tokens_seen": 394742256, "step": 7045 }, { "epoch": 15.690423162583519, "loss": 0.6370512247085571, "loss_ce": 0.00011880889360327274, "loss_iou": 0.279296875, "loss_num": 0.0155029296875, "loss_xval": 0.63671875, "num_input_tokens_seen": 394742256, "step": 7045 }, { "epoch": 15.692650334075724, "grad_norm": 19.867359161376953, "learning_rate": 1e-06, "loss": 0.4736, "num_input_tokens_seen": 394796212, "step": 7046 }, { "epoch": 15.692650334075724, "loss": 0.5707098245620728, "loss_ce": 0.00015316563076339662, "loss_iou": 0.2353515625, "loss_num": 0.0198974609375, "loss_xval": 0.5703125, "num_input_tokens_seen": 394796212, "step": 7046 }, { "epoch": 15.694877505567929, "grad_norm": 17.22439193725586, "learning_rate": 1e-06, "loss": 0.5096, "num_input_tokens_seen": 394849460, "step": 7047 }, { "epoch": 15.694877505567929, "loss": 0.4576007127761841, "loss_ce": 8.11615027487278e-05, "loss_iou": 0.2060546875, "loss_num": 0.0091552734375, "loss_xval": 0.45703125, "num_input_tokens_seen": 394849460, "step": 7047 }, { "epoch": 15.697104677060134, "grad_norm": 19.11503791809082, "learning_rate": 1e-06, "loss": 0.3946, "num_input_tokens_seen": 394907576, "step": 7048 }, { "epoch": 15.697104677060134, "loss": 0.29610675573349, "loss_ce": 8.626521594123915e-05, "loss_iou": 0.1328125, "loss_num": 0.006256103515625, "loss_xval": 0.296875, "num_input_tokens_seen": 394907576, "step": 7048 }, { "epoch": 15.699331848552339, "grad_norm": 14.611241340637207, "learning_rate": 1e-06, "loss": 0.3639, "num_input_tokens_seen": 394963124, "step": 7049 }, { "epoch": 15.699331848552339, "loss": 0.4972745180130005, "loss_ce": 8.216071000788361e-05, "loss_iou": 0.2099609375, "loss_num": 0.015625, "loss_xval": 0.498046875, "num_input_tokens_seen": 394963124, "step": 7049 }, { "epoch": 15.701559020044543, "grad_norm": 22.508859634399414, "learning_rate": 1e-06, "loss": 0.4025, "num_input_tokens_seen": 395020276, "step": 7050 }, { "epoch": 15.701559020044543, "loss": 0.35203513503074646, "loss_ce": 0.00010644704889273271, "loss_iou": 0.154296875, "loss_num": 0.0086669921875, "loss_xval": 0.3515625, "num_input_tokens_seen": 395020276, "step": 7050 }, { "epoch": 15.703786191536748, "grad_norm": 24.426239013671875, "learning_rate": 1e-06, "loss": 0.5065, "num_input_tokens_seen": 395078896, "step": 7051 }, { "epoch": 15.703786191536748, "loss": 0.530193030834198, "loss_ce": 0.00010265262972097844, "loss_iou": 0.2333984375, "loss_num": 0.0128173828125, "loss_xval": 0.53125, "num_input_tokens_seen": 395078896, "step": 7051 }, { "epoch": 15.706013363028953, "grad_norm": 21.90961456298828, "learning_rate": 1e-06, "loss": 0.6311, "num_input_tokens_seen": 395137768, "step": 7052 }, { "epoch": 15.706013363028953, "loss": 0.6914348006248474, "loss_ce": 8.963010623119771e-05, "loss_iou": 0.267578125, "loss_num": 0.03125, "loss_xval": 0.69140625, "num_input_tokens_seen": 395137768, "step": 7052 }, { "epoch": 15.708240534521158, "grad_norm": 17.655078887939453, "learning_rate": 1e-06, "loss": 0.4446, "num_input_tokens_seen": 395194468, "step": 7053 }, { "epoch": 15.708240534521158, "loss": 0.49838685989379883, "loss_ce": 9.587279055267572e-05, "loss_iou": 0.208984375, "loss_num": 0.0162353515625, "loss_xval": 0.498046875, "num_input_tokens_seen": 395194468, "step": 7053 }, { "epoch": 15.710467706013363, "grad_norm": 16.85371971130371, "learning_rate": 1e-06, "loss": 0.7519, "num_input_tokens_seen": 395248904, "step": 7054 }, { "epoch": 15.710467706013363, "loss": 0.6303013563156128, "loss_ce": 0.0001133399928221479, "loss_iou": 0.248046875, "loss_num": 0.027099609375, "loss_xval": 0.62890625, "num_input_tokens_seen": 395248904, "step": 7054 }, { "epoch": 15.712694877505568, "grad_norm": 18.303388595581055, "learning_rate": 1e-06, "loss": 0.349, "num_input_tokens_seen": 395306728, "step": 7055 }, { "epoch": 15.712694877505568, "loss": 0.32140427827835083, "loss_ce": 0.00011521350825205445, "loss_iou": 0.13671875, "loss_num": 0.009765625, "loss_xval": 0.3203125, "num_input_tokens_seen": 395306728, "step": 7055 }, { "epoch": 15.714922048997773, "grad_norm": 16.185035705566406, "learning_rate": 1e-06, "loss": 0.4021, "num_input_tokens_seen": 395362652, "step": 7056 }, { "epoch": 15.714922048997773, "loss": 0.24985191226005554, "loss_ce": 9.60544275585562e-05, "loss_iou": 0.1103515625, "loss_num": 0.005706787109375, "loss_xval": 0.25, "num_input_tokens_seen": 395362652, "step": 7056 }, { "epoch": 15.717149220489977, "grad_norm": 12.218123435974121, "learning_rate": 1e-06, "loss": 0.4379, "num_input_tokens_seen": 395419380, "step": 7057 }, { "epoch": 15.717149220489977, "loss": 0.4651813805103302, "loss_ce": 9.347945888293907e-05, "loss_iou": 0.203125, "loss_num": 0.0115966796875, "loss_xval": 0.46484375, "num_input_tokens_seen": 395419380, "step": 7057 }, { "epoch": 15.719376391982182, "grad_norm": 19.603456497192383, "learning_rate": 1e-06, "loss": 0.3471, "num_input_tokens_seen": 395475324, "step": 7058 }, { "epoch": 15.719376391982182, "loss": 0.33774223923683167, "loss_ce": 9.575536387274042e-05, "loss_iou": 0.154296875, "loss_num": 0.005828857421875, "loss_xval": 0.337890625, "num_input_tokens_seen": 395475324, "step": 7058 }, { "epoch": 15.721603563474387, "grad_norm": 25.467702865600586, "learning_rate": 1e-06, "loss": 0.379, "num_input_tokens_seen": 395532144, "step": 7059 }, { "epoch": 15.721603563474387, "loss": 0.3512808680534363, "loss_ce": 8.456782961729914e-05, "loss_iou": 0.1494140625, "loss_num": 0.010498046875, "loss_xval": 0.3515625, "num_input_tokens_seen": 395532144, "step": 7059 }, { "epoch": 15.723830734966592, "grad_norm": 18.630399703979492, "learning_rate": 1e-06, "loss": 0.3926, "num_input_tokens_seen": 395587848, "step": 7060 }, { "epoch": 15.723830734966592, "loss": 0.4616337716579437, "loss_ce": 8.594746032031253e-05, "loss_iou": 0.1953125, "loss_num": 0.01422119140625, "loss_xval": 0.4609375, "num_input_tokens_seen": 395587848, "step": 7060 }, { "epoch": 15.726057906458797, "grad_norm": 14.84992504119873, "learning_rate": 1e-06, "loss": 0.6171, "num_input_tokens_seen": 395642736, "step": 7061 }, { "epoch": 15.726057906458797, "loss": 0.6872535943984985, "loss_ce": 0.0001198096142616123, "loss_iou": 0.265625, "loss_num": 0.031494140625, "loss_xval": 0.6875, "num_input_tokens_seen": 395642736, "step": 7061 }, { "epoch": 15.728285077951002, "grad_norm": 23.57808494567871, "learning_rate": 1e-06, "loss": 0.4608, "num_input_tokens_seen": 395699808, "step": 7062 }, { "epoch": 15.728285077951002, "loss": 0.6477620601654053, "loss_ce": 0.00011803221423178911, "loss_iou": 0.298828125, "loss_num": 0.01031494140625, "loss_xval": 0.6484375, "num_input_tokens_seen": 395699808, "step": 7062 }, { "epoch": 15.730512249443207, "grad_norm": 15.21978759765625, "learning_rate": 1e-06, "loss": 0.4271, "num_input_tokens_seen": 395757824, "step": 7063 }, { "epoch": 15.730512249443207, "loss": 0.3883220851421356, "loss_ce": 7.74528380134143e-05, "loss_iou": 0.173828125, "loss_num": 0.00799560546875, "loss_xval": 0.388671875, "num_input_tokens_seen": 395757824, "step": 7063 }, { "epoch": 15.732739420935411, "grad_norm": 29.952592849731445, "learning_rate": 1e-06, "loss": 0.3861, "num_input_tokens_seen": 395815416, "step": 7064 }, { "epoch": 15.732739420935411, "loss": 0.37809085845947266, "loss_ce": 0.00010012884740717709, "loss_iou": 0.1728515625, "loss_num": 0.006500244140625, "loss_xval": 0.37890625, "num_input_tokens_seen": 395815416, "step": 7064 }, { "epoch": 15.734966592427616, "grad_norm": 16.03706169128418, "learning_rate": 1e-06, "loss": 0.4989, "num_input_tokens_seen": 395868684, "step": 7065 }, { "epoch": 15.734966592427616, "loss": 0.523518443107605, "loss_ce": 8.08899276307784e-05, "loss_iou": 0.2255859375, "loss_num": 0.0142822265625, "loss_xval": 0.5234375, "num_input_tokens_seen": 395868684, "step": 7065 }, { "epoch": 15.737193763919821, "grad_norm": 16.18170928955078, "learning_rate": 1e-06, "loss": 0.3421, "num_input_tokens_seen": 395925000, "step": 7066 }, { "epoch": 15.737193763919821, "loss": 0.4338296949863434, "loss_ce": 0.00011389805149519816, "loss_iou": 0.1982421875, "loss_num": 0.007568359375, "loss_xval": 0.43359375, "num_input_tokens_seen": 395925000, "step": 7066 }, { "epoch": 15.739420935412026, "grad_norm": 11.560032844543457, "learning_rate": 1e-06, "loss": 0.3395, "num_input_tokens_seen": 395979500, "step": 7067 }, { "epoch": 15.739420935412026, "loss": 0.26005110144615173, "loss_ce": 0.00010237214155495167, "loss_iou": 0.107421875, "loss_num": 0.009033203125, "loss_xval": 0.259765625, "num_input_tokens_seen": 395979500, "step": 7067 }, { "epoch": 15.74164810690423, "grad_norm": 24.783843994140625, "learning_rate": 1e-06, "loss": 0.3423, "num_input_tokens_seen": 396037980, "step": 7068 }, { "epoch": 15.74164810690423, "loss": 0.2767605781555176, "loss_ce": 8.823502867016941e-05, "loss_iou": 0.10791015625, "loss_num": 0.01214599609375, "loss_xval": 0.27734375, "num_input_tokens_seen": 396037980, "step": 7068 }, { "epoch": 15.743875278396436, "grad_norm": 13.664560317993164, "learning_rate": 1e-06, "loss": 0.3566, "num_input_tokens_seen": 396095480, "step": 7069 }, { "epoch": 15.743875278396436, "loss": 0.30287209153175354, "loss_ce": 7.669955084566027e-05, "loss_iou": 0.1328125, "loss_num": 0.00750732421875, "loss_xval": 0.302734375, "num_input_tokens_seen": 396095480, "step": 7069 }, { "epoch": 15.74610244988864, "grad_norm": 17.73408317565918, "learning_rate": 1e-06, "loss": 0.4481, "num_input_tokens_seen": 396152320, "step": 7070 }, { "epoch": 15.74610244988864, "loss": 0.5743053555488586, "loss_ce": 8.658809383632615e-05, "loss_iou": 0.2392578125, "loss_num": 0.019287109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 396152320, "step": 7070 }, { "epoch": 15.748329621380847, "grad_norm": 16.201440811157227, "learning_rate": 1e-06, "loss": 0.4466, "num_input_tokens_seen": 396212012, "step": 7071 }, { "epoch": 15.748329621380847, "loss": 0.5690320730209351, "loss_ce": 0.00018442686996422708, "loss_iou": 0.232421875, "loss_num": 0.0211181640625, "loss_xval": 0.5703125, "num_input_tokens_seen": 396212012, "step": 7071 }, { "epoch": 15.750556792873052, "grad_norm": 15.789632797241211, "learning_rate": 1e-06, "loss": 0.3196, "num_input_tokens_seen": 396266208, "step": 7072 }, { "epoch": 15.750556792873052, "loss": 0.30217671394348145, "loss_ce": 0.00011373275629011914, "loss_iou": 0.126953125, "loss_num": 0.00970458984375, "loss_xval": 0.302734375, "num_input_tokens_seen": 396266208, "step": 7072 }, { "epoch": 15.752783964365257, "grad_norm": 18.404560089111328, "learning_rate": 1e-06, "loss": 0.4347, "num_input_tokens_seen": 396325024, "step": 7073 }, { "epoch": 15.752783964365257, "loss": 0.2929561734199524, "loss_ce": 7.899131742306054e-05, "loss_iou": 0.12060546875, "loss_num": 0.0103759765625, "loss_xval": 0.29296875, "num_input_tokens_seen": 396325024, "step": 7073 }, { "epoch": 15.755011135857462, "grad_norm": 24.089750289916992, "learning_rate": 1e-06, "loss": 0.5156, "num_input_tokens_seen": 396382244, "step": 7074 }, { "epoch": 15.755011135857462, "loss": 0.2893972396850586, "loss_ce": 9.059577132575214e-05, "loss_iou": 0.125, "loss_num": 0.00799560546875, "loss_xval": 0.2890625, "num_input_tokens_seen": 396382244, "step": 7074 }, { "epoch": 15.757238307349667, "grad_norm": 13.719793319702148, "learning_rate": 1e-06, "loss": 0.3745, "num_input_tokens_seen": 396438428, "step": 7075 }, { "epoch": 15.757238307349667, "loss": 0.41726016998291016, "loss_ce": 0.00014590269711334258, "loss_iou": 0.1650390625, "loss_num": 0.017333984375, "loss_xval": 0.41796875, "num_input_tokens_seen": 396438428, "step": 7075 }, { "epoch": 15.759465478841872, "grad_norm": 17.886260986328125, "learning_rate": 1e-06, "loss": 0.5086, "num_input_tokens_seen": 396492232, "step": 7076 }, { "epoch": 15.759465478841872, "loss": 0.2809308171272278, "loss_ce": 0.00010807791841216385, "loss_iou": 0.11083984375, "loss_num": 0.0118408203125, "loss_xval": 0.28125, "num_input_tokens_seen": 396492232, "step": 7076 }, { "epoch": 15.761692650334076, "grad_norm": 27.26051902770996, "learning_rate": 1e-06, "loss": 0.4125, "num_input_tokens_seen": 396546276, "step": 7077 }, { "epoch": 15.761692650334076, "loss": 0.391597181558609, "loss_ce": 0.00011767500836867839, "loss_iou": 0.1552734375, "loss_num": 0.0162353515625, "loss_xval": 0.390625, "num_input_tokens_seen": 396546276, "step": 7077 }, { "epoch": 15.763919821826281, "grad_norm": 19.251379013061523, "learning_rate": 1e-06, "loss": 0.6037, "num_input_tokens_seen": 396600784, "step": 7078 }, { "epoch": 15.763919821826281, "loss": 0.5241034030914307, "loss_ce": 0.00011657152936095372, "loss_iou": 0.234375, "loss_num": 0.01080322265625, "loss_xval": 0.5234375, "num_input_tokens_seen": 396600784, "step": 7078 }, { "epoch": 15.766146993318486, "grad_norm": 15.035192489624023, "learning_rate": 1e-06, "loss": 0.579, "num_input_tokens_seen": 396656868, "step": 7079 }, { "epoch": 15.766146993318486, "loss": 0.699531078338623, "loss_ce": 0.00031230467720888555, "loss_iou": 0.296875, "loss_num": 0.021240234375, "loss_xval": 0.69921875, "num_input_tokens_seen": 396656868, "step": 7079 }, { "epoch": 15.768374164810691, "grad_norm": 23.482166290283203, "learning_rate": 1e-06, "loss": 0.4239, "num_input_tokens_seen": 396711564, "step": 7080 }, { "epoch": 15.768374164810691, "loss": 0.4484688639640808, "loss_ce": 0.00010461645433679223, "loss_iou": 0.18359375, "loss_num": 0.0162353515625, "loss_xval": 0.44921875, "num_input_tokens_seen": 396711564, "step": 7080 }, { "epoch": 15.770601336302896, "grad_norm": 15.677742958068848, "learning_rate": 1e-06, "loss": 0.4709, "num_input_tokens_seen": 396769364, "step": 7081 }, { "epoch": 15.770601336302896, "loss": 0.4059576392173767, "loss_ce": 0.00016540827346034348, "loss_iou": 0.177734375, "loss_num": 0.01019287109375, "loss_xval": 0.40625, "num_input_tokens_seen": 396769364, "step": 7081 }, { "epoch": 15.7728285077951, "grad_norm": 28.17963409423828, "learning_rate": 1e-06, "loss": 0.4652, "num_input_tokens_seen": 396826500, "step": 7082 }, { "epoch": 15.7728285077951, "loss": 0.4379936456680298, "loss_ce": 0.00012745258572977036, "loss_iou": 0.1728515625, "loss_num": 0.018310546875, "loss_xval": 0.4375, "num_input_tokens_seen": 396826500, "step": 7082 }, { "epoch": 15.775055679287306, "grad_norm": 15.786062240600586, "learning_rate": 1e-06, "loss": 0.4024, "num_input_tokens_seen": 396881984, "step": 7083 }, { "epoch": 15.775055679287306, "loss": 0.3337669372558594, "loss_ce": 0.00014876520435791463, "loss_iou": 0.1533203125, "loss_num": 0.00537109375, "loss_xval": 0.333984375, "num_input_tokens_seen": 396881984, "step": 7083 }, { "epoch": 15.77728285077951, "grad_norm": 18.75821304321289, "learning_rate": 1e-06, "loss": 0.5366, "num_input_tokens_seen": 396937636, "step": 7084 }, { "epoch": 15.77728285077951, "loss": 0.43820372223854065, "loss_ce": 9.338198287878186e-05, "loss_iou": 0.197265625, "loss_num": 0.00872802734375, "loss_xval": 0.4375, "num_input_tokens_seen": 396937636, "step": 7084 }, { "epoch": 15.779510022271715, "grad_norm": 21.541553497314453, "learning_rate": 1e-06, "loss": 0.3054, "num_input_tokens_seen": 396991964, "step": 7085 }, { "epoch": 15.779510022271715, "loss": 0.25417301058769226, "loss_ce": 0.0002667623048182577, "loss_iou": 0.10693359375, "loss_num": 0.008056640625, "loss_xval": 0.25390625, "num_input_tokens_seen": 396991964, "step": 7085 }, { "epoch": 15.78173719376392, "grad_norm": 21.87734603881836, "learning_rate": 1e-06, "loss": 0.3177, "num_input_tokens_seen": 397048488, "step": 7086 }, { "epoch": 15.78173719376392, "loss": 0.32540494203567505, "loss_ce": 8.75709592946805e-05, "loss_iou": 0.134765625, "loss_num": 0.01116943359375, "loss_xval": 0.326171875, "num_input_tokens_seen": 397048488, "step": 7086 }, { "epoch": 15.783964365256125, "grad_norm": 21.914344787597656, "learning_rate": 1e-06, "loss": 0.3939, "num_input_tokens_seen": 397103460, "step": 7087 }, { "epoch": 15.783964365256125, "loss": 0.4146553874015808, "loss_ce": 0.00010459712939336896, "loss_iou": 0.166015625, "loss_num": 0.0167236328125, "loss_xval": 0.4140625, "num_input_tokens_seen": 397103460, "step": 7087 }, { "epoch": 15.78619153674833, "grad_norm": 21.92269515991211, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 397158120, "step": 7088 }, { "epoch": 15.78619153674833, "loss": 0.3409271240234375, "loss_ce": 0.00010680149716790766, "loss_iou": 0.1611328125, "loss_num": 0.003692626953125, "loss_xval": 0.33984375, "num_input_tokens_seen": 397158120, "step": 7088 }, { "epoch": 15.788418708240535, "grad_norm": 19.19719696044922, "learning_rate": 1e-06, "loss": 0.4235, "num_input_tokens_seen": 397215380, "step": 7089 }, { "epoch": 15.788418708240535, "loss": 0.45488834381103516, "loss_ce": 0.00017644368926994503, "loss_iou": 0.2041015625, "loss_num": 0.00946044921875, "loss_xval": 0.455078125, "num_input_tokens_seen": 397215380, "step": 7089 }, { "epoch": 15.79064587973274, "grad_norm": 21.13319969177246, "learning_rate": 1e-06, "loss": 0.4486, "num_input_tokens_seen": 397270500, "step": 7090 }, { "epoch": 15.79064587973274, "loss": 0.3929722011089325, "loss_ce": 0.00014992158685345203, "loss_iou": 0.17578125, "loss_num": 0.0081787109375, "loss_xval": 0.392578125, "num_input_tokens_seen": 397270500, "step": 7090 }, { "epoch": 15.792873051224944, "grad_norm": 13.024203300476074, "learning_rate": 1e-06, "loss": 0.4421, "num_input_tokens_seen": 397328104, "step": 7091 }, { "epoch": 15.792873051224944, "loss": 0.5068001747131348, "loss_ce": 8.632005483377725e-05, "loss_iou": 0.2236328125, "loss_num": 0.01177978515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 397328104, "step": 7091 }, { "epoch": 15.79510022271715, "grad_norm": 17.045452117919922, "learning_rate": 1e-06, "loss": 0.3977, "num_input_tokens_seen": 397384764, "step": 7092 }, { "epoch": 15.79510022271715, "loss": 0.3076792359352112, "loss_ce": 9.255674376618117e-05, "loss_iou": 0.138671875, "loss_num": 0.00592041015625, "loss_xval": 0.306640625, "num_input_tokens_seen": 397384764, "step": 7092 }, { "epoch": 15.797327394209354, "grad_norm": 15.998059272766113, "learning_rate": 1e-06, "loss": 0.3722, "num_input_tokens_seen": 397438400, "step": 7093 }, { "epoch": 15.797327394209354, "loss": 0.3463239073753357, "loss_ce": 0.00013250944903120399, "loss_iou": 0.1474609375, "loss_num": 0.01007080078125, "loss_xval": 0.345703125, "num_input_tokens_seen": 397438400, "step": 7093 }, { "epoch": 15.799554565701559, "grad_norm": 19.02997589111328, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 397496316, "step": 7094 }, { "epoch": 15.799554565701559, "loss": 0.39536595344543457, "loss_ce": 0.0001022747892420739, "loss_iou": 0.1796875, "loss_num": 0.0069580078125, "loss_xval": 0.39453125, "num_input_tokens_seen": 397496316, "step": 7094 }, { "epoch": 15.801781737193764, "grad_norm": 22.2495059967041, "learning_rate": 1e-06, "loss": 0.482, "num_input_tokens_seen": 397551668, "step": 7095 }, { "epoch": 15.801781737193764, "loss": 0.5242829918861389, "loss_ce": 0.00029618252301588655, "loss_iou": 0.2138671875, "loss_num": 0.0194091796875, "loss_xval": 0.5234375, "num_input_tokens_seen": 397551668, "step": 7095 }, { "epoch": 15.804008908685969, "grad_norm": 14.987038612365723, "learning_rate": 1e-06, "loss": 0.4245, "num_input_tokens_seen": 397609464, "step": 7096 }, { "epoch": 15.804008908685969, "loss": 0.41660401225090027, "loss_ce": 0.00010012378334067762, "loss_iou": 0.1748046875, "loss_num": 0.01336669921875, "loss_xval": 0.416015625, "num_input_tokens_seen": 397609464, "step": 7096 }, { "epoch": 15.806236080178174, "grad_norm": 14.618922233581543, "learning_rate": 1e-06, "loss": 0.4714, "num_input_tokens_seen": 397667576, "step": 7097 }, { "epoch": 15.806236080178174, "loss": 0.5953065156936646, "loss_ce": 9.167171083390713e-05, "loss_iou": 0.275390625, "loss_num": 0.00848388671875, "loss_xval": 0.59375, "num_input_tokens_seen": 397667576, "step": 7097 }, { "epoch": 15.808463251670378, "grad_norm": 14.448540687561035, "learning_rate": 1e-06, "loss": 0.4018, "num_input_tokens_seen": 397722704, "step": 7098 }, { "epoch": 15.808463251670378, "loss": 0.486581951379776, "loss_ce": 0.00010122812818735838, "loss_iou": 0.1943359375, "loss_num": 0.0196533203125, "loss_xval": 0.486328125, "num_input_tokens_seen": 397722704, "step": 7098 }, { "epoch": 15.810690423162583, "grad_norm": 15.517051696777344, "learning_rate": 1e-06, "loss": 0.3963, "num_input_tokens_seen": 397776580, "step": 7099 }, { "epoch": 15.810690423162583, "loss": 0.36538606882095337, "loss_ce": 0.00012117931328248233, "loss_iou": 0.158203125, "loss_num": 0.00982666015625, "loss_xval": 0.365234375, "num_input_tokens_seen": 397776580, "step": 7099 }, { "epoch": 15.812917594654788, "grad_norm": 23.810832977294922, "learning_rate": 1e-06, "loss": 0.4988, "num_input_tokens_seen": 397832360, "step": 7100 }, { "epoch": 15.812917594654788, "loss": 0.6032587885856628, "loss_ce": 0.00010939198546111584, "loss_iou": 0.265625, "loss_num": 0.01385498046875, "loss_xval": 0.6015625, "num_input_tokens_seen": 397832360, "step": 7100 }, { "epoch": 15.815144766146993, "grad_norm": 15.830489158630371, "learning_rate": 1e-06, "loss": 0.5576, "num_input_tokens_seen": 397888856, "step": 7101 }, { "epoch": 15.815144766146993, "loss": 0.607633113861084, "loss_ce": 8.912877819966525e-05, "loss_iou": 0.2470703125, "loss_num": 0.0228271484375, "loss_xval": 0.609375, "num_input_tokens_seen": 397888856, "step": 7101 }, { "epoch": 15.817371937639198, "grad_norm": 19.18913459777832, "learning_rate": 1e-06, "loss": 0.4931, "num_input_tokens_seen": 397941060, "step": 7102 }, { "epoch": 15.817371937639198, "loss": 0.3700322210788727, "loss_ce": 9.815287921810523e-05, "loss_iou": 0.1484375, "loss_num": 0.0145263671875, "loss_xval": 0.369140625, "num_input_tokens_seen": 397941060, "step": 7102 }, { "epoch": 15.819599109131403, "grad_norm": 13.465466499328613, "learning_rate": 1e-06, "loss": 0.3357, "num_input_tokens_seen": 397996392, "step": 7103 }, { "epoch": 15.819599109131403, "loss": 0.395857036113739, "loss_ce": 0.00010508089326322079, "loss_iou": 0.177734375, "loss_num": 0.00811767578125, "loss_xval": 0.396484375, "num_input_tokens_seen": 397996392, "step": 7103 }, { "epoch": 15.821826280623608, "grad_norm": 17.05833625793457, "learning_rate": 1e-06, "loss": 0.4311, "num_input_tokens_seen": 398053248, "step": 7104 }, { "epoch": 15.821826280623608, "loss": 0.4999108910560608, "loss_ce": 0.0001550541928736493, "loss_iou": 0.2060546875, "loss_num": 0.0177001953125, "loss_xval": 0.5, "num_input_tokens_seen": 398053248, "step": 7104 }, { "epoch": 15.824053452115812, "grad_norm": 20.213592529296875, "learning_rate": 1e-06, "loss": 0.4103, "num_input_tokens_seen": 398110088, "step": 7105 }, { "epoch": 15.824053452115812, "loss": 0.5278196334838867, "loss_ce": 0.00010968356218654662, "loss_iou": 0.224609375, "loss_num": 0.015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 398110088, "step": 7105 }, { "epoch": 15.826280623608017, "grad_norm": 17.705123901367188, "learning_rate": 1e-06, "loss": 0.515, "num_input_tokens_seen": 398168524, "step": 7106 }, { "epoch": 15.826280623608017, "loss": 0.39392590522766113, "loss_ce": 0.0007374440901912749, "loss_iou": 0.1630859375, "loss_num": 0.0133056640625, "loss_xval": 0.392578125, "num_input_tokens_seen": 398168524, "step": 7106 }, { "epoch": 15.828507795100222, "grad_norm": 11.036231994628906, "learning_rate": 1e-06, "loss": 0.5384, "num_input_tokens_seen": 398226688, "step": 7107 }, { "epoch": 15.828507795100222, "loss": 0.5651922225952148, "loss_ce": 0.00012873421655967832, "loss_iou": 0.2314453125, "loss_num": 0.0205078125, "loss_xval": 0.56640625, "num_input_tokens_seen": 398226688, "step": 7107 }, { "epoch": 15.830734966592427, "grad_norm": 19.0125789642334, "learning_rate": 1e-06, "loss": 0.4158, "num_input_tokens_seen": 398282356, "step": 7108 }, { "epoch": 15.830734966592427, "loss": 0.28864729404449463, "loss_ce": 7.308388012461364e-05, "loss_iou": 0.12451171875, "loss_num": 0.00799560546875, "loss_xval": 0.2890625, "num_input_tokens_seen": 398282356, "step": 7108 }, { "epoch": 15.832962138084632, "grad_norm": 21.866071701049805, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 398339436, "step": 7109 }, { "epoch": 15.832962138084632, "loss": 0.5395206809043884, "loss_ce": 9.19756930670701e-05, "loss_iou": 0.2421875, "loss_num": 0.0108642578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 398339436, "step": 7109 }, { "epoch": 15.835189309576837, "grad_norm": 25.225812911987305, "learning_rate": 1e-06, "loss": 0.5883, "num_input_tokens_seen": 398395324, "step": 7110 }, { "epoch": 15.835189309576837, "loss": 0.5729167461395264, "loss_ce": 0.00016287056496366858, "loss_iou": 0.25390625, "loss_num": 0.01318359375, "loss_xval": 0.57421875, "num_input_tokens_seen": 398395324, "step": 7110 }, { "epoch": 15.837416481069042, "grad_norm": 17.75248908996582, "learning_rate": 1e-06, "loss": 0.3587, "num_input_tokens_seen": 398450532, "step": 7111 }, { "epoch": 15.837416481069042, "loss": 0.3196753263473511, "loss_ce": 9.522202162770554e-05, "loss_iou": 0.142578125, "loss_num": 0.00677490234375, "loss_xval": 0.3203125, "num_input_tokens_seen": 398450532, "step": 7111 }, { "epoch": 15.839643652561247, "grad_norm": 18.968854904174805, "learning_rate": 1e-06, "loss": 0.3703, "num_input_tokens_seen": 398505144, "step": 7112 }, { "epoch": 15.839643652561247, "loss": 0.3004349172115326, "loss_ce": 8.092600910458714e-05, "loss_iou": 0.134765625, "loss_num": 0.006195068359375, "loss_xval": 0.30078125, "num_input_tokens_seen": 398505144, "step": 7112 }, { "epoch": 15.841870824053451, "grad_norm": 18.018905639648438, "learning_rate": 1e-06, "loss": 0.3754, "num_input_tokens_seen": 398560608, "step": 7113 }, { "epoch": 15.841870824053451, "loss": 0.3709629476070404, "loss_ce": 0.0001133316254708916, "loss_iou": 0.1611328125, "loss_num": 0.00958251953125, "loss_xval": 0.37109375, "num_input_tokens_seen": 398560608, "step": 7113 }, { "epoch": 15.844097995545656, "grad_norm": 24.227947235107422, "learning_rate": 1e-06, "loss": 0.4965, "num_input_tokens_seen": 398617888, "step": 7114 }, { "epoch": 15.844097995545656, "loss": 0.5535033941268921, "loss_ce": 9.764648712007329e-05, "loss_iou": 0.2421875, "loss_num": 0.013671875, "loss_xval": 0.5546875, "num_input_tokens_seen": 398617888, "step": 7114 }, { "epoch": 15.846325167037861, "grad_norm": 23.27136993408203, "learning_rate": 1e-06, "loss": 0.58, "num_input_tokens_seen": 398671732, "step": 7115 }, { "epoch": 15.846325167037861, "loss": 0.7010478973388672, "loss_ce": 0.00010489902342669666, "loss_iou": 0.3046875, "loss_num": 0.018310546875, "loss_xval": 0.69921875, "num_input_tokens_seen": 398671732, "step": 7115 }, { "epoch": 15.848552338530066, "grad_norm": 18.09553337097168, "learning_rate": 1e-06, "loss": 0.4632, "num_input_tokens_seen": 398728176, "step": 7116 }, { "epoch": 15.848552338530066, "loss": 0.4016414284706116, "loss_ce": 9.112786210607737e-05, "loss_iou": 0.1689453125, "loss_num": 0.0125732421875, "loss_xval": 0.40234375, "num_input_tokens_seen": 398728176, "step": 7116 }, { "epoch": 15.85077951002227, "grad_norm": 17.273834228515625, "learning_rate": 1e-06, "loss": 0.6624, "num_input_tokens_seen": 398783004, "step": 7117 }, { "epoch": 15.85077951002227, "loss": 0.753657341003418, "loss_ce": 0.00011729233665391803, "loss_iou": 0.26953125, "loss_num": 0.042724609375, "loss_xval": 0.75390625, "num_input_tokens_seen": 398783004, "step": 7117 }, { "epoch": 15.853006681514476, "grad_norm": 12.48859977722168, "learning_rate": 1e-06, "loss": 0.4666, "num_input_tokens_seen": 398842220, "step": 7118 }, { "epoch": 15.853006681514476, "loss": 0.3523826599121094, "loss_ce": 8.77102866070345e-05, "loss_iou": 0.138671875, "loss_num": 0.01513671875, "loss_xval": 0.3515625, "num_input_tokens_seen": 398842220, "step": 7118 }, { "epoch": 15.855233853006682, "grad_norm": 13.259708404541016, "learning_rate": 1e-06, "loss": 0.4148, "num_input_tokens_seen": 398899104, "step": 7119 }, { "epoch": 15.855233853006682, "loss": 0.5045324563980103, "loss_ce": 0.00013790541561320424, "loss_iou": 0.1923828125, "loss_num": 0.0240478515625, "loss_xval": 0.50390625, "num_input_tokens_seen": 398899104, "step": 7119 }, { "epoch": 15.857461024498887, "grad_norm": 18.383527755737305, "learning_rate": 1e-06, "loss": 0.4292, "num_input_tokens_seen": 398952944, "step": 7120 }, { "epoch": 15.857461024498887, "loss": 0.42771950364112854, "loss_ce": 0.00010719949204940349, "loss_iou": 0.2001953125, "loss_num": 0.005645751953125, "loss_xval": 0.427734375, "num_input_tokens_seen": 398952944, "step": 7120 }, { "epoch": 15.859688195991092, "grad_norm": 44.14472579956055, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 399007520, "step": 7121 }, { "epoch": 15.859688195991092, "loss": 0.501062273979187, "loss_ce": 8.574766980018467e-05, "loss_iou": 0.201171875, "loss_num": 0.0198974609375, "loss_xval": 0.5, "num_input_tokens_seen": 399007520, "step": 7121 }, { "epoch": 15.861915367483297, "grad_norm": 19.604982376098633, "learning_rate": 1e-06, "loss": 0.5346, "num_input_tokens_seen": 399062428, "step": 7122 }, { "epoch": 15.861915367483297, "loss": 0.5388393998146057, "loss_ce": 8.2079553976655e-05, "loss_iou": 0.220703125, "loss_num": 0.0194091796875, "loss_xval": 0.5390625, "num_input_tokens_seen": 399062428, "step": 7122 }, { "epoch": 15.864142538975502, "grad_norm": 15.576171875, "learning_rate": 1e-06, "loss": 0.4607, "num_input_tokens_seen": 399117136, "step": 7123 }, { "epoch": 15.864142538975502, "loss": 0.6172924637794495, "loss_ce": 0.00010496602772036567, "loss_iou": 0.28515625, "loss_num": 0.00933837890625, "loss_xval": 0.6171875, "num_input_tokens_seen": 399117136, "step": 7123 }, { "epoch": 15.866369710467707, "grad_norm": 20.8918514251709, "learning_rate": 1e-06, "loss": 0.4582, "num_input_tokens_seen": 399172568, "step": 7124 }, { "epoch": 15.866369710467707, "loss": 0.3362141251564026, "loss_ce": 9.351145854452625e-05, "loss_iou": 0.142578125, "loss_num": 0.01025390625, "loss_xval": 0.3359375, "num_input_tokens_seen": 399172568, "step": 7124 }, { "epoch": 15.868596881959911, "grad_norm": 14.206762313842773, "learning_rate": 1e-06, "loss": 0.3939, "num_input_tokens_seen": 399229784, "step": 7125 }, { "epoch": 15.868596881959911, "loss": 0.3726486265659332, "loss_ce": 9.005493484437466e-05, "loss_iou": 0.154296875, "loss_num": 0.01300048828125, "loss_xval": 0.373046875, "num_input_tokens_seen": 399229784, "step": 7125 }, { "epoch": 15.870824053452116, "grad_norm": 40.32673263549805, "learning_rate": 1e-06, "loss": 0.4629, "num_input_tokens_seen": 399285780, "step": 7126 }, { "epoch": 15.870824053452116, "loss": 0.49388349056243896, "loss_ce": 0.00010904869122896343, "loss_iou": 0.234375, "loss_num": 0.004852294921875, "loss_xval": 0.494140625, "num_input_tokens_seen": 399285780, "step": 7126 }, { "epoch": 15.873051224944321, "grad_norm": 13.4733304977417, "learning_rate": 1e-06, "loss": 0.5213, "num_input_tokens_seen": 399342376, "step": 7127 }, { "epoch": 15.873051224944321, "loss": 0.5921471118927002, "loss_ce": 0.00010613157064653933, "loss_iou": 0.265625, "loss_num": 0.01226806640625, "loss_xval": 0.59375, "num_input_tokens_seen": 399342376, "step": 7127 }, { "epoch": 15.875278396436526, "grad_norm": 15.447736740112305, "learning_rate": 1e-06, "loss": 0.4606, "num_input_tokens_seen": 399400404, "step": 7128 }, { "epoch": 15.875278396436526, "loss": 0.5045484304428101, "loss_ce": 0.00015390958287753165, "loss_iou": 0.2158203125, "loss_num": 0.0146484375, "loss_xval": 0.50390625, "num_input_tokens_seen": 399400404, "step": 7128 }, { "epoch": 15.877505567928731, "grad_norm": 15.931106567382812, "learning_rate": 1e-06, "loss": 0.4239, "num_input_tokens_seen": 399455484, "step": 7129 }, { "epoch": 15.877505567928731, "loss": 0.4633306860923767, "loss_ce": 0.00031800862052477896, "loss_iou": 0.19921875, "loss_num": 0.0128173828125, "loss_xval": 0.462890625, "num_input_tokens_seen": 399455484, "step": 7129 }, { "epoch": 15.879732739420936, "grad_norm": 17.251869201660156, "learning_rate": 1e-06, "loss": 0.3316, "num_input_tokens_seen": 399510620, "step": 7130 }, { "epoch": 15.879732739420936, "loss": 0.31160733103752136, "loss_ce": 8.388744026888162e-05, "loss_iou": 0.1318359375, "loss_num": 0.00946044921875, "loss_xval": 0.3125, "num_input_tokens_seen": 399510620, "step": 7130 }, { "epoch": 15.88195991091314, "grad_norm": 20.6621150970459, "learning_rate": 1e-06, "loss": 0.367, "num_input_tokens_seen": 399567148, "step": 7131 }, { "epoch": 15.88195991091314, "loss": 0.39595168828964233, "loss_ce": 7.766317867208272e-05, "loss_iou": 0.18359375, "loss_num": 0.005767822265625, "loss_xval": 0.396484375, "num_input_tokens_seen": 399567148, "step": 7131 }, { "epoch": 15.884187082405345, "grad_norm": 20.26247215270996, "learning_rate": 1e-06, "loss": 0.5707, "num_input_tokens_seen": 399619636, "step": 7132 }, { "epoch": 15.884187082405345, "loss": 0.6330655217170715, "loss_ce": 0.000130962478579022, "loss_iou": 0.267578125, "loss_num": 0.02001953125, "loss_xval": 0.6328125, "num_input_tokens_seen": 399619636, "step": 7132 }, { "epoch": 15.88641425389755, "grad_norm": 19.18486213684082, "learning_rate": 1e-06, "loss": 0.5153, "num_input_tokens_seen": 399674064, "step": 7133 }, { "epoch": 15.88641425389755, "loss": 0.5503383874893188, "loss_ce": 0.00010647171438904479, "loss_iou": 0.228515625, "loss_num": 0.0185546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 399674064, "step": 7133 }, { "epoch": 15.888641425389755, "grad_norm": 16.185976028442383, "learning_rate": 1e-06, "loss": 0.3956, "num_input_tokens_seen": 399728452, "step": 7134 }, { "epoch": 15.888641425389755, "loss": 0.24313417077064514, "loss_ce": 9.21574974199757e-05, "loss_iou": 0.10888671875, "loss_num": 0.005096435546875, "loss_xval": 0.2431640625, "num_input_tokens_seen": 399728452, "step": 7134 }, { "epoch": 15.89086859688196, "grad_norm": 18.77434539794922, "learning_rate": 1e-06, "loss": 0.5478, "num_input_tokens_seen": 399783984, "step": 7135 }, { "epoch": 15.89086859688196, "loss": 0.4603026211261749, "loss_ce": 9.756326471688226e-05, "loss_iou": 0.203125, "loss_num": 0.0106201171875, "loss_xval": 0.4609375, "num_input_tokens_seen": 399783984, "step": 7135 }, { "epoch": 15.893095768374165, "grad_norm": 17.734636306762695, "learning_rate": 1e-06, "loss": 0.53, "num_input_tokens_seen": 399839268, "step": 7136 }, { "epoch": 15.893095768374165, "loss": 0.6124453544616699, "loss_ce": 0.00014065181312616915, "loss_iou": 0.251953125, "loss_num": 0.0216064453125, "loss_xval": 0.61328125, "num_input_tokens_seen": 399839268, "step": 7136 }, { "epoch": 15.89532293986637, "grad_norm": 20.12199592590332, "learning_rate": 1e-06, "loss": 0.4646, "num_input_tokens_seen": 399896836, "step": 7137 }, { "epoch": 15.89532293986637, "loss": 0.5352360010147095, "loss_ce": 7.976061169756576e-05, "loss_iou": 0.2333984375, "loss_num": 0.013427734375, "loss_xval": 0.53515625, "num_input_tokens_seen": 399896836, "step": 7137 }, { "epoch": 15.897550111358575, "grad_norm": 20.401044845581055, "learning_rate": 1e-06, "loss": 0.5685, "num_input_tokens_seen": 399954484, "step": 7138 }, { "epoch": 15.897550111358575, "loss": 0.512428879737854, "loss_ce": 9.979259630199522e-05, "loss_iou": 0.212890625, "loss_num": 0.0172119140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 399954484, "step": 7138 }, { "epoch": 15.89977728285078, "grad_norm": 33.50958251953125, "learning_rate": 1e-06, "loss": 0.4488, "num_input_tokens_seen": 400013840, "step": 7139 }, { "epoch": 15.89977728285078, "loss": 0.44518011808395386, "loss_ce": 0.00015754257037770003, "loss_iou": 0.19140625, "loss_num": 0.01263427734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 400013840, "step": 7139 }, { "epoch": 15.902004454342984, "grad_norm": 14.90358829498291, "learning_rate": 1e-06, "loss": 0.6293, "num_input_tokens_seen": 400068508, "step": 7140 }, { "epoch": 15.902004454342984, "loss": 0.7256982326507568, "loss_ce": 0.00011228243238292634, "loss_iou": 0.294921875, "loss_num": 0.0277099609375, "loss_xval": 0.7265625, "num_input_tokens_seen": 400068508, "step": 7140 }, { "epoch": 15.90423162583519, "grad_norm": 15.052915573120117, "learning_rate": 1e-06, "loss": 0.3924, "num_input_tokens_seen": 400125004, "step": 7141 }, { "epoch": 15.90423162583519, "loss": 0.49386632442474365, "loss_ce": 0.00015298393554985523, "loss_iou": 0.1884765625, "loss_num": 0.0235595703125, "loss_xval": 0.494140625, "num_input_tokens_seen": 400125004, "step": 7141 }, { "epoch": 15.906458797327394, "grad_norm": 21.69573402404785, "learning_rate": 1e-06, "loss": 0.4718, "num_input_tokens_seen": 400179284, "step": 7142 }, { "epoch": 15.906458797327394, "loss": 0.2900165319442749, "loss_ce": 9.953400876838714e-05, "loss_iou": 0.1279296875, "loss_num": 0.0067138671875, "loss_xval": 0.2890625, "num_input_tokens_seen": 400179284, "step": 7142 }, { "epoch": 15.908685968819599, "grad_norm": 17.714357376098633, "learning_rate": 1e-06, "loss": 0.5839, "num_input_tokens_seen": 400233780, "step": 7143 }, { "epoch": 15.908685968819599, "loss": 0.5741137862205505, "loss_ce": 0.0001391808473272249, "loss_iou": 0.265625, "loss_num": 0.0089111328125, "loss_xval": 0.57421875, "num_input_tokens_seen": 400233780, "step": 7143 }, { "epoch": 15.910913140311804, "grad_norm": 32.93324279785156, "learning_rate": 1e-06, "loss": 0.5251, "num_input_tokens_seen": 400291408, "step": 7144 }, { "epoch": 15.910913140311804, "loss": 0.4064697325229645, "loss_ce": 9.767108713276684e-05, "loss_iou": 0.1806640625, "loss_num": 0.00897216796875, "loss_xval": 0.40625, "num_input_tokens_seen": 400291408, "step": 7144 }, { "epoch": 15.913140311804009, "grad_norm": 19.784011840820312, "learning_rate": 1e-06, "loss": 0.5329, "num_input_tokens_seen": 400348664, "step": 7145 }, { "epoch": 15.913140311804009, "loss": 0.4993619918823242, "loss_ce": 9.443063754588366e-05, "loss_iou": 0.216796875, "loss_num": 0.01300048828125, "loss_xval": 0.5, "num_input_tokens_seen": 400348664, "step": 7145 }, { "epoch": 15.915367483296214, "grad_norm": 17.10857391357422, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 400402800, "step": 7146 }, { "epoch": 15.915367483296214, "loss": 0.5981404781341553, "loss_ce": 0.0002400614321231842, "loss_iou": 0.251953125, "loss_num": 0.0191650390625, "loss_xval": 0.59765625, "num_input_tokens_seen": 400402800, "step": 7146 }, { "epoch": 15.917594654788418, "grad_norm": 18.01358413696289, "learning_rate": 1e-06, "loss": 0.4396, "num_input_tokens_seen": 400460196, "step": 7147 }, { "epoch": 15.917594654788418, "loss": 0.524014949798584, "loss_ce": 8.919274841900915e-05, "loss_iou": 0.208984375, "loss_num": 0.0211181640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 400460196, "step": 7147 }, { "epoch": 15.919821826280623, "grad_norm": 16.308366775512695, "learning_rate": 1e-06, "loss": 0.5253, "num_input_tokens_seen": 400517080, "step": 7148 }, { "epoch": 15.919821826280623, "loss": 0.4072136878967285, "loss_ce": 0.00010919298802036792, "loss_iou": 0.1591796875, "loss_num": 0.017578125, "loss_xval": 0.40625, "num_input_tokens_seen": 400517080, "step": 7148 }, { "epoch": 15.922048997772828, "grad_norm": 14.491548538208008, "learning_rate": 1e-06, "loss": 0.4755, "num_input_tokens_seen": 400572672, "step": 7149 }, { "epoch": 15.922048997772828, "loss": 0.4918016791343689, "loss_ce": 0.0001024569064611569, "loss_iou": 0.224609375, "loss_num": 0.0084228515625, "loss_xval": 0.4921875, "num_input_tokens_seen": 400572672, "step": 7149 }, { "epoch": 15.924276169265033, "grad_norm": 25.39553451538086, "learning_rate": 1e-06, "loss": 0.5337, "num_input_tokens_seen": 400628244, "step": 7150 }, { "epoch": 15.924276169265033, "loss": 0.423916757106781, "loss_ce": 8.863602124620229e-05, "loss_iou": 0.1728515625, "loss_num": 0.015625, "loss_xval": 0.423828125, "num_input_tokens_seen": 400628244, "step": 7150 }, { "epoch": 15.926503340757238, "grad_norm": 18.745763778686523, "learning_rate": 1e-06, "loss": 0.4148, "num_input_tokens_seen": 400685080, "step": 7151 }, { "epoch": 15.926503340757238, "loss": 0.4842875599861145, "loss_ce": 0.00012620513734873384, "loss_iou": 0.189453125, "loss_num": 0.021240234375, "loss_xval": 0.484375, "num_input_tokens_seen": 400685080, "step": 7151 }, { "epoch": 15.928730512249443, "grad_norm": 21.89429473876953, "learning_rate": 1e-06, "loss": 0.4732, "num_input_tokens_seen": 400736332, "step": 7152 }, { "epoch": 15.928730512249443, "loss": 0.4724288582801819, "loss_ce": 7.779937004670501e-05, "loss_iou": 0.197265625, "loss_num": 0.015869140625, "loss_xval": 0.47265625, "num_input_tokens_seen": 400736332, "step": 7152 }, { "epoch": 15.930957683741648, "grad_norm": 22.908029556274414, "learning_rate": 1e-06, "loss": 0.2874, "num_input_tokens_seen": 400792008, "step": 7153 }, { "epoch": 15.930957683741648, "loss": 0.2497827112674713, "loss_ce": 8.78690043464303e-05, "loss_iou": 0.11376953125, "loss_num": 0.004364013671875, "loss_xval": 0.25, "num_input_tokens_seen": 400792008, "step": 7153 }, { "epoch": 15.933184855233852, "grad_norm": 16.189254760742188, "learning_rate": 1e-06, "loss": 0.4473, "num_input_tokens_seen": 400849396, "step": 7154 }, { "epoch": 15.933184855233852, "loss": 0.4695594310760498, "loss_ce": 7.704535528318956e-05, "loss_iou": 0.208984375, "loss_num": 0.0103759765625, "loss_xval": 0.46875, "num_input_tokens_seen": 400849396, "step": 7154 }, { "epoch": 15.935412026726057, "grad_norm": 19.59444236755371, "learning_rate": 1e-06, "loss": 0.4609, "num_input_tokens_seen": 400905600, "step": 7155 }, { "epoch": 15.935412026726057, "loss": 0.26319292187690735, "loss_ce": 0.0001313859538640827, "loss_iou": 0.1103515625, "loss_num": 0.00836181640625, "loss_xval": 0.263671875, "num_input_tokens_seen": 400905600, "step": 7155 }, { "epoch": 15.937639198218262, "grad_norm": 14.831480979919434, "learning_rate": 1e-06, "loss": 0.5633, "num_input_tokens_seen": 400964476, "step": 7156 }, { "epoch": 15.937639198218262, "loss": 0.6740528345108032, "loss_ce": 0.00010258887050440535, "loss_iou": 0.275390625, "loss_num": 0.0247802734375, "loss_xval": 0.67578125, "num_input_tokens_seen": 400964476, "step": 7156 }, { "epoch": 15.939866369710467, "grad_norm": 22.283842086791992, "learning_rate": 1e-06, "loss": 0.4229, "num_input_tokens_seen": 401020968, "step": 7157 }, { "epoch": 15.939866369710467, "loss": 0.48083391785621643, "loss_ce": 9.051487722899765e-05, "loss_iou": 0.2138671875, "loss_num": 0.0107421875, "loss_xval": 0.48046875, "num_input_tokens_seen": 401020968, "step": 7157 }, { "epoch": 15.942093541202672, "grad_norm": 13.97571849822998, "learning_rate": 1e-06, "loss": 0.4721, "num_input_tokens_seen": 401075944, "step": 7158 }, { "epoch": 15.942093541202672, "loss": 0.3242437243461609, "loss_ce": 8.600985893281177e-05, "loss_iou": 0.1484375, "loss_num": 0.00555419921875, "loss_xval": 0.32421875, "num_input_tokens_seen": 401075944, "step": 7158 }, { "epoch": 15.944320712694877, "grad_norm": 25.421710968017578, "learning_rate": 1e-06, "loss": 0.7803, "num_input_tokens_seen": 401130976, "step": 7159 }, { "epoch": 15.944320712694877, "loss": 0.8276197910308838, "loss_ce": 0.00010511695290915668, "loss_iou": 0.3359375, "loss_num": 0.031494140625, "loss_xval": 0.828125, "num_input_tokens_seen": 401130976, "step": 7159 }, { "epoch": 15.946547884187082, "grad_norm": 39.614234924316406, "learning_rate": 1e-06, "loss": 0.4332, "num_input_tokens_seen": 401186464, "step": 7160 }, { "epoch": 15.946547884187082, "loss": 0.483007550239563, "loss_ce": 9.739773668115959e-05, "loss_iou": 0.19921875, "loss_num": 0.0167236328125, "loss_xval": 0.482421875, "num_input_tokens_seen": 401186464, "step": 7160 }, { "epoch": 15.948775055679288, "grad_norm": 16.9848690032959, "learning_rate": 1e-06, "loss": 0.5029, "num_input_tokens_seen": 401242136, "step": 7161 }, { "epoch": 15.948775055679288, "loss": 0.5525789260864258, "loss_ce": 8.865697600413114e-05, "loss_iou": 0.232421875, "loss_num": 0.0174560546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 401242136, "step": 7161 }, { "epoch": 15.951002227171493, "grad_norm": 19.457195281982422, "learning_rate": 1e-06, "loss": 0.4659, "num_input_tokens_seen": 401297980, "step": 7162 }, { "epoch": 15.951002227171493, "loss": 0.28521448373794556, "loss_ce": 8.872566104400903e-05, "loss_iou": 0.1298828125, "loss_num": 0.005218505859375, "loss_xval": 0.28515625, "num_input_tokens_seen": 401297980, "step": 7162 }, { "epoch": 15.953229398663698, "grad_norm": 13.675963401794434, "learning_rate": 1e-06, "loss": 0.3121, "num_input_tokens_seen": 401355992, "step": 7163 }, { "epoch": 15.953229398663698, "loss": 0.300504207611084, "loss_ce": 8.916326623875648e-05, "loss_iou": 0.1376953125, "loss_num": 0.005035400390625, "loss_xval": 0.30078125, "num_input_tokens_seen": 401355992, "step": 7163 }, { "epoch": 15.955456570155903, "grad_norm": 17.08403205871582, "learning_rate": 1e-06, "loss": 0.6135, "num_input_tokens_seen": 401410732, "step": 7164 }, { "epoch": 15.955456570155903, "loss": 0.6625217199325562, "loss_ce": 0.0001681805297266692, "loss_iou": 0.27734375, "loss_num": 0.021484375, "loss_xval": 0.6640625, "num_input_tokens_seen": 401410732, "step": 7164 }, { "epoch": 15.957683741648108, "grad_norm": 16.065011978149414, "learning_rate": 1e-06, "loss": 0.4816, "num_input_tokens_seen": 401466736, "step": 7165 }, { "epoch": 15.957683741648108, "loss": 0.605895459651947, "loss_ce": 0.00018256741168443114, "loss_iou": 0.2392578125, "loss_num": 0.0255126953125, "loss_xval": 0.60546875, "num_input_tokens_seen": 401466736, "step": 7165 }, { "epoch": 15.959910913140313, "grad_norm": 15.503355979919434, "learning_rate": 1e-06, "loss": 0.5208, "num_input_tokens_seen": 401525152, "step": 7166 }, { "epoch": 15.959910913140313, "loss": 0.6843163967132568, "loss_ce": 0.00011229477968299761, "loss_iou": 0.306640625, "loss_num": 0.01446533203125, "loss_xval": 0.68359375, "num_input_tokens_seen": 401525152, "step": 7166 }, { "epoch": 15.962138084632517, "grad_norm": 23.097681045532227, "learning_rate": 1e-06, "loss": 0.3613, "num_input_tokens_seen": 401576820, "step": 7167 }, { "epoch": 15.962138084632517, "loss": 0.289186954498291, "loss_ce": 0.0001244572049472481, "loss_iou": 0.11083984375, "loss_num": 0.01348876953125, "loss_xval": 0.2890625, "num_input_tokens_seen": 401576820, "step": 7167 }, { "epoch": 15.964365256124722, "grad_norm": 13.968826293945312, "learning_rate": 1e-06, "loss": 0.3522, "num_input_tokens_seen": 401630452, "step": 7168 }, { "epoch": 15.964365256124722, "loss": 0.37191227078437805, "loss_ce": 8.611210068920627e-05, "loss_iou": 0.1611328125, "loss_num": 0.00994873046875, "loss_xval": 0.37109375, "num_input_tokens_seen": 401630452, "step": 7168 }, { "epoch": 15.966592427616927, "grad_norm": 16.292312622070312, "learning_rate": 1e-06, "loss": 0.3667, "num_input_tokens_seen": 401686368, "step": 7169 }, { "epoch": 15.966592427616927, "loss": 0.37168073654174805, "loss_ce": 9.872023656498641e-05, "loss_iou": 0.16796875, "loss_num": 0.006988525390625, "loss_xval": 0.37109375, "num_input_tokens_seen": 401686368, "step": 7169 }, { "epoch": 15.968819599109132, "grad_norm": 30.845420837402344, "learning_rate": 1e-06, "loss": 0.4289, "num_input_tokens_seen": 401743020, "step": 7170 }, { "epoch": 15.968819599109132, "loss": 0.4194309413433075, "loss_ce": 0.00011940038530156016, "loss_iou": 0.1787109375, "loss_num": 0.01220703125, "loss_xval": 0.419921875, "num_input_tokens_seen": 401743020, "step": 7170 }, { "epoch": 15.971046770601337, "grad_norm": 24.887876510620117, "learning_rate": 1e-06, "loss": 0.5927, "num_input_tokens_seen": 401795184, "step": 7171 }, { "epoch": 15.971046770601337, "loss": 0.7264291048049927, "loss_ce": 0.00011069556057918817, "loss_iou": 0.30859375, "loss_num": 0.021484375, "loss_xval": 0.7265625, "num_input_tokens_seen": 401795184, "step": 7171 }, { "epoch": 15.973273942093542, "grad_norm": 14.26672077178955, "learning_rate": 1e-06, "loss": 0.4417, "num_input_tokens_seen": 401852640, "step": 7172 }, { "epoch": 15.973273942093542, "loss": 0.4775257706642151, "loss_ce": 0.00010878611647058278, "loss_iou": 0.2138671875, "loss_num": 0.00994873046875, "loss_xval": 0.4765625, "num_input_tokens_seen": 401852640, "step": 7172 }, { "epoch": 15.975501113585747, "grad_norm": 20.04558753967285, "learning_rate": 1e-06, "loss": 0.3823, "num_input_tokens_seen": 401909140, "step": 7173 }, { "epoch": 15.975501113585747, "loss": 0.35957685112953186, "loss_ce": 7.978198118507862e-05, "loss_iou": 0.1650390625, "loss_num": 0.00579833984375, "loss_xval": 0.359375, "num_input_tokens_seen": 401909140, "step": 7173 }, { "epoch": 15.977728285077951, "grad_norm": 14.882387161254883, "learning_rate": 1e-06, "loss": 0.4257, "num_input_tokens_seen": 401968400, "step": 7174 }, { "epoch": 15.977728285077951, "loss": 0.48474133014678955, "loss_ce": 0.00012219653581269085, "loss_iou": 0.220703125, "loss_num": 0.0086669921875, "loss_xval": 0.484375, "num_input_tokens_seen": 401968400, "step": 7174 }, { "epoch": 15.979955456570156, "grad_norm": 12.847021102905273, "learning_rate": 1e-06, "loss": 0.3644, "num_input_tokens_seen": 402023480, "step": 7175 }, { "epoch": 15.979955456570156, "loss": 0.4103115499019623, "loss_ce": 9.426410542801023e-05, "loss_iou": 0.1728515625, "loss_num": 0.01287841796875, "loss_xval": 0.41015625, "num_input_tokens_seen": 402023480, "step": 7175 }, { "epoch": 15.982182628062361, "grad_norm": 13.715554237365723, "learning_rate": 1e-06, "loss": 0.3894, "num_input_tokens_seen": 402080180, "step": 7176 }, { "epoch": 15.982182628062361, "loss": 0.47568291425704956, "loss_ce": 9.699161455500871e-05, "loss_iou": 0.2080078125, "loss_num": 0.01202392578125, "loss_xval": 0.4765625, "num_input_tokens_seen": 402080180, "step": 7176 }, { "epoch": 15.984409799554566, "grad_norm": 16.425464630126953, "learning_rate": 1e-06, "loss": 0.3162, "num_input_tokens_seen": 402135848, "step": 7177 }, { "epoch": 15.984409799554566, "loss": 0.38852375745773315, "loss_ce": 9.600124030839652e-05, "loss_iou": 0.1767578125, "loss_num": 0.007171630859375, "loss_xval": 0.388671875, "num_input_tokens_seen": 402135848, "step": 7177 }, { "epoch": 15.98663697104677, "grad_norm": 15.072603225708008, "learning_rate": 1e-06, "loss": 0.4141, "num_input_tokens_seen": 402191316, "step": 7178 }, { "epoch": 15.98663697104677, "loss": 0.48317813873291016, "loss_ce": 0.00014590806677006185, "loss_iou": 0.2080078125, "loss_num": 0.01336669921875, "loss_xval": 0.482421875, "num_input_tokens_seen": 402191316, "step": 7178 }, { "epoch": 15.988864142538976, "grad_norm": 26.326980590820312, "learning_rate": 1e-06, "loss": 0.4093, "num_input_tokens_seen": 402246396, "step": 7179 }, { "epoch": 15.988864142538976, "loss": 0.3864772319793701, "loss_ce": 0.0001246822066605091, "loss_iou": 0.1708984375, "loss_num": 0.00872802734375, "loss_xval": 0.38671875, "num_input_tokens_seen": 402246396, "step": 7179 }, { "epoch": 15.99109131403118, "grad_norm": 14.615377426147461, "learning_rate": 1e-06, "loss": 0.3313, "num_input_tokens_seen": 402303500, "step": 7180 }, { "epoch": 15.99109131403118, "loss": 0.2933087646961212, "loss_ce": 0.00015690606960561126, "loss_iou": 0.12060546875, "loss_num": 0.01025390625, "loss_xval": 0.29296875, "num_input_tokens_seen": 402303500, "step": 7180 }, { "epoch": 15.993318485523385, "grad_norm": 20.349151611328125, "learning_rate": 1e-06, "loss": 0.3356, "num_input_tokens_seen": 402358536, "step": 7181 }, { "epoch": 15.993318485523385, "loss": 0.3299248516559601, "loss_ce": 9.087211219593883e-05, "loss_iou": 0.1435546875, "loss_num": 0.00860595703125, "loss_xval": 0.330078125, "num_input_tokens_seen": 402358536, "step": 7181 }, { "epoch": 15.99554565701559, "grad_norm": 14.283751487731934, "learning_rate": 1e-06, "loss": 0.3803, "num_input_tokens_seen": 402414524, "step": 7182 }, { "epoch": 15.99554565701559, "loss": 0.2790136933326721, "loss_ce": 8.300953777506948e-05, "loss_iou": 0.1025390625, "loss_num": 0.01470947265625, "loss_xval": 0.279296875, "num_input_tokens_seen": 402414524, "step": 7182 }, { "epoch": 15.997772828507795, "grad_norm": 53.64034652709961, "learning_rate": 1e-06, "loss": 0.4319, "num_input_tokens_seen": 402473320, "step": 7183 }, { "epoch": 15.997772828507795, "loss": 0.478466272354126, "loss_ce": 0.00019476463785395026, "loss_iou": 0.2236328125, "loss_num": 0.006195068359375, "loss_xval": 0.478515625, "num_input_tokens_seen": 402473320, "step": 7183 }, { "epoch": 16.0, "grad_norm": 29.848583221435547, "learning_rate": 1e-06, "loss": 0.4615, "num_input_tokens_seen": 402529284, "step": 7184 }, { "epoch": 16.0, "loss": 0.5135668516159058, "loss_ce": 0.00013914526789449155, "loss_iou": 0.220703125, "loss_num": 0.01422119140625, "loss_xval": 0.51171875, "num_input_tokens_seen": 402529284, "step": 7184 }, { "epoch": 16.002227171492205, "grad_norm": 27.619600296020508, "learning_rate": 1e-06, "loss": 0.4962, "num_input_tokens_seen": 402585352, "step": 7185 }, { "epoch": 16.002227171492205, "loss": 0.6742956042289734, "loss_ce": 0.00010129276779480278, "loss_iou": 0.279296875, "loss_num": 0.0233154296875, "loss_xval": 0.67578125, "num_input_tokens_seen": 402585352, "step": 7185 }, { "epoch": 16.00445434298441, "grad_norm": 20.31178855895996, "learning_rate": 1e-06, "loss": 0.5351, "num_input_tokens_seen": 402638996, "step": 7186 }, { "epoch": 16.00445434298441, "loss": 0.5318279266357422, "loss_ce": 8.96200945135206e-05, "loss_iou": 0.23828125, "loss_num": 0.0113525390625, "loss_xval": 0.53125, "num_input_tokens_seen": 402638996, "step": 7186 }, { "epoch": 16.006681514476615, "grad_norm": 17.070863723754883, "learning_rate": 1e-06, "loss": 0.448, "num_input_tokens_seen": 402695996, "step": 7187 }, { "epoch": 16.006681514476615, "loss": 0.4115889072418213, "loss_ce": 8.989499474409968e-05, "loss_iou": 0.1708984375, "loss_num": 0.0140380859375, "loss_xval": 0.412109375, "num_input_tokens_seen": 402695996, "step": 7187 }, { "epoch": 16.00890868596882, "grad_norm": 12.187263488769531, "learning_rate": 1e-06, "loss": 0.2493, "num_input_tokens_seen": 402753988, "step": 7188 }, { "epoch": 16.00890868596882, "loss": 0.24472574889659882, "loss_ce": 9.685206168796867e-05, "loss_iou": 0.10498046875, "loss_num": 0.006927490234375, "loss_xval": 0.244140625, "num_input_tokens_seen": 402753988, "step": 7188 }, { "epoch": 16.011135857461024, "grad_norm": 13.688761711120605, "learning_rate": 1e-06, "loss": 0.3247, "num_input_tokens_seen": 402810468, "step": 7189 }, { "epoch": 16.011135857461024, "loss": 0.42870235443115234, "loss_ce": 0.00011348059342708439, "loss_iou": 0.1826171875, "loss_num": 0.01263427734375, "loss_xval": 0.427734375, "num_input_tokens_seen": 402810468, "step": 7189 }, { "epoch": 16.01336302895323, "grad_norm": 17.964651107788086, "learning_rate": 1e-06, "loss": 0.5188, "num_input_tokens_seen": 402862988, "step": 7190 }, { "epoch": 16.01336302895323, "loss": 0.5445123314857483, "loss_ce": 7.876359450165182e-05, "loss_iou": 0.2001953125, "loss_num": 0.029052734375, "loss_xval": 0.54296875, "num_input_tokens_seen": 402862988, "step": 7190 }, { "epoch": 16.015590200445434, "grad_norm": 22.054534912109375, "learning_rate": 1e-06, "loss": 0.3839, "num_input_tokens_seen": 402919908, "step": 7191 }, { "epoch": 16.015590200445434, "loss": 0.3673054575920105, "loss_ce": 0.00011793937301263213, "loss_iou": 0.171875, "loss_num": 0.004669189453125, "loss_xval": 0.3671875, "num_input_tokens_seen": 402919908, "step": 7191 }, { "epoch": 16.01781737193764, "grad_norm": 16.937606811523438, "learning_rate": 1e-06, "loss": 0.4729, "num_input_tokens_seen": 402976004, "step": 7192 }, { "epoch": 16.01781737193764, "loss": 0.5479812026023865, "loss_ce": 0.0001907070109155029, "loss_iou": 0.2421875, "loss_num": 0.0126953125, "loss_xval": 0.546875, "num_input_tokens_seen": 402976004, "step": 7192 }, { "epoch": 16.020044543429844, "grad_norm": 21.24249839782715, "learning_rate": 1e-06, "loss": 0.4644, "num_input_tokens_seen": 403032388, "step": 7193 }, { "epoch": 16.020044543429844, "loss": 0.3727763891220093, "loss_ce": 0.00015674906899221241, "loss_iou": 0.1708984375, "loss_num": 0.006072998046875, "loss_xval": 0.373046875, "num_input_tokens_seen": 403032388, "step": 7193 }, { "epoch": 16.02227171492205, "grad_norm": 15.852849960327148, "learning_rate": 1e-06, "loss": 0.4715, "num_input_tokens_seen": 403091080, "step": 7194 }, { "epoch": 16.02227171492205, "loss": 0.5445390939712524, "loss_ce": 0.00010548095451667905, "loss_iou": 0.232421875, "loss_num": 0.01611328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 403091080, "step": 7194 }, { "epoch": 16.024498886414253, "grad_norm": 22.950151443481445, "learning_rate": 1e-06, "loss": 0.648, "num_input_tokens_seen": 403148524, "step": 7195 }, { "epoch": 16.024498886414253, "loss": 0.6838928461074829, "loss_ce": 0.00017702819604892284, "loss_iou": 0.2734375, "loss_num": 0.02783203125, "loss_xval": 0.68359375, "num_input_tokens_seen": 403148524, "step": 7195 }, { "epoch": 16.02672605790646, "grad_norm": 21.878934860229492, "learning_rate": 1e-06, "loss": 0.5133, "num_input_tokens_seen": 403205004, "step": 7196 }, { "epoch": 16.02672605790646, "loss": 0.5003616809844971, "loss_ce": 0.0003617034526541829, "loss_iou": 0.2255859375, "loss_num": 0.0096435546875, "loss_xval": 0.5, "num_input_tokens_seen": 403205004, "step": 7196 }, { "epoch": 16.028953229398663, "grad_norm": 24.652055740356445, "learning_rate": 1e-06, "loss": 0.5203, "num_input_tokens_seen": 403256684, "step": 7197 }, { "epoch": 16.028953229398663, "loss": 0.38059282302856445, "loss_ce": 9.965993376681581e-05, "loss_iou": 0.15625, "loss_num": 0.01361083984375, "loss_xval": 0.380859375, "num_input_tokens_seen": 403256684, "step": 7197 }, { "epoch": 16.031180400890868, "grad_norm": 14.730175018310547, "learning_rate": 1e-06, "loss": 0.5587, "num_input_tokens_seen": 403313248, "step": 7198 }, { "epoch": 16.031180400890868, "loss": 0.6153490543365479, "loss_ce": 0.00011463207920314744, "loss_iou": 0.259765625, "loss_num": 0.019287109375, "loss_xval": 0.6171875, "num_input_tokens_seen": 403313248, "step": 7198 }, { "epoch": 16.033407572383073, "grad_norm": 18.867799758911133, "learning_rate": 1e-06, "loss": 0.4701, "num_input_tokens_seen": 403371832, "step": 7199 }, { "epoch": 16.033407572383073, "loss": 0.6135093569755554, "loss_ce": 0.0002280865446664393, "loss_iou": 0.26953125, "loss_num": 0.01519775390625, "loss_xval": 0.61328125, "num_input_tokens_seen": 403371832, "step": 7199 }, { "epoch": 16.035634743875278, "grad_norm": 15.737879753112793, "learning_rate": 1e-06, "loss": 0.6626, "num_input_tokens_seen": 403428312, "step": 7200 }, { "epoch": 16.035634743875278, "loss": 0.7531107068061829, "loss_ce": 0.00018101301975548267, "loss_iou": 0.337890625, "loss_num": 0.01519775390625, "loss_xval": 0.75390625, "num_input_tokens_seen": 403428312, "step": 7200 }, { "epoch": 16.037861915367483, "grad_norm": 16.80666160583496, "learning_rate": 1e-06, "loss": 0.4065, "num_input_tokens_seen": 403486724, "step": 7201 }, { "epoch": 16.037861915367483, "loss": 0.47910380363464355, "loss_ce": 9.990914259105921e-05, "loss_iou": 0.22265625, "loss_num": 0.006500244140625, "loss_xval": 0.478515625, "num_input_tokens_seen": 403486724, "step": 7201 }, { "epoch": 16.040089086859687, "grad_norm": 11.938071250915527, "learning_rate": 1e-06, "loss": 0.4147, "num_input_tokens_seen": 403541852, "step": 7202 }, { "epoch": 16.040089086859687, "loss": 0.4437464475631714, "loss_ce": 8.189181244233623e-05, "loss_iou": 0.1884765625, "loss_num": 0.0133056640625, "loss_xval": 0.443359375, "num_input_tokens_seen": 403541852, "step": 7202 }, { "epoch": 16.042316258351892, "grad_norm": 33.16254806518555, "learning_rate": 1e-06, "loss": 0.4485, "num_input_tokens_seen": 403598276, "step": 7203 }, { "epoch": 16.042316258351892, "loss": 0.3794906437397003, "loss_ce": 9.610810957383364e-05, "loss_iou": 0.16796875, "loss_num": 0.0087890625, "loss_xval": 0.37890625, "num_input_tokens_seen": 403598276, "step": 7203 }, { "epoch": 16.044543429844097, "grad_norm": 28.809633255004883, "learning_rate": 1e-06, "loss": 0.4134, "num_input_tokens_seen": 403657000, "step": 7204 }, { "epoch": 16.044543429844097, "loss": 0.3428630530834198, "loss_ce": 8.960704144556075e-05, "loss_iou": 0.1552734375, "loss_num": 0.006378173828125, "loss_xval": 0.34375, "num_input_tokens_seen": 403657000, "step": 7204 }, { "epoch": 16.046770601336302, "grad_norm": 25.093759536743164, "learning_rate": 1e-06, "loss": 0.6919, "num_input_tokens_seen": 403711572, "step": 7205 }, { "epoch": 16.046770601336302, "loss": 0.38008594512939453, "loss_ce": 8.106790482997894e-05, "loss_iou": 0.1611328125, "loss_num": 0.01165771484375, "loss_xval": 0.380859375, "num_input_tokens_seen": 403711572, "step": 7205 }, { "epoch": 16.048997772828507, "grad_norm": 19.109130859375, "learning_rate": 1e-06, "loss": 0.2554, "num_input_tokens_seen": 403767064, "step": 7206 }, { "epoch": 16.048997772828507, "loss": 0.22988614439964294, "loss_ce": 8.877603249857202e-05, "loss_iou": 0.10009765625, "loss_num": 0.005889892578125, "loss_xval": 0.2294921875, "num_input_tokens_seen": 403767064, "step": 7206 }, { "epoch": 16.051224944320712, "grad_norm": 14.356725692749023, "learning_rate": 1e-06, "loss": 0.4188, "num_input_tokens_seen": 403823508, "step": 7207 }, { "epoch": 16.051224944320712, "loss": 0.4483864903450012, "loss_ce": 0.0001443219225620851, "loss_iou": 0.1923828125, "loss_num": 0.0126953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 403823508, "step": 7207 }, { "epoch": 16.053452115812917, "grad_norm": 18.274364471435547, "learning_rate": 1e-06, "loss": 0.4482, "num_input_tokens_seen": 403878140, "step": 7208 }, { "epoch": 16.053452115812917, "loss": 0.4601691961288452, "loss_ce": 8.618818537797779e-05, "loss_iou": 0.1982421875, "loss_num": 0.01263427734375, "loss_xval": 0.4609375, "num_input_tokens_seen": 403878140, "step": 7208 }, { "epoch": 16.05567928730512, "grad_norm": 19.279592514038086, "learning_rate": 1e-06, "loss": 0.5412, "num_input_tokens_seen": 403932144, "step": 7209 }, { "epoch": 16.05567928730512, "loss": 0.6549313068389893, "loss_ce": 0.00014613490202464163, "loss_iou": 0.267578125, "loss_num": 0.0242919921875, "loss_xval": 0.65625, "num_input_tokens_seen": 403932144, "step": 7209 }, { "epoch": 16.057906458797326, "grad_norm": 15.034238815307617, "learning_rate": 1e-06, "loss": 0.471, "num_input_tokens_seen": 403987788, "step": 7210 }, { "epoch": 16.057906458797326, "loss": 0.43674468994140625, "loss_ce": 9.918860450852662e-05, "loss_iou": 0.201171875, "loss_num": 0.006988525390625, "loss_xval": 0.4375, "num_input_tokens_seen": 403987788, "step": 7210 }, { "epoch": 16.06013363028953, "grad_norm": 13.465814590454102, "learning_rate": 1e-06, "loss": 0.3559, "num_input_tokens_seen": 404044384, "step": 7211 }, { "epoch": 16.06013363028953, "loss": 0.3224736154079437, "loss_ce": 8.591696678195149e-05, "loss_iou": 0.1416015625, "loss_num": 0.007781982421875, "loss_xval": 0.322265625, "num_input_tokens_seen": 404044384, "step": 7211 }, { "epoch": 16.062360801781736, "grad_norm": 18.50025749206543, "learning_rate": 1e-06, "loss": 0.369, "num_input_tokens_seen": 404099392, "step": 7212 }, { "epoch": 16.062360801781736, "loss": 0.35518187284469604, "loss_ce": 7.935409666970372e-05, "loss_iou": 0.1494140625, "loss_num": 0.01123046875, "loss_xval": 0.35546875, "num_input_tokens_seen": 404099392, "step": 7212 }, { "epoch": 16.06458797327394, "grad_norm": 12.4852294921875, "learning_rate": 1e-06, "loss": 0.4552, "num_input_tokens_seen": 404153904, "step": 7213 }, { "epoch": 16.06458797327394, "loss": 0.5071730017662048, "loss_ce": 9.29402667679824e-05, "loss_iou": 0.2138671875, "loss_num": 0.0157470703125, "loss_xval": 0.5078125, "num_input_tokens_seen": 404153904, "step": 7213 }, { "epoch": 16.066815144766146, "grad_norm": 22.292587280273438, "learning_rate": 1e-06, "loss": 0.5293, "num_input_tokens_seen": 404207456, "step": 7214 }, { "epoch": 16.066815144766146, "loss": 0.3804709315299988, "loss_ce": 9.985938959289342e-05, "loss_iou": 0.1708984375, "loss_num": 0.007537841796875, "loss_xval": 0.380859375, "num_input_tokens_seen": 404207456, "step": 7214 }, { "epoch": 16.06904231625835, "grad_norm": 21.88534927368164, "learning_rate": 1e-06, "loss": 0.4697, "num_input_tokens_seen": 404262880, "step": 7215 }, { "epoch": 16.06904231625835, "loss": 0.6787058115005493, "loss_ce": 0.0001169023453257978, "loss_iou": 0.314453125, "loss_num": 0.009765625, "loss_xval": 0.6796875, "num_input_tokens_seen": 404262880, "step": 7215 }, { "epoch": 16.071269487750556, "grad_norm": 29.59630584716797, "learning_rate": 1e-06, "loss": 0.5467, "num_input_tokens_seen": 404316780, "step": 7216 }, { "epoch": 16.071269487750556, "loss": 0.505081057548523, "loss_ce": 7.61750852689147e-05, "loss_iou": 0.220703125, "loss_num": 0.012451171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 404316780, "step": 7216 }, { "epoch": 16.07349665924276, "grad_norm": 14.112740516662598, "learning_rate": 1e-06, "loss": 0.4883, "num_input_tokens_seen": 404374304, "step": 7217 }, { "epoch": 16.07349665924276, "loss": 0.29110169410705566, "loss_ce": 8.60571744851768e-05, "loss_iou": 0.1328125, "loss_num": 0.00518798828125, "loss_xval": 0.291015625, "num_input_tokens_seen": 404374304, "step": 7217 }, { "epoch": 16.075723830734965, "grad_norm": 17.42337417602539, "learning_rate": 1e-06, "loss": 0.3548, "num_input_tokens_seen": 404430784, "step": 7218 }, { "epoch": 16.075723830734965, "loss": 0.3221060037612915, "loss_ce": 8.452765177935362e-05, "loss_iou": 0.142578125, "loss_num": 0.007537841796875, "loss_xval": 0.322265625, "num_input_tokens_seen": 404430784, "step": 7218 }, { "epoch": 16.07795100222717, "grad_norm": 20.392663955688477, "learning_rate": 1e-06, "loss": 0.5018, "num_input_tokens_seen": 404486928, "step": 7219 }, { "epoch": 16.07795100222717, "loss": 0.45973044633865356, "loss_ce": 7.468648254871368e-05, "loss_iou": 0.205078125, "loss_num": 0.0098876953125, "loss_xval": 0.458984375, "num_input_tokens_seen": 404486928, "step": 7219 }, { "epoch": 16.080178173719375, "grad_norm": 21.25473403930664, "learning_rate": 1e-06, "loss": 0.6253, "num_input_tokens_seen": 404542168, "step": 7220 }, { "epoch": 16.080178173719375, "loss": 0.576141357421875, "loss_ce": 9.15601704036817e-05, "loss_iou": 0.2412109375, "loss_num": 0.0186767578125, "loss_xval": 0.57421875, "num_input_tokens_seen": 404542168, "step": 7220 }, { "epoch": 16.08240534521158, "grad_norm": 15.591520309448242, "learning_rate": 1e-06, "loss": 0.4269, "num_input_tokens_seen": 404597716, "step": 7221 }, { "epoch": 16.08240534521158, "loss": 0.38363730907440186, "loss_ce": 9.239626524504274e-05, "loss_iou": 0.1650390625, "loss_num": 0.01068115234375, "loss_xval": 0.3828125, "num_input_tokens_seen": 404597716, "step": 7221 }, { "epoch": 16.084632516703785, "grad_norm": 34.75109100341797, "learning_rate": 1e-06, "loss": 0.4051, "num_input_tokens_seen": 404652108, "step": 7222 }, { "epoch": 16.084632516703785, "loss": 0.3343104124069214, "loss_ce": 8.188547508325428e-05, "loss_iou": 0.140625, "loss_num": 0.0106201171875, "loss_xval": 0.333984375, "num_input_tokens_seen": 404652108, "step": 7222 }, { "epoch": 16.08685968819599, "grad_norm": 17.89012908935547, "learning_rate": 1e-06, "loss": 0.5697, "num_input_tokens_seen": 404708964, "step": 7223 }, { "epoch": 16.08685968819599, "loss": 0.3971855044364929, "loss_ce": 9.079407755052671e-05, "loss_iou": 0.18359375, "loss_num": 0.005859375, "loss_xval": 0.396484375, "num_input_tokens_seen": 404708964, "step": 7223 }, { "epoch": 16.089086859688194, "grad_norm": 18.168272018432617, "learning_rate": 1e-06, "loss": 0.4312, "num_input_tokens_seen": 404760744, "step": 7224 }, { "epoch": 16.089086859688194, "loss": 0.4721256494522095, "loss_ce": 7.975117478054017e-05, "loss_iou": 0.19921875, "loss_num": 0.0147705078125, "loss_xval": 0.47265625, "num_input_tokens_seen": 404760744, "step": 7224 }, { "epoch": 16.0913140311804, "grad_norm": 22.09956169128418, "learning_rate": 1e-06, "loss": 0.2769, "num_input_tokens_seen": 404818936, "step": 7225 }, { "epoch": 16.0913140311804, "loss": 0.19356948137283325, "loss_ce": 8.803060336504132e-05, "loss_iou": 0.087890625, "loss_num": 0.0035400390625, "loss_xval": 0.193359375, "num_input_tokens_seen": 404818936, "step": 7225 }, { "epoch": 16.093541202672604, "grad_norm": 18.174219131469727, "learning_rate": 1e-06, "loss": 0.3931, "num_input_tokens_seen": 404874408, "step": 7226 }, { "epoch": 16.093541202672604, "loss": 0.40315836668014526, "loss_ce": 8.220285963034257e-05, "loss_iou": 0.1708984375, "loss_num": 0.01214599609375, "loss_xval": 0.40234375, "num_input_tokens_seen": 404874408, "step": 7226 }, { "epoch": 16.09576837416481, "grad_norm": 21.083255767822266, "learning_rate": 1e-06, "loss": 0.2451, "num_input_tokens_seen": 404932184, "step": 7227 }, { "epoch": 16.09576837416481, "loss": 0.2461136281490326, "loss_ce": 8.089626498986036e-05, "loss_iou": 0.10693359375, "loss_num": 0.0064697265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 404932184, "step": 7227 }, { "epoch": 16.097995545657014, "grad_norm": 15.489051818847656, "learning_rate": 1e-06, "loss": 0.4338, "num_input_tokens_seen": 404989408, "step": 7228 }, { "epoch": 16.097995545657014, "loss": 0.46629413962364197, "loss_ce": 0.00010758035205071792, "loss_iou": 0.216796875, "loss_num": 0.00634765625, "loss_xval": 0.466796875, "num_input_tokens_seen": 404989408, "step": 7228 }, { "epoch": 16.100222717149222, "grad_norm": 28.974552154541016, "learning_rate": 1e-06, "loss": 0.5686, "num_input_tokens_seen": 405044892, "step": 7229 }, { "epoch": 16.100222717149222, "loss": 0.4830133318901062, "loss_ce": 0.00010318079148419201, "loss_iou": 0.220703125, "loss_num": 0.00823974609375, "loss_xval": 0.482421875, "num_input_tokens_seen": 405044892, "step": 7229 }, { "epoch": 16.102449888641427, "grad_norm": 27.26142692565918, "learning_rate": 1e-06, "loss": 0.5382, "num_input_tokens_seen": 405102960, "step": 7230 }, { "epoch": 16.102449888641427, "loss": 0.6526839137077332, "loss_ce": 9.603158105164766e-05, "loss_iou": 0.28125, "loss_num": 0.0179443359375, "loss_xval": 0.65234375, "num_input_tokens_seen": 405102960, "step": 7230 }, { "epoch": 16.104677060133632, "grad_norm": 33.493282318115234, "learning_rate": 1e-06, "loss": 0.4217, "num_input_tokens_seen": 405159344, "step": 7231 }, { "epoch": 16.104677060133632, "loss": 0.47552353143692017, "loss_ce": 0.00012070016236975789, "loss_iou": 0.1884765625, "loss_num": 0.0196533203125, "loss_xval": 0.474609375, "num_input_tokens_seen": 405159344, "step": 7231 }, { "epoch": 16.106904231625837, "grad_norm": 16.54621124267578, "learning_rate": 1e-06, "loss": 0.3254, "num_input_tokens_seen": 405215920, "step": 7232 }, { "epoch": 16.106904231625837, "loss": 0.3362829387187958, "loss_ce": 0.00010127984569408, "loss_iou": 0.1494140625, "loss_num": 0.00750732421875, "loss_xval": 0.3359375, "num_input_tokens_seen": 405215920, "step": 7232 }, { "epoch": 16.10913140311804, "grad_norm": 15.172420501708984, "learning_rate": 1e-06, "loss": 0.271, "num_input_tokens_seen": 405274772, "step": 7233 }, { "epoch": 16.10913140311804, "loss": 0.1989382654428482, "loss_ce": 8.573340164730325e-05, "loss_iou": 0.08642578125, "loss_num": 0.005126953125, "loss_xval": 0.19921875, "num_input_tokens_seen": 405274772, "step": 7233 }, { "epoch": 16.111358574610247, "grad_norm": 25.89063835144043, "learning_rate": 1e-06, "loss": 0.428, "num_input_tokens_seen": 405328804, "step": 7234 }, { "epoch": 16.111358574610247, "loss": 0.48993584513664246, "loss_ce": 0.00012872781371697783, "loss_iou": 0.1982421875, "loss_num": 0.0185546875, "loss_xval": 0.490234375, "num_input_tokens_seen": 405328804, "step": 7234 }, { "epoch": 16.11358574610245, "grad_norm": 17.315288543701172, "learning_rate": 1e-06, "loss": 0.3599, "num_input_tokens_seen": 405384920, "step": 7235 }, { "epoch": 16.11358574610245, "loss": 0.40499159693717957, "loss_ce": 8.437626820523292e-05, "loss_iou": 0.181640625, "loss_num": 0.00836181640625, "loss_xval": 0.404296875, "num_input_tokens_seen": 405384920, "step": 7235 }, { "epoch": 16.115812917594656, "grad_norm": 11.827547073364258, "learning_rate": 1e-06, "loss": 0.4208, "num_input_tokens_seen": 405440816, "step": 7236 }, { "epoch": 16.115812917594656, "loss": 0.43206048011779785, "loss_ce": 0.00011468880984466523, "loss_iou": 0.1962890625, "loss_num": 0.007720947265625, "loss_xval": 0.431640625, "num_input_tokens_seen": 405440816, "step": 7236 }, { "epoch": 16.11804008908686, "grad_norm": 16.36489486694336, "learning_rate": 1e-06, "loss": 0.4765, "num_input_tokens_seen": 405497324, "step": 7237 }, { "epoch": 16.11804008908686, "loss": 0.4784888029098511, "loss_ce": 9.523934568278491e-05, "loss_iou": 0.1787109375, "loss_num": 0.0242919921875, "loss_xval": 0.478515625, "num_input_tokens_seen": 405497324, "step": 7237 }, { "epoch": 16.120267260579066, "grad_norm": 19.48402214050293, "learning_rate": 1e-06, "loss": 0.521, "num_input_tokens_seen": 405551216, "step": 7238 }, { "epoch": 16.120267260579066, "loss": 0.6264359951019287, "loss_ce": 9.326158760813996e-05, "loss_iou": 0.2431640625, "loss_num": 0.028076171875, "loss_xval": 0.625, "num_input_tokens_seen": 405551216, "step": 7238 }, { "epoch": 16.12249443207127, "grad_norm": 20.337753295898438, "learning_rate": 1e-06, "loss": 0.3115, "num_input_tokens_seen": 405608940, "step": 7239 }, { "epoch": 16.12249443207127, "loss": 0.27264925837516785, "loss_ce": 0.000310403760522604, "loss_iou": 0.11376953125, "loss_num": 0.009033203125, "loss_xval": 0.271484375, "num_input_tokens_seen": 405608940, "step": 7239 }, { "epoch": 16.124721603563476, "grad_norm": 66.91169738769531, "learning_rate": 1e-06, "loss": 0.4479, "num_input_tokens_seen": 405666224, "step": 7240 }, { "epoch": 16.124721603563476, "loss": 0.5517283082008362, "loss_ce": 9.25497297430411e-05, "loss_iou": 0.26171875, "loss_num": 0.00592041015625, "loss_xval": 0.55078125, "num_input_tokens_seen": 405666224, "step": 7240 }, { "epoch": 16.12694877505568, "grad_norm": 14.807357788085938, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 405722588, "step": 7241 }, { "epoch": 16.12694877505568, "loss": 0.47714054584503174, "loss_ce": 8.976385288406163e-05, "loss_iou": 0.220703125, "loss_num": 0.00738525390625, "loss_xval": 0.4765625, "num_input_tokens_seen": 405722588, "step": 7241 }, { "epoch": 16.129175946547885, "grad_norm": 40.85698318481445, "learning_rate": 1e-06, "loss": 0.3856, "num_input_tokens_seen": 405778132, "step": 7242 }, { "epoch": 16.129175946547885, "loss": 0.40902432799339294, "loss_ce": 8.878795779310167e-05, "loss_iou": 0.1845703125, "loss_num": 0.008056640625, "loss_xval": 0.408203125, "num_input_tokens_seen": 405778132, "step": 7242 }, { "epoch": 16.13140311804009, "grad_norm": 19.070831298828125, "learning_rate": 1e-06, "loss": 0.4482, "num_input_tokens_seen": 405834844, "step": 7243 }, { "epoch": 16.13140311804009, "loss": 0.5067603588104248, "loss_ce": 0.00010757060954347253, "loss_iou": 0.216796875, "loss_num": 0.01458740234375, "loss_xval": 0.5078125, "num_input_tokens_seen": 405834844, "step": 7243 }, { "epoch": 16.133630289532295, "grad_norm": 22.71406364440918, "learning_rate": 1e-06, "loss": 0.5471, "num_input_tokens_seen": 405889304, "step": 7244 }, { "epoch": 16.133630289532295, "loss": 0.4746703505516052, "loss_ce": 9.150059486273676e-05, "loss_iou": 0.2138671875, "loss_num": 0.009521484375, "loss_xval": 0.474609375, "num_input_tokens_seen": 405889304, "step": 7244 }, { "epoch": 16.1358574610245, "grad_norm": 26.008378982543945, "learning_rate": 1e-06, "loss": 0.4208, "num_input_tokens_seen": 405948244, "step": 7245 }, { "epoch": 16.1358574610245, "loss": 0.44539159536361694, "loss_ce": 7.910738349892199e-05, "loss_iou": 0.2060546875, "loss_num": 0.006561279296875, "loss_xval": 0.4453125, "num_input_tokens_seen": 405948244, "step": 7245 }, { "epoch": 16.138084632516705, "grad_norm": 17.593692779541016, "learning_rate": 1e-06, "loss": 0.6091, "num_input_tokens_seen": 406004756, "step": 7246 }, { "epoch": 16.138084632516705, "loss": 0.672180712223053, "loss_ce": 0.00018365512369200587, "loss_iou": 0.314453125, "loss_num": 0.009033203125, "loss_xval": 0.671875, "num_input_tokens_seen": 406004756, "step": 7246 }, { "epoch": 16.14031180400891, "grad_norm": 31.90231704711914, "learning_rate": 1e-06, "loss": 0.6659, "num_input_tokens_seen": 406061152, "step": 7247 }, { "epoch": 16.14031180400891, "loss": 0.34200403094291687, "loss_ce": 8.508679457008839e-05, "loss_iou": 0.150390625, "loss_num": 0.00799560546875, "loss_xval": 0.341796875, "num_input_tokens_seen": 406061152, "step": 7247 }, { "epoch": 16.142538975501115, "grad_norm": 13.510699272155762, "learning_rate": 1e-06, "loss": 0.2774, "num_input_tokens_seen": 406116672, "step": 7248 }, { "epoch": 16.142538975501115, "loss": 0.29828941822052, "loss_ce": 7.161758549045771e-05, "loss_iou": 0.125, "loss_num": 0.00970458984375, "loss_xval": 0.298828125, "num_input_tokens_seen": 406116672, "step": 7248 }, { "epoch": 16.14476614699332, "grad_norm": 13.294876098632812, "learning_rate": 1e-06, "loss": 0.3414, "num_input_tokens_seen": 406175172, "step": 7249 }, { "epoch": 16.14476614699332, "loss": 0.3227793574333191, "loss_ce": 8.648384391563013e-05, "loss_iou": 0.1533203125, "loss_num": 0.0031280517578125, "loss_xval": 0.322265625, "num_input_tokens_seen": 406175172, "step": 7249 }, { "epoch": 16.146993318485524, "grad_norm": 18.024934768676758, "learning_rate": 1e-06, "loss": 0.4407, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "eval_seeclick_web_CIoU": 0.588862270116806, "eval_seeclick_web_GIoU": 0.587228536605835, "eval_seeclick_web_IoU": 0.6071678400039673, "eval_seeclick_web_MAE_all": 0.015188148012384772, "eval_seeclick_web_MAE_h": 0.007499874569475651, "eval_seeclick_web_MAE_w": 0.015260101296007633, "eval_seeclick_web_MAE_x_boxes": 0.008470539702102542, "eval_seeclick_web_MAE_y_boxes": 0.02111952123232186, "eval_seeclick_web_inside_bbox": 0.9166666567325592, "eval_seeclick_web_loss": 0.9167152643203735, "eval_seeclick_web_loss_ce": 0.00014891428872942924, "eval_seeclick_web_loss_iou": 0.4229736328125, "eval_seeclick_web_loss_num": 0.01216888427734375, "eval_seeclick_web_loss_xval": 0.906982421875, "eval_seeclick_web_runtime": 20.8185, "eval_seeclick_web_samples_per_second": 2.402, "eval_seeclick_web_steps_per_second": 0.096, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "eval_icons_CIoU": 0.26575663685798645, "eval_icons_GIoU": 0.2966430187225342, "eval_icons_IoU": 0.3484746217727661, "eval_icons_MAE_all": 0.06336861476302147, "eval_icons_MAE_h": 0.03217336814850569, "eval_icons_MAE_w": 0.07706875540316105, "eval_icons_MAE_x_boxes": 0.05414869636297226, "eval_icons_MAE_y_boxes": 0.037681372836232185, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7173901796340942, "eval_icons_loss_ce": 0.00019168824655935168, "eval_icons_loss_iou": 0.66650390625, "eval_icons_loss_num": 0.061092376708984375, "eval_icons_loss_xval": 1.638671875, "eval_icons_runtime": 20.0031, "eval_icons_samples_per_second": 2.5, "eval_icons_steps_per_second": 0.1, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "eval_screenspot_CIoU": 0.38737640778223675, "eval_screenspot_GIoU": 0.40602253874142963, "eval_screenspot_IoU": 0.45554816722869873, "eval_screenspot_MAE_all": 0.05349355190992355, "eval_screenspot_MAE_h": 0.039430550610025726, "eval_screenspot_MAE_w": 0.0571071021258831, "eval_screenspot_MAE_x_boxes": 0.0624094990392526, "eval_screenspot_MAE_y_boxes": 0.0382879643390576, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.5262062549591064, "eval_screenspot_loss_ce": 0.00022643499445014945, "eval_screenspot_loss_iou": 0.6376953125, "eval_screenspot_loss_num": 0.06162389119466146, "eval_screenspot_loss_xval": 1.58349609375, "eval_screenspot_runtime": 33.0274, "eval_screenspot_samples_per_second": 2.695, "eval_screenspot_steps_per_second": 0.091, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "eval_compot_CIoU": 0.34477105736732483, "eval_compot_GIoU": 0.3559461981058121, "eval_compot_IoU": 0.40127159655094147, "eval_compot_MAE_all": 0.01874891249462962, "eval_compot_MAE_h": 0.011277861427515745, "eval_compot_MAE_w": 0.02132485620677471, "eval_compot_MAE_x_boxes": 0.029761233367025852, "eval_compot_MAE_y_boxes": 0.0067885443568229675, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.402956247329712, "eval_compot_loss_ce": 0.00014377458137460053, "eval_compot_loss_iou": 0.6480712890625, "eval_compot_loss_num": 0.017492294311523438, "eval_compot_loss_xval": 1.3837890625, "eval_compot_runtime": 20.4358, "eval_compot_samples_per_second": 2.447, "eval_compot_steps_per_second": 0.098, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "eval_custom_ui_val_CIoU": 0.4759764571984609, "eval_custom_ui_val_GIoU": 0.48175321850511765, "eval_custom_ui_val_IoU": 0.5367199348078834, "eval_custom_ui_val_MAE_all": 0.027078964850968786, "eval_custom_ui_val_MAE_h": 0.014058138916475905, "eval_custom_ui_val_MAE_w": 0.036136620212346315, "eval_custom_ui_val_MAE_x_boxes": 0.0336780981419401, "eval_custom_ui_val_MAE_y_boxes": 0.012756779815794693, "eval_custom_ui_val_inside_bbox": 0.7719907429483202, "eval_custom_ui_val_loss": 1.1681312322616577, "eval_custom_ui_val_loss_ce": 0.00017250773412848098, "eval_custom_ui_val_loss_iou": 0.5040554470486112, "eval_custom_ui_val_loss_num": 0.023714171515570745, "eval_custom_ui_val_loss_xval": 1.1267903645833333, "eval_custom_ui_val_runtime": 56.1097, "eval_custom_ui_val_samples_per_second": 4.723, "eval_custom_ui_val_steps_per_second": 0.16, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.146993318485524, "loss": 0.8809858560562134, "loss_ce": 0.0001264033926418051, "loss_iou": 0.3984375, "loss_num": 0.016845703125, "loss_xval": 0.8828125, "num_input_tokens_seen": 406234160, "step": 7250 }, { "epoch": 16.14922048997773, "grad_norm": 13.051247596740723, "learning_rate": 1e-06, "loss": 0.2912, "num_input_tokens_seen": 406287840, "step": 7251 }, { "epoch": 16.14922048997773, "loss": 0.3833409249782562, "loss_ce": 0.00010119502258021384, "loss_iou": 0.1728515625, "loss_num": 0.00738525390625, "loss_xval": 0.3828125, "num_input_tokens_seen": 406287840, "step": 7251 }, { "epoch": 16.151447661469934, "grad_norm": 17.345809936523438, "learning_rate": 1e-06, "loss": 0.4775, "num_input_tokens_seen": 406344524, "step": 7252 }, { "epoch": 16.151447661469934, "loss": 0.3954039514064789, "loss_ce": 7.925261161290109e-05, "loss_iou": 0.1787109375, "loss_num": 0.00775146484375, "loss_xval": 0.39453125, "num_input_tokens_seen": 406344524, "step": 7252 }, { "epoch": 16.15367483296214, "grad_norm": 17.00496482849121, "learning_rate": 1e-06, "loss": 0.389, "num_input_tokens_seen": 406399856, "step": 7253 }, { "epoch": 16.15367483296214, "loss": 0.35250192880630493, "loss_ce": 8.492142660543323e-05, "loss_iou": 0.1533203125, "loss_num": 0.00927734375, "loss_xval": 0.3515625, "num_input_tokens_seen": 406399856, "step": 7253 }, { "epoch": 16.155902004454344, "grad_norm": 13.97536563873291, "learning_rate": 1e-06, "loss": 0.3508, "num_input_tokens_seen": 406457204, "step": 7254 }, { "epoch": 16.155902004454344, "loss": 0.3879046142101288, "loss_ce": 8.720727055333555e-05, "loss_iou": 0.1767578125, "loss_num": 0.0067138671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 406457204, "step": 7254 }, { "epoch": 16.15812917594655, "grad_norm": 14.464372634887695, "learning_rate": 1e-06, "loss": 0.4449, "num_input_tokens_seen": 406513904, "step": 7255 }, { "epoch": 16.15812917594655, "loss": 0.37365224957466125, "loss_ce": 0.00023914946359582245, "loss_iou": 0.1533203125, "loss_num": 0.01348876953125, "loss_xval": 0.373046875, "num_input_tokens_seen": 406513904, "step": 7255 }, { "epoch": 16.160356347438753, "grad_norm": 14.928121566772461, "learning_rate": 1e-06, "loss": 0.4961, "num_input_tokens_seen": 406570348, "step": 7256 }, { "epoch": 16.160356347438753, "loss": 0.7330120801925659, "loss_ce": 0.00010193722846452147, "loss_iou": 0.259765625, "loss_num": 0.04248046875, "loss_xval": 0.734375, "num_input_tokens_seen": 406570348, "step": 7256 }, { "epoch": 16.16258351893096, "grad_norm": 20.846715927124023, "learning_rate": 1e-06, "loss": 0.6857, "num_input_tokens_seen": 406624784, "step": 7257 }, { "epoch": 16.16258351893096, "loss": 0.9515559673309326, "loss_ce": 0.00013993156608194113, "loss_iou": 0.369140625, "loss_num": 0.042724609375, "loss_xval": 0.953125, "num_input_tokens_seen": 406624784, "step": 7257 }, { "epoch": 16.164810690423163, "grad_norm": 14.39743423461914, "learning_rate": 1e-06, "loss": 0.5994, "num_input_tokens_seen": 406679488, "step": 7258 }, { "epoch": 16.164810690423163, "loss": 0.450466126203537, "loss_ce": 8.771609282121062e-05, "loss_iou": 0.19921875, "loss_num": 0.01025390625, "loss_xval": 0.451171875, "num_input_tokens_seen": 406679488, "step": 7258 }, { "epoch": 16.167037861915368, "grad_norm": 31.99239158630371, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 406737108, "step": 7259 }, { "epoch": 16.167037861915368, "loss": 0.25009024143218994, "loss_ce": 9.024170140037313e-05, "loss_iou": 0.1083984375, "loss_num": 0.006744384765625, "loss_xval": 0.25, "num_input_tokens_seen": 406737108, "step": 7259 }, { "epoch": 16.169265033407573, "grad_norm": 28.246858596801758, "learning_rate": 1e-06, "loss": 0.3793, "num_input_tokens_seen": 406794236, "step": 7260 }, { "epoch": 16.169265033407573, "loss": 0.3895048499107361, "loss_ce": 0.00010054315498564392, "loss_iou": 0.17578125, "loss_num": 0.007659912109375, "loss_xval": 0.388671875, "num_input_tokens_seen": 406794236, "step": 7260 }, { "epoch": 16.171492204899778, "grad_norm": 13.259971618652344, "learning_rate": 1e-06, "loss": 0.3625, "num_input_tokens_seen": 406851408, "step": 7261 }, { "epoch": 16.171492204899778, "loss": 0.39652132987976074, "loss_ce": 9.797551319934428e-05, "loss_iou": 0.17578125, "loss_num": 0.00897216796875, "loss_xval": 0.396484375, "num_input_tokens_seen": 406851408, "step": 7261 }, { "epoch": 16.173719376391983, "grad_norm": 18.107431411743164, "learning_rate": 1e-06, "loss": 0.4601, "num_input_tokens_seen": 406909764, "step": 7262 }, { "epoch": 16.173719376391983, "loss": 0.5472245216369629, "loss_ce": 0.0001053560699801892, "loss_iou": 0.236328125, "loss_num": 0.01507568359375, "loss_xval": 0.546875, "num_input_tokens_seen": 406909764, "step": 7262 }, { "epoch": 16.175946547884188, "grad_norm": 19.771875381469727, "learning_rate": 1e-06, "loss": 0.4511, "num_input_tokens_seen": 406967348, "step": 7263 }, { "epoch": 16.175946547884188, "loss": 0.3307103216648102, "loss_ce": 8.286767115350813e-05, "loss_iou": 0.150390625, "loss_num": 0.005889892578125, "loss_xval": 0.330078125, "num_input_tokens_seen": 406967348, "step": 7263 }, { "epoch": 16.178173719376392, "grad_norm": 20.80531883239746, "learning_rate": 1e-06, "loss": 0.4105, "num_input_tokens_seen": 407023044, "step": 7264 }, { "epoch": 16.178173719376392, "loss": 0.40901893377304077, "loss_ce": 8.336683094967157e-05, "loss_iou": 0.189453125, "loss_num": 0.006011962890625, "loss_xval": 0.408203125, "num_input_tokens_seen": 407023044, "step": 7264 }, { "epoch": 16.180400890868597, "grad_norm": 15.4229736328125, "learning_rate": 1e-06, "loss": 0.366, "num_input_tokens_seen": 407078844, "step": 7265 }, { "epoch": 16.180400890868597, "loss": 0.22904685139656067, "loss_ce": 0.0001039912604028359, "loss_iou": 0.09228515625, "loss_num": 0.00885009765625, "loss_xval": 0.228515625, "num_input_tokens_seen": 407078844, "step": 7265 }, { "epoch": 16.182628062360802, "grad_norm": 15.534099578857422, "learning_rate": 1e-06, "loss": 0.4986, "num_input_tokens_seen": 407135112, "step": 7266 }, { "epoch": 16.182628062360802, "loss": 0.41431134939193726, "loss_ce": 0.00012679336941801012, "loss_iou": 0.1826171875, "loss_num": 0.00994873046875, "loss_xval": 0.4140625, "num_input_tokens_seen": 407135112, "step": 7266 }, { "epoch": 16.184855233853007, "grad_norm": 11.986660957336426, "learning_rate": 1e-06, "loss": 0.3914, "num_input_tokens_seen": 407193344, "step": 7267 }, { "epoch": 16.184855233853007, "loss": 0.3952281177043915, "loss_ce": 8.651654206914827e-05, "loss_iou": 0.1650390625, "loss_num": 0.01300048828125, "loss_xval": 0.39453125, "num_input_tokens_seen": 407193344, "step": 7267 }, { "epoch": 16.187082405345212, "grad_norm": 14.020734786987305, "learning_rate": 1e-06, "loss": 0.4324, "num_input_tokens_seen": 407249716, "step": 7268 }, { "epoch": 16.187082405345212, "loss": 0.48159003257751465, "loss_ce": 0.0001447216491214931, "loss_iou": 0.2060546875, "loss_num": 0.013671875, "loss_xval": 0.48046875, "num_input_tokens_seen": 407249716, "step": 7268 }, { "epoch": 16.189309576837417, "grad_norm": 11.799369812011719, "learning_rate": 1e-06, "loss": 0.3835, "num_input_tokens_seen": 407306452, "step": 7269 }, { "epoch": 16.189309576837417, "loss": 0.3408535122871399, "loss_ce": 9.422671428183094e-05, "loss_iou": 0.1552734375, "loss_num": 0.005950927734375, "loss_xval": 0.33984375, "num_input_tokens_seen": 407306452, "step": 7269 }, { "epoch": 16.19153674832962, "grad_norm": 14.987945556640625, "learning_rate": 1e-06, "loss": 0.3707, "num_input_tokens_seen": 407363312, "step": 7270 }, { "epoch": 16.19153674832962, "loss": 0.2843882739543915, "loss_ce": 8.651558891870081e-05, "loss_iou": 0.12353515625, "loss_num": 0.007476806640625, "loss_xval": 0.28515625, "num_input_tokens_seen": 407363312, "step": 7270 }, { "epoch": 16.193763919821826, "grad_norm": 14.705431938171387, "learning_rate": 1e-06, "loss": 0.3561, "num_input_tokens_seen": 407417848, "step": 7271 }, { "epoch": 16.193763919821826, "loss": 0.30038201808929443, "loss_ce": 8.906603034120053e-05, "loss_iou": 0.123046875, "loss_num": 0.01080322265625, "loss_xval": 0.30078125, "num_input_tokens_seen": 407417848, "step": 7271 }, { "epoch": 16.19599109131403, "grad_norm": 33.002174377441406, "learning_rate": 1e-06, "loss": 0.5219, "num_input_tokens_seen": 407475472, "step": 7272 }, { "epoch": 16.19599109131403, "loss": 0.510102391242981, "loss_ce": 9.262951789423823e-05, "loss_iou": 0.2197265625, "loss_num": 0.01409912109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 407475472, "step": 7272 }, { "epoch": 16.198218262806236, "grad_norm": 29.614242553710938, "learning_rate": 1e-06, "loss": 0.3678, "num_input_tokens_seen": 407531476, "step": 7273 }, { "epoch": 16.198218262806236, "loss": 0.37849289178848267, "loss_ce": 7.491311407648027e-05, "loss_iou": 0.1650390625, "loss_num": 0.00958251953125, "loss_xval": 0.37890625, "num_input_tokens_seen": 407531476, "step": 7273 }, { "epoch": 16.20044543429844, "grad_norm": 16.283767700195312, "learning_rate": 1e-06, "loss": 0.4892, "num_input_tokens_seen": 407587100, "step": 7274 }, { "epoch": 16.20044543429844, "loss": 0.5279887914657593, "loss_ce": 9.569715621182695e-05, "loss_iou": 0.22265625, "loss_num": 0.0166015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 407587100, "step": 7274 }, { "epoch": 16.202672605790646, "grad_norm": 16.31856346130371, "learning_rate": 1e-06, "loss": 0.4131, "num_input_tokens_seen": 407643272, "step": 7275 }, { "epoch": 16.202672605790646, "loss": 0.40553590655326843, "loss_ce": 0.00010989317524945363, "loss_iou": 0.17578125, "loss_num": 0.01080322265625, "loss_xval": 0.40625, "num_input_tokens_seen": 407643272, "step": 7275 }, { "epoch": 16.20489977728285, "grad_norm": 18.60124969482422, "learning_rate": 1e-06, "loss": 0.3314, "num_input_tokens_seen": 407699796, "step": 7276 }, { "epoch": 16.20489977728285, "loss": 0.28377124667167664, "loss_ce": 7.983081741258502e-05, "loss_iou": 0.125, "loss_num": 0.006591796875, "loss_xval": 0.283203125, "num_input_tokens_seen": 407699796, "step": 7276 }, { "epoch": 16.207126948775056, "grad_norm": 22.093128204345703, "learning_rate": 1e-06, "loss": 0.4329, "num_input_tokens_seen": 407753556, "step": 7277 }, { "epoch": 16.207126948775056, "loss": 0.4864369034767151, "loss_ce": 0.00010880557238124311, "loss_iou": 0.2138671875, "loss_num": 0.0118408203125, "loss_xval": 0.486328125, "num_input_tokens_seen": 407753556, "step": 7277 }, { "epoch": 16.20935412026726, "grad_norm": 17.99799156188965, "learning_rate": 1e-06, "loss": 0.3275, "num_input_tokens_seen": 407811196, "step": 7278 }, { "epoch": 16.20935412026726, "loss": 0.2680291533470154, "loss_ce": 8.48027557367459e-05, "loss_iou": 0.1201171875, "loss_num": 0.0054931640625, "loss_xval": 0.267578125, "num_input_tokens_seen": 407811196, "step": 7278 }, { "epoch": 16.211581291759465, "grad_norm": 62.915122985839844, "learning_rate": 1e-06, "loss": 0.4762, "num_input_tokens_seen": 407866648, "step": 7279 }, { "epoch": 16.211581291759465, "loss": 0.3208872675895691, "loss_ce": 8.647728100186214e-05, "loss_iou": 0.1494140625, "loss_num": 0.0042724609375, "loss_xval": 0.3203125, "num_input_tokens_seen": 407866648, "step": 7279 }, { "epoch": 16.21380846325167, "grad_norm": 24.828845977783203, "learning_rate": 1e-06, "loss": 0.5674, "num_input_tokens_seen": 407922120, "step": 7280 }, { "epoch": 16.21380846325167, "loss": 0.4975318908691406, "loss_ce": 9.53997514443472e-05, "loss_iou": 0.2138671875, "loss_num": 0.01409912109375, "loss_xval": 0.498046875, "num_input_tokens_seen": 407922120, "step": 7280 }, { "epoch": 16.216035634743875, "grad_norm": 16.575681686401367, "learning_rate": 1e-06, "loss": 0.4067, "num_input_tokens_seen": 407974588, "step": 7281 }, { "epoch": 16.216035634743875, "loss": 0.5100204944610596, "loss_ce": 0.00104833475779742, "loss_iou": 0.208984375, "loss_num": 0.01806640625, "loss_xval": 0.5078125, "num_input_tokens_seen": 407974588, "step": 7281 }, { "epoch": 16.21826280623608, "grad_norm": 16.462749481201172, "learning_rate": 1e-06, "loss": 0.3674, "num_input_tokens_seen": 408030196, "step": 7282 }, { "epoch": 16.21826280623608, "loss": 0.43087995052337646, "loss_ce": 9.384381701238453e-05, "loss_iou": 0.1787109375, "loss_num": 0.014892578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 408030196, "step": 7282 }, { "epoch": 16.220489977728285, "grad_norm": 18.638999938964844, "learning_rate": 1e-06, "loss": 0.4374, "num_input_tokens_seen": 408084800, "step": 7283 }, { "epoch": 16.220489977728285, "loss": 0.4949721395969391, "loss_ce": 9.908679930958897e-05, "loss_iou": 0.22265625, "loss_num": 0.00982666015625, "loss_xval": 0.494140625, "num_input_tokens_seen": 408084800, "step": 7283 }, { "epoch": 16.22271714922049, "grad_norm": 13.85960578918457, "learning_rate": 1e-06, "loss": 0.3035, "num_input_tokens_seen": 408142404, "step": 7284 }, { "epoch": 16.22271714922049, "loss": 0.3523852527141571, "loss_ce": 9.034241520566866e-05, "loss_iou": 0.1513671875, "loss_num": 0.00970458984375, "loss_xval": 0.3515625, "num_input_tokens_seen": 408142404, "step": 7284 }, { "epoch": 16.224944320712694, "grad_norm": 23.113548278808594, "learning_rate": 1e-06, "loss": 0.5333, "num_input_tokens_seen": 408197056, "step": 7285 }, { "epoch": 16.224944320712694, "loss": 0.5909112691879272, "loss_ce": 9.095118002733216e-05, "loss_iou": 0.271484375, "loss_num": 0.00946044921875, "loss_xval": 0.58984375, "num_input_tokens_seen": 408197056, "step": 7285 }, { "epoch": 16.2271714922049, "grad_norm": 19.409629821777344, "learning_rate": 1e-06, "loss": 0.3651, "num_input_tokens_seen": 408249604, "step": 7286 }, { "epoch": 16.2271714922049, "loss": 0.36336296796798706, "loss_ce": 8.17275868030265e-05, "loss_iou": 0.1650390625, "loss_num": 0.0067138671875, "loss_xval": 0.36328125, "num_input_tokens_seen": 408249604, "step": 7286 }, { "epoch": 16.229398663697104, "grad_norm": 17.79453468322754, "learning_rate": 1e-06, "loss": 0.328, "num_input_tokens_seen": 408306396, "step": 7287 }, { "epoch": 16.229398663697104, "loss": 0.34055426716804504, "loss_ce": 0.00010017414024332538, "loss_iou": 0.158203125, "loss_num": 0.004669189453125, "loss_xval": 0.33984375, "num_input_tokens_seen": 408306396, "step": 7287 }, { "epoch": 16.23162583518931, "grad_norm": 18.441938400268555, "learning_rate": 1e-06, "loss": 0.4376, "num_input_tokens_seen": 408362596, "step": 7288 }, { "epoch": 16.23162583518931, "loss": 0.470440149307251, "loss_ce": 0.00010323309834348038, "loss_iou": 0.2158203125, "loss_num": 0.00787353515625, "loss_xval": 0.470703125, "num_input_tokens_seen": 408362596, "step": 7288 }, { "epoch": 16.233853006681514, "grad_norm": 24.354867935180664, "learning_rate": 1e-06, "loss": 0.3503, "num_input_tokens_seen": 408417028, "step": 7289 }, { "epoch": 16.233853006681514, "loss": 0.3560473918914795, "loss_ce": 9.036185656441376e-05, "loss_iou": 0.158203125, "loss_num": 0.00799560546875, "loss_xval": 0.35546875, "num_input_tokens_seen": 408417028, "step": 7289 }, { "epoch": 16.23608017817372, "grad_norm": 15.969454765319824, "learning_rate": 1e-06, "loss": 0.5287, "num_input_tokens_seen": 408472732, "step": 7290 }, { "epoch": 16.23608017817372, "loss": 0.5947001576423645, "loss_ce": 9.568793757352978e-05, "loss_iou": 0.255859375, "loss_num": 0.0167236328125, "loss_xval": 0.59375, "num_input_tokens_seen": 408472732, "step": 7290 }, { "epoch": 16.238307349665924, "grad_norm": 19.128751754760742, "learning_rate": 1e-06, "loss": 0.3783, "num_input_tokens_seen": 408531592, "step": 7291 }, { "epoch": 16.238307349665924, "loss": 0.37436580657958984, "loss_ce": 9.823799337027594e-05, "loss_iou": 0.16015625, "loss_num": 0.01092529296875, "loss_xval": 0.375, "num_input_tokens_seen": 408531592, "step": 7291 }, { "epoch": 16.24053452115813, "grad_norm": 20.96180534362793, "learning_rate": 1e-06, "loss": 0.4015, "num_input_tokens_seen": 408587248, "step": 7292 }, { "epoch": 16.24053452115813, "loss": 0.3449326753616333, "loss_ce": 8.403346146224067e-05, "loss_iou": 0.1572265625, "loss_num": 0.006011962890625, "loss_xval": 0.345703125, "num_input_tokens_seen": 408587248, "step": 7292 }, { "epoch": 16.242761692650333, "grad_norm": 16.29045867919922, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 408643652, "step": 7293 }, { "epoch": 16.242761692650333, "loss": 0.461269736289978, "loss_ce": 8.806748519418761e-05, "loss_iou": 0.208984375, "loss_num": 0.00872802734375, "loss_xval": 0.4609375, "num_input_tokens_seen": 408643652, "step": 7293 }, { "epoch": 16.244988864142538, "grad_norm": 16.806655883789062, "learning_rate": 1e-06, "loss": 0.4739, "num_input_tokens_seen": 408696544, "step": 7294 }, { "epoch": 16.244988864142538, "loss": 0.5039190053939819, "loss_ce": 0.00013484872761182487, "loss_iou": 0.212890625, "loss_num": 0.0157470703125, "loss_xval": 0.50390625, "num_input_tokens_seen": 408696544, "step": 7294 }, { "epoch": 16.247216035634743, "grad_norm": 19.150453567504883, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 408752192, "step": 7295 }, { "epoch": 16.247216035634743, "loss": 0.4750003218650818, "loss_ce": 8.576853724662215e-05, "loss_iou": 0.2099609375, "loss_num": 0.0111083984375, "loss_xval": 0.474609375, "num_input_tokens_seen": 408752192, "step": 7295 }, { "epoch": 16.249443207126948, "grad_norm": 14.885661125183105, "learning_rate": 1e-06, "loss": 0.4392, "num_input_tokens_seen": 408807428, "step": 7296 }, { "epoch": 16.249443207126948, "loss": 0.3516565263271332, "loss_ce": 9.402677096659318e-05, "loss_iou": 0.1591796875, "loss_num": 0.00653076171875, "loss_xval": 0.3515625, "num_input_tokens_seen": 408807428, "step": 7296 }, { "epoch": 16.251670378619153, "grad_norm": 20.185546875, "learning_rate": 1e-06, "loss": 0.6554, "num_input_tokens_seen": 408862660, "step": 7297 }, { "epoch": 16.251670378619153, "loss": 0.6360622644424438, "loss_ce": 7.591473695356399e-05, "loss_iou": 0.275390625, "loss_num": 0.017333984375, "loss_xval": 0.63671875, "num_input_tokens_seen": 408862660, "step": 7297 }, { "epoch": 16.253897550111358, "grad_norm": 24.526891708374023, "learning_rate": 1e-06, "loss": 0.5942, "num_input_tokens_seen": 408915492, "step": 7298 }, { "epoch": 16.253897550111358, "loss": 0.4688360095024109, "loss_ce": 8.602187153883278e-05, "loss_iou": 0.18359375, "loss_num": 0.0203857421875, "loss_xval": 0.46875, "num_input_tokens_seen": 408915492, "step": 7298 }, { "epoch": 16.256124721603562, "grad_norm": 28.41362953186035, "learning_rate": 1e-06, "loss": 0.3349, "num_input_tokens_seen": 408971936, "step": 7299 }, { "epoch": 16.256124721603562, "loss": 0.3919934928417206, "loss_ce": 0.00014778405602555722, "loss_iou": 0.1806640625, "loss_num": 0.005950927734375, "loss_xval": 0.392578125, "num_input_tokens_seen": 408971936, "step": 7299 }, { "epoch": 16.258351893095767, "grad_norm": 16.66225814819336, "learning_rate": 1e-06, "loss": 0.4285, "num_input_tokens_seen": 409026252, "step": 7300 }, { "epoch": 16.258351893095767, "loss": 0.4181760251522064, "loss_ce": 8.519048424204811e-05, "loss_iou": 0.1845703125, "loss_num": 0.0098876953125, "loss_xval": 0.41796875, "num_input_tokens_seen": 409026252, "step": 7300 }, { "epoch": 16.260579064587972, "grad_norm": 19.2691593170166, "learning_rate": 1e-06, "loss": 0.67, "num_input_tokens_seen": 409084124, "step": 7301 }, { "epoch": 16.260579064587972, "loss": 0.6911599636077881, "loss_ce": 0.00011992135114269331, "loss_iou": 0.275390625, "loss_num": 0.0279541015625, "loss_xval": 0.69140625, "num_input_tokens_seen": 409084124, "step": 7301 }, { "epoch": 16.262806236080177, "grad_norm": 14.751253128051758, "learning_rate": 1e-06, "loss": 0.3899, "num_input_tokens_seen": 409141304, "step": 7302 }, { "epoch": 16.262806236080177, "loss": 0.47578924894332886, "loss_ce": 8.124149462673813e-05, "loss_iou": 0.1982421875, "loss_num": 0.01611328125, "loss_xval": 0.4765625, "num_input_tokens_seen": 409141304, "step": 7302 }, { "epoch": 16.265033407572382, "grad_norm": 14.404290199279785, "learning_rate": 1e-06, "loss": 0.3463, "num_input_tokens_seen": 409197796, "step": 7303 }, { "epoch": 16.265033407572382, "loss": 0.26949194073677063, "loss_ce": 8.277669257950038e-05, "loss_iou": 0.1259765625, "loss_num": 0.003570556640625, "loss_xval": 0.26953125, "num_input_tokens_seen": 409197796, "step": 7303 }, { "epoch": 16.267260579064587, "grad_norm": 20.53440284729004, "learning_rate": 1e-06, "loss": 0.3827, "num_input_tokens_seen": 409252580, "step": 7304 }, { "epoch": 16.267260579064587, "loss": 0.4455221891403198, "loss_ce": 8.760465425439179e-05, "loss_iou": 0.1904296875, "loss_num": 0.01263427734375, "loss_xval": 0.4453125, "num_input_tokens_seen": 409252580, "step": 7304 }, { "epoch": 16.26948775055679, "grad_norm": 28.248842239379883, "learning_rate": 1e-06, "loss": 0.6714, "num_input_tokens_seen": 409309884, "step": 7305 }, { "epoch": 16.26948775055679, "loss": 0.570942759513855, "loss_ce": 0.0006302678375504911, "loss_iou": 0.2412109375, "loss_num": 0.017822265625, "loss_xval": 0.5703125, "num_input_tokens_seen": 409309884, "step": 7305 }, { "epoch": 16.271714922048996, "grad_norm": 15.996672630310059, "learning_rate": 1e-06, "loss": 0.5986, "num_input_tokens_seen": 409366592, "step": 7306 }, { "epoch": 16.271714922048996, "loss": 0.5386756658554077, "loss_ce": 0.000101389319752343, "loss_iou": 0.2216796875, "loss_num": 0.0189208984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 409366592, "step": 7306 }, { "epoch": 16.2739420935412, "grad_norm": 25.504066467285156, "learning_rate": 1e-06, "loss": 0.4661, "num_input_tokens_seen": 409423384, "step": 7307 }, { "epoch": 16.2739420935412, "loss": 0.5826142430305481, "loss_ce": 9.470840450376272e-05, "loss_iou": 0.263671875, "loss_num": 0.0107421875, "loss_xval": 0.58203125, "num_input_tokens_seen": 409423384, "step": 7307 }, { "epoch": 16.276169265033406, "grad_norm": 16.374908447265625, "learning_rate": 1e-06, "loss": 0.5241, "num_input_tokens_seen": 409479828, "step": 7308 }, { "epoch": 16.276169265033406, "loss": 0.6848694086074829, "loss_ce": 0.00017701313481666148, "loss_iou": 0.30859375, "loss_num": 0.01336669921875, "loss_xval": 0.68359375, "num_input_tokens_seen": 409479828, "step": 7308 }, { "epoch": 16.27839643652561, "grad_norm": 17.140676498413086, "learning_rate": 1e-06, "loss": 0.5788, "num_input_tokens_seen": 409536380, "step": 7309 }, { "epoch": 16.27839643652561, "loss": 0.7437722682952881, "loss_ce": 0.00011992130021099001, "loss_iou": 0.29296875, "loss_num": 0.031005859375, "loss_xval": 0.7421875, "num_input_tokens_seen": 409536380, "step": 7309 }, { "epoch": 16.280623608017816, "grad_norm": 14.995177268981934, "learning_rate": 1e-06, "loss": 0.4511, "num_input_tokens_seen": 409594828, "step": 7310 }, { "epoch": 16.280623608017816, "loss": 0.3734976649284363, "loss_ce": 8.45702743390575e-05, "loss_iou": 0.1630859375, "loss_num": 0.00933837890625, "loss_xval": 0.373046875, "num_input_tokens_seen": 409594828, "step": 7310 }, { "epoch": 16.28285077951002, "grad_norm": 16.360715866088867, "learning_rate": 1e-06, "loss": 0.4679, "num_input_tokens_seen": 409650308, "step": 7311 }, { "epoch": 16.28285077951002, "loss": 0.4848405718803406, "loss_ce": 9.936667629517615e-05, "loss_iou": 0.2177734375, "loss_num": 0.0098876953125, "loss_xval": 0.484375, "num_input_tokens_seen": 409650308, "step": 7311 }, { "epoch": 16.285077951002226, "grad_norm": 21.59230613708496, "learning_rate": 1e-06, "loss": 0.4159, "num_input_tokens_seen": 409706112, "step": 7312 }, { "epoch": 16.285077951002226, "loss": 0.5259721875190735, "loss_ce": 9.327764564659446e-05, "loss_iou": 0.251953125, "loss_num": 0.004608154296875, "loss_xval": 0.52734375, "num_input_tokens_seen": 409706112, "step": 7312 }, { "epoch": 16.28730512249443, "grad_norm": 15.556495666503906, "learning_rate": 1e-06, "loss": 0.3937, "num_input_tokens_seen": 409763944, "step": 7313 }, { "epoch": 16.28730512249443, "loss": 0.3481227159500122, "loss_ce": 0.00010025159281212837, "loss_iou": 0.1611328125, "loss_num": 0.0052490234375, "loss_xval": 0.34765625, "num_input_tokens_seen": 409763944, "step": 7313 }, { "epoch": 16.289532293986635, "grad_norm": 17.400310516357422, "learning_rate": 1e-06, "loss": 0.4426, "num_input_tokens_seen": 409815968, "step": 7314 }, { "epoch": 16.289532293986635, "loss": 0.4380715787410736, "loss_ce": 8.330351556651294e-05, "loss_iou": 0.18359375, "loss_num": 0.01416015625, "loss_xval": 0.4375, "num_input_tokens_seen": 409815968, "step": 7314 }, { "epoch": 16.29175946547884, "grad_norm": 17.484792709350586, "learning_rate": 1e-06, "loss": 0.3899, "num_input_tokens_seen": 409870240, "step": 7315 }, { "epoch": 16.29175946547884, "loss": 0.42489010095596313, "loss_ce": 8.540091221220791e-05, "loss_iou": 0.1708984375, "loss_num": 0.0164794921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 409870240, "step": 7315 }, { "epoch": 16.293986636971045, "grad_norm": 14.591642379760742, "learning_rate": 1e-06, "loss": 0.3274, "num_input_tokens_seen": 409926680, "step": 7316 }, { "epoch": 16.293986636971045, "loss": 0.2797589600086212, "loss_ce": 9.588080865796655e-05, "loss_iou": 0.126953125, "loss_num": 0.005126953125, "loss_xval": 0.279296875, "num_input_tokens_seen": 409926680, "step": 7316 }, { "epoch": 16.29621380846325, "grad_norm": 13.230761528015137, "learning_rate": 1e-06, "loss": 0.3727, "num_input_tokens_seen": 409985004, "step": 7317 }, { "epoch": 16.29621380846325, "loss": 0.3750907778739929, "loss_ce": 9.078568109543994e-05, "loss_iou": 0.15625, "loss_num": 0.0125732421875, "loss_xval": 0.375, "num_input_tokens_seen": 409985004, "step": 7317 }, { "epoch": 16.29844097995546, "grad_norm": 32.36854934692383, "learning_rate": 1e-06, "loss": 0.4765, "num_input_tokens_seen": 410040396, "step": 7318 }, { "epoch": 16.29844097995546, "loss": 0.43508851528167725, "loss_ce": 9.096259600482881e-05, "loss_iou": 0.1962890625, "loss_num": 0.008544921875, "loss_xval": 0.435546875, "num_input_tokens_seen": 410040396, "step": 7318 }, { "epoch": 16.30066815144766, "grad_norm": 20.330373764038086, "learning_rate": 1e-06, "loss": 0.3185, "num_input_tokens_seen": 410093968, "step": 7319 }, { "epoch": 16.30066815144766, "loss": 0.3120901584625244, "loss_ce": 7.842134800739586e-05, "loss_iou": 0.134765625, "loss_num": 0.008544921875, "loss_xval": 0.3125, "num_input_tokens_seen": 410093968, "step": 7319 }, { "epoch": 16.302895322939868, "grad_norm": 15.776129722595215, "learning_rate": 1e-06, "loss": 0.3339, "num_input_tokens_seen": 410149384, "step": 7320 }, { "epoch": 16.302895322939868, "loss": 0.47568702697753906, "loss_ce": 0.00010110878793057054, "loss_iou": 0.2041015625, "loss_num": 0.01348876953125, "loss_xval": 0.4765625, "num_input_tokens_seen": 410149384, "step": 7320 }, { "epoch": 16.305122494432073, "grad_norm": 21.19259262084961, "learning_rate": 1e-06, "loss": 0.5786, "num_input_tokens_seen": 410206596, "step": 7321 }, { "epoch": 16.305122494432073, "loss": 0.36239707469940186, "loss_ce": 9.236540063284338e-05, "loss_iou": 0.1640625, "loss_num": 0.006866455078125, "loss_xval": 0.36328125, "num_input_tokens_seen": 410206596, "step": 7321 }, { "epoch": 16.307349665924278, "grad_norm": 22.16942596435547, "learning_rate": 1e-06, "loss": 0.4556, "num_input_tokens_seen": 410261328, "step": 7322 }, { "epoch": 16.307349665924278, "loss": 0.5388376712799072, "loss_ce": 8.029842138057575e-05, "loss_iou": 0.2099609375, "loss_num": 0.023681640625, "loss_xval": 0.5390625, "num_input_tokens_seen": 410261328, "step": 7322 }, { "epoch": 16.309576837416483, "grad_norm": 21.34149169921875, "learning_rate": 1e-06, "loss": 0.4369, "num_input_tokens_seen": 410317892, "step": 7323 }, { "epoch": 16.309576837416483, "loss": 0.5326899886131287, "loss_ce": 9.718516957946122e-05, "loss_iou": 0.2333984375, "loss_num": 0.01318359375, "loss_xval": 0.53125, "num_input_tokens_seen": 410317892, "step": 7323 }, { "epoch": 16.311804008908688, "grad_norm": 22.547149658203125, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 410372852, "step": 7324 }, { "epoch": 16.311804008908688, "loss": 0.3052588105201721, "loss_ce": 8.303693175548688e-05, "loss_iou": 0.1337890625, "loss_num": 0.007659912109375, "loss_xval": 0.3046875, "num_input_tokens_seen": 410372852, "step": 7324 }, { "epoch": 16.314031180400892, "grad_norm": 24.133289337158203, "learning_rate": 1e-06, "loss": 0.3867, "num_input_tokens_seen": 410428208, "step": 7325 }, { "epoch": 16.314031180400892, "loss": 0.3308970034122467, "loss_ce": 8.646737842354923e-05, "loss_iou": 0.1494140625, "loss_num": 0.006256103515625, "loss_xval": 0.330078125, "num_input_tokens_seen": 410428208, "step": 7325 }, { "epoch": 16.316258351893097, "grad_norm": 27.495380401611328, "learning_rate": 1e-06, "loss": 0.5133, "num_input_tokens_seen": 410483916, "step": 7326 }, { "epoch": 16.316258351893097, "loss": 0.5182612538337708, "loss_ce": 7.278715202119201e-05, "loss_iou": 0.2421875, "loss_num": 0.0068359375, "loss_xval": 0.51953125, "num_input_tokens_seen": 410483916, "step": 7326 }, { "epoch": 16.318485523385302, "grad_norm": 26.30192756652832, "learning_rate": 1e-06, "loss": 0.3812, "num_input_tokens_seen": 410539204, "step": 7327 }, { "epoch": 16.318485523385302, "loss": 0.47011250257492065, "loss_ce": 0.00014180486323311925, "loss_iou": 0.1982421875, "loss_num": 0.01483154296875, "loss_xval": 0.470703125, "num_input_tokens_seen": 410539204, "step": 7327 }, { "epoch": 16.320712694877507, "grad_norm": 17.40894317626953, "learning_rate": 1e-06, "loss": 0.4687, "num_input_tokens_seen": 410594636, "step": 7328 }, { "epoch": 16.320712694877507, "loss": 0.44930049777030945, "loss_ce": 8.17526743048802e-05, "loss_iou": 0.1943359375, "loss_num": 0.01226806640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 410594636, "step": 7328 }, { "epoch": 16.322939866369712, "grad_norm": 21.12818145751953, "learning_rate": 1e-06, "loss": 0.3845, "num_input_tokens_seen": 410651876, "step": 7329 }, { "epoch": 16.322939866369712, "loss": 0.3459153175354004, "loss_ce": 9.013438830152154e-05, "loss_iou": 0.1630859375, "loss_num": 0.003936767578125, "loss_xval": 0.345703125, "num_input_tokens_seen": 410651876, "step": 7329 }, { "epoch": 16.325167037861917, "grad_norm": 10.89534854888916, "learning_rate": 1e-06, "loss": 0.5001, "num_input_tokens_seen": 410706904, "step": 7330 }, { "epoch": 16.325167037861917, "loss": 0.46097275614738464, "loss_ce": 9.627400140743703e-05, "loss_iou": 0.205078125, "loss_num": 0.01019287109375, "loss_xval": 0.4609375, "num_input_tokens_seen": 410706904, "step": 7330 }, { "epoch": 16.32739420935412, "grad_norm": 20.05289077758789, "learning_rate": 1e-06, "loss": 0.4095, "num_input_tokens_seen": 410763772, "step": 7331 }, { "epoch": 16.32739420935412, "loss": 0.3861430883407593, "loss_ce": 9.571410191711038e-05, "loss_iou": 0.1689453125, "loss_num": 0.0096435546875, "loss_xval": 0.38671875, "num_input_tokens_seen": 410763772, "step": 7331 }, { "epoch": 16.329621380846326, "grad_norm": 15.384933471679688, "learning_rate": 1e-06, "loss": 0.3819, "num_input_tokens_seen": 410823420, "step": 7332 }, { "epoch": 16.329621380846326, "loss": 0.4961845278739929, "loss_ce": 9.074924309970811e-05, "loss_iou": 0.2255859375, "loss_num": 0.00909423828125, "loss_xval": 0.49609375, "num_input_tokens_seen": 410823420, "step": 7332 }, { "epoch": 16.33184855233853, "grad_norm": 24.686939239501953, "learning_rate": 1e-06, "loss": 0.4537, "num_input_tokens_seen": 410877940, "step": 7333 }, { "epoch": 16.33184855233853, "loss": 0.5593637824058533, "loss_ce": 0.00015970882668625563, "loss_iou": 0.234375, "loss_num": 0.018310546875, "loss_xval": 0.55859375, "num_input_tokens_seen": 410877940, "step": 7333 }, { "epoch": 16.334075723830736, "grad_norm": 22.968791961669922, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 410932476, "step": 7334 }, { "epoch": 16.334075723830736, "loss": 0.39212775230407715, "loss_ce": 9.897441486828029e-05, "loss_iou": 0.171875, "loss_num": 0.00958251953125, "loss_xval": 0.392578125, "num_input_tokens_seen": 410932476, "step": 7334 }, { "epoch": 16.33630289532294, "grad_norm": 15.322009086608887, "learning_rate": 1e-06, "loss": 0.4757, "num_input_tokens_seen": 410990488, "step": 7335 }, { "epoch": 16.33630289532294, "loss": 0.5929901003837585, "loss_ce": 9.460109868086874e-05, "loss_iou": 0.240234375, "loss_num": 0.0223388671875, "loss_xval": 0.59375, "num_input_tokens_seen": 410990488, "step": 7335 }, { "epoch": 16.338530066815146, "grad_norm": 34.48351287841797, "learning_rate": 1e-06, "loss": 0.5019, "num_input_tokens_seen": 411046180, "step": 7336 }, { "epoch": 16.338530066815146, "loss": 0.581402063369751, "loss_ce": 0.00010323138849344105, "loss_iou": 0.2578125, "loss_num": 0.0135498046875, "loss_xval": 0.58203125, "num_input_tokens_seen": 411046180, "step": 7336 }, { "epoch": 16.34075723830735, "grad_norm": 16.476621627807617, "learning_rate": 1e-06, "loss": 0.3056, "num_input_tokens_seen": 411102856, "step": 7337 }, { "epoch": 16.34075723830735, "loss": 0.1867981106042862, "loss_ce": 9.157234308077022e-05, "loss_iou": 0.08251953125, "loss_num": 0.00439453125, "loss_xval": 0.1865234375, "num_input_tokens_seen": 411102856, "step": 7337 }, { "epoch": 16.342984409799556, "grad_norm": 19.494869232177734, "learning_rate": 1e-06, "loss": 0.3802, "num_input_tokens_seen": 411159860, "step": 7338 }, { "epoch": 16.342984409799556, "loss": 0.3621300756931305, "loss_ce": 0.0003136666491627693, "loss_iou": 0.1455078125, "loss_num": 0.01422119140625, "loss_xval": 0.361328125, "num_input_tokens_seen": 411159860, "step": 7338 }, { "epoch": 16.34521158129176, "grad_norm": 19.032352447509766, "learning_rate": 1e-06, "loss": 0.4447, "num_input_tokens_seen": 411215444, "step": 7339 }, { "epoch": 16.34521158129176, "loss": 0.3872949481010437, "loss_ce": 8.788703416939825e-05, "loss_iou": 0.1611328125, "loss_num": 0.0130615234375, "loss_xval": 0.38671875, "num_input_tokens_seen": 411215444, "step": 7339 }, { "epoch": 16.347438752783965, "grad_norm": 18.541751861572266, "learning_rate": 1e-06, "loss": 0.4312, "num_input_tokens_seen": 411270292, "step": 7340 }, { "epoch": 16.347438752783965, "loss": 0.5562987923622131, "loss_ce": 8.540366252418607e-05, "loss_iou": 0.2392578125, "loss_num": 0.015380859375, "loss_xval": 0.5546875, "num_input_tokens_seen": 411270292, "step": 7340 }, { "epoch": 16.34966592427617, "grad_norm": 13.023123741149902, "learning_rate": 1e-06, "loss": 0.5157, "num_input_tokens_seen": 411327140, "step": 7341 }, { "epoch": 16.34966592427617, "loss": 0.4642055630683899, "loss_ce": 9.424821473658085e-05, "loss_iou": 0.1923828125, "loss_num": 0.0157470703125, "loss_xval": 0.46484375, "num_input_tokens_seen": 411327140, "step": 7341 }, { "epoch": 16.351893095768375, "grad_norm": 18.090694427490234, "learning_rate": 1e-06, "loss": 0.428, "num_input_tokens_seen": 411383152, "step": 7342 }, { "epoch": 16.351893095768375, "loss": 0.46582120656967163, "loss_ce": 0.0009774666978046298, "loss_iou": 0.2021484375, "loss_num": 0.011962890625, "loss_xval": 0.46484375, "num_input_tokens_seen": 411383152, "step": 7342 }, { "epoch": 16.35412026726058, "grad_norm": 16.27545928955078, "learning_rate": 1e-06, "loss": 0.3941, "num_input_tokens_seen": 411438268, "step": 7343 }, { "epoch": 16.35412026726058, "loss": 0.41550177335739136, "loss_ce": 9.647855767980218e-05, "loss_iou": 0.1904296875, "loss_num": 0.0068359375, "loss_xval": 0.416015625, "num_input_tokens_seen": 411438268, "step": 7343 }, { "epoch": 16.356347438752785, "grad_norm": 22.559267044067383, "learning_rate": 1e-06, "loss": 0.4177, "num_input_tokens_seen": 411493464, "step": 7344 }, { "epoch": 16.356347438752785, "loss": 0.4500234127044678, "loss_ce": 7.222830026876181e-05, "loss_iou": 0.19140625, "loss_num": 0.01348876953125, "loss_xval": 0.44921875, "num_input_tokens_seen": 411493464, "step": 7344 }, { "epoch": 16.35857461024499, "grad_norm": 17.649221420288086, "learning_rate": 1e-06, "loss": 0.3428, "num_input_tokens_seen": 411550844, "step": 7345 }, { "epoch": 16.35857461024499, "loss": 0.46036189794540405, "loss_ce": 9.580078767612576e-05, "loss_iou": 0.1962890625, "loss_num": 0.013671875, "loss_xval": 0.4609375, "num_input_tokens_seen": 411550844, "step": 7345 }, { "epoch": 16.360801781737194, "grad_norm": 19.68630027770996, "learning_rate": 1e-06, "loss": 0.3448, "num_input_tokens_seen": 411606484, "step": 7346 }, { "epoch": 16.360801781737194, "loss": 0.31284722685813904, "loss_ce": 0.00010307719639968127, "loss_iou": 0.14453125, "loss_num": 0.00482177734375, "loss_xval": 0.3125, "num_input_tokens_seen": 411606484, "step": 7346 }, { "epoch": 16.3630289532294, "grad_norm": 23.66335678100586, "learning_rate": 1e-06, "loss": 0.4744, "num_input_tokens_seen": 411662964, "step": 7347 }, { "epoch": 16.3630289532294, "loss": 0.38772842288017273, "loss_ce": 0.00015515368431806564, "loss_iou": 0.171875, "loss_num": 0.00885009765625, "loss_xval": 0.38671875, "num_input_tokens_seen": 411662964, "step": 7347 }, { "epoch": 16.365256124721604, "grad_norm": 16.446657180786133, "learning_rate": 1e-06, "loss": 0.4439, "num_input_tokens_seen": 411718656, "step": 7348 }, { "epoch": 16.365256124721604, "loss": 0.40172719955444336, "loss_ce": 0.00011588144116103649, "loss_iou": 0.16796875, "loss_num": 0.01300048828125, "loss_xval": 0.40234375, "num_input_tokens_seen": 411718656, "step": 7348 }, { "epoch": 16.36748329621381, "grad_norm": 12.772607803344727, "learning_rate": 1e-06, "loss": 0.354, "num_input_tokens_seen": 411774636, "step": 7349 }, { "epoch": 16.36748329621381, "loss": 0.3649415373802185, "loss_ce": 7.335752889048308e-05, "loss_iou": 0.154296875, "loss_num": 0.0113525390625, "loss_xval": 0.365234375, "num_input_tokens_seen": 411774636, "step": 7349 }, { "epoch": 16.369710467706014, "grad_norm": 199.87049865722656, "learning_rate": 1e-06, "loss": 0.4038, "num_input_tokens_seen": 411828580, "step": 7350 }, { "epoch": 16.369710467706014, "loss": 0.5851800441741943, "loss_ce": 9.696916822576895e-05, "loss_iou": 0.26171875, "loss_num": 0.01251220703125, "loss_xval": 0.5859375, "num_input_tokens_seen": 411828580, "step": 7350 }, { "epoch": 16.37193763919822, "grad_norm": 13.645878791809082, "learning_rate": 1e-06, "loss": 0.3657, "num_input_tokens_seen": 411886588, "step": 7351 }, { "epoch": 16.37193763919822, "loss": 0.5355052947998047, "loss_ce": 0.00010485852544661611, "loss_iou": 0.2138671875, "loss_num": 0.021728515625, "loss_xval": 0.53515625, "num_input_tokens_seen": 411886588, "step": 7351 }, { "epoch": 16.374164810690424, "grad_norm": 16.982749938964844, "learning_rate": 1e-06, "loss": 0.4847, "num_input_tokens_seen": 411944540, "step": 7352 }, { "epoch": 16.374164810690424, "loss": 0.48775994777679443, "loss_ce": 8.901581168174744e-05, "loss_iou": 0.212890625, "loss_num": 0.01220703125, "loss_xval": 0.48828125, "num_input_tokens_seen": 411944540, "step": 7352 }, { "epoch": 16.37639198218263, "grad_norm": 19.0021915435791, "learning_rate": 1e-06, "loss": 0.5334, "num_input_tokens_seen": 412002580, "step": 7353 }, { "epoch": 16.37639198218263, "loss": 0.7117968797683716, "loss_ce": 0.00012699057697318494, "loss_iou": 0.32421875, "loss_num": 0.0123291015625, "loss_xval": 0.7109375, "num_input_tokens_seen": 412002580, "step": 7353 }, { "epoch": 16.378619153674833, "grad_norm": 16.590665817260742, "learning_rate": 1e-06, "loss": 0.4343, "num_input_tokens_seen": 412059804, "step": 7354 }, { "epoch": 16.378619153674833, "loss": 0.41806820034980774, "loss_ce": 9.94568836176768e-05, "loss_iou": 0.1796875, "loss_num": 0.01165771484375, "loss_xval": 0.41796875, "num_input_tokens_seen": 412059804, "step": 7354 }, { "epoch": 16.380846325167038, "grad_norm": 14.91476821899414, "learning_rate": 1e-06, "loss": 0.389, "num_input_tokens_seen": 412117424, "step": 7355 }, { "epoch": 16.380846325167038, "loss": 0.429162859916687, "loss_ce": 8.569219789933413e-05, "loss_iou": 0.17578125, "loss_num": 0.01531982421875, "loss_xval": 0.4296875, "num_input_tokens_seen": 412117424, "step": 7355 }, { "epoch": 16.383073496659243, "grad_norm": 21.37236976623535, "learning_rate": 1e-06, "loss": 0.3955, "num_input_tokens_seen": 412175316, "step": 7356 }, { "epoch": 16.383073496659243, "loss": 0.471897691488266, "loss_ce": 9.593518188921735e-05, "loss_iou": 0.2119140625, "loss_num": 0.009521484375, "loss_xval": 0.47265625, "num_input_tokens_seen": 412175316, "step": 7356 }, { "epoch": 16.385300668151448, "grad_norm": 23.095491409301758, "learning_rate": 1e-06, "loss": 0.5033, "num_input_tokens_seen": 412232752, "step": 7357 }, { "epoch": 16.385300668151448, "loss": 0.42098525166511536, "loss_ce": 8.681518374942243e-05, "loss_iou": 0.1787109375, "loss_num": 0.0125732421875, "loss_xval": 0.421875, "num_input_tokens_seen": 412232752, "step": 7357 }, { "epoch": 16.387527839643653, "grad_norm": 73.42839813232422, "learning_rate": 1e-06, "loss": 0.6915, "num_input_tokens_seen": 412284664, "step": 7358 }, { "epoch": 16.387527839643653, "loss": 0.7482743859291077, "loss_ce": 0.00010545070108491927, "loss_iou": 0.318359375, "loss_num": 0.02197265625, "loss_xval": 0.75, "num_input_tokens_seen": 412284664, "step": 7358 }, { "epoch": 16.389755011135858, "grad_norm": 24.8407039642334, "learning_rate": 1e-06, "loss": 0.5937, "num_input_tokens_seen": 412342800, "step": 7359 }, { "epoch": 16.389755011135858, "loss": 0.5182784795761108, "loss_ce": 9.002141450764611e-05, "loss_iou": 0.2392578125, "loss_num": 0.00799560546875, "loss_xval": 0.51953125, "num_input_tokens_seen": 412342800, "step": 7359 }, { "epoch": 16.391982182628063, "grad_norm": 19.556819915771484, "learning_rate": 1e-06, "loss": 0.4151, "num_input_tokens_seen": 412397900, "step": 7360 }, { "epoch": 16.391982182628063, "loss": 0.3638564348220825, "loss_ce": 8.691055700182915e-05, "loss_iou": 0.15625, "loss_num": 0.010498046875, "loss_xval": 0.36328125, "num_input_tokens_seen": 412397900, "step": 7360 }, { "epoch": 16.394209354120267, "grad_norm": 27.42656707763672, "learning_rate": 1e-06, "loss": 0.4955, "num_input_tokens_seen": 412454180, "step": 7361 }, { "epoch": 16.394209354120267, "loss": 0.5466644763946533, "loss_ce": 9.469907672610134e-05, "loss_iou": 0.2421875, "loss_num": 0.01226806640625, "loss_xval": 0.546875, "num_input_tokens_seen": 412454180, "step": 7361 }, { "epoch": 16.396436525612472, "grad_norm": 12.27857780456543, "learning_rate": 1e-06, "loss": 0.3187, "num_input_tokens_seen": 412511772, "step": 7362 }, { "epoch": 16.396436525612472, "loss": 0.31820547580718994, "loss_ce": 9.021394362207502e-05, "loss_iou": 0.13671875, "loss_num": 0.00885009765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 412511772, "step": 7362 }, { "epoch": 16.398663697104677, "grad_norm": 15.365641593933105, "learning_rate": 1e-06, "loss": 0.4709, "num_input_tokens_seen": 412565312, "step": 7363 }, { "epoch": 16.398663697104677, "loss": 0.44320639967918396, "loss_ce": 9.116313594859093e-05, "loss_iou": 0.1787109375, "loss_num": 0.0172119140625, "loss_xval": 0.443359375, "num_input_tokens_seen": 412565312, "step": 7363 }, { "epoch": 16.400890868596882, "grad_norm": 16.197721481323242, "learning_rate": 1e-06, "loss": 0.4165, "num_input_tokens_seen": 412619724, "step": 7364 }, { "epoch": 16.400890868596882, "loss": 0.3934442400932312, "loss_ce": 0.00013366495841182768, "loss_iou": 0.177734375, "loss_num": 0.0074462890625, "loss_xval": 0.392578125, "num_input_tokens_seen": 412619724, "step": 7364 }, { "epoch": 16.403118040089087, "grad_norm": 19.257944107055664, "learning_rate": 1e-06, "loss": 0.4529, "num_input_tokens_seen": 412672252, "step": 7365 }, { "epoch": 16.403118040089087, "loss": 0.3346201777458191, "loss_ce": 8.649235678603873e-05, "loss_iou": 0.150390625, "loss_num": 0.00689697265625, "loss_xval": 0.333984375, "num_input_tokens_seen": 412672252, "step": 7365 }, { "epoch": 16.40534521158129, "grad_norm": 35.989013671875, "learning_rate": 1e-06, "loss": 0.4365, "num_input_tokens_seen": 412727280, "step": 7366 }, { "epoch": 16.40534521158129, "loss": 0.46420300006866455, "loss_ce": 9.16688732104376e-05, "loss_iou": 0.197265625, "loss_num": 0.0137939453125, "loss_xval": 0.46484375, "num_input_tokens_seen": 412727280, "step": 7366 }, { "epoch": 16.407572383073497, "grad_norm": 19.286439895629883, "learning_rate": 1e-06, "loss": 0.4871, "num_input_tokens_seen": 412782212, "step": 7367 }, { "epoch": 16.407572383073497, "loss": 0.4057268500328064, "loss_ce": 8.719813922652975e-05, "loss_iou": 0.189453125, "loss_num": 0.00531005859375, "loss_xval": 0.40625, "num_input_tokens_seen": 412782212, "step": 7367 }, { "epoch": 16.4097995545657, "grad_norm": 12.866009712219238, "learning_rate": 1e-06, "loss": 0.473, "num_input_tokens_seen": 412836960, "step": 7368 }, { "epoch": 16.4097995545657, "loss": 0.5923727750778198, "loss_ce": 8.767165127210319e-05, "loss_iou": 0.24609375, "loss_num": 0.02001953125, "loss_xval": 0.59375, "num_input_tokens_seen": 412836960, "step": 7368 }, { "epoch": 16.412026726057906, "grad_norm": 18.949840545654297, "learning_rate": 1e-06, "loss": 0.4169, "num_input_tokens_seen": 412894060, "step": 7369 }, { "epoch": 16.412026726057906, "loss": 0.5453881025314331, "loss_ce": 0.00022210404858924448, "loss_iou": 0.2373046875, "loss_num": 0.01397705078125, "loss_xval": 0.546875, "num_input_tokens_seen": 412894060, "step": 7369 }, { "epoch": 16.41425389755011, "grad_norm": 16.93563461303711, "learning_rate": 1e-06, "loss": 0.4276, "num_input_tokens_seen": 412950112, "step": 7370 }, { "epoch": 16.41425389755011, "loss": 0.383827805519104, "loss_ce": 0.00013031261914875358, "loss_iou": 0.1669921875, "loss_num": 0.01007080078125, "loss_xval": 0.3828125, "num_input_tokens_seen": 412950112, "step": 7370 }, { "epoch": 16.416481069042316, "grad_norm": 20.606613159179688, "learning_rate": 1e-06, "loss": 0.4678, "num_input_tokens_seen": 413005572, "step": 7371 }, { "epoch": 16.416481069042316, "loss": 0.5181921720504761, "loss_ce": 0.00012574487482197583, "loss_iou": 0.22265625, "loss_num": 0.0142822265625, "loss_xval": 0.51953125, "num_input_tokens_seen": 413005572, "step": 7371 }, { "epoch": 16.41870824053452, "grad_norm": 24.132949829101562, "learning_rate": 1e-06, "loss": 0.5933, "num_input_tokens_seen": 413061068, "step": 7372 }, { "epoch": 16.41870824053452, "loss": 0.5776008367538452, "loss_ce": 8.622092718724161e-05, "loss_iou": 0.263671875, "loss_num": 0.01007080078125, "loss_xval": 0.578125, "num_input_tokens_seen": 413061068, "step": 7372 }, { "epoch": 16.420935412026726, "grad_norm": 15.339935302734375, "learning_rate": 1e-06, "loss": 0.5645, "num_input_tokens_seen": 413118180, "step": 7373 }, { "epoch": 16.420935412026726, "loss": 0.5817098617553711, "loss_ce": 0.0001058535126503557, "loss_iou": 0.25390625, "loss_num": 0.014892578125, "loss_xval": 0.58203125, "num_input_tokens_seen": 413118180, "step": 7373 }, { "epoch": 16.42316258351893, "grad_norm": 17.126455307006836, "learning_rate": 1e-06, "loss": 0.4166, "num_input_tokens_seen": 413176160, "step": 7374 }, { "epoch": 16.42316258351893, "loss": 0.3593493700027466, "loss_ce": 9.64459904935211e-05, "loss_iou": 0.158203125, "loss_num": 0.008544921875, "loss_xval": 0.359375, "num_input_tokens_seen": 413176160, "step": 7374 }, { "epoch": 16.425389755011135, "grad_norm": 13.592398643493652, "learning_rate": 1e-06, "loss": 0.2656, "num_input_tokens_seen": 413233164, "step": 7375 }, { "epoch": 16.425389755011135, "loss": 0.23620259761810303, "loss_ce": 8.810235158307478e-05, "loss_iou": 0.10498046875, "loss_num": 0.00506591796875, "loss_xval": 0.236328125, "num_input_tokens_seen": 413233164, "step": 7375 }, { "epoch": 16.42761692650334, "grad_norm": 13.771684646606445, "learning_rate": 1e-06, "loss": 0.4194, "num_input_tokens_seen": 413290152, "step": 7376 }, { "epoch": 16.42761692650334, "loss": 0.410991370677948, "loss_ce": 0.00010270239727105945, "loss_iou": 0.171875, "loss_num": 0.013427734375, "loss_xval": 0.41015625, "num_input_tokens_seen": 413290152, "step": 7376 }, { "epoch": 16.429844097995545, "grad_norm": 12.945503234863281, "learning_rate": 1e-06, "loss": 0.2917, "num_input_tokens_seen": 413346308, "step": 7377 }, { "epoch": 16.429844097995545, "loss": 0.2603330910205841, "loss_ce": 7.917418406577781e-05, "loss_iou": 0.11962890625, "loss_num": 0.004119873046875, "loss_xval": 0.259765625, "num_input_tokens_seen": 413346308, "step": 7377 }, { "epoch": 16.43207126948775, "grad_norm": 31.2889404296875, "learning_rate": 1e-06, "loss": 0.5393, "num_input_tokens_seen": 413400924, "step": 7378 }, { "epoch": 16.43207126948775, "loss": 0.45716169476509094, "loss_ce": 9.228027192875743e-05, "loss_iou": 0.208984375, "loss_num": 0.0078125, "loss_xval": 0.45703125, "num_input_tokens_seen": 413400924, "step": 7378 }, { "epoch": 16.434298440979955, "grad_norm": 18.560880661010742, "learning_rate": 1e-06, "loss": 0.4677, "num_input_tokens_seen": 413456024, "step": 7379 }, { "epoch": 16.434298440979955, "loss": 0.580654501914978, "loss_ce": 8.805579273030162e-05, "loss_iou": 0.2451171875, "loss_num": 0.01806640625, "loss_xval": 0.58203125, "num_input_tokens_seen": 413456024, "step": 7379 }, { "epoch": 16.43652561247216, "grad_norm": 16.95804786682129, "learning_rate": 1e-06, "loss": 0.6411, "num_input_tokens_seen": 413513556, "step": 7380 }, { "epoch": 16.43652561247216, "loss": 0.973118245601654, "loss_ce": 9.582037455402315e-05, "loss_iou": 0.37890625, "loss_num": 0.04296875, "loss_xval": 0.97265625, "num_input_tokens_seen": 413513556, "step": 7380 }, { "epoch": 16.438752783964365, "grad_norm": 17.053752899169922, "learning_rate": 1e-06, "loss": 0.4378, "num_input_tokens_seen": 413569980, "step": 7381 }, { "epoch": 16.438752783964365, "loss": 0.42026323080062866, "loss_ce": 9.722512913867831e-05, "loss_iou": 0.1796875, "loss_num": 0.012451171875, "loss_xval": 0.419921875, "num_input_tokens_seen": 413569980, "step": 7381 }, { "epoch": 16.44097995545657, "grad_norm": 18.919458389282227, "learning_rate": 1e-06, "loss": 0.4147, "num_input_tokens_seen": 413627584, "step": 7382 }, { "epoch": 16.44097995545657, "loss": 0.27885180711746216, "loss_ce": 0.00010426974768051878, "loss_iou": 0.11865234375, "loss_num": 0.0081787109375, "loss_xval": 0.279296875, "num_input_tokens_seen": 413627584, "step": 7382 }, { "epoch": 16.443207126948774, "grad_norm": 14.903229713439941, "learning_rate": 1e-06, "loss": 0.3204, "num_input_tokens_seen": 413684008, "step": 7383 }, { "epoch": 16.443207126948774, "loss": 0.30172598361968994, "loss_ce": 9.026764746522531e-05, "loss_iou": 0.1318359375, "loss_num": 0.007720947265625, "loss_xval": 0.30078125, "num_input_tokens_seen": 413684008, "step": 7383 }, { "epoch": 16.44543429844098, "grad_norm": 20.07790184020996, "learning_rate": 1e-06, "loss": 0.4495, "num_input_tokens_seen": 413740804, "step": 7384 }, { "epoch": 16.44543429844098, "loss": 0.4181979298591614, "loss_ce": 0.00010710630158428103, "loss_iou": 0.1953125, "loss_num": 0.00555419921875, "loss_xval": 0.41796875, "num_input_tokens_seen": 413740804, "step": 7384 }, { "epoch": 16.447661469933184, "grad_norm": 26.779462814331055, "learning_rate": 1e-06, "loss": 0.4354, "num_input_tokens_seen": 413795972, "step": 7385 }, { "epoch": 16.447661469933184, "loss": 0.5660018920898438, "loss_ce": 8.394767064601183e-05, "loss_iou": 0.251953125, "loss_num": 0.01214599609375, "loss_xval": 0.56640625, "num_input_tokens_seen": 413795972, "step": 7385 }, { "epoch": 16.44988864142539, "grad_norm": 33.413516998291016, "learning_rate": 1e-06, "loss": 0.3646, "num_input_tokens_seen": 413850008, "step": 7386 }, { "epoch": 16.44988864142539, "loss": 0.35230499505996704, "loss_ce": 0.00019320733554195613, "loss_iou": 0.16015625, "loss_num": 0.006378173828125, "loss_xval": 0.3515625, "num_input_tokens_seen": 413850008, "step": 7386 }, { "epoch": 16.452115812917594, "grad_norm": 16.137666702270508, "learning_rate": 1e-06, "loss": 0.3793, "num_input_tokens_seen": 413908860, "step": 7387 }, { "epoch": 16.452115812917594, "loss": 0.37570327520370483, "loss_ce": 9.29224188439548e-05, "loss_iou": 0.15625, "loss_num": 0.012451171875, "loss_xval": 0.375, "num_input_tokens_seen": 413908860, "step": 7387 }, { "epoch": 16.4543429844098, "grad_norm": 27.6522274017334, "learning_rate": 1e-06, "loss": 0.3894, "num_input_tokens_seen": 413964468, "step": 7388 }, { "epoch": 16.4543429844098, "loss": 0.36471468210220337, "loss_ce": 9.06552595552057e-05, "loss_iou": 0.16796875, "loss_num": 0.005859375, "loss_xval": 0.365234375, "num_input_tokens_seen": 413964468, "step": 7388 }, { "epoch": 16.456570155902003, "grad_norm": 19.11590003967285, "learning_rate": 1e-06, "loss": 0.4466, "num_input_tokens_seen": 414020268, "step": 7389 }, { "epoch": 16.456570155902003, "loss": 0.39550697803497314, "loss_ce": 0.00012124592467444018, "loss_iou": 0.16796875, "loss_num": 0.01220703125, "loss_xval": 0.39453125, "num_input_tokens_seen": 414020268, "step": 7389 }, { "epoch": 16.45879732739421, "grad_norm": 12.807024002075195, "learning_rate": 1e-06, "loss": 0.3725, "num_input_tokens_seen": 414079048, "step": 7390 }, { "epoch": 16.45879732739421, "loss": 0.3609822988510132, "loss_ce": 8.14046070445329e-05, "loss_iou": 0.1513671875, "loss_num": 0.01153564453125, "loss_xval": 0.361328125, "num_input_tokens_seen": 414079048, "step": 7390 }, { "epoch": 16.461024498886413, "grad_norm": 15.505331039428711, "learning_rate": 1e-06, "loss": 0.3684, "num_input_tokens_seen": 414134828, "step": 7391 }, { "epoch": 16.461024498886413, "loss": 0.37520790100097656, "loss_ce": 8.581792644690722e-05, "loss_iou": 0.1611328125, "loss_num": 0.01031494140625, "loss_xval": 0.375, "num_input_tokens_seen": 414134828, "step": 7391 }, { "epoch": 16.463251670378618, "grad_norm": 52.602256774902344, "learning_rate": 1e-06, "loss": 0.4731, "num_input_tokens_seen": 414188716, "step": 7392 }, { "epoch": 16.463251670378618, "loss": 0.5707922577857971, "loss_ce": 8.304757648147643e-05, "loss_iou": 0.2333984375, "loss_num": 0.0208740234375, "loss_xval": 0.5703125, "num_input_tokens_seen": 414188716, "step": 7392 }, { "epoch": 16.465478841870823, "grad_norm": 17.48988151550293, "learning_rate": 1e-06, "loss": 0.5544, "num_input_tokens_seen": 414247204, "step": 7393 }, { "epoch": 16.465478841870823, "loss": 0.6307286024093628, "loss_ce": 0.00011338148033246398, "loss_iou": 0.263671875, "loss_num": 0.0205078125, "loss_xval": 0.62890625, "num_input_tokens_seen": 414247204, "step": 7393 }, { "epoch": 16.467706013363028, "grad_norm": 20.05725860595703, "learning_rate": 1e-06, "loss": 0.4437, "num_input_tokens_seen": 414302244, "step": 7394 }, { "epoch": 16.467706013363028, "loss": 0.4451483488082886, "loss_ce": 7.999275112524629e-05, "loss_iou": 0.19921875, "loss_num": 0.00933837890625, "loss_xval": 0.4453125, "num_input_tokens_seen": 414302244, "step": 7394 }, { "epoch": 16.469933184855233, "grad_norm": 21.719507217407227, "learning_rate": 1e-06, "loss": 0.4859, "num_input_tokens_seen": 414358148, "step": 7395 }, { "epoch": 16.469933184855233, "loss": 0.5905551910400391, "loss_ce": 0.00019262763089500368, "loss_iou": 0.23046875, "loss_num": 0.0257568359375, "loss_xval": 0.58984375, "num_input_tokens_seen": 414358148, "step": 7395 }, { "epoch": 16.472160356347437, "grad_norm": 18.241609573364258, "learning_rate": 1e-06, "loss": 0.396, "num_input_tokens_seen": 414413656, "step": 7396 }, { "epoch": 16.472160356347437, "loss": 0.4245295226573944, "loss_ce": 9.104832133743912e-05, "loss_iou": 0.18359375, "loss_num": 0.01153564453125, "loss_xval": 0.423828125, "num_input_tokens_seen": 414413656, "step": 7396 }, { "epoch": 16.474387527839642, "grad_norm": 16.02747344970703, "learning_rate": 1e-06, "loss": 0.3917, "num_input_tokens_seen": 414471236, "step": 7397 }, { "epoch": 16.474387527839642, "loss": 0.4211219251155853, "loss_ce": 0.00010141961683984846, "loss_iou": 0.19140625, "loss_num": 0.007781982421875, "loss_xval": 0.421875, "num_input_tokens_seen": 414471236, "step": 7397 }, { "epoch": 16.476614699331847, "grad_norm": 26.210834503173828, "learning_rate": 1e-06, "loss": 0.3714, "num_input_tokens_seen": 414526900, "step": 7398 }, { "epoch": 16.476614699331847, "loss": 0.36032408475875854, "loss_ce": 9.459605644224212e-05, "loss_iou": 0.1630859375, "loss_num": 0.0068359375, "loss_xval": 0.359375, "num_input_tokens_seen": 414526900, "step": 7398 }, { "epoch": 16.478841870824052, "grad_norm": 21.00611114501953, "learning_rate": 1e-06, "loss": 0.3868, "num_input_tokens_seen": 414582196, "step": 7399 }, { "epoch": 16.478841870824052, "loss": 0.3853411376476288, "loss_ce": 8.721851918380708e-05, "loss_iou": 0.1513671875, "loss_num": 0.0167236328125, "loss_xval": 0.384765625, "num_input_tokens_seen": 414582196, "step": 7399 }, { "epoch": 16.481069042316257, "grad_norm": 17.40221405029297, "learning_rate": 1e-06, "loss": 0.5933, "num_input_tokens_seen": 414638744, "step": 7400 }, { "epoch": 16.481069042316257, "loss": 0.7026082277297974, "loss_ce": 9.35388816287741e-05, "loss_iou": 0.28125, "loss_num": 0.02734375, "loss_xval": 0.703125, "num_input_tokens_seen": 414638744, "step": 7400 }, { "epoch": 16.48329621380846, "grad_norm": 17.026206970214844, "learning_rate": 1e-06, "loss": 0.5015, "num_input_tokens_seen": 414695508, "step": 7401 }, { "epoch": 16.48329621380846, "loss": 0.5965702533721924, "loss_ce": 0.00013468590623233467, "loss_iou": 0.2353515625, "loss_num": 0.02490234375, "loss_xval": 0.59765625, "num_input_tokens_seen": 414695508, "step": 7401 }, { "epoch": 16.485523385300667, "grad_norm": 20.803653717041016, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 414750064, "step": 7402 }, { "epoch": 16.485523385300667, "loss": 0.4915461540222168, "loss_ce": 0.00015212838479783386, "loss_iou": 0.216796875, "loss_num": 0.01165771484375, "loss_xval": 0.4921875, "num_input_tokens_seen": 414750064, "step": 7402 }, { "epoch": 16.48775055679287, "grad_norm": 18.564687728881836, "learning_rate": 1e-06, "loss": 0.4463, "num_input_tokens_seen": 414807600, "step": 7403 }, { "epoch": 16.48775055679287, "loss": 0.4674866199493408, "loss_ce": 7.938436465337873e-05, "loss_iou": 0.20703125, "loss_num": 0.01092529296875, "loss_xval": 0.466796875, "num_input_tokens_seen": 414807600, "step": 7403 }, { "epoch": 16.489977728285076, "grad_norm": 19.705589294433594, "learning_rate": 1e-06, "loss": 0.4589, "num_input_tokens_seen": 414862628, "step": 7404 }, { "epoch": 16.489977728285076, "loss": 0.524620771408081, "loss_ce": 8.46216207719408e-05, "loss_iou": 0.22265625, "loss_num": 0.0159912109375, "loss_xval": 0.5234375, "num_input_tokens_seen": 414862628, "step": 7404 }, { "epoch": 16.49220489977728, "grad_norm": 15.944623947143555, "learning_rate": 1e-06, "loss": 0.2974, "num_input_tokens_seen": 414921268, "step": 7405 }, { "epoch": 16.49220489977728, "loss": 0.3903467059135437, "loss_ce": 8.790481660980731e-05, "loss_iou": 0.15625, "loss_num": 0.0155029296875, "loss_xval": 0.390625, "num_input_tokens_seen": 414921268, "step": 7405 }, { "epoch": 16.494432071269486, "grad_norm": 16.492647171020508, "learning_rate": 1e-06, "loss": 0.3803, "num_input_tokens_seen": 414979472, "step": 7406 }, { "epoch": 16.494432071269486, "loss": 0.46630802750587463, "loss_ce": 0.00012149002577643842, "loss_iou": 0.212890625, "loss_num": 0.00811767578125, "loss_xval": 0.466796875, "num_input_tokens_seen": 414979472, "step": 7406 }, { "epoch": 16.49665924276169, "grad_norm": 19.485671997070312, "learning_rate": 1e-06, "loss": 0.4414, "num_input_tokens_seen": 415036420, "step": 7407 }, { "epoch": 16.49665924276169, "loss": 0.3690652847290039, "loss_ce": 7.723318412899971e-05, "loss_iou": 0.16015625, "loss_num": 0.0096435546875, "loss_xval": 0.369140625, "num_input_tokens_seen": 415036420, "step": 7407 }, { "epoch": 16.498886414253896, "grad_norm": 29.99024200439453, "learning_rate": 1e-06, "loss": 0.4787, "num_input_tokens_seen": 415092432, "step": 7408 }, { "epoch": 16.498886414253896, "loss": 0.40274959802627563, "loss_ce": 0.00010066555842058733, "loss_iou": 0.1787109375, "loss_num": 0.00909423828125, "loss_xval": 0.40234375, "num_input_tokens_seen": 415092432, "step": 7408 }, { "epoch": 16.501113585746104, "grad_norm": 19.012744903564453, "learning_rate": 1e-06, "loss": 0.2874, "num_input_tokens_seen": 415148876, "step": 7409 }, { "epoch": 16.501113585746104, "loss": 0.2875651717185974, "loss_ce": 8.95664852578193e-05, "loss_iou": 0.1328125, "loss_num": 0.00421142578125, "loss_xval": 0.287109375, "num_input_tokens_seen": 415148876, "step": 7409 }, { "epoch": 16.50334075723831, "grad_norm": 14.734273910522461, "learning_rate": 1e-06, "loss": 0.6548, "num_input_tokens_seen": 415204496, "step": 7410 }, { "epoch": 16.50334075723831, "loss": 0.7657289505004883, "loss_ce": 0.00010389051749370992, "loss_iou": 0.326171875, "loss_num": 0.0228271484375, "loss_xval": 0.765625, "num_input_tokens_seen": 415204496, "step": 7410 }, { "epoch": 16.505567928730514, "grad_norm": 16.262977600097656, "learning_rate": 1e-06, "loss": 0.4765, "num_input_tokens_seen": 415260244, "step": 7411 }, { "epoch": 16.505567928730514, "loss": 0.28719890117645264, "loss_ce": 8.953356882557273e-05, "loss_iou": 0.126953125, "loss_num": 0.006439208984375, "loss_xval": 0.287109375, "num_input_tokens_seen": 415260244, "step": 7411 }, { "epoch": 16.50779510022272, "grad_norm": 21.31691551208496, "learning_rate": 1e-06, "loss": 0.3835, "num_input_tokens_seen": 415314104, "step": 7412 }, { "epoch": 16.50779510022272, "loss": 0.3603422939777374, "loss_ce": 0.00011278928286628798, "loss_iou": 0.1630859375, "loss_num": 0.00689697265625, "loss_xval": 0.359375, "num_input_tokens_seen": 415314104, "step": 7412 }, { "epoch": 16.510022271714924, "grad_norm": 20.083385467529297, "learning_rate": 1e-06, "loss": 0.4469, "num_input_tokens_seen": 415370528, "step": 7413 }, { "epoch": 16.510022271714924, "loss": 0.42683807015419006, "loss_ce": 8.025132410693914e-05, "loss_iou": 0.197265625, "loss_num": 0.00653076171875, "loss_xval": 0.42578125, "num_input_tokens_seen": 415370528, "step": 7413 }, { "epoch": 16.51224944320713, "grad_norm": 22.564624786376953, "learning_rate": 1e-06, "loss": 0.388, "num_input_tokens_seen": 415430264, "step": 7414 }, { "epoch": 16.51224944320713, "loss": 0.38656115531921387, "loss_ce": 8.655471901874989e-05, "loss_iou": 0.17578125, "loss_num": 0.007110595703125, "loss_xval": 0.38671875, "num_input_tokens_seen": 415430264, "step": 7414 }, { "epoch": 16.514476614699333, "grad_norm": 17.342039108276367, "learning_rate": 1e-06, "loss": 0.4182, "num_input_tokens_seen": 415486144, "step": 7415 }, { "epoch": 16.514476614699333, "loss": 0.3627013564109802, "loss_ce": 9.151514677796513e-05, "loss_iou": 0.1533203125, "loss_num": 0.01123046875, "loss_xval": 0.36328125, "num_input_tokens_seen": 415486144, "step": 7415 }, { "epoch": 16.51670378619154, "grad_norm": 34.493228912353516, "learning_rate": 1e-06, "loss": 0.4162, "num_input_tokens_seen": 415543736, "step": 7416 }, { "epoch": 16.51670378619154, "loss": 0.42243343591690063, "loss_ce": 0.00031430544913746417, "loss_iou": 0.185546875, "loss_num": 0.010009765625, "loss_xval": 0.421875, "num_input_tokens_seen": 415543736, "step": 7416 }, { "epoch": 16.518930957683743, "grad_norm": 26.808746337890625, "learning_rate": 1e-06, "loss": 0.4272, "num_input_tokens_seen": 415601828, "step": 7417 }, { "epoch": 16.518930957683743, "loss": 0.5273232460021973, "loss_ce": 0.00010160428064409643, "loss_iou": 0.2041015625, "loss_num": 0.023681640625, "loss_xval": 0.52734375, "num_input_tokens_seen": 415601828, "step": 7417 }, { "epoch": 16.521158129175948, "grad_norm": 9.941266059875488, "learning_rate": 1e-06, "loss": 0.3009, "num_input_tokens_seen": 415660184, "step": 7418 }, { "epoch": 16.521158129175948, "loss": 0.23453950881958008, "loss_ce": 7.294982788152993e-05, "loss_iou": 0.09912109375, "loss_num": 0.00732421875, "loss_xval": 0.234375, "num_input_tokens_seen": 415660184, "step": 7418 }, { "epoch": 16.523385300668153, "grad_norm": 26.528013229370117, "learning_rate": 1e-06, "loss": 0.6055, "num_input_tokens_seen": 415715484, "step": 7419 }, { "epoch": 16.523385300668153, "loss": 0.8102438449859619, "loss_ce": 0.000429451436502859, "loss_iou": 0.3125, "loss_num": 0.036865234375, "loss_xval": 0.80859375, "num_input_tokens_seen": 415715484, "step": 7419 }, { "epoch": 16.525612472160358, "grad_norm": 19.867149353027344, "learning_rate": 1e-06, "loss": 0.3117, "num_input_tokens_seen": 415770232, "step": 7420 }, { "epoch": 16.525612472160358, "loss": 0.26326221227645874, "loss_ce": 7.859835750423372e-05, "loss_iou": 0.12158203125, "loss_num": 0.0040283203125, "loss_xval": 0.263671875, "num_input_tokens_seen": 415770232, "step": 7420 }, { "epoch": 16.527839643652563, "grad_norm": 19.17835807800293, "learning_rate": 1e-06, "loss": 0.4112, "num_input_tokens_seen": 415825868, "step": 7421 }, { "epoch": 16.527839643652563, "loss": 0.43073534965515137, "loss_ce": 7.126775744836777e-05, "loss_iou": 0.1787109375, "loss_num": 0.01458740234375, "loss_xval": 0.4296875, "num_input_tokens_seen": 415825868, "step": 7421 }, { "epoch": 16.530066815144767, "grad_norm": 17.873729705810547, "learning_rate": 1e-06, "loss": 0.3862, "num_input_tokens_seen": 415882708, "step": 7422 }, { "epoch": 16.530066815144767, "loss": 0.26814356446266174, "loss_ce": 7.717055268585682e-05, "loss_iou": 0.119140625, "loss_num": 0.006072998046875, "loss_xval": 0.267578125, "num_input_tokens_seen": 415882708, "step": 7422 }, { "epoch": 16.532293986636972, "grad_norm": 16.114519119262695, "learning_rate": 1e-06, "loss": 0.5852, "num_input_tokens_seen": 415938224, "step": 7423 }, { "epoch": 16.532293986636972, "loss": 0.6861782073974609, "loss_ce": 0.000265107664745301, "loss_iou": 0.29296875, "loss_num": 0.0196533203125, "loss_xval": 0.6875, "num_input_tokens_seen": 415938224, "step": 7423 }, { "epoch": 16.534521158129177, "grad_norm": 21.42132568359375, "learning_rate": 1e-06, "loss": 0.3615, "num_input_tokens_seen": 415992012, "step": 7424 }, { "epoch": 16.534521158129177, "loss": 0.25483426451683044, "loss_ce": 7.351664680754766e-05, "loss_iou": 0.10546875, "loss_num": 0.00885009765625, "loss_xval": 0.25390625, "num_input_tokens_seen": 415992012, "step": 7424 }, { "epoch": 16.536748329621382, "grad_norm": 28.71778678894043, "learning_rate": 1e-06, "loss": 0.556, "num_input_tokens_seen": 416048204, "step": 7425 }, { "epoch": 16.536748329621382, "loss": 0.48399072885513306, "loss_ce": 0.00010403442865936086, "loss_iou": 0.2255859375, "loss_num": 0.00640869140625, "loss_xval": 0.484375, "num_input_tokens_seen": 416048204, "step": 7425 }, { "epoch": 16.538975501113587, "grad_norm": 19.75933265686035, "learning_rate": 1e-06, "loss": 0.5605, "num_input_tokens_seen": 416101644, "step": 7426 }, { "epoch": 16.538975501113587, "loss": 0.74393230676651, "loss_ce": 0.00018841384735424072, "loss_iou": 0.294921875, "loss_num": 0.0306396484375, "loss_xval": 0.7421875, "num_input_tokens_seen": 416101644, "step": 7426 }, { "epoch": 16.54120267260579, "grad_norm": 22.49873924255371, "learning_rate": 1e-06, "loss": 0.3687, "num_input_tokens_seen": 416160076, "step": 7427 }, { "epoch": 16.54120267260579, "loss": 0.33309027552604675, "loss_ce": 8.246669312939048e-05, "loss_iou": 0.150390625, "loss_num": 0.006500244140625, "loss_xval": 0.33203125, "num_input_tokens_seen": 416160076, "step": 7427 }, { "epoch": 16.543429844097997, "grad_norm": 13.900813102722168, "learning_rate": 1e-06, "loss": 0.4714, "num_input_tokens_seen": 416217540, "step": 7428 }, { "epoch": 16.543429844097997, "loss": 0.5082305669784546, "loss_ce": 0.00017388846026733518, "loss_iou": 0.2021484375, "loss_num": 0.0206298828125, "loss_xval": 0.5078125, "num_input_tokens_seen": 416217540, "step": 7428 }, { "epoch": 16.5456570155902, "grad_norm": 21.1329402923584, "learning_rate": 1e-06, "loss": 0.4258, "num_input_tokens_seen": 416274740, "step": 7429 }, { "epoch": 16.5456570155902, "loss": 0.3279839754104614, "loss_ce": 0.00010313085658708587, "loss_iou": 0.1318359375, "loss_num": 0.01300048828125, "loss_xval": 0.328125, "num_input_tokens_seen": 416274740, "step": 7429 }, { "epoch": 16.547884187082406, "grad_norm": 20.564817428588867, "learning_rate": 1e-06, "loss": 0.3677, "num_input_tokens_seen": 416330560, "step": 7430 }, { "epoch": 16.547884187082406, "loss": 0.31831276416778564, "loss_ce": 7.547263521701097e-05, "loss_iou": 0.1396484375, "loss_num": 0.00762939453125, "loss_xval": 0.318359375, "num_input_tokens_seen": 416330560, "step": 7430 }, { "epoch": 16.55011135857461, "grad_norm": 19.32866859436035, "learning_rate": 1e-06, "loss": 0.3478, "num_input_tokens_seen": 416384232, "step": 7431 }, { "epoch": 16.55011135857461, "loss": 0.2748854160308838, "loss_ce": 0.00010513007873669267, "loss_iou": 0.1240234375, "loss_num": 0.00537109375, "loss_xval": 0.275390625, "num_input_tokens_seen": 416384232, "step": 7431 }, { "epoch": 16.552338530066816, "grad_norm": 16.105804443359375, "learning_rate": 1e-06, "loss": 0.4259, "num_input_tokens_seen": 416442036, "step": 7432 }, { "epoch": 16.552338530066816, "loss": 0.5960502624511719, "loss_ce": 0.00010297551489202306, "loss_iou": 0.248046875, "loss_num": 0.0198974609375, "loss_xval": 0.59765625, "num_input_tokens_seen": 416442036, "step": 7432 }, { "epoch": 16.55456570155902, "grad_norm": 26.98290252685547, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 416495444, "step": 7433 }, { "epoch": 16.55456570155902, "loss": 0.2754194438457489, "loss_ce": 8.982772123999894e-05, "loss_iou": 0.12109375, "loss_num": 0.006500244140625, "loss_xval": 0.275390625, "num_input_tokens_seen": 416495444, "step": 7433 }, { "epoch": 16.556792873051226, "grad_norm": 23.57514762878418, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 416550332, "step": 7434 }, { "epoch": 16.556792873051226, "loss": 0.44662949442863464, "loss_ce": 9.629656415199861e-05, "loss_iou": 0.1875, "loss_num": 0.01409912109375, "loss_xval": 0.447265625, "num_input_tokens_seen": 416550332, "step": 7434 }, { "epoch": 16.55902004454343, "grad_norm": 15.292926788330078, "learning_rate": 1e-06, "loss": 0.5265, "num_input_tokens_seen": 416605772, "step": 7435 }, { "epoch": 16.55902004454343, "loss": 0.6938190460205078, "loss_ce": 0.00015451818762812763, "loss_iou": 0.259765625, "loss_num": 0.034912109375, "loss_xval": 0.6953125, "num_input_tokens_seen": 416605772, "step": 7435 }, { "epoch": 16.561247216035635, "grad_norm": 29.28976058959961, "learning_rate": 1e-06, "loss": 0.5638, "num_input_tokens_seen": 416662900, "step": 7436 }, { "epoch": 16.561247216035635, "loss": 0.43191277980804443, "loss_ce": 8.905126014724374e-05, "loss_iou": 0.1865234375, "loss_num": 0.011474609375, "loss_xval": 0.431640625, "num_input_tokens_seen": 416662900, "step": 7436 }, { "epoch": 16.56347438752784, "grad_norm": 17.69676971435547, "learning_rate": 1e-06, "loss": 0.4027, "num_input_tokens_seen": 416718660, "step": 7437 }, { "epoch": 16.56347438752784, "loss": 0.368381142616272, "loss_ce": 9.499008592683822e-05, "loss_iou": 0.1748046875, "loss_num": 0.0035552978515625, "loss_xval": 0.369140625, "num_input_tokens_seen": 416718660, "step": 7437 }, { "epoch": 16.565701559020045, "grad_norm": 21.888330459594727, "learning_rate": 1e-06, "loss": 0.4108, "num_input_tokens_seen": 416775236, "step": 7438 }, { "epoch": 16.565701559020045, "loss": 0.48281651735305786, "loss_ce": 8.945601439336315e-05, "loss_iou": 0.2197265625, "loss_num": 0.00872802734375, "loss_xval": 0.482421875, "num_input_tokens_seen": 416775236, "step": 7438 }, { "epoch": 16.56792873051225, "grad_norm": 15.91091537475586, "learning_rate": 1e-06, "loss": 0.4277, "num_input_tokens_seen": 416831260, "step": 7439 }, { "epoch": 16.56792873051225, "loss": 0.4805620312690735, "loss_ce": 9.329438034910709e-05, "loss_iou": 0.21875, "loss_num": 0.00836181640625, "loss_xval": 0.48046875, "num_input_tokens_seen": 416831260, "step": 7439 }, { "epoch": 16.570155902004455, "grad_norm": 32.6405143737793, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 416887804, "step": 7440 }, { "epoch": 16.570155902004455, "loss": 0.47945916652679443, "loss_ce": 8.904725837055594e-05, "loss_iou": 0.21875, "loss_num": 0.00848388671875, "loss_xval": 0.478515625, "num_input_tokens_seen": 416887804, "step": 7440 }, { "epoch": 16.57238307349666, "grad_norm": 17.98710060119629, "learning_rate": 1e-06, "loss": 0.5698, "num_input_tokens_seen": 416944556, "step": 7441 }, { "epoch": 16.57238307349666, "loss": 0.5035039782524109, "loss_ce": 8.599211287219077e-05, "loss_iou": 0.2021484375, "loss_num": 0.0201416015625, "loss_xval": 0.50390625, "num_input_tokens_seen": 416944556, "step": 7441 }, { "epoch": 16.574610244988865, "grad_norm": 17.1207332611084, "learning_rate": 1e-06, "loss": 0.5436, "num_input_tokens_seen": 417000276, "step": 7442 }, { "epoch": 16.574610244988865, "loss": 0.4915321469306946, "loss_ce": 7.706407632213086e-05, "loss_iou": 0.208984375, "loss_num": 0.01458740234375, "loss_xval": 0.4921875, "num_input_tokens_seen": 417000276, "step": 7442 }, { "epoch": 16.57683741648107, "grad_norm": 12.052204132080078, "learning_rate": 1e-06, "loss": 0.3561, "num_input_tokens_seen": 417056968, "step": 7443 }, { "epoch": 16.57683741648107, "loss": 0.3951103687286377, "loss_ce": 9.083513577934355e-05, "loss_iou": 0.150390625, "loss_num": 0.0186767578125, "loss_xval": 0.39453125, "num_input_tokens_seen": 417056968, "step": 7443 }, { "epoch": 16.579064587973274, "grad_norm": 29.809144973754883, "learning_rate": 1e-06, "loss": 0.3865, "num_input_tokens_seen": 417111816, "step": 7444 }, { "epoch": 16.579064587973274, "loss": 0.3363809287548065, "loss_ce": 7.720827125012875e-05, "loss_iou": 0.1513671875, "loss_num": 0.006866455078125, "loss_xval": 0.3359375, "num_input_tokens_seen": 417111816, "step": 7444 }, { "epoch": 16.58129175946548, "grad_norm": 43.40556716918945, "learning_rate": 1e-06, "loss": 0.3145, "num_input_tokens_seen": 417170836, "step": 7445 }, { "epoch": 16.58129175946548, "loss": 0.2581978440284729, "loss_ce": 8.018691733013839e-05, "loss_iou": 0.1123046875, "loss_num": 0.00653076171875, "loss_xval": 0.2578125, "num_input_tokens_seen": 417170836, "step": 7445 }, { "epoch": 16.583518930957684, "grad_norm": 19.96676254272461, "learning_rate": 1e-06, "loss": 0.2797, "num_input_tokens_seen": 417226100, "step": 7446 }, { "epoch": 16.583518930957684, "loss": 0.307026207447052, "loss_ce": 8.040900866035372e-05, "loss_iou": 0.13671875, "loss_num": 0.006744384765625, "loss_xval": 0.306640625, "num_input_tokens_seen": 417226100, "step": 7446 }, { "epoch": 16.58574610244989, "grad_norm": 18.28860855102539, "learning_rate": 1e-06, "loss": 0.4555, "num_input_tokens_seen": 417282572, "step": 7447 }, { "epoch": 16.58574610244989, "loss": 0.434162974357605, "loss_ce": 8.092315692920238e-05, "loss_iou": 0.185546875, "loss_num": 0.01263427734375, "loss_xval": 0.43359375, "num_input_tokens_seen": 417282572, "step": 7447 }, { "epoch": 16.587973273942094, "grad_norm": 19.922115325927734, "learning_rate": 1e-06, "loss": 0.3285, "num_input_tokens_seen": 417336348, "step": 7448 }, { "epoch": 16.587973273942094, "loss": 0.2561919093132019, "loss_ce": 8.838169014779851e-05, "loss_iou": 0.1162109375, "loss_num": 0.00482177734375, "loss_xval": 0.255859375, "num_input_tokens_seen": 417336348, "step": 7448 }, { "epoch": 16.5902004454343, "grad_norm": 15.854660034179688, "learning_rate": 1e-06, "loss": 0.3569, "num_input_tokens_seen": 417393976, "step": 7449 }, { "epoch": 16.5902004454343, "loss": 0.4261277914047241, "loss_ce": 0.00010242144344374537, "loss_iou": 0.1962890625, "loss_num": 0.006561279296875, "loss_xval": 0.42578125, "num_input_tokens_seen": 417393976, "step": 7449 }, { "epoch": 16.592427616926503, "grad_norm": 15.823163986206055, "learning_rate": 1e-06, "loss": 0.4953, "num_input_tokens_seen": 417453068, "step": 7450 }, { "epoch": 16.592427616926503, "loss": 0.43979281187057495, "loss_ce": 9.553506970405579e-05, "loss_iou": 0.1689453125, "loss_num": 0.0205078125, "loss_xval": 0.439453125, "num_input_tokens_seen": 417453068, "step": 7450 }, { "epoch": 16.59465478841871, "grad_norm": 15.928594589233398, "learning_rate": 1e-06, "loss": 0.5031, "num_input_tokens_seen": 417509392, "step": 7451 }, { "epoch": 16.59465478841871, "loss": 0.5577445030212402, "loss_ce": 0.00012731979950331151, "loss_iou": 0.25, "loss_num": 0.01123046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 417509392, "step": 7451 }, { "epoch": 16.596881959910913, "grad_norm": 20.19134521484375, "learning_rate": 1e-06, "loss": 0.3192, "num_input_tokens_seen": 417565928, "step": 7452 }, { "epoch": 16.596881959910913, "loss": 0.2749907076358795, "loss_ce": 8.836391498334706e-05, "loss_iou": 0.10107421875, "loss_num": 0.01458740234375, "loss_xval": 0.275390625, "num_input_tokens_seen": 417565928, "step": 7452 }, { "epoch": 16.599109131403118, "grad_norm": 28.828327178955078, "learning_rate": 1e-06, "loss": 0.4603, "num_input_tokens_seen": 417621032, "step": 7453 }, { "epoch": 16.599109131403118, "loss": 0.46700233221054077, "loss_ce": 8.33997328300029e-05, "loss_iou": 0.220703125, "loss_num": 0.00494384765625, "loss_xval": 0.466796875, "num_input_tokens_seen": 417621032, "step": 7453 }, { "epoch": 16.601336302895323, "grad_norm": 42.66580581665039, "learning_rate": 1e-06, "loss": 0.5839, "num_input_tokens_seen": 417678064, "step": 7454 }, { "epoch": 16.601336302895323, "loss": 0.4696599841117859, "loss_ce": 0.00042174485861323774, "loss_iou": 0.2041015625, "loss_num": 0.01214599609375, "loss_xval": 0.46875, "num_input_tokens_seen": 417678064, "step": 7454 }, { "epoch": 16.603563474387528, "grad_norm": 13.862714767456055, "learning_rate": 1e-06, "loss": 0.2689, "num_input_tokens_seen": 417734624, "step": 7455 }, { "epoch": 16.603563474387528, "loss": 0.2475118190050125, "loss_ce": 7.529581489507109e-05, "loss_iou": 0.1064453125, "loss_num": 0.006805419921875, "loss_xval": 0.2470703125, "num_input_tokens_seen": 417734624, "step": 7455 }, { "epoch": 16.605790645879733, "grad_norm": 18.104511260986328, "learning_rate": 1e-06, "loss": 0.3205, "num_input_tokens_seen": 417790132, "step": 7456 }, { "epoch": 16.605790645879733, "loss": 0.2718583941459656, "loss_ce": 0.00012988239177502692, "loss_iou": 0.109375, "loss_num": 0.0106201171875, "loss_xval": 0.271484375, "num_input_tokens_seen": 417790132, "step": 7456 }, { "epoch": 16.608017817371937, "grad_norm": 15.675362586975098, "learning_rate": 1e-06, "loss": 0.455, "num_input_tokens_seen": 417844852, "step": 7457 }, { "epoch": 16.608017817371937, "loss": 0.5134113430976868, "loss_ce": 0.00010569683945504948, "loss_iou": 0.212890625, "loss_num": 0.017578125, "loss_xval": 0.51171875, "num_input_tokens_seen": 417844852, "step": 7457 }, { "epoch": 16.610244988864142, "grad_norm": 18.084177017211914, "learning_rate": 1e-06, "loss": 0.5179, "num_input_tokens_seen": 417901268, "step": 7458 }, { "epoch": 16.610244988864142, "loss": 0.5478125810623169, "loss_ce": 8.308600808959454e-05, "loss_iou": 0.2392578125, "loss_num": 0.013916015625, "loss_xval": 0.546875, "num_input_tokens_seen": 417901268, "step": 7458 }, { "epoch": 16.612472160356347, "grad_norm": 15.700066566467285, "learning_rate": 1e-06, "loss": 0.4414, "num_input_tokens_seen": 417957084, "step": 7459 }, { "epoch": 16.612472160356347, "loss": 0.4971623420715332, "loss_ce": 9.201420471072197e-05, "loss_iou": 0.20703125, "loss_num": 0.016357421875, "loss_xval": 0.49609375, "num_input_tokens_seen": 417957084, "step": 7459 }, { "epoch": 16.614699331848552, "grad_norm": 19.968868255615234, "learning_rate": 1e-06, "loss": 0.4322, "num_input_tokens_seen": 418013952, "step": 7460 }, { "epoch": 16.614699331848552, "loss": 0.4334394335746765, "loss_ce": 8.979369886219501e-05, "loss_iou": 0.1982421875, "loss_num": 0.0074462890625, "loss_xval": 0.43359375, "num_input_tokens_seen": 418013952, "step": 7460 }, { "epoch": 16.616926503340757, "grad_norm": 15.141510009765625, "learning_rate": 1e-06, "loss": 0.3758, "num_input_tokens_seen": 418069596, "step": 7461 }, { "epoch": 16.616926503340757, "loss": 0.35867738723754883, "loss_ce": 9.585694351699203e-05, "loss_iou": 0.1552734375, "loss_num": 0.009521484375, "loss_xval": 0.359375, "num_input_tokens_seen": 418069596, "step": 7461 }, { "epoch": 16.619153674832962, "grad_norm": 16.423025131225586, "learning_rate": 1e-06, "loss": 0.5082, "num_input_tokens_seen": 418125016, "step": 7462 }, { "epoch": 16.619153674832962, "loss": 0.7706419229507446, "loss_ce": 0.00013412557018455118, "loss_iou": 0.30078125, "loss_num": 0.033935546875, "loss_xval": 0.76953125, "num_input_tokens_seen": 418125016, "step": 7462 }, { "epoch": 16.621380846325167, "grad_norm": 21.270771026611328, "learning_rate": 1e-06, "loss": 0.5281, "num_input_tokens_seen": 418180920, "step": 7463 }, { "epoch": 16.621380846325167, "loss": 0.5241183042526245, "loss_ce": 7.047592953313142e-05, "loss_iou": 0.203125, "loss_num": 0.023681640625, "loss_xval": 0.5234375, "num_input_tokens_seen": 418180920, "step": 7463 }, { "epoch": 16.62360801781737, "grad_norm": 18.848526000976562, "learning_rate": 1e-06, "loss": 0.4706, "num_input_tokens_seen": 418236248, "step": 7464 }, { "epoch": 16.62360801781737, "loss": 0.5589928030967712, "loss_ce": 0.0002769696293398738, "loss_iou": 0.232421875, "loss_num": 0.018798828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 418236248, "step": 7464 }, { "epoch": 16.625835189309576, "grad_norm": 18.260950088500977, "learning_rate": 1e-06, "loss": 0.4122, "num_input_tokens_seen": 418292596, "step": 7465 }, { "epoch": 16.625835189309576, "loss": 0.5881445407867432, "loss_ce": 0.00013190052413847297, "loss_iou": 0.255859375, "loss_num": 0.015380859375, "loss_xval": 0.58984375, "num_input_tokens_seen": 418292596, "step": 7465 }, { "epoch": 16.62806236080178, "grad_norm": 19.902740478515625, "learning_rate": 1e-06, "loss": 0.4294, "num_input_tokens_seen": 418348084, "step": 7466 }, { "epoch": 16.62806236080178, "loss": 0.4913012981414795, "loss_ce": 9.037186828209087e-05, "loss_iou": 0.2275390625, "loss_num": 0.00726318359375, "loss_xval": 0.4921875, "num_input_tokens_seen": 418348084, "step": 7466 }, { "epoch": 16.630289532293986, "grad_norm": 23.93308448791504, "learning_rate": 1e-06, "loss": 0.3016, "num_input_tokens_seen": 418404308, "step": 7467 }, { "epoch": 16.630289532293986, "loss": 0.2502018213272095, "loss_ce": 7.976325287017971e-05, "loss_iou": 0.11572265625, "loss_num": 0.0037689208984375, "loss_xval": 0.25, "num_input_tokens_seen": 418404308, "step": 7467 }, { "epoch": 16.63251670378619, "grad_norm": 16.425756454467773, "learning_rate": 1e-06, "loss": 0.4722, "num_input_tokens_seen": 418460596, "step": 7468 }, { "epoch": 16.63251670378619, "loss": 0.5028359889984131, "loss_ce": 8.939744293456897e-05, "loss_iou": 0.23046875, "loss_num": 0.00860595703125, "loss_xval": 0.50390625, "num_input_tokens_seen": 418460596, "step": 7468 }, { "epoch": 16.634743875278396, "grad_norm": 15.644196510314941, "learning_rate": 1e-06, "loss": 0.5605, "num_input_tokens_seen": 418516840, "step": 7469 }, { "epoch": 16.634743875278396, "loss": 0.8118726015090942, "loss_ce": 0.00010501188808120787, "loss_iou": 0.306640625, "loss_num": 0.03955078125, "loss_xval": 0.8125, "num_input_tokens_seen": 418516840, "step": 7469 }, { "epoch": 16.6369710467706, "grad_norm": 21.61148452758789, "learning_rate": 1e-06, "loss": 0.5139, "num_input_tokens_seen": 418575128, "step": 7470 }, { "epoch": 16.6369710467706, "loss": 0.6065396070480347, "loss_ce": 9.430487989448011e-05, "loss_iou": 0.271484375, "loss_num": 0.01239013671875, "loss_xval": 0.60546875, "num_input_tokens_seen": 418575128, "step": 7470 }, { "epoch": 16.639198218262806, "grad_norm": 17.30372428894043, "learning_rate": 1e-06, "loss": 0.5143, "num_input_tokens_seen": 418631736, "step": 7471 }, { "epoch": 16.639198218262806, "loss": 0.32015693187713623, "loss_ce": 8.858228102326393e-05, "loss_iou": 0.134765625, "loss_num": 0.0098876953125, "loss_xval": 0.3203125, "num_input_tokens_seen": 418631736, "step": 7471 }, { "epoch": 16.64142538975501, "grad_norm": 20.17923355102539, "learning_rate": 1e-06, "loss": 0.3716, "num_input_tokens_seen": 418685912, "step": 7472 }, { "epoch": 16.64142538975501, "loss": 0.39355170726776123, "loss_ce": 0.0007294651586562395, "loss_iou": 0.1826171875, "loss_num": 0.005645751953125, "loss_xval": 0.392578125, "num_input_tokens_seen": 418685912, "step": 7472 }, { "epoch": 16.643652561247215, "grad_norm": 21.51079559326172, "learning_rate": 1e-06, "loss": 0.386, "num_input_tokens_seen": 418739856, "step": 7473 }, { "epoch": 16.643652561247215, "loss": 0.3053746223449707, "loss_ce": 7.678715337533504e-05, "loss_iou": 0.1298828125, "loss_num": 0.00897216796875, "loss_xval": 0.3046875, "num_input_tokens_seen": 418739856, "step": 7473 }, { "epoch": 16.64587973273942, "grad_norm": 18.511350631713867, "learning_rate": 1e-06, "loss": 0.5517, "num_input_tokens_seen": 418796664, "step": 7474 }, { "epoch": 16.64587973273942, "loss": 0.4539787769317627, "loss_ce": 0.0003654793545138091, "loss_iou": 0.19140625, "loss_num": 0.0142822265625, "loss_xval": 0.453125, "num_input_tokens_seen": 418796664, "step": 7474 }, { "epoch": 16.648106904231625, "grad_norm": 18.879634857177734, "learning_rate": 1e-06, "loss": 0.5122, "num_input_tokens_seen": 418853068, "step": 7475 }, { "epoch": 16.648106904231625, "loss": 0.6420133113861084, "loss_ce": 0.00010659612598828971, "loss_iou": 0.296875, "loss_num": 0.00970458984375, "loss_xval": 0.640625, "num_input_tokens_seen": 418853068, "step": 7475 }, { "epoch": 16.65033407572383, "grad_norm": 22.095844268798828, "learning_rate": 1e-06, "loss": 0.4475, "num_input_tokens_seen": 418907716, "step": 7476 }, { "epoch": 16.65033407572383, "loss": 0.5320791006088257, "loss_ce": 9.662572119850665e-05, "loss_iou": 0.251953125, "loss_num": 0.00555419921875, "loss_xval": 0.53125, "num_input_tokens_seen": 418907716, "step": 7476 }, { "epoch": 16.652561247216035, "grad_norm": 22.207090377807617, "learning_rate": 1e-06, "loss": 0.4054, "num_input_tokens_seen": 418963728, "step": 7477 }, { "epoch": 16.652561247216035, "loss": 0.3793591856956482, "loss_ce": 8.672128024045378e-05, "loss_iou": 0.1728515625, "loss_num": 0.00677490234375, "loss_xval": 0.37890625, "num_input_tokens_seen": 418963728, "step": 7477 }, { "epoch": 16.65478841870824, "grad_norm": 16.67801856994629, "learning_rate": 1e-06, "loss": 0.6013, "num_input_tokens_seen": 419016604, "step": 7478 }, { "epoch": 16.65478841870824, "loss": 0.6391165256500244, "loss_ce": 7.844362698961049e-05, "loss_iou": 0.279296875, "loss_num": 0.016357421875, "loss_xval": 0.640625, "num_input_tokens_seen": 419016604, "step": 7478 }, { "epoch": 16.657015590200444, "grad_norm": 14.033241271972656, "learning_rate": 1e-06, "loss": 0.4418, "num_input_tokens_seen": 419074052, "step": 7479 }, { "epoch": 16.657015590200444, "loss": 0.3826572299003601, "loss_ce": 8.888993761502206e-05, "loss_iou": 0.177734375, "loss_num": 0.0054931640625, "loss_xval": 0.3828125, "num_input_tokens_seen": 419074052, "step": 7479 }, { "epoch": 16.65924276169265, "grad_norm": 13.08681869506836, "learning_rate": 1e-06, "loss": 0.3011, "num_input_tokens_seen": 419133076, "step": 7480 }, { "epoch": 16.65924276169265, "loss": 0.18371227383613586, "loss_ce": 8.800254727248102e-05, "loss_iou": 0.07373046875, "loss_num": 0.007110595703125, "loss_xval": 0.18359375, "num_input_tokens_seen": 419133076, "step": 7480 }, { "epoch": 16.661469933184854, "grad_norm": 19.100345611572266, "learning_rate": 1e-06, "loss": 0.3935, "num_input_tokens_seen": 419186412, "step": 7481 }, { "epoch": 16.661469933184854, "loss": 0.3841213583946228, "loss_ce": 0.00021022261353209615, "loss_iou": 0.1748046875, "loss_num": 0.006591796875, "loss_xval": 0.384765625, "num_input_tokens_seen": 419186412, "step": 7481 }, { "epoch": 16.66369710467706, "grad_norm": 15.811746597290039, "learning_rate": 1e-06, "loss": 0.3697, "num_input_tokens_seen": 419244064, "step": 7482 }, { "epoch": 16.66369710467706, "loss": 0.36989980936050415, "loss_ce": 8.779930794844404e-05, "loss_iou": 0.169921875, "loss_num": 0.0059814453125, "loss_xval": 0.369140625, "num_input_tokens_seen": 419244064, "step": 7482 }, { "epoch": 16.665924276169264, "grad_norm": 19.878259658813477, "learning_rate": 1e-06, "loss": 0.4357, "num_input_tokens_seen": 419300828, "step": 7483 }, { "epoch": 16.665924276169264, "loss": 0.3201012909412384, "loss_ce": 9.395321103511378e-05, "loss_iou": 0.134765625, "loss_num": 0.01007080078125, "loss_xval": 0.3203125, "num_input_tokens_seen": 419300828, "step": 7483 }, { "epoch": 16.66815144766147, "grad_norm": 18.946975708007812, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 419355872, "step": 7484 }, { "epoch": 16.66815144766147, "loss": 0.48397648334503174, "loss_ce": 8.974310185294598e-05, "loss_iou": 0.2236328125, "loss_num": 0.00750732421875, "loss_xval": 0.484375, "num_input_tokens_seen": 419355872, "step": 7484 }, { "epoch": 16.670378619153674, "grad_norm": 30.613773345947266, "learning_rate": 1e-06, "loss": 0.4782, "num_input_tokens_seen": 419411584, "step": 7485 }, { "epoch": 16.670378619153674, "loss": 0.42331117391586304, "loss_ce": 0.00021546825882978737, "loss_iou": 0.16796875, "loss_num": 0.0174560546875, "loss_xval": 0.423828125, "num_input_tokens_seen": 419411584, "step": 7485 }, { "epoch": 16.67260579064588, "grad_norm": 12.629964828491211, "learning_rate": 1e-06, "loss": 0.4128, "num_input_tokens_seen": 419468052, "step": 7486 }, { "epoch": 16.67260579064588, "loss": 0.5439175367355347, "loss_ce": 9.432664228370413e-05, "loss_iou": 0.2314453125, "loss_num": 0.01611328125, "loss_xval": 0.54296875, "num_input_tokens_seen": 419468052, "step": 7486 }, { "epoch": 16.674832962138083, "grad_norm": 21.88045883178711, "learning_rate": 1e-06, "loss": 0.3252, "num_input_tokens_seen": 419522816, "step": 7487 }, { "epoch": 16.674832962138083, "loss": 0.3819087743759155, "loss_ce": 0.00013384762860368937, "loss_iou": 0.1640625, "loss_num": 0.01080322265625, "loss_xval": 0.380859375, "num_input_tokens_seen": 419522816, "step": 7487 }, { "epoch": 16.677060133630288, "grad_norm": 20.802846908569336, "learning_rate": 1e-06, "loss": 0.4882, "num_input_tokens_seen": 419576664, "step": 7488 }, { "epoch": 16.677060133630288, "loss": 0.3609151840209961, "loss_ce": 0.00010585598647594452, "loss_iou": 0.171875, "loss_num": 0.003204345703125, "loss_xval": 0.361328125, "num_input_tokens_seen": 419576664, "step": 7488 }, { "epoch": 16.679287305122493, "grad_norm": 30.43656349182129, "learning_rate": 1e-06, "loss": 0.4603, "num_input_tokens_seen": 419631152, "step": 7489 }, { "epoch": 16.679287305122493, "loss": 0.5530772805213928, "loss_ce": 9.875621617538854e-05, "loss_iou": 0.2490234375, "loss_num": 0.01104736328125, "loss_xval": 0.5546875, "num_input_tokens_seen": 419631152, "step": 7489 }, { "epoch": 16.681514476614698, "grad_norm": 24.13094139099121, "learning_rate": 1e-06, "loss": 0.4435, "num_input_tokens_seen": 419687488, "step": 7490 }, { "epoch": 16.681514476614698, "loss": 0.37974900007247925, "loss_ce": 0.00011031976464437321, "loss_iou": 0.1708984375, "loss_num": 0.007476806640625, "loss_xval": 0.37890625, "num_input_tokens_seen": 419687488, "step": 7490 }, { "epoch": 16.683741648106903, "grad_norm": 15.836715698242188, "learning_rate": 1e-06, "loss": 0.4628, "num_input_tokens_seen": 419745188, "step": 7491 }, { "epoch": 16.683741648106903, "loss": 0.4252764582633972, "loss_ce": 0.0001055425891536288, "loss_iou": 0.169921875, "loss_num": 0.01708984375, "loss_xval": 0.42578125, "num_input_tokens_seen": 419745188, "step": 7491 }, { "epoch": 16.685968819599108, "grad_norm": 13.860366821289062, "learning_rate": 1e-06, "loss": 0.7326, "num_input_tokens_seen": 419798892, "step": 7492 }, { "epoch": 16.685968819599108, "loss": 0.8469661474227905, "loss_ce": 0.00010327581549063325, "loss_iou": 0.33984375, "loss_num": 0.033935546875, "loss_xval": 0.84765625, "num_input_tokens_seen": 419798892, "step": 7492 }, { "epoch": 16.688195991091312, "grad_norm": 18.5252742767334, "learning_rate": 1e-06, "loss": 0.4905, "num_input_tokens_seen": 419852848, "step": 7493 }, { "epoch": 16.688195991091312, "loss": 0.4520108699798584, "loss_ce": 0.00012183596845716238, "loss_iou": 0.193359375, "loss_num": 0.01287841796875, "loss_xval": 0.451171875, "num_input_tokens_seen": 419852848, "step": 7493 }, { "epoch": 16.690423162583517, "grad_norm": 33.927978515625, "learning_rate": 1e-06, "loss": 0.4021, "num_input_tokens_seen": 419909276, "step": 7494 }, { "epoch": 16.690423162583517, "loss": 0.4526256322860718, "loss_ce": 0.00013765764015261084, "loss_iou": 0.2060546875, "loss_num": 0.0081787109375, "loss_xval": 0.453125, "num_input_tokens_seen": 419909276, "step": 7494 }, { "epoch": 16.692650334075722, "grad_norm": 17.114513397216797, "learning_rate": 1e-06, "loss": 0.4049, "num_input_tokens_seen": 419965156, "step": 7495 }, { "epoch": 16.692650334075722, "loss": 0.5079430341720581, "loss_ce": 0.00019155110931023955, "loss_iou": 0.23046875, "loss_num": 0.00933837890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 419965156, "step": 7495 }, { "epoch": 16.694877505567927, "grad_norm": 20.878402709960938, "learning_rate": 1e-06, "loss": 0.3769, "num_input_tokens_seen": 420021460, "step": 7496 }, { "epoch": 16.694877505567927, "loss": 0.5149946212768555, "loss_ce": 0.00010204176942352206, "loss_iou": 0.19921875, "loss_num": 0.0235595703125, "loss_xval": 0.515625, "num_input_tokens_seen": 420021460, "step": 7496 }, { "epoch": 16.697104677060132, "grad_norm": 11.510066032409668, "learning_rate": 1e-06, "loss": 0.3932, "num_input_tokens_seen": 420077988, "step": 7497 }, { "epoch": 16.697104677060132, "loss": 0.4444170892238617, "loss_ce": 8.116405660985038e-05, "loss_iou": 0.19140625, "loss_num": 0.01214599609375, "loss_xval": 0.4453125, "num_input_tokens_seen": 420077988, "step": 7497 }, { "epoch": 16.69933184855234, "grad_norm": 21.444026947021484, "learning_rate": 1e-06, "loss": 0.4323, "num_input_tokens_seen": 420135408, "step": 7498 }, { "epoch": 16.69933184855234, "loss": 0.34426337480545044, "loss_ce": 8.61354055814445e-05, "loss_iou": 0.16015625, "loss_num": 0.004638671875, "loss_xval": 0.34375, "num_input_tokens_seen": 420135408, "step": 7498 }, { "epoch": 16.70155902004454, "grad_norm": 18.71256446838379, "learning_rate": 1e-06, "loss": 0.3837, "num_input_tokens_seen": 420191188, "step": 7499 }, { "epoch": 16.70155902004454, "loss": 0.5224635601043701, "loss_ce": 0.00012473194510675967, "loss_iou": 0.2333984375, "loss_num": 0.01129150390625, "loss_xval": 0.5234375, "num_input_tokens_seen": 420191188, "step": 7499 }, { "epoch": 16.70378619153675, "grad_norm": 29.126888275146484, "learning_rate": 1e-06, "loss": 0.3387, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "eval_seeclick_web_CIoU": 0.5869028568267822, "eval_seeclick_web_GIoU": 0.5851646363735199, "eval_seeclick_web_IoU": 0.6060521006584167, "eval_seeclick_web_MAE_all": 0.015174605650827289, "eval_seeclick_web_MAE_h": 0.007236489560455084, "eval_seeclick_web_MAE_w": 0.015413875225931406, "eval_seeclick_web_MAE_x_boxes": 0.008164346683770418, "eval_seeclick_web_MAE_y_boxes": 0.02128999726846814, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9088981747627258, "eval_seeclick_web_loss_ce": 0.00014426549023482949, "eval_seeclick_web_loss_iou": 0.419921875, "eval_seeclick_web_loss_num": 0.01212310791015625, "eval_seeclick_web_loss_xval": 0.900146484375, "eval_seeclick_web_runtime": 24.9651, "eval_seeclick_web_samples_per_second": 2.003, "eval_seeclick_web_steps_per_second": 0.08, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "eval_icons_CIoU": 0.2704924941062927, "eval_icons_GIoU": 0.2915039211511612, "eval_icons_IoU": 0.34834006428718567, "eval_icons_MAE_all": 0.059374475851655006, "eval_icons_MAE_h": 0.03465741407126188, "eval_icons_MAE_w": 0.058007813058793545, "eval_icons_MAE_x_boxes": 0.05980631522834301, "eval_icons_MAE_y_boxes": 0.03711246699094772, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7317432165145874, "eval_icons_loss_ce": 0.0001763724285410717, "eval_icons_loss_iou": 0.680908203125, "eval_icons_loss_num": 0.0583648681640625, "eval_icons_loss_xval": 1.654541015625, "eval_icons_runtime": 24.5042, "eval_icons_samples_per_second": 2.04, "eval_icons_steps_per_second": 0.082, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "eval_screenspot_CIoU": 0.37447473406791687, "eval_screenspot_GIoU": 0.392286479473114, "eval_screenspot_IoU": 0.4470125635464986, "eval_screenspot_MAE_all": 0.057117752730846405, "eval_screenspot_MAE_h": 0.039663772409160934, "eval_screenspot_MAE_w": 0.06271877388159434, "eval_screenspot_MAE_x_boxes": 0.06692921556532383, "eval_screenspot_MAE_y_boxes": 0.03935454785823822, "eval_screenspot_inside_bbox": 0.693750003973643, "eval_screenspot_loss": 1.5693501234054565, "eval_screenspot_loss_ce": 0.0001859396434156224, "eval_screenspot_loss_iou": 0.651611328125, "eval_screenspot_loss_num": 0.06552505493164062, "eval_screenspot_loss_xval": 1.6298828125, "eval_screenspot_runtime": 40.939, "eval_screenspot_samples_per_second": 2.174, "eval_screenspot_steps_per_second": 0.073, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "eval_compot_CIoU": 0.346510648727417, "eval_compot_GIoU": 0.35510683059692383, "eval_compot_IoU": 0.4062563627958298, "eval_compot_MAE_all": 0.01756852399557829, "eval_compot_MAE_h": 0.00829399167560041, "eval_compot_MAE_w": 0.02114854846149683, "eval_compot_MAE_x_boxes": 0.029594723135232925, "eval_compot_MAE_y_boxes": 0.0067301481030881405, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3873258829116821, "eval_compot_loss_ce": 0.00013870510883862153, "eval_compot_loss_iou": 0.6431884765625, "eval_compot_loss_num": 0.016324996948242188, "eval_compot_loss_xval": 1.366943359375, "eval_compot_runtime": 25.1596, "eval_compot_samples_per_second": 1.987, "eval_compot_steps_per_second": 0.079, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "eval_custom_ui_val_CIoU": 0.47488271362251705, "eval_custom_ui_val_GIoU": 0.4806961201959186, "eval_custom_ui_val_IoU": 0.5364741186300913, "eval_custom_ui_val_MAE_all": 0.027154105249792337, "eval_custom_ui_val_MAE_h": 0.014837595815252926, "eval_custom_ui_val_MAE_w": 0.036442533497595124, "eval_custom_ui_val_MAE_x_boxes": 0.03312135862910913, "eval_custom_ui_val_MAE_y_boxes": 0.012457883761574825, "eval_custom_ui_val_inside_bbox": 0.7754629651705424, "eval_custom_ui_val_loss": 1.1677404642105103, "eval_custom_ui_val_loss_ce": 0.00016540858794340037, "eval_custom_ui_val_loss_iou": 0.5015055338541666, "eval_custom_ui_val_loss_num": 0.023720741271972656, "eval_custom_ui_val_loss_xval": 1.1214735243055556, "eval_custom_ui_val_runtime": 78.9911, "eval_custom_ui_val_samples_per_second": 3.355, "eval_custom_ui_val_steps_per_second": 0.114, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.70378619153675, "loss": 0.8367924690246582, "loss_ce": 0.00012252142187207937, "loss_iou": 0.376953125, "loss_num": 0.016845703125, "loss_xval": 0.8359375, "num_input_tokens_seen": 420248812, "step": 7500 }, { "epoch": 16.706013363028955, "grad_norm": 16.65875244140625, "learning_rate": 1e-06, "loss": 0.4408, "num_input_tokens_seen": 420303256, "step": 7501 }, { "epoch": 16.706013363028955, "loss": 0.3736181855201721, "loss_ce": 8.305851952172816e-05, "loss_iou": 0.1591796875, "loss_num": 0.01116943359375, "loss_xval": 0.373046875, "num_input_tokens_seen": 420303256, "step": 7501 }, { "epoch": 16.70824053452116, "grad_norm": 17.692901611328125, "learning_rate": 1e-06, "loss": 0.5898, "num_input_tokens_seen": 420356912, "step": 7502 }, { "epoch": 16.70824053452116, "loss": 0.33262205123901367, "loss_ce": 0.00010252789070364088, "loss_iou": 0.142578125, "loss_num": 0.00958251953125, "loss_xval": 0.33203125, "num_input_tokens_seen": 420356912, "step": 7502 }, { "epoch": 16.710467706013365, "grad_norm": 19.661197662353516, "learning_rate": 1e-06, "loss": 0.4288, "num_input_tokens_seen": 420414376, "step": 7503 }, { "epoch": 16.710467706013365, "loss": 0.4624839723110199, "loss_ce": 8.163749589584768e-05, "loss_iou": 0.181640625, "loss_num": 0.0196533203125, "loss_xval": 0.462890625, "num_input_tokens_seen": 420414376, "step": 7503 }, { "epoch": 16.71269487750557, "grad_norm": 16.957685470581055, "learning_rate": 1e-06, "loss": 0.4464, "num_input_tokens_seen": 420470196, "step": 7504 }, { "epoch": 16.71269487750557, "loss": 0.39802345633506775, "loss_ce": 7.423176430165768e-05, "loss_iou": 0.166015625, "loss_num": 0.0130615234375, "loss_xval": 0.3984375, "num_input_tokens_seen": 420470196, "step": 7504 }, { "epoch": 16.714922048997774, "grad_norm": 18.1544246673584, "learning_rate": 1e-06, "loss": 0.4431, "num_input_tokens_seen": 420525872, "step": 7505 }, { "epoch": 16.714922048997774, "loss": 0.48571863770484924, "loss_ce": 0.00024499627761542797, "loss_iou": 0.2001953125, "loss_num": 0.016845703125, "loss_xval": 0.486328125, "num_input_tokens_seen": 420525872, "step": 7505 }, { "epoch": 16.71714922048998, "grad_norm": 14.577059745788574, "learning_rate": 1e-06, "loss": 0.5386, "num_input_tokens_seen": 420582216, "step": 7506 }, { "epoch": 16.71714922048998, "loss": 0.6150659918785095, "loss_ce": 0.00013681976997759193, "loss_iou": 0.2451171875, "loss_num": 0.0247802734375, "loss_xval": 0.61328125, "num_input_tokens_seen": 420582216, "step": 7506 }, { "epoch": 16.719376391982184, "grad_norm": 16.228076934814453, "learning_rate": 1e-06, "loss": 0.3875, "num_input_tokens_seen": 420636876, "step": 7507 }, { "epoch": 16.719376391982184, "loss": 0.5354748964309692, "loss_ce": 7.452783756889403e-05, "loss_iou": 0.240234375, "loss_num": 0.01080322265625, "loss_xval": 0.53515625, "num_input_tokens_seen": 420636876, "step": 7507 }, { "epoch": 16.72160356347439, "grad_norm": 17.355379104614258, "learning_rate": 1e-06, "loss": 0.3967, "num_input_tokens_seen": 420692648, "step": 7508 }, { "epoch": 16.72160356347439, "loss": 0.5319057703018188, "loss_ce": 0.00010642388951964676, "loss_iou": 0.2216796875, "loss_num": 0.017578125, "loss_xval": 0.53125, "num_input_tokens_seen": 420692648, "step": 7508 }, { "epoch": 16.723830734966594, "grad_norm": 22.888402938842773, "learning_rate": 1e-06, "loss": 0.3304, "num_input_tokens_seen": 420749380, "step": 7509 }, { "epoch": 16.723830734966594, "loss": 0.2873430550098419, "loss_ce": 0.00011160832218592986, "loss_iou": 0.1142578125, "loss_num": 0.01171875, "loss_xval": 0.287109375, "num_input_tokens_seen": 420749380, "step": 7509 }, { "epoch": 16.7260579064588, "grad_norm": 23.103485107421875, "learning_rate": 1e-06, "loss": 0.5022, "num_input_tokens_seen": 420805372, "step": 7510 }, { "epoch": 16.7260579064588, "loss": 0.6403497457504272, "loss_ce": 9.102303010877222e-05, "loss_iou": 0.296875, "loss_num": 0.00958251953125, "loss_xval": 0.640625, "num_input_tokens_seen": 420805372, "step": 7510 }, { "epoch": 16.728285077951004, "grad_norm": 26.94756317138672, "learning_rate": 1e-06, "loss": 0.4418, "num_input_tokens_seen": 420859376, "step": 7511 }, { "epoch": 16.728285077951004, "loss": 0.3712967336177826, "loss_ce": 8.091152994893491e-05, "loss_iou": 0.166015625, "loss_num": 0.00787353515625, "loss_xval": 0.37109375, "num_input_tokens_seen": 420859376, "step": 7511 }, { "epoch": 16.73051224944321, "grad_norm": 29.820947647094727, "learning_rate": 1e-06, "loss": 0.4807, "num_input_tokens_seen": 420916012, "step": 7512 }, { "epoch": 16.73051224944321, "loss": 0.5431552529335022, "loss_ce": 0.00018649011326488107, "loss_iou": 0.2265625, "loss_num": 0.0179443359375, "loss_xval": 0.54296875, "num_input_tokens_seen": 420916012, "step": 7512 }, { "epoch": 16.732739420935413, "grad_norm": 16.320466995239258, "learning_rate": 1e-06, "loss": 0.7292, "num_input_tokens_seen": 420970600, "step": 7513 }, { "epoch": 16.732739420935413, "loss": 0.877778172492981, "loss_ce": 9.26764914765954e-05, "loss_iou": 0.34765625, "loss_num": 0.036376953125, "loss_xval": 0.87890625, "num_input_tokens_seen": 420970600, "step": 7513 }, { "epoch": 16.734966592427618, "grad_norm": 19.527677536010742, "learning_rate": 1e-06, "loss": 0.3925, "num_input_tokens_seen": 421024580, "step": 7514 }, { "epoch": 16.734966592427618, "loss": 0.29793840646743774, "loss_ce": 8.684221393195912e-05, "loss_iou": 0.1376953125, "loss_num": 0.0045166015625, "loss_xval": 0.296875, "num_input_tokens_seen": 421024580, "step": 7514 }, { "epoch": 16.737193763919823, "grad_norm": 16.6092472076416, "learning_rate": 1e-06, "loss": 0.4416, "num_input_tokens_seen": 421080592, "step": 7515 }, { "epoch": 16.737193763919823, "loss": 0.32314634323120117, "loss_ce": 8.72400269145146e-05, "loss_iou": 0.1328125, "loss_num": 0.01141357421875, "loss_xval": 0.322265625, "num_input_tokens_seen": 421080592, "step": 7515 }, { "epoch": 16.739420935412028, "grad_norm": 18.05164337158203, "learning_rate": 1e-06, "loss": 0.4574, "num_input_tokens_seen": 421138628, "step": 7516 }, { "epoch": 16.739420935412028, "loss": 0.44442254304885864, "loss_ce": 8.662776235723868e-05, "loss_iou": 0.1884765625, "loss_num": 0.013671875, "loss_xval": 0.4453125, "num_input_tokens_seen": 421138628, "step": 7516 }, { "epoch": 16.741648106904233, "grad_norm": 34.90769958496094, "learning_rate": 1e-06, "loss": 0.3832, "num_input_tokens_seen": 421194844, "step": 7517 }, { "epoch": 16.741648106904233, "loss": 0.4219837486743927, "loss_ce": 0.00010874809231609106, "loss_iou": 0.197265625, "loss_num": 0.00555419921875, "loss_xval": 0.421875, "num_input_tokens_seen": 421194844, "step": 7517 }, { "epoch": 16.743875278396438, "grad_norm": 19.655269622802734, "learning_rate": 1e-06, "loss": 0.404, "num_input_tokens_seen": 421248904, "step": 7518 }, { "epoch": 16.743875278396438, "loss": 0.48480457067489624, "loss_ce": 0.00012440019054338336, "loss_iou": 0.216796875, "loss_num": 0.01019287109375, "loss_xval": 0.484375, "num_input_tokens_seen": 421248904, "step": 7518 }, { "epoch": 16.746102449888642, "grad_norm": 22.647844314575195, "learning_rate": 1e-06, "loss": 0.671, "num_input_tokens_seen": 421304408, "step": 7519 }, { "epoch": 16.746102449888642, "loss": 0.8310278654098511, "loss_ce": 9.524515189696103e-05, "loss_iou": 0.353515625, "loss_num": 0.025146484375, "loss_xval": 0.83203125, "num_input_tokens_seen": 421304408, "step": 7519 }, { "epoch": 16.748329621380847, "grad_norm": 12.694985389709473, "learning_rate": 1e-06, "loss": 0.3328, "num_input_tokens_seen": 421359956, "step": 7520 }, { "epoch": 16.748329621380847, "loss": 0.3517601191997528, "loss_ce": 7.55441069486551e-05, "loss_iou": 0.158203125, "loss_num": 0.007232666015625, "loss_xval": 0.3515625, "num_input_tokens_seen": 421359956, "step": 7520 }, { "epoch": 16.750556792873052, "grad_norm": 26.37984275817871, "learning_rate": 1e-06, "loss": 0.635, "num_input_tokens_seen": 421418924, "step": 7521 }, { "epoch": 16.750556792873052, "loss": 0.6089690923690796, "loss_ce": 8.241091563832015e-05, "loss_iou": 0.255859375, "loss_num": 0.019775390625, "loss_xval": 0.609375, "num_input_tokens_seen": 421418924, "step": 7521 }, { "epoch": 16.752783964365257, "grad_norm": 19.50495719909668, "learning_rate": 1e-06, "loss": 0.3985, "num_input_tokens_seen": 421474016, "step": 7522 }, { "epoch": 16.752783964365257, "loss": 0.46183595061302185, "loss_ce": 0.00010498787742108107, "loss_iou": 0.203125, "loss_num": 0.01123046875, "loss_xval": 0.4609375, "num_input_tokens_seen": 421474016, "step": 7522 }, { "epoch": 16.755011135857462, "grad_norm": 19.55943489074707, "learning_rate": 1e-06, "loss": 0.5932, "num_input_tokens_seen": 421528604, "step": 7523 }, { "epoch": 16.755011135857462, "loss": 0.672928512096405, "loss_ce": 0.00019901388441212475, "loss_iou": 0.279296875, "loss_num": 0.02294921875, "loss_xval": 0.671875, "num_input_tokens_seen": 421528604, "step": 7523 }, { "epoch": 16.757238307349667, "grad_norm": 18.727272033691406, "learning_rate": 1e-06, "loss": 0.4604, "num_input_tokens_seen": 421582348, "step": 7524 }, { "epoch": 16.757238307349667, "loss": 0.43595296144485474, "loss_ce": 0.00016193735064007342, "loss_iou": 0.1962890625, "loss_num": 0.008544921875, "loss_xval": 0.435546875, "num_input_tokens_seen": 421582348, "step": 7524 }, { "epoch": 16.75946547884187, "grad_norm": 14.18976879119873, "learning_rate": 1e-06, "loss": 0.3096, "num_input_tokens_seen": 421637456, "step": 7525 }, { "epoch": 16.75946547884187, "loss": 0.2345210313796997, "loss_ce": 8.500301919411868e-05, "loss_iou": 0.10888671875, "loss_num": 0.0033416748046875, "loss_xval": 0.234375, "num_input_tokens_seen": 421637456, "step": 7525 }, { "epoch": 16.761692650334076, "grad_norm": 14.92958927154541, "learning_rate": 1e-06, "loss": 0.4751, "num_input_tokens_seen": 421694176, "step": 7526 }, { "epoch": 16.761692650334076, "loss": 0.3878576159477234, "loss_ce": 0.00010129041038453579, "loss_iou": 0.1591796875, "loss_num": 0.0140380859375, "loss_xval": 0.388671875, "num_input_tokens_seen": 421694176, "step": 7526 }, { "epoch": 16.76391982182628, "grad_norm": 23.31275749206543, "learning_rate": 1e-06, "loss": 0.5295, "num_input_tokens_seen": 421751032, "step": 7527 }, { "epoch": 16.76391982182628, "loss": 0.5836684703826904, "loss_ce": 8.08104668976739e-05, "loss_iou": 0.224609375, "loss_num": 0.02685546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 421751032, "step": 7527 }, { "epoch": 16.766146993318486, "grad_norm": 24.81498146057129, "learning_rate": 1e-06, "loss": 0.4622, "num_input_tokens_seen": 421806836, "step": 7528 }, { "epoch": 16.766146993318486, "loss": 0.37533727288246155, "loss_ce": 9.314088674727827e-05, "loss_iou": 0.1728515625, "loss_num": 0.005767822265625, "loss_xval": 0.375, "num_input_tokens_seen": 421806836, "step": 7528 }, { "epoch": 16.76837416481069, "grad_norm": 23.45658302307129, "learning_rate": 1e-06, "loss": 0.4903, "num_input_tokens_seen": 421863004, "step": 7529 }, { "epoch": 16.76837416481069, "loss": 0.6420989036560059, "loss_ce": 0.00013110964209772646, "loss_iou": 0.291015625, "loss_num": 0.011962890625, "loss_xval": 0.640625, "num_input_tokens_seen": 421863004, "step": 7529 }, { "epoch": 16.770601336302896, "grad_norm": 17.918804168701172, "learning_rate": 1e-06, "loss": 0.3386, "num_input_tokens_seen": 421918756, "step": 7530 }, { "epoch": 16.770601336302896, "loss": 0.3590957224369049, "loss_ce": 8.693411655258387e-05, "loss_iou": 0.150390625, "loss_num": 0.0115966796875, "loss_xval": 0.359375, "num_input_tokens_seen": 421918756, "step": 7530 }, { "epoch": 16.7728285077951, "grad_norm": 21.4240665435791, "learning_rate": 1e-06, "loss": 0.4388, "num_input_tokens_seen": 421975608, "step": 7531 }, { "epoch": 16.7728285077951, "loss": 0.3268599510192871, "loss_ce": 7.773490506224334e-05, "loss_iou": 0.138671875, "loss_num": 0.009765625, "loss_xval": 0.326171875, "num_input_tokens_seen": 421975608, "step": 7531 }, { "epoch": 16.775055679287306, "grad_norm": 22.135826110839844, "learning_rate": 1e-06, "loss": 0.2985, "num_input_tokens_seen": 422029336, "step": 7532 }, { "epoch": 16.775055679287306, "loss": 0.2817379832267761, "loss_ce": 9.12583782337606e-05, "loss_iou": 0.12109375, "loss_num": 0.00799560546875, "loss_xval": 0.28125, "num_input_tokens_seen": 422029336, "step": 7532 }, { "epoch": 16.77728285077951, "grad_norm": 12.753730773925781, "learning_rate": 1e-06, "loss": 0.4948, "num_input_tokens_seen": 422087488, "step": 7533 }, { "epoch": 16.77728285077951, "loss": 0.6205785274505615, "loss_ce": 0.00046129614929668605, "loss_iou": 0.2734375, "loss_num": 0.01483154296875, "loss_xval": 0.62109375, "num_input_tokens_seen": 422087488, "step": 7533 }, { "epoch": 16.779510022271715, "grad_norm": 17.831968307495117, "learning_rate": 1e-06, "loss": 0.3904, "num_input_tokens_seen": 422144960, "step": 7534 }, { "epoch": 16.779510022271715, "loss": 0.3965558111667633, "loss_ce": 7.143749098759145e-05, "loss_iou": 0.150390625, "loss_num": 0.0194091796875, "loss_xval": 0.396484375, "num_input_tokens_seen": 422144960, "step": 7534 }, { "epoch": 16.78173719376392, "grad_norm": 19.048486709594727, "learning_rate": 1e-06, "loss": 0.2356, "num_input_tokens_seen": 422202632, "step": 7535 }, { "epoch": 16.78173719376392, "loss": 0.19454200565814972, "loss_ce": 8.399138459935784e-05, "loss_iou": 0.0888671875, "loss_num": 0.003265380859375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 422202632, "step": 7535 }, { "epoch": 16.783964365256125, "grad_norm": 15.023216247558594, "learning_rate": 1e-06, "loss": 0.3777, "num_input_tokens_seen": 422258820, "step": 7536 }, { "epoch": 16.783964365256125, "loss": 0.25117045640945435, "loss_ce": 7.181320688687265e-05, "loss_iou": 0.1162109375, "loss_num": 0.003814697265625, "loss_xval": 0.251953125, "num_input_tokens_seen": 422258820, "step": 7536 }, { "epoch": 16.78619153674833, "grad_norm": 12.973251342773438, "learning_rate": 1e-06, "loss": 0.421, "num_input_tokens_seen": 422317024, "step": 7537 }, { "epoch": 16.78619153674833, "loss": 0.37338966131210327, "loss_ce": 9.861498983809724e-05, "loss_iou": 0.1591796875, "loss_num": 0.01092529296875, "loss_xval": 0.373046875, "num_input_tokens_seen": 422317024, "step": 7537 }, { "epoch": 16.788418708240535, "grad_norm": 26.241365432739258, "learning_rate": 1e-06, "loss": 0.3026, "num_input_tokens_seen": 422372340, "step": 7538 }, { "epoch": 16.788418708240535, "loss": 0.31962519884109497, "loss_ce": 0.00010615789506118745, "loss_iou": 0.1513671875, "loss_num": 0.0032806396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 422372340, "step": 7538 }, { "epoch": 16.79064587973274, "grad_norm": 17.4132080078125, "learning_rate": 1e-06, "loss": 0.3208, "num_input_tokens_seen": 422427068, "step": 7539 }, { "epoch": 16.79064587973274, "loss": 0.4059646427631378, "loss_ce": 8.086010348051786e-05, "loss_iou": 0.1875, "loss_num": 0.006317138671875, "loss_xval": 0.40625, "num_input_tokens_seen": 422427068, "step": 7539 }, { "epoch": 16.792873051224944, "grad_norm": 18.924129486083984, "learning_rate": 1e-06, "loss": 0.4015, "num_input_tokens_seen": 422485208, "step": 7540 }, { "epoch": 16.792873051224944, "loss": 0.549044668674469, "loss_ce": 9.448261698707938e-05, "loss_iou": 0.2294921875, "loss_num": 0.0181884765625, "loss_xval": 0.55078125, "num_input_tokens_seen": 422485208, "step": 7540 }, { "epoch": 16.79510022271715, "grad_norm": 21.377471923828125, "learning_rate": 1e-06, "loss": 0.2579, "num_input_tokens_seen": 422541288, "step": 7541 }, { "epoch": 16.79510022271715, "loss": 0.22633783519268036, "loss_ce": 8.05138552095741e-05, "loss_iou": 0.09619140625, "loss_num": 0.006805419921875, "loss_xval": 0.2265625, "num_input_tokens_seen": 422541288, "step": 7541 }, { "epoch": 16.797327394209354, "grad_norm": 18.096935272216797, "learning_rate": 1e-06, "loss": 0.4972, "num_input_tokens_seen": 422598728, "step": 7542 }, { "epoch": 16.797327394209354, "loss": 0.598351240158081, "loss_ce": 8.468693704344332e-05, "loss_iou": 0.2353515625, "loss_num": 0.025146484375, "loss_xval": 0.59765625, "num_input_tokens_seen": 422598728, "step": 7542 }, { "epoch": 16.79955456570156, "grad_norm": 18.19603157043457, "learning_rate": 1e-06, "loss": 0.463, "num_input_tokens_seen": 422653832, "step": 7543 }, { "epoch": 16.79955456570156, "loss": 0.5140941143035889, "loss_ce": 8.656126010464504e-05, "loss_iou": 0.205078125, "loss_num": 0.0205078125, "loss_xval": 0.515625, "num_input_tokens_seen": 422653832, "step": 7543 }, { "epoch": 16.801781737193764, "grad_norm": 20.3741397857666, "learning_rate": 1e-06, "loss": 0.3391, "num_input_tokens_seen": 422711052, "step": 7544 }, { "epoch": 16.801781737193764, "loss": 0.37486082315444946, "loss_ce": 0.00010497802577447146, "loss_iou": 0.173828125, "loss_num": 0.00518798828125, "loss_xval": 0.375, "num_input_tokens_seen": 422711052, "step": 7544 }, { "epoch": 16.80400890868597, "grad_norm": 15.383708953857422, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 422768836, "step": 7545 }, { "epoch": 16.80400890868597, "loss": 0.2621135711669922, "loss_ce": 8.963444997789338e-05, "loss_iou": 0.1201171875, "loss_num": 0.004302978515625, "loss_xval": 0.26171875, "num_input_tokens_seen": 422768836, "step": 7545 }, { "epoch": 16.806236080178174, "grad_norm": 19.3375186920166, "learning_rate": 1e-06, "loss": 0.3409, "num_input_tokens_seen": 422826720, "step": 7546 }, { "epoch": 16.806236080178174, "loss": 0.23085373640060425, "loss_ce": 7.98294713604264e-05, "loss_iou": 0.107421875, "loss_num": 0.003204345703125, "loss_xval": 0.23046875, "num_input_tokens_seen": 422826720, "step": 7546 }, { "epoch": 16.80846325167038, "grad_norm": 27.87537956237793, "learning_rate": 1e-06, "loss": 0.3236, "num_input_tokens_seen": 422881060, "step": 7547 }, { "epoch": 16.80846325167038, "loss": 0.2520201802253723, "loss_ce": 6.706906424369663e-05, "loss_iou": 0.08837890625, "loss_num": 0.01513671875, "loss_xval": 0.251953125, "num_input_tokens_seen": 422881060, "step": 7547 }, { "epoch": 16.810690423162583, "grad_norm": 9.865744590759277, "learning_rate": 1e-06, "loss": 0.3187, "num_input_tokens_seen": 422938468, "step": 7548 }, { "epoch": 16.810690423162583, "loss": 0.39917415380477905, "loss_ce": 0.00012633406731765717, "loss_iou": 0.1787109375, "loss_num": 0.00830078125, "loss_xval": 0.3984375, "num_input_tokens_seen": 422938468, "step": 7548 }, { "epoch": 16.812917594654788, "grad_norm": 18.907991409301758, "learning_rate": 1e-06, "loss": 0.4061, "num_input_tokens_seen": 422996068, "step": 7549 }, { "epoch": 16.812917594654788, "loss": 0.5208408832550049, "loss_ce": 8.891297329682857e-05, "loss_iou": 0.21875, "loss_num": 0.0167236328125, "loss_xval": 0.51953125, "num_input_tokens_seen": 422996068, "step": 7549 }, { "epoch": 16.815144766146993, "grad_norm": 14.58835506439209, "learning_rate": 1e-06, "loss": 0.3215, "num_input_tokens_seen": 423051768, "step": 7550 }, { "epoch": 16.815144766146993, "loss": 0.4090169668197632, "loss_ce": 8.143430750351399e-05, "loss_iou": 0.1884765625, "loss_num": 0.00628662109375, "loss_xval": 0.408203125, "num_input_tokens_seen": 423051768, "step": 7550 }, { "epoch": 16.817371937639198, "grad_norm": 14.23434066772461, "learning_rate": 1e-06, "loss": 0.3621, "num_input_tokens_seen": 423107452, "step": 7551 }, { "epoch": 16.817371937639198, "loss": 0.2738141119480133, "loss_ce": 7.142306276364252e-05, "loss_iou": 0.119140625, "loss_num": 0.00701904296875, "loss_xval": 0.2734375, "num_input_tokens_seen": 423107452, "step": 7551 }, { "epoch": 16.819599109131403, "grad_norm": 17.120817184448242, "learning_rate": 1e-06, "loss": 0.4439, "num_input_tokens_seen": 423165408, "step": 7552 }, { "epoch": 16.819599109131403, "loss": 0.466147780418396, "loss_ce": 8.328772673849016e-05, "loss_iou": 0.1865234375, "loss_num": 0.0185546875, "loss_xval": 0.466796875, "num_input_tokens_seen": 423165408, "step": 7552 }, { "epoch": 16.821826280623608, "grad_norm": 16.54876708984375, "learning_rate": 1e-06, "loss": 0.5828, "num_input_tokens_seen": 423220424, "step": 7553 }, { "epoch": 16.821826280623608, "loss": 0.656104326248169, "loss_ce": 9.84652797342278e-05, "loss_iou": 0.28515625, "loss_num": 0.017333984375, "loss_xval": 0.65625, "num_input_tokens_seen": 423220424, "step": 7553 }, { "epoch": 16.824053452115812, "grad_norm": 29.920957565307617, "learning_rate": 1e-06, "loss": 0.5121, "num_input_tokens_seen": 423275472, "step": 7554 }, { "epoch": 16.824053452115812, "loss": 0.4944070279598236, "loss_ce": 8.32905643619597e-05, "loss_iou": 0.1943359375, "loss_num": 0.02099609375, "loss_xval": 0.494140625, "num_input_tokens_seen": 423275472, "step": 7554 }, { "epoch": 16.826280623608017, "grad_norm": 14.726850509643555, "learning_rate": 1e-06, "loss": 0.3757, "num_input_tokens_seen": 423332856, "step": 7555 }, { "epoch": 16.826280623608017, "loss": 0.4061638116836548, "loss_ce": 9.694288019090891e-05, "loss_iou": 0.16796875, "loss_num": 0.013916015625, "loss_xval": 0.40625, "num_input_tokens_seen": 423332856, "step": 7555 }, { "epoch": 16.828507795100222, "grad_norm": 18.766006469726562, "learning_rate": 1e-06, "loss": 0.5588, "num_input_tokens_seen": 423388552, "step": 7556 }, { "epoch": 16.828507795100222, "loss": 0.5013834834098816, "loss_ce": 0.00010174483759328723, "loss_iou": 0.205078125, "loss_num": 0.018310546875, "loss_xval": 0.5, "num_input_tokens_seen": 423388552, "step": 7556 }, { "epoch": 16.830734966592427, "grad_norm": 16.07285499572754, "learning_rate": 1e-06, "loss": 0.4426, "num_input_tokens_seen": 423443352, "step": 7557 }, { "epoch": 16.830734966592427, "loss": 0.531631588935852, "loss_ce": 0.00013745043543167412, "loss_iou": 0.236328125, "loss_num": 0.01177978515625, "loss_xval": 0.53125, "num_input_tokens_seen": 423443352, "step": 7557 }, { "epoch": 16.832962138084632, "grad_norm": 22.96206283569336, "learning_rate": 1e-06, "loss": 0.2828, "num_input_tokens_seen": 423497072, "step": 7558 }, { "epoch": 16.832962138084632, "loss": 0.2979274392127991, "loss_ce": 7.588087464682758e-05, "loss_iou": 0.12353515625, "loss_num": 0.01025390625, "loss_xval": 0.296875, "num_input_tokens_seen": 423497072, "step": 7558 }, { "epoch": 16.835189309576837, "grad_norm": 14.747753143310547, "learning_rate": 1e-06, "loss": 0.3194, "num_input_tokens_seen": 423554452, "step": 7559 }, { "epoch": 16.835189309576837, "loss": 0.41189420223236084, "loss_ce": 9.000718273455277e-05, "loss_iou": 0.1923828125, "loss_num": 0.005218505859375, "loss_xval": 0.412109375, "num_input_tokens_seen": 423554452, "step": 7559 }, { "epoch": 16.83741648106904, "grad_norm": 13.279129981994629, "learning_rate": 1e-06, "loss": 0.3928, "num_input_tokens_seen": 423612016, "step": 7560 }, { "epoch": 16.83741648106904, "loss": 0.32673531770706177, "loss_ce": 7.517023186665028e-05, "loss_iou": 0.1376953125, "loss_num": 0.01025390625, "loss_xval": 0.326171875, "num_input_tokens_seen": 423612016, "step": 7560 }, { "epoch": 16.839643652561247, "grad_norm": 15.924846649169922, "learning_rate": 1e-06, "loss": 0.392, "num_input_tokens_seen": 423668608, "step": 7561 }, { "epoch": 16.839643652561247, "loss": 0.2862039804458618, "loss_ce": 7.118898065527901e-05, "loss_iou": 0.12353515625, "loss_num": 0.00775146484375, "loss_xval": 0.28515625, "num_input_tokens_seen": 423668608, "step": 7561 }, { "epoch": 16.84187082405345, "grad_norm": 19.908687591552734, "learning_rate": 1e-06, "loss": 0.5825, "num_input_tokens_seen": 423725484, "step": 7562 }, { "epoch": 16.84187082405345, "loss": 0.5031726360321045, "loss_ce": 0.00012085959315299988, "loss_iou": 0.2236328125, "loss_num": 0.01116943359375, "loss_xval": 0.50390625, "num_input_tokens_seen": 423725484, "step": 7562 }, { "epoch": 16.844097995545656, "grad_norm": 25.447425842285156, "learning_rate": 1e-06, "loss": 0.3432, "num_input_tokens_seen": 423778384, "step": 7563 }, { "epoch": 16.844097995545656, "loss": 0.3695857524871826, "loss_ce": 7.891730638220906e-05, "loss_iou": 0.1591796875, "loss_num": 0.01019287109375, "loss_xval": 0.369140625, "num_input_tokens_seen": 423778384, "step": 7563 }, { "epoch": 16.84632516703786, "grad_norm": 13.935490608215332, "learning_rate": 1e-06, "loss": 0.4838, "num_input_tokens_seen": 423834376, "step": 7564 }, { "epoch": 16.84632516703786, "loss": 0.4895142912864685, "loss_ce": 0.00013441329065244645, "loss_iou": 0.1845703125, "loss_num": 0.024169921875, "loss_xval": 0.490234375, "num_input_tokens_seen": 423834376, "step": 7564 }, { "epoch": 16.848552338530066, "grad_norm": 16.72061538696289, "learning_rate": 1e-06, "loss": 0.3445, "num_input_tokens_seen": 423890956, "step": 7565 }, { "epoch": 16.848552338530066, "loss": 0.4084116220474243, "loss_ce": 0.00014747484237886965, "loss_iou": 0.1826171875, "loss_num": 0.00848388671875, "loss_xval": 0.408203125, "num_input_tokens_seen": 423890956, "step": 7565 }, { "epoch": 16.85077951002227, "grad_norm": 26.282730102539062, "learning_rate": 1e-06, "loss": 0.519, "num_input_tokens_seen": 423947064, "step": 7566 }, { "epoch": 16.85077951002227, "loss": 0.6066428422927856, "loss_ce": 7.544091204181314e-05, "loss_iou": 0.25390625, "loss_num": 0.0196533203125, "loss_xval": 0.60546875, "num_input_tokens_seen": 423947064, "step": 7566 }, { "epoch": 16.853006681514476, "grad_norm": 15.286888122558594, "learning_rate": 1e-06, "loss": 0.4031, "num_input_tokens_seen": 424005024, "step": 7567 }, { "epoch": 16.853006681514476, "loss": 0.29945024847984314, "loss_ce": 7.280143472598866e-05, "loss_iou": 0.1318359375, "loss_num": 0.007171630859375, "loss_xval": 0.298828125, "num_input_tokens_seen": 424005024, "step": 7567 }, { "epoch": 16.85523385300668, "grad_norm": 22.391944885253906, "learning_rate": 1e-06, "loss": 0.3159, "num_input_tokens_seen": 424061520, "step": 7568 }, { "epoch": 16.85523385300668, "loss": 0.3320120573043823, "loss_ce": 0.00010290154023095965, "loss_iou": 0.15234375, "loss_num": 0.005401611328125, "loss_xval": 0.33203125, "num_input_tokens_seen": 424061520, "step": 7568 }, { "epoch": 16.857461024498885, "grad_norm": 28.07231903076172, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 424115184, "step": 7569 }, { "epoch": 16.857461024498885, "loss": 0.4680614471435547, "loss_ce": 0.00010489917622180656, "loss_iou": 0.212890625, "loss_num": 0.00860595703125, "loss_xval": 0.46875, "num_input_tokens_seen": 424115184, "step": 7569 }, { "epoch": 16.85968819599109, "grad_norm": 23.56254768371582, "learning_rate": 1e-06, "loss": 0.3468, "num_input_tokens_seen": 424169644, "step": 7570 }, { "epoch": 16.85968819599109, "loss": 0.41865983605384827, "loss_ce": 8.075643563643098e-05, "loss_iou": 0.1953125, "loss_num": 0.0054931640625, "loss_xval": 0.41796875, "num_input_tokens_seen": 424169644, "step": 7570 }, { "epoch": 16.861915367483295, "grad_norm": 25.315942764282227, "learning_rate": 1e-06, "loss": 0.4083, "num_input_tokens_seen": 424227580, "step": 7571 }, { "epoch": 16.861915367483295, "loss": 0.37649667263031006, "loss_ce": 9.28444933379069e-05, "loss_iou": 0.166015625, "loss_num": 0.00872802734375, "loss_xval": 0.376953125, "num_input_tokens_seen": 424227580, "step": 7571 }, { "epoch": 16.8641425389755, "grad_norm": 15.719208717346191, "learning_rate": 1e-06, "loss": 0.4909, "num_input_tokens_seen": 424283408, "step": 7572 }, { "epoch": 16.8641425389755, "loss": 0.4299257695674896, "loss_ce": 0.00011618290591286495, "loss_iou": 0.185546875, "loss_num": 0.01165771484375, "loss_xval": 0.4296875, "num_input_tokens_seen": 424283408, "step": 7572 }, { "epoch": 16.866369710467705, "grad_norm": 28.304363250732422, "learning_rate": 1e-06, "loss": 0.2948, "num_input_tokens_seen": 424338840, "step": 7573 }, { "epoch": 16.866369710467705, "loss": 0.3035458028316498, "loss_ce": 7.902114157332107e-05, "loss_iou": 0.1318359375, "loss_num": 0.0078125, "loss_xval": 0.302734375, "num_input_tokens_seen": 424338840, "step": 7573 }, { "epoch": 16.86859688195991, "grad_norm": 13.032916069030762, "learning_rate": 1e-06, "loss": 0.5414, "num_input_tokens_seen": 424394268, "step": 7574 }, { "epoch": 16.86859688195991, "loss": 0.5727271437644958, "loss_ce": 9.532412514090538e-05, "loss_iou": 0.2373046875, "loss_num": 0.0196533203125, "loss_xval": 0.57421875, "num_input_tokens_seen": 424394268, "step": 7574 }, { "epoch": 16.870824053452115, "grad_norm": 16.96046257019043, "learning_rate": 1e-06, "loss": 0.4261, "num_input_tokens_seen": 424453868, "step": 7575 }, { "epoch": 16.870824053452115, "loss": 0.38206472992897034, "loss_ce": 0.0001067298071575351, "loss_iou": 0.173828125, "loss_num": 0.007080078125, "loss_xval": 0.3828125, "num_input_tokens_seen": 424453868, "step": 7575 }, { "epoch": 16.87305122494432, "grad_norm": 23.169174194335938, "learning_rate": 1e-06, "loss": 0.3393, "num_input_tokens_seen": 424510884, "step": 7576 }, { "epoch": 16.87305122494432, "loss": 0.33035528659820557, "loss_ce": 9.405257151229307e-05, "loss_iou": 0.1416015625, "loss_num": 0.00927734375, "loss_xval": 0.330078125, "num_input_tokens_seen": 424510884, "step": 7576 }, { "epoch": 16.875278396436524, "grad_norm": 20.488079071044922, "learning_rate": 1e-06, "loss": 0.3433, "num_input_tokens_seen": 424566384, "step": 7577 }, { "epoch": 16.875278396436524, "loss": 0.32358628511428833, "loss_ce": 9.996739390771836e-05, "loss_iou": 0.142578125, "loss_num": 0.00750732421875, "loss_xval": 0.32421875, "num_input_tokens_seen": 424566384, "step": 7577 }, { "epoch": 16.87750556792873, "grad_norm": 13.715553283691406, "learning_rate": 1e-06, "loss": 0.3461, "num_input_tokens_seen": 424623228, "step": 7578 }, { "epoch": 16.87750556792873, "loss": 0.3440733253955841, "loss_ce": 7.916930189821869e-05, "loss_iou": 0.1455078125, "loss_num": 0.0107421875, "loss_xval": 0.34375, "num_input_tokens_seen": 424623228, "step": 7578 }, { "epoch": 16.879732739420934, "grad_norm": 16.320846557617188, "learning_rate": 1e-06, "loss": 0.5924, "num_input_tokens_seen": 424682444, "step": 7579 }, { "epoch": 16.879732739420934, "loss": 0.6377947330474854, "loss_ce": 9.944755584001541e-05, "loss_iou": 0.2734375, "loss_num": 0.0181884765625, "loss_xval": 0.63671875, "num_input_tokens_seen": 424682444, "step": 7579 }, { "epoch": 16.88195991091314, "grad_norm": 18.41837501525879, "learning_rate": 1e-06, "loss": 0.6497, "num_input_tokens_seen": 424740464, "step": 7580 }, { "epoch": 16.88195991091314, "loss": 0.6376847624778748, "loss_ce": 0.00011151684884680435, "loss_iou": 0.26953125, "loss_num": 0.0196533203125, "loss_xval": 0.63671875, "num_input_tokens_seen": 424740464, "step": 7580 }, { "epoch": 16.884187082405344, "grad_norm": 16.372474670410156, "learning_rate": 1e-06, "loss": 0.3357, "num_input_tokens_seen": 424794412, "step": 7581 }, { "epoch": 16.884187082405344, "loss": 0.28870633244514465, "loss_ce": 7.107005512807518e-05, "loss_iou": 0.1181640625, "loss_num": 0.01043701171875, "loss_xval": 0.2890625, "num_input_tokens_seen": 424794412, "step": 7581 }, { "epoch": 16.88641425389755, "grad_norm": 23.682388305664062, "learning_rate": 1e-06, "loss": 0.4056, "num_input_tokens_seen": 424852012, "step": 7582 }, { "epoch": 16.88641425389755, "loss": 0.34498876333236694, "loss_ce": 7.910602289484814e-05, "loss_iou": 0.15234375, "loss_num": 0.0079345703125, "loss_xval": 0.345703125, "num_input_tokens_seen": 424852012, "step": 7582 }, { "epoch": 16.888641425389753, "grad_norm": 22.36638069152832, "learning_rate": 1e-06, "loss": 0.4973, "num_input_tokens_seen": 424903972, "step": 7583 }, { "epoch": 16.888641425389753, "loss": 0.41586142778396606, "loss_ce": 8.992112998384982e-05, "loss_iou": 0.1865234375, "loss_num": 0.008544921875, "loss_xval": 0.416015625, "num_input_tokens_seen": 424903972, "step": 7583 }, { "epoch": 16.89086859688196, "grad_norm": 28.00014305114746, "learning_rate": 1e-06, "loss": 0.5041, "num_input_tokens_seen": 424960808, "step": 7584 }, { "epoch": 16.89086859688196, "loss": 0.5798003077507019, "loss_ce": 8.836777124088258e-05, "loss_iou": 0.251953125, "loss_num": 0.0155029296875, "loss_xval": 0.578125, "num_input_tokens_seen": 424960808, "step": 7584 }, { "epoch": 16.893095768374163, "grad_norm": 18.74024200439453, "learning_rate": 1e-06, "loss": 0.4247, "num_input_tokens_seen": 425018468, "step": 7585 }, { "epoch": 16.893095768374163, "loss": 0.477266788482666, "loss_ce": 9.394106746185571e-05, "loss_iou": 0.2119140625, "loss_num": 0.01043701171875, "loss_xval": 0.4765625, "num_input_tokens_seen": 425018468, "step": 7585 }, { "epoch": 16.895322939866368, "grad_norm": 20.79447364807129, "learning_rate": 1e-06, "loss": 0.4927, "num_input_tokens_seen": 425074148, "step": 7586 }, { "epoch": 16.895322939866368, "loss": 0.7077399492263794, "loss_ce": 9.840876737143844e-05, "loss_iou": 0.279296875, "loss_num": 0.029541015625, "loss_xval": 0.70703125, "num_input_tokens_seen": 425074148, "step": 7586 }, { "epoch": 16.897550111358576, "grad_norm": 23.163110733032227, "learning_rate": 1e-06, "loss": 0.3244, "num_input_tokens_seen": 425129428, "step": 7587 }, { "epoch": 16.897550111358576, "loss": 0.3205108642578125, "loss_ce": 7.629128958797082e-05, "loss_iou": 0.142578125, "loss_num": 0.007049560546875, "loss_xval": 0.3203125, "num_input_tokens_seen": 425129428, "step": 7587 }, { "epoch": 16.899777282850778, "grad_norm": 23.743364334106445, "learning_rate": 1e-06, "loss": 0.4087, "num_input_tokens_seen": 425184424, "step": 7588 }, { "epoch": 16.899777282850778, "loss": 0.5007613897323608, "loss_ce": 0.00015102376346476376, "loss_iou": 0.2255859375, "loss_num": 0.00994873046875, "loss_xval": 0.5, "num_input_tokens_seen": 425184424, "step": 7588 }, { "epoch": 16.902004454342986, "grad_norm": 22.105934143066406, "learning_rate": 1e-06, "loss": 0.3156, "num_input_tokens_seen": 425241216, "step": 7589 }, { "epoch": 16.902004454342986, "loss": 0.30233582854270935, "loss_ce": 8.974589582066983e-05, "loss_iou": 0.1357421875, "loss_num": 0.00604248046875, "loss_xval": 0.302734375, "num_input_tokens_seen": 425241216, "step": 7589 }, { "epoch": 16.90423162583519, "grad_norm": 26.043792724609375, "learning_rate": 1e-06, "loss": 0.6462, "num_input_tokens_seen": 425294740, "step": 7590 }, { "epoch": 16.90423162583519, "loss": 0.3308909833431244, "loss_ce": 8.044774585869163e-05, "loss_iou": 0.140625, "loss_num": 0.00982666015625, "loss_xval": 0.330078125, "num_input_tokens_seen": 425294740, "step": 7590 }, { "epoch": 16.906458797327396, "grad_norm": 25.259113311767578, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 425352476, "step": 7591 }, { "epoch": 16.906458797327396, "loss": 0.44027650356292725, "loss_ce": 9.096147550735623e-05, "loss_iou": 0.201171875, "loss_num": 0.007568359375, "loss_xval": 0.439453125, "num_input_tokens_seen": 425352476, "step": 7591 }, { "epoch": 16.9086859688196, "grad_norm": 18.93207359313965, "learning_rate": 1e-06, "loss": 0.4337, "num_input_tokens_seen": 425410420, "step": 7592 }, { "epoch": 16.9086859688196, "loss": 0.556984543800354, "loss_ce": 9.982170013245195e-05, "loss_iou": 0.2177734375, "loss_num": 0.0242919921875, "loss_xval": 0.55859375, "num_input_tokens_seen": 425410420, "step": 7592 }, { "epoch": 16.910913140311806, "grad_norm": 17.402809143066406, "learning_rate": 1e-06, "loss": 0.3179, "num_input_tokens_seen": 425468280, "step": 7593 }, { "epoch": 16.910913140311806, "loss": 0.3924216330051422, "loss_ce": 8.76395424711518e-05, "loss_iou": 0.177734375, "loss_num": 0.007568359375, "loss_xval": 0.392578125, "num_input_tokens_seen": 425468280, "step": 7593 }, { "epoch": 16.91314031180401, "grad_norm": 14.503034591674805, "learning_rate": 1e-06, "loss": 0.6439, "num_input_tokens_seen": 425525096, "step": 7594 }, { "epoch": 16.91314031180401, "loss": 0.9615048170089722, "loss_ce": 0.0005673256237059832, "loss_iou": 0.365234375, "loss_num": 0.04638671875, "loss_xval": 0.9609375, "num_input_tokens_seen": 425525096, "step": 7594 }, { "epoch": 16.915367483296215, "grad_norm": 21.804555892944336, "learning_rate": 1e-06, "loss": 0.4446, "num_input_tokens_seen": 425579188, "step": 7595 }, { "epoch": 16.915367483296215, "loss": 0.41501274704933167, "loss_ce": 9.576005686540157e-05, "loss_iou": 0.19140625, "loss_num": 0.00616455078125, "loss_xval": 0.4140625, "num_input_tokens_seen": 425579188, "step": 7595 }, { "epoch": 16.91759465478842, "grad_norm": 17.96055793762207, "learning_rate": 1e-06, "loss": 0.435, "num_input_tokens_seen": 425635356, "step": 7596 }, { "epoch": 16.91759465478842, "loss": 0.44575822353363037, "loss_ce": 7.95404048403725e-05, "loss_iou": 0.1982421875, "loss_num": 0.0096435546875, "loss_xval": 0.4453125, "num_input_tokens_seen": 425635356, "step": 7596 }, { "epoch": 16.919821826280625, "grad_norm": 22.727903366088867, "learning_rate": 1e-06, "loss": 0.4284, "num_input_tokens_seen": 425691988, "step": 7597 }, { "epoch": 16.919821826280625, "loss": 0.4949197769165039, "loss_ce": 0.0004129420267418027, "loss_iou": 0.2197265625, "loss_num": 0.01092529296875, "loss_xval": 0.494140625, "num_input_tokens_seen": 425691988, "step": 7597 }, { "epoch": 16.92204899777283, "grad_norm": 19.787315368652344, "learning_rate": 1e-06, "loss": 0.3273, "num_input_tokens_seen": 425749968, "step": 7598 }, { "epoch": 16.92204899777283, "loss": 0.2782631516456604, "loss_ce": 6.490422674687579e-05, "loss_iou": 0.1123046875, "loss_num": 0.0107421875, "loss_xval": 0.27734375, "num_input_tokens_seen": 425749968, "step": 7598 }, { "epoch": 16.924276169265035, "grad_norm": 19.99041748046875, "learning_rate": 1e-06, "loss": 0.5476, "num_input_tokens_seen": 425805264, "step": 7599 }, { "epoch": 16.924276169265035, "loss": 0.7404612302780151, "loss_ce": 0.00010478113836143166, "loss_iou": 0.30078125, "loss_num": 0.02783203125, "loss_xval": 0.7421875, "num_input_tokens_seen": 425805264, "step": 7599 }, { "epoch": 16.92650334075724, "grad_norm": 20.051441192626953, "learning_rate": 1e-06, "loss": 0.3816, "num_input_tokens_seen": 425860344, "step": 7600 }, { "epoch": 16.92650334075724, "loss": 0.4353218674659729, "loss_ce": 8.015791536308825e-05, "loss_iou": 0.1962890625, "loss_num": 0.0084228515625, "loss_xval": 0.435546875, "num_input_tokens_seen": 425860344, "step": 7600 }, { "epoch": 16.928730512249444, "grad_norm": 19.339773178100586, "learning_rate": 1e-06, "loss": 0.5078, "num_input_tokens_seen": 425916088, "step": 7601 }, { "epoch": 16.928730512249444, "loss": 0.4659002423286438, "loss_ce": 7.990589074324816e-05, "loss_iou": 0.21484375, "loss_num": 0.007171630859375, "loss_xval": 0.46484375, "num_input_tokens_seen": 425916088, "step": 7601 }, { "epoch": 16.93095768374165, "grad_norm": 11.72046947479248, "learning_rate": 1e-06, "loss": 0.271, "num_input_tokens_seen": 425971676, "step": 7602 }, { "epoch": 16.93095768374165, "loss": 0.3369971513748169, "loss_ce": 8.307769894599915e-05, "loss_iou": 0.142578125, "loss_num": 0.010498046875, "loss_xval": 0.3359375, "num_input_tokens_seen": 425971676, "step": 7602 }, { "epoch": 16.933184855233854, "grad_norm": 26.19060516357422, "learning_rate": 1e-06, "loss": 0.43, "num_input_tokens_seen": 426026612, "step": 7603 }, { "epoch": 16.933184855233854, "loss": 0.33956053853034973, "loss_ce": 8.299553883261979e-05, "loss_iou": 0.1416015625, "loss_num": 0.01123046875, "loss_xval": 0.33984375, "num_input_tokens_seen": 426026612, "step": 7603 }, { "epoch": 16.93541202672606, "grad_norm": 36.85425567626953, "learning_rate": 1e-06, "loss": 0.3744, "num_input_tokens_seen": 426084004, "step": 7604 }, { "epoch": 16.93541202672606, "loss": 0.4844852089881897, "loss_ce": 0.0004153858171775937, "loss_iou": 0.21484375, "loss_num": 0.01116943359375, "loss_xval": 0.484375, "num_input_tokens_seen": 426084004, "step": 7604 }, { "epoch": 16.937639198218264, "grad_norm": 22.670175552368164, "learning_rate": 1e-06, "loss": 0.3386, "num_input_tokens_seen": 426139888, "step": 7605 }, { "epoch": 16.937639198218264, "loss": 0.3556758165359497, "loss_ce": 8.5016421508044e-05, "loss_iou": 0.162109375, "loss_num": 0.006103515625, "loss_xval": 0.35546875, "num_input_tokens_seen": 426139888, "step": 7605 }, { "epoch": 16.93986636971047, "grad_norm": 18.98232650756836, "learning_rate": 1e-06, "loss": 0.4427, "num_input_tokens_seen": 426196256, "step": 7606 }, { "epoch": 16.93986636971047, "loss": 0.4649594724178314, "loss_ce": 0.00011571809591259807, "loss_iou": 0.205078125, "loss_num": 0.01104736328125, "loss_xval": 0.46484375, "num_input_tokens_seen": 426196256, "step": 7606 }, { "epoch": 16.942093541202674, "grad_norm": 774.5324096679688, "learning_rate": 1e-06, "loss": 0.3919, "num_input_tokens_seen": 426253332, "step": 7607 }, { "epoch": 16.942093541202674, "loss": 0.3354228436946869, "loss_ce": 9.569604299031198e-05, "loss_iou": 0.12890625, "loss_num": 0.015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 426253332, "step": 7607 }, { "epoch": 16.94432071269488, "grad_norm": 53.58465576171875, "learning_rate": 1e-06, "loss": 0.4068, "num_input_tokens_seen": 426310116, "step": 7608 }, { "epoch": 16.94432071269488, "loss": 0.2681652903556824, "loss_ce": 9.887861961033195e-05, "loss_iou": 0.11669921875, "loss_num": 0.006988525390625, "loss_xval": 0.267578125, "num_input_tokens_seen": 426310116, "step": 7608 }, { "epoch": 16.946547884187083, "grad_norm": 19.653953552246094, "learning_rate": 1e-06, "loss": 0.3449, "num_input_tokens_seen": 426367920, "step": 7609 }, { "epoch": 16.946547884187083, "loss": 0.2864606976509094, "loss_ce": 0.0001142588589573279, "loss_iou": 0.126953125, "loss_num": 0.00640869140625, "loss_xval": 0.287109375, "num_input_tokens_seen": 426367920, "step": 7609 }, { "epoch": 16.948775055679288, "grad_norm": 15.588455200195312, "learning_rate": 1e-06, "loss": 0.5204, "num_input_tokens_seen": 426423652, "step": 7610 }, { "epoch": 16.948775055679288, "loss": 0.5305978655815125, "loss_ce": 8.031211473280564e-05, "loss_iou": 0.240234375, "loss_num": 0.01019287109375, "loss_xval": 0.53125, "num_input_tokens_seen": 426423652, "step": 7610 }, { "epoch": 16.951002227171493, "grad_norm": 14.135087013244629, "learning_rate": 1e-06, "loss": 0.3808, "num_input_tokens_seen": 426480624, "step": 7611 }, { "epoch": 16.951002227171493, "loss": 0.4105345606803894, "loss_ce": 7.310426735784858e-05, "loss_iou": 0.1767578125, "loss_num": 0.01123046875, "loss_xval": 0.41015625, "num_input_tokens_seen": 426480624, "step": 7611 }, { "epoch": 16.953229398663698, "grad_norm": 14.242335319519043, "learning_rate": 1e-06, "loss": 0.3675, "num_input_tokens_seen": 426537756, "step": 7612 }, { "epoch": 16.953229398663698, "loss": 0.37502074241638184, "loss_ce": 8.180327131412923e-05, "loss_iou": 0.171875, "loss_num": 0.00628662109375, "loss_xval": 0.375, "num_input_tokens_seen": 426537756, "step": 7612 }, { "epoch": 16.955456570155903, "grad_norm": 17.119190216064453, "learning_rate": 1e-06, "loss": 0.3499, "num_input_tokens_seen": 426592748, "step": 7613 }, { "epoch": 16.955456570155903, "loss": 0.3910788893699646, "loss_ce": 8.76820704434067e-05, "loss_iou": 0.158203125, "loss_num": 0.0146484375, "loss_xval": 0.390625, "num_input_tokens_seen": 426592748, "step": 7613 }, { "epoch": 16.957683741648108, "grad_norm": 19.266357421875, "learning_rate": 1e-06, "loss": 0.3833, "num_input_tokens_seen": 426648636, "step": 7614 }, { "epoch": 16.957683741648108, "loss": 0.27193689346313477, "loss_ce": 8.629226795164868e-05, "loss_iou": 0.12353515625, "loss_num": 0.004852294921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 426648636, "step": 7614 }, { "epoch": 16.959910913140313, "grad_norm": 14.205963134765625, "learning_rate": 1e-06, "loss": 0.3386, "num_input_tokens_seen": 426706392, "step": 7615 }, { "epoch": 16.959910913140313, "loss": 0.3684968948364258, "loss_ce": 8.871590398484841e-05, "loss_iou": 0.166015625, "loss_num": 0.00726318359375, "loss_xval": 0.369140625, "num_input_tokens_seen": 426706392, "step": 7615 }, { "epoch": 16.962138084632517, "grad_norm": 31.090105056762695, "learning_rate": 1e-06, "loss": 0.4142, "num_input_tokens_seen": 426762144, "step": 7616 }, { "epoch": 16.962138084632517, "loss": 0.2914935052394867, "loss_ce": 8.114362572086975e-05, "loss_iou": 0.11962890625, "loss_num": 0.0106201171875, "loss_xval": 0.291015625, "num_input_tokens_seen": 426762144, "step": 7616 }, { "epoch": 16.964365256124722, "grad_norm": 16.874919891357422, "learning_rate": 1e-06, "loss": 0.3053, "num_input_tokens_seen": 426820804, "step": 7617 }, { "epoch": 16.964365256124722, "loss": 0.3068510890007019, "loss_ce": 8.837327914079651e-05, "loss_iou": 0.140625, "loss_num": 0.00506591796875, "loss_xval": 0.306640625, "num_input_tokens_seen": 426820804, "step": 7617 }, { "epoch": 16.966592427616927, "grad_norm": 21.651851654052734, "learning_rate": 1e-06, "loss": 0.6437, "num_input_tokens_seen": 426878344, "step": 7618 }, { "epoch": 16.966592427616927, "loss": 0.6224051713943481, "loss_ce": 9.070623491425067e-05, "loss_iou": 0.26953125, "loss_num": 0.0164794921875, "loss_xval": 0.62109375, "num_input_tokens_seen": 426878344, "step": 7618 }, { "epoch": 16.968819599109132, "grad_norm": 15.368985176086426, "learning_rate": 1e-06, "loss": 0.4305, "num_input_tokens_seen": 426936372, "step": 7619 }, { "epoch": 16.968819599109132, "loss": 0.4007796049118042, "loss_ce": 8.378856000490487e-05, "loss_iou": 0.162109375, "loss_num": 0.01544189453125, "loss_xval": 0.400390625, "num_input_tokens_seen": 426936372, "step": 7619 }, { "epoch": 16.971046770601337, "grad_norm": 16.223318099975586, "learning_rate": 1e-06, "loss": 0.2849, "num_input_tokens_seen": 426990840, "step": 7620 }, { "epoch": 16.971046770601337, "loss": 0.3703581690788269, "loss_ce": 0.00017992404173128307, "loss_iou": 0.16796875, "loss_num": 0.0069580078125, "loss_xval": 0.37109375, "num_input_tokens_seen": 426990840, "step": 7620 }, { "epoch": 16.97327394209354, "grad_norm": 42.12100601196289, "learning_rate": 1e-06, "loss": 0.4774, "num_input_tokens_seen": 427049316, "step": 7621 }, { "epoch": 16.97327394209354, "loss": 0.4062255620956421, "loss_ce": 9.765510185388848e-05, "loss_iou": 0.1767578125, "loss_num": 0.010498046875, "loss_xval": 0.40625, "num_input_tokens_seen": 427049316, "step": 7621 }, { "epoch": 16.975501113585747, "grad_norm": 15.246018409729004, "learning_rate": 1e-06, "loss": 0.4491, "num_input_tokens_seen": 427105040, "step": 7622 }, { "epoch": 16.975501113585747, "loss": 0.4461430311203003, "loss_ce": 9.8112752311863e-05, "loss_iou": 0.1982421875, "loss_num": 0.01007080078125, "loss_xval": 0.4453125, "num_input_tokens_seen": 427105040, "step": 7622 }, { "epoch": 16.97772828507795, "grad_norm": 74.06937408447266, "learning_rate": 1e-06, "loss": 0.5622, "num_input_tokens_seen": 427159736, "step": 7623 }, { "epoch": 16.97772828507795, "loss": 0.5899186730384827, "loss_ce": 7.488045957870781e-05, "loss_iou": 0.259765625, "loss_num": 0.0140380859375, "loss_xval": 0.58984375, "num_input_tokens_seen": 427159736, "step": 7623 }, { "epoch": 16.979955456570156, "grad_norm": 17.075176239013672, "learning_rate": 1e-06, "loss": 0.4932, "num_input_tokens_seen": 427216528, "step": 7624 }, { "epoch": 16.979955456570156, "loss": 0.5001233816146851, "loss_ce": 0.00012338865781202912, "loss_iou": 0.234375, "loss_num": 0.00616455078125, "loss_xval": 0.5, "num_input_tokens_seen": 427216528, "step": 7624 }, { "epoch": 16.98218262806236, "grad_norm": 12.647735595703125, "learning_rate": 1e-06, "loss": 0.3257, "num_input_tokens_seen": 427274456, "step": 7625 }, { "epoch": 16.98218262806236, "loss": 0.2577294707298279, "loss_ce": 0.00010009224934037775, "loss_iou": 0.1142578125, "loss_num": 0.005889892578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 427274456, "step": 7625 }, { "epoch": 16.984409799554566, "grad_norm": 14.19373607635498, "learning_rate": 1e-06, "loss": 0.5021, "num_input_tokens_seen": 427328280, "step": 7626 }, { "epoch": 16.984409799554566, "loss": 0.5511077642440796, "loss_ce": 8.234484994318336e-05, "loss_iou": 0.2314453125, "loss_num": 0.0174560546875, "loss_xval": 0.55078125, "num_input_tokens_seen": 427328280, "step": 7626 }, { "epoch": 16.98663697104677, "grad_norm": 14.599117279052734, "learning_rate": 1e-06, "loss": 0.4076, "num_input_tokens_seen": 427384024, "step": 7627 }, { "epoch": 16.98663697104677, "loss": 0.40987348556518555, "loss_ce": 8.344671368831769e-05, "loss_iou": 0.1845703125, "loss_num": 0.00811767578125, "loss_xval": 0.41015625, "num_input_tokens_seen": 427384024, "step": 7627 }, { "epoch": 16.988864142538976, "grad_norm": 34.7947998046875, "learning_rate": 1e-06, "loss": 0.391, "num_input_tokens_seen": 427439656, "step": 7628 }, { "epoch": 16.988864142538976, "loss": 0.4776158928871155, "loss_ce": 7.684988668188453e-05, "loss_iou": 0.2138671875, "loss_num": 0.00994873046875, "loss_xval": 0.4765625, "num_input_tokens_seen": 427439656, "step": 7628 }, { "epoch": 16.99109131403118, "grad_norm": 14.100838661193848, "learning_rate": 1e-06, "loss": 0.3771, "num_input_tokens_seen": 427495292, "step": 7629 }, { "epoch": 16.99109131403118, "loss": 0.41444265842437744, "loss_ce": 0.000105498475022614, "loss_iou": 0.1845703125, "loss_num": 0.0089111328125, "loss_xval": 0.4140625, "num_input_tokens_seen": 427495292, "step": 7629 }, { "epoch": 16.993318485523385, "grad_norm": 25.862396240234375, "learning_rate": 1e-06, "loss": 0.3754, "num_input_tokens_seen": 427552112, "step": 7630 }, { "epoch": 16.993318485523385, "loss": 0.4190499782562256, "loss_ce": 0.00010467211541254073, "loss_iou": 0.1884765625, "loss_num": 0.00848388671875, "loss_xval": 0.41796875, "num_input_tokens_seen": 427552112, "step": 7630 }, { "epoch": 16.99554565701559, "grad_norm": 16.033910751342773, "learning_rate": 1e-06, "loss": 0.2646, "num_input_tokens_seen": 427609520, "step": 7631 }, { "epoch": 16.99554565701559, "loss": 0.28566277027130127, "loss_ce": 7.926442776806653e-05, "loss_iou": 0.1240234375, "loss_num": 0.007354736328125, "loss_xval": 0.28515625, "num_input_tokens_seen": 427609520, "step": 7631 }, { "epoch": 16.997772828507795, "grad_norm": 20.488983154296875, "learning_rate": 1e-06, "loss": 0.4234, "num_input_tokens_seen": 427661616, "step": 7632 }, { "epoch": 16.997772828507795, "loss": 0.40573614835739136, "loss_ce": 9.650552237872034e-05, "loss_iou": 0.1494140625, "loss_num": 0.021484375, "loss_xval": 0.40625, "num_input_tokens_seen": 427661616, "step": 7632 }, { "epoch": 17.0, "grad_norm": 23.362516403198242, "learning_rate": 1e-06, "loss": 0.3575, "num_input_tokens_seen": 427716580, "step": 7633 }, { "epoch": 17.0, "loss": 0.32436951994895935, "loss_ce": 8.972680370789021e-05, "loss_iou": 0.1484375, "loss_num": 0.0052490234375, "loss_xval": 0.32421875, "num_input_tokens_seen": 427716580, "step": 7633 }, { "epoch": 17.002227171492205, "grad_norm": 17.92319107055664, "learning_rate": 1e-06, "loss": 0.4558, "num_input_tokens_seen": 427773004, "step": 7634 }, { "epoch": 17.002227171492205, "loss": 0.4817723333835602, "loss_ce": 8.288519165944308e-05, "loss_iou": 0.2080078125, "loss_num": 0.012939453125, "loss_xval": 0.482421875, "num_input_tokens_seen": 427773004, "step": 7634 }, { "epoch": 17.00445434298441, "grad_norm": 26.28542709350586, "learning_rate": 1e-06, "loss": 0.4383, "num_input_tokens_seen": 427828620, "step": 7635 }, { "epoch": 17.00445434298441, "loss": 0.4154224097728729, "loss_ce": 7.814820855855942e-05, "loss_iou": 0.185546875, "loss_num": 0.00885009765625, "loss_xval": 0.416015625, "num_input_tokens_seen": 427828620, "step": 7635 }, { "epoch": 17.006681514476615, "grad_norm": 18.24226188659668, "learning_rate": 1e-06, "loss": 0.3023, "num_input_tokens_seen": 427885192, "step": 7636 }, { "epoch": 17.006681514476615, "loss": 0.2849215865135193, "loss_ce": 7.052010187180713e-05, "loss_iou": 0.130859375, "loss_num": 0.0045166015625, "loss_xval": 0.28515625, "num_input_tokens_seen": 427885192, "step": 7636 }, { "epoch": 17.00890868596882, "grad_norm": 30.52312660217285, "learning_rate": 1e-06, "loss": 0.4783, "num_input_tokens_seen": 427939364, "step": 7637 }, { "epoch": 17.00890868596882, "loss": 0.46554726362228394, "loss_ce": 9.315512579632923e-05, "loss_iou": 0.19921875, "loss_num": 0.01348876953125, "loss_xval": 0.46484375, "num_input_tokens_seen": 427939364, "step": 7637 }, { "epoch": 17.011135857461024, "grad_norm": 66.4842300415039, "learning_rate": 1e-06, "loss": 0.3289, "num_input_tokens_seen": 427998324, "step": 7638 }, { "epoch": 17.011135857461024, "loss": 0.23030278086662292, "loss_ce": 7.816310971975327e-05, "loss_iou": 0.1044921875, "loss_num": 0.00433349609375, "loss_xval": 0.23046875, "num_input_tokens_seen": 427998324, "step": 7638 }, { "epoch": 17.01336302895323, "grad_norm": 20.621551513671875, "learning_rate": 1e-06, "loss": 0.4443, "num_input_tokens_seen": 428052968, "step": 7639 }, { "epoch": 17.01336302895323, "loss": 0.502518355846405, "loss_ce": 7.69448815844953e-05, "loss_iou": 0.1943359375, "loss_num": 0.022705078125, "loss_xval": 0.50390625, "num_input_tokens_seen": 428052968, "step": 7639 }, { "epoch": 17.015590200445434, "grad_norm": 16.662851333618164, "learning_rate": 1e-06, "loss": 0.6814, "num_input_tokens_seen": 428110384, "step": 7640 }, { "epoch": 17.015590200445434, "loss": 0.931036114692688, "loss_ce": 0.00015838468971196562, "loss_iou": 0.349609375, "loss_num": 0.04638671875, "loss_xval": 0.9296875, "num_input_tokens_seen": 428110384, "step": 7640 }, { "epoch": 17.01781737193764, "grad_norm": 22.56868553161621, "learning_rate": 1e-06, "loss": 0.2991, "num_input_tokens_seen": 428166924, "step": 7641 }, { "epoch": 17.01781737193764, "loss": 0.22560018301010132, "loss_ce": 7.529689173679799e-05, "loss_iou": 0.09423828125, "loss_num": 0.0074462890625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 428166924, "step": 7641 }, { "epoch": 17.020044543429844, "grad_norm": 19.790267944335938, "learning_rate": 1e-06, "loss": 0.4237, "num_input_tokens_seen": 428223112, "step": 7642 }, { "epoch": 17.020044543429844, "loss": 0.424101322889328, "loss_ce": 0.00015111861284822226, "loss_iou": 0.185546875, "loss_num": 0.0107421875, "loss_xval": 0.423828125, "num_input_tokens_seen": 428223112, "step": 7642 }, { "epoch": 17.02227171492205, "grad_norm": 22.081754684448242, "learning_rate": 1e-06, "loss": 0.5015, "num_input_tokens_seen": 428279696, "step": 7643 }, { "epoch": 17.02227171492205, "loss": 0.5504837036132812, "loss_ce": 0.00019074320152867585, "loss_iou": 0.2265625, "loss_num": 0.0196533203125, "loss_xval": 0.55078125, "num_input_tokens_seen": 428279696, "step": 7643 }, { "epoch": 17.024498886414253, "grad_norm": 12.306164741516113, "learning_rate": 1e-06, "loss": 0.4179, "num_input_tokens_seen": 428335044, "step": 7644 }, { "epoch": 17.024498886414253, "loss": 0.5076683163642883, "loss_ce": 9.997590677812696e-05, "loss_iou": 0.2392578125, "loss_num": 0.005889892578125, "loss_xval": 0.5078125, "num_input_tokens_seen": 428335044, "step": 7644 }, { "epoch": 17.02672605790646, "grad_norm": 21.696956634521484, "learning_rate": 1e-06, "loss": 0.48, "num_input_tokens_seen": 428390008, "step": 7645 }, { "epoch": 17.02672605790646, "loss": 0.4708196818828583, "loss_ce": 0.00011655631533358246, "loss_iou": 0.1982421875, "loss_num": 0.01470947265625, "loss_xval": 0.470703125, "num_input_tokens_seen": 428390008, "step": 7645 }, { "epoch": 17.028953229398663, "grad_norm": 36.35521697998047, "learning_rate": 1e-06, "loss": 0.3621, "num_input_tokens_seen": 428447080, "step": 7646 }, { "epoch": 17.028953229398663, "loss": 0.2672528028488159, "loss_ce": 0.00016677916573826224, "loss_iou": 0.10546875, "loss_num": 0.0111083984375, "loss_xval": 0.267578125, "num_input_tokens_seen": 428447080, "step": 7646 }, { "epoch": 17.031180400890868, "grad_norm": 21.043344497680664, "learning_rate": 1e-06, "loss": 0.3144, "num_input_tokens_seen": 428503564, "step": 7647 }, { "epoch": 17.031180400890868, "loss": 0.36540961265563965, "loss_ce": 8.367877308046445e-05, "loss_iou": 0.1728515625, "loss_num": 0.003997802734375, "loss_xval": 0.365234375, "num_input_tokens_seen": 428503564, "step": 7647 }, { "epoch": 17.033407572383073, "grad_norm": 24.031978607177734, "learning_rate": 1e-06, "loss": 0.334, "num_input_tokens_seen": 428559008, "step": 7648 }, { "epoch": 17.033407572383073, "loss": 0.2920740842819214, "loss_ce": 8.189551590476185e-05, "loss_iou": 0.1220703125, "loss_num": 0.00958251953125, "loss_xval": 0.29296875, "num_input_tokens_seen": 428559008, "step": 7648 }, { "epoch": 17.035634743875278, "grad_norm": 21.48931121826172, "learning_rate": 1e-06, "loss": 0.4046, "num_input_tokens_seen": 428613808, "step": 7649 }, { "epoch": 17.035634743875278, "loss": 0.21085509657859802, "loss_ce": 8.544648881070316e-05, "loss_iou": 0.0947265625, "loss_num": 0.0042724609375, "loss_xval": 0.2109375, "num_input_tokens_seen": 428613808, "step": 7649 }, { "epoch": 17.037861915367483, "grad_norm": 13.954534530639648, "learning_rate": 1e-06, "loss": 0.4201, "num_input_tokens_seen": 428672352, "step": 7650 }, { "epoch": 17.037861915367483, "loss": 0.25243866443634033, "loss_ce": 8.880048699211329e-05, "loss_iou": 0.10986328125, "loss_num": 0.006561279296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 428672352, "step": 7650 }, { "epoch": 17.040089086859687, "grad_norm": 16.805583953857422, "learning_rate": 1e-06, "loss": 0.4008, "num_input_tokens_seen": 428726452, "step": 7651 }, { "epoch": 17.040089086859687, "loss": 0.3927674889564514, "loss_ce": 6.728603329975158e-05, "loss_iou": 0.1630859375, "loss_num": 0.01318359375, "loss_xval": 0.392578125, "num_input_tokens_seen": 428726452, "step": 7651 }, { "epoch": 17.042316258351892, "grad_norm": 22.515275955200195, "learning_rate": 1e-06, "loss": 0.3941, "num_input_tokens_seen": 428783940, "step": 7652 }, { "epoch": 17.042316258351892, "loss": 0.4546872675418854, "loss_ce": 9.743101691128686e-05, "loss_iou": 0.19921875, "loss_num": 0.010986328125, "loss_xval": 0.455078125, "num_input_tokens_seen": 428783940, "step": 7652 }, { "epoch": 17.044543429844097, "grad_norm": 23.46599769592285, "learning_rate": 1e-06, "loss": 0.3476, "num_input_tokens_seen": 428839472, "step": 7653 }, { "epoch": 17.044543429844097, "loss": 0.33765748143196106, "loss_ce": 7.202455890364945e-05, "loss_iou": 0.1328125, "loss_num": 0.01458740234375, "loss_xval": 0.337890625, "num_input_tokens_seen": 428839472, "step": 7653 }, { "epoch": 17.046770601336302, "grad_norm": 21.571443557739258, "learning_rate": 1e-06, "loss": 0.3301, "num_input_tokens_seen": 428896740, "step": 7654 }, { "epoch": 17.046770601336302, "loss": 0.3378799855709076, "loss_ce": 0.00011143009760417044, "loss_iou": 0.1474609375, "loss_num": 0.00836181640625, "loss_xval": 0.337890625, "num_input_tokens_seen": 428896740, "step": 7654 }, { "epoch": 17.048997772828507, "grad_norm": 20.132381439208984, "learning_rate": 1e-06, "loss": 0.2645, "num_input_tokens_seen": 428951072, "step": 7655 }, { "epoch": 17.048997772828507, "loss": 0.24837635457515717, "loss_ce": 8.534367952961475e-05, "loss_iou": 0.1103515625, "loss_num": 0.00543212890625, "loss_xval": 0.248046875, "num_input_tokens_seen": 428951072, "step": 7655 }, { "epoch": 17.051224944320712, "grad_norm": 20.21780014038086, "learning_rate": 1e-06, "loss": 0.4544, "num_input_tokens_seen": 429007504, "step": 7656 }, { "epoch": 17.051224944320712, "loss": 0.4101518392562866, "loss_ce": 0.00011766105308197439, "loss_iou": 0.1904296875, "loss_num": 0.005859375, "loss_xval": 0.41015625, "num_input_tokens_seen": 429007504, "step": 7656 }, { "epoch": 17.053452115812917, "grad_norm": 17.552833557128906, "learning_rate": 1e-06, "loss": 0.4551, "num_input_tokens_seen": 429061384, "step": 7657 }, { "epoch": 17.053452115812917, "loss": 0.42342638969421387, "loss_ce": 8.654448902234435e-05, "loss_iou": 0.1943359375, "loss_num": 0.0069580078125, "loss_xval": 0.423828125, "num_input_tokens_seen": 429061384, "step": 7657 }, { "epoch": 17.05567928730512, "grad_norm": 16.712745666503906, "learning_rate": 1e-06, "loss": 0.4366, "num_input_tokens_seen": 429117780, "step": 7658 }, { "epoch": 17.05567928730512, "loss": 0.5296164155006409, "loss_ce": 7.536636258009821e-05, "loss_iou": 0.2412109375, "loss_num": 0.0091552734375, "loss_xval": 0.53125, "num_input_tokens_seen": 429117780, "step": 7658 }, { "epoch": 17.057906458797326, "grad_norm": 17.775182723999023, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 429174152, "step": 7659 }, { "epoch": 17.057906458797326, "loss": 0.34088775515556335, "loss_ce": 6.743887206539512e-05, "loss_iou": 0.146484375, "loss_num": 0.00946044921875, "loss_xval": 0.33984375, "num_input_tokens_seen": 429174152, "step": 7659 }, { "epoch": 17.06013363028953, "grad_norm": 18.0156192779541, "learning_rate": 1e-06, "loss": 0.3511, "num_input_tokens_seen": 429231128, "step": 7660 }, { "epoch": 17.06013363028953, "loss": 0.3127681612968445, "loss_ce": 8.506246376782656e-05, "loss_iou": 0.130859375, "loss_num": 0.01025390625, "loss_xval": 0.3125, "num_input_tokens_seen": 429231128, "step": 7660 }, { "epoch": 17.062360801781736, "grad_norm": 52.67565155029297, "learning_rate": 1e-06, "loss": 0.6399, "num_input_tokens_seen": 429284628, "step": 7661 }, { "epoch": 17.062360801781736, "loss": 0.8818075060844421, "loss_ce": 9.362171840621158e-05, "loss_iou": 0.35546875, "loss_num": 0.03466796875, "loss_xval": 0.8828125, "num_input_tokens_seen": 429284628, "step": 7661 }, { "epoch": 17.06458797327394, "grad_norm": 27.630023956298828, "learning_rate": 1e-06, "loss": 0.4078, "num_input_tokens_seen": 429338128, "step": 7662 }, { "epoch": 17.06458797327394, "loss": 0.40474581718444824, "loss_ce": 8.270953549072146e-05, "loss_iou": 0.1875, "loss_num": 0.005828857421875, "loss_xval": 0.404296875, "num_input_tokens_seen": 429338128, "step": 7662 }, { "epoch": 17.066815144766146, "grad_norm": 22.74899673461914, "learning_rate": 1e-06, "loss": 0.5521, "num_input_tokens_seen": 429390620, "step": 7663 }, { "epoch": 17.066815144766146, "loss": 0.45908236503601074, "loss_ce": 9.800391853787005e-05, "loss_iou": 0.19921875, "loss_num": 0.011962890625, "loss_xval": 0.458984375, "num_input_tokens_seen": 429390620, "step": 7663 }, { "epoch": 17.06904231625835, "grad_norm": 16.609214782714844, "learning_rate": 1e-06, "loss": 0.4099, "num_input_tokens_seen": 429445244, "step": 7664 }, { "epoch": 17.06904231625835, "loss": 0.3943771719932556, "loss_ce": 9.003834566101432e-05, "loss_iou": 0.162109375, "loss_num": 0.01409912109375, "loss_xval": 0.39453125, "num_input_tokens_seen": 429445244, "step": 7664 }, { "epoch": 17.071269487750556, "grad_norm": 21.4134578704834, "learning_rate": 1e-06, "loss": 0.5788, "num_input_tokens_seen": 429502620, "step": 7665 }, { "epoch": 17.071269487750556, "loss": 0.5071187615394592, "loss_ce": 9.971446706913412e-05, "loss_iou": 0.2236328125, "loss_num": 0.0118408203125, "loss_xval": 0.5078125, "num_input_tokens_seen": 429502620, "step": 7665 }, { "epoch": 17.07349665924276, "grad_norm": 18.723894119262695, "learning_rate": 1e-06, "loss": 0.4943, "num_input_tokens_seen": 429562224, "step": 7666 }, { "epoch": 17.07349665924276, "loss": 0.47562456130981445, "loss_ce": 0.0001607102749403566, "loss_iou": 0.2080078125, "loss_num": 0.011962890625, "loss_xval": 0.474609375, "num_input_tokens_seen": 429562224, "step": 7666 }, { "epoch": 17.075723830734965, "grad_norm": 19.977685928344727, "learning_rate": 1e-06, "loss": 0.3345, "num_input_tokens_seen": 429620100, "step": 7667 }, { "epoch": 17.075723830734965, "loss": 0.31252235174179077, "loss_ce": 8.34053716971539e-05, "loss_iou": 0.134765625, "loss_num": 0.00836181640625, "loss_xval": 0.3125, "num_input_tokens_seen": 429620100, "step": 7667 }, { "epoch": 17.07795100222717, "grad_norm": 19.599905014038086, "learning_rate": 1e-06, "loss": 0.4729, "num_input_tokens_seen": 429677708, "step": 7668 }, { "epoch": 17.07795100222717, "loss": 0.6072638034820557, "loss_ce": 8.606135088484734e-05, "loss_iou": 0.28125, "loss_num": 0.0091552734375, "loss_xval": 0.60546875, "num_input_tokens_seen": 429677708, "step": 7668 }, { "epoch": 17.080178173719375, "grad_norm": 14.583081245422363, "learning_rate": 1e-06, "loss": 0.4426, "num_input_tokens_seen": 429735204, "step": 7669 }, { "epoch": 17.080178173719375, "loss": 0.43759340047836304, "loss_ce": 9.339496318716556e-05, "loss_iou": 0.2060546875, "loss_num": 0.005096435546875, "loss_xval": 0.4375, "num_input_tokens_seen": 429735204, "step": 7669 }, { "epoch": 17.08240534521158, "grad_norm": 34.837467193603516, "learning_rate": 1e-06, "loss": 0.4082, "num_input_tokens_seen": 429789724, "step": 7670 }, { "epoch": 17.08240534521158, "loss": 0.3650968670845032, "loss_ce": 0.00010662678687367588, "loss_iou": 0.1689453125, "loss_num": 0.0054931640625, "loss_xval": 0.365234375, "num_input_tokens_seen": 429789724, "step": 7670 }, { "epoch": 17.084632516703785, "grad_norm": 16.056995391845703, "learning_rate": 1e-06, "loss": 0.332, "num_input_tokens_seen": 429846636, "step": 7671 }, { "epoch": 17.084632516703785, "loss": 0.30904102325439453, "loss_ce": 8.104251901386306e-05, "loss_iou": 0.13671875, "loss_num": 0.006988525390625, "loss_xval": 0.30859375, "num_input_tokens_seen": 429846636, "step": 7671 }, { "epoch": 17.08685968819599, "grad_norm": 19.151451110839844, "learning_rate": 1e-06, "loss": 0.4416, "num_input_tokens_seen": 429905284, "step": 7672 }, { "epoch": 17.08685968819599, "loss": 0.47447669506073, "loss_ce": 0.0001114873739425093, "loss_iou": 0.224609375, "loss_num": 0.00518798828125, "loss_xval": 0.474609375, "num_input_tokens_seen": 429905284, "step": 7672 }, { "epoch": 17.089086859688194, "grad_norm": 14.329705238342285, "learning_rate": 1e-06, "loss": 0.6205, "num_input_tokens_seen": 429960820, "step": 7673 }, { "epoch": 17.089086859688194, "loss": 0.6159265041351318, "loss_ce": 8.180980512406677e-05, "loss_iou": 0.255859375, "loss_num": 0.020751953125, "loss_xval": 0.6171875, "num_input_tokens_seen": 429960820, "step": 7673 }, { "epoch": 17.0913140311804, "grad_norm": 12.72126579284668, "learning_rate": 1e-06, "loss": 0.2597, "num_input_tokens_seen": 430014656, "step": 7674 }, { "epoch": 17.0913140311804, "loss": 0.32735052704811096, "loss_ce": 8.001940295798704e-05, "loss_iou": 0.123046875, "loss_num": 0.0162353515625, "loss_xval": 0.328125, "num_input_tokens_seen": 430014656, "step": 7674 }, { "epoch": 17.093541202672604, "grad_norm": 15.150796890258789, "learning_rate": 1e-06, "loss": 0.414, "num_input_tokens_seen": 430071736, "step": 7675 }, { "epoch": 17.093541202672604, "loss": 0.316550612449646, "loss_ce": 8.332452125614509e-05, "loss_iou": 0.146484375, "loss_num": 0.00482177734375, "loss_xval": 0.31640625, "num_input_tokens_seen": 430071736, "step": 7675 }, { "epoch": 17.09576837416481, "grad_norm": 22.740732192993164, "learning_rate": 1e-06, "loss": 0.4941, "num_input_tokens_seen": 430127552, "step": 7676 }, { "epoch": 17.09576837416481, "loss": 0.688327968120575, "loss_ce": 9.552988194627687e-05, "loss_iou": 0.283203125, "loss_num": 0.024169921875, "loss_xval": 0.6875, "num_input_tokens_seen": 430127552, "step": 7676 }, { "epoch": 17.097995545657014, "grad_norm": 15.583344459533691, "learning_rate": 1e-06, "loss": 0.3403, "num_input_tokens_seen": 430183984, "step": 7677 }, { "epoch": 17.097995545657014, "loss": 0.2791343033313751, "loss_ce": 8.15772291389294e-05, "loss_iou": 0.11865234375, "loss_num": 0.00823974609375, "loss_xval": 0.279296875, "num_input_tokens_seen": 430183984, "step": 7677 }, { "epoch": 17.100222717149222, "grad_norm": 15.863219261169434, "learning_rate": 1e-06, "loss": 0.506, "num_input_tokens_seen": 430239088, "step": 7678 }, { "epoch": 17.100222717149222, "loss": 0.4502248167991638, "loss_ce": 9.05133638298139e-05, "loss_iou": 0.197265625, "loss_num": 0.010986328125, "loss_xval": 0.44921875, "num_input_tokens_seen": 430239088, "step": 7678 }, { "epoch": 17.102449888641427, "grad_norm": 17.146371841430664, "learning_rate": 1e-06, "loss": 0.4628, "num_input_tokens_seen": 430293988, "step": 7679 }, { "epoch": 17.102449888641427, "loss": 0.5290303826332092, "loss_ce": 9.971238614525646e-05, "loss_iou": 0.20703125, "loss_num": 0.02294921875, "loss_xval": 0.52734375, "num_input_tokens_seen": 430293988, "step": 7679 }, { "epoch": 17.104677060133632, "grad_norm": 21.910921096801758, "learning_rate": 1e-06, "loss": 0.4983, "num_input_tokens_seen": 430350332, "step": 7680 }, { "epoch": 17.104677060133632, "loss": 0.520658552646637, "loss_ce": 8.970967610366642e-05, "loss_iou": 0.203125, "loss_num": 0.023193359375, "loss_xval": 0.51953125, "num_input_tokens_seen": 430350332, "step": 7680 }, { "epoch": 17.106904231625837, "grad_norm": 21.317134857177734, "learning_rate": 1e-06, "loss": 0.4361, "num_input_tokens_seen": 430407436, "step": 7681 }, { "epoch": 17.106904231625837, "loss": 0.5610302090644836, "loss_ce": 0.00011710502440109849, "loss_iou": 0.234375, "loss_num": 0.018310546875, "loss_xval": 0.5625, "num_input_tokens_seen": 430407436, "step": 7681 }, { "epoch": 17.10913140311804, "grad_norm": 15.316725730895996, "learning_rate": 1e-06, "loss": 0.3235, "num_input_tokens_seen": 430464132, "step": 7682 }, { "epoch": 17.10913140311804, "loss": 0.3164796233177185, "loss_ce": 7.337974238907918e-05, "loss_iou": 0.1337890625, "loss_num": 0.0098876953125, "loss_xval": 0.31640625, "num_input_tokens_seen": 430464132, "step": 7682 }, { "epoch": 17.111358574610247, "grad_norm": 17.729398727416992, "learning_rate": 1e-06, "loss": 0.6469, "num_input_tokens_seen": 430518768, "step": 7683 }, { "epoch": 17.111358574610247, "loss": 0.49617958068847656, "loss_ce": 8.584219176555052e-05, "loss_iou": 0.216796875, "loss_num": 0.012451171875, "loss_xval": 0.49609375, "num_input_tokens_seen": 430518768, "step": 7683 }, { "epoch": 17.11358574610245, "grad_norm": 25.321056365966797, "learning_rate": 1e-06, "loss": 0.3942, "num_input_tokens_seen": 430574312, "step": 7684 }, { "epoch": 17.11358574610245, "loss": 0.2743665277957916, "loss_ce": 8.980404527392238e-05, "loss_iou": 0.1201171875, "loss_num": 0.006866455078125, "loss_xval": 0.2734375, "num_input_tokens_seen": 430574312, "step": 7684 }, { "epoch": 17.115812917594656, "grad_norm": 33.18670654296875, "learning_rate": 1e-06, "loss": 0.4818, "num_input_tokens_seen": 430629924, "step": 7685 }, { "epoch": 17.115812917594656, "loss": 0.4744425117969513, "loss_ce": 7.726340845692903e-05, "loss_iou": 0.201171875, "loss_num": 0.01434326171875, "loss_xval": 0.474609375, "num_input_tokens_seen": 430629924, "step": 7685 }, { "epoch": 17.11804008908686, "grad_norm": 18.0744571685791, "learning_rate": 1e-06, "loss": 0.4743, "num_input_tokens_seen": 430684496, "step": 7686 }, { "epoch": 17.11804008908686, "loss": 0.4138874113559723, "loss_ce": 6.904240581206977e-05, "loss_iou": 0.1875, "loss_num": 0.007537841796875, "loss_xval": 0.4140625, "num_input_tokens_seen": 430684496, "step": 7686 }, { "epoch": 17.120267260579066, "grad_norm": 12.516131401062012, "learning_rate": 1e-06, "loss": 0.4325, "num_input_tokens_seen": 430740936, "step": 7687 }, { "epoch": 17.120267260579066, "loss": 0.41224995255470276, "loss_ce": 0.00011005052510881796, "loss_iou": 0.1923828125, "loss_num": 0.00555419921875, "loss_xval": 0.412109375, "num_input_tokens_seen": 430740936, "step": 7687 }, { "epoch": 17.12249443207127, "grad_norm": 14.209662437438965, "learning_rate": 1e-06, "loss": 0.3043, "num_input_tokens_seen": 430797872, "step": 7688 }, { "epoch": 17.12249443207127, "loss": 0.36360567808151245, "loss_ce": 8.030241588130593e-05, "loss_iou": 0.1650390625, "loss_num": 0.00665283203125, "loss_xval": 0.36328125, "num_input_tokens_seen": 430797872, "step": 7688 }, { "epoch": 17.124721603563476, "grad_norm": 24.326807022094727, "learning_rate": 1e-06, "loss": 0.4336, "num_input_tokens_seen": 430856816, "step": 7689 }, { "epoch": 17.124721603563476, "loss": 0.40657997131347656, "loss_ce": 8.581295696785673e-05, "loss_iou": 0.1650390625, "loss_num": 0.01507568359375, "loss_xval": 0.40625, "num_input_tokens_seen": 430856816, "step": 7689 }, { "epoch": 17.12694877505568, "grad_norm": 15.567830085754395, "learning_rate": 1e-06, "loss": 0.4065, "num_input_tokens_seen": 430913996, "step": 7690 }, { "epoch": 17.12694877505568, "loss": 0.32644379138946533, "loss_ce": 8.880942186806351e-05, "loss_iou": 0.1474609375, "loss_num": 0.006256103515625, "loss_xval": 0.326171875, "num_input_tokens_seen": 430913996, "step": 7690 }, { "epoch": 17.129175946547885, "grad_norm": 27.735408782958984, "learning_rate": 1e-06, "loss": 0.3461, "num_input_tokens_seen": 430970364, "step": 7691 }, { "epoch": 17.129175946547885, "loss": 0.3675483167171478, "loss_ce": 0.00011668518709484488, "loss_iou": 0.1591796875, "loss_num": 0.00994873046875, "loss_xval": 0.3671875, "num_input_tokens_seen": 430970364, "step": 7691 }, { "epoch": 17.13140311804009, "grad_norm": 13.408978462219238, "learning_rate": 1e-06, "loss": 0.435, "num_input_tokens_seen": 431026824, "step": 7692 }, { "epoch": 17.13140311804009, "loss": 0.49011173844337463, "loss_ce": 0.00012149102985858917, "loss_iou": 0.2021484375, "loss_num": 0.0169677734375, "loss_xval": 0.490234375, "num_input_tokens_seen": 431026824, "step": 7692 }, { "epoch": 17.133630289532295, "grad_norm": 19.401525497436523, "learning_rate": 1e-06, "loss": 0.5539, "num_input_tokens_seen": 431082556, "step": 7693 }, { "epoch": 17.133630289532295, "loss": 0.5180162787437439, "loss_ce": 7.190792530309409e-05, "loss_iou": 0.22265625, "loss_num": 0.014404296875, "loss_xval": 0.51953125, "num_input_tokens_seen": 431082556, "step": 7693 }, { "epoch": 17.1358574610245, "grad_norm": 17.272777557373047, "learning_rate": 1e-06, "loss": 0.4486, "num_input_tokens_seen": 431140336, "step": 7694 }, { "epoch": 17.1358574610245, "loss": 0.39839935302734375, "loss_ce": 8.391607843805104e-05, "loss_iou": 0.1650390625, "loss_num": 0.01361083984375, "loss_xval": 0.3984375, "num_input_tokens_seen": 431140336, "step": 7694 }, { "epoch": 17.138084632516705, "grad_norm": 13.000822067260742, "learning_rate": 1e-06, "loss": 0.4472, "num_input_tokens_seen": 431196616, "step": 7695 }, { "epoch": 17.138084632516705, "loss": 0.39486390352249146, "loss_ce": 8.850420999806374e-05, "loss_iou": 0.1806640625, "loss_num": 0.00677490234375, "loss_xval": 0.39453125, "num_input_tokens_seen": 431196616, "step": 7695 }, { "epoch": 17.14031180400891, "grad_norm": 12.154135704040527, "learning_rate": 1e-06, "loss": 0.3779, "num_input_tokens_seen": 431253236, "step": 7696 }, { "epoch": 17.14031180400891, "loss": 0.3711104691028595, "loss_ce": 7.775596168357879e-05, "loss_iou": 0.1708984375, "loss_num": 0.005950927734375, "loss_xval": 0.37109375, "num_input_tokens_seen": 431253236, "step": 7696 }, { "epoch": 17.142538975501115, "grad_norm": 19.33846092224121, "learning_rate": 1e-06, "loss": 0.5077, "num_input_tokens_seen": 431311832, "step": 7697 }, { "epoch": 17.142538975501115, "loss": 0.5122911930084229, "loss_ce": 8.418020297540352e-05, "loss_iou": 0.2197265625, "loss_num": 0.0145263671875, "loss_xval": 0.51171875, "num_input_tokens_seen": 431311832, "step": 7697 }, { "epoch": 17.14476614699332, "grad_norm": 13.823887825012207, "learning_rate": 1e-06, "loss": 0.304, "num_input_tokens_seen": 431367724, "step": 7698 }, { "epoch": 17.14476614699332, "loss": 0.37503981590270996, "loss_ce": 0.00010085223766509444, "loss_iou": 0.16015625, "loss_num": 0.01080322265625, "loss_xval": 0.375, "num_input_tokens_seen": 431367724, "step": 7698 }, { "epoch": 17.146993318485524, "grad_norm": 20.612548828125, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 431423316, "step": 7699 }, { "epoch": 17.146993318485524, "loss": 0.5613565444946289, "loss_ce": 7.721249130554497e-05, "loss_iou": 0.205078125, "loss_num": 0.0301513671875, "loss_xval": 0.5625, "num_input_tokens_seen": 431423316, "step": 7699 }, { "epoch": 17.14922048997773, "grad_norm": 23.749818801879883, "learning_rate": 1e-06, "loss": 0.4074, "num_input_tokens_seen": 431478884, "step": 7700 }, { "epoch": 17.14922048997773, "loss": 0.3793688118457794, "loss_ce": 9.636204777052626e-05, "loss_iou": 0.1513671875, "loss_num": 0.0152587890625, "loss_xval": 0.37890625, "num_input_tokens_seen": 431478884, "step": 7700 }, { "epoch": 17.151447661469934, "grad_norm": 17.16455841064453, "learning_rate": 1e-06, "loss": 0.3772, "num_input_tokens_seen": 431537440, "step": 7701 }, { "epoch": 17.151447661469934, "loss": 0.4580909013748169, "loss_ce": 8.308385440614074e-05, "loss_iou": 0.185546875, "loss_num": 0.01708984375, "loss_xval": 0.45703125, "num_input_tokens_seen": 431537440, "step": 7701 }, { "epoch": 17.15367483296214, "grad_norm": 19.00710105895996, "learning_rate": 1e-06, "loss": 0.4474, "num_input_tokens_seen": 431592904, "step": 7702 }, { "epoch": 17.15367483296214, "loss": 0.4547297954559326, "loss_ce": 7.894145528553054e-05, "loss_iou": 0.1904296875, "loss_num": 0.01483154296875, "loss_xval": 0.455078125, "num_input_tokens_seen": 431592904, "step": 7702 }, { "epoch": 17.155902004454344, "grad_norm": 14.55823040008545, "learning_rate": 1e-06, "loss": 0.2756, "num_input_tokens_seen": 431651384, "step": 7703 }, { "epoch": 17.155902004454344, "loss": 0.23866218328475952, "loss_ce": 7.575111521873623e-05, "loss_iou": 0.10546875, "loss_num": 0.005462646484375, "loss_xval": 0.23828125, "num_input_tokens_seen": 431651384, "step": 7703 }, { "epoch": 17.15812917594655, "grad_norm": 13.731185913085938, "learning_rate": 1e-06, "loss": 0.37, "num_input_tokens_seen": 431708828, "step": 7704 }, { "epoch": 17.15812917594655, "loss": 0.347605437040329, "loss_ce": 7.126451964722946e-05, "loss_iou": 0.1474609375, "loss_num": 0.01043701171875, "loss_xval": 0.34765625, "num_input_tokens_seen": 431708828, "step": 7704 }, { "epoch": 17.160356347438753, "grad_norm": 21.002777099609375, "learning_rate": 1e-06, "loss": 0.5383, "num_input_tokens_seen": 431765240, "step": 7705 }, { "epoch": 17.160356347438753, "loss": 0.4782218933105469, "loss_ce": 7.249362533912063e-05, "loss_iou": 0.1943359375, "loss_num": 0.017822265625, "loss_xval": 0.478515625, "num_input_tokens_seen": 431765240, "step": 7705 }, { "epoch": 17.16258351893096, "grad_norm": 21.7739315032959, "learning_rate": 1e-06, "loss": 0.6177, "num_input_tokens_seen": 431820024, "step": 7706 }, { "epoch": 17.16258351893096, "loss": 0.7398481369018555, "loss_ce": 0.00010208625462837517, "loss_iou": 0.330078125, "loss_num": 0.015625, "loss_xval": 0.73828125, "num_input_tokens_seen": 431820024, "step": 7706 }, { "epoch": 17.164810690423163, "grad_norm": 18.27759552001953, "learning_rate": 1e-06, "loss": 0.3539, "num_input_tokens_seen": 431873500, "step": 7707 }, { "epoch": 17.164810690423163, "loss": 0.3486563265323639, "loss_ce": 8.456400246359408e-05, "loss_iou": 0.1630859375, "loss_num": 0.004669189453125, "loss_xval": 0.34765625, "num_input_tokens_seen": 431873500, "step": 7707 }, { "epoch": 17.167037861915368, "grad_norm": 32.455810546875, "learning_rate": 1e-06, "loss": 0.4563, "num_input_tokens_seen": 431928992, "step": 7708 }, { "epoch": 17.167037861915368, "loss": 0.5565398931503296, "loss_ce": 8.237551082856953e-05, "loss_iou": 0.2060546875, "loss_num": 0.029052734375, "loss_xval": 0.5546875, "num_input_tokens_seen": 431928992, "step": 7708 }, { "epoch": 17.169265033407573, "grad_norm": 24.227785110473633, "learning_rate": 1e-06, "loss": 0.4164, "num_input_tokens_seen": 431985988, "step": 7709 }, { "epoch": 17.169265033407573, "loss": 0.21856805682182312, "loss_ce": 9.271766612073407e-05, "loss_iou": 0.10205078125, "loss_num": 0.00274658203125, "loss_xval": 0.21875, "num_input_tokens_seen": 431985988, "step": 7709 }, { "epoch": 17.171492204899778, "grad_norm": 11.984196662902832, "learning_rate": 1e-06, "loss": 0.2504, "num_input_tokens_seen": 432040140, "step": 7710 }, { "epoch": 17.171492204899778, "loss": 0.2045476734638214, "loss_ce": 7.990330050233752e-05, "loss_iou": 0.087890625, "loss_num": 0.0057373046875, "loss_xval": 0.2041015625, "num_input_tokens_seen": 432040140, "step": 7710 }, { "epoch": 17.173719376391983, "grad_norm": 23.284042358398438, "learning_rate": 1e-06, "loss": 0.3402, "num_input_tokens_seen": 432093984, "step": 7711 }, { "epoch": 17.173719376391983, "loss": 0.3212023973464966, "loss_ce": 6.591899727936834e-05, "loss_iou": 0.138671875, "loss_num": 0.00872802734375, "loss_xval": 0.3203125, "num_input_tokens_seen": 432093984, "step": 7711 }, { "epoch": 17.175946547884188, "grad_norm": 15.671782493591309, "learning_rate": 1e-06, "loss": 0.3822, "num_input_tokens_seen": 432149584, "step": 7712 }, { "epoch": 17.175946547884188, "loss": 0.2187042385339737, "loss_ce": 7.631281914655119e-05, "loss_iou": 0.0869140625, "loss_num": 0.0089111328125, "loss_xval": 0.21875, "num_input_tokens_seen": 432149584, "step": 7712 }, { "epoch": 17.178173719376392, "grad_norm": 17.098852157592773, "learning_rate": 1e-06, "loss": 0.3588, "num_input_tokens_seen": 432206804, "step": 7713 }, { "epoch": 17.178173719376392, "loss": 0.4782329201698303, "loss_ce": 8.348520350409672e-05, "loss_iou": 0.2197265625, "loss_num": 0.007537841796875, "loss_xval": 0.478515625, "num_input_tokens_seen": 432206804, "step": 7713 }, { "epoch": 17.180400890868597, "grad_norm": 23.695720672607422, "learning_rate": 1e-06, "loss": 0.6842, "num_input_tokens_seen": 432259452, "step": 7714 }, { "epoch": 17.180400890868597, "loss": 0.6622397899627686, "loss_ce": 0.00025244534481316805, "loss_iou": 0.29296875, "loss_num": 0.0155029296875, "loss_xval": 0.66015625, "num_input_tokens_seen": 432259452, "step": 7714 }, { "epoch": 17.182628062360802, "grad_norm": 16.1234073638916, "learning_rate": 1e-06, "loss": 0.3404, "num_input_tokens_seen": 432317140, "step": 7715 }, { "epoch": 17.182628062360802, "loss": 0.2790215313434601, "loss_ce": 9.086594945983961e-05, "loss_iou": 0.12109375, "loss_num": 0.007476806640625, "loss_xval": 0.279296875, "num_input_tokens_seen": 432317140, "step": 7715 }, { "epoch": 17.184855233853007, "grad_norm": 21.128246307373047, "learning_rate": 1e-06, "loss": 0.4791, "num_input_tokens_seen": 432375580, "step": 7716 }, { "epoch": 17.184855233853007, "loss": 0.7009245753288269, "loss_ce": 8.839939255267382e-05, "loss_iou": 0.294921875, "loss_num": 0.0224609375, "loss_xval": 0.69921875, "num_input_tokens_seen": 432375580, "step": 7716 }, { "epoch": 17.187082405345212, "grad_norm": 18.01925277709961, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 432428196, "step": 7717 }, { "epoch": 17.187082405345212, "loss": 0.48470473289489746, "loss_ce": 8.559299749322236e-05, "loss_iou": 0.2138671875, "loss_num": 0.0115966796875, "loss_xval": 0.484375, "num_input_tokens_seen": 432428196, "step": 7717 }, { "epoch": 17.189309576837417, "grad_norm": 19.113862991333008, "learning_rate": 1e-06, "loss": 0.4242, "num_input_tokens_seen": 432482660, "step": 7718 }, { "epoch": 17.189309576837417, "loss": 0.41426295042037964, "loss_ce": 7.838521560188383e-05, "loss_iou": 0.18359375, "loss_num": 0.009521484375, "loss_xval": 0.4140625, "num_input_tokens_seen": 432482660, "step": 7718 }, { "epoch": 17.19153674832962, "grad_norm": 18.660619735717773, "learning_rate": 1e-06, "loss": 0.3095, "num_input_tokens_seen": 432539144, "step": 7719 }, { "epoch": 17.19153674832962, "loss": 0.2771776020526886, "loss_ce": 7.799692684784532e-05, "loss_iou": 0.111328125, "loss_num": 0.0108642578125, "loss_xval": 0.27734375, "num_input_tokens_seen": 432539144, "step": 7719 }, { "epoch": 17.193763919821826, "grad_norm": 18.885120391845703, "learning_rate": 1e-06, "loss": 0.3698, "num_input_tokens_seen": 432596392, "step": 7720 }, { "epoch": 17.193763919821826, "loss": 0.4347517788410187, "loss_ce": 0.00030352562316693366, "loss_iou": 0.1962890625, "loss_num": 0.0086669921875, "loss_xval": 0.43359375, "num_input_tokens_seen": 432596392, "step": 7720 }, { "epoch": 17.19599109131403, "grad_norm": 11.850611686706543, "learning_rate": 1e-06, "loss": 0.5695, "num_input_tokens_seen": 432650932, "step": 7721 }, { "epoch": 17.19599109131403, "loss": 0.5424901247024536, "loss_ce": 7.069206185406074e-05, "loss_iou": 0.22265625, "loss_num": 0.019287109375, "loss_xval": 0.54296875, "num_input_tokens_seen": 432650932, "step": 7721 }, { "epoch": 17.198218262806236, "grad_norm": 19.987720489501953, "learning_rate": 1e-06, "loss": 0.3179, "num_input_tokens_seen": 432706504, "step": 7722 }, { "epoch": 17.198218262806236, "loss": 0.3480537533760071, "loss_ce": 9.233770833816379e-05, "loss_iou": 0.1572265625, "loss_num": 0.006866455078125, "loss_xval": 0.34765625, "num_input_tokens_seen": 432706504, "step": 7722 }, { "epoch": 17.20044543429844, "grad_norm": 18.74901008605957, "learning_rate": 1e-06, "loss": 0.4127, "num_input_tokens_seen": 432765232, "step": 7723 }, { "epoch": 17.20044543429844, "loss": 0.4682392477989197, "loss_ce": 9.959404997061938e-05, "loss_iou": 0.208984375, "loss_num": 0.0098876953125, "loss_xval": 0.46875, "num_input_tokens_seen": 432765232, "step": 7723 }, { "epoch": 17.202672605790646, "grad_norm": 18.15958023071289, "learning_rate": 1e-06, "loss": 0.3961, "num_input_tokens_seen": 432820076, "step": 7724 }, { "epoch": 17.202672605790646, "loss": 0.41411644220352173, "loss_ce": 8.445019193459302e-05, "loss_iou": 0.185546875, "loss_num": 0.008544921875, "loss_xval": 0.4140625, "num_input_tokens_seen": 432820076, "step": 7724 }, { "epoch": 17.20489977728285, "grad_norm": 27.001567840576172, "learning_rate": 1e-06, "loss": 0.4505, "num_input_tokens_seen": 432877112, "step": 7725 }, { "epoch": 17.20489977728285, "loss": 0.46639156341552734, "loss_ce": 8.295044244732708e-05, "loss_iou": 0.2001953125, "loss_num": 0.01300048828125, "loss_xval": 0.466796875, "num_input_tokens_seen": 432877112, "step": 7725 }, { "epoch": 17.207126948775056, "grad_norm": 30.94824981689453, "learning_rate": 1e-06, "loss": 0.3738, "num_input_tokens_seen": 432934028, "step": 7726 }, { "epoch": 17.207126948775056, "loss": 0.31847047805786133, "loss_ce": 8.058200182858855e-05, "loss_iou": 0.142578125, "loss_num": 0.006591796875, "loss_xval": 0.318359375, "num_input_tokens_seen": 432934028, "step": 7726 }, { "epoch": 17.20935412026726, "grad_norm": 17.023353576660156, "learning_rate": 1e-06, "loss": 0.4936, "num_input_tokens_seen": 432991524, "step": 7727 }, { "epoch": 17.20935412026726, "loss": 0.4206371307373047, "loss_ce": 0.0002880272513721138, "loss_iou": 0.1669921875, "loss_num": 0.0174560546875, "loss_xval": 0.419921875, "num_input_tokens_seen": 432991524, "step": 7727 }, { "epoch": 17.211581291759465, "grad_norm": 17.618732452392578, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 433050212, "step": 7728 }, { "epoch": 17.211581291759465, "loss": 0.3367721438407898, "loss_ce": 0.0001022043579723686, "loss_iou": 0.15234375, "loss_num": 0.00653076171875, "loss_xval": 0.3359375, "num_input_tokens_seen": 433050212, "step": 7728 }, { "epoch": 17.21380846325167, "grad_norm": 17.69324493408203, "learning_rate": 1e-06, "loss": 0.4379, "num_input_tokens_seen": 433106864, "step": 7729 }, { "epoch": 17.21380846325167, "loss": 0.5612530708312988, "loss_ce": 9.585064981365576e-05, "loss_iou": 0.26171875, "loss_num": 0.007171630859375, "loss_xval": 0.5625, "num_input_tokens_seen": 433106864, "step": 7729 }, { "epoch": 17.216035634743875, "grad_norm": 20.909269332885742, "learning_rate": 1e-06, "loss": 0.3521, "num_input_tokens_seen": 433163416, "step": 7730 }, { "epoch": 17.216035634743875, "loss": 0.40844836831092834, "loss_ce": 9.26676148083061e-05, "loss_iou": 0.1748046875, "loss_num": 0.01190185546875, "loss_xval": 0.408203125, "num_input_tokens_seen": 433163416, "step": 7730 }, { "epoch": 17.21826280623608, "grad_norm": 17.81989860534668, "learning_rate": 1e-06, "loss": 0.3125, "num_input_tokens_seen": 433218392, "step": 7731 }, { "epoch": 17.21826280623608, "loss": 0.32392922043800354, "loss_ce": 7.668677426408976e-05, "loss_iou": 0.1298828125, "loss_num": 0.01300048828125, "loss_xval": 0.32421875, "num_input_tokens_seen": 433218392, "step": 7731 }, { "epoch": 17.220489977728285, "grad_norm": 17.176774978637695, "learning_rate": 1e-06, "loss": 0.3715, "num_input_tokens_seen": 433273928, "step": 7732 }, { "epoch": 17.220489977728285, "loss": 0.35421961545944214, "loss_ce": 9.36458382057026e-05, "loss_iou": 0.1630859375, "loss_num": 0.005584716796875, "loss_xval": 0.353515625, "num_input_tokens_seen": 433273928, "step": 7732 }, { "epoch": 17.22271714922049, "grad_norm": 13.050132751464844, "learning_rate": 1e-06, "loss": 0.534, "num_input_tokens_seen": 433330336, "step": 7733 }, { "epoch": 17.22271714922049, "loss": 0.5098897814750671, "loss_ce": 0.00012416887329891324, "loss_iou": 0.21484375, "loss_num": 0.015869140625, "loss_xval": 0.5078125, "num_input_tokens_seen": 433330336, "step": 7733 }, { "epoch": 17.224944320712694, "grad_norm": 20.13163948059082, "learning_rate": 1e-06, "loss": 0.4318, "num_input_tokens_seen": 433384892, "step": 7734 }, { "epoch": 17.224944320712694, "loss": 0.34920066595077515, "loss_ce": 7.957725756568834e-05, "loss_iou": 0.1552734375, "loss_num": 0.007720947265625, "loss_xval": 0.349609375, "num_input_tokens_seen": 433384892, "step": 7734 }, { "epoch": 17.2271714922049, "grad_norm": 25.29994773864746, "learning_rate": 1e-06, "loss": 0.2885, "num_input_tokens_seen": 433440724, "step": 7735 }, { "epoch": 17.2271714922049, "loss": 0.22060546278953552, "loss_ce": 8.54411773616448e-05, "loss_iou": 0.1005859375, "loss_num": 0.0038299560546875, "loss_xval": 0.220703125, "num_input_tokens_seen": 433440724, "step": 7735 }, { "epoch": 17.229398663697104, "grad_norm": 19.234081268310547, "learning_rate": 1e-06, "loss": 0.2669, "num_input_tokens_seen": 433497228, "step": 7736 }, { "epoch": 17.229398663697104, "loss": 0.3394434154033661, "loss_ce": 8.795637404546142e-05, "loss_iou": 0.142578125, "loss_num": 0.0107421875, "loss_xval": 0.33984375, "num_input_tokens_seen": 433497228, "step": 7736 }, { "epoch": 17.23162583518931, "grad_norm": 25.836366653442383, "learning_rate": 1e-06, "loss": 0.4823, "num_input_tokens_seen": 433553476, "step": 7737 }, { "epoch": 17.23162583518931, "loss": 0.45820939540863037, "loss_ce": 7.951643783599138e-05, "loss_iou": 0.1865234375, "loss_num": 0.0172119140625, "loss_xval": 0.458984375, "num_input_tokens_seen": 433553476, "step": 7737 }, { "epoch": 17.233853006681514, "grad_norm": 34.01211166381836, "learning_rate": 1e-06, "loss": 0.4521, "num_input_tokens_seen": 433608588, "step": 7738 }, { "epoch": 17.233853006681514, "loss": 0.5483601093292236, "loss_ce": 8.125473686959594e-05, "loss_iou": 0.244140625, "loss_num": 0.011962890625, "loss_xval": 0.546875, "num_input_tokens_seen": 433608588, "step": 7738 }, { "epoch": 17.23608017817372, "grad_norm": 26.347332000732422, "learning_rate": 1e-06, "loss": 0.403, "num_input_tokens_seen": 433665036, "step": 7739 }, { "epoch": 17.23608017817372, "loss": 0.4297289252281189, "loss_ce": 0.00010247422324027866, "loss_iou": 0.1806640625, "loss_num": 0.01373291015625, "loss_xval": 0.4296875, "num_input_tokens_seen": 433665036, "step": 7739 }, { "epoch": 17.238307349665924, "grad_norm": 16.483013153076172, "learning_rate": 1e-06, "loss": 0.4577, "num_input_tokens_seen": 433721384, "step": 7740 }, { "epoch": 17.238307349665924, "loss": 0.40596240758895874, "loss_ce": 7.860736513976008e-05, "loss_iou": 0.1787109375, "loss_num": 0.0098876953125, "loss_xval": 0.40625, "num_input_tokens_seen": 433721384, "step": 7740 }, { "epoch": 17.24053452115813, "grad_norm": 16.415891647338867, "learning_rate": 1e-06, "loss": 0.34, "num_input_tokens_seen": 433779720, "step": 7741 }, { "epoch": 17.24053452115813, "loss": 0.3139420747756958, "loss_ce": 9.928335202857852e-05, "loss_iou": 0.1376953125, "loss_num": 0.007598876953125, "loss_xval": 0.314453125, "num_input_tokens_seen": 433779720, "step": 7741 }, { "epoch": 17.242761692650333, "grad_norm": 13.877442359924316, "learning_rate": 1e-06, "loss": 0.335, "num_input_tokens_seen": 433835132, "step": 7742 }, { "epoch": 17.242761692650333, "loss": 0.32295700907707214, "loss_ce": 8.10332567198202e-05, "loss_iou": 0.14453125, "loss_num": 0.00677490234375, "loss_xval": 0.322265625, "num_input_tokens_seen": 433835132, "step": 7742 }, { "epoch": 17.244988864142538, "grad_norm": 25.894933700561523, "learning_rate": 1e-06, "loss": 0.4914, "num_input_tokens_seen": 433890280, "step": 7743 }, { "epoch": 17.244988864142538, "loss": 0.47230130434036255, "loss_ce": 8.75686964718625e-05, "loss_iou": 0.2109375, "loss_num": 0.0103759765625, "loss_xval": 0.47265625, "num_input_tokens_seen": 433890280, "step": 7743 }, { "epoch": 17.247216035634743, "grad_norm": 28.23295021057129, "learning_rate": 1e-06, "loss": 0.4699, "num_input_tokens_seen": 433945752, "step": 7744 }, { "epoch": 17.247216035634743, "loss": 0.6319034695625305, "loss_ce": 0.00018957318388856947, "loss_iou": 0.2578125, "loss_num": 0.0234375, "loss_xval": 0.6328125, "num_input_tokens_seen": 433945752, "step": 7744 }, { "epoch": 17.249443207126948, "grad_norm": 19.394866943359375, "learning_rate": 1e-06, "loss": 0.4551, "num_input_tokens_seen": 434001564, "step": 7745 }, { "epoch": 17.249443207126948, "loss": 0.531867504119873, "loss_ce": 0.0002207824436482042, "loss_iou": 0.2001953125, "loss_num": 0.0263671875, "loss_xval": 0.53125, "num_input_tokens_seen": 434001564, "step": 7745 }, { "epoch": 17.251670378619153, "grad_norm": 31.168285369873047, "learning_rate": 1e-06, "loss": 0.4663, "num_input_tokens_seen": 434058448, "step": 7746 }, { "epoch": 17.251670378619153, "loss": 0.6216709613800049, "loss_ce": 8.895625069271773e-05, "loss_iou": 0.2734375, "loss_num": 0.014892578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 434058448, "step": 7746 }, { "epoch": 17.253897550111358, "grad_norm": 15.39581298828125, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 434115584, "step": 7747 }, { "epoch": 17.253897550111358, "loss": 0.5104674100875854, "loss_ce": 9.140316979028285e-05, "loss_iou": 0.2158203125, "loss_num": 0.0155029296875, "loss_xval": 0.51171875, "num_input_tokens_seen": 434115584, "step": 7747 }, { "epoch": 17.256124721603562, "grad_norm": 26.95697593688965, "learning_rate": 1e-06, "loss": 0.4361, "num_input_tokens_seen": 434169820, "step": 7748 }, { "epoch": 17.256124721603562, "loss": 0.35939961671829224, "loss_ce": 8.56606347952038e-05, "loss_iou": 0.162109375, "loss_num": 0.006988525390625, "loss_xval": 0.359375, "num_input_tokens_seen": 434169820, "step": 7748 }, { "epoch": 17.258351893095767, "grad_norm": 41.06592559814453, "learning_rate": 1e-06, "loss": 0.3006, "num_input_tokens_seen": 434228656, "step": 7749 }, { "epoch": 17.258351893095767, "loss": 0.29035407304763794, "loss_ce": 0.0008032863843254745, "loss_iou": 0.126953125, "loss_num": 0.00726318359375, "loss_xval": 0.2890625, "num_input_tokens_seen": 434228656, "step": 7749 }, { "epoch": 17.260579064587972, "grad_norm": 12.836146354675293, "learning_rate": 1e-06, "loss": 0.2803, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "eval_seeclick_web_CIoU": 0.5904313921928406, "eval_seeclick_web_GIoU": 0.5887089371681213, "eval_seeclick_web_IoU": 0.6092908382415771, "eval_seeclick_web_MAE_all": 0.015212189638987184, "eval_seeclick_web_MAE_h": 0.007458887062966824, "eval_seeclick_web_MAE_w": 0.01523585431277752, "eval_seeclick_web_MAE_x_boxes": 0.007938009221106768, "eval_seeclick_web_MAE_y_boxes": 0.021370060741901398, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.8967059254646301, "eval_seeclick_web_loss_ce": 0.0001397554951836355, "eval_seeclick_web_loss_iou": 0.413818359375, "eval_seeclick_web_loss_num": 0.012087821960449219, "eval_seeclick_web_loss_xval": 0.887451171875, "eval_seeclick_web_runtime": 21.8766, "eval_seeclick_web_samples_per_second": 2.286, "eval_seeclick_web_steps_per_second": 0.091, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "eval_icons_CIoU": 0.25680967420339584, "eval_icons_GIoU": 0.2801186218857765, "eval_icons_IoU": 0.3349318951368332, "eval_icons_MAE_all": 0.05978231504559517, "eval_icons_MAE_h": 0.0324998227879405, "eval_icons_MAE_w": 0.06001891568303108, "eval_icons_MAE_x_boxes": 0.06087409518659115, "eval_icons_MAE_y_boxes": 0.03790356032550335, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7364193201065063, "eval_icons_loss_ce": 0.00016331803635694087, "eval_icons_loss_iou": 0.6763916015625, "eval_icons_loss_num": 0.052509307861328125, "eval_icons_loss_xval": 1.615966796875, "eval_icons_runtime": 21.8048, "eval_icons_samples_per_second": 2.293, "eval_icons_steps_per_second": 0.092, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "eval_screenspot_CIoU": 0.3815999726454417, "eval_screenspot_GIoU": 0.3997166156768799, "eval_screenspot_IoU": 0.45147113005320233, "eval_screenspot_MAE_all": 0.055647388100624084, "eval_screenspot_MAE_h": 0.03920063997308413, "eval_screenspot_MAE_w": 0.06165233999490738, "eval_screenspot_MAE_x_boxes": 0.06318879996736844, "eval_screenspot_MAE_y_boxes": 0.03922058828175068, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 1.546026349067688, "eval_screenspot_loss_ce": 0.00020924270696317157, "eval_screenspot_loss_iou": 0.6417643229166666, "eval_screenspot_loss_num": 0.06403223673502605, "eval_screenspot_loss_xval": 1.6028645833333333, "eval_screenspot_runtime": 37.6194, "eval_screenspot_samples_per_second": 2.366, "eval_screenspot_steps_per_second": 0.08, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "eval_compot_CIoU": 0.3500685393810272, "eval_compot_GIoU": 0.36062242090702057, "eval_compot_IoU": 0.4054069072008133, "eval_compot_MAE_all": 0.0187700055539608, "eval_compot_MAE_h": 0.010957159101963043, "eval_compot_MAE_w": 0.02098592184484005, "eval_compot_MAE_x_boxes": 0.03000013902783394, "eval_compot_MAE_y_boxes": 0.006979038938879967, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3751345872879028, "eval_compot_loss_ce": 0.00013413318083621562, "eval_compot_loss_iou": 0.6326904296875, "eval_compot_loss_num": 0.017452239990234375, "eval_compot_loss_xval": 1.35302734375, "eval_compot_runtime": 23.5427, "eval_compot_samples_per_second": 2.124, "eval_compot_steps_per_second": 0.085, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "eval_custom_ui_val_CIoU": 0.47184327410327065, "eval_custom_ui_val_GIoU": 0.4758959710597992, "eval_custom_ui_val_IoU": 0.5333681570159065, "eval_custom_ui_val_MAE_all": 0.027272911483628884, "eval_custom_ui_val_MAE_h": 0.014620246106965674, "eval_custom_ui_val_MAE_w": 0.03669780415172378, "eval_custom_ui_val_MAE_x_boxes": 0.03337866820705434, "eval_custom_ui_val_MAE_y_boxes": 0.012776042127774822, "eval_custom_ui_val_inside_bbox": 0.7754629651705424, "eval_custom_ui_val_loss": 1.177891492843628, "eval_custom_ui_val_loss_ce": 0.00015998236550432112, "eval_custom_ui_val_loss_iou": 0.5063612196180556, "eval_custom_ui_val_loss_num": 0.023825857374403212, "eval_custom_ui_val_loss_xval": 1.1314561631944444, "eval_custom_ui_val_runtime": 65.7899, "eval_custom_ui_val_samples_per_second": 4.028, "eval_custom_ui_val_steps_per_second": 0.137, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.260579064587972, "loss": 0.8485028743743896, "loss_ce": 0.00011421847739256918, "loss_iou": 0.3828125, "loss_num": 0.016845703125, "loss_xval": 0.84765625, "num_input_tokens_seen": 434285972, "step": 7750 }, { "epoch": 17.262806236080177, "grad_norm": 40.981712341308594, "learning_rate": 1e-06, "loss": 0.4852, "num_input_tokens_seen": 434344348, "step": 7751 }, { "epoch": 17.262806236080177, "loss": 0.6921710968017578, "loss_ce": 0.00021551080862991512, "loss_iou": 0.263671875, "loss_num": 0.03271484375, "loss_xval": 0.69140625, "num_input_tokens_seen": 434344348, "step": 7751 }, { "epoch": 17.265033407572382, "grad_norm": 21.000991821289062, "learning_rate": 1e-06, "loss": 0.3312, "num_input_tokens_seen": 434397948, "step": 7752 }, { "epoch": 17.265033407572382, "loss": 0.3175837993621826, "loss_ce": 7.892360736150295e-05, "loss_iou": 0.1455078125, "loss_num": 0.005340576171875, "loss_xval": 0.318359375, "num_input_tokens_seen": 434397948, "step": 7752 }, { "epoch": 17.267260579064587, "grad_norm": 19.446720123291016, "learning_rate": 1e-06, "loss": 0.4803, "num_input_tokens_seen": 434452904, "step": 7753 }, { "epoch": 17.267260579064587, "loss": 0.5616533756256104, "loss_ce": 9.945797501131892e-05, "loss_iou": 0.2314453125, "loss_num": 0.0196533203125, "loss_xval": 0.5625, "num_input_tokens_seen": 434452904, "step": 7753 }, { "epoch": 17.26948775055679, "grad_norm": 18.333148956298828, "learning_rate": 1e-06, "loss": 0.4172, "num_input_tokens_seen": 434508660, "step": 7754 }, { "epoch": 17.26948775055679, "loss": 0.34176281094551086, "loss_ce": 8.800373325357214e-05, "loss_iou": 0.138671875, "loss_num": 0.012939453125, "loss_xval": 0.341796875, "num_input_tokens_seen": 434508660, "step": 7754 }, { "epoch": 17.271714922048996, "grad_norm": 19.104904174804688, "learning_rate": 1e-06, "loss": 0.3037, "num_input_tokens_seen": 434565492, "step": 7755 }, { "epoch": 17.271714922048996, "loss": 0.27949851751327515, "loss_ce": 7.9582110629417e-05, "loss_iou": 0.11669921875, "loss_num": 0.0091552734375, "loss_xval": 0.279296875, "num_input_tokens_seen": 434565492, "step": 7755 }, { "epoch": 17.2739420935412, "grad_norm": 16.93346405029297, "learning_rate": 1e-06, "loss": 0.3254, "num_input_tokens_seen": 434619016, "step": 7756 }, { "epoch": 17.2739420935412, "loss": 0.2303803563117981, "loss_ce": 9.471694647800177e-05, "loss_iou": 0.09326171875, "loss_num": 0.00872802734375, "loss_xval": 0.23046875, "num_input_tokens_seen": 434619016, "step": 7756 }, { "epoch": 17.276169265033406, "grad_norm": 15.693754196166992, "learning_rate": 1e-06, "loss": 0.3854, "num_input_tokens_seen": 434674412, "step": 7757 }, { "epoch": 17.276169265033406, "loss": 0.5041153430938721, "loss_ce": 8.705217624083161e-05, "loss_iou": 0.2041015625, "loss_num": 0.0191650390625, "loss_xval": 0.50390625, "num_input_tokens_seen": 434674412, "step": 7757 }, { "epoch": 17.27839643652561, "grad_norm": 44.67047882080078, "learning_rate": 1e-06, "loss": 0.3596, "num_input_tokens_seen": 434728900, "step": 7758 }, { "epoch": 17.27839643652561, "loss": 0.4251922070980072, "loss_ce": 8.232867548940703e-05, "loss_iou": 0.1953125, "loss_num": 0.006744384765625, "loss_xval": 0.42578125, "num_input_tokens_seen": 434728900, "step": 7758 }, { "epoch": 17.280623608017816, "grad_norm": 49.442901611328125, "learning_rate": 1e-06, "loss": 0.3936, "num_input_tokens_seen": 434784116, "step": 7759 }, { "epoch": 17.280623608017816, "loss": 0.41439324617385864, "loss_ce": 8.657870057504624e-05, "loss_iou": 0.1728515625, "loss_num": 0.0137939453125, "loss_xval": 0.4140625, "num_input_tokens_seen": 434784116, "step": 7759 }, { "epoch": 17.28285077951002, "grad_norm": 19.590133666992188, "learning_rate": 1e-06, "loss": 0.4671, "num_input_tokens_seen": 434838652, "step": 7760 }, { "epoch": 17.28285077951002, "loss": 0.31147128343582153, "loss_ce": 6.993389979470521e-05, "loss_iou": 0.1376953125, "loss_num": 0.0072021484375, "loss_xval": 0.310546875, "num_input_tokens_seen": 434838652, "step": 7760 }, { "epoch": 17.285077951002226, "grad_norm": 18.076194763183594, "learning_rate": 1e-06, "loss": 0.408, "num_input_tokens_seen": 434892168, "step": 7761 }, { "epoch": 17.285077951002226, "loss": 0.35101962089538574, "loss_ce": 6.74750772304833e-05, "loss_iou": 0.15234375, "loss_num": 0.009033203125, "loss_xval": 0.3515625, "num_input_tokens_seen": 434892168, "step": 7761 }, { "epoch": 17.28730512249443, "grad_norm": 20.00171661376953, "learning_rate": 1e-06, "loss": 0.3366, "num_input_tokens_seen": 434949224, "step": 7762 }, { "epoch": 17.28730512249443, "loss": 0.38625282049179077, "loss_ce": 8.34054226288572e-05, "loss_iou": 0.17578125, "loss_num": 0.007110595703125, "loss_xval": 0.38671875, "num_input_tokens_seen": 434949224, "step": 7762 }, { "epoch": 17.289532293986635, "grad_norm": 24.69287109375, "learning_rate": 1e-06, "loss": 0.35, "num_input_tokens_seen": 435001896, "step": 7763 }, { "epoch": 17.289532293986635, "loss": 0.408402681350708, "loss_ce": 7.747893687337637e-05, "loss_iou": 0.173828125, "loss_num": 0.01220703125, "loss_xval": 0.408203125, "num_input_tokens_seen": 435001896, "step": 7763 }, { "epoch": 17.29175946547884, "grad_norm": 11.8654203414917, "learning_rate": 1e-06, "loss": 0.2978, "num_input_tokens_seen": 435057448, "step": 7764 }, { "epoch": 17.29175946547884, "loss": 0.35043925046920776, "loss_ce": 9.744616545503959e-05, "loss_iou": 0.1455078125, "loss_num": 0.01171875, "loss_xval": 0.349609375, "num_input_tokens_seen": 435057448, "step": 7764 }, { "epoch": 17.293986636971045, "grad_norm": 15.643658638000488, "learning_rate": 1e-06, "loss": 0.3667, "num_input_tokens_seen": 435114576, "step": 7765 }, { "epoch": 17.293986636971045, "loss": 0.2830941081047058, "loss_ce": 7.410335820168257e-05, "loss_iou": 0.1279296875, "loss_num": 0.005615234375, "loss_xval": 0.283203125, "num_input_tokens_seen": 435114576, "step": 7765 }, { "epoch": 17.29621380846325, "grad_norm": 28.67169189453125, "learning_rate": 1e-06, "loss": 0.3936, "num_input_tokens_seen": 435170508, "step": 7766 }, { "epoch": 17.29621380846325, "loss": 0.4348151683807373, "loss_ce": 0.00012277913629077375, "loss_iou": 0.189453125, "loss_num": 0.010986328125, "loss_xval": 0.435546875, "num_input_tokens_seen": 435170508, "step": 7766 }, { "epoch": 17.29844097995546, "grad_norm": 40.479034423828125, "learning_rate": 1e-06, "loss": 0.5411, "num_input_tokens_seen": 435225040, "step": 7767 }, { "epoch": 17.29844097995546, "loss": 0.44416916370391846, "loss_ce": 7.737807754892856e-05, "loss_iou": 0.1826171875, "loss_num": 0.015625, "loss_xval": 0.443359375, "num_input_tokens_seen": 435225040, "step": 7767 }, { "epoch": 17.30066815144766, "grad_norm": 19.786476135253906, "learning_rate": 1e-06, "loss": 0.275, "num_input_tokens_seen": 435280060, "step": 7768 }, { "epoch": 17.30066815144766, "loss": 0.25245094299316406, "loss_ce": 7.053982699289918e-05, "loss_iou": 0.1162109375, "loss_num": 0.0040283203125, "loss_xval": 0.251953125, "num_input_tokens_seen": 435280060, "step": 7768 }, { "epoch": 17.302895322939868, "grad_norm": 25.475353240966797, "learning_rate": 1e-06, "loss": 0.4287, "num_input_tokens_seen": 435337596, "step": 7769 }, { "epoch": 17.302895322939868, "loss": 0.4136672914028168, "loss_ce": 9.308782318839803e-05, "loss_iou": 0.1875, "loss_num": 0.0076904296875, "loss_xval": 0.4140625, "num_input_tokens_seen": 435337596, "step": 7769 }, { "epoch": 17.305122494432073, "grad_norm": 13.328644752502441, "learning_rate": 1e-06, "loss": 0.3073, "num_input_tokens_seen": 435394108, "step": 7770 }, { "epoch": 17.305122494432073, "loss": 0.34184500575065613, "loss_ce": 7.864900544518605e-05, "loss_iou": 0.15234375, "loss_num": 0.007568359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 435394108, "step": 7770 }, { "epoch": 17.307349665924278, "grad_norm": 19.182100296020508, "learning_rate": 1e-06, "loss": 0.4608, "num_input_tokens_seen": 435448756, "step": 7771 }, { "epoch": 17.307349665924278, "loss": 0.577645480632782, "loss_ce": 6.979001045692712e-05, "loss_iou": 0.2470703125, "loss_num": 0.0169677734375, "loss_xval": 0.578125, "num_input_tokens_seen": 435448756, "step": 7771 }, { "epoch": 17.309576837416483, "grad_norm": 22.650524139404297, "learning_rate": 1e-06, "loss": 0.3382, "num_input_tokens_seen": 435505496, "step": 7772 }, { "epoch": 17.309576837416483, "loss": 0.413726806640625, "loss_ce": 9.157357271760702e-05, "loss_iou": 0.1865234375, "loss_num": 0.0081787109375, "loss_xval": 0.4140625, "num_input_tokens_seen": 435505496, "step": 7772 }, { "epoch": 17.311804008908688, "grad_norm": 22.297222137451172, "learning_rate": 1e-06, "loss": 0.5371, "num_input_tokens_seen": 435561892, "step": 7773 }, { "epoch": 17.311804008908688, "loss": 0.3261268436908722, "loss_ce": 7.704827294219285e-05, "loss_iou": 0.1376953125, "loss_num": 0.01007080078125, "loss_xval": 0.326171875, "num_input_tokens_seen": 435561892, "step": 7773 }, { "epoch": 17.314031180400892, "grad_norm": 23.894563674926758, "learning_rate": 1e-06, "loss": 0.38, "num_input_tokens_seen": 435617896, "step": 7774 }, { "epoch": 17.314031180400892, "loss": 0.3799806237220764, "loss_ce": 9.779801621334627e-05, "loss_iou": 0.1748046875, "loss_num": 0.0062255859375, "loss_xval": 0.37890625, "num_input_tokens_seen": 435617896, "step": 7774 }, { "epoch": 17.316258351893097, "grad_norm": 21.17926597595215, "learning_rate": 1e-06, "loss": 0.4776, "num_input_tokens_seen": 435674376, "step": 7775 }, { "epoch": 17.316258351893097, "loss": 0.47175133228302, "loss_ce": 7.16680588084273e-05, "loss_iou": 0.21484375, "loss_num": 0.00848388671875, "loss_xval": 0.47265625, "num_input_tokens_seen": 435674376, "step": 7775 }, { "epoch": 17.318485523385302, "grad_norm": 18.95273208618164, "learning_rate": 1e-06, "loss": 0.3432, "num_input_tokens_seen": 435734040, "step": 7776 }, { "epoch": 17.318485523385302, "loss": 0.3788660764694214, "loss_ce": 8.189848449546844e-05, "loss_iou": 0.16796875, "loss_num": 0.0084228515625, "loss_xval": 0.37890625, "num_input_tokens_seen": 435734040, "step": 7776 }, { "epoch": 17.320712694877507, "grad_norm": 12.807289123535156, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 435790640, "step": 7777 }, { "epoch": 17.320712694877507, "loss": 0.34432125091552734, "loss_ce": 8.297587919514626e-05, "loss_iou": 0.150390625, "loss_num": 0.00860595703125, "loss_xval": 0.34375, "num_input_tokens_seen": 435790640, "step": 7777 }, { "epoch": 17.322939866369712, "grad_norm": 19.11941146850586, "learning_rate": 1e-06, "loss": 0.4882, "num_input_tokens_seen": 435847892, "step": 7778 }, { "epoch": 17.322939866369712, "loss": 0.5650254487991333, "loss_ce": 8.401433296967298e-05, "loss_iou": 0.228515625, "loss_num": 0.021728515625, "loss_xval": 0.56640625, "num_input_tokens_seen": 435847892, "step": 7778 }, { "epoch": 17.325167037861917, "grad_norm": 19.643787384033203, "learning_rate": 1e-06, "loss": 0.4666, "num_input_tokens_seen": 435903812, "step": 7779 }, { "epoch": 17.325167037861917, "loss": 0.39589911699295044, "loss_ce": 8.612501551397145e-05, "loss_iou": 0.166015625, "loss_num": 0.0126953125, "loss_xval": 0.396484375, "num_input_tokens_seen": 435903812, "step": 7779 }, { "epoch": 17.32739420935412, "grad_norm": 22.365102767944336, "learning_rate": 1e-06, "loss": 0.4378, "num_input_tokens_seen": 435960584, "step": 7780 }, { "epoch": 17.32739420935412, "loss": 0.39949724078178406, "loss_ce": 8.315553714055568e-05, "loss_iou": 0.1728515625, "loss_num": 0.01092529296875, "loss_xval": 0.3984375, "num_input_tokens_seen": 435960584, "step": 7780 }, { "epoch": 17.329621380846326, "grad_norm": 27.921226501464844, "learning_rate": 1e-06, "loss": 0.4324, "num_input_tokens_seen": 436016512, "step": 7781 }, { "epoch": 17.329621380846326, "loss": 0.5156280994415283, "loss_ce": 0.00012516917195171118, "loss_iou": 0.224609375, "loss_num": 0.01312255859375, "loss_xval": 0.515625, "num_input_tokens_seen": 436016512, "step": 7781 }, { "epoch": 17.33184855233853, "grad_norm": 24.685932159423828, "learning_rate": 1e-06, "loss": 0.4589, "num_input_tokens_seen": 436072952, "step": 7782 }, { "epoch": 17.33184855233853, "loss": 0.41859763860702515, "loss_ce": 7.957972411531955e-05, "loss_iou": 0.1953125, "loss_num": 0.0054931640625, "loss_xval": 0.41796875, "num_input_tokens_seen": 436072952, "step": 7782 }, { "epoch": 17.334075723830736, "grad_norm": 30.725080490112305, "learning_rate": 1e-06, "loss": 0.5439, "num_input_tokens_seen": 436127800, "step": 7783 }, { "epoch": 17.334075723830736, "loss": 0.4986262917518616, "loss_ce": 9.114194836001843e-05, "loss_iou": 0.2314453125, "loss_num": 0.007232666015625, "loss_xval": 0.498046875, "num_input_tokens_seen": 436127800, "step": 7783 }, { "epoch": 17.33630289532294, "grad_norm": 20.326061248779297, "learning_rate": 1e-06, "loss": 0.3683, "num_input_tokens_seen": 436187140, "step": 7784 }, { "epoch": 17.33630289532294, "loss": 0.24855035543441772, "loss_ce": 7.622801786055788e-05, "loss_iou": 0.11083984375, "loss_num": 0.00537109375, "loss_xval": 0.248046875, "num_input_tokens_seen": 436187140, "step": 7784 }, { "epoch": 17.338530066815146, "grad_norm": 21.3463077545166, "learning_rate": 1e-06, "loss": 0.4456, "num_input_tokens_seen": 436242216, "step": 7785 }, { "epoch": 17.338530066815146, "loss": 0.3916824460029602, "loss_ce": 8.088418690022081e-05, "loss_iou": 0.169921875, "loss_num": 0.01025390625, "loss_xval": 0.390625, "num_input_tokens_seen": 436242216, "step": 7785 }, { "epoch": 17.34075723830735, "grad_norm": 17.23738670349121, "learning_rate": 1e-06, "loss": 0.467, "num_input_tokens_seen": 436301260, "step": 7786 }, { "epoch": 17.34075723830735, "loss": 0.549955427646637, "loss_ce": 8.968937618192285e-05, "loss_iou": 0.21875, "loss_num": 0.0223388671875, "loss_xval": 0.55078125, "num_input_tokens_seen": 436301260, "step": 7786 }, { "epoch": 17.342984409799556, "grad_norm": 15.01907730102539, "learning_rate": 1e-06, "loss": 0.3399, "num_input_tokens_seen": 436357676, "step": 7787 }, { "epoch": 17.342984409799556, "loss": 0.34413158893585205, "loss_ce": 7.641033153049648e-05, "loss_iou": 0.1455078125, "loss_num": 0.010498046875, "loss_xval": 0.34375, "num_input_tokens_seen": 436357676, "step": 7787 }, { "epoch": 17.34521158129176, "grad_norm": 17.24225425720215, "learning_rate": 1e-06, "loss": 0.3079, "num_input_tokens_seen": 436412808, "step": 7788 }, { "epoch": 17.34521158129176, "loss": 0.2603484094142914, "loss_ce": 7.925922545837238e-05, "loss_iou": 0.12060546875, "loss_num": 0.0037994384765625, "loss_xval": 0.259765625, "num_input_tokens_seen": 436412808, "step": 7788 }, { "epoch": 17.347438752783965, "grad_norm": 17.269451141357422, "learning_rate": 1e-06, "loss": 0.457, "num_input_tokens_seen": 436468436, "step": 7789 }, { "epoch": 17.347438752783965, "loss": 0.6248146891593933, "loss_ce": 0.00011986246681772172, "loss_iou": 0.275390625, "loss_num": 0.0146484375, "loss_xval": 0.625, "num_input_tokens_seen": 436468436, "step": 7789 }, { "epoch": 17.34966592427617, "grad_norm": 28.061927795410156, "learning_rate": 1e-06, "loss": 0.3785, "num_input_tokens_seen": 436526876, "step": 7790 }, { "epoch": 17.34966592427617, "loss": 0.4654099941253662, "loss_ce": 7.796321006026119e-05, "loss_iou": 0.205078125, "loss_num": 0.0111083984375, "loss_xval": 0.46484375, "num_input_tokens_seen": 436526876, "step": 7790 }, { "epoch": 17.351893095768375, "grad_norm": 10.565998077392578, "learning_rate": 1e-06, "loss": 0.3896, "num_input_tokens_seen": 436583952, "step": 7791 }, { "epoch": 17.351893095768375, "loss": 0.30404287576675415, "loss_ce": 8.778244227869436e-05, "loss_iou": 0.134765625, "loss_num": 0.006805419921875, "loss_xval": 0.3046875, "num_input_tokens_seen": 436583952, "step": 7791 }, { "epoch": 17.35412026726058, "grad_norm": 15.32834243774414, "learning_rate": 1e-06, "loss": 0.4977, "num_input_tokens_seen": 436640224, "step": 7792 }, { "epoch": 17.35412026726058, "loss": 0.5239353179931641, "loss_ce": 7.061338692437857e-05, "loss_iou": 0.2216796875, "loss_num": 0.0162353515625, "loss_xval": 0.5234375, "num_input_tokens_seen": 436640224, "step": 7792 }, { "epoch": 17.356347438752785, "grad_norm": 34.02385711669922, "learning_rate": 1e-06, "loss": 0.6217, "num_input_tokens_seen": 436698680, "step": 7793 }, { "epoch": 17.356347438752785, "loss": 0.467492938041687, "loss_ce": 8.570231148041785e-05, "loss_iou": 0.21484375, "loss_num": 0.007537841796875, "loss_xval": 0.466796875, "num_input_tokens_seen": 436698680, "step": 7793 }, { "epoch": 17.35857461024499, "grad_norm": 20.18070411682129, "learning_rate": 1e-06, "loss": 0.408, "num_input_tokens_seen": 436753572, "step": 7794 }, { "epoch": 17.35857461024499, "loss": 0.2802870273590088, "loss_ce": 7.461066707037389e-05, "loss_iou": 0.119140625, "loss_num": 0.00823974609375, "loss_xval": 0.279296875, "num_input_tokens_seen": 436753572, "step": 7794 }, { "epoch": 17.360801781737194, "grad_norm": 18.094005584716797, "learning_rate": 1e-06, "loss": 0.4583, "num_input_tokens_seen": 436807624, "step": 7795 }, { "epoch": 17.360801781737194, "loss": 0.48153021931648254, "loss_ce": 8.488957246299833e-05, "loss_iou": 0.1982421875, "loss_num": 0.0172119140625, "loss_xval": 0.48046875, "num_input_tokens_seen": 436807624, "step": 7795 }, { "epoch": 17.3630289532294, "grad_norm": 13.45447826385498, "learning_rate": 1e-06, "loss": 0.3153, "num_input_tokens_seen": 436863496, "step": 7796 }, { "epoch": 17.3630289532294, "loss": 0.3097211420536041, "loss_ce": 8.980316488305107e-05, "loss_iou": 0.1328125, "loss_num": 0.00872802734375, "loss_xval": 0.310546875, "num_input_tokens_seen": 436863496, "step": 7796 }, { "epoch": 17.365256124721604, "grad_norm": 16.23236083984375, "learning_rate": 1e-06, "loss": 0.3146, "num_input_tokens_seen": 436920076, "step": 7797 }, { "epoch": 17.365256124721604, "loss": 0.3300950825214386, "loss_ce": 7.799094601068646e-05, "loss_iou": 0.1396484375, "loss_num": 0.010009765625, "loss_xval": 0.330078125, "num_input_tokens_seen": 436920076, "step": 7797 }, { "epoch": 17.36748329621381, "grad_norm": 16.27332305908203, "learning_rate": 1e-06, "loss": 0.4944, "num_input_tokens_seen": 436975300, "step": 7798 }, { "epoch": 17.36748329621381, "loss": 0.3892417550086975, "loss_ce": 8.160505967680365e-05, "loss_iou": 0.171875, "loss_num": 0.0091552734375, "loss_xval": 0.388671875, "num_input_tokens_seen": 436975300, "step": 7798 }, { "epoch": 17.369710467706014, "grad_norm": 13.5247163772583, "learning_rate": 1e-06, "loss": 0.508, "num_input_tokens_seen": 437030760, "step": 7799 }, { "epoch": 17.369710467706014, "loss": 0.5794344544410706, "loss_ce": 8.873187471181154e-05, "loss_iou": 0.259765625, "loss_num": 0.01171875, "loss_xval": 0.578125, "num_input_tokens_seen": 437030760, "step": 7799 }, { "epoch": 17.37193763919822, "grad_norm": 16.678621292114258, "learning_rate": 1e-06, "loss": 0.4149, "num_input_tokens_seen": 437086492, "step": 7800 }, { "epoch": 17.37193763919822, "loss": 0.6766193509101868, "loss_ce": 0.00010563358955550939, "loss_iou": 0.283203125, "loss_num": 0.0220947265625, "loss_xval": 0.67578125, "num_input_tokens_seen": 437086492, "step": 7800 }, { "epoch": 17.374164810690424, "grad_norm": 16.469213485717773, "learning_rate": 1e-06, "loss": 0.3798, "num_input_tokens_seen": 437142412, "step": 7801 }, { "epoch": 17.374164810690424, "loss": 0.413549542427063, "loss_ce": 9.739689994603395e-05, "loss_iou": 0.1962890625, "loss_num": 0.004150390625, "loss_xval": 0.4140625, "num_input_tokens_seen": 437142412, "step": 7801 }, { "epoch": 17.37639198218263, "grad_norm": 27.44245719909668, "learning_rate": 1e-06, "loss": 0.4157, "num_input_tokens_seen": 437199560, "step": 7802 }, { "epoch": 17.37639198218263, "loss": 0.3337703347206116, "loss_ce": 0.00027421663980931044, "loss_iou": 0.1494140625, "loss_num": 0.00689697265625, "loss_xval": 0.333984375, "num_input_tokens_seen": 437199560, "step": 7802 }, { "epoch": 17.378619153674833, "grad_norm": 24.417043685913086, "learning_rate": 1e-06, "loss": 0.3968, "num_input_tokens_seen": 437253536, "step": 7803 }, { "epoch": 17.378619153674833, "loss": 0.3829765319824219, "loss_ce": 7.25047430023551e-05, "loss_iou": 0.16015625, "loss_num": 0.01251220703125, "loss_xval": 0.3828125, "num_input_tokens_seen": 437253536, "step": 7803 }, { "epoch": 17.380846325167038, "grad_norm": 18.77859878540039, "learning_rate": 1e-06, "loss": 0.4349, "num_input_tokens_seen": 437311192, "step": 7804 }, { "epoch": 17.380846325167038, "loss": 0.4672635793685913, "loss_ce": 0.00010047997056972235, "loss_iou": 0.220703125, "loss_num": 0.005401611328125, "loss_xval": 0.466796875, "num_input_tokens_seen": 437311192, "step": 7804 }, { "epoch": 17.383073496659243, "grad_norm": 14.215685844421387, "learning_rate": 1e-06, "loss": 0.5266, "num_input_tokens_seen": 437368228, "step": 7805 }, { "epoch": 17.383073496659243, "loss": 0.6981062293052673, "loss_ce": 0.00035229395143687725, "loss_iou": 0.306640625, "loss_num": 0.0167236328125, "loss_xval": 0.69921875, "num_input_tokens_seen": 437368228, "step": 7805 }, { "epoch": 17.385300668151448, "grad_norm": 13.988162994384766, "learning_rate": 1e-06, "loss": 0.3419, "num_input_tokens_seen": 437426224, "step": 7806 }, { "epoch": 17.385300668151448, "loss": 0.3153742551803589, "loss_ce": 6.663544627372175e-05, "loss_iou": 0.1396484375, "loss_num": 0.0072021484375, "loss_xval": 0.314453125, "num_input_tokens_seen": 437426224, "step": 7806 }, { "epoch": 17.387527839643653, "grad_norm": 15.999502182006836, "learning_rate": 1e-06, "loss": 0.4759, "num_input_tokens_seen": 437483276, "step": 7807 }, { "epoch": 17.387527839643653, "loss": 0.4618630111217499, "loss_ce": 7.104083488229662e-05, "loss_iou": 0.185546875, "loss_num": 0.0181884765625, "loss_xval": 0.4609375, "num_input_tokens_seen": 437483276, "step": 7807 }, { "epoch": 17.389755011135858, "grad_norm": 19.859189987182617, "learning_rate": 1e-06, "loss": 0.5076, "num_input_tokens_seen": 437539780, "step": 7808 }, { "epoch": 17.389755011135858, "loss": 0.4627245366573334, "loss_ce": 7.806568464729935e-05, "loss_iou": 0.201171875, "loss_num": 0.01214599609375, "loss_xval": 0.462890625, "num_input_tokens_seen": 437539780, "step": 7808 }, { "epoch": 17.391982182628063, "grad_norm": 18.35259437561035, "learning_rate": 1e-06, "loss": 0.4693, "num_input_tokens_seen": 437593776, "step": 7809 }, { "epoch": 17.391982182628063, "loss": 0.6174068450927734, "loss_ce": 9.7297815955244e-05, "loss_iou": 0.275390625, "loss_num": 0.01336669921875, "loss_xval": 0.6171875, "num_input_tokens_seen": 437593776, "step": 7809 }, { "epoch": 17.394209354120267, "grad_norm": 22.22394371032715, "learning_rate": 1e-06, "loss": 0.4799, "num_input_tokens_seen": 437648068, "step": 7810 }, { "epoch": 17.394209354120267, "loss": 0.5823743939399719, "loss_ce": 0.00016007671365514398, "loss_iou": 0.255859375, "loss_num": 0.0137939453125, "loss_xval": 0.58203125, "num_input_tokens_seen": 437648068, "step": 7810 }, { "epoch": 17.396436525612472, "grad_norm": 44.06281280517578, "learning_rate": 1e-06, "loss": 0.5151, "num_input_tokens_seen": 437703580, "step": 7811 }, { "epoch": 17.396436525612472, "loss": 0.674892783164978, "loss_ce": 8.813235035631806e-05, "loss_iou": 0.28125, "loss_num": 0.022705078125, "loss_xval": 0.67578125, "num_input_tokens_seen": 437703580, "step": 7811 }, { "epoch": 17.398663697104677, "grad_norm": 23.47769546508789, "learning_rate": 1e-06, "loss": 0.5547, "num_input_tokens_seen": 437757572, "step": 7812 }, { "epoch": 17.398663697104677, "loss": 0.5415530204772949, "loss_ce": 0.0001712157973088324, "loss_iou": 0.240234375, "loss_num": 0.0120849609375, "loss_xval": 0.54296875, "num_input_tokens_seen": 437757572, "step": 7812 }, { "epoch": 17.400890868596882, "grad_norm": 17.036544799804688, "learning_rate": 1e-06, "loss": 0.4651, "num_input_tokens_seen": 437810472, "step": 7813 }, { "epoch": 17.400890868596882, "loss": 0.3841232657432556, "loss_ce": 9.007104381453246e-05, "loss_iou": 0.166015625, "loss_num": 0.0103759765625, "loss_xval": 0.384765625, "num_input_tokens_seen": 437810472, "step": 7813 }, { "epoch": 17.403118040089087, "grad_norm": 17.07404327392578, "learning_rate": 1e-06, "loss": 0.3801, "num_input_tokens_seen": 437868112, "step": 7814 }, { "epoch": 17.403118040089087, "loss": 0.4032171964645386, "loss_ce": 7.997561624506488e-05, "loss_iou": 0.1689453125, "loss_num": 0.01300048828125, "loss_xval": 0.40234375, "num_input_tokens_seen": 437868112, "step": 7814 }, { "epoch": 17.40534521158129, "grad_norm": 13.92210578918457, "learning_rate": 1e-06, "loss": 0.3186, "num_input_tokens_seen": 437923616, "step": 7815 }, { "epoch": 17.40534521158129, "loss": 0.40229788422584534, "loss_ce": 7.62125855544582e-05, "loss_iou": 0.16796875, "loss_num": 0.01312255859375, "loss_xval": 0.40234375, "num_input_tokens_seen": 437923616, "step": 7815 }, { "epoch": 17.407572383073497, "grad_norm": 19.43670082092285, "learning_rate": 1e-06, "loss": 0.4878, "num_input_tokens_seen": 437979980, "step": 7816 }, { "epoch": 17.407572383073497, "loss": 0.7343358397483826, "loss_ce": 8.290271944133565e-05, "loss_iou": 0.306640625, "loss_num": 0.02392578125, "loss_xval": 0.734375, "num_input_tokens_seen": 437979980, "step": 7816 }, { "epoch": 17.4097995545657, "grad_norm": 13.963506698608398, "learning_rate": 1e-06, "loss": 0.4524, "num_input_tokens_seen": 438033348, "step": 7817 }, { "epoch": 17.4097995545657, "loss": 0.41097211837768555, "loss_ce": 8.345420064870268e-05, "loss_iou": 0.1748046875, "loss_num": 0.0123291015625, "loss_xval": 0.41015625, "num_input_tokens_seen": 438033348, "step": 7817 }, { "epoch": 17.412026726057906, "grad_norm": 18.236024856567383, "learning_rate": 1e-06, "loss": 0.3729, "num_input_tokens_seen": 438091792, "step": 7818 }, { "epoch": 17.412026726057906, "loss": 0.31764712929725647, "loss_ce": 8.121843711705878e-05, "loss_iou": 0.1376953125, "loss_num": 0.00848388671875, "loss_xval": 0.318359375, "num_input_tokens_seen": 438091792, "step": 7818 }, { "epoch": 17.41425389755011, "grad_norm": 19.653343200683594, "learning_rate": 1e-06, "loss": 0.2597, "num_input_tokens_seen": 438148088, "step": 7819 }, { "epoch": 17.41425389755011, "loss": 0.3163628578186035, "loss_ce": 7.866387022659183e-05, "loss_iou": 0.14453125, "loss_num": 0.00531005859375, "loss_xval": 0.31640625, "num_input_tokens_seen": 438148088, "step": 7819 }, { "epoch": 17.416481069042316, "grad_norm": 16.326854705810547, "learning_rate": 1e-06, "loss": 0.4654, "num_input_tokens_seen": 438205184, "step": 7820 }, { "epoch": 17.416481069042316, "loss": 0.4878746271133423, "loss_ce": 8.16921892692335e-05, "loss_iou": 0.2119140625, "loss_num": 0.0130615234375, "loss_xval": 0.48828125, "num_input_tokens_seen": 438205184, "step": 7820 }, { "epoch": 17.41870824053452, "grad_norm": 29.167247772216797, "learning_rate": 1e-06, "loss": 0.3659, "num_input_tokens_seen": 438260072, "step": 7821 }, { "epoch": 17.41870824053452, "loss": 0.40425050258636475, "loss_ce": 7.568299042759463e-05, "loss_iou": 0.185546875, "loss_num": 0.00665283203125, "loss_xval": 0.404296875, "num_input_tokens_seen": 438260072, "step": 7821 }, { "epoch": 17.420935412026726, "grad_norm": 18.79923439025879, "learning_rate": 1e-06, "loss": 0.4383, "num_input_tokens_seen": 438314384, "step": 7822 }, { "epoch": 17.420935412026726, "loss": 0.5045883655548096, "loss_ce": 7.173811900429428e-05, "loss_iou": 0.1962890625, "loss_num": 0.022216796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 438314384, "step": 7822 }, { "epoch": 17.42316258351893, "grad_norm": 15.445906639099121, "learning_rate": 1e-06, "loss": 0.3509, "num_input_tokens_seen": 438370632, "step": 7823 }, { "epoch": 17.42316258351893, "loss": 0.43685096502304077, "loss_ce": 8.341444481629878e-05, "loss_iou": 0.1982421875, "loss_num": 0.00830078125, "loss_xval": 0.4375, "num_input_tokens_seen": 438370632, "step": 7823 }, { "epoch": 17.425389755011135, "grad_norm": 32.27328109741211, "learning_rate": 1e-06, "loss": 0.3304, "num_input_tokens_seen": 438423192, "step": 7824 }, { "epoch": 17.425389755011135, "loss": 0.33272212743759155, "loss_ce": 8.052479824982584e-05, "loss_iou": 0.146484375, "loss_num": 0.00811767578125, "loss_xval": 0.33203125, "num_input_tokens_seen": 438423192, "step": 7824 }, { "epoch": 17.42761692650334, "grad_norm": 23.52182960510254, "learning_rate": 1e-06, "loss": 0.6017, "num_input_tokens_seen": 438477292, "step": 7825 }, { "epoch": 17.42761692650334, "loss": 0.5533384680747986, "loss_ce": 0.00011578819248825312, "loss_iou": 0.2578125, "loss_num": 0.007781982421875, "loss_xval": 0.5546875, "num_input_tokens_seen": 438477292, "step": 7825 }, { "epoch": 17.429844097995545, "grad_norm": 26.976110458374023, "learning_rate": 1e-06, "loss": 0.4182, "num_input_tokens_seen": 438531796, "step": 7826 }, { "epoch": 17.429844097995545, "loss": 0.4023064076900482, "loss_ce": 8.469966996926814e-05, "loss_iou": 0.17578125, "loss_num": 0.0101318359375, "loss_xval": 0.40234375, "num_input_tokens_seen": 438531796, "step": 7826 }, { "epoch": 17.43207126948775, "grad_norm": 19.38138198852539, "learning_rate": 1e-06, "loss": 0.4998, "num_input_tokens_seen": 438589904, "step": 7827 }, { "epoch": 17.43207126948775, "loss": 0.3881371021270752, "loss_ce": 7.558944344054908e-05, "loss_iou": 0.1611328125, "loss_num": 0.0133056640625, "loss_xval": 0.388671875, "num_input_tokens_seen": 438589904, "step": 7827 }, { "epoch": 17.434298440979955, "grad_norm": 31.262649536132812, "learning_rate": 1e-06, "loss": 0.3436, "num_input_tokens_seen": 438643728, "step": 7828 }, { "epoch": 17.434298440979955, "loss": 0.3436218798160553, "loss_ce": 0.00011602583253988996, "loss_iou": 0.1552734375, "loss_num": 0.006744384765625, "loss_xval": 0.34375, "num_input_tokens_seen": 438643728, "step": 7828 }, { "epoch": 17.43652561247216, "grad_norm": 14.571344375610352, "learning_rate": 1e-06, "loss": 0.3795, "num_input_tokens_seen": 438701432, "step": 7829 }, { "epoch": 17.43652561247216, "loss": 0.4284363389015198, "loss_ce": 9.162020432995632e-05, "loss_iou": 0.1845703125, "loss_num": 0.0120849609375, "loss_xval": 0.427734375, "num_input_tokens_seen": 438701432, "step": 7829 }, { "epoch": 17.438752783964365, "grad_norm": 25.686256408691406, "learning_rate": 1e-06, "loss": 0.3957, "num_input_tokens_seen": 438755012, "step": 7830 }, { "epoch": 17.438752783964365, "loss": 0.42792049050331116, "loss_ce": 0.00012508549843914807, "loss_iou": 0.181640625, "loss_num": 0.01312255859375, "loss_xval": 0.427734375, "num_input_tokens_seen": 438755012, "step": 7830 }, { "epoch": 17.44097995545657, "grad_norm": 17.335966110229492, "learning_rate": 1e-06, "loss": 0.4715, "num_input_tokens_seen": 438807680, "step": 7831 }, { "epoch": 17.44097995545657, "loss": 0.4803728461265564, "loss_ce": 8.723569044377655e-05, "loss_iou": 0.203125, "loss_num": 0.01483154296875, "loss_xval": 0.48046875, "num_input_tokens_seen": 438807680, "step": 7831 }, { "epoch": 17.443207126948774, "grad_norm": 24.54792022705078, "learning_rate": 1e-06, "loss": 0.4759, "num_input_tokens_seen": 438864696, "step": 7832 }, { "epoch": 17.443207126948774, "loss": 0.5594247579574585, "loss_ce": 9.856373799266294e-05, "loss_iou": 0.2470703125, "loss_num": 0.01318359375, "loss_xval": 0.55859375, "num_input_tokens_seen": 438864696, "step": 7832 }, { "epoch": 17.44543429844098, "grad_norm": 21.406185150146484, "learning_rate": 1e-06, "loss": 0.4039, "num_input_tokens_seen": 438918952, "step": 7833 }, { "epoch": 17.44543429844098, "loss": 0.5075805187225342, "loss_ce": 0.00013420640607364476, "loss_iou": 0.220703125, "loss_num": 0.01348876953125, "loss_xval": 0.5078125, "num_input_tokens_seen": 438918952, "step": 7833 }, { "epoch": 17.447661469933184, "grad_norm": 21.712177276611328, "learning_rate": 1e-06, "loss": 0.264, "num_input_tokens_seen": 438976448, "step": 7834 }, { "epoch": 17.447661469933184, "loss": 0.1915939748287201, "loss_ce": 6.564041541423649e-05, "loss_iou": 0.08203125, "loss_num": 0.00555419921875, "loss_xval": 0.19140625, "num_input_tokens_seen": 438976448, "step": 7834 }, { "epoch": 17.44988864142539, "grad_norm": 16.437280654907227, "learning_rate": 1e-06, "loss": 0.3436, "num_input_tokens_seen": 439030240, "step": 7835 }, { "epoch": 17.44988864142539, "loss": 0.2427883744239807, "loss_ce": 8.206407801480964e-05, "loss_iou": 0.10791015625, "loss_num": 0.005340576171875, "loss_xval": 0.2431640625, "num_input_tokens_seen": 439030240, "step": 7835 }, { "epoch": 17.452115812917594, "grad_norm": 16.297969818115234, "learning_rate": 1e-06, "loss": 0.3661, "num_input_tokens_seen": 439087720, "step": 7836 }, { "epoch": 17.452115812917594, "loss": 0.3819359540939331, "loss_ce": 0.00010002277849707752, "loss_iou": 0.1767578125, "loss_num": 0.005767822265625, "loss_xval": 0.3828125, "num_input_tokens_seen": 439087720, "step": 7836 }, { "epoch": 17.4543429844098, "grad_norm": 14.235069274902344, "learning_rate": 1e-06, "loss": 0.4091, "num_input_tokens_seen": 439142152, "step": 7837 }, { "epoch": 17.4543429844098, "loss": 0.363969087600708, "loss_ce": 7.749867654638365e-05, "loss_iou": 0.150390625, "loss_num": 0.01251220703125, "loss_xval": 0.36328125, "num_input_tokens_seen": 439142152, "step": 7837 }, { "epoch": 17.456570155902003, "grad_norm": 18.100412368774414, "learning_rate": 1e-06, "loss": 0.4347, "num_input_tokens_seen": 439198644, "step": 7838 }, { "epoch": 17.456570155902003, "loss": 0.47042351961135864, "loss_ce": 8.66147456690669e-05, "loss_iou": 0.2001953125, "loss_num": 0.01422119140625, "loss_xval": 0.470703125, "num_input_tokens_seen": 439198644, "step": 7838 }, { "epoch": 17.45879732739421, "grad_norm": 17.923023223876953, "learning_rate": 1e-06, "loss": 0.5361, "num_input_tokens_seen": 439253288, "step": 7839 }, { "epoch": 17.45879732739421, "loss": 0.31425806879997253, "loss_ce": 0.0004763224278576672, "loss_iou": 0.140625, "loss_num": 0.006500244140625, "loss_xval": 0.314453125, "num_input_tokens_seen": 439253288, "step": 7839 }, { "epoch": 17.461024498886413, "grad_norm": 19.96112632751465, "learning_rate": 1e-06, "loss": 0.4533, "num_input_tokens_seen": 439308972, "step": 7840 }, { "epoch": 17.461024498886413, "loss": 0.40413880348205566, "loss_ce": 8.605894981883466e-05, "loss_iou": 0.1875, "loss_num": 0.00567626953125, "loss_xval": 0.404296875, "num_input_tokens_seen": 439308972, "step": 7840 }, { "epoch": 17.463251670378618, "grad_norm": 15.366341590881348, "learning_rate": 1e-06, "loss": 0.2831, "num_input_tokens_seen": 439367272, "step": 7841 }, { "epoch": 17.463251670378618, "loss": 0.33577966690063477, "loss_ce": 8.628957584733143e-05, "loss_iou": 0.138671875, "loss_num": 0.0115966796875, "loss_xval": 0.3359375, "num_input_tokens_seen": 439367272, "step": 7841 }, { "epoch": 17.465478841870823, "grad_norm": 15.852892875671387, "learning_rate": 1e-06, "loss": 0.5239, "num_input_tokens_seen": 439424584, "step": 7842 }, { "epoch": 17.465478841870823, "loss": 0.6492735147476196, "loss_ce": 0.00010354960249969736, "loss_iou": 0.279296875, "loss_num": 0.018310546875, "loss_xval": 0.6484375, "num_input_tokens_seen": 439424584, "step": 7842 }, { "epoch": 17.467706013363028, "grad_norm": 14.707501411437988, "learning_rate": 1e-06, "loss": 0.3847, "num_input_tokens_seen": 439479788, "step": 7843 }, { "epoch": 17.467706013363028, "loss": 0.31831464171409607, "loss_ce": 7.733616075711325e-05, "loss_iou": 0.1357421875, "loss_num": 0.00958251953125, "loss_xval": 0.318359375, "num_input_tokens_seen": 439479788, "step": 7843 }, { "epoch": 17.469933184855233, "grad_norm": 42.824195861816406, "learning_rate": 1e-06, "loss": 0.4873, "num_input_tokens_seen": 439536572, "step": 7844 }, { "epoch": 17.469933184855233, "loss": 0.3835739493370056, "loss_ce": 9.006427717395127e-05, "loss_iou": 0.177734375, "loss_num": 0.005767822265625, "loss_xval": 0.3828125, "num_input_tokens_seen": 439536572, "step": 7844 }, { "epoch": 17.472160356347437, "grad_norm": 14.055753707885742, "learning_rate": 1e-06, "loss": 0.564, "num_input_tokens_seen": 439594084, "step": 7845 }, { "epoch": 17.472160356347437, "loss": 0.6532701253890991, "loss_ce": 7.185361755546182e-05, "loss_iou": 0.265625, "loss_num": 0.0245361328125, "loss_xval": 0.65234375, "num_input_tokens_seen": 439594084, "step": 7845 }, { "epoch": 17.474387527839642, "grad_norm": 24.9837589263916, "learning_rate": 1e-06, "loss": 0.4575, "num_input_tokens_seen": 439649312, "step": 7846 }, { "epoch": 17.474387527839642, "loss": 0.45571112632751465, "loss_ce": 0.000144733494380489, "loss_iou": 0.1845703125, "loss_num": 0.0174560546875, "loss_xval": 0.455078125, "num_input_tokens_seen": 439649312, "step": 7846 }, { "epoch": 17.476614699331847, "grad_norm": 12.236130714416504, "learning_rate": 1e-06, "loss": 0.3456, "num_input_tokens_seen": 439705312, "step": 7847 }, { "epoch": 17.476614699331847, "loss": 0.4908040165901184, "loss_ce": 8.134340168908238e-05, "loss_iou": 0.2158203125, "loss_num": 0.011962890625, "loss_xval": 0.490234375, "num_input_tokens_seen": 439705312, "step": 7847 }, { "epoch": 17.478841870824052, "grad_norm": 15.341350555419922, "learning_rate": 1e-06, "loss": 0.2637, "num_input_tokens_seen": 439762020, "step": 7848 }, { "epoch": 17.478841870824052, "loss": 0.2432081252336502, "loss_ce": 0.00010509882849873975, "loss_iou": 0.10302734375, "loss_num": 0.00750732421875, "loss_xval": 0.2431640625, "num_input_tokens_seen": 439762020, "step": 7848 }, { "epoch": 17.481069042316257, "grad_norm": 20.993581771850586, "learning_rate": 1e-06, "loss": 0.3698, "num_input_tokens_seen": 439818004, "step": 7849 }, { "epoch": 17.481069042316257, "loss": 0.41051971912384033, "loss_ce": 0.00011930659093195572, "loss_iou": 0.1904296875, "loss_num": 0.00567626953125, "loss_xval": 0.41015625, "num_input_tokens_seen": 439818004, "step": 7849 }, { "epoch": 17.48329621380846, "grad_norm": 26.052579879760742, "learning_rate": 1e-06, "loss": 0.4034, "num_input_tokens_seen": 439876168, "step": 7850 }, { "epoch": 17.48329621380846, "loss": 0.42060598731040955, "loss_ce": 7.376498251687735e-05, "loss_iou": 0.1875, "loss_num": 0.0091552734375, "loss_xval": 0.419921875, "num_input_tokens_seen": 439876168, "step": 7850 }, { "epoch": 17.485523385300667, "grad_norm": 15.98454475402832, "learning_rate": 1e-06, "loss": 0.4183, "num_input_tokens_seen": 439929952, "step": 7851 }, { "epoch": 17.485523385300667, "loss": 0.3435441553592682, "loss_ce": 6.880345608806238e-05, "loss_iou": 0.142578125, "loss_num": 0.01171875, "loss_xval": 0.34375, "num_input_tokens_seen": 439929952, "step": 7851 }, { "epoch": 17.48775055679287, "grad_norm": 27.797683715820312, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 439986488, "step": 7852 }, { "epoch": 17.48775055679287, "loss": 0.3528550863265991, "loss_ce": 7.186994480434805e-05, "loss_iou": 0.162109375, "loss_num": 0.005645751953125, "loss_xval": 0.353515625, "num_input_tokens_seen": 439986488, "step": 7852 }, { "epoch": 17.489977728285076, "grad_norm": 17.55838966369629, "learning_rate": 1e-06, "loss": 0.4012, "num_input_tokens_seen": 440043948, "step": 7853 }, { "epoch": 17.489977728285076, "loss": 0.28866440057754517, "loss_ce": 9.020272409543395e-05, "loss_iou": 0.12890625, "loss_num": 0.006134033203125, "loss_xval": 0.2890625, "num_input_tokens_seen": 440043948, "step": 7853 }, { "epoch": 17.49220489977728, "grad_norm": 16.255395889282227, "learning_rate": 1e-06, "loss": 0.5026, "num_input_tokens_seen": 440101172, "step": 7854 }, { "epoch": 17.49220489977728, "loss": 0.4820084571838379, "loss_ce": 7.487165566999465e-05, "loss_iou": 0.220703125, "loss_num": 0.00836181640625, "loss_xval": 0.482421875, "num_input_tokens_seen": 440101172, "step": 7854 }, { "epoch": 17.494432071269486, "grad_norm": 13.98792839050293, "learning_rate": 1e-06, "loss": 0.3042, "num_input_tokens_seen": 440158728, "step": 7855 }, { "epoch": 17.494432071269486, "loss": 0.27856212854385376, "loss_ce": 8.1585232692305e-05, "loss_iou": 0.1279296875, "loss_num": 0.00457763671875, "loss_xval": 0.279296875, "num_input_tokens_seen": 440158728, "step": 7855 }, { "epoch": 17.49665924276169, "grad_norm": 15.809954643249512, "learning_rate": 1e-06, "loss": 0.4814, "num_input_tokens_seen": 440214632, "step": 7856 }, { "epoch": 17.49665924276169, "loss": 0.5641576647758484, "loss_ce": 7.077249756548554e-05, "loss_iou": 0.20703125, "loss_num": 0.030029296875, "loss_xval": 0.5625, "num_input_tokens_seen": 440214632, "step": 7856 }, { "epoch": 17.498886414253896, "grad_norm": 19.271411895751953, "learning_rate": 1e-06, "loss": 0.3269, "num_input_tokens_seen": 440273540, "step": 7857 }, { "epoch": 17.498886414253896, "loss": 0.36080411076545715, "loss_ce": 8.634180994704366e-05, "loss_iou": 0.15234375, "loss_num": 0.01123046875, "loss_xval": 0.361328125, "num_input_tokens_seen": 440273540, "step": 7857 }, { "epoch": 17.501113585746104, "grad_norm": 28.240005493164062, "learning_rate": 1e-06, "loss": 0.3847, "num_input_tokens_seen": 440328284, "step": 7858 }, { "epoch": 17.501113585746104, "loss": 0.3158857226371765, "loss_ce": 8.983007137430832e-05, "loss_iou": 0.1416015625, "loss_num": 0.006378173828125, "loss_xval": 0.31640625, "num_input_tokens_seen": 440328284, "step": 7858 }, { "epoch": 17.50334075723831, "grad_norm": 21.047122955322266, "learning_rate": 1e-06, "loss": 0.566, "num_input_tokens_seen": 440384876, "step": 7859 }, { "epoch": 17.50334075723831, "loss": 0.5007518529891968, "loss_ce": 0.00014154997188597918, "loss_iou": 0.1943359375, "loss_num": 0.0225830078125, "loss_xval": 0.5, "num_input_tokens_seen": 440384876, "step": 7859 }, { "epoch": 17.505567928730514, "grad_norm": 14.648688316345215, "learning_rate": 1e-06, "loss": 0.3644, "num_input_tokens_seen": 440440236, "step": 7860 }, { "epoch": 17.505567928730514, "loss": 0.33874061703681946, "loss_ce": 0.00020912080071866512, "loss_iou": 0.1484375, "loss_num": 0.0081787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 440440236, "step": 7860 }, { "epoch": 17.50779510022272, "grad_norm": 14.066313743591309, "learning_rate": 1e-06, "loss": 0.3047, "num_input_tokens_seen": 440495832, "step": 7861 }, { "epoch": 17.50779510022272, "loss": 0.2872577905654907, "loss_ce": 8.740053453948349e-05, "loss_iou": 0.11474609375, "loss_num": 0.011474609375, "loss_xval": 0.287109375, "num_input_tokens_seen": 440495832, "step": 7861 }, { "epoch": 17.510022271714924, "grad_norm": 14.307774543762207, "learning_rate": 1e-06, "loss": 0.3997, "num_input_tokens_seen": 440551848, "step": 7862 }, { "epoch": 17.510022271714924, "loss": 0.47919386625289917, "loss_ce": 6.788225437048823e-05, "loss_iou": 0.197265625, "loss_num": 0.01708984375, "loss_xval": 0.478515625, "num_input_tokens_seen": 440551848, "step": 7862 }, { "epoch": 17.51224944320713, "grad_norm": 21.205509185791016, "learning_rate": 1e-06, "loss": 0.359, "num_input_tokens_seen": 440605772, "step": 7863 }, { "epoch": 17.51224944320713, "loss": 0.2875545918941498, "loss_ce": 7.898983312770724e-05, "loss_iou": 0.125, "loss_num": 0.007415771484375, "loss_xval": 0.287109375, "num_input_tokens_seen": 440605772, "step": 7863 }, { "epoch": 17.514476614699333, "grad_norm": 17.232234954833984, "learning_rate": 1e-06, "loss": 0.4147, "num_input_tokens_seen": 440664340, "step": 7864 }, { "epoch": 17.514476614699333, "loss": 0.43732959032058716, "loss_ce": 7.375919085461646e-05, "loss_iou": 0.193359375, "loss_num": 0.01025390625, "loss_xval": 0.4375, "num_input_tokens_seen": 440664340, "step": 7864 }, { "epoch": 17.51670378619154, "grad_norm": 13.662554740905762, "learning_rate": 1e-06, "loss": 0.351, "num_input_tokens_seen": 440720316, "step": 7865 }, { "epoch": 17.51670378619154, "loss": 0.3087965250015259, "loss_ce": 8.073220669757575e-05, "loss_iou": 0.1279296875, "loss_num": 0.010498046875, "loss_xval": 0.30859375, "num_input_tokens_seen": 440720316, "step": 7865 }, { "epoch": 17.518930957683743, "grad_norm": 27.160282135009766, "learning_rate": 1e-06, "loss": 0.4684, "num_input_tokens_seen": 440777844, "step": 7866 }, { "epoch": 17.518930957683743, "loss": 0.5026195049285889, "loss_ce": 8.654060366097838e-05, "loss_iou": 0.2158203125, "loss_num": 0.01409912109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 440777844, "step": 7866 }, { "epoch": 17.521158129175948, "grad_norm": 16.337854385375977, "learning_rate": 1e-06, "loss": 0.4732, "num_input_tokens_seen": 440835848, "step": 7867 }, { "epoch": 17.521158129175948, "loss": 0.5095036625862122, "loss_ce": 0.00010422736522741616, "loss_iou": 0.2138671875, "loss_num": 0.016357421875, "loss_xval": 0.5078125, "num_input_tokens_seen": 440835848, "step": 7867 }, { "epoch": 17.523385300668153, "grad_norm": 16.995683670043945, "learning_rate": 1e-06, "loss": 0.4265, "num_input_tokens_seen": 440894060, "step": 7868 }, { "epoch": 17.523385300668153, "loss": 0.2940109968185425, "loss_ce": 6.568758544744924e-05, "loss_iou": 0.126953125, "loss_num": 0.0081787109375, "loss_xval": 0.29296875, "num_input_tokens_seen": 440894060, "step": 7868 }, { "epoch": 17.525612472160358, "grad_norm": 19.353191375732422, "learning_rate": 1e-06, "loss": 0.4849, "num_input_tokens_seen": 440950212, "step": 7869 }, { "epoch": 17.525612472160358, "loss": 0.3159940838813782, "loss_ce": 7.612561603309587e-05, "loss_iou": 0.125, "loss_num": 0.01318359375, "loss_xval": 0.31640625, "num_input_tokens_seen": 440950212, "step": 7869 }, { "epoch": 17.527839643652563, "grad_norm": 29.26142692565918, "learning_rate": 1e-06, "loss": 0.5345, "num_input_tokens_seen": 441005288, "step": 7870 }, { "epoch": 17.527839643652563, "loss": 0.500335693359375, "loss_ce": 9.152606799034402e-05, "loss_iou": 0.2109375, "loss_num": 0.015625, "loss_xval": 0.5, "num_input_tokens_seen": 441005288, "step": 7870 }, { "epoch": 17.530066815144767, "grad_norm": 21.974380493164062, "learning_rate": 1e-06, "loss": 0.4412, "num_input_tokens_seen": 441060244, "step": 7871 }, { "epoch": 17.530066815144767, "loss": 0.42531251907348633, "loss_ce": 8.057255035964772e-05, "loss_iou": 0.19140625, "loss_num": 0.00848388671875, "loss_xval": 0.42578125, "num_input_tokens_seen": 441060244, "step": 7871 }, { "epoch": 17.532293986636972, "grad_norm": 12.20707893371582, "learning_rate": 1e-06, "loss": 0.3677, "num_input_tokens_seen": 441116740, "step": 7872 }, { "epoch": 17.532293986636972, "loss": 0.22715777158737183, "loss_ce": 6.883872265461832e-05, "loss_iou": 0.0908203125, "loss_num": 0.0091552734375, "loss_xval": 0.2275390625, "num_input_tokens_seen": 441116740, "step": 7872 }, { "epoch": 17.534521158129177, "grad_norm": 55.13029479980469, "learning_rate": 1e-06, "loss": 0.4155, "num_input_tokens_seen": 441171904, "step": 7873 }, { "epoch": 17.534521158129177, "loss": 0.3325577676296234, "loss_ce": 9.927057544700801e-05, "loss_iou": 0.140625, "loss_num": 0.01025390625, "loss_xval": 0.33203125, "num_input_tokens_seen": 441171904, "step": 7873 }, { "epoch": 17.536748329621382, "grad_norm": 16.75377082824707, "learning_rate": 1e-06, "loss": 0.4681, "num_input_tokens_seen": 441228244, "step": 7874 }, { "epoch": 17.536748329621382, "loss": 0.4394185543060303, "loss_ce": 8.750054985284805e-05, "loss_iou": 0.181640625, "loss_num": 0.01544189453125, "loss_xval": 0.439453125, "num_input_tokens_seen": 441228244, "step": 7874 }, { "epoch": 17.538975501113587, "grad_norm": 15.606276512145996, "learning_rate": 1e-06, "loss": 0.6399, "num_input_tokens_seen": 441283016, "step": 7875 }, { "epoch": 17.538975501113587, "loss": 0.5754181742668152, "loss_ce": 0.0001007895843940787, "loss_iou": 0.255859375, "loss_num": 0.01251220703125, "loss_xval": 0.57421875, "num_input_tokens_seen": 441283016, "step": 7875 }, { "epoch": 17.54120267260579, "grad_norm": 12.660006523132324, "learning_rate": 1e-06, "loss": 0.4726, "num_input_tokens_seen": 441340588, "step": 7876 }, { "epoch": 17.54120267260579, "loss": 0.37886592745780945, "loss_ce": 8.173068636097014e-05, "loss_iou": 0.15625, "loss_num": 0.0133056640625, "loss_xval": 0.37890625, "num_input_tokens_seen": 441340588, "step": 7876 }, { "epoch": 17.543429844097997, "grad_norm": 28.50706672668457, "learning_rate": 1e-06, "loss": 0.5147, "num_input_tokens_seen": 441396552, "step": 7877 }, { "epoch": 17.543429844097997, "loss": 0.40114733576774597, "loss_ce": 8.532906213076785e-05, "loss_iou": 0.1796875, "loss_num": 0.00823974609375, "loss_xval": 0.400390625, "num_input_tokens_seen": 441396552, "step": 7877 }, { "epoch": 17.5456570155902, "grad_norm": 18.953811645507812, "learning_rate": 1e-06, "loss": 0.3756, "num_input_tokens_seen": 441454428, "step": 7878 }, { "epoch": 17.5456570155902, "loss": 0.40304529666900635, "loss_ce": 9.119759488385171e-05, "loss_iou": 0.1708984375, "loss_num": 0.01226806640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 441454428, "step": 7878 }, { "epoch": 17.547884187082406, "grad_norm": 40.04744338989258, "learning_rate": 1e-06, "loss": 0.3407, "num_input_tokens_seen": 441509888, "step": 7879 }, { "epoch": 17.547884187082406, "loss": 0.3022136688232422, "loss_ce": 8.962298306869343e-05, "loss_iou": 0.1318359375, "loss_num": 0.00775146484375, "loss_xval": 0.302734375, "num_input_tokens_seen": 441509888, "step": 7879 }, { "epoch": 17.55011135857461, "grad_norm": 23.62949562072754, "learning_rate": 1e-06, "loss": 0.3179, "num_input_tokens_seen": 441567796, "step": 7880 }, { "epoch": 17.55011135857461, "loss": 0.26015806198120117, "loss_ce": 8.726240776013583e-05, "loss_iou": 0.11328125, "loss_num": 0.006805419921875, "loss_xval": 0.259765625, "num_input_tokens_seen": 441567796, "step": 7880 }, { "epoch": 17.552338530066816, "grad_norm": 11.53111743927002, "learning_rate": 1e-06, "loss": 0.5086, "num_input_tokens_seen": 441624796, "step": 7881 }, { "epoch": 17.552338530066816, "loss": 0.4687040448188782, "loss_ce": 7.612221816089004e-05, "loss_iou": 0.1953125, "loss_num": 0.01544189453125, "loss_xval": 0.46875, "num_input_tokens_seen": 441624796, "step": 7881 }, { "epoch": 17.55456570155902, "grad_norm": 18.621740341186523, "learning_rate": 1e-06, "loss": 0.4156, "num_input_tokens_seen": 441681052, "step": 7882 }, { "epoch": 17.55456570155902, "loss": 0.4146304726600647, "loss_ce": 7.969325815793127e-05, "loss_iou": 0.1904296875, "loss_num": 0.006622314453125, "loss_xval": 0.4140625, "num_input_tokens_seen": 441681052, "step": 7882 }, { "epoch": 17.556792873051226, "grad_norm": 19.41095542907715, "learning_rate": 1e-06, "loss": 0.426, "num_input_tokens_seen": 441736492, "step": 7883 }, { "epoch": 17.556792873051226, "loss": 0.48913177847862244, "loss_ce": 0.00011811777949333191, "loss_iou": 0.1943359375, "loss_num": 0.0198974609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 441736492, "step": 7883 }, { "epoch": 17.55902004454343, "grad_norm": 225.51150512695312, "learning_rate": 1e-06, "loss": 0.4115, "num_input_tokens_seen": 441791384, "step": 7884 }, { "epoch": 17.55902004454343, "loss": 0.42270350456237793, "loss_ce": 9.607183164916933e-05, "loss_iou": 0.1943359375, "loss_num": 0.006683349609375, "loss_xval": 0.421875, "num_input_tokens_seen": 441791384, "step": 7884 }, { "epoch": 17.561247216035635, "grad_norm": 22.389850616455078, "learning_rate": 1e-06, "loss": 0.4788, "num_input_tokens_seen": 441846172, "step": 7885 }, { "epoch": 17.561247216035635, "loss": 0.44719523191452026, "loss_ce": 0.00011274641292402521, "loss_iou": 0.19140625, "loss_num": 0.01275634765625, "loss_xval": 0.447265625, "num_input_tokens_seen": 441846172, "step": 7885 }, { "epoch": 17.56347438752784, "grad_norm": 25.177894592285156, "learning_rate": 1e-06, "loss": 0.4156, "num_input_tokens_seen": 441901144, "step": 7886 }, { "epoch": 17.56347438752784, "loss": 0.5812680721282959, "loss_ce": 9.13147086976096e-05, "loss_iou": 0.255859375, "loss_num": 0.014404296875, "loss_xval": 0.58203125, "num_input_tokens_seen": 441901144, "step": 7886 }, { "epoch": 17.565701559020045, "grad_norm": 20.27315902709961, "learning_rate": 1e-06, "loss": 0.3407, "num_input_tokens_seen": 441956844, "step": 7887 }, { "epoch": 17.565701559020045, "loss": 0.32326167821884155, "loss_ce": 8.051642362261191e-05, "loss_iou": 0.142578125, "loss_num": 0.007537841796875, "loss_xval": 0.322265625, "num_input_tokens_seen": 441956844, "step": 7887 }, { "epoch": 17.56792873051225, "grad_norm": 15.661298751831055, "learning_rate": 1e-06, "loss": 0.359, "num_input_tokens_seen": 442013600, "step": 7888 }, { "epoch": 17.56792873051225, "loss": 0.3030818700790405, "loss_ce": 0.00010335086699342355, "loss_iou": 0.1337890625, "loss_num": 0.0069580078125, "loss_xval": 0.302734375, "num_input_tokens_seen": 442013600, "step": 7888 }, { "epoch": 17.570155902004455, "grad_norm": 16.606544494628906, "learning_rate": 1e-06, "loss": 0.3319, "num_input_tokens_seen": 442070068, "step": 7889 }, { "epoch": 17.570155902004455, "loss": 0.34052973985671997, "loss_ce": 7.563138206023723e-05, "loss_iou": 0.1396484375, "loss_num": 0.01226806640625, "loss_xval": 0.33984375, "num_input_tokens_seen": 442070068, "step": 7889 }, { "epoch": 17.57238307349666, "grad_norm": 17.5957088470459, "learning_rate": 1e-06, "loss": 0.307, "num_input_tokens_seen": 442125372, "step": 7890 }, { "epoch": 17.57238307349666, "loss": 0.29927778244018555, "loss_ce": 8.342567889485508e-05, "loss_iou": 0.125, "loss_num": 0.00970458984375, "loss_xval": 0.298828125, "num_input_tokens_seen": 442125372, "step": 7890 }, { "epoch": 17.574610244988865, "grad_norm": 22.874229431152344, "learning_rate": 1e-06, "loss": 0.3021, "num_input_tokens_seen": 442180908, "step": 7891 }, { "epoch": 17.574610244988865, "loss": 0.34810495376586914, "loss_ce": 8.246798097388819e-05, "loss_iou": 0.1484375, "loss_num": 0.01007080078125, "loss_xval": 0.34765625, "num_input_tokens_seen": 442180908, "step": 7891 }, { "epoch": 17.57683741648107, "grad_norm": 12.980911254882812, "learning_rate": 1e-06, "loss": 0.5465, "num_input_tokens_seen": 442237344, "step": 7892 }, { "epoch": 17.57683741648107, "loss": 0.6626755595207214, "loss_ce": 7.78659014031291e-05, "loss_iou": 0.267578125, "loss_num": 0.025390625, "loss_xval": 0.6640625, "num_input_tokens_seen": 442237344, "step": 7892 }, { "epoch": 17.579064587973274, "grad_norm": 16.26838493347168, "learning_rate": 1e-06, "loss": 0.3547, "num_input_tokens_seen": 442291704, "step": 7893 }, { "epoch": 17.579064587973274, "loss": 0.18787457048892975, "loss_ce": 6.940308230696246e-05, "loss_iou": 0.07763671875, "loss_num": 0.006500244140625, "loss_xval": 0.1875, "num_input_tokens_seen": 442291704, "step": 7893 }, { "epoch": 17.58129175946548, "grad_norm": 19.98689079284668, "learning_rate": 1e-06, "loss": 0.5964, "num_input_tokens_seen": 442348288, "step": 7894 }, { "epoch": 17.58129175946548, "loss": 0.5989159345626831, "loss_ce": 0.00028316857060417533, "loss_iou": 0.263671875, "loss_num": 0.0142822265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 442348288, "step": 7894 }, { "epoch": 17.583518930957684, "grad_norm": 16.321231842041016, "learning_rate": 1e-06, "loss": 0.354, "num_input_tokens_seen": 442404364, "step": 7895 }, { "epoch": 17.583518930957684, "loss": 0.3744066655635834, "loss_ce": 7.803218613844365e-05, "loss_iou": 0.1689453125, "loss_num": 0.00750732421875, "loss_xval": 0.375, "num_input_tokens_seen": 442404364, "step": 7895 }, { "epoch": 17.58574610244989, "grad_norm": 18.746238708496094, "learning_rate": 1e-06, "loss": 0.3205, "num_input_tokens_seen": 442461344, "step": 7896 }, { "epoch": 17.58574610244989, "loss": 0.2867514491081238, "loss_ce": 6.934157863724977e-05, "loss_iou": 0.1259765625, "loss_num": 0.007110595703125, "loss_xval": 0.287109375, "num_input_tokens_seen": 442461344, "step": 7896 }, { "epoch": 17.587973273942094, "grad_norm": 25.406570434570312, "learning_rate": 1e-06, "loss": 0.4222, "num_input_tokens_seen": 442516584, "step": 7897 }, { "epoch": 17.587973273942094, "loss": 0.33216458559036255, "loss_ce": 7.232959615066648e-05, "loss_iou": 0.150390625, "loss_num": 0.006195068359375, "loss_xval": 0.33203125, "num_input_tokens_seen": 442516584, "step": 7897 }, { "epoch": 17.5902004454343, "grad_norm": 18.10183334350586, "learning_rate": 1e-06, "loss": 0.3157, "num_input_tokens_seen": 442573824, "step": 7898 }, { "epoch": 17.5902004454343, "loss": 0.22795072197914124, "loss_ce": 7.596083742100745e-05, "loss_iou": 0.0966796875, "loss_num": 0.0068359375, "loss_xval": 0.2275390625, "num_input_tokens_seen": 442573824, "step": 7898 }, { "epoch": 17.592427616926503, "grad_norm": 38.653526306152344, "learning_rate": 1e-06, "loss": 0.5042, "num_input_tokens_seen": 442629168, "step": 7899 }, { "epoch": 17.592427616926503, "loss": 0.3851657509803772, "loss_ce": 9.493608376942575e-05, "loss_iou": 0.17578125, "loss_num": 0.00677490234375, "loss_xval": 0.384765625, "num_input_tokens_seen": 442629168, "step": 7899 }, { "epoch": 17.59465478841871, "grad_norm": 32.544410705566406, "learning_rate": 1e-06, "loss": 0.5607, "num_input_tokens_seen": 442683144, "step": 7900 }, { "epoch": 17.59465478841871, "loss": 0.556228756904602, "loss_ce": 7.639994146302342e-05, "loss_iou": 0.25390625, "loss_num": 0.010009765625, "loss_xval": 0.5546875, "num_input_tokens_seen": 442683144, "step": 7900 }, { "epoch": 17.596881959910913, "grad_norm": 19.76438331604004, "learning_rate": 1e-06, "loss": 0.5191, "num_input_tokens_seen": 442737092, "step": 7901 }, { "epoch": 17.596881959910913, "loss": 0.436367928981781, "loss_ce": 7.337699935305864e-05, "loss_iou": 0.1982421875, "loss_num": 0.008056640625, "loss_xval": 0.435546875, "num_input_tokens_seen": 442737092, "step": 7901 }, { "epoch": 17.599109131403118, "grad_norm": 25.040283203125, "learning_rate": 1e-06, "loss": 0.5854, "num_input_tokens_seen": 442790552, "step": 7902 }, { "epoch": 17.599109131403118, "loss": 0.5836904048919678, "loss_ce": 7.226417073979974e-05, "loss_iou": 0.26953125, "loss_num": 0.00897216796875, "loss_xval": 0.58203125, "num_input_tokens_seen": 442790552, "step": 7902 }, { "epoch": 17.601336302895323, "grad_norm": 25.69877052307129, "learning_rate": 1e-06, "loss": 0.4327, "num_input_tokens_seen": 442845024, "step": 7903 }, { "epoch": 17.601336302895323, "loss": 0.28219154477119446, "loss_ce": 8.705261279828846e-05, "loss_iou": 0.12158203125, "loss_num": 0.00787353515625, "loss_xval": 0.28125, "num_input_tokens_seen": 442845024, "step": 7903 }, { "epoch": 17.603563474387528, "grad_norm": 22.128719329833984, "learning_rate": 1e-06, "loss": 0.3273, "num_input_tokens_seen": 442901976, "step": 7904 }, { "epoch": 17.603563474387528, "loss": 0.382587730884552, "loss_ce": 8.040809188969433e-05, "loss_iou": 0.169921875, "loss_num": 0.00848388671875, "loss_xval": 0.3828125, "num_input_tokens_seen": 442901976, "step": 7904 }, { "epoch": 17.605790645879733, "grad_norm": 21.455659866333008, "learning_rate": 1e-06, "loss": 0.5433, "num_input_tokens_seen": 442959192, "step": 7905 }, { "epoch": 17.605790645879733, "loss": 0.4209858179092407, "loss_ce": 8.739200711715966e-05, "loss_iou": 0.1904296875, "loss_num": 0.00799560546875, "loss_xval": 0.421875, "num_input_tokens_seen": 442959192, "step": 7905 }, { "epoch": 17.608017817371937, "grad_norm": 19.420513153076172, "learning_rate": 1e-06, "loss": 0.3687, "num_input_tokens_seen": 443016136, "step": 7906 }, { "epoch": 17.608017817371937, "loss": 0.3827753961086273, "loss_ce": 8.494692883687094e-05, "loss_iou": 0.1572265625, "loss_num": 0.0135498046875, "loss_xval": 0.3828125, "num_input_tokens_seen": 443016136, "step": 7906 }, { "epoch": 17.610244988864142, "grad_norm": 15.976387023925781, "learning_rate": 1e-06, "loss": 0.6103, "num_input_tokens_seen": 443072296, "step": 7907 }, { "epoch": 17.610244988864142, "loss": 0.8074872493743896, "loss_ce": 0.0001141867833212018, "loss_iou": 0.310546875, "loss_num": 0.037353515625, "loss_xval": 0.80859375, "num_input_tokens_seen": 443072296, "step": 7907 }, { "epoch": 17.612472160356347, "grad_norm": 17.13764190673828, "learning_rate": 1e-06, "loss": 0.3673, "num_input_tokens_seen": 443128500, "step": 7908 }, { "epoch": 17.612472160356347, "loss": 0.4594786763191223, "loss_ce": 0.00012808736937586218, "loss_iou": 0.181640625, "loss_num": 0.019287109375, "loss_xval": 0.458984375, "num_input_tokens_seen": 443128500, "step": 7908 }, { "epoch": 17.614699331848552, "grad_norm": 17.32769203186035, "learning_rate": 1e-06, "loss": 0.3735, "num_input_tokens_seen": 443183760, "step": 7909 }, { "epoch": 17.614699331848552, "loss": 0.44984328746795654, "loss_ce": 7.522152736783028e-05, "loss_iou": 0.1982421875, "loss_num": 0.01068115234375, "loss_xval": 0.44921875, "num_input_tokens_seen": 443183760, "step": 7909 }, { "epoch": 17.616926503340757, "grad_norm": 22.6795711517334, "learning_rate": 1e-06, "loss": 0.3399, "num_input_tokens_seen": 443237420, "step": 7910 }, { "epoch": 17.616926503340757, "loss": 0.4001952111721039, "loss_ce": 0.0001708113995846361, "loss_iou": 0.1787109375, "loss_num": 0.00836181640625, "loss_xval": 0.400390625, "num_input_tokens_seen": 443237420, "step": 7910 }, { "epoch": 17.619153674832962, "grad_norm": 74.46725463867188, "learning_rate": 1e-06, "loss": 0.504, "num_input_tokens_seen": 443295776, "step": 7911 }, { "epoch": 17.619153674832962, "loss": 0.5105748772621155, "loss_ce": 7.682420255150646e-05, "loss_iou": 0.2216796875, "loss_num": 0.013427734375, "loss_xval": 0.51171875, "num_input_tokens_seen": 443295776, "step": 7911 }, { "epoch": 17.621380846325167, "grad_norm": 14.628600120544434, "learning_rate": 1e-06, "loss": 0.3959, "num_input_tokens_seen": 443352484, "step": 7912 }, { "epoch": 17.621380846325167, "loss": 0.5070550441741943, "loss_ce": 9.699978545540944e-05, "loss_iou": 0.2275390625, "loss_num": 0.01043701171875, "loss_xval": 0.5078125, "num_input_tokens_seen": 443352484, "step": 7912 }, { "epoch": 17.62360801781737, "grad_norm": 19.935930252075195, "learning_rate": 1e-06, "loss": 0.4519, "num_input_tokens_seen": 443406844, "step": 7913 }, { "epoch": 17.62360801781737, "loss": 0.47246021032333374, "loss_ce": 7.862341590225697e-05, "loss_iou": 0.2109375, "loss_num": 0.01007080078125, "loss_xval": 0.47265625, "num_input_tokens_seen": 443406844, "step": 7913 }, { "epoch": 17.625835189309576, "grad_norm": 18.648386001586914, "learning_rate": 1e-06, "loss": 0.37, "num_input_tokens_seen": 443463412, "step": 7914 }, { "epoch": 17.625835189309576, "loss": 0.4554082155227661, "loss_ce": 8.59397478052415e-05, "loss_iou": 0.2021484375, "loss_num": 0.01043701171875, "loss_xval": 0.455078125, "num_input_tokens_seen": 443463412, "step": 7914 }, { "epoch": 17.62806236080178, "grad_norm": 11.401290893554688, "learning_rate": 1e-06, "loss": 0.4383, "num_input_tokens_seen": 443517876, "step": 7915 }, { "epoch": 17.62806236080178, "loss": 0.4097170829772949, "loss_ce": 7.962982635945082e-05, "loss_iou": 0.1767578125, "loss_num": 0.0113525390625, "loss_xval": 0.41015625, "num_input_tokens_seen": 443517876, "step": 7915 }, { "epoch": 17.630289532293986, "grad_norm": 15.960351943969727, "learning_rate": 1e-06, "loss": 0.4956, "num_input_tokens_seen": 443575536, "step": 7916 }, { "epoch": 17.630289532293986, "loss": 0.5167785882949829, "loss_ce": 0.00011598135461099446, "loss_iou": 0.2158203125, "loss_num": 0.01708984375, "loss_xval": 0.515625, "num_input_tokens_seen": 443575536, "step": 7916 }, { "epoch": 17.63251670378619, "grad_norm": 21.701749801635742, "learning_rate": 1e-06, "loss": 0.4341, "num_input_tokens_seen": 443631704, "step": 7917 }, { "epoch": 17.63251670378619, "loss": 0.501663863658905, "loss_ce": 7.692172948736697e-05, "loss_iou": 0.2265625, "loss_num": 0.009765625, "loss_xval": 0.5, "num_input_tokens_seen": 443631704, "step": 7917 }, { "epoch": 17.634743875278396, "grad_norm": 14.116578102111816, "learning_rate": 1e-06, "loss": 0.4556, "num_input_tokens_seen": 443688408, "step": 7918 }, { "epoch": 17.634743875278396, "loss": 0.4800468385219574, "loss_ce": 6.637441401835531e-05, "loss_iou": 0.197265625, "loss_num": 0.016845703125, "loss_xval": 0.48046875, "num_input_tokens_seen": 443688408, "step": 7918 }, { "epoch": 17.6369710467706, "grad_norm": 83.376708984375, "learning_rate": 1e-06, "loss": 0.4785, "num_input_tokens_seen": 443745244, "step": 7919 }, { "epoch": 17.6369710467706, "loss": 0.38171082735061646, "loss_ce": 0.00011904077837243676, "loss_iou": 0.1650390625, "loss_num": 0.01031494140625, "loss_xval": 0.380859375, "num_input_tokens_seen": 443745244, "step": 7919 }, { "epoch": 17.639198218262806, "grad_norm": 18.27570152282715, "learning_rate": 1e-06, "loss": 0.4711, "num_input_tokens_seen": 443804444, "step": 7920 }, { "epoch": 17.639198218262806, "loss": 0.5395940542221069, "loss_ce": 0.00010429859685245901, "loss_iou": 0.2265625, "loss_num": 0.01708984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 443804444, "step": 7920 }, { "epoch": 17.64142538975501, "grad_norm": 18.891740798950195, "learning_rate": 1e-06, "loss": 0.4308, "num_input_tokens_seen": 443860448, "step": 7921 }, { "epoch": 17.64142538975501, "loss": 0.2655089795589447, "loss_ce": 6.710184970870614e-05, "loss_iou": 0.11962890625, "loss_num": 0.005157470703125, "loss_xval": 0.265625, "num_input_tokens_seen": 443860448, "step": 7921 }, { "epoch": 17.643652561247215, "grad_norm": 21.501697540283203, "learning_rate": 1e-06, "loss": 0.2732, "num_input_tokens_seen": 443917392, "step": 7922 }, { "epoch": 17.643652561247215, "loss": 0.34542766213417053, "loss_ce": 9.074142144527286e-05, "loss_iou": 0.1572265625, "loss_num": 0.006317138671875, "loss_xval": 0.345703125, "num_input_tokens_seen": 443917392, "step": 7922 }, { "epoch": 17.64587973273942, "grad_norm": 25.702478408813477, "learning_rate": 1e-06, "loss": 0.5289, "num_input_tokens_seen": 443970952, "step": 7923 }, { "epoch": 17.64587973273942, "loss": 0.6104388236999512, "loss_ce": 8.727777458261698e-05, "loss_iou": 0.26953125, "loss_num": 0.01458740234375, "loss_xval": 0.609375, "num_input_tokens_seen": 443970952, "step": 7923 }, { "epoch": 17.648106904231625, "grad_norm": 17.689945220947266, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 444025404, "step": 7924 }, { "epoch": 17.648106904231625, "loss": 0.3999115824699402, "loss_ce": 8.553590305382386e-05, "loss_iou": 0.1796875, "loss_num": 0.00799560546875, "loss_xval": 0.400390625, "num_input_tokens_seen": 444025404, "step": 7924 }, { "epoch": 17.65033407572383, "grad_norm": 17.883432388305664, "learning_rate": 1e-06, "loss": 0.4983, "num_input_tokens_seen": 444081664, "step": 7925 }, { "epoch": 17.65033407572383, "loss": 0.4602789878845215, "loss_ce": 7.389666279777884e-05, "loss_iou": 0.1884765625, "loss_num": 0.0167236328125, "loss_xval": 0.4609375, "num_input_tokens_seen": 444081664, "step": 7925 }, { "epoch": 17.652561247216035, "grad_norm": 16.05899429321289, "learning_rate": 1e-06, "loss": 0.472, "num_input_tokens_seen": 444141192, "step": 7926 }, { "epoch": 17.652561247216035, "loss": 0.3934362232685089, "loss_ce": 9.515189594822004e-05, "loss_iou": 0.1708984375, "loss_num": 0.0103759765625, "loss_xval": 0.392578125, "num_input_tokens_seen": 444141192, "step": 7926 }, { "epoch": 17.65478841870824, "grad_norm": 26.469680786132812, "learning_rate": 1e-06, "loss": 0.4919, "num_input_tokens_seen": 444197160, "step": 7927 }, { "epoch": 17.65478841870824, "loss": 0.4767637252807617, "loss_ce": 7.915783498901874e-05, "loss_iou": 0.1962890625, "loss_num": 0.0167236328125, "loss_xval": 0.4765625, "num_input_tokens_seen": 444197160, "step": 7927 }, { "epoch": 17.657015590200444, "grad_norm": 13.821784973144531, "learning_rate": 1e-06, "loss": 0.3612, "num_input_tokens_seen": 444252964, "step": 7928 }, { "epoch": 17.657015590200444, "loss": 0.23647625744342804, "loss_ce": 8.710073598194867e-05, "loss_iou": 0.10302734375, "loss_num": 0.006103515625, "loss_xval": 0.236328125, "num_input_tokens_seen": 444252964, "step": 7928 }, { "epoch": 17.65924276169265, "grad_norm": 16.62885284423828, "learning_rate": 1e-06, "loss": 0.386, "num_input_tokens_seen": 444307512, "step": 7929 }, { "epoch": 17.65924276169265, "loss": 0.4383907914161682, "loss_ce": 6.681391096208245e-05, "loss_iou": 0.1689453125, "loss_num": 0.020263671875, "loss_xval": 0.4375, "num_input_tokens_seen": 444307512, "step": 7929 }, { "epoch": 17.661469933184854, "grad_norm": 20.293445587158203, "learning_rate": 1e-06, "loss": 0.4577, "num_input_tokens_seen": 444363292, "step": 7930 }, { "epoch": 17.661469933184854, "loss": 0.3440844714641571, "loss_ce": 9.034699178300798e-05, "loss_iou": 0.146484375, "loss_num": 0.01025390625, "loss_xval": 0.34375, "num_input_tokens_seen": 444363292, "step": 7930 }, { "epoch": 17.66369710467706, "grad_norm": 19.445138931274414, "learning_rate": 1e-06, "loss": 0.4071, "num_input_tokens_seen": 444417980, "step": 7931 }, { "epoch": 17.66369710467706, "loss": 0.35793596506118774, "loss_ce": 8.685909415362403e-05, "loss_iou": 0.1611328125, "loss_num": 0.0069580078125, "loss_xval": 0.357421875, "num_input_tokens_seen": 444417980, "step": 7931 }, { "epoch": 17.665924276169264, "grad_norm": 21.812707901000977, "learning_rate": 1e-06, "loss": 0.4571, "num_input_tokens_seen": 444475180, "step": 7932 }, { "epoch": 17.665924276169264, "loss": 0.48831433057785034, "loss_ce": 9.407131437910721e-05, "loss_iou": 0.2216796875, "loss_num": 0.0089111328125, "loss_xval": 0.48828125, "num_input_tokens_seen": 444475180, "step": 7932 }, { "epoch": 17.66815144766147, "grad_norm": 21.94272804260254, "learning_rate": 1e-06, "loss": 0.4738, "num_input_tokens_seen": 444530092, "step": 7933 }, { "epoch": 17.66815144766147, "loss": 0.4068112075328827, "loss_ce": 7.29096500435844e-05, "loss_iou": 0.1708984375, "loss_num": 0.0128173828125, "loss_xval": 0.40625, "num_input_tokens_seen": 444530092, "step": 7933 }, { "epoch": 17.670378619153674, "grad_norm": 21.521142959594727, "learning_rate": 1e-06, "loss": 0.5135, "num_input_tokens_seen": 444585348, "step": 7934 }, { "epoch": 17.670378619153674, "loss": 0.5115725994110107, "loss_ce": 9.7946947789751e-05, "loss_iou": 0.2197265625, "loss_num": 0.01434326171875, "loss_xval": 0.51171875, "num_input_tokens_seen": 444585348, "step": 7934 }, { "epoch": 17.67260579064588, "grad_norm": 14.11042308807373, "learning_rate": 1e-06, "loss": 0.2493, "num_input_tokens_seen": 444641960, "step": 7935 }, { "epoch": 17.67260579064588, "loss": 0.23591431975364685, "loss_ce": 7.447184179909527e-05, "loss_iou": 0.1064453125, "loss_num": 0.004547119140625, "loss_xval": 0.236328125, "num_input_tokens_seen": 444641960, "step": 7935 }, { "epoch": 17.674832962138083, "grad_norm": 23.0136775970459, "learning_rate": 1e-06, "loss": 0.4793, "num_input_tokens_seen": 444695372, "step": 7936 }, { "epoch": 17.674832962138083, "loss": 0.3692181408405304, "loss_ce": 7.750838994979858e-05, "loss_iou": 0.1630859375, "loss_num": 0.008544921875, "loss_xval": 0.369140625, "num_input_tokens_seen": 444695372, "step": 7936 }, { "epoch": 17.677060133630288, "grad_norm": 20.372121810913086, "learning_rate": 1e-06, "loss": 0.414, "num_input_tokens_seen": 444750148, "step": 7937 }, { "epoch": 17.677060133630288, "loss": 0.34128010272979736, "loss_ce": 9.356189548270777e-05, "loss_iou": 0.1591796875, "loss_num": 0.004547119140625, "loss_xval": 0.341796875, "num_input_tokens_seen": 444750148, "step": 7937 }, { "epoch": 17.679287305122493, "grad_norm": 23.466459274291992, "learning_rate": 1e-06, "loss": 0.3234, "num_input_tokens_seen": 444809496, "step": 7938 }, { "epoch": 17.679287305122493, "loss": 0.36702263355255127, "loss_ce": 7.92567734606564e-05, "loss_iou": 0.1630859375, "loss_num": 0.008056640625, "loss_xval": 0.3671875, "num_input_tokens_seen": 444809496, "step": 7938 }, { "epoch": 17.681514476614698, "grad_norm": 18.88533592224121, "learning_rate": 1e-06, "loss": 0.5686, "num_input_tokens_seen": 444865560, "step": 7939 }, { "epoch": 17.681514476614698, "loss": 0.4967142343521118, "loss_ce": 0.00013218897220212966, "loss_iou": 0.2060546875, "loss_num": 0.0169677734375, "loss_xval": 0.49609375, "num_input_tokens_seen": 444865560, "step": 7939 }, { "epoch": 17.683741648106903, "grad_norm": 31.180578231811523, "learning_rate": 1e-06, "loss": 0.5194, "num_input_tokens_seen": 444921728, "step": 7940 }, { "epoch": 17.683741648106903, "loss": 0.3380870223045349, "loss_ce": 0.0001658698165556416, "loss_iou": 0.1455078125, "loss_num": 0.00921630859375, "loss_xval": 0.337890625, "num_input_tokens_seen": 444921728, "step": 7940 }, { "epoch": 17.685968819599108, "grad_norm": 28.334543228149414, "learning_rate": 1e-06, "loss": 0.4694, "num_input_tokens_seen": 444976224, "step": 7941 }, { "epoch": 17.685968819599108, "loss": 0.44913750886917114, "loss_ce": 0.00010187811858486384, "loss_iou": 0.1953125, "loss_num": 0.01177978515625, "loss_xval": 0.44921875, "num_input_tokens_seen": 444976224, "step": 7941 }, { "epoch": 17.688195991091312, "grad_norm": 19.259824752807617, "learning_rate": 1e-06, "loss": 0.368, "num_input_tokens_seen": 445031004, "step": 7942 }, { "epoch": 17.688195991091312, "loss": 0.35163217782974243, "loss_ce": 6.968076922930777e-05, "loss_iou": 0.1572265625, "loss_num": 0.00750732421875, "loss_xval": 0.3515625, "num_input_tokens_seen": 445031004, "step": 7942 }, { "epoch": 17.690423162583517, "grad_norm": 15.660816192626953, "learning_rate": 1e-06, "loss": 0.4662, "num_input_tokens_seen": 445088476, "step": 7943 }, { "epoch": 17.690423162583517, "loss": 0.40098705887794495, "loss_ce": 0.00010816691064974293, "loss_iou": 0.185546875, "loss_num": 0.005859375, "loss_xval": 0.400390625, "num_input_tokens_seen": 445088476, "step": 7943 }, { "epoch": 17.692650334075722, "grad_norm": 63.67619705200195, "learning_rate": 1e-06, "loss": 0.5689, "num_input_tokens_seen": 445146920, "step": 7944 }, { "epoch": 17.692650334075722, "loss": 0.620599091053009, "loss_ce": 0.00011569763591978699, "loss_iou": 0.275390625, "loss_num": 0.01373291015625, "loss_xval": 0.62109375, "num_input_tokens_seen": 445146920, "step": 7944 }, { "epoch": 17.694877505567927, "grad_norm": 29.446853637695312, "learning_rate": 1e-06, "loss": 0.4434, "num_input_tokens_seen": 445199824, "step": 7945 }, { "epoch": 17.694877505567927, "loss": 0.44784268736839294, "loss_ce": 8.878282096702605e-05, "loss_iou": 0.1962890625, "loss_num": 0.0108642578125, "loss_xval": 0.447265625, "num_input_tokens_seen": 445199824, "step": 7945 }, { "epoch": 17.697104677060132, "grad_norm": 25.64476203918457, "learning_rate": 1e-06, "loss": 0.4774, "num_input_tokens_seen": 445257296, "step": 7946 }, { "epoch": 17.697104677060132, "loss": 0.3815593123435974, "loss_ce": 8.9571054559201e-05, "loss_iou": 0.1474609375, "loss_num": 0.0172119140625, "loss_xval": 0.380859375, "num_input_tokens_seen": 445257296, "step": 7946 }, { "epoch": 17.69933184855234, "grad_norm": 16.938840866088867, "learning_rate": 1e-06, "loss": 0.3748, "num_input_tokens_seen": 445314848, "step": 7947 }, { "epoch": 17.69933184855234, "loss": 0.32621848583221436, "loss_ce": 7.714293315075338e-05, "loss_iou": 0.1376953125, "loss_num": 0.0098876953125, "loss_xval": 0.326171875, "num_input_tokens_seen": 445314848, "step": 7947 }, { "epoch": 17.70155902004454, "grad_norm": 15.301907539367676, "learning_rate": 1e-06, "loss": 0.2937, "num_input_tokens_seen": 445372204, "step": 7948 }, { "epoch": 17.70155902004454, "loss": 0.30735015869140625, "loss_ce": 9.919532749336213e-05, "loss_iou": 0.138671875, "loss_num": 0.006103515625, "loss_xval": 0.306640625, "num_input_tokens_seen": 445372204, "step": 7948 }, { "epoch": 17.70378619153675, "grad_norm": 16.031461715698242, "learning_rate": 1e-06, "loss": 0.4855, "num_input_tokens_seen": 445430708, "step": 7949 }, { "epoch": 17.70378619153675, "loss": 0.49899452924728394, "loss_ce": 9.31676768232137e-05, "loss_iou": 0.18359375, "loss_num": 0.0262451171875, "loss_xval": 0.498046875, "num_input_tokens_seen": 445430708, "step": 7949 }, { "epoch": 17.706013363028955, "grad_norm": 13.996493339538574, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 445485268, "step": 7950 }, { "epoch": 17.706013363028955, "loss": 0.4317079484462738, "loss_ce": 6.729987217113376e-05, "loss_iou": 0.1796875, "loss_num": 0.01434326171875, "loss_xval": 0.431640625, "num_input_tokens_seen": 445485268, "step": 7950 }, { "epoch": 17.70824053452116, "grad_norm": 19.51962661743164, "learning_rate": 1e-06, "loss": 0.5369, "num_input_tokens_seen": 445538736, "step": 7951 }, { "epoch": 17.70824053452116, "loss": 0.7260493636131287, "loss_ce": 9.723611583467573e-05, "loss_iou": 0.310546875, "loss_num": 0.020751953125, "loss_xval": 0.7265625, "num_input_tokens_seen": 445538736, "step": 7951 }, { "epoch": 17.710467706013365, "grad_norm": 34.36812210083008, "learning_rate": 1e-06, "loss": 0.6776, "num_input_tokens_seen": 445593288, "step": 7952 }, { "epoch": 17.710467706013365, "loss": 0.8874142169952393, "loss_ce": 8.507441089022905e-05, "loss_iou": 0.3984375, "loss_num": 0.0185546875, "loss_xval": 0.88671875, "num_input_tokens_seen": 445593288, "step": 7952 }, { "epoch": 17.71269487750557, "grad_norm": 19.97995948791504, "learning_rate": 1e-06, "loss": 0.3888, "num_input_tokens_seen": 445651940, "step": 7953 }, { "epoch": 17.71269487750557, "loss": 0.35836881399154663, "loss_ce": 9.244780812878162e-05, "loss_iou": 0.1533203125, "loss_num": 0.01019287109375, "loss_xval": 0.357421875, "num_input_tokens_seen": 445651940, "step": 7953 }, { "epoch": 17.714922048997774, "grad_norm": 18.99198341369629, "learning_rate": 1e-06, "loss": 0.3816, "num_input_tokens_seen": 445707972, "step": 7954 }, { "epoch": 17.714922048997774, "loss": 0.43836966156959534, "loss_ce": 7.619359530508518e-05, "loss_iou": 0.193359375, "loss_num": 0.01043701171875, "loss_xval": 0.4375, "num_input_tokens_seen": 445707972, "step": 7954 }, { "epoch": 17.71714922048998, "grad_norm": 24.05302619934082, "learning_rate": 1e-06, "loss": 0.4304, "num_input_tokens_seen": 445765012, "step": 7955 }, { "epoch": 17.71714922048998, "loss": 0.5598816871643066, "loss_ce": 0.00012829234765376896, "loss_iou": 0.24609375, "loss_num": 0.013427734375, "loss_xval": 0.55859375, "num_input_tokens_seen": 445765012, "step": 7955 }, { "epoch": 17.719376391982184, "grad_norm": 22.349224090576172, "learning_rate": 1e-06, "loss": 0.4478, "num_input_tokens_seen": 445820252, "step": 7956 }, { "epoch": 17.719376391982184, "loss": 0.5640408992767334, "loss_ce": 7.6019496191293e-05, "loss_iou": 0.2265625, "loss_num": 0.0220947265625, "loss_xval": 0.5625, "num_input_tokens_seen": 445820252, "step": 7956 }, { "epoch": 17.72160356347439, "grad_norm": 33.075538635253906, "learning_rate": 1e-06, "loss": 0.399, "num_input_tokens_seen": 445875052, "step": 7957 }, { "epoch": 17.72160356347439, "loss": 0.39026761054992676, "loss_ce": 6.987876258790493e-05, "loss_iou": 0.173828125, "loss_num": 0.00860595703125, "loss_xval": 0.390625, "num_input_tokens_seen": 445875052, "step": 7957 }, { "epoch": 17.723830734966594, "grad_norm": 16.910858154296875, "learning_rate": 1e-06, "loss": 0.4148, "num_input_tokens_seen": 445928940, "step": 7958 }, { "epoch": 17.723830734966594, "loss": 0.2792437970638275, "loss_ce": 6.90067681716755e-05, "loss_iou": 0.11669921875, "loss_num": 0.0091552734375, "loss_xval": 0.279296875, "num_input_tokens_seen": 445928940, "step": 7958 }, { "epoch": 17.7260579064588, "grad_norm": 26.71137046813965, "learning_rate": 1e-06, "loss": 0.3061, "num_input_tokens_seen": 445983828, "step": 7959 }, { "epoch": 17.7260579064588, "loss": 0.20442020893096924, "loss_ce": 7.449374243151397e-05, "loss_iou": 0.0908203125, "loss_num": 0.004547119140625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 445983828, "step": 7959 }, { "epoch": 17.728285077951004, "grad_norm": 18.750337600708008, "learning_rate": 1e-06, "loss": 0.4311, "num_input_tokens_seen": 446040604, "step": 7960 }, { "epoch": 17.728285077951004, "loss": 0.4378310739994049, "loss_ce": 8.695643191458657e-05, "loss_iou": 0.181640625, "loss_num": 0.0147705078125, "loss_xval": 0.4375, "num_input_tokens_seen": 446040604, "step": 7960 }, { "epoch": 17.73051224944321, "grad_norm": 21.329652786254883, "learning_rate": 1e-06, "loss": 0.6707, "num_input_tokens_seen": 446096288, "step": 7961 }, { "epoch": 17.73051224944321, "loss": 0.8002760410308838, "loss_ce": 0.00034930004039779305, "loss_iou": 0.32421875, "loss_num": 0.0299072265625, "loss_xval": 0.80078125, "num_input_tokens_seen": 446096288, "step": 7961 }, { "epoch": 17.732739420935413, "grad_norm": 19.80221176147461, "learning_rate": 1e-06, "loss": 0.4496, "num_input_tokens_seen": 446153240, "step": 7962 }, { "epoch": 17.732739420935413, "loss": 0.37191396951675415, "loss_ce": 8.780106145422906e-05, "loss_iou": 0.1484375, "loss_num": 0.0150146484375, "loss_xval": 0.37109375, "num_input_tokens_seen": 446153240, "step": 7962 }, { "epoch": 17.734966592427618, "grad_norm": 23.22903823852539, "learning_rate": 1e-06, "loss": 0.5215, "num_input_tokens_seen": 446209048, "step": 7963 }, { "epoch": 17.734966592427618, "loss": 0.5368286967277527, "loss_ce": 8.552963117836043e-05, "loss_iou": 0.2421875, "loss_num": 0.01055908203125, "loss_xval": 0.53515625, "num_input_tokens_seen": 446209048, "step": 7963 }, { "epoch": 17.737193763919823, "grad_norm": 22.78981590270996, "learning_rate": 1e-06, "loss": 0.3933, "num_input_tokens_seen": 446266012, "step": 7964 }, { "epoch": 17.737193763919823, "loss": 0.48521602153778076, "loss_ce": 0.00010859415488084778, "loss_iou": 0.2236328125, "loss_num": 0.007659912109375, "loss_xval": 0.484375, "num_input_tokens_seen": 446266012, "step": 7964 }, { "epoch": 17.739420935412028, "grad_norm": 19.937816619873047, "learning_rate": 1e-06, "loss": 0.2245, "num_input_tokens_seen": 446323072, "step": 7965 }, { "epoch": 17.739420935412028, "loss": 0.2009565830230713, "loss_ce": 8.989279740490019e-05, "loss_iou": 0.087890625, "loss_num": 0.005126953125, "loss_xval": 0.201171875, "num_input_tokens_seen": 446323072, "step": 7965 }, { "epoch": 17.741648106904233, "grad_norm": 15.368204116821289, "learning_rate": 1e-06, "loss": 0.4583, "num_input_tokens_seen": 446378856, "step": 7966 }, { "epoch": 17.741648106904233, "loss": 0.5300416350364685, "loss_ce": 7.336361159104854e-05, "loss_iou": 0.2275390625, "loss_num": 0.01507568359375, "loss_xval": 0.53125, "num_input_tokens_seen": 446378856, "step": 7966 }, { "epoch": 17.743875278396438, "grad_norm": 22.195009231567383, "learning_rate": 1e-06, "loss": 0.3353, "num_input_tokens_seen": 446434664, "step": 7967 }, { "epoch": 17.743875278396438, "loss": 0.3601817786693573, "loss_ce": 7.436297892127186e-05, "loss_iou": 0.1484375, "loss_num": 0.01251220703125, "loss_xval": 0.359375, "num_input_tokens_seen": 446434664, "step": 7967 }, { "epoch": 17.746102449888642, "grad_norm": 13.607110977172852, "learning_rate": 1e-06, "loss": 0.4167, "num_input_tokens_seen": 446490256, "step": 7968 }, { "epoch": 17.746102449888642, "loss": 0.5662601590156555, "loss_ce": 9.807750029722229e-05, "loss_iou": 0.2353515625, "loss_num": 0.0194091796875, "loss_xval": 0.56640625, "num_input_tokens_seen": 446490256, "step": 7968 }, { "epoch": 17.748329621380847, "grad_norm": 14.89394474029541, "learning_rate": 1e-06, "loss": 0.3675, "num_input_tokens_seen": 446545948, "step": 7969 }, { "epoch": 17.748329621380847, "loss": 0.38515520095825195, "loss_ce": 8.440592500846833e-05, "loss_iou": 0.166015625, "loss_num": 0.01055908203125, "loss_xval": 0.384765625, "num_input_tokens_seen": 446545948, "step": 7969 }, { "epoch": 17.750556792873052, "grad_norm": 18.065866470336914, "learning_rate": 1e-06, "loss": 0.3868, "num_input_tokens_seen": 446601844, "step": 7970 }, { "epoch": 17.750556792873052, "loss": 0.3480460047721863, "loss_ce": 8.455889474134892e-05, "loss_iou": 0.1513671875, "loss_num": 0.00909423828125, "loss_xval": 0.34765625, "num_input_tokens_seen": 446601844, "step": 7970 }, { "epoch": 17.752783964365257, "grad_norm": 17.822830200195312, "learning_rate": 1e-06, "loss": 0.4991, "num_input_tokens_seen": 446657308, "step": 7971 }, { "epoch": 17.752783964365257, "loss": 0.5141241550445557, "loss_ce": 8.604546019341797e-05, "loss_iou": 0.2314453125, "loss_num": 0.01007080078125, "loss_xval": 0.515625, "num_input_tokens_seen": 446657308, "step": 7971 }, { "epoch": 17.755011135857462, "grad_norm": 19.482011795043945, "learning_rate": 1e-06, "loss": 0.4116, "num_input_tokens_seen": 446714312, "step": 7972 }, { "epoch": 17.755011135857462, "loss": 0.3692222833633423, "loss_ce": 8.163502207025886e-05, "loss_iou": 0.173828125, "loss_num": 0.004425048828125, "loss_xval": 0.369140625, "num_input_tokens_seen": 446714312, "step": 7972 }, { "epoch": 17.757238307349667, "grad_norm": 17.875993728637695, "learning_rate": 1e-06, "loss": 0.3371, "num_input_tokens_seen": 446768140, "step": 7973 }, { "epoch": 17.757238307349667, "loss": 0.3051214814186096, "loss_ce": 6.776393274776638e-05, "loss_iou": 0.130859375, "loss_num": 0.00860595703125, "loss_xval": 0.3046875, "num_input_tokens_seen": 446768140, "step": 7973 }, { "epoch": 17.75946547884187, "grad_norm": 16.569456100463867, "learning_rate": 1e-06, "loss": 0.617, "num_input_tokens_seen": 446822136, "step": 7974 }, { "epoch": 17.75946547884187, "loss": 0.8183131814002991, "loss_ce": 0.0001368634111713618, "loss_iou": 0.275390625, "loss_num": 0.052978515625, "loss_xval": 0.81640625, "num_input_tokens_seen": 446822136, "step": 7974 }, { "epoch": 17.761692650334076, "grad_norm": 18.004579544067383, "learning_rate": 1e-06, "loss": 0.3327, "num_input_tokens_seen": 446878228, "step": 7975 }, { "epoch": 17.761692650334076, "loss": 0.2270025908946991, "loss_ce": 7.388347876258194e-05, "loss_iou": 0.09814453125, "loss_num": 0.006072998046875, "loss_xval": 0.2265625, "num_input_tokens_seen": 446878228, "step": 7975 }, { "epoch": 17.76391982182628, "grad_norm": 45.05828857421875, "learning_rate": 1e-06, "loss": 0.4351, "num_input_tokens_seen": 446934920, "step": 7976 }, { "epoch": 17.76391982182628, "loss": 0.36751073598861694, "loss_ce": 0.00010961330553982407, "loss_iou": 0.1396484375, "loss_num": 0.0174560546875, "loss_xval": 0.3671875, "num_input_tokens_seen": 446934920, "step": 7976 }, { "epoch": 17.766146993318486, "grad_norm": 19.841493606567383, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 446989424, "step": 7977 }, { "epoch": 17.766146993318486, "loss": 0.4566991627216339, "loss_ce": 9.516975842416286e-05, "loss_iou": 0.19140625, "loss_num": 0.0147705078125, "loss_xval": 0.45703125, "num_input_tokens_seen": 446989424, "step": 7977 }, { "epoch": 17.76837416481069, "grad_norm": 18.41941261291504, "learning_rate": 1e-06, "loss": 0.4241, "num_input_tokens_seen": 447046508, "step": 7978 }, { "epoch": 17.76837416481069, "loss": 0.3700698912143707, "loss_ce": 7.478601764887571e-05, "loss_iou": 0.1669921875, "loss_num": 0.00732421875, "loss_xval": 0.369140625, "num_input_tokens_seen": 447046508, "step": 7978 }, { "epoch": 17.770601336302896, "grad_norm": 19.740684509277344, "learning_rate": 1e-06, "loss": 0.3984, "num_input_tokens_seen": 447104012, "step": 7979 }, { "epoch": 17.770601336302896, "loss": 0.27607256174087524, "loss_ce": 7.156394713092595e-05, "loss_iou": 0.1279296875, "loss_num": 0.003875732421875, "loss_xval": 0.275390625, "num_input_tokens_seen": 447104012, "step": 7979 }, { "epoch": 17.7728285077951, "grad_norm": 15.659636497497559, "learning_rate": 1e-06, "loss": 0.2204, "num_input_tokens_seen": 447159184, "step": 7980 }, { "epoch": 17.7728285077951, "loss": 0.18346147239208221, "loss_ce": 8.134550444083288e-05, "loss_iou": 0.0732421875, "loss_num": 0.007476806640625, "loss_xval": 0.18359375, "num_input_tokens_seen": 447159184, "step": 7980 }, { "epoch": 17.775055679287306, "grad_norm": 18.540023803710938, "learning_rate": 1e-06, "loss": 0.3167, "num_input_tokens_seen": 447212524, "step": 7981 }, { "epoch": 17.775055679287306, "loss": 0.33039259910583496, "loss_ce": 7.033500878605992e-05, "loss_iou": 0.1435546875, "loss_num": 0.00860595703125, "loss_xval": 0.330078125, "num_input_tokens_seen": 447212524, "step": 7981 }, { "epoch": 17.77728285077951, "grad_norm": 26.151935577392578, "learning_rate": 1e-06, "loss": 0.4296, "num_input_tokens_seen": 447268040, "step": 7982 }, { "epoch": 17.77728285077951, "loss": 0.48384368419647217, "loss_ce": 7.902232027845457e-05, "loss_iou": 0.2197265625, "loss_num": 0.00909423828125, "loss_xval": 0.484375, "num_input_tokens_seen": 447268040, "step": 7982 }, { "epoch": 17.779510022271715, "grad_norm": 19.188148498535156, "learning_rate": 1e-06, "loss": 0.4998, "num_input_tokens_seen": 447323576, "step": 7983 }, { "epoch": 17.779510022271715, "loss": 0.35879120230674744, "loss_ce": 8.758992771618068e-05, "loss_iou": 0.16015625, "loss_num": 0.00787353515625, "loss_xval": 0.359375, "num_input_tokens_seen": 447323576, "step": 7983 }, { "epoch": 17.78173719376392, "grad_norm": 20.830575942993164, "learning_rate": 1e-06, "loss": 0.5727, "num_input_tokens_seen": 447379108, "step": 7984 }, { "epoch": 17.78173719376392, "loss": 0.5355324745178223, "loss_ce": 0.00013207507436163723, "loss_iou": 0.228515625, "loss_num": 0.015869140625, "loss_xval": 0.53515625, "num_input_tokens_seen": 447379108, "step": 7984 }, { "epoch": 17.783964365256125, "grad_norm": 24.485782623291016, "learning_rate": 1e-06, "loss": 0.3293, "num_input_tokens_seen": 447436116, "step": 7985 }, { "epoch": 17.783964365256125, "loss": 0.38479992747306824, "loss_ce": 9.53392154769972e-05, "loss_iou": 0.1689453125, "loss_num": 0.00909423828125, "loss_xval": 0.384765625, "num_input_tokens_seen": 447436116, "step": 7985 }, { "epoch": 17.78619153674833, "grad_norm": 27.353883743286133, "learning_rate": 1e-06, "loss": 0.3948, "num_input_tokens_seen": 447492812, "step": 7986 }, { "epoch": 17.78619153674833, "loss": 0.2935987710952759, "loss_ce": 8.068819442996755e-05, "loss_iou": 0.1240234375, "loss_num": 0.00909423828125, "loss_xval": 0.29296875, "num_input_tokens_seen": 447492812, "step": 7986 }, { "epoch": 17.788418708240535, "grad_norm": 17.87607192993164, "learning_rate": 1e-06, "loss": 0.35, "num_input_tokens_seen": 447549280, "step": 7987 }, { "epoch": 17.788418708240535, "loss": 0.3173452615737915, "loss_ce": 8.452979091089219e-05, "loss_iou": 0.13671875, "loss_num": 0.00897216796875, "loss_xval": 0.31640625, "num_input_tokens_seen": 447549280, "step": 7987 }, { "epoch": 17.79064587973274, "grad_norm": 21.554283142089844, "learning_rate": 1e-06, "loss": 0.4563, "num_input_tokens_seen": 447603876, "step": 7988 }, { "epoch": 17.79064587973274, "loss": 0.5782496929168701, "loss_ce": 0.00012468949717003852, "loss_iou": 0.25, "loss_num": 0.01513671875, "loss_xval": 0.578125, "num_input_tokens_seen": 447603876, "step": 7988 }, { "epoch": 17.792873051224944, "grad_norm": 17.834577560424805, "learning_rate": 1e-06, "loss": 0.3622, "num_input_tokens_seen": 447660636, "step": 7989 }, { "epoch": 17.792873051224944, "loss": 0.22425898909568787, "loss_ce": 7.684796582907438e-05, "loss_iou": 0.103515625, "loss_num": 0.0034332275390625, "loss_xval": 0.224609375, "num_input_tokens_seen": 447660636, "step": 7989 }, { "epoch": 17.79510022271715, "grad_norm": 16.187517166137695, "learning_rate": 1e-06, "loss": 0.3169, "num_input_tokens_seen": 447715244, "step": 7990 }, { "epoch": 17.79510022271715, "loss": 0.3078160881996155, "loss_ce": 7.682182331336662e-05, "loss_iou": 0.1298828125, "loss_num": 0.009521484375, "loss_xval": 0.30859375, "num_input_tokens_seen": 447715244, "step": 7990 }, { "epoch": 17.797327394209354, "grad_norm": 30.0116024017334, "learning_rate": 1e-06, "loss": 0.3553, "num_input_tokens_seen": 447771124, "step": 7991 }, { "epoch": 17.797327394209354, "loss": 0.2718275189399719, "loss_ce": 9.136189328273758e-05, "loss_iou": 0.123046875, "loss_num": 0.005126953125, "loss_xval": 0.271484375, "num_input_tokens_seen": 447771124, "step": 7991 }, { "epoch": 17.79955456570156, "grad_norm": 20.42539405822754, "learning_rate": 1e-06, "loss": 0.4136, "num_input_tokens_seen": 447826024, "step": 7992 }, { "epoch": 17.79955456570156, "loss": 0.4224761426448822, "loss_ce": 0.00011284490756224841, "loss_iou": 0.1826171875, "loss_num": 0.01141357421875, "loss_xval": 0.421875, "num_input_tokens_seen": 447826024, "step": 7992 }, { "epoch": 17.801781737193764, "grad_norm": 15.1818208694458, "learning_rate": 1e-06, "loss": 0.494, "num_input_tokens_seen": 447881836, "step": 7993 }, { "epoch": 17.801781737193764, "loss": 0.37318992614746094, "loss_ce": 8.200886804843321e-05, "loss_iou": 0.1494140625, "loss_num": 0.014892578125, "loss_xval": 0.373046875, "num_input_tokens_seen": 447881836, "step": 7993 }, { "epoch": 17.80400890868597, "grad_norm": 11.075129508972168, "learning_rate": 1e-06, "loss": 0.5868, "num_input_tokens_seen": 447938492, "step": 7994 }, { "epoch": 17.80400890868597, "loss": 0.8123493790626526, "loss_ce": 0.0001545300183352083, "loss_iou": 0.341796875, "loss_num": 0.0260009765625, "loss_xval": 0.8125, "num_input_tokens_seen": 447938492, "step": 7994 }, { "epoch": 17.806236080178174, "grad_norm": 26.124393463134766, "learning_rate": 1e-06, "loss": 0.5909, "num_input_tokens_seen": 447992224, "step": 7995 }, { "epoch": 17.806236080178174, "loss": 0.5432973504066467, "loss_ce": 8.448238804703578e-05, "loss_iou": 0.2412109375, "loss_num": 0.0123291015625, "loss_xval": 0.54296875, "num_input_tokens_seen": 447992224, "step": 7995 }, { "epoch": 17.80846325167038, "grad_norm": 21.8154296875, "learning_rate": 1e-06, "loss": 0.3901, "num_input_tokens_seen": 448051428, "step": 7996 }, { "epoch": 17.80846325167038, "loss": 0.4403107762336731, "loss_ce": 0.0001252087822649628, "loss_iou": 0.1953125, "loss_num": 0.009765625, "loss_xval": 0.439453125, "num_input_tokens_seen": 448051428, "step": 7996 }, { "epoch": 17.810690423162583, "grad_norm": 29.165897369384766, "learning_rate": 1e-06, "loss": 0.4263, "num_input_tokens_seen": 448108228, "step": 7997 }, { "epoch": 17.810690423162583, "loss": 0.35651570558547974, "loss_ce": 7.038043258944526e-05, "loss_iou": 0.1572265625, "loss_num": 0.0084228515625, "loss_xval": 0.35546875, "num_input_tokens_seen": 448108228, "step": 7997 }, { "epoch": 17.812917594654788, "grad_norm": 26.357572555541992, "learning_rate": 1e-06, "loss": 0.3326, "num_input_tokens_seen": 448163460, "step": 7998 }, { "epoch": 17.812917594654788, "loss": 0.2939828336238861, "loss_ce": 9.854609379544854e-05, "loss_iou": 0.134765625, "loss_num": 0.004852294921875, "loss_xval": 0.29296875, "num_input_tokens_seen": 448163460, "step": 7998 }, { "epoch": 17.815144766146993, "grad_norm": 17.523059844970703, "learning_rate": 1e-06, "loss": 0.3924, "num_input_tokens_seen": 448220148, "step": 7999 }, { "epoch": 17.815144766146993, "loss": 0.29866713285446167, "loss_ce": 8.312883437611163e-05, "loss_iou": 0.138671875, "loss_num": 0.00439453125, "loss_xval": 0.298828125, "num_input_tokens_seen": 448220148, "step": 7999 }, { "epoch": 17.817371937639198, "grad_norm": 21.25289535522461, "learning_rate": 1e-06, "loss": 0.5296, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "eval_seeclick_web_CIoU": 0.5856780111789703, "eval_seeclick_web_GIoU": 0.5835351943969727, "eval_seeclick_web_IoU": 0.6051326394081116, "eval_seeclick_web_MAE_all": 0.015136118279770017, "eval_seeclick_web_MAE_h": 0.007184438407421112, "eval_seeclick_web_MAE_w": 0.015148711390793324, "eval_seeclick_web_MAE_x_boxes": 0.008295744191855192, "eval_seeclick_web_MAE_y_boxes": 0.021377818658947945, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9110763072967529, "eval_seeclick_web_loss_ce": 0.00013513932935893536, "eval_seeclick_web_loss_iou": 0.4219970703125, "eval_seeclick_web_loss_num": 0.012152671813964844, "eval_seeclick_web_loss_xval": 0.905029296875, "eval_seeclick_web_runtime": 23.0578, "eval_seeclick_web_samples_per_second": 2.168, "eval_seeclick_web_steps_per_second": 0.087, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "eval_icons_CIoU": 0.2600770592689514, "eval_icons_GIoU": 0.28505839407444, "eval_icons_IoU": 0.3360164016485214, "eval_icons_MAE_all": 0.05904686264693737, "eval_icons_MAE_h": 0.03290587291121483, "eval_icons_MAE_w": 0.06054178345948458, "eval_icons_MAE_x_boxes": 0.05734286643564701, "eval_icons_MAE_y_boxes": 0.03782099112868309, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.714854121208191, "eval_icons_loss_ce": 0.0001619036338524893, "eval_icons_loss_iou": 0.6759033203125, "eval_icons_loss_num": 0.058238983154296875, "eval_icons_loss_xval": 1.643798828125, "eval_icons_runtime": 21.0775, "eval_icons_samples_per_second": 2.372, "eval_icons_steps_per_second": 0.095, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "eval_screenspot_CIoU": 0.3892778257528941, "eval_screenspot_GIoU": 0.4072861274083455, "eval_screenspot_IoU": 0.45707400639851886, "eval_screenspot_MAE_all": 0.053338187436262764, "eval_screenspot_MAE_h": 0.03920335695147514, "eval_screenspot_MAE_w": 0.05588290343681971, "eval_screenspot_MAE_x_boxes": 0.062214924643437065, "eval_screenspot_MAE_y_boxes": 0.03902036137878895, "eval_screenspot_inside_bbox": 0.725000003973643, "eval_screenspot_loss": 1.5115993022918701, "eval_screenspot_loss_ce": 0.00019147307709014663, "eval_screenspot_loss_iou": 0.6321614583333334, "eval_screenspot_loss_num": 0.061505635579427086, "eval_screenspot_loss_xval": 1.5716145833333333, "eval_screenspot_runtime": 38.7561, "eval_screenspot_samples_per_second": 2.296, "eval_screenspot_steps_per_second": 0.077, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "eval_compot_CIoU": 0.3504429906606674, "eval_compot_GIoU": 0.35775288939476013, "eval_compot_IoU": 0.4085061550140381, "eval_compot_MAE_all": 0.01769328536465764, "eval_compot_MAE_h": 0.008335361024364829, "eval_compot_MAE_w": 0.020572240464389324, "eval_compot_MAE_x_boxes": 0.029930624179542065, "eval_compot_MAE_y_boxes": 0.007017011754214764, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3885656595230103, "eval_compot_loss_ce": 0.0001280211581615731, "eval_compot_loss_iou": 0.6407470703125, "eval_compot_loss_num": 0.016317367553710938, "eval_compot_loss_xval": 1.364013671875, "eval_compot_runtime": 20.8794, "eval_compot_samples_per_second": 2.395, "eval_compot_steps_per_second": 0.096, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "eval_custom_ui_val_CIoU": 0.4733108580112457, "eval_custom_ui_val_GIoU": 0.47808146807882523, "eval_custom_ui_val_IoU": 0.535189237859514, "eval_custom_ui_val_MAE_all": 0.027121951182683308, "eval_custom_ui_val_MAE_h": 0.014743615692067478, "eval_custom_ui_val_MAE_w": 0.03644685043642918, "eval_custom_ui_val_MAE_x_boxes": 0.033436041299460664, "eval_custom_ui_val_MAE_y_boxes": 0.012357140529072948, "eval_custom_ui_val_inside_bbox": 0.7685185207260979, "eval_custom_ui_val_loss": 1.1683473587036133, "eval_custom_ui_val_loss_ce": 0.00015340095301831348, "eval_custom_ui_val_loss_iou": 0.5008816189236112, "eval_custom_ui_val_loss_num": 0.0236766603257921, "eval_custom_ui_val_loss_xval": 1.1200358072916667, "eval_custom_ui_val_runtime": 70.4441, "eval_custom_ui_val_samples_per_second": 3.762, "eval_custom_ui_val_steps_per_second": 0.128, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.817371937639198, "loss": 0.8243292570114136, "loss_ce": 0.00011052313493564725, "loss_iou": 0.37109375, "loss_num": 0.0167236328125, "loss_xval": 0.82421875, "num_input_tokens_seen": 448275732, "step": 8000 }, { "epoch": 17.819599109131403, "grad_norm": 16.163515090942383, "learning_rate": 1e-06, "loss": 0.3863, "num_input_tokens_seen": 448332804, "step": 8001 }, { "epoch": 17.819599109131403, "loss": 0.4817669987678528, "loss_ce": 7.758761057630181e-05, "loss_iou": 0.2080078125, "loss_num": 0.013427734375, "loss_xval": 0.482421875, "num_input_tokens_seen": 448332804, "step": 8001 }, { "epoch": 17.821826280623608, "grad_norm": 21.57859992980957, "learning_rate": 1e-06, "loss": 0.4429, "num_input_tokens_seen": 448389324, "step": 8002 }, { "epoch": 17.821826280623608, "loss": 0.41340628266334534, "loss_ce": 7.620293035870418e-05, "loss_iou": 0.19140625, "loss_num": 0.005950927734375, "loss_xval": 0.4140625, "num_input_tokens_seen": 448389324, "step": 8002 }, { "epoch": 17.824053452115812, "grad_norm": 21.757307052612305, "learning_rate": 1e-06, "loss": 0.4946, "num_input_tokens_seen": 448446192, "step": 8003 }, { "epoch": 17.824053452115812, "loss": 0.5204148292541504, "loss_ce": 0.00015119729505386204, "loss_iou": 0.224609375, "loss_num": 0.0140380859375, "loss_xval": 0.51953125, "num_input_tokens_seen": 448446192, "step": 8003 }, { "epoch": 17.826280623608017, "grad_norm": 18.847604751586914, "learning_rate": 1e-06, "loss": 0.3709, "num_input_tokens_seen": 448503164, "step": 8004 }, { "epoch": 17.826280623608017, "loss": 0.2775651812553406, "loss_ce": 6.884684989927337e-05, "loss_iou": 0.111328125, "loss_num": 0.0108642578125, "loss_xval": 0.27734375, "num_input_tokens_seen": 448503164, "step": 8004 }, { "epoch": 17.828507795100222, "grad_norm": 20.84021759033203, "learning_rate": 1e-06, "loss": 0.4736, "num_input_tokens_seen": 448558900, "step": 8005 }, { "epoch": 17.828507795100222, "loss": 0.31135812401771545, "loss_ce": 7.882342470111325e-05, "loss_iou": 0.146484375, "loss_num": 0.0038299560546875, "loss_xval": 0.310546875, "num_input_tokens_seen": 448558900, "step": 8005 }, { "epoch": 17.830734966592427, "grad_norm": 16.282339096069336, "learning_rate": 1e-06, "loss": 0.398, "num_input_tokens_seen": 448614056, "step": 8006 }, { "epoch": 17.830734966592427, "loss": 0.30350208282470703, "loss_ce": 9.633424633648247e-05, "loss_iou": 0.13671875, "loss_num": 0.006103515625, "loss_xval": 0.302734375, "num_input_tokens_seen": 448614056, "step": 8006 }, { "epoch": 17.832962138084632, "grad_norm": 16.245391845703125, "learning_rate": 1e-06, "loss": 0.3856, "num_input_tokens_seen": 448668056, "step": 8007 }, { "epoch": 17.832962138084632, "loss": 0.30444949865341187, "loss_ce": 6.718316581100225e-05, "loss_iou": 0.13671875, "loss_num": 0.006195068359375, "loss_xval": 0.3046875, "num_input_tokens_seen": 448668056, "step": 8007 }, { "epoch": 17.835189309576837, "grad_norm": 21.52396011352539, "learning_rate": 1e-06, "loss": 0.4856, "num_input_tokens_seen": 448724780, "step": 8008 }, { "epoch": 17.835189309576837, "loss": 0.36812153458595276, "loss_ce": 7.954495958983898e-05, "loss_iou": 0.158203125, "loss_num": 0.01007080078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 448724780, "step": 8008 }, { "epoch": 17.83741648106904, "grad_norm": 15.395381927490234, "learning_rate": 1e-06, "loss": 0.4704, "num_input_tokens_seen": 448781896, "step": 8009 }, { "epoch": 17.83741648106904, "loss": 0.42657172679901123, "loss_ce": 8.857337525114417e-05, "loss_iou": 0.189453125, "loss_num": 0.0093994140625, "loss_xval": 0.42578125, "num_input_tokens_seen": 448781896, "step": 8009 }, { "epoch": 17.839643652561247, "grad_norm": 14.230578422546387, "learning_rate": 1e-06, "loss": 0.5784, "num_input_tokens_seen": 448835332, "step": 8010 }, { "epoch": 17.839643652561247, "loss": 0.7218839526176453, "loss_ce": 8.216348942369223e-05, "loss_iou": 0.3125, "loss_num": 0.0191650390625, "loss_xval": 0.72265625, "num_input_tokens_seen": 448835332, "step": 8010 }, { "epoch": 17.84187082405345, "grad_norm": 25.663774490356445, "learning_rate": 1e-06, "loss": 0.2717, "num_input_tokens_seen": 448890712, "step": 8011 }, { "epoch": 17.84187082405345, "loss": 0.25164851546287537, "loss_ce": 6.160917837405577e-05, "loss_iou": 0.1142578125, "loss_num": 0.00469970703125, "loss_xval": 0.251953125, "num_input_tokens_seen": 448890712, "step": 8011 }, { "epoch": 17.844097995545656, "grad_norm": 16.865924835205078, "learning_rate": 1e-06, "loss": 0.4874, "num_input_tokens_seen": 448948344, "step": 8012 }, { "epoch": 17.844097995545656, "loss": 0.7613823413848877, "loss_ce": 9.086247882805765e-05, "loss_iou": 0.328125, "loss_num": 0.0205078125, "loss_xval": 0.76171875, "num_input_tokens_seen": 448948344, "step": 8012 }, { "epoch": 17.84632516703786, "grad_norm": 31.958927154541016, "learning_rate": 1e-06, "loss": 0.3505, "num_input_tokens_seen": 449001604, "step": 8013 }, { "epoch": 17.84632516703786, "loss": 0.2753995656967163, "loss_ce": 6.995358853600919e-05, "loss_iou": 0.11279296875, "loss_num": 0.009765625, "loss_xval": 0.275390625, "num_input_tokens_seen": 449001604, "step": 8013 }, { "epoch": 17.848552338530066, "grad_norm": 19.097925186157227, "learning_rate": 1e-06, "loss": 0.4448, "num_input_tokens_seen": 449057784, "step": 8014 }, { "epoch": 17.848552338530066, "loss": 0.42127907276153564, "loss_ce": 7.547304267063737e-05, "loss_iou": 0.17578125, "loss_num": 0.01373291015625, "loss_xval": 0.421875, "num_input_tokens_seen": 449057784, "step": 8014 }, { "epoch": 17.85077951002227, "grad_norm": 22.88896942138672, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 449115216, "step": 8015 }, { "epoch": 17.85077951002227, "loss": 0.40796226263046265, "loss_ce": 0.0004915721947327256, "loss_iou": 0.1611328125, "loss_num": 0.0172119140625, "loss_xval": 0.408203125, "num_input_tokens_seen": 449115216, "step": 8015 }, { "epoch": 17.853006681514476, "grad_norm": 17.308486938476562, "learning_rate": 1e-06, "loss": 0.3195, "num_input_tokens_seen": 449171056, "step": 8016 }, { "epoch": 17.853006681514476, "loss": 0.23818965256214142, "loss_ce": 9.149865218205377e-05, "loss_iou": 0.10888671875, "loss_num": 0.004119873046875, "loss_xval": 0.23828125, "num_input_tokens_seen": 449171056, "step": 8016 }, { "epoch": 17.85523385300668, "grad_norm": 26.236623764038086, "learning_rate": 1e-06, "loss": 0.3454, "num_input_tokens_seen": 449226700, "step": 8017 }, { "epoch": 17.85523385300668, "loss": 0.30879464745521545, "loss_ce": 7.88304241723381e-05, "loss_iou": 0.140625, "loss_num": 0.005523681640625, "loss_xval": 0.30859375, "num_input_tokens_seen": 449226700, "step": 8017 }, { "epoch": 17.857461024498885, "grad_norm": 20.232860565185547, "learning_rate": 1e-06, "loss": 0.4247, "num_input_tokens_seen": 449282184, "step": 8018 }, { "epoch": 17.857461024498885, "loss": 0.5520839691162109, "loss_ce": 8.206926577258855e-05, "loss_iou": 0.2265625, "loss_num": 0.0198974609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 449282184, "step": 8018 }, { "epoch": 17.85968819599109, "grad_norm": 16.037681579589844, "learning_rate": 1e-06, "loss": 0.4071, "num_input_tokens_seen": 449338720, "step": 8019 }, { "epoch": 17.85968819599109, "loss": 0.3350412845611572, "loss_ce": 8.032634650589898e-05, "loss_iou": 0.1484375, "loss_num": 0.00750732421875, "loss_xval": 0.3359375, "num_input_tokens_seen": 449338720, "step": 8019 }, { "epoch": 17.861915367483295, "grad_norm": 15.602432250976562, "learning_rate": 1e-06, "loss": 0.3929, "num_input_tokens_seen": 449394160, "step": 8020 }, { "epoch": 17.861915367483295, "loss": 0.3082042932510376, "loss_ce": 9.883566235657781e-05, "loss_iou": 0.1376953125, "loss_num": 0.00665283203125, "loss_xval": 0.30859375, "num_input_tokens_seen": 449394160, "step": 8020 }, { "epoch": 17.8641425389755, "grad_norm": 20.469484329223633, "learning_rate": 1e-06, "loss": 0.2791, "num_input_tokens_seen": 449448936, "step": 8021 }, { "epoch": 17.8641425389755, "loss": 0.3228263556957245, "loss_ce": 7.245552114909515e-05, "loss_iou": 0.1298828125, "loss_num": 0.01263427734375, "loss_xval": 0.322265625, "num_input_tokens_seen": 449448936, "step": 8021 }, { "epoch": 17.866369710467705, "grad_norm": 11.62727165222168, "learning_rate": 1e-06, "loss": 0.3996, "num_input_tokens_seen": 449505480, "step": 8022 }, { "epoch": 17.866369710467705, "loss": 0.4349091947078705, "loss_ce": 9.47638473007828e-05, "loss_iou": 0.1875, "loss_num": 0.01202392578125, "loss_xval": 0.435546875, "num_input_tokens_seen": 449505480, "step": 8022 }, { "epoch": 17.86859688195991, "grad_norm": 24.577762603759766, "learning_rate": 1e-06, "loss": 0.4352, "num_input_tokens_seen": 449561952, "step": 8023 }, { "epoch": 17.86859688195991, "loss": 0.5243306756019592, "loss_ce": 0.00016074010636657476, "loss_iou": 0.2373046875, "loss_num": 0.009765625, "loss_xval": 0.5234375, "num_input_tokens_seen": 449561952, "step": 8023 }, { "epoch": 17.870824053452115, "grad_norm": 24.447031021118164, "learning_rate": 1e-06, "loss": 0.3999, "num_input_tokens_seen": 449614092, "step": 8024 }, { "epoch": 17.870824053452115, "loss": 0.44069501757621765, "loss_ce": 8.22499132482335e-05, "loss_iou": 0.1904296875, "loss_num": 0.01214599609375, "loss_xval": 0.44140625, "num_input_tokens_seen": 449614092, "step": 8024 }, { "epoch": 17.87305122494432, "grad_norm": 22.536909103393555, "learning_rate": 1e-06, "loss": 0.5064, "num_input_tokens_seen": 449669248, "step": 8025 }, { "epoch": 17.87305122494432, "loss": 0.4587002098560333, "loss_ce": 8.205789345083758e-05, "loss_iou": 0.1962890625, "loss_num": 0.012939453125, "loss_xval": 0.458984375, "num_input_tokens_seen": 449669248, "step": 8025 }, { "epoch": 17.875278396436524, "grad_norm": 16.079544067382812, "learning_rate": 1e-06, "loss": 0.4124, "num_input_tokens_seen": 449726032, "step": 8026 }, { "epoch": 17.875278396436524, "loss": 0.402201771736145, "loss_ce": 0.00010215782094746828, "loss_iou": 0.173828125, "loss_num": 0.0108642578125, "loss_xval": 0.40234375, "num_input_tokens_seen": 449726032, "step": 8026 }, { "epoch": 17.87750556792873, "grad_norm": 16.75348472595215, "learning_rate": 1e-06, "loss": 0.589, "num_input_tokens_seen": 449782292, "step": 8027 }, { "epoch": 17.87750556792873, "loss": 0.5953076481819153, "loss_ce": 9.280974336434156e-05, "loss_iou": 0.244140625, "loss_num": 0.021484375, "loss_xval": 0.59375, "num_input_tokens_seen": 449782292, "step": 8027 }, { "epoch": 17.879732739420934, "grad_norm": 54.10028839111328, "learning_rate": 1e-06, "loss": 0.4516, "num_input_tokens_seen": 449837808, "step": 8028 }, { "epoch": 17.879732739420934, "loss": 0.6945455074310303, "loss_ce": 8.752994472160935e-05, "loss_iou": 0.26953125, "loss_num": 0.031494140625, "loss_xval": 0.6953125, "num_input_tokens_seen": 449837808, "step": 8028 }, { "epoch": 17.88195991091314, "grad_norm": 18.08180046081543, "learning_rate": 1e-06, "loss": 0.4964, "num_input_tokens_seen": 449896016, "step": 8029 }, { "epoch": 17.88195991091314, "loss": 0.36804813146591187, "loss_ce": 0.00049442338058725, "loss_iou": 0.169921875, "loss_num": 0.005706787109375, "loss_xval": 0.3671875, "num_input_tokens_seen": 449896016, "step": 8029 }, { "epoch": 17.884187082405344, "grad_norm": 20.480897903442383, "learning_rate": 1e-06, "loss": 0.4051, "num_input_tokens_seen": 449952976, "step": 8030 }, { "epoch": 17.884187082405344, "loss": 0.34723442792892456, "loss_ce": 6.646300607826561e-05, "loss_iou": 0.154296875, "loss_num": 0.007598876953125, "loss_xval": 0.34765625, "num_input_tokens_seen": 449952976, "step": 8030 }, { "epoch": 17.88641425389755, "grad_norm": 38.36183166503906, "learning_rate": 1e-06, "loss": 0.4273, "num_input_tokens_seen": 450010648, "step": 8031 }, { "epoch": 17.88641425389755, "loss": 0.3772750496864319, "loss_ce": 7.779923907946795e-05, "loss_iou": 0.162109375, "loss_num": 0.01068115234375, "loss_xval": 0.376953125, "num_input_tokens_seen": 450010648, "step": 8031 }, { "epoch": 17.888641425389753, "grad_norm": 21.646387100219727, "learning_rate": 1e-06, "loss": 0.3668, "num_input_tokens_seen": 450065440, "step": 8032 }, { "epoch": 17.888641425389753, "loss": 0.363721638917923, "loss_ce": 7.417659799102694e-05, "loss_iou": 0.142578125, "loss_num": 0.0155029296875, "loss_xval": 0.36328125, "num_input_tokens_seen": 450065440, "step": 8032 }, { "epoch": 17.89086859688196, "grad_norm": 14.319405555725098, "learning_rate": 1e-06, "loss": 0.3845, "num_input_tokens_seen": 450122044, "step": 8033 }, { "epoch": 17.89086859688196, "loss": 0.41428041458129883, "loss_ce": 9.586406667949632e-05, "loss_iou": 0.1787109375, "loss_num": 0.01129150390625, "loss_xval": 0.4140625, "num_input_tokens_seen": 450122044, "step": 8033 }, { "epoch": 17.893095768374163, "grad_norm": 35.73308563232422, "learning_rate": 1e-06, "loss": 0.4115, "num_input_tokens_seen": 450178668, "step": 8034 }, { "epoch": 17.893095768374163, "loss": 0.4911773204803467, "loss_ce": 8.844825788401067e-05, "loss_iou": 0.203125, "loss_num": 0.016845703125, "loss_xval": 0.490234375, "num_input_tokens_seen": 450178668, "step": 8034 }, { "epoch": 17.895322939866368, "grad_norm": 20.128206253051758, "learning_rate": 1e-06, "loss": 0.2906, "num_input_tokens_seen": 450235868, "step": 8035 }, { "epoch": 17.895322939866368, "loss": 0.31039196252822876, "loss_ce": 8.923389395931736e-05, "loss_iou": 0.13671875, "loss_num": 0.00732421875, "loss_xval": 0.310546875, "num_input_tokens_seen": 450235868, "step": 8035 }, { "epoch": 17.897550111358576, "grad_norm": 16.04209327697754, "learning_rate": 1e-06, "loss": 0.3579, "num_input_tokens_seen": 450292616, "step": 8036 }, { "epoch": 17.897550111358576, "loss": 0.3482053279876709, "loss_ce": 0.00018285455007571727, "loss_iou": 0.16015625, "loss_num": 0.00579833984375, "loss_xval": 0.34765625, "num_input_tokens_seen": 450292616, "step": 8036 }, { "epoch": 17.899777282850778, "grad_norm": 25.872989654541016, "learning_rate": 1e-06, "loss": 0.4616, "num_input_tokens_seen": 450348424, "step": 8037 }, { "epoch": 17.899777282850778, "loss": 0.4966566562652588, "loss_ce": 7.462559005944058e-05, "loss_iou": 0.208984375, "loss_num": 0.0157470703125, "loss_xval": 0.49609375, "num_input_tokens_seen": 450348424, "step": 8037 }, { "epoch": 17.902004454342986, "grad_norm": 17.968185424804688, "learning_rate": 1e-06, "loss": 0.397, "num_input_tokens_seen": 450404344, "step": 8038 }, { "epoch": 17.902004454342986, "loss": 0.43942493200302124, "loss_ce": 9.388932812726125e-05, "loss_iou": 0.1962890625, "loss_num": 0.00927734375, "loss_xval": 0.439453125, "num_input_tokens_seen": 450404344, "step": 8038 }, { "epoch": 17.90423162583519, "grad_norm": 22.768774032592773, "learning_rate": 1e-06, "loss": 0.4194, "num_input_tokens_seen": 450457360, "step": 8039 }, { "epoch": 17.90423162583519, "loss": 0.4151099920272827, "loss_ce": 7.091661973390728e-05, "loss_iou": 0.1806640625, "loss_num": 0.01080322265625, "loss_xval": 0.4140625, "num_input_tokens_seen": 450457360, "step": 8039 }, { "epoch": 17.906458797327396, "grad_norm": 11.735021591186523, "learning_rate": 1e-06, "loss": 0.4589, "num_input_tokens_seen": 450512100, "step": 8040 }, { "epoch": 17.906458797327396, "loss": 0.5093483924865723, "loss_ce": 0.0001931376027641818, "loss_iou": 0.224609375, "loss_num": 0.011962890625, "loss_xval": 0.5078125, "num_input_tokens_seen": 450512100, "step": 8040 }, { "epoch": 17.9086859688196, "grad_norm": 16.40501594543457, "learning_rate": 1e-06, "loss": 0.4686, "num_input_tokens_seen": 450569724, "step": 8041 }, { "epoch": 17.9086859688196, "loss": 0.3620176613330841, "loss_ce": 7.919156632851809e-05, "loss_iou": 0.16796875, "loss_num": 0.0050048828125, "loss_xval": 0.361328125, "num_input_tokens_seen": 450569724, "step": 8041 }, { "epoch": 17.910913140311806, "grad_norm": 18.876176834106445, "learning_rate": 1e-06, "loss": 0.336, "num_input_tokens_seen": 450626440, "step": 8042 }, { "epoch": 17.910913140311806, "loss": 0.3145880401134491, "loss_ce": 7.385006756521761e-05, "loss_iou": 0.14453125, "loss_num": 0.005096435546875, "loss_xval": 0.314453125, "num_input_tokens_seen": 450626440, "step": 8042 }, { "epoch": 17.91314031180401, "grad_norm": 12.777106285095215, "learning_rate": 1e-06, "loss": 0.3849, "num_input_tokens_seen": 450684836, "step": 8043 }, { "epoch": 17.91314031180401, "loss": 0.5031352043151855, "loss_ce": 8.34753445815295e-05, "loss_iou": 0.2294921875, "loss_num": 0.00885009765625, "loss_xval": 0.50390625, "num_input_tokens_seen": 450684836, "step": 8043 }, { "epoch": 17.915367483296215, "grad_norm": 30.933420181274414, "learning_rate": 1e-06, "loss": 0.357, "num_input_tokens_seen": 450739768, "step": 8044 }, { "epoch": 17.915367483296215, "loss": 0.3753129243850708, "loss_ce": 6.877203122712672e-05, "loss_iou": 0.1640625, "loss_num": 0.009521484375, "loss_xval": 0.375, "num_input_tokens_seen": 450739768, "step": 8044 }, { "epoch": 17.91759465478842, "grad_norm": 21.37887191772461, "learning_rate": 1e-06, "loss": 0.3017, "num_input_tokens_seen": 450796720, "step": 8045 }, { "epoch": 17.91759465478842, "loss": 0.34577932953834534, "loss_ce": 7.619416282977909e-05, "loss_iou": 0.1572265625, "loss_num": 0.006256103515625, "loss_xval": 0.345703125, "num_input_tokens_seen": 450796720, "step": 8045 }, { "epoch": 17.919821826280625, "grad_norm": 21.42198944091797, "learning_rate": 1e-06, "loss": 0.5496, "num_input_tokens_seen": 450852760, "step": 8046 }, { "epoch": 17.919821826280625, "loss": 0.5246258974075317, "loss_ce": 8.982230065157637e-05, "loss_iou": 0.23046875, "loss_num": 0.01263427734375, "loss_xval": 0.5234375, "num_input_tokens_seen": 450852760, "step": 8046 }, { "epoch": 17.92204899777283, "grad_norm": 19.944995880126953, "learning_rate": 1e-06, "loss": 0.3975, "num_input_tokens_seen": 450909692, "step": 8047 }, { "epoch": 17.92204899777283, "loss": 0.4114391803741455, "loss_ce": 6.225903052836657e-05, "loss_iou": 0.185546875, "loss_num": 0.0081787109375, "loss_xval": 0.412109375, "num_input_tokens_seen": 450909692, "step": 8047 }, { "epoch": 17.924276169265035, "grad_norm": 22.188114166259766, "learning_rate": 1e-06, "loss": 0.4788, "num_input_tokens_seen": 450968200, "step": 8048 }, { "epoch": 17.924276169265035, "loss": 0.4510143995285034, "loss_ce": 8.66201298777014e-05, "loss_iou": 0.1923828125, "loss_num": 0.01312255859375, "loss_xval": 0.451171875, "num_input_tokens_seen": 450968200, "step": 8048 }, { "epoch": 17.92650334075724, "grad_norm": 16.4459228515625, "learning_rate": 1e-06, "loss": 0.3947, "num_input_tokens_seen": 451022056, "step": 8049 }, { "epoch": 17.92650334075724, "loss": 0.35028839111328125, "loss_ce": 6.865533214295283e-05, "loss_iou": 0.142578125, "loss_num": 0.01318359375, "loss_xval": 0.349609375, "num_input_tokens_seen": 451022056, "step": 8049 }, { "epoch": 17.928730512249444, "grad_norm": 14.635376930236816, "learning_rate": 1e-06, "loss": 0.3044, "num_input_tokens_seen": 451078512, "step": 8050 }, { "epoch": 17.928730512249444, "loss": 0.28396010398864746, "loss_ce": 8.557151886634529e-05, "loss_iou": 0.1240234375, "loss_num": 0.0072021484375, "loss_xval": 0.283203125, "num_input_tokens_seen": 451078512, "step": 8050 }, { "epoch": 17.93095768374165, "grad_norm": 24.16907501220703, "learning_rate": 1e-06, "loss": 0.4527, "num_input_tokens_seen": 451133112, "step": 8051 }, { "epoch": 17.93095768374165, "loss": 0.5191126465797424, "loss_ce": 0.0003138238680548966, "loss_iou": 0.1923828125, "loss_num": 0.026611328125, "loss_xval": 0.51953125, "num_input_tokens_seen": 451133112, "step": 8051 }, { "epoch": 17.933184855233854, "grad_norm": 13.90976333618164, "learning_rate": 1e-06, "loss": 0.3535, "num_input_tokens_seen": 451190024, "step": 8052 }, { "epoch": 17.933184855233854, "loss": 0.3013458847999573, "loss_ce": 7.635784277226776e-05, "loss_iou": 0.140625, "loss_num": 0.004180908203125, "loss_xval": 0.30078125, "num_input_tokens_seen": 451190024, "step": 8052 }, { "epoch": 17.93541202672606, "grad_norm": 17.68203353881836, "learning_rate": 1e-06, "loss": 0.3295, "num_input_tokens_seen": 451243952, "step": 8053 }, { "epoch": 17.93541202672606, "loss": 0.3571292757987976, "loss_ce": 7.360937888734043e-05, "loss_iou": 0.1611328125, "loss_num": 0.00701904296875, "loss_xval": 0.357421875, "num_input_tokens_seen": 451243952, "step": 8053 }, { "epoch": 17.937639198218264, "grad_norm": 24.68354034423828, "learning_rate": 1e-06, "loss": 0.4696, "num_input_tokens_seen": 451299776, "step": 8054 }, { "epoch": 17.937639198218264, "loss": 0.3794766664505005, "loss_ce": 8.214540139306337e-05, "loss_iou": 0.1689453125, "loss_num": 0.00836181640625, "loss_xval": 0.37890625, "num_input_tokens_seen": 451299776, "step": 8054 }, { "epoch": 17.93986636971047, "grad_norm": 12.557443618774414, "learning_rate": 1e-06, "loss": 0.3637, "num_input_tokens_seen": 451354904, "step": 8055 }, { "epoch": 17.93986636971047, "loss": 0.26801323890686035, "loss_ce": 6.891635712236166e-05, "loss_iou": 0.12109375, "loss_num": 0.005157470703125, "loss_xval": 0.267578125, "num_input_tokens_seen": 451354904, "step": 8055 }, { "epoch": 17.942093541202674, "grad_norm": 31.136751174926758, "learning_rate": 1e-06, "loss": 0.4939, "num_input_tokens_seen": 451406784, "step": 8056 }, { "epoch": 17.942093541202674, "loss": 0.5043526291847229, "loss_ce": 8.016474748728797e-05, "loss_iou": 0.2265625, "loss_num": 0.010498046875, "loss_xval": 0.50390625, "num_input_tokens_seen": 451406784, "step": 8056 }, { "epoch": 17.94432071269488, "grad_norm": 12.46948528289795, "learning_rate": 1e-06, "loss": 0.3284, "num_input_tokens_seen": 451464456, "step": 8057 }, { "epoch": 17.94432071269488, "loss": 0.29316914081573486, "loss_ce": 7.832865230739117e-05, "loss_iou": 0.1357421875, "loss_num": 0.004425048828125, "loss_xval": 0.29296875, "num_input_tokens_seen": 451464456, "step": 8057 }, { "epoch": 17.946547884187083, "grad_norm": 26.139951705932617, "learning_rate": 1e-06, "loss": 0.3216, "num_input_tokens_seen": 451517900, "step": 8058 }, { "epoch": 17.946547884187083, "loss": 0.3385404348373413, "loss_ce": 0.00010048142576124519, "loss_iou": 0.1572265625, "loss_num": 0.004852294921875, "loss_xval": 0.337890625, "num_input_tokens_seen": 451517900, "step": 8058 }, { "epoch": 17.948775055679288, "grad_norm": 20.38810920715332, "learning_rate": 1e-06, "loss": 0.3234, "num_input_tokens_seen": 451574048, "step": 8059 }, { "epoch": 17.948775055679288, "loss": 0.2848048210144043, "loss_ce": 7.584408012917265e-05, "loss_iou": 0.1298828125, "loss_num": 0.005096435546875, "loss_xval": 0.28515625, "num_input_tokens_seen": 451574048, "step": 8059 }, { "epoch": 17.951002227171493, "grad_norm": 17.65831756591797, "learning_rate": 1e-06, "loss": 0.5641, "num_input_tokens_seen": 451632452, "step": 8060 }, { "epoch": 17.951002227171493, "loss": 0.38423237204551697, "loss_ce": 7.70928745623678e-05, "loss_iou": 0.1748046875, "loss_num": 0.006805419921875, "loss_xval": 0.384765625, "num_input_tokens_seen": 451632452, "step": 8060 }, { "epoch": 17.953229398663698, "grad_norm": 10.756651878356934, "learning_rate": 1e-06, "loss": 0.362, "num_input_tokens_seen": 451687680, "step": 8061 }, { "epoch": 17.953229398663698, "loss": 0.46239495277404785, "loss_ce": 8.412712486460805e-05, "loss_iou": 0.171875, "loss_num": 0.0238037109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 451687680, "step": 8061 }, { "epoch": 17.955456570155903, "grad_norm": 22.19445037841797, "learning_rate": 1e-06, "loss": 0.4, "num_input_tokens_seen": 451746496, "step": 8062 }, { "epoch": 17.955456570155903, "loss": 0.3811803460121155, "loss_ce": 7.682880095671862e-05, "loss_iou": 0.1669921875, "loss_num": 0.009521484375, "loss_xval": 0.380859375, "num_input_tokens_seen": 451746496, "step": 8062 }, { "epoch": 17.957683741648108, "grad_norm": 14.64289665222168, "learning_rate": 1e-06, "loss": 0.3557, "num_input_tokens_seen": 451802292, "step": 8063 }, { "epoch": 17.957683741648108, "loss": 0.3765408396720886, "loss_ce": 0.00010646959708537906, "loss_iou": 0.1572265625, "loss_num": 0.01239013671875, "loss_xval": 0.376953125, "num_input_tokens_seen": 451802292, "step": 8063 }, { "epoch": 17.959910913140313, "grad_norm": 20.15620994567871, "learning_rate": 1e-06, "loss": 0.5471, "num_input_tokens_seen": 451854968, "step": 8064 }, { "epoch": 17.959910913140313, "loss": 0.5528594255447388, "loss_ce": 6.400723214028403e-05, "loss_iou": 0.2421875, "loss_num": 0.01373291015625, "loss_xval": 0.5546875, "num_input_tokens_seen": 451854968, "step": 8064 }, { "epoch": 17.962138084632517, "grad_norm": 16.74308967590332, "learning_rate": 1e-06, "loss": 0.3935, "num_input_tokens_seen": 451913180, "step": 8065 }, { "epoch": 17.962138084632517, "loss": 0.5324406623840332, "loss_ce": 9.207165567204356e-05, "loss_iou": 0.2353515625, "loss_num": 0.01239013671875, "loss_xval": 0.53125, "num_input_tokens_seen": 451913180, "step": 8065 }, { "epoch": 17.964365256124722, "grad_norm": 17.335187911987305, "learning_rate": 1e-06, "loss": 0.5185, "num_input_tokens_seen": 451969652, "step": 8066 }, { "epoch": 17.964365256124722, "loss": 0.47579044103622437, "loss_ce": 8.243897173088044e-05, "loss_iou": 0.189453125, "loss_num": 0.0194091796875, "loss_xval": 0.4765625, "num_input_tokens_seen": 451969652, "step": 8066 }, { "epoch": 17.966592427616927, "grad_norm": 20.39455795288086, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 452024588, "step": 8067 }, { "epoch": 17.966592427616927, "loss": 0.6492767333984375, "loss_ce": 0.00010679697152227163, "loss_iou": 0.26171875, "loss_num": 0.0247802734375, "loss_xval": 0.6484375, "num_input_tokens_seen": 452024588, "step": 8067 }, { "epoch": 17.968819599109132, "grad_norm": 21.565893173217773, "learning_rate": 1e-06, "loss": 0.3403, "num_input_tokens_seen": 452079184, "step": 8068 }, { "epoch": 17.968819599109132, "loss": 0.3689712882041931, "loss_ce": 7.478379120584577e-05, "loss_iou": 0.1650390625, "loss_num": 0.007598876953125, "loss_xval": 0.369140625, "num_input_tokens_seen": 452079184, "step": 8068 }, { "epoch": 17.971046770601337, "grad_norm": 53.53007507324219, "learning_rate": 1e-06, "loss": 0.4302, "num_input_tokens_seen": 452132824, "step": 8069 }, { "epoch": 17.971046770601337, "loss": 0.3132534623146057, "loss_ce": 0.00014310533879324794, "loss_iou": 0.146484375, "loss_num": 0.004058837890625, "loss_xval": 0.3125, "num_input_tokens_seen": 452132824, "step": 8069 }, { "epoch": 17.97327394209354, "grad_norm": 25.093297958374023, "learning_rate": 1e-06, "loss": 0.3963, "num_input_tokens_seen": 452188908, "step": 8070 }, { "epoch": 17.97327394209354, "loss": 0.3288014531135559, "loss_ce": 6.609824777115136e-05, "loss_iou": 0.1435546875, "loss_num": 0.0084228515625, "loss_xval": 0.328125, "num_input_tokens_seen": 452188908, "step": 8070 }, { "epoch": 17.975501113585747, "grad_norm": 14.360114097595215, "learning_rate": 1e-06, "loss": 0.4598, "num_input_tokens_seen": 452245072, "step": 8071 }, { "epoch": 17.975501113585747, "loss": 0.49251696467399597, "loss_ce": 8.531761704944074e-05, "loss_iou": 0.203125, "loss_num": 0.01708984375, "loss_xval": 0.4921875, "num_input_tokens_seen": 452245072, "step": 8071 }, { "epoch": 17.97772828507795, "grad_norm": 19.300607681274414, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 452302856, "step": 8072 }, { "epoch": 17.97772828507795, "loss": 0.47664445638656616, "loss_ce": 8.195844566216692e-05, "loss_iou": 0.2060546875, "loss_num": 0.0125732421875, "loss_xval": 0.4765625, "num_input_tokens_seen": 452302856, "step": 8072 }, { "epoch": 17.979955456570156, "grad_norm": 22.800689697265625, "learning_rate": 1e-06, "loss": 0.3213, "num_input_tokens_seen": 452358036, "step": 8073 }, { "epoch": 17.979955456570156, "loss": 0.30914774537086487, "loss_ce": 6.572413258254528e-05, "loss_iou": 0.1259765625, "loss_num": 0.01153564453125, "loss_xval": 0.30859375, "num_input_tokens_seen": 452358036, "step": 8073 }, { "epoch": 17.98218262806236, "grad_norm": 18.20235824584961, "learning_rate": 1e-06, "loss": 0.3886, "num_input_tokens_seen": 452412620, "step": 8074 }, { "epoch": 17.98218262806236, "loss": 0.3233228921890259, "loss_ce": 8.070748299360275e-05, "loss_iou": 0.1435546875, "loss_num": 0.0072021484375, "loss_xval": 0.32421875, "num_input_tokens_seen": 452412620, "step": 8074 }, { "epoch": 17.984409799554566, "grad_norm": 14.406597137451172, "learning_rate": 1e-06, "loss": 0.4968, "num_input_tokens_seen": 452469772, "step": 8075 }, { "epoch": 17.984409799554566, "loss": 0.5883782505989075, "loss_ce": 0.00012145160872023553, "loss_iou": 0.25, "loss_num": 0.017333984375, "loss_xval": 0.58984375, "num_input_tokens_seen": 452469772, "step": 8075 }, { "epoch": 17.98663697104677, "grad_norm": 26.771181106567383, "learning_rate": 1e-06, "loss": 0.4041, "num_input_tokens_seen": 452523692, "step": 8076 }, { "epoch": 17.98663697104677, "loss": 0.5332375764846802, "loss_ce": 9.549761307425797e-05, "loss_iou": 0.2109375, "loss_num": 0.0220947265625, "loss_xval": 0.53125, "num_input_tokens_seen": 452523692, "step": 8076 }, { "epoch": 17.988864142538976, "grad_norm": 15.690217971801758, "learning_rate": 1e-06, "loss": 0.3856, "num_input_tokens_seen": 452581640, "step": 8077 }, { "epoch": 17.988864142538976, "loss": 0.3996119499206543, "loss_ce": 0.0001978981599677354, "loss_iou": 0.166015625, "loss_num": 0.01348876953125, "loss_xval": 0.3984375, "num_input_tokens_seen": 452581640, "step": 8077 }, { "epoch": 17.99109131403118, "grad_norm": 20.969528198242188, "learning_rate": 1e-06, "loss": 0.3346, "num_input_tokens_seen": 452634292, "step": 8078 }, { "epoch": 17.99109131403118, "loss": 0.3364979922771454, "loss_ce": 7.221752457553521e-05, "loss_iou": 0.1552734375, "loss_num": 0.005035400390625, "loss_xval": 0.3359375, "num_input_tokens_seen": 452634292, "step": 8078 }, { "epoch": 17.993318485523385, "grad_norm": 22.376567840576172, "learning_rate": 1e-06, "loss": 0.4623, "num_input_tokens_seen": 452691208, "step": 8079 }, { "epoch": 17.993318485523385, "loss": 0.38116979598999023, "loss_ce": 6.630241841776296e-05, "loss_iou": 0.1728515625, "loss_num": 0.007232666015625, "loss_xval": 0.380859375, "num_input_tokens_seen": 452691208, "step": 8079 }, { "epoch": 17.99554565701559, "grad_norm": 25.332250595092773, "learning_rate": 1e-06, "loss": 0.38, "num_input_tokens_seen": 452744012, "step": 8080 }, { "epoch": 17.99554565701559, "loss": 0.5036153793334961, "loss_ce": 0.00019747592159546912, "loss_iou": 0.21875, "loss_num": 0.0133056640625, "loss_xval": 0.50390625, "num_input_tokens_seen": 452744012, "step": 8080 }, { "epoch": 17.997772828507795, "grad_norm": 21.98955726623535, "learning_rate": 1e-06, "loss": 0.3497, "num_input_tokens_seen": 452801908, "step": 8081 }, { "epoch": 17.997772828507795, "loss": 0.3676411509513855, "loss_ce": 8.742515638004988e-05, "loss_iou": 0.166015625, "loss_num": 0.007080078125, "loss_xval": 0.3671875, "num_input_tokens_seen": 452801908, "step": 8081 }, { "epoch": 18.0, "grad_norm": 16.604522705078125, "learning_rate": 1e-06, "loss": 0.4416, "num_input_tokens_seen": 452859412, "step": 8082 }, { "epoch": 18.0, "loss": 0.5428001880645752, "loss_ce": 7.557860226370394e-05, "loss_iou": 0.216796875, "loss_num": 0.02197265625, "loss_xval": 0.54296875, "num_input_tokens_seen": 452859412, "step": 8082 }, { "epoch": 18.002227171492205, "grad_norm": 13.554404258728027, "learning_rate": 1e-06, "loss": 0.3327, "num_input_tokens_seen": 452915892, "step": 8083 }, { "epoch": 18.002227171492205, "loss": 0.16937774419784546, "loss_ce": 6.622253567911685e-05, "loss_iou": 0.0712890625, "loss_num": 0.005401611328125, "loss_xval": 0.1689453125, "num_input_tokens_seen": 452915892, "step": 8083 }, { "epoch": 18.00445434298441, "grad_norm": 21.166494369506836, "learning_rate": 1e-06, "loss": 0.4786, "num_input_tokens_seen": 452973948, "step": 8084 }, { "epoch": 18.00445434298441, "loss": 0.3788573145866394, "loss_ce": 7.313516107387841e-05, "loss_iou": 0.17578125, "loss_num": 0.005584716796875, "loss_xval": 0.37890625, "num_input_tokens_seen": 452973948, "step": 8084 }, { "epoch": 18.006681514476615, "grad_norm": 20.805810928344727, "learning_rate": 1e-06, "loss": 0.4782, "num_input_tokens_seen": 453030260, "step": 8085 }, { "epoch": 18.006681514476615, "loss": 0.5902899503707886, "loss_ce": 8.001519017852843e-05, "loss_iou": 0.2373046875, "loss_num": 0.0230712890625, "loss_xval": 0.58984375, "num_input_tokens_seen": 453030260, "step": 8085 }, { "epoch": 18.00890868596882, "grad_norm": 16.5931396484375, "learning_rate": 1e-06, "loss": 0.4174, "num_input_tokens_seen": 453085656, "step": 8086 }, { "epoch": 18.00890868596882, "loss": 0.4264695942401886, "loss_ce": 7.800738967489451e-05, "loss_iou": 0.185546875, "loss_num": 0.0108642578125, "loss_xval": 0.42578125, "num_input_tokens_seen": 453085656, "step": 8086 }, { "epoch": 18.011135857461024, "grad_norm": 20.476335525512695, "learning_rate": 1e-06, "loss": 0.3997, "num_input_tokens_seen": 453141140, "step": 8087 }, { "epoch": 18.011135857461024, "loss": 0.40523386001586914, "loss_ce": 0.0002045718429144472, "loss_iou": 0.1884765625, "loss_num": 0.005462646484375, "loss_xval": 0.404296875, "num_input_tokens_seen": 453141140, "step": 8087 }, { "epoch": 18.01336302895323, "grad_norm": 26.6048641204834, "learning_rate": 1e-06, "loss": 0.3262, "num_input_tokens_seen": 453199108, "step": 8088 }, { "epoch": 18.01336302895323, "loss": 0.3548569679260254, "loss_ce": 5.9605521528283134e-05, "loss_iou": 0.1572265625, "loss_num": 0.008056640625, "loss_xval": 0.35546875, "num_input_tokens_seen": 453199108, "step": 8088 }, { "epoch": 18.015590200445434, "grad_norm": 28.440725326538086, "learning_rate": 1e-06, "loss": 0.56, "num_input_tokens_seen": 453254600, "step": 8089 }, { "epoch": 18.015590200445434, "loss": 0.6332626342773438, "loss_ce": 8.392542076762766e-05, "loss_iou": 0.283203125, "loss_num": 0.0137939453125, "loss_xval": 0.6328125, "num_input_tokens_seen": 453254600, "step": 8089 }, { "epoch": 18.01781737193764, "grad_norm": 17.789857864379883, "learning_rate": 1e-06, "loss": 0.5431, "num_input_tokens_seen": 453311020, "step": 8090 }, { "epoch": 18.01781737193764, "loss": 0.7038158774375916, "loss_ce": 8.052912016864866e-05, "loss_iou": 0.30078125, "loss_num": 0.0201416015625, "loss_xval": 0.703125, "num_input_tokens_seen": 453311020, "step": 8090 }, { "epoch": 18.020044543429844, "grad_norm": 19.747709274291992, "learning_rate": 1e-06, "loss": 0.378, "num_input_tokens_seen": 453364652, "step": 8091 }, { "epoch": 18.020044543429844, "loss": 0.47260355949401855, "loss_ce": 6.938957085367292e-05, "loss_iou": 0.2119140625, "loss_num": 0.00970458984375, "loss_xval": 0.47265625, "num_input_tokens_seen": 453364652, "step": 8091 }, { "epoch": 18.02227171492205, "grad_norm": 24.440204620361328, "learning_rate": 1e-06, "loss": 0.4914, "num_input_tokens_seen": 453419676, "step": 8092 }, { "epoch": 18.02227171492205, "loss": 0.43805378675460815, "loss_ce": 6.548190140165389e-05, "loss_iou": 0.173828125, "loss_num": 0.01806640625, "loss_xval": 0.4375, "num_input_tokens_seen": 453419676, "step": 8092 }, { "epoch": 18.024498886414253, "grad_norm": 14.660680770874023, "learning_rate": 1e-06, "loss": 0.4239, "num_input_tokens_seen": 453476280, "step": 8093 }, { "epoch": 18.024498886414253, "loss": 0.3206265866756439, "loss_ce": 6.993835268076509e-05, "loss_iou": 0.1376953125, "loss_num": 0.009033203125, "loss_xval": 0.3203125, "num_input_tokens_seen": 453476280, "step": 8093 }, { "epoch": 18.02672605790646, "grad_norm": 22.54802894592285, "learning_rate": 1e-06, "loss": 0.4878, "num_input_tokens_seen": 453529916, "step": 8094 }, { "epoch": 18.02672605790646, "loss": 0.37210169434547424, "loss_ce": 9.240545477950945e-05, "loss_iou": 0.16796875, "loss_num": 0.007354736328125, "loss_xval": 0.37109375, "num_input_tokens_seen": 453529916, "step": 8094 }, { "epoch": 18.028953229398663, "grad_norm": 20.00684928894043, "learning_rate": 1e-06, "loss": 0.3406, "num_input_tokens_seen": 453583004, "step": 8095 }, { "epoch": 18.028953229398663, "loss": 0.39649391174316406, "loss_ce": 7.059978815959767e-05, "loss_iou": 0.171875, "loss_num": 0.0106201171875, "loss_xval": 0.396484375, "num_input_tokens_seen": 453583004, "step": 8095 }, { "epoch": 18.031180400890868, "grad_norm": 25.416080474853516, "learning_rate": 1e-06, "loss": 0.5202, "num_input_tokens_seen": 453637924, "step": 8096 }, { "epoch": 18.031180400890868, "loss": 0.5738502740859985, "loss_ce": 0.00011981255374848843, "loss_iou": 0.255859375, "loss_num": 0.0128173828125, "loss_xval": 0.57421875, "num_input_tokens_seen": 453637924, "step": 8096 }, { "epoch": 18.033407572383073, "grad_norm": 17.065658569335938, "learning_rate": 1e-06, "loss": 0.4977, "num_input_tokens_seen": 453693352, "step": 8097 }, { "epoch": 18.033407572383073, "loss": 0.32081741094589233, "loss_ce": 7.766792987240478e-05, "loss_iou": 0.12890625, "loss_num": 0.0126953125, "loss_xval": 0.3203125, "num_input_tokens_seen": 453693352, "step": 8097 }, { "epoch": 18.035634743875278, "grad_norm": 18.961402893066406, "learning_rate": 1e-06, "loss": 0.4149, "num_input_tokens_seen": 453748036, "step": 8098 }, { "epoch": 18.035634743875278, "loss": 0.41537126898765564, "loss_ce": 8.809100836515427e-05, "loss_iou": 0.169921875, "loss_num": 0.014892578125, "loss_xval": 0.416015625, "num_input_tokens_seen": 453748036, "step": 8098 }, { "epoch": 18.037861915367483, "grad_norm": 19.813962936401367, "learning_rate": 1e-06, "loss": 0.2732, "num_input_tokens_seen": 453804100, "step": 8099 }, { "epoch": 18.037861915367483, "loss": 0.27010267972946167, "loss_ce": 8.316422463394701e-05, "loss_iou": 0.1142578125, "loss_num": 0.00823974609375, "loss_xval": 0.26953125, "num_input_tokens_seen": 453804100, "step": 8099 }, { "epoch": 18.040089086859687, "grad_norm": 21.94256019592285, "learning_rate": 1e-06, "loss": 0.3819, "num_input_tokens_seen": 453862180, "step": 8100 }, { "epoch": 18.040089086859687, "loss": 0.40962302684783936, "loss_ce": 7.715914398431778e-05, "loss_iou": 0.189453125, "loss_num": 0.00628662109375, "loss_xval": 0.41015625, "num_input_tokens_seen": 453862180, "step": 8100 }, { "epoch": 18.042316258351892, "grad_norm": 26.7554988861084, "learning_rate": 1e-06, "loss": 0.5004, "num_input_tokens_seen": 453918752, "step": 8101 }, { "epoch": 18.042316258351892, "loss": 0.33896124362945557, "loss_ce": 9.407360630575567e-05, "loss_iou": 0.1484375, "loss_num": 0.00830078125, "loss_xval": 0.33984375, "num_input_tokens_seen": 453918752, "step": 8101 }, { "epoch": 18.044543429844097, "grad_norm": 15.414385795593262, "learning_rate": 1e-06, "loss": 0.4937, "num_input_tokens_seen": 453970240, "step": 8102 }, { "epoch": 18.044543429844097, "loss": 0.5033870935440063, "loss_ce": 9.12140094442293e-05, "loss_iou": 0.203125, "loss_num": 0.019287109375, "loss_xval": 0.50390625, "num_input_tokens_seen": 453970240, "step": 8102 }, { "epoch": 18.046770601336302, "grad_norm": 19.02543067932129, "learning_rate": 1e-06, "loss": 0.3916, "num_input_tokens_seen": 454026744, "step": 8103 }, { "epoch": 18.046770601336302, "loss": 0.46980106830596924, "loss_ce": 7.446320523740724e-05, "loss_iou": 0.1953125, "loss_num": 0.015869140625, "loss_xval": 0.46875, "num_input_tokens_seen": 454026744, "step": 8103 }, { "epoch": 18.048997772828507, "grad_norm": 14.910806655883789, "learning_rate": 1e-06, "loss": 0.3787, "num_input_tokens_seen": 454084836, "step": 8104 }, { "epoch": 18.048997772828507, "loss": 0.37930968403816223, "loss_ce": 9.826038149185479e-05, "loss_iou": 0.17578125, "loss_num": 0.005462646484375, "loss_xval": 0.37890625, "num_input_tokens_seen": 454084836, "step": 8104 }, { "epoch": 18.051224944320712, "grad_norm": 15.848285675048828, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 454140536, "step": 8105 }, { "epoch": 18.051224944320712, "loss": 0.5733276009559631, "loss_ce": 8.542699652025476e-05, "loss_iou": 0.251953125, "loss_num": 0.01416015625, "loss_xval": 0.57421875, "num_input_tokens_seen": 454140536, "step": 8105 }, { "epoch": 18.053452115812917, "grad_norm": 18.083738327026367, "learning_rate": 1e-06, "loss": 0.3539, "num_input_tokens_seen": 454198948, "step": 8106 }, { "epoch": 18.053452115812917, "loss": 0.46540844440460205, "loss_ce": 7.641864067409188e-05, "loss_iou": 0.2119140625, "loss_num": 0.0081787109375, "loss_xval": 0.46484375, "num_input_tokens_seen": 454198948, "step": 8106 }, { "epoch": 18.05567928730512, "grad_norm": 19.562829971313477, "learning_rate": 1e-06, "loss": 0.3211, "num_input_tokens_seen": 454254564, "step": 8107 }, { "epoch": 18.05567928730512, "loss": 0.32736125588417053, "loss_ce": 9.073851106222719e-05, "loss_iou": 0.140625, "loss_num": 0.0093994140625, "loss_xval": 0.328125, "num_input_tokens_seen": 454254564, "step": 8107 }, { "epoch": 18.057906458797326, "grad_norm": 28.230247497558594, "learning_rate": 1e-06, "loss": 0.516, "num_input_tokens_seen": 454310376, "step": 8108 }, { "epoch": 18.057906458797326, "loss": 0.46714556217193604, "loss_ce": 0.00010453614231664687, "loss_iou": 0.16015625, "loss_num": 0.029296875, "loss_xval": 0.466796875, "num_input_tokens_seen": 454310376, "step": 8108 }, { "epoch": 18.06013363028953, "grad_norm": 12.480415344238281, "learning_rate": 1e-06, "loss": 0.2964, "num_input_tokens_seen": 454367776, "step": 8109 }, { "epoch": 18.06013363028953, "loss": 0.356157511472702, "loss_ce": 7.839675527065992e-05, "loss_iou": 0.14453125, "loss_num": 0.01336669921875, "loss_xval": 0.35546875, "num_input_tokens_seen": 454367776, "step": 8109 }, { "epoch": 18.062360801781736, "grad_norm": 22.18619728088379, "learning_rate": 1e-06, "loss": 0.4995, "num_input_tokens_seen": 454423244, "step": 8110 }, { "epoch": 18.062360801781736, "loss": 0.5585031509399414, "loss_ce": 0.00015356890799012035, "loss_iou": 0.232421875, "loss_num": 0.0189208984375, "loss_xval": 0.55859375, "num_input_tokens_seen": 454423244, "step": 8110 }, { "epoch": 18.06458797327394, "grad_norm": 25.902963638305664, "learning_rate": 1e-06, "loss": 0.522, "num_input_tokens_seen": 454480660, "step": 8111 }, { "epoch": 18.06458797327394, "loss": 0.49017781019210815, "loss_ce": 6.550169200636446e-05, "loss_iou": 0.216796875, "loss_num": 0.01153564453125, "loss_xval": 0.490234375, "num_input_tokens_seen": 454480660, "step": 8111 }, { "epoch": 18.066815144766146, "grad_norm": 27.04472541809082, "learning_rate": 1e-06, "loss": 0.3989, "num_input_tokens_seen": 454539016, "step": 8112 }, { "epoch": 18.066815144766146, "loss": 0.26980409026145935, "loss_ce": 8.019209781195968e-05, "loss_iou": 0.12158203125, "loss_num": 0.005340576171875, "loss_xval": 0.26953125, "num_input_tokens_seen": 454539016, "step": 8112 }, { "epoch": 18.06904231625835, "grad_norm": 15.210087776184082, "learning_rate": 1e-06, "loss": 0.5414, "num_input_tokens_seen": 454595484, "step": 8113 }, { "epoch": 18.06904231625835, "loss": 0.652846097946167, "loss_ce": 7.511470903409645e-05, "loss_iou": 0.255859375, "loss_num": 0.0284423828125, "loss_xval": 0.65234375, "num_input_tokens_seen": 454595484, "step": 8113 }, { "epoch": 18.071269487750556, "grad_norm": 24.391822814941406, "learning_rate": 1e-06, "loss": 0.5438, "num_input_tokens_seen": 454647108, "step": 8114 }, { "epoch": 18.071269487750556, "loss": 0.5746660828590393, "loss_ce": 8.115639502648264e-05, "loss_iou": 0.271484375, "loss_num": 0.006683349609375, "loss_xval": 0.57421875, "num_input_tokens_seen": 454647108, "step": 8114 }, { "epoch": 18.07349665924276, "grad_norm": 13.966802597045898, "learning_rate": 1e-06, "loss": 0.3106, "num_input_tokens_seen": 454704844, "step": 8115 }, { "epoch": 18.07349665924276, "loss": 0.31044334173202515, "loss_ce": 7.955444743856788e-05, "loss_iou": 0.1298828125, "loss_num": 0.0103759765625, "loss_xval": 0.310546875, "num_input_tokens_seen": 454704844, "step": 8115 }, { "epoch": 18.075723830734965, "grad_norm": 18.81356430053711, "learning_rate": 1e-06, "loss": 0.4646, "num_input_tokens_seen": 454762604, "step": 8116 }, { "epoch": 18.075723830734965, "loss": 0.4306223690509796, "loss_ce": 8.036733197513968e-05, "loss_iou": 0.1982421875, "loss_num": 0.00701904296875, "loss_xval": 0.4296875, "num_input_tokens_seen": 454762604, "step": 8116 }, { "epoch": 18.07795100222717, "grad_norm": 13.788688659667969, "learning_rate": 1e-06, "loss": 0.249, "num_input_tokens_seen": 454818024, "step": 8117 }, { "epoch": 18.07795100222717, "loss": 0.23960940539836884, "loss_ce": 7.69186153775081e-05, "loss_iou": 0.1025390625, "loss_num": 0.00689697265625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 454818024, "step": 8117 }, { "epoch": 18.080178173719375, "grad_norm": 14.488852500915527, "learning_rate": 1e-06, "loss": 0.3516, "num_input_tokens_seen": 454873348, "step": 8118 }, { "epoch": 18.080178173719375, "loss": 0.40378397703170776, "loss_ce": 9.744857379700989e-05, "loss_iou": 0.1669921875, "loss_num": 0.0137939453125, "loss_xval": 0.404296875, "num_input_tokens_seen": 454873348, "step": 8118 }, { "epoch": 18.08240534521158, "grad_norm": 27.27067756652832, "learning_rate": 1e-06, "loss": 0.4236, "num_input_tokens_seen": 454929640, "step": 8119 }, { "epoch": 18.08240534521158, "loss": 0.3224753439426422, "loss_ce": 8.763029472902417e-05, "loss_iou": 0.1494140625, "loss_num": 0.00469970703125, "loss_xval": 0.322265625, "num_input_tokens_seen": 454929640, "step": 8119 }, { "epoch": 18.084632516703785, "grad_norm": 18.79537010192871, "learning_rate": 1e-06, "loss": 0.4925, "num_input_tokens_seen": 454984244, "step": 8120 }, { "epoch": 18.084632516703785, "loss": 0.47800394892692566, "loss_ce": 0.00015972901019267738, "loss_iou": 0.203125, "loss_num": 0.0142822265625, "loss_xval": 0.478515625, "num_input_tokens_seen": 454984244, "step": 8120 }, { "epoch": 18.08685968819599, "grad_norm": 15.249739646911621, "learning_rate": 1e-06, "loss": 0.3372, "num_input_tokens_seen": 455041144, "step": 8121 }, { "epoch": 18.08685968819599, "loss": 0.2875575125217438, "loss_ce": 8.190819789888337e-05, "loss_iou": 0.1318359375, "loss_num": 0.004669189453125, "loss_xval": 0.287109375, "num_input_tokens_seen": 455041144, "step": 8121 }, { "epoch": 18.089086859688194, "grad_norm": 14.420731544494629, "learning_rate": 1e-06, "loss": 0.4776, "num_input_tokens_seen": 455097144, "step": 8122 }, { "epoch": 18.089086859688194, "loss": 0.3784327805042267, "loss_ce": 0.00013688384206034243, "loss_iou": 0.1572265625, "loss_num": 0.01263427734375, "loss_xval": 0.37890625, "num_input_tokens_seen": 455097144, "step": 8122 }, { "epoch": 18.0913140311804, "grad_norm": 19.662689208984375, "learning_rate": 1e-06, "loss": 0.4817, "num_input_tokens_seen": 455151808, "step": 8123 }, { "epoch": 18.0913140311804, "loss": 0.567717969417572, "loss_ce": 9.098585724132136e-05, "loss_iou": 0.244140625, "loss_num": 0.0159912109375, "loss_xval": 0.56640625, "num_input_tokens_seen": 455151808, "step": 8123 }, { "epoch": 18.093541202672604, "grad_norm": 19.106225967407227, "learning_rate": 1e-06, "loss": 0.2999, "num_input_tokens_seen": 455207776, "step": 8124 }, { "epoch": 18.093541202672604, "loss": 0.33509403467178345, "loss_ce": 7.206852023955435e-05, "loss_iou": 0.1513671875, "loss_num": 0.006591796875, "loss_xval": 0.3359375, "num_input_tokens_seen": 455207776, "step": 8124 }, { "epoch": 18.09576837416481, "grad_norm": 16.473522186279297, "learning_rate": 1e-06, "loss": 0.4367, "num_input_tokens_seen": 455264208, "step": 8125 }, { "epoch": 18.09576837416481, "loss": 0.4178709387779236, "loss_ce": 8.530144987162203e-05, "loss_iou": 0.1728515625, "loss_num": 0.014404296875, "loss_xval": 0.41796875, "num_input_tokens_seen": 455264208, "step": 8125 }, { "epoch": 18.097995545657014, "grad_norm": 26.5972900390625, "learning_rate": 1e-06, "loss": 0.3408, "num_input_tokens_seen": 455323352, "step": 8126 }, { "epoch": 18.097995545657014, "loss": 0.3901003897190094, "loss_ce": 8.57616396388039e-05, "loss_iou": 0.1796875, "loss_num": 0.00616455078125, "loss_xval": 0.390625, "num_input_tokens_seen": 455323352, "step": 8126 }, { "epoch": 18.100222717149222, "grad_norm": 21.1645565032959, "learning_rate": 1e-06, "loss": 0.4185, "num_input_tokens_seen": 455377804, "step": 8127 }, { "epoch": 18.100222717149222, "loss": 0.5210694074630737, "loss_ce": 7.333118992391974e-05, "loss_iou": 0.2265625, "loss_num": 0.01361083984375, "loss_xval": 0.51953125, "num_input_tokens_seen": 455377804, "step": 8127 }, { "epoch": 18.102449888641427, "grad_norm": 13.508529663085938, "learning_rate": 1e-06, "loss": 0.5152, "num_input_tokens_seen": 455436200, "step": 8128 }, { "epoch": 18.102449888641427, "loss": 0.6549245119094849, "loss_ce": 6.30651629762724e-05, "loss_iou": 0.265625, "loss_num": 0.0247802734375, "loss_xval": 0.65625, "num_input_tokens_seen": 455436200, "step": 8128 }, { "epoch": 18.104677060133632, "grad_norm": 36.7231330871582, "learning_rate": 1e-06, "loss": 0.3776, "num_input_tokens_seen": 455492220, "step": 8129 }, { "epoch": 18.104677060133632, "loss": 0.3046456277370453, "loss_ce": 8.02054419182241e-05, "loss_iou": 0.1337890625, "loss_num": 0.007354736328125, "loss_xval": 0.3046875, "num_input_tokens_seen": 455492220, "step": 8129 }, { "epoch": 18.106904231625837, "grad_norm": 19.406326293945312, "learning_rate": 1e-06, "loss": 0.4491, "num_input_tokens_seen": 455548928, "step": 8130 }, { "epoch": 18.106904231625837, "loss": 0.3191646337509155, "loss_ce": 7.285462925210595e-05, "loss_iou": 0.1455078125, "loss_num": 0.0057373046875, "loss_xval": 0.318359375, "num_input_tokens_seen": 455548928, "step": 8130 }, { "epoch": 18.10913140311804, "grad_norm": 12.066944122314453, "learning_rate": 1e-06, "loss": 0.4965, "num_input_tokens_seen": 455605732, "step": 8131 }, { "epoch": 18.10913140311804, "loss": 0.49812638759613037, "loss_ce": 7.951979932840914e-05, "loss_iou": 0.1962890625, "loss_num": 0.02099609375, "loss_xval": 0.498046875, "num_input_tokens_seen": 455605732, "step": 8131 }, { "epoch": 18.111358574610247, "grad_norm": 12.560493469238281, "learning_rate": 1e-06, "loss": 0.3447, "num_input_tokens_seen": 455660540, "step": 8132 }, { "epoch": 18.111358574610247, "loss": 0.4943099915981293, "loss_ce": 0.00010833313717739657, "loss_iou": 0.212890625, "loss_num": 0.01361083984375, "loss_xval": 0.494140625, "num_input_tokens_seen": 455660540, "step": 8132 }, { "epoch": 18.11358574610245, "grad_norm": 23.26740264892578, "learning_rate": 1e-06, "loss": 0.4059, "num_input_tokens_seen": 455712784, "step": 8133 }, { "epoch": 18.11358574610245, "loss": 0.533281683921814, "loss_ce": 7.859165634727105e-05, "loss_iou": 0.234375, "loss_num": 0.01275634765625, "loss_xval": 0.53125, "num_input_tokens_seen": 455712784, "step": 8133 }, { "epoch": 18.115812917594656, "grad_norm": 23.393814086914062, "learning_rate": 1e-06, "loss": 0.3933, "num_input_tokens_seen": 455769952, "step": 8134 }, { "epoch": 18.115812917594656, "loss": 0.4792192578315735, "loss_ce": 9.330162720289081e-05, "loss_iou": 0.2216796875, "loss_num": 0.007080078125, "loss_xval": 0.478515625, "num_input_tokens_seen": 455769952, "step": 8134 }, { "epoch": 18.11804008908686, "grad_norm": 23.41754913330078, "learning_rate": 1e-06, "loss": 0.3536, "num_input_tokens_seen": 455826364, "step": 8135 }, { "epoch": 18.11804008908686, "loss": 0.39287176728248596, "loss_ce": 7.998933142516762e-05, "loss_iou": 0.1728515625, "loss_num": 0.00933837890625, "loss_xval": 0.392578125, "num_input_tokens_seen": 455826364, "step": 8135 }, { "epoch": 18.120267260579066, "grad_norm": 18.5450382232666, "learning_rate": 1e-06, "loss": 0.5557, "num_input_tokens_seen": 455882520, "step": 8136 }, { "epoch": 18.120267260579066, "loss": 0.4983789920806885, "loss_ce": 8.797197369858623e-05, "loss_iou": 0.2021484375, "loss_num": 0.0189208984375, "loss_xval": 0.498046875, "num_input_tokens_seen": 455882520, "step": 8136 }, { "epoch": 18.12249443207127, "grad_norm": 21.666677474975586, "learning_rate": 1e-06, "loss": 0.6017, "num_input_tokens_seen": 455937676, "step": 8137 }, { "epoch": 18.12249443207127, "loss": 0.7193117141723633, "loss_ce": 7.343379547819495e-05, "loss_iou": 0.31640625, "loss_num": 0.017333984375, "loss_xval": 0.71875, "num_input_tokens_seen": 455937676, "step": 8137 }, { "epoch": 18.124721603563476, "grad_norm": 16.64803695678711, "learning_rate": 1e-06, "loss": 0.2924, "num_input_tokens_seen": 455993536, "step": 8138 }, { "epoch": 18.124721603563476, "loss": 0.39221036434173584, "loss_ce": 0.00018156672012992203, "loss_iou": 0.1533203125, "loss_num": 0.016845703125, "loss_xval": 0.392578125, "num_input_tokens_seen": 455993536, "step": 8138 }, { "epoch": 18.12694877505568, "grad_norm": 19.38474464416504, "learning_rate": 1e-06, "loss": 0.3963, "num_input_tokens_seen": 456050968, "step": 8139 }, { "epoch": 18.12694877505568, "loss": 0.3352212905883789, "loss_ce": 7.726570765953511e-05, "loss_iou": 0.1474609375, "loss_num": 0.0079345703125, "loss_xval": 0.3359375, "num_input_tokens_seen": 456050968, "step": 8139 }, { "epoch": 18.129175946547885, "grad_norm": 19.141530990600586, "learning_rate": 1e-06, "loss": 0.3805, "num_input_tokens_seen": 456106612, "step": 8140 }, { "epoch": 18.129175946547885, "loss": 0.3776403069496155, "loss_ce": 7.682391151320189e-05, "loss_iou": 0.16796875, "loss_num": 0.0084228515625, "loss_xval": 0.376953125, "num_input_tokens_seen": 456106612, "step": 8140 }, { "epoch": 18.13140311804009, "grad_norm": 26.25833511352539, "learning_rate": 1e-06, "loss": 0.4945, "num_input_tokens_seen": 456162008, "step": 8141 }, { "epoch": 18.13140311804009, "loss": 0.3847208023071289, "loss_ce": 7.726027979515493e-05, "loss_iou": 0.177734375, "loss_num": 0.005889892578125, "loss_xval": 0.384765625, "num_input_tokens_seen": 456162008, "step": 8141 }, { "epoch": 18.133630289532295, "grad_norm": 16.26892852783203, "learning_rate": 1e-06, "loss": 0.3777, "num_input_tokens_seen": 456218592, "step": 8142 }, { "epoch": 18.133630289532295, "loss": 0.29285314679145813, "loss_ce": 9.801473061088473e-05, "loss_iou": 0.1142578125, "loss_num": 0.0128173828125, "loss_xval": 0.29296875, "num_input_tokens_seen": 456218592, "step": 8142 }, { "epoch": 18.1358574610245, "grad_norm": 13.948265075683594, "learning_rate": 1e-06, "loss": 0.3755, "num_input_tokens_seen": 456276108, "step": 8143 }, { "epoch": 18.1358574610245, "loss": 0.2600770592689514, "loss_ce": 6.72862006467767e-05, "loss_iou": 0.1181640625, "loss_num": 0.0047607421875, "loss_xval": 0.259765625, "num_input_tokens_seen": 456276108, "step": 8143 }, { "epoch": 18.138084632516705, "grad_norm": 14.333640098571777, "learning_rate": 1e-06, "loss": 0.3341, "num_input_tokens_seen": 456331636, "step": 8144 }, { "epoch": 18.138084632516705, "loss": 0.37764212489128113, "loss_ce": 0.0008721151389181614, "loss_iou": 0.169921875, "loss_num": 0.00738525390625, "loss_xval": 0.376953125, "num_input_tokens_seen": 456331636, "step": 8144 }, { "epoch": 18.14031180400891, "grad_norm": 16.1193904876709, "learning_rate": 1e-06, "loss": 0.2635, "num_input_tokens_seen": 456388988, "step": 8145 }, { "epoch": 18.14031180400891, "loss": 0.30690258741378784, "loss_ce": 7.886123057687655e-05, "loss_iou": 0.1318359375, "loss_num": 0.0084228515625, "loss_xval": 0.306640625, "num_input_tokens_seen": 456388988, "step": 8145 }, { "epoch": 18.142538975501115, "grad_norm": 16.353914260864258, "learning_rate": 1e-06, "loss": 0.371, "num_input_tokens_seen": 456447512, "step": 8146 }, { "epoch": 18.142538975501115, "loss": 0.4683547616004944, "loss_ce": 9.304599370807409e-05, "loss_iou": 0.19140625, "loss_num": 0.0169677734375, "loss_xval": 0.46875, "num_input_tokens_seen": 456447512, "step": 8146 }, { "epoch": 18.14476614699332, "grad_norm": 21.104829788208008, "learning_rate": 1e-06, "loss": 0.3263, "num_input_tokens_seen": 456506516, "step": 8147 }, { "epoch": 18.14476614699332, "loss": 0.24555513262748718, "loss_ce": 7.17366419848986e-05, "loss_iou": 0.111328125, "loss_num": 0.00457763671875, "loss_xval": 0.2451171875, "num_input_tokens_seen": 456506516, "step": 8147 }, { "epoch": 18.146993318485524, "grad_norm": 16.034555435180664, "learning_rate": 1e-06, "loss": 0.4458, "num_input_tokens_seen": 456563372, "step": 8148 }, { "epoch": 18.146993318485524, "loss": 0.2350102663040161, "loss_ce": 8.594193059252575e-05, "loss_iou": 0.10888671875, "loss_num": 0.00335693359375, "loss_xval": 0.2353515625, "num_input_tokens_seen": 456563372, "step": 8148 }, { "epoch": 18.14922048997773, "grad_norm": 21.92203140258789, "learning_rate": 1e-06, "loss": 0.5644, "num_input_tokens_seen": 456620288, "step": 8149 }, { "epoch": 18.14922048997773, "loss": 0.2608814239501953, "loss_ce": 7.819505117367953e-05, "loss_iou": 0.12353515625, "loss_num": 0.0027313232421875, "loss_xval": 0.26171875, "num_input_tokens_seen": 456620288, "step": 8149 }, { "epoch": 18.151447661469934, "grad_norm": 14.489221572875977, "learning_rate": 1e-06, "loss": 0.306, "num_input_tokens_seen": 456677872, "step": 8150 }, { "epoch": 18.151447661469934, "loss": 0.31026989221572876, "loss_ce": 8.923574932850897e-05, "loss_iou": 0.1376953125, "loss_num": 0.00677490234375, "loss_xval": 0.310546875, "num_input_tokens_seen": 456677872, "step": 8150 }, { "epoch": 18.15367483296214, "grad_norm": 15.693221092224121, "learning_rate": 1e-06, "loss": 0.3977, "num_input_tokens_seen": 456730576, "step": 8151 }, { "epoch": 18.15367483296214, "loss": 0.35370901226997375, "loss_ce": 7.132487371563911e-05, "loss_iou": 0.15234375, "loss_num": 0.00970458984375, "loss_xval": 0.353515625, "num_input_tokens_seen": 456730576, "step": 8151 }, { "epoch": 18.155902004454344, "grad_norm": 21.944501876831055, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 456787084, "step": 8152 }, { "epoch": 18.155902004454344, "loss": 0.48086851835250854, "loss_ce": 9.459961438551545e-05, "loss_iou": 0.1904296875, "loss_num": 0.0201416015625, "loss_xval": 0.48046875, "num_input_tokens_seen": 456787084, "step": 8152 }, { "epoch": 18.15812917594655, "grad_norm": 17.893390655517578, "learning_rate": 1e-06, "loss": 0.409, "num_input_tokens_seen": 456845316, "step": 8153 }, { "epoch": 18.15812917594655, "loss": 0.4042748212814331, "loss_ce": 0.00010002141789300367, "loss_iou": 0.1875, "loss_num": 0.005706787109375, "loss_xval": 0.404296875, "num_input_tokens_seen": 456845316, "step": 8153 }, { "epoch": 18.160356347438753, "grad_norm": 18.064180374145508, "learning_rate": 1e-06, "loss": 0.3617, "num_input_tokens_seen": 456902888, "step": 8154 }, { "epoch": 18.160356347438753, "loss": 0.41487082839012146, "loss_ce": 7.58875539759174e-05, "loss_iou": 0.1669921875, "loss_num": 0.016357421875, "loss_xval": 0.4140625, "num_input_tokens_seen": 456902888, "step": 8154 }, { "epoch": 18.16258351893096, "grad_norm": 16.817781448364258, "learning_rate": 1e-06, "loss": 0.4589, "num_input_tokens_seen": 456955400, "step": 8155 }, { "epoch": 18.16258351893096, "loss": 0.702592134475708, "loss_ce": 0.00013854095595888793, "loss_iou": 0.287109375, "loss_num": 0.025390625, "loss_xval": 0.703125, "num_input_tokens_seen": 456955400, "step": 8155 }, { "epoch": 18.164810690423163, "grad_norm": 16.054969787597656, "learning_rate": 1e-06, "loss": 0.3455, "num_input_tokens_seen": 457011064, "step": 8156 }, { "epoch": 18.164810690423163, "loss": 0.3856889605522156, "loss_ce": 6.883072637720034e-05, "loss_iou": 0.1728515625, "loss_num": 0.008056640625, "loss_xval": 0.384765625, "num_input_tokens_seen": 457011064, "step": 8156 }, { "epoch": 18.167037861915368, "grad_norm": 16.399335861206055, "learning_rate": 1e-06, "loss": 0.3965, "num_input_tokens_seen": 457064464, "step": 8157 }, { "epoch": 18.167037861915368, "loss": 0.4999640882015228, "loss_ce": 8.612703823018819e-05, "loss_iou": 0.2060546875, "loss_num": 0.0177001953125, "loss_xval": 0.5, "num_input_tokens_seen": 457064464, "step": 8157 }, { "epoch": 18.169265033407573, "grad_norm": 26.273801803588867, "learning_rate": 1e-06, "loss": 0.2832, "num_input_tokens_seen": 457120308, "step": 8158 }, { "epoch": 18.169265033407573, "loss": 0.3040543496608734, "loss_ce": 9.926770871970803e-05, "loss_iou": 0.1396484375, "loss_num": 0.0048828125, "loss_xval": 0.3046875, "num_input_tokens_seen": 457120308, "step": 8158 }, { "epoch": 18.171492204899778, "grad_norm": 21.627939224243164, "learning_rate": 1e-06, "loss": 0.5024, "num_input_tokens_seen": 457175336, "step": 8159 }, { "epoch": 18.171492204899778, "loss": 0.5851208567619324, "loss_ce": 9.887100895866752e-05, "loss_iou": 0.267578125, "loss_num": 0.01031494140625, "loss_xval": 0.5859375, "num_input_tokens_seen": 457175336, "step": 8159 }, { "epoch": 18.173719376391983, "grad_norm": 36.85986328125, "learning_rate": 1e-06, "loss": 0.5207, "num_input_tokens_seen": 457232364, "step": 8160 }, { "epoch": 18.173719376391983, "loss": 0.4390491247177124, "loss_ce": 8.427293505519629e-05, "loss_iou": 0.197265625, "loss_num": 0.0086669921875, "loss_xval": 0.439453125, "num_input_tokens_seen": 457232364, "step": 8160 }, { "epoch": 18.175946547884188, "grad_norm": 27.42588996887207, "learning_rate": 1e-06, "loss": 0.4134, "num_input_tokens_seen": 457287852, "step": 8161 }, { "epoch": 18.175946547884188, "loss": 0.3699635863304138, "loss_ce": 9.052493987837806e-05, "loss_iou": 0.1533203125, "loss_num": 0.01251220703125, "loss_xval": 0.369140625, "num_input_tokens_seen": 457287852, "step": 8161 }, { "epoch": 18.178173719376392, "grad_norm": 19.563459396362305, "learning_rate": 1e-06, "loss": 0.4402, "num_input_tokens_seen": 457342640, "step": 8162 }, { "epoch": 18.178173719376392, "loss": 0.5288839936256409, "loss_ce": 7.537108467658982e-05, "loss_iou": 0.2216796875, "loss_num": 0.01708984375, "loss_xval": 0.52734375, "num_input_tokens_seen": 457342640, "step": 8162 }, { "epoch": 18.180400890868597, "grad_norm": 17.342567443847656, "learning_rate": 1e-06, "loss": 0.3396, "num_input_tokens_seen": 457396200, "step": 8163 }, { "epoch": 18.180400890868597, "loss": 0.36558669805526733, "loss_ce": 7.767054921714589e-05, "loss_iou": 0.1689453125, "loss_num": 0.005340576171875, "loss_xval": 0.365234375, "num_input_tokens_seen": 457396200, "step": 8163 }, { "epoch": 18.182628062360802, "grad_norm": 21.52007293701172, "learning_rate": 1e-06, "loss": 0.3781, "num_input_tokens_seen": 457453456, "step": 8164 }, { "epoch": 18.182628062360802, "loss": 0.2819899618625641, "loss_ce": 6.859673885628581e-05, "loss_iou": 0.11767578125, "loss_num": 0.00933837890625, "loss_xval": 0.28125, "num_input_tokens_seen": 457453456, "step": 8164 }, { "epoch": 18.184855233853007, "grad_norm": 23.580726623535156, "learning_rate": 1e-06, "loss": 0.4272, "num_input_tokens_seen": 457510128, "step": 8165 }, { "epoch": 18.184855233853007, "loss": 0.5174942016601562, "loss_ce": 0.00016021478222683072, "loss_iou": 0.2353515625, "loss_num": 0.00921630859375, "loss_xval": 0.515625, "num_input_tokens_seen": 457510128, "step": 8165 }, { "epoch": 18.187082405345212, "grad_norm": 17.68349266052246, "learning_rate": 1e-06, "loss": 0.3081, "num_input_tokens_seen": 457566892, "step": 8166 }, { "epoch": 18.187082405345212, "loss": 0.4296377897262573, "loss_ce": 7.235370139824226e-05, "loss_iou": 0.173828125, "loss_num": 0.016357421875, "loss_xval": 0.4296875, "num_input_tokens_seen": 457566892, "step": 8166 }, { "epoch": 18.189309576837417, "grad_norm": 15.33013916015625, "learning_rate": 1e-06, "loss": 0.3858, "num_input_tokens_seen": 457624908, "step": 8167 }, { "epoch": 18.189309576837417, "loss": 0.34201037883758545, "loss_ce": 9.140933980233967e-05, "loss_iou": 0.154296875, "loss_num": 0.00677490234375, "loss_xval": 0.341796875, "num_input_tokens_seen": 457624908, "step": 8167 }, { "epoch": 18.19153674832962, "grad_norm": 25.988941192626953, "learning_rate": 1e-06, "loss": 0.2713, "num_input_tokens_seen": 457681264, "step": 8168 }, { "epoch": 18.19153674832962, "loss": 0.2663070559501648, "loss_ce": 7.169348828028888e-05, "loss_iou": 0.1220703125, "loss_num": 0.004364013671875, "loss_xval": 0.265625, "num_input_tokens_seen": 457681264, "step": 8168 }, { "epoch": 18.193763919821826, "grad_norm": 17.212726593017578, "learning_rate": 1e-06, "loss": 0.3666, "num_input_tokens_seen": 457737384, "step": 8169 }, { "epoch": 18.193763919821826, "loss": 0.3459562063217163, "loss_ce": 8.524451550329104e-05, "loss_iou": 0.142578125, "loss_num": 0.01214599609375, "loss_xval": 0.345703125, "num_input_tokens_seen": 457737384, "step": 8169 }, { "epoch": 18.19599109131403, "grad_norm": 42.238746643066406, "learning_rate": 1e-06, "loss": 0.4758, "num_input_tokens_seen": 457792120, "step": 8170 }, { "epoch": 18.19599109131403, "loss": 0.571884036064148, "loss_ce": 0.00010671824566088617, "loss_iou": 0.2578125, "loss_num": 0.0115966796875, "loss_xval": 0.5703125, "num_input_tokens_seen": 457792120, "step": 8170 }, { "epoch": 18.198218262806236, "grad_norm": 18.594070434570312, "learning_rate": 1e-06, "loss": 0.4922, "num_input_tokens_seen": 457846880, "step": 8171 }, { "epoch": 18.198218262806236, "loss": 0.5872501134872437, "loss_ce": 9.18953082873486e-05, "loss_iou": 0.263671875, "loss_num": 0.01165771484375, "loss_xval": 0.5859375, "num_input_tokens_seen": 457846880, "step": 8171 }, { "epoch": 18.20044543429844, "grad_norm": 16.824237823486328, "learning_rate": 1e-06, "loss": 0.5248, "num_input_tokens_seen": 457901312, "step": 8172 }, { "epoch": 18.20044543429844, "loss": 0.3980518877506256, "loss_ce": 0.00010268213372910395, "loss_iou": 0.1796875, "loss_num": 0.0079345703125, "loss_xval": 0.3984375, "num_input_tokens_seen": 457901312, "step": 8172 }, { "epoch": 18.202672605790646, "grad_norm": 14.367027282714844, "learning_rate": 1e-06, "loss": 0.5356, "num_input_tokens_seen": 457956652, "step": 8173 }, { "epoch": 18.202672605790646, "loss": 0.544747531414032, "loss_ce": 6.98043149895966e-05, "loss_iou": 0.2119140625, "loss_num": 0.0242919921875, "loss_xval": 0.54296875, "num_input_tokens_seen": 457956652, "step": 8173 }, { "epoch": 18.20489977728285, "grad_norm": 16.34149742126465, "learning_rate": 1e-06, "loss": 0.4287, "num_input_tokens_seen": 458012632, "step": 8174 }, { "epoch": 18.20489977728285, "loss": 0.5932860374450684, "loss_ce": 0.0001463656226405874, "loss_iou": 0.240234375, "loss_num": 0.022705078125, "loss_xval": 0.59375, "num_input_tokens_seen": 458012632, "step": 8174 }, { "epoch": 18.207126948775056, "grad_norm": 14.004996299743652, "learning_rate": 1e-06, "loss": 0.3419, "num_input_tokens_seen": 458068888, "step": 8175 }, { "epoch": 18.207126948775056, "loss": 0.31001418828964233, "loss_ce": 7.767122588120401e-05, "loss_iou": 0.134765625, "loss_num": 0.008056640625, "loss_xval": 0.310546875, "num_input_tokens_seen": 458068888, "step": 8175 }, { "epoch": 18.20935412026726, "grad_norm": 15.622814178466797, "learning_rate": 1e-06, "loss": 0.7315, "num_input_tokens_seen": 458126280, "step": 8176 }, { "epoch": 18.20935412026726, "loss": 0.942706286907196, "loss_ce": 7.930257561383769e-05, "loss_iou": 0.39453125, "loss_num": 0.0302734375, "loss_xval": 0.94140625, "num_input_tokens_seen": 458126280, "step": 8176 }, { "epoch": 18.211581291759465, "grad_norm": 19.05216407775879, "learning_rate": 1e-06, "loss": 0.4613, "num_input_tokens_seen": 458184116, "step": 8177 }, { "epoch": 18.211581291759465, "loss": 0.4942248463630676, "loss_ce": 8.419141522608697e-05, "loss_iou": 0.224609375, "loss_num": 0.009033203125, "loss_xval": 0.494140625, "num_input_tokens_seen": 458184116, "step": 8177 }, { "epoch": 18.21380846325167, "grad_norm": 14.203112602233887, "learning_rate": 1e-06, "loss": 0.2897, "num_input_tokens_seen": 458239480, "step": 8178 }, { "epoch": 18.21380846325167, "loss": 0.18030065298080444, "loss_ce": 7.909360283520073e-05, "loss_iou": 0.06982421875, "loss_num": 0.0081787109375, "loss_xval": 0.1806640625, "num_input_tokens_seen": 458239480, "step": 8178 }, { "epoch": 18.216035634743875, "grad_norm": 18.163633346557617, "learning_rate": 1e-06, "loss": 0.3865, "num_input_tokens_seen": 458296128, "step": 8179 }, { "epoch": 18.216035634743875, "loss": 0.39704078435897827, "loss_ce": 6.810402555856854e-05, "loss_iou": 0.173828125, "loss_num": 0.00994873046875, "loss_xval": 0.396484375, "num_input_tokens_seen": 458296128, "step": 8179 }, { "epoch": 18.21826280623608, "grad_norm": 11.550317764282227, "learning_rate": 1e-06, "loss": 0.2529, "num_input_tokens_seen": 458351960, "step": 8180 }, { "epoch": 18.21826280623608, "loss": 0.25586456060409546, "loss_ce": 6.619263149332255e-05, "loss_iou": 0.1142578125, "loss_num": 0.005523681640625, "loss_xval": 0.255859375, "num_input_tokens_seen": 458351960, "step": 8180 }, { "epoch": 18.220489977728285, "grad_norm": 22.90071678161621, "learning_rate": 1e-06, "loss": 0.2964, "num_input_tokens_seen": 458407868, "step": 8181 }, { "epoch": 18.220489977728285, "loss": 0.27848535776138306, "loss_ce": 7.347905193455517e-05, "loss_iou": 0.1259765625, "loss_num": 0.005279541015625, "loss_xval": 0.279296875, "num_input_tokens_seen": 458407868, "step": 8181 }, { "epoch": 18.22271714922049, "grad_norm": 19.092378616333008, "learning_rate": 1e-06, "loss": 0.2401, "num_input_tokens_seen": 458464468, "step": 8182 }, { "epoch": 18.22271714922049, "loss": 0.25201308727264404, "loss_ce": 5.996804611640982e-05, "loss_iou": 0.11181640625, "loss_num": 0.00567626953125, "loss_xval": 0.251953125, "num_input_tokens_seen": 458464468, "step": 8182 }, { "epoch": 18.224944320712694, "grad_norm": 21.963666915893555, "learning_rate": 1e-06, "loss": 0.4337, "num_input_tokens_seen": 458520888, "step": 8183 }, { "epoch": 18.224944320712694, "loss": 0.37996935844421387, "loss_ce": 8.6537329480052e-05, "loss_iou": 0.158203125, "loss_num": 0.01251220703125, "loss_xval": 0.37890625, "num_input_tokens_seen": 458520888, "step": 8183 }, { "epoch": 18.2271714922049, "grad_norm": 25.58934211730957, "learning_rate": 1e-06, "loss": 0.5241, "num_input_tokens_seen": 458575140, "step": 8184 }, { "epoch": 18.2271714922049, "loss": 0.3485790491104126, "loss_ce": 6.830115307820961e-05, "loss_iou": 0.1416015625, "loss_num": 0.01324462890625, "loss_xval": 0.34765625, "num_input_tokens_seen": 458575140, "step": 8184 }, { "epoch": 18.229398663697104, "grad_norm": 12.794227600097656, "learning_rate": 1e-06, "loss": 0.3202, "num_input_tokens_seen": 458631632, "step": 8185 }, { "epoch": 18.229398663697104, "loss": 0.21777865290641785, "loss_ce": 6.625376408919692e-05, "loss_iou": 0.07861328125, "loss_num": 0.0120849609375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 458631632, "step": 8185 }, { "epoch": 18.23162583518931, "grad_norm": 18.660388946533203, "learning_rate": 1e-06, "loss": 0.4079, "num_input_tokens_seen": 458688408, "step": 8186 }, { "epoch": 18.23162583518931, "loss": 0.34663307666778564, "loss_ce": 7.544427353423089e-05, "loss_iou": 0.1513671875, "loss_num": 0.008544921875, "loss_xval": 0.345703125, "num_input_tokens_seen": 458688408, "step": 8186 }, { "epoch": 18.233853006681514, "grad_norm": 23.186193466186523, "learning_rate": 1e-06, "loss": 0.5892, "num_input_tokens_seen": 458743956, "step": 8187 }, { "epoch": 18.233853006681514, "loss": 0.42682188749313354, "loss_ce": 6.407788896467537e-05, "loss_iou": 0.193359375, "loss_num": 0.0081787109375, "loss_xval": 0.42578125, "num_input_tokens_seen": 458743956, "step": 8187 }, { "epoch": 18.23608017817372, "grad_norm": 21.697927474975586, "learning_rate": 1e-06, "loss": 0.3607, "num_input_tokens_seen": 458798376, "step": 8188 }, { "epoch": 18.23608017817372, "loss": 0.3312641382217407, "loss_ce": 8.736809832043946e-05, "loss_iou": 0.1435546875, "loss_num": 0.0087890625, "loss_xval": 0.33203125, "num_input_tokens_seen": 458798376, "step": 8188 }, { "epoch": 18.238307349665924, "grad_norm": 14.134407997131348, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 458852484, "step": 8189 }, { "epoch": 18.238307349665924, "loss": 0.35396501421928406, "loss_ce": 8.317639003507793e-05, "loss_iou": 0.1630859375, "loss_num": 0.00567626953125, "loss_xval": 0.353515625, "num_input_tokens_seen": 458852484, "step": 8189 }, { "epoch": 18.24053452115813, "grad_norm": 38.25514602661133, "learning_rate": 1e-06, "loss": 0.613, "num_input_tokens_seen": 458906996, "step": 8190 }, { "epoch": 18.24053452115813, "loss": 0.5307399034500122, "loss_ce": 0.00022236474615056068, "loss_iou": 0.23828125, "loss_num": 0.0108642578125, "loss_xval": 0.53125, "num_input_tokens_seen": 458906996, "step": 8190 }, { "epoch": 18.242761692650333, "grad_norm": 15.433220863342285, "learning_rate": 1e-06, "loss": 0.3704, "num_input_tokens_seen": 458964332, "step": 8191 }, { "epoch": 18.242761692650333, "loss": 0.35981857776641846, "loss_ce": 7.736931729596108e-05, "loss_iou": 0.162109375, "loss_num": 0.0072021484375, "loss_xval": 0.359375, "num_input_tokens_seen": 458964332, "step": 8191 }, { "epoch": 18.244988864142538, "grad_norm": 20.43923568725586, "learning_rate": 1e-06, "loss": 0.3444, "num_input_tokens_seen": 459017136, "step": 8192 }, { "epoch": 18.244988864142538, "loss": 0.4209337830543518, "loss_ce": 9.635718015488237e-05, "loss_iou": 0.189453125, "loss_num": 0.00823974609375, "loss_xval": 0.419921875, "num_input_tokens_seen": 459017136, "step": 8192 }, { "epoch": 18.247216035634743, "grad_norm": 17.763324737548828, "learning_rate": 1e-06, "loss": 0.373, "num_input_tokens_seen": 459070944, "step": 8193 }, { "epoch": 18.247216035634743, "loss": 0.2321740686893463, "loss_ce": 8.788384002400562e-05, "loss_iou": 0.10400390625, "loss_num": 0.004730224609375, "loss_xval": 0.232421875, "num_input_tokens_seen": 459070944, "step": 8193 }, { "epoch": 18.249443207126948, "grad_norm": 25.71845245361328, "learning_rate": 1e-06, "loss": 0.2999, "num_input_tokens_seen": 459126108, "step": 8194 }, { "epoch": 18.249443207126948, "loss": 0.31814372539520264, "loss_ce": 8.950403571361676e-05, "loss_iou": 0.1435546875, "loss_num": 0.00634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 459126108, "step": 8194 }, { "epoch": 18.251670378619153, "grad_norm": 36.80335235595703, "learning_rate": 1e-06, "loss": 0.5035, "num_input_tokens_seen": 459183928, "step": 8195 }, { "epoch": 18.251670378619153, "loss": 0.5821275115013123, "loss_ce": 9.623746882425621e-05, "loss_iou": 0.2578125, "loss_num": 0.01324462890625, "loss_xval": 0.58203125, "num_input_tokens_seen": 459183928, "step": 8195 }, { "epoch": 18.253897550111358, "grad_norm": 19.333406448364258, "learning_rate": 1e-06, "loss": 0.3609, "num_input_tokens_seen": 459241312, "step": 8196 }, { "epoch": 18.253897550111358, "loss": 0.4230514466762543, "loss_ce": 7.78458925196901e-05, "loss_iou": 0.1806640625, "loss_num": 0.0123291015625, "loss_xval": 0.423828125, "num_input_tokens_seen": 459241312, "step": 8196 }, { "epoch": 18.256124721603562, "grad_norm": 22.73419189453125, "learning_rate": 1e-06, "loss": 0.5667, "num_input_tokens_seen": 459298740, "step": 8197 }, { "epoch": 18.256124721603562, "loss": 0.6623133420944214, "loss_ce": 8.191996312234551e-05, "loss_iou": 0.306640625, "loss_num": 0.0096435546875, "loss_xval": 0.6640625, "num_input_tokens_seen": 459298740, "step": 8197 }, { "epoch": 18.258351893095767, "grad_norm": 19.6635684967041, "learning_rate": 1e-06, "loss": 0.35, "num_input_tokens_seen": 459354172, "step": 8198 }, { "epoch": 18.258351893095767, "loss": 0.2642420530319214, "loss_ce": 8.189349318854511e-05, "loss_iou": 0.1220703125, "loss_num": 0.004058837890625, "loss_xval": 0.263671875, "num_input_tokens_seen": 459354172, "step": 8198 }, { "epoch": 18.260579064587972, "grad_norm": 14.02831745147705, "learning_rate": 1e-06, "loss": 0.3704, "num_input_tokens_seen": 459409124, "step": 8199 }, { "epoch": 18.260579064587972, "loss": 0.37897345423698425, "loss_ce": 6.719774683006108e-05, "loss_iou": 0.1728515625, "loss_num": 0.006561279296875, "loss_xval": 0.37890625, "num_input_tokens_seen": 459409124, "step": 8199 }, { "epoch": 18.262806236080177, "grad_norm": 21.54979133605957, "learning_rate": 1e-06, "loss": 0.469, "num_input_tokens_seen": 459463080, "step": 8200 }, { "epoch": 18.262806236080177, "loss": 0.46509498357772827, "loss_ce": 6.810689228586853e-05, "loss_iou": 0.18359375, "loss_num": 0.01953125, "loss_xval": 0.46484375, "num_input_tokens_seen": 459463080, "step": 8200 }, { "epoch": 18.265033407572382, "grad_norm": 15.60781192779541, "learning_rate": 1e-06, "loss": 0.3833, "num_input_tokens_seen": 459518640, "step": 8201 }, { "epoch": 18.265033407572382, "loss": 0.31758350133895874, "loss_ce": 7.863431528676301e-05, "loss_iou": 0.1376953125, "loss_num": 0.008544921875, "loss_xval": 0.318359375, "num_input_tokens_seen": 459518640, "step": 8201 }, { "epoch": 18.267260579064587, "grad_norm": 25.999467849731445, "learning_rate": 1e-06, "loss": 0.4274, "num_input_tokens_seen": 459574496, "step": 8202 }, { "epoch": 18.267260579064587, "loss": 0.3373531103134155, "loss_ce": 7.284604362212121e-05, "loss_iou": 0.150390625, "loss_num": 0.0072021484375, "loss_xval": 0.337890625, "num_input_tokens_seen": 459574496, "step": 8202 }, { "epoch": 18.26948775055679, "grad_norm": 23.961406707763672, "learning_rate": 1e-06, "loss": 0.2495, "num_input_tokens_seen": 459631000, "step": 8203 }, { "epoch": 18.26948775055679, "loss": 0.2549530863761902, "loss_ce": 7.027012179605663e-05, "loss_iou": 0.1162109375, "loss_num": 0.004547119140625, "loss_xval": 0.25390625, "num_input_tokens_seen": 459631000, "step": 8203 }, { "epoch": 18.271714922048996, "grad_norm": 13.854894638061523, "learning_rate": 1e-06, "loss": 0.4288, "num_input_tokens_seen": 459689184, "step": 8204 }, { "epoch": 18.271714922048996, "loss": 0.32967978715896606, "loss_ce": 8.993731171358377e-05, "loss_iou": 0.150390625, "loss_num": 0.005767822265625, "loss_xval": 0.330078125, "num_input_tokens_seen": 459689184, "step": 8204 }, { "epoch": 18.2739420935412, "grad_norm": 15.130828857421875, "learning_rate": 1e-06, "loss": 0.331, "num_input_tokens_seen": 459744348, "step": 8205 }, { "epoch": 18.2739420935412, "loss": 0.3028114438056946, "loss_ce": 0.0002601708984002471, "loss_iou": 0.1357421875, "loss_num": 0.00628662109375, "loss_xval": 0.302734375, "num_input_tokens_seen": 459744348, "step": 8205 }, { "epoch": 18.276169265033406, "grad_norm": 22.3679256439209, "learning_rate": 1e-06, "loss": 0.4245, "num_input_tokens_seen": 459800880, "step": 8206 }, { "epoch": 18.276169265033406, "loss": 0.3924823999404907, "loss_ce": 8.740788325667381e-05, "loss_iou": 0.173828125, "loss_num": 0.00921630859375, "loss_xval": 0.392578125, "num_input_tokens_seen": 459800880, "step": 8206 }, { "epoch": 18.27839643652561, "grad_norm": 18.131832122802734, "learning_rate": 1e-06, "loss": 0.4461, "num_input_tokens_seen": 459856516, "step": 8207 }, { "epoch": 18.27839643652561, "loss": 0.4801885783672333, "loss_ce": 8.601776062278077e-05, "loss_iou": 0.189453125, "loss_num": 0.0203857421875, "loss_xval": 0.48046875, "num_input_tokens_seen": 459856516, "step": 8207 }, { "epoch": 18.280623608017816, "grad_norm": 17.009628295898438, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 459912960, "step": 8208 }, { "epoch": 18.280623608017816, "loss": 0.41185104846954346, "loss_ce": 0.00010787278006318957, "loss_iou": 0.181640625, "loss_num": 0.0096435546875, "loss_xval": 0.412109375, "num_input_tokens_seen": 459912960, "step": 8208 }, { "epoch": 18.28285077951002, "grad_norm": 16.9517765045166, "learning_rate": 1e-06, "loss": 0.4285, "num_input_tokens_seen": 459971620, "step": 8209 }, { "epoch": 18.28285077951002, "loss": 0.43293631076812744, "loss_ce": 7.499181083403528e-05, "loss_iou": 0.181640625, "loss_num": 0.01409912109375, "loss_xval": 0.43359375, "num_input_tokens_seen": 459971620, "step": 8209 }, { "epoch": 18.285077951002226, "grad_norm": 19.251338958740234, "learning_rate": 1e-06, "loss": 0.3578, "num_input_tokens_seen": 460028300, "step": 8210 }, { "epoch": 18.285077951002226, "loss": 0.38544806838035583, "loss_ce": 7.210190233308822e-05, "loss_iou": 0.162109375, "loss_num": 0.01214599609375, "loss_xval": 0.384765625, "num_input_tokens_seen": 460028300, "step": 8210 }, { "epoch": 18.28730512249443, "grad_norm": 18.597429275512695, "learning_rate": 1e-06, "loss": 0.3693, "num_input_tokens_seen": 460084228, "step": 8211 }, { "epoch": 18.28730512249443, "loss": 0.2983230948448181, "loss_ce": 0.00022741041902918369, "loss_iou": 0.12890625, "loss_num": 0.00811767578125, "loss_xval": 0.298828125, "num_input_tokens_seen": 460084228, "step": 8211 }, { "epoch": 18.289532293986635, "grad_norm": 17.22545623779297, "learning_rate": 1e-06, "loss": 0.264, "num_input_tokens_seen": 460139016, "step": 8212 }, { "epoch": 18.289532293986635, "loss": 0.2883983552455902, "loss_ce": 6.828906043665484e-05, "loss_iou": 0.1298828125, "loss_num": 0.005767822265625, "loss_xval": 0.2890625, "num_input_tokens_seen": 460139016, "step": 8212 }, { "epoch": 18.29175946547884, "grad_norm": 34.40812301635742, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 460194692, "step": 8213 }, { "epoch": 18.29175946547884, "loss": 0.3748340904712677, "loss_ce": 7.823290070518851e-05, "loss_iou": 0.1650390625, "loss_num": 0.00909423828125, "loss_xval": 0.375, "num_input_tokens_seen": 460194692, "step": 8213 }, { "epoch": 18.293986636971045, "grad_norm": 21.1818904876709, "learning_rate": 1e-06, "loss": 0.51, "num_input_tokens_seen": 460251096, "step": 8214 }, { "epoch": 18.293986636971045, "loss": 0.6246699690818787, "loss_ce": 0.00015822870773263276, "loss_iou": 0.25390625, "loss_num": 0.0234375, "loss_xval": 0.625, "num_input_tokens_seen": 460251096, "step": 8214 }, { "epoch": 18.29621380846325, "grad_norm": 20.41522216796875, "learning_rate": 1e-06, "loss": 0.4014, "num_input_tokens_seen": 460309636, "step": 8215 }, { "epoch": 18.29621380846325, "loss": 0.46027785539627075, "loss_ce": 7.279778947122395e-05, "loss_iou": 0.2119140625, "loss_num": 0.007171630859375, "loss_xval": 0.4609375, "num_input_tokens_seen": 460309636, "step": 8215 }, { "epoch": 18.29844097995546, "grad_norm": 17.60694694519043, "learning_rate": 1e-06, "loss": 0.3979, "num_input_tokens_seen": 460365224, "step": 8216 }, { "epoch": 18.29844097995546, "loss": 0.3157610297203064, "loss_ce": 8.721975609660149e-05, "loss_iou": 0.125, "loss_num": 0.012939453125, "loss_xval": 0.31640625, "num_input_tokens_seen": 460365224, "step": 8216 }, { "epoch": 18.30066815144766, "grad_norm": 27.434608459472656, "learning_rate": 1e-06, "loss": 0.4209, "num_input_tokens_seen": 460423160, "step": 8217 }, { "epoch": 18.30066815144766, "loss": 0.5255805850028992, "loss_ce": 6.787220627302304e-05, "loss_iou": 0.23828125, "loss_num": 0.00982666015625, "loss_xval": 0.52734375, "num_input_tokens_seen": 460423160, "step": 8217 }, { "epoch": 18.302895322939868, "grad_norm": 15.852324485778809, "learning_rate": 1e-06, "loss": 0.3734, "num_input_tokens_seen": 460480832, "step": 8218 }, { "epoch": 18.302895322939868, "loss": 0.3597148060798645, "loss_ce": 9.567091183271259e-05, "loss_iou": 0.16015625, "loss_num": 0.00787353515625, "loss_xval": 0.359375, "num_input_tokens_seen": 460480832, "step": 8218 }, { "epoch": 18.305122494432073, "grad_norm": 16.711715698242188, "learning_rate": 1e-06, "loss": 0.5244, "num_input_tokens_seen": 460534584, "step": 8219 }, { "epoch": 18.305122494432073, "loss": 0.605729341506958, "loss_ce": 7.750350050628185e-05, "loss_iou": 0.2275390625, "loss_num": 0.0301513671875, "loss_xval": 0.60546875, "num_input_tokens_seen": 460534584, "step": 8219 }, { "epoch": 18.307349665924278, "grad_norm": 24.398696899414062, "learning_rate": 1e-06, "loss": 0.4038, "num_input_tokens_seen": 460591852, "step": 8220 }, { "epoch": 18.307349665924278, "loss": 0.3758271336555481, "loss_ce": 9.471758676227182e-05, "loss_iou": 0.1591796875, "loss_num": 0.0113525390625, "loss_xval": 0.375, "num_input_tokens_seen": 460591852, "step": 8220 }, { "epoch": 18.309576837416483, "grad_norm": 15.600603103637695, "learning_rate": 1e-06, "loss": 0.4869, "num_input_tokens_seen": 460650120, "step": 8221 }, { "epoch": 18.309576837416483, "loss": 0.6405990123748779, "loss_ce": 9.607061656424776e-05, "loss_iou": 0.287109375, "loss_num": 0.01348876953125, "loss_xval": 0.640625, "num_input_tokens_seen": 460650120, "step": 8221 }, { "epoch": 18.311804008908688, "grad_norm": 14.608988761901855, "learning_rate": 1e-06, "loss": 0.3083, "num_input_tokens_seen": 460708680, "step": 8222 }, { "epoch": 18.311804008908688, "loss": 0.31593137979507446, "loss_ce": 7.447078678524122e-05, "loss_iou": 0.12890625, "loss_num": 0.01153564453125, "loss_xval": 0.31640625, "num_input_tokens_seen": 460708680, "step": 8222 }, { "epoch": 18.314031180400892, "grad_norm": 12.612223625183105, "learning_rate": 1e-06, "loss": 0.431, "num_input_tokens_seen": 460762612, "step": 8223 }, { "epoch": 18.314031180400892, "loss": 0.5143847465515137, "loss_ce": 0.00010250776540488005, "loss_iou": 0.216796875, "loss_num": 0.015869140625, "loss_xval": 0.515625, "num_input_tokens_seen": 460762612, "step": 8223 }, { "epoch": 18.316258351893097, "grad_norm": 19.262920379638672, "learning_rate": 1e-06, "loss": 0.5283, "num_input_tokens_seen": 460814596, "step": 8224 }, { "epoch": 18.316258351893097, "loss": 0.6533957123756409, "loss_ce": 7.540466322097927e-05, "loss_iou": 0.28125, "loss_num": 0.0185546875, "loss_xval": 0.65234375, "num_input_tokens_seen": 460814596, "step": 8224 }, { "epoch": 18.318485523385302, "grad_norm": 17.134294509887695, "learning_rate": 1e-06, "loss": 0.3269, "num_input_tokens_seen": 460871852, "step": 8225 }, { "epoch": 18.318485523385302, "loss": 0.2810301184654236, "loss_ce": 8.527821046300232e-05, "loss_iou": 0.1318359375, "loss_num": 0.003570556640625, "loss_xval": 0.28125, "num_input_tokens_seen": 460871852, "step": 8225 }, { "epoch": 18.320712694877507, "grad_norm": 16.73043441772461, "learning_rate": 1e-06, "loss": 0.4727, "num_input_tokens_seen": 460928820, "step": 8226 }, { "epoch": 18.320712694877507, "loss": 0.4601028263568878, "loss_ce": 8.087064634310082e-05, "loss_iou": 0.177734375, "loss_num": 0.0208740234375, "loss_xval": 0.4609375, "num_input_tokens_seen": 460928820, "step": 8226 }, { "epoch": 18.322939866369712, "grad_norm": 13.59985065460205, "learning_rate": 1e-06, "loss": 0.4533, "num_input_tokens_seen": 460985356, "step": 8227 }, { "epoch": 18.322939866369712, "loss": 0.43281298875808716, "loss_ce": 0.00019579757645260543, "loss_iou": 0.201171875, "loss_num": 0.006256103515625, "loss_xval": 0.43359375, "num_input_tokens_seen": 460985356, "step": 8227 }, { "epoch": 18.325167037861917, "grad_norm": 12.674905776977539, "learning_rate": 1e-06, "loss": 0.3026, "num_input_tokens_seen": 461042168, "step": 8228 }, { "epoch": 18.325167037861917, "loss": 0.3006775975227356, "loss_ce": 7.942065712995827e-05, "loss_iou": 0.1298828125, "loss_num": 0.00811767578125, "loss_xval": 0.30078125, "num_input_tokens_seen": 461042168, "step": 8228 }, { "epoch": 18.32739420935412, "grad_norm": 17.03211784362793, "learning_rate": 1e-06, "loss": 0.4486, "num_input_tokens_seen": 461097504, "step": 8229 }, { "epoch": 18.32739420935412, "loss": 0.50324547290802, "loss_ce": 7.163839472923428e-05, "loss_iou": 0.21875, "loss_num": 0.01324462890625, "loss_xval": 0.50390625, "num_input_tokens_seen": 461097504, "step": 8229 }, { "epoch": 18.329621380846326, "grad_norm": 12.527713775634766, "learning_rate": 1e-06, "loss": 0.3258, "num_input_tokens_seen": 461152312, "step": 8230 }, { "epoch": 18.329621380846326, "loss": 0.2726093530654907, "loss_ce": 7.59223330533132e-05, "loss_iou": 0.10693359375, "loss_num": 0.01177978515625, "loss_xval": 0.2734375, "num_input_tokens_seen": 461152312, "step": 8230 }, { "epoch": 18.33184855233853, "grad_norm": 17.93193244934082, "learning_rate": 1e-06, "loss": 0.3163, "num_input_tokens_seen": 461207068, "step": 8231 }, { "epoch": 18.33184855233853, "loss": 0.2764345407485962, "loss_ce": 6.737266085110605e-05, "loss_iou": 0.1123046875, "loss_num": 0.0103759765625, "loss_xval": 0.27734375, "num_input_tokens_seen": 461207068, "step": 8231 }, { "epoch": 18.334075723830736, "grad_norm": 15.471948623657227, "learning_rate": 1e-06, "loss": 0.3574, "num_input_tokens_seen": 461262992, "step": 8232 }, { "epoch": 18.334075723830736, "loss": 0.38583964109420776, "loss_ce": 6.69407527311705e-05, "loss_iou": 0.1748046875, "loss_num": 0.007293701171875, "loss_xval": 0.38671875, "num_input_tokens_seen": 461262992, "step": 8232 }, { "epoch": 18.33630289532294, "grad_norm": 19.41752815246582, "learning_rate": 1e-06, "loss": 0.2833, "num_input_tokens_seen": 461317216, "step": 8233 }, { "epoch": 18.33630289532294, "loss": 0.2918124198913574, "loss_ce": 6.43762614345178e-05, "loss_iou": 0.125, "loss_num": 0.0081787109375, "loss_xval": 0.291015625, "num_input_tokens_seen": 461317216, "step": 8233 }, { "epoch": 18.338530066815146, "grad_norm": 21.109203338623047, "learning_rate": 1e-06, "loss": 0.5718, "num_input_tokens_seen": 461372448, "step": 8234 }, { "epoch": 18.338530066815146, "loss": 0.7163882851600647, "loss_ce": 7.966908742673695e-05, "loss_iou": 0.330078125, "loss_num": 0.01116943359375, "loss_xval": 0.71484375, "num_input_tokens_seen": 461372448, "step": 8234 }, { "epoch": 18.34075723830735, "grad_norm": 11.767685890197754, "learning_rate": 1e-06, "loss": 0.3292, "num_input_tokens_seen": 461426004, "step": 8235 }, { "epoch": 18.34075723830735, "loss": 0.2593403160572052, "loss_ce": 0.0002003002300625667, "loss_iou": 0.11328125, "loss_num": 0.0064697265625, "loss_xval": 0.259765625, "num_input_tokens_seen": 461426004, "step": 8235 }, { "epoch": 18.342984409799556, "grad_norm": 17.519113540649414, "learning_rate": 1e-06, "loss": 0.3103, "num_input_tokens_seen": 461482340, "step": 8236 }, { "epoch": 18.342984409799556, "loss": 0.20156915485858917, "loss_ce": 9.21003520488739e-05, "loss_iou": 0.091796875, "loss_num": 0.003631591796875, "loss_xval": 0.201171875, "num_input_tokens_seen": 461482340, "step": 8236 }, { "epoch": 18.34521158129176, "grad_norm": 33.767704010009766, "learning_rate": 1e-06, "loss": 0.3042, "num_input_tokens_seen": 461537340, "step": 8237 }, { "epoch": 18.34521158129176, "loss": 0.31417620182037354, "loss_ce": 8.927624730858952e-05, "loss_iou": 0.1376953125, "loss_num": 0.007720947265625, "loss_xval": 0.314453125, "num_input_tokens_seen": 461537340, "step": 8237 }, { "epoch": 18.347438752783965, "grad_norm": 12.72602653503418, "learning_rate": 1e-06, "loss": 0.2971, "num_input_tokens_seen": 461593824, "step": 8238 }, { "epoch": 18.347438752783965, "loss": 0.3771233558654785, "loss_ce": 7.865474617574364e-05, "loss_iou": 0.1513671875, "loss_num": 0.014892578125, "loss_xval": 0.376953125, "num_input_tokens_seen": 461593824, "step": 8238 }, { "epoch": 18.34966592427617, "grad_norm": 19.39862632751465, "learning_rate": 1e-06, "loss": 0.3585, "num_input_tokens_seen": 461648568, "step": 8239 }, { "epoch": 18.34966592427617, "loss": 0.33659249544143677, "loss_ce": 7.516345067415386e-05, "loss_iou": 0.1484375, "loss_num": 0.007720947265625, "loss_xval": 0.3359375, "num_input_tokens_seen": 461648568, "step": 8239 }, { "epoch": 18.351893095768375, "grad_norm": 19.25311851501465, "learning_rate": 1e-06, "loss": 0.6112, "num_input_tokens_seen": 461705024, "step": 8240 }, { "epoch": 18.351893095768375, "loss": 0.6639153361320496, "loss_ce": 9.700231021270156e-05, "loss_iou": 0.279296875, "loss_num": 0.0211181640625, "loss_xval": 0.6640625, "num_input_tokens_seen": 461705024, "step": 8240 }, { "epoch": 18.35412026726058, "grad_norm": 13.526534080505371, "learning_rate": 1e-06, "loss": 0.2696, "num_input_tokens_seen": 461762520, "step": 8241 }, { "epoch": 18.35412026726058, "loss": 0.23994842171669006, "loss_ce": 8.790172432782128e-05, "loss_iou": 0.109375, "loss_num": 0.004119873046875, "loss_xval": 0.240234375, "num_input_tokens_seen": 461762520, "step": 8241 }, { "epoch": 18.356347438752785, "grad_norm": 16.30290412902832, "learning_rate": 1e-06, "loss": 0.3445, "num_input_tokens_seen": 461820448, "step": 8242 }, { "epoch": 18.356347438752785, "loss": 0.34260737895965576, "loss_ce": 7.808158989064395e-05, "loss_iou": 0.1572265625, "loss_num": 0.0057373046875, "loss_xval": 0.341796875, "num_input_tokens_seen": 461820448, "step": 8242 }, { "epoch": 18.35857461024499, "grad_norm": 16.620193481445312, "learning_rate": 1e-06, "loss": 0.2799, "num_input_tokens_seen": 461876008, "step": 8243 }, { "epoch": 18.35857461024499, "loss": 0.2714713215827942, "loss_ce": 0.00010899978951783851, "loss_iou": 0.0947265625, "loss_num": 0.016357421875, "loss_xval": 0.271484375, "num_input_tokens_seen": 461876008, "step": 8243 }, { "epoch": 18.360801781737194, "grad_norm": 15.673192977905273, "learning_rate": 1e-06, "loss": 0.4081, "num_input_tokens_seen": 461932492, "step": 8244 }, { "epoch": 18.360801781737194, "loss": 0.37256646156311035, "loss_ce": 6.890152872074395e-05, "loss_iou": 0.15625, "loss_num": 0.01220703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 461932492, "step": 8244 }, { "epoch": 18.3630289532294, "grad_norm": 15.485980987548828, "learning_rate": 1e-06, "loss": 0.4505, "num_input_tokens_seen": 461989756, "step": 8245 }, { "epoch": 18.3630289532294, "loss": 0.4861689805984497, "loss_ce": 8.499999967170879e-05, "loss_iou": 0.2041015625, "loss_num": 0.015625, "loss_xval": 0.486328125, "num_input_tokens_seen": 461989756, "step": 8245 }, { "epoch": 18.365256124721604, "grad_norm": 31.88662338256836, "learning_rate": 1e-06, "loss": 0.5343, "num_input_tokens_seen": 462045504, "step": 8246 }, { "epoch": 18.365256124721604, "loss": 0.636799156665802, "loss_ce": 8.0388635979034e-05, "loss_iou": 0.255859375, "loss_num": 0.024658203125, "loss_xval": 0.63671875, "num_input_tokens_seen": 462045504, "step": 8246 }, { "epoch": 18.36748329621381, "grad_norm": 17.961257934570312, "learning_rate": 1e-06, "loss": 0.3355, "num_input_tokens_seen": 462101768, "step": 8247 }, { "epoch": 18.36748329621381, "loss": 0.29511559009552, "loss_ce": 7.165400893427432e-05, "loss_iou": 0.11767578125, "loss_num": 0.011962890625, "loss_xval": 0.294921875, "num_input_tokens_seen": 462101768, "step": 8247 }, { "epoch": 18.369710467706014, "grad_norm": 17.18083381652832, "learning_rate": 1e-06, "loss": 0.5334, "num_input_tokens_seen": 462158008, "step": 8248 }, { "epoch": 18.369710467706014, "loss": 0.5379419922828674, "loss_ce": 0.00010018746252171695, "loss_iou": 0.234375, "loss_num": 0.01361083984375, "loss_xval": 0.5390625, "num_input_tokens_seen": 462158008, "step": 8248 }, { "epoch": 18.37193763919822, "grad_norm": 20.73891258239746, "learning_rate": 1e-06, "loss": 0.3165, "num_input_tokens_seen": 462213512, "step": 8249 }, { "epoch": 18.37193763919822, "loss": 0.3529818058013916, "loss_ce": 7.652836939087138e-05, "loss_iou": 0.154296875, "loss_num": 0.009033203125, "loss_xval": 0.353515625, "num_input_tokens_seen": 462213512, "step": 8249 }, { "epoch": 18.374164810690424, "grad_norm": 18.682357788085938, "learning_rate": 1e-06, "loss": 0.3058, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "eval_seeclick_web_CIoU": 0.5856335759162903, "eval_seeclick_web_GIoU": 0.5846323072910309, "eval_seeclick_web_IoU": 0.604880303144455, "eval_seeclick_web_MAE_all": 0.015254544327035546, "eval_seeclick_web_MAE_h": 0.0071105193346738815, "eval_seeclick_web_MAE_w": 0.015218731015920639, "eval_seeclick_web_MAE_x_boxes": 0.008255395339801908, "eval_seeclick_web_MAE_y_boxes": 0.02131559420377016, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9020078778266907, "eval_seeclick_web_loss_ce": 0.00012946181232109666, "eval_seeclick_web_loss_iou": 0.41748046875, "eval_seeclick_web_loss_num": 0.012150764465332031, "eval_seeclick_web_loss_xval": 0.896484375, "eval_seeclick_web_runtime": 24.3274, "eval_seeclick_web_samples_per_second": 2.055, "eval_seeclick_web_steps_per_second": 0.082, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "eval_icons_CIoU": 0.2648079916834831, "eval_icons_GIoU": 0.2893849313259125, "eval_icons_IoU": 0.341605544090271, "eval_icons_MAE_all": 0.06054504215717316, "eval_icons_MAE_h": 0.03238860424607992, "eval_icons_MAE_w": 0.06985406205058098, "eval_icons_MAE_x_boxes": 0.05330132879316807, "eval_icons_MAE_y_boxes": 0.03792322427034378, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7429778575897217, "eval_icons_loss_ce": 0.00015769631136208773, "eval_icons_loss_iou": 0.6812744140625, "eval_icons_loss_num": 0.059600830078125, "eval_icons_loss_xval": 1.66064453125, "eval_icons_runtime": 23.8883, "eval_icons_samples_per_second": 2.093, "eval_icons_steps_per_second": 0.084, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "eval_screenspot_CIoU": 0.3744619091351827, "eval_screenspot_GIoU": 0.391864409049352, "eval_screenspot_IoU": 0.4460846583048503, "eval_screenspot_MAE_all": 0.056168291717767715, "eval_screenspot_MAE_h": 0.03892662810782591, "eval_screenspot_MAE_w": 0.0636625016729037, "eval_screenspot_MAE_x_boxes": 0.06623268065353234, "eval_screenspot_MAE_y_boxes": 0.03843462746590376, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 1.5575822591781616, "eval_screenspot_loss_ce": 0.00017815509151356915, "eval_screenspot_loss_iou": 0.6473795572916666, "eval_screenspot_loss_num": 0.06395212809244792, "eval_screenspot_loss_xval": 1.6150716145833333, "eval_screenspot_runtime": 40.5304, "eval_screenspot_samples_per_second": 2.196, "eval_screenspot_steps_per_second": 0.074, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "eval_compot_CIoU": 0.3456965386867523, "eval_compot_GIoU": 0.3530429005622864, "eval_compot_IoU": 0.4042937010526657, "eval_compot_MAE_all": 0.017928121611475945, "eval_compot_MAE_h": 0.008343497524037957, "eval_compot_MAE_w": 0.020461782813072205, "eval_compot_MAE_x_boxes": 0.030165866017341614, "eval_compot_MAE_y_boxes": 0.006972244009375572, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.4054356813430786, "eval_compot_loss_ce": 0.00012335985229583457, "eval_compot_loss_iou": 0.6524658203125, "eval_compot_loss_num": 0.016485214233398438, "eval_compot_loss_xval": 1.385986328125, "eval_compot_runtime": 25.2618, "eval_compot_samples_per_second": 1.979, "eval_compot_steps_per_second": 0.079, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "eval_custom_ui_val_CIoU": 0.4731468657652537, "eval_custom_ui_val_GIoU": 0.4772413820028305, "eval_custom_ui_val_IoU": 0.5357410940859053, "eval_custom_ui_val_MAE_all": 0.02722639176580641, "eval_custom_ui_val_MAE_h": 0.013777732486940093, "eval_custom_ui_val_MAE_w": 0.03672494749642081, "eval_custom_ui_val_MAE_x_boxes": 0.03293217242591911, "eval_custom_ui_val_MAE_y_boxes": 0.013356222900458507, "eval_custom_ui_val_inside_bbox": 0.7719907429483202, "eval_custom_ui_val_loss": 1.1706430912017822, "eval_custom_ui_val_loss_ce": 0.0001427585027866169, "eval_custom_ui_val_loss_iou": 0.501953125, "eval_custom_ui_val_loss_num": 0.0238491694132487, "eval_custom_ui_val_loss_xval": 1.1229383680555556, "eval_custom_ui_val_runtime": 76.2841, "eval_custom_ui_val_samples_per_second": 3.474, "eval_custom_ui_val_steps_per_second": 0.118, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.374164810690424, "loss": 0.8289626836776733, "loss_ce": 0.00010525943071115762, "loss_iou": 0.373046875, "loss_num": 0.0166015625, "loss_xval": 0.828125, "num_input_tokens_seen": 462271092, "step": 8250 }, { "epoch": 18.37639198218263, "grad_norm": 33.0701789855957, "learning_rate": 1e-06, "loss": 0.4336, "num_input_tokens_seen": 462326668, "step": 8251 }, { "epoch": 18.37639198218263, "loss": 0.485931396484375, "loss_ce": 9.15604323381558e-05, "loss_iou": 0.228515625, "loss_num": 0.0057373046875, "loss_xval": 0.486328125, "num_input_tokens_seen": 462326668, "step": 8251 }, { "epoch": 18.378619153674833, "grad_norm": 18.959814071655273, "learning_rate": 1e-06, "loss": 0.3617, "num_input_tokens_seen": 462381308, "step": 8252 }, { "epoch": 18.378619153674833, "loss": 0.35047417879104614, "loss_ce": 7.13385088602081e-05, "loss_iou": 0.1669921875, "loss_num": 0.003143310546875, "loss_xval": 0.349609375, "num_input_tokens_seen": 462381308, "step": 8252 }, { "epoch": 18.380846325167038, "grad_norm": 18.871074676513672, "learning_rate": 1e-06, "loss": 0.3387, "num_input_tokens_seen": 462439476, "step": 8253 }, { "epoch": 18.380846325167038, "loss": 0.4407457113265991, "loss_ce": 7.18739174772054e-05, "loss_iou": 0.1982421875, "loss_num": 0.0087890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 462439476, "step": 8253 }, { "epoch": 18.383073496659243, "grad_norm": 17.004552841186523, "learning_rate": 1e-06, "loss": 0.3689, "num_input_tokens_seen": 462492792, "step": 8254 }, { "epoch": 18.383073496659243, "loss": 0.34425753355026245, "loss_ce": 8.024969429243356e-05, "loss_iou": 0.150390625, "loss_num": 0.00848388671875, "loss_xval": 0.34375, "num_input_tokens_seen": 462492792, "step": 8254 }, { "epoch": 18.385300668151448, "grad_norm": 20.56110382080078, "learning_rate": 1e-06, "loss": 0.3938, "num_input_tokens_seen": 462549204, "step": 8255 }, { "epoch": 18.385300668151448, "loss": 0.4753004312515259, "loss_ce": 8.071900811046362e-05, "loss_iou": 0.1923828125, "loss_num": 0.0181884765625, "loss_xval": 0.474609375, "num_input_tokens_seen": 462549204, "step": 8255 }, { "epoch": 18.387527839643653, "grad_norm": 21.641469955444336, "learning_rate": 1e-06, "loss": 0.3759, "num_input_tokens_seen": 462604868, "step": 8256 }, { "epoch": 18.387527839643653, "loss": 0.42918264865875244, "loss_ce": 7.497478509321809e-05, "loss_iou": 0.162109375, "loss_num": 0.0208740234375, "loss_xval": 0.4296875, "num_input_tokens_seen": 462604868, "step": 8256 }, { "epoch": 18.389755011135858, "grad_norm": 10.594487190246582, "learning_rate": 1e-06, "loss": 0.4945, "num_input_tokens_seen": 462662936, "step": 8257 }, { "epoch": 18.389755011135858, "loss": 0.47706377506256104, "loss_ce": 7.405664655379951e-05, "loss_iou": 0.2060546875, "loss_num": 0.012939453125, "loss_xval": 0.4765625, "num_input_tokens_seen": 462662936, "step": 8257 }, { "epoch": 18.391982182628063, "grad_norm": 19.134235382080078, "learning_rate": 1e-06, "loss": 0.4228, "num_input_tokens_seen": 462717820, "step": 8258 }, { "epoch": 18.391982182628063, "loss": 0.30488812923431396, "loss_ce": 7.853595889173448e-05, "loss_iou": 0.1328125, "loss_num": 0.0079345703125, "loss_xval": 0.3046875, "num_input_tokens_seen": 462717820, "step": 8258 }, { "epoch": 18.394209354120267, "grad_norm": 20.989978790283203, "learning_rate": 1e-06, "loss": 0.3927, "num_input_tokens_seen": 462772384, "step": 8259 }, { "epoch": 18.394209354120267, "loss": 0.435162216424942, "loss_ce": 7.693507359363139e-05, "loss_iou": 0.185546875, "loss_num": 0.012939453125, "loss_xval": 0.435546875, "num_input_tokens_seen": 462772384, "step": 8259 }, { "epoch": 18.396436525612472, "grad_norm": 14.269499778747559, "learning_rate": 1e-06, "loss": 0.355, "num_input_tokens_seen": 462827028, "step": 8260 }, { "epoch": 18.396436525612472, "loss": 0.31690651178359985, "loss_ce": 7.30217871023342e-05, "loss_iou": 0.1455078125, "loss_num": 0.00506591796875, "loss_xval": 0.31640625, "num_input_tokens_seen": 462827028, "step": 8260 }, { "epoch": 18.398663697104677, "grad_norm": 15.133282661437988, "learning_rate": 1e-06, "loss": 0.55, "num_input_tokens_seen": 462883484, "step": 8261 }, { "epoch": 18.398663697104677, "loss": 0.5562236905097961, "loss_ce": 0.0002544423332437873, "loss_iou": 0.2041015625, "loss_num": 0.029541015625, "loss_xval": 0.5546875, "num_input_tokens_seen": 462883484, "step": 8261 }, { "epoch": 18.400890868596882, "grad_norm": 18.654014587402344, "learning_rate": 1e-06, "loss": 0.3458, "num_input_tokens_seen": 462941596, "step": 8262 }, { "epoch": 18.400890868596882, "loss": 0.4173450469970703, "loss_ce": 0.00010873953578993678, "loss_iou": 0.17578125, "loss_num": 0.01312255859375, "loss_xval": 0.41796875, "num_input_tokens_seen": 462941596, "step": 8262 }, { "epoch": 18.403118040089087, "grad_norm": 21.48236656188965, "learning_rate": 1e-06, "loss": 0.3443, "num_input_tokens_seen": 462997916, "step": 8263 }, { "epoch": 18.403118040089087, "loss": 0.29500818252563477, "loss_ce": 8.628673094790429e-05, "loss_iou": 0.1376953125, "loss_num": 0.003997802734375, "loss_xval": 0.294921875, "num_input_tokens_seen": 462997916, "step": 8263 }, { "epoch": 18.40534521158129, "grad_norm": 29.181659698486328, "learning_rate": 1e-06, "loss": 0.4467, "num_input_tokens_seen": 463053976, "step": 8264 }, { "epoch": 18.40534521158129, "loss": 0.5064201354980469, "loss_ce": 7.253700459841639e-05, "loss_iou": 0.236328125, "loss_num": 0.00689697265625, "loss_xval": 0.5078125, "num_input_tokens_seen": 463053976, "step": 8264 }, { "epoch": 18.407572383073497, "grad_norm": 18.721237182617188, "learning_rate": 1e-06, "loss": 0.4064, "num_input_tokens_seen": 463111504, "step": 8265 }, { "epoch": 18.407572383073497, "loss": 0.45783811807632446, "loss_ce": 7.442037167493254e-05, "loss_iou": 0.2099609375, "loss_num": 0.007568359375, "loss_xval": 0.45703125, "num_input_tokens_seen": 463111504, "step": 8265 }, { "epoch": 18.4097995545657, "grad_norm": 57.14765548706055, "learning_rate": 1e-06, "loss": 0.7487, "num_input_tokens_seen": 463168420, "step": 8266 }, { "epoch": 18.4097995545657, "loss": 0.6589952707290649, "loss_ce": 5.971048813080415e-05, "loss_iou": 0.29296875, "loss_num": 0.01495361328125, "loss_xval": 0.66015625, "num_input_tokens_seen": 463168420, "step": 8266 }, { "epoch": 18.412026726057906, "grad_norm": 18.461252212524414, "learning_rate": 1e-06, "loss": 0.3474, "num_input_tokens_seen": 463225216, "step": 8267 }, { "epoch": 18.412026726057906, "loss": 0.38068366050720215, "loss_ce": 6.842733273515478e-05, "loss_iou": 0.1767578125, "loss_num": 0.005462646484375, "loss_xval": 0.380859375, "num_input_tokens_seen": 463225216, "step": 8267 }, { "epoch": 18.41425389755011, "grad_norm": 23.637659072875977, "learning_rate": 1e-06, "loss": 0.5465, "num_input_tokens_seen": 463279816, "step": 8268 }, { "epoch": 18.41425389755011, "loss": 0.513511061668396, "loss_ce": 8.326944953296334e-05, "loss_iou": 0.232421875, "loss_num": 0.00958251953125, "loss_xval": 0.51171875, "num_input_tokens_seen": 463279816, "step": 8268 }, { "epoch": 18.416481069042316, "grad_norm": 19.79255485534668, "learning_rate": 1e-06, "loss": 0.5626, "num_input_tokens_seen": 463336080, "step": 8269 }, { "epoch": 18.416481069042316, "loss": 0.5551245808601379, "loss_ce": 7.088375423336402e-05, "loss_iou": 0.2255859375, "loss_num": 0.0208740234375, "loss_xval": 0.5546875, "num_input_tokens_seen": 463336080, "step": 8269 }, { "epoch": 18.41870824053452, "grad_norm": 22.363147735595703, "learning_rate": 1e-06, "loss": 0.3449, "num_input_tokens_seen": 463392188, "step": 8270 }, { "epoch": 18.41870824053452, "loss": 0.27490442991256714, "loss_ce": 6.311971810646355e-05, "loss_iou": 0.1181640625, "loss_num": 0.0078125, "loss_xval": 0.275390625, "num_input_tokens_seen": 463392188, "step": 8270 }, { "epoch": 18.420935412026726, "grad_norm": 12.046845436096191, "learning_rate": 1e-06, "loss": 0.3125, "num_input_tokens_seen": 463450560, "step": 8271 }, { "epoch": 18.420935412026726, "loss": 0.33826929330825806, "loss_ce": 7.349113002419472e-05, "loss_iou": 0.1396484375, "loss_num": 0.01190185546875, "loss_xval": 0.337890625, "num_input_tokens_seen": 463450560, "step": 8271 }, { "epoch": 18.42316258351893, "grad_norm": 14.040665626525879, "learning_rate": 1e-06, "loss": 0.3608, "num_input_tokens_seen": 463504208, "step": 8272 }, { "epoch": 18.42316258351893, "loss": 0.2680038809776306, "loss_ce": 0.00012056898412993178, "loss_iou": 0.10986328125, "loss_num": 0.00970458984375, "loss_xval": 0.267578125, "num_input_tokens_seen": 463504208, "step": 8272 }, { "epoch": 18.425389755011135, "grad_norm": 20.083757400512695, "learning_rate": 1e-06, "loss": 0.3636, "num_input_tokens_seen": 463562476, "step": 8273 }, { "epoch": 18.425389755011135, "loss": 0.23639193177223206, "loss_ce": 6.381591811077669e-05, "loss_iou": 0.107421875, "loss_num": 0.0042724609375, "loss_xval": 0.236328125, "num_input_tokens_seen": 463562476, "step": 8273 }, { "epoch": 18.42761692650334, "grad_norm": 22.100793838500977, "learning_rate": 1e-06, "loss": 0.427, "num_input_tokens_seen": 463617160, "step": 8274 }, { "epoch": 18.42761692650334, "loss": 0.23444892466068268, "loss_ce": 5.8669171266956255e-05, "loss_iou": 0.0869140625, "loss_num": 0.01214599609375, "loss_xval": 0.234375, "num_input_tokens_seen": 463617160, "step": 8274 }, { "epoch": 18.429844097995545, "grad_norm": 20.41813087463379, "learning_rate": 1e-06, "loss": 0.4329, "num_input_tokens_seen": 463673804, "step": 8275 }, { "epoch": 18.429844097995545, "loss": 0.38324350118637085, "loss_ce": 6.480171578004956e-05, "loss_iou": 0.1572265625, "loss_num": 0.0135498046875, "loss_xval": 0.3828125, "num_input_tokens_seen": 463673804, "step": 8275 }, { "epoch": 18.43207126948775, "grad_norm": 17.835351943969727, "learning_rate": 1e-06, "loss": 0.4192, "num_input_tokens_seen": 463731528, "step": 8276 }, { "epoch": 18.43207126948775, "loss": 0.3969186246395111, "loss_ce": 6.806287274230272e-05, "loss_iou": 0.166015625, "loss_num": 0.0130615234375, "loss_xval": 0.396484375, "num_input_tokens_seen": 463731528, "step": 8276 }, { "epoch": 18.434298440979955, "grad_norm": 20.909772872924805, "learning_rate": 1e-06, "loss": 0.3829, "num_input_tokens_seen": 463786092, "step": 8277 }, { "epoch": 18.434298440979955, "loss": 0.35701119899749756, "loss_ce": 7.761004235362634e-05, "loss_iou": 0.14453125, "loss_num": 0.01348876953125, "loss_xval": 0.357421875, "num_input_tokens_seen": 463786092, "step": 8277 }, { "epoch": 18.43652561247216, "grad_norm": 15.374547958374023, "learning_rate": 1e-06, "loss": 0.4143, "num_input_tokens_seen": 463838012, "step": 8278 }, { "epoch": 18.43652561247216, "loss": 0.4003612995147705, "loss_ce": 9.272516763303429e-05, "loss_iou": 0.1689453125, "loss_num": 0.0126953125, "loss_xval": 0.400390625, "num_input_tokens_seen": 463838012, "step": 8278 }, { "epoch": 18.438752783964365, "grad_norm": 12.226360321044922, "learning_rate": 1e-06, "loss": 0.4797, "num_input_tokens_seen": 463892444, "step": 8279 }, { "epoch": 18.438752783964365, "loss": 0.42820942401885986, "loss_ce": 7.8319848398678e-05, "loss_iou": 0.1884765625, "loss_num": 0.01007080078125, "loss_xval": 0.427734375, "num_input_tokens_seen": 463892444, "step": 8279 }, { "epoch": 18.44097995545657, "grad_norm": 17.95158576965332, "learning_rate": 1e-06, "loss": 0.3166, "num_input_tokens_seen": 463947952, "step": 8280 }, { "epoch": 18.44097995545657, "loss": 0.3516421616077423, "loss_ce": 7.965406985022128e-05, "loss_iou": 0.146484375, "loss_num": 0.01153564453125, "loss_xval": 0.3515625, "num_input_tokens_seen": 463947952, "step": 8280 }, { "epoch": 18.443207126948774, "grad_norm": 16.93501853942871, "learning_rate": 1e-06, "loss": 0.4472, "num_input_tokens_seen": 464005712, "step": 8281 }, { "epoch": 18.443207126948774, "loss": 0.47952741384506226, "loss_ce": 9.625754319131374e-05, "loss_iou": 0.2060546875, "loss_num": 0.01324462890625, "loss_xval": 0.478515625, "num_input_tokens_seen": 464005712, "step": 8281 }, { "epoch": 18.44543429844098, "grad_norm": 20.220458984375, "learning_rate": 1e-06, "loss": 0.4457, "num_input_tokens_seen": 464062920, "step": 8282 }, { "epoch": 18.44543429844098, "loss": 0.4217246472835541, "loss_ce": 9.378813410876319e-05, "loss_iou": 0.17578125, "loss_num": 0.0137939453125, "loss_xval": 0.421875, "num_input_tokens_seen": 464062920, "step": 8282 }, { "epoch": 18.447661469933184, "grad_norm": 15.873303413391113, "learning_rate": 1e-06, "loss": 0.2138, "num_input_tokens_seen": 464116484, "step": 8283 }, { "epoch": 18.447661469933184, "loss": 0.14678719639778137, "loss_ce": 6.631132418988273e-05, "loss_iou": 0.06298828125, "loss_num": 0.004241943359375, "loss_xval": 0.146484375, "num_input_tokens_seen": 464116484, "step": 8283 }, { "epoch": 18.44988864142539, "grad_norm": 18.4737548828125, "learning_rate": 1e-06, "loss": 0.41, "num_input_tokens_seen": 464172992, "step": 8284 }, { "epoch": 18.44988864142539, "loss": 0.3817276358604431, "loss_ce": 7.478396582882851e-05, "loss_iou": 0.1689453125, "loss_num": 0.00872802734375, "loss_xval": 0.380859375, "num_input_tokens_seen": 464172992, "step": 8284 }, { "epoch": 18.452115812917594, "grad_norm": 19.414098739624023, "learning_rate": 1e-06, "loss": 0.4462, "num_input_tokens_seen": 464225548, "step": 8285 }, { "epoch": 18.452115812917594, "loss": 0.42627277970314026, "loss_ce": 0.00012532366963569075, "loss_iou": 0.1865234375, "loss_num": 0.01068115234375, "loss_xval": 0.42578125, "num_input_tokens_seen": 464225548, "step": 8285 }, { "epoch": 18.4543429844098, "grad_norm": 16.32335090637207, "learning_rate": 1e-06, "loss": 0.3318, "num_input_tokens_seen": 464281668, "step": 8286 }, { "epoch": 18.4543429844098, "loss": 0.3537209630012512, "loss_ce": 8.327406249009073e-05, "loss_iou": 0.1572265625, "loss_num": 0.00787353515625, "loss_xval": 0.353515625, "num_input_tokens_seen": 464281668, "step": 8286 }, { "epoch": 18.456570155902003, "grad_norm": 15.517791748046875, "learning_rate": 1e-06, "loss": 0.3719, "num_input_tokens_seen": 464337160, "step": 8287 }, { "epoch": 18.456570155902003, "loss": 0.3465074300765991, "loss_ce": 0.0001023836521198973, "loss_iou": 0.1376953125, "loss_num": 0.01434326171875, "loss_xval": 0.345703125, "num_input_tokens_seen": 464337160, "step": 8287 }, { "epoch": 18.45879732739421, "grad_norm": 18.08324432373047, "learning_rate": 1e-06, "loss": 0.5502, "num_input_tokens_seen": 464392772, "step": 8288 }, { "epoch": 18.45879732739421, "loss": 0.6127073168754578, "loss_ce": 0.00015847003669478, "loss_iou": 0.23828125, "loss_num": 0.0272216796875, "loss_xval": 0.61328125, "num_input_tokens_seen": 464392772, "step": 8288 }, { "epoch": 18.461024498886413, "grad_norm": 18.83432960510254, "learning_rate": 1e-06, "loss": 0.3693, "num_input_tokens_seen": 464448784, "step": 8289 }, { "epoch": 18.461024498886413, "loss": 0.4016864001750946, "loss_ce": 7.508075213991106e-05, "loss_iou": 0.1845703125, "loss_num": 0.006378173828125, "loss_xval": 0.40234375, "num_input_tokens_seen": 464448784, "step": 8289 }, { "epoch": 18.463251670378618, "grad_norm": 13.436317443847656, "learning_rate": 1e-06, "loss": 0.3396, "num_input_tokens_seen": 464505648, "step": 8290 }, { "epoch": 18.463251670378618, "loss": 0.35365384817123413, "loss_ce": 7.717913103988394e-05, "loss_iou": 0.15234375, "loss_num": 0.009765625, "loss_xval": 0.353515625, "num_input_tokens_seen": 464505648, "step": 8290 }, { "epoch": 18.465478841870823, "grad_norm": 19.951303482055664, "learning_rate": 1e-06, "loss": 0.3453, "num_input_tokens_seen": 464559060, "step": 8291 }, { "epoch": 18.465478841870823, "loss": 0.38332536816596985, "loss_ce": 0.00014667093637399375, "loss_iou": 0.1708984375, "loss_num": 0.008056640625, "loss_xval": 0.3828125, "num_input_tokens_seen": 464559060, "step": 8291 }, { "epoch": 18.467706013363028, "grad_norm": 228.0105438232422, "learning_rate": 1e-06, "loss": 0.4727, "num_input_tokens_seen": 464614584, "step": 8292 }, { "epoch": 18.467706013363028, "loss": 0.3697110116481781, "loss_ce": 8.208431245293468e-05, "loss_iou": 0.150390625, "loss_num": 0.013916015625, "loss_xval": 0.369140625, "num_input_tokens_seen": 464614584, "step": 8292 }, { "epoch": 18.469933184855233, "grad_norm": 14.540281295776367, "learning_rate": 1e-06, "loss": 0.3425, "num_input_tokens_seen": 464668316, "step": 8293 }, { "epoch": 18.469933184855233, "loss": 0.34370023012161255, "loss_ce": 7.22910335753113e-05, "loss_iou": 0.15625, "loss_num": 0.006072998046875, "loss_xval": 0.34375, "num_input_tokens_seen": 464668316, "step": 8293 }, { "epoch": 18.472160356347437, "grad_norm": 20.030624389648438, "learning_rate": 1e-06, "loss": 0.4044, "num_input_tokens_seen": 464723240, "step": 8294 }, { "epoch": 18.472160356347437, "loss": 0.40011394023895264, "loss_ce": 8.954846998676658e-05, "loss_iou": 0.18359375, "loss_num": 0.00677490234375, "loss_xval": 0.400390625, "num_input_tokens_seen": 464723240, "step": 8294 }, { "epoch": 18.474387527839642, "grad_norm": 13.285386085510254, "learning_rate": 1e-06, "loss": 0.4026, "num_input_tokens_seen": 464779508, "step": 8295 }, { "epoch": 18.474387527839642, "loss": 0.223904550075531, "loss_ce": 8.862309186952189e-05, "loss_iou": 0.10498046875, "loss_num": 0.0027008056640625, "loss_xval": 0.2236328125, "num_input_tokens_seen": 464779508, "step": 8295 }, { "epoch": 18.476614699331847, "grad_norm": 18.240877151489258, "learning_rate": 1e-06, "loss": 0.3385, "num_input_tokens_seen": 464834748, "step": 8296 }, { "epoch": 18.476614699331847, "loss": 0.29583436250686646, "loss_ce": 8.852417522575706e-05, "loss_iou": 0.1376953125, "loss_num": 0.003936767578125, "loss_xval": 0.294921875, "num_input_tokens_seen": 464834748, "step": 8296 }, { "epoch": 18.478841870824052, "grad_norm": 15.406899452209473, "learning_rate": 1e-06, "loss": 0.3808, "num_input_tokens_seen": 464892396, "step": 8297 }, { "epoch": 18.478841870824052, "loss": 0.379098504781723, "loss_ce": 7.018136966507882e-05, "loss_iou": 0.1650390625, "loss_num": 0.00994873046875, "loss_xval": 0.37890625, "num_input_tokens_seen": 464892396, "step": 8297 }, { "epoch": 18.481069042316257, "grad_norm": 22.26421546936035, "learning_rate": 1e-06, "loss": 0.3392, "num_input_tokens_seen": 464949808, "step": 8298 }, { "epoch": 18.481069042316257, "loss": 0.21200445294380188, "loss_ce": 9.038990538101643e-05, "loss_iou": 0.091796875, "loss_num": 0.005645751953125, "loss_xval": 0.2119140625, "num_input_tokens_seen": 464949808, "step": 8298 }, { "epoch": 18.48329621380846, "grad_norm": 15.141512870788574, "learning_rate": 1e-06, "loss": 0.403, "num_input_tokens_seen": 465003316, "step": 8299 }, { "epoch": 18.48329621380846, "loss": 0.4960575997829437, "loss_ce": 8.59203573781997e-05, "loss_iou": 0.193359375, "loss_num": 0.0216064453125, "loss_xval": 0.49609375, "num_input_tokens_seen": 465003316, "step": 8299 }, { "epoch": 18.485523385300667, "grad_norm": 29.124523162841797, "learning_rate": 1e-06, "loss": 0.5493, "num_input_tokens_seen": 465058828, "step": 8300 }, { "epoch": 18.485523385300667, "loss": 0.6504954695701599, "loss_ce": 0.0001048904232447967, "loss_iou": 0.298828125, "loss_num": 0.01068115234375, "loss_xval": 0.6484375, "num_input_tokens_seen": 465058828, "step": 8300 }, { "epoch": 18.48775055679287, "grad_norm": 17.59904670715332, "learning_rate": 1e-06, "loss": 0.3864, "num_input_tokens_seen": 465114628, "step": 8301 }, { "epoch": 18.48775055679287, "loss": 0.263241708278656, "loss_ce": 0.00011917389929294586, "loss_iou": 0.115234375, "loss_num": 0.00653076171875, "loss_xval": 0.263671875, "num_input_tokens_seen": 465114628, "step": 8301 }, { "epoch": 18.489977728285076, "grad_norm": 22.050037384033203, "learning_rate": 1e-06, "loss": 0.4467, "num_input_tokens_seen": 465169408, "step": 8302 }, { "epoch": 18.489977728285076, "loss": 0.29093292355537415, "loss_ce": 6.987818778725341e-05, "loss_iou": 0.130859375, "loss_num": 0.005889892578125, "loss_xval": 0.291015625, "num_input_tokens_seen": 465169408, "step": 8302 }, { "epoch": 18.49220489977728, "grad_norm": 20.6636962890625, "learning_rate": 1e-06, "loss": 0.3993, "num_input_tokens_seen": 465224880, "step": 8303 }, { "epoch": 18.49220489977728, "loss": 0.41500934958457947, "loss_ce": 9.234360186383128e-05, "loss_iou": 0.173828125, "loss_num": 0.013671875, "loss_xval": 0.4140625, "num_input_tokens_seen": 465224880, "step": 8303 }, { "epoch": 18.494432071269486, "grad_norm": 23.35464859008789, "learning_rate": 1e-06, "loss": 0.3545, "num_input_tokens_seen": 465281588, "step": 8304 }, { "epoch": 18.494432071269486, "loss": 0.4025862514972687, "loss_ce": 7.466854003723711e-05, "loss_iou": 0.1708984375, "loss_num": 0.01226806640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 465281588, "step": 8304 }, { "epoch": 18.49665924276169, "grad_norm": 18.54194450378418, "learning_rate": 1e-06, "loss": 0.4179, "num_input_tokens_seen": 465336308, "step": 8305 }, { "epoch": 18.49665924276169, "loss": 0.4852302670478821, "loss_ce": 6.180583295645192e-05, "loss_iou": 0.2021484375, "loss_num": 0.0159912109375, "loss_xval": 0.484375, "num_input_tokens_seen": 465336308, "step": 8305 }, { "epoch": 18.498886414253896, "grad_norm": 16.24866485595703, "learning_rate": 1e-06, "loss": 0.2652, "num_input_tokens_seen": 465392124, "step": 8306 }, { "epoch": 18.498886414253896, "loss": 0.3223702311515808, "loss_ce": 0.00010461719648446888, "loss_iou": 0.1474609375, "loss_num": 0.005462646484375, "loss_xval": 0.322265625, "num_input_tokens_seen": 465392124, "step": 8306 }, { "epoch": 18.501113585746104, "grad_norm": 23.2437801361084, "learning_rate": 1e-06, "loss": 0.4014, "num_input_tokens_seen": 465448524, "step": 8307 }, { "epoch": 18.501113585746104, "loss": 0.32942575216293335, "loss_ce": 8.003541734069586e-05, "loss_iou": 0.1494140625, "loss_num": 0.006103515625, "loss_xval": 0.330078125, "num_input_tokens_seen": 465448524, "step": 8307 }, { "epoch": 18.50334075723831, "grad_norm": 17.066225051879883, "learning_rate": 1e-06, "loss": 0.5872, "num_input_tokens_seen": 465504656, "step": 8308 }, { "epoch": 18.50334075723831, "loss": 0.6132311820983887, "loss_ce": 7.201532571343705e-05, "loss_iou": 0.2412109375, "loss_num": 0.0260009765625, "loss_xval": 0.61328125, "num_input_tokens_seen": 465504656, "step": 8308 }, { "epoch": 18.505567928730514, "grad_norm": 37.251121520996094, "learning_rate": 1e-06, "loss": 0.325, "num_input_tokens_seen": 465560192, "step": 8309 }, { "epoch": 18.505567928730514, "loss": 0.24585197865962982, "loss_ce": 6.340416439343244e-05, "loss_iou": 0.109375, "loss_num": 0.00543212890625, "loss_xval": 0.24609375, "num_input_tokens_seen": 465560192, "step": 8309 }, { "epoch": 18.50779510022272, "grad_norm": 16.032716751098633, "learning_rate": 1e-06, "loss": 0.5305, "num_input_tokens_seen": 465618100, "step": 8310 }, { "epoch": 18.50779510022272, "loss": 0.629966139793396, "loss_ce": 8.333367441082373e-05, "loss_iou": 0.25390625, "loss_num": 0.024169921875, "loss_xval": 0.62890625, "num_input_tokens_seen": 465618100, "step": 8310 }, { "epoch": 18.510022271714924, "grad_norm": 15.98995304107666, "learning_rate": 1e-06, "loss": 0.2552, "num_input_tokens_seen": 465674400, "step": 8311 }, { "epoch": 18.510022271714924, "loss": 0.27437126636505127, "loss_ce": 7.928490958875045e-05, "loss_iou": 0.1298828125, "loss_num": 0.0030364990234375, "loss_xval": 0.2734375, "num_input_tokens_seen": 465674400, "step": 8311 }, { "epoch": 18.51224944320713, "grad_norm": 16.019752502441406, "learning_rate": 1e-06, "loss": 0.3386, "num_input_tokens_seen": 465732188, "step": 8312 }, { "epoch": 18.51224944320713, "loss": 0.5247637033462524, "loss_ce": 0.00010548259160714224, "loss_iou": 0.21484375, "loss_num": 0.01904296875, "loss_xval": 0.5234375, "num_input_tokens_seen": 465732188, "step": 8312 }, { "epoch": 18.514476614699333, "grad_norm": 17.876802444458008, "learning_rate": 1e-06, "loss": 0.3636, "num_input_tokens_seen": 465786628, "step": 8313 }, { "epoch": 18.514476614699333, "loss": 0.35969775915145874, "loss_ce": 7.861069752834737e-05, "loss_iou": 0.16796875, "loss_num": 0.004791259765625, "loss_xval": 0.359375, "num_input_tokens_seen": 465786628, "step": 8313 }, { "epoch": 18.51670378619154, "grad_norm": 15.022839546203613, "learning_rate": 1e-06, "loss": 0.3146, "num_input_tokens_seen": 465842532, "step": 8314 }, { "epoch": 18.51670378619154, "loss": 0.35521650314331055, "loss_ce": 0.00011394656030461192, "loss_iou": 0.14453125, "loss_num": 0.01318359375, "loss_xval": 0.35546875, "num_input_tokens_seen": 465842532, "step": 8314 }, { "epoch": 18.518930957683743, "grad_norm": 13.363021850585938, "learning_rate": 1e-06, "loss": 0.4604, "num_input_tokens_seen": 465900596, "step": 8315 }, { "epoch": 18.518930957683743, "loss": 0.583387017250061, "loss_ce": 0.00013512340956367552, "loss_iou": 0.224609375, "loss_num": 0.02685546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 465900596, "step": 8315 }, { "epoch": 18.521158129175948, "grad_norm": 15.131711959838867, "learning_rate": 1e-06, "loss": 0.4687, "num_input_tokens_seen": 465958308, "step": 8316 }, { "epoch": 18.521158129175948, "loss": 0.5298267602920532, "loss_ce": 7.216697849798948e-05, "loss_iou": 0.2158203125, "loss_num": 0.01953125, "loss_xval": 0.53125, "num_input_tokens_seen": 465958308, "step": 8316 }, { "epoch": 18.523385300668153, "grad_norm": 12.049201965332031, "learning_rate": 1e-06, "loss": 0.3896, "num_input_tokens_seen": 466015604, "step": 8317 }, { "epoch": 18.523385300668153, "loss": 0.2444000393152237, "loss_ce": 7.62987692723982e-05, "loss_iou": 0.09130859375, "loss_num": 0.01239013671875, "loss_xval": 0.244140625, "num_input_tokens_seen": 466015604, "step": 8317 }, { "epoch": 18.525612472160358, "grad_norm": 25.494447708129883, "learning_rate": 1e-06, "loss": 0.4866, "num_input_tokens_seen": 466072012, "step": 8318 }, { "epoch": 18.525612472160358, "loss": 0.5554917454719543, "loss_ce": 7.183050183812156e-05, "loss_iou": 0.2255859375, "loss_num": 0.0206298828125, "loss_xval": 0.5546875, "num_input_tokens_seen": 466072012, "step": 8318 }, { "epoch": 18.527839643652563, "grad_norm": 32.9737548828125, "learning_rate": 1e-06, "loss": 0.5518, "num_input_tokens_seen": 466126616, "step": 8319 }, { "epoch": 18.527839643652563, "loss": 0.5966952443122864, "loss_ce": 7.660294068045914e-05, "loss_iou": 0.21484375, "loss_num": 0.033447265625, "loss_xval": 0.59765625, "num_input_tokens_seen": 466126616, "step": 8319 }, { "epoch": 18.530066815144767, "grad_norm": 12.370501518249512, "learning_rate": 1e-06, "loss": 0.299, "num_input_tokens_seen": 466181408, "step": 8320 }, { "epoch": 18.530066815144767, "loss": 0.357611745595932, "loss_ce": 6.781626143492758e-05, "loss_iou": 0.162109375, "loss_num": 0.006805419921875, "loss_xval": 0.357421875, "num_input_tokens_seen": 466181408, "step": 8320 }, { "epoch": 18.532293986636972, "grad_norm": 15.927129745483398, "learning_rate": 1e-06, "loss": 0.383, "num_input_tokens_seen": 466238792, "step": 8321 }, { "epoch": 18.532293986636972, "loss": 0.40339329838752747, "loss_ce": 7.296907278941944e-05, "loss_iou": 0.1630859375, "loss_num": 0.01556396484375, "loss_xval": 0.40234375, "num_input_tokens_seen": 466238792, "step": 8321 }, { "epoch": 18.534521158129177, "grad_norm": 15.46563720703125, "learning_rate": 1e-06, "loss": 0.342, "num_input_tokens_seen": 466291660, "step": 8322 }, { "epoch": 18.534521158129177, "loss": 0.2837243676185608, "loss_ce": 0.00015505831106565893, "loss_iou": 0.12060546875, "loss_num": 0.0084228515625, "loss_xval": 0.283203125, "num_input_tokens_seen": 466291660, "step": 8322 }, { "epoch": 18.536748329621382, "grad_norm": 16.087427139282227, "learning_rate": 1e-06, "loss": 0.5815, "num_input_tokens_seen": 466344276, "step": 8323 }, { "epoch": 18.536748329621382, "loss": 0.5009580850601196, "loss_ce": 0.00010357146675232798, "loss_iou": 0.2158203125, "loss_num": 0.01385498046875, "loss_xval": 0.5, "num_input_tokens_seen": 466344276, "step": 8323 }, { "epoch": 18.538975501113587, "grad_norm": 20.600358963012695, "learning_rate": 1e-06, "loss": 0.5411, "num_input_tokens_seen": 466400420, "step": 8324 }, { "epoch": 18.538975501113587, "loss": 0.47846150398254395, "loss_ce": 6.796044181101024e-05, "loss_iou": 0.212890625, "loss_num": 0.0103759765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 466400420, "step": 8324 }, { "epoch": 18.54120267260579, "grad_norm": 26.053192138671875, "learning_rate": 1e-06, "loss": 0.3268, "num_input_tokens_seen": 466453920, "step": 8325 }, { "epoch": 18.54120267260579, "loss": 0.4151201546192169, "loss_ce": 8.109994087135419e-05, "loss_iou": 0.18359375, "loss_num": 0.0093994140625, "loss_xval": 0.4140625, "num_input_tokens_seen": 466453920, "step": 8325 }, { "epoch": 18.543429844097997, "grad_norm": 12.725341796875, "learning_rate": 1e-06, "loss": 0.3043, "num_input_tokens_seen": 466510280, "step": 8326 }, { "epoch": 18.543429844097997, "loss": 0.19257915019989014, "loss_ce": 7.426021329592913e-05, "loss_iou": 0.0888671875, "loss_num": 0.0030517578125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 466510280, "step": 8326 }, { "epoch": 18.5456570155902, "grad_norm": 19.8201904296875, "learning_rate": 1e-06, "loss": 0.6099, "num_input_tokens_seen": 466565908, "step": 8327 }, { "epoch": 18.5456570155902, "loss": 0.7313508987426758, "loss_ce": 0.00014971070049796253, "loss_iou": 0.296875, "loss_num": 0.0277099609375, "loss_xval": 0.73046875, "num_input_tokens_seen": 466565908, "step": 8327 }, { "epoch": 18.547884187082406, "grad_norm": 24.441503524780273, "learning_rate": 1e-06, "loss": 0.3867, "num_input_tokens_seen": 466624756, "step": 8328 }, { "epoch": 18.547884187082406, "loss": 0.46570825576782227, "loss_ce": 7.106846169335768e-05, "loss_iou": 0.2099609375, "loss_num": 0.00909423828125, "loss_xval": 0.46484375, "num_input_tokens_seen": 466624756, "step": 8328 }, { "epoch": 18.55011135857461, "grad_norm": 16.419023513793945, "learning_rate": 1e-06, "loss": 0.3623, "num_input_tokens_seen": 466682572, "step": 8329 }, { "epoch": 18.55011135857461, "loss": 0.24810969829559326, "loss_ce": 0.00036799628287553787, "loss_iou": 0.11279296875, "loss_num": 0.004425048828125, "loss_xval": 0.248046875, "num_input_tokens_seen": 466682572, "step": 8329 }, { "epoch": 18.552338530066816, "grad_norm": 29.954700469970703, "learning_rate": 1e-06, "loss": 0.2989, "num_input_tokens_seen": 466739668, "step": 8330 }, { "epoch": 18.552338530066816, "loss": 0.264358252286911, "loss_ce": 7.602167170261964e-05, "loss_iou": 0.1201171875, "loss_num": 0.004852294921875, "loss_xval": 0.263671875, "num_input_tokens_seen": 466739668, "step": 8330 }, { "epoch": 18.55456570155902, "grad_norm": 21.188688278198242, "learning_rate": 1e-06, "loss": 0.4046, "num_input_tokens_seen": 466798292, "step": 8331 }, { "epoch": 18.55456570155902, "loss": 0.2884581685066223, "loss_ce": 6.705736450385302e-05, "loss_iou": 0.1259765625, "loss_num": 0.007415771484375, "loss_xval": 0.2890625, "num_input_tokens_seen": 466798292, "step": 8331 }, { "epoch": 18.556792873051226, "grad_norm": 13.528090476989746, "learning_rate": 1e-06, "loss": 0.4685, "num_input_tokens_seen": 466856176, "step": 8332 }, { "epoch": 18.556792873051226, "loss": 0.5392540693283081, "loss_ce": 6.949950329726562e-05, "loss_iou": 0.22265625, "loss_num": 0.0186767578125, "loss_xval": 0.5390625, "num_input_tokens_seen": 466856176, "step": 8332 }, { "epoch": 18.55902004454343, "grad_norm": 18.641672134399414, "learning_rate": 1e-06, "loss": 0.4738, "num_input_tokens_seen": 466908128, "step": 8333 }, { "epoch": 18.55902004454343, "loss": 0.4274500608444214, "loss_ce": 8.191791857825592e-05, "loss_iou": 0.1787109375, "loss_num": 0.0140380859375, "loss_xval": 0.427734375, "num_input_tokens_seen": 466908128, "step": 8333 }, { "epoch": 18.561247216035635, "grad_norm": 29.179141998291016, "learning_rate": 1e-06, "loss": 0.3837, "num_input_tokens_seen": 466965624, "step": 8334 }, { "epoch": 18.561247216035635, "loss": 0.3577282428741455, "loss_ce": 6.22493025730364e-05, "loss_iou": 0.1591796875, "loss_num": 0.0078125, "loss_xval": 0.357421875, "num_input_tokens_seen": 466965624, "step": 8334 }, { "epoch": 18.56347438752784, "grad_norm": 15.943428993225098, "learning_rate": 1e-06, "loss": 0.4853, "num_input_tokens_seen": 467019384, "step": 8335 }, { "epoch": 18.56347438752784, "loss": 0.24585062265396118, "loss_ce": 6.203760858625174e-05, "loss_iou": 0.099609375, "loss_num": 0.0093994140625, "loss_xval": 0.24609375, "num_input_tokens_seen": 467019384, "step": 8335 }, { "epoch": 18.565701559020045, "grad_norm": 17.852943420410156, "learning_rate": 1e-06, "loss": 0.4954, "num_input_tokens_seen": 467076952, "step": 8336 }, { "epoch": 18.565701559020045, "loss": 0.46540504693984985, "loss_ce": 7.300135621335357e-05, "loss_iou": 0.20703125, "loss_num": 0.01031494140625, "loss_xval": 0.46484375, "num_input_tokens_seen": 467076952, "step": 8336 }, { "epoch": 18.56792873051225, "grad_norm": 16.556804656982422, "learning_rate": 1e-06, "loss": 0.4165, "num_input_tokens_seen": 467130340, "step": 8337 }, { "epoch": 18.56792873051225, "loss": 0.4885514974594116, "loss_ce": 8.713564602658153e-05, "loss_iou": 0.1796875, "loss_num": 0.0260009765625, "loss_xval": 0.48828125, "num_input_tokens_seen": 467130340, "step": 8337 }, { "epoch": 18.570155902004455, "grad_norm": 18.44285011291504, "learning_rate": 1e-06, "loss": 0.4634, "num_input_tokens_seen": 467187616, "step": 8338 }, { "epoch": 18.570155902004455, "loss": 0.5214405059814453, "loss_ce": 7.816489960532635e-05, "loss_iou": 0.205078125, "loss_num": 0.0223388671875, "loss_xval": 0.51953125, "num_input_tokens_seen": 467187616, "step": 8338 }, { "epoch": 18.57238307349666, "grad_norm": 16.12818717956543, "learning_rate": 1e-06, "loss": 0.5461, "num_input_tokens_seen": 467242808, "step": 8339 }, { "epoch": 18.57238307349666, "loss": 0.49351948499679565, "loss_ce": 0.00011128241749247536, "loss_iou": 0.201171875, "loss_num": 0.0181884765625, "loss_xval": 0.494140625, "num_input_tokens_seen": 467242808, "step": 8339 }, { "epoch": 18.574610244988865, "grad_norm": 13.428854942321777, "learning_rate": 1e-06, "loss": 0.3354, "num_input_tokens_seen": 467299608, "step": 8340 }, { "epoch": 18.574610244988865, "loss": 0.2194262444972992, "loss_ce": 6.589026452274993e-05, "loss_iou": 0.07470703125, "loss_num": 0.01397705078125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 467299608, "step": 8340 }, { "epoch": 18.57683741648107, "grad_norm": 341.0340881347656, "learning_rate": 1e-06, "loss": 0.552, "num_input_tokens_seen": 467354420, "step": 8341 }, { "epoch": 18.57683741648107, "loss": 0.6670901775360107, "loss_ce": 9.794821380637586e-05, "loss_iou": 0.287109375, "loss_num": 0.018798828125, "loss_xval": 0.66796875, "num_input_tokens_seen": 467354420, "step": 8341 }, { "epoch": 18.579064587973274, "grad_norm": 13.796717643737793, "learning_rate": 1e-06, "loss": 0.4222, "num_input_tokens_seen": 467409192, "step": 8342 }, { "epoch": 18.579064587973274, "loss": 0.46046194434165955, "loss_ce": 7.377237488981336e-05, "loss_iou": 0.2041015625, "loss_num": 0.01055908203125, "loss_xval": 0.4609375, "num_input_tokens_seen": 467409192, "step": 8342 }, { "epoch": 18.58129175946548, "grad_norm": 24.182491302490234, "learning_rate": 1e-06, "loss": 0.4398, "num_input_tokens_seen": 467464804, "step": 8343 }, { "epoch": 18.58129175946548, "loss": 0.4643157720565796, "loss_ce": 8.239349699579179e-05, "loss_iou": 0.216796875, "loss_num": 0.006134033203125, "loss_xval": 0.46484375, "num_input_tokens_seen": 467464804, "step": 8343 }, { "epoch": 18.583518930957684, "grad_norm": 29.6402645111084, "learning_rate": 1e-06, "loss": 0.5611, "num_input_tokens_seen": 467520644, "step": 8344 }, { "epoch": 18.583518930957684, "loss": 0.660957932472229, "loss_ce": 6.922356260474771e-05, "loss_iou": 0.2734375, "loss_num": 0.023193359375, "loss_xval": 0.66015625, "num_input_tokens_seen": 467520644, "step": 8344 }, { "epoch": 18.58574610244989, "grad_norm": 22.357826232910156, "learning_rate": 1e-06, "loss": 0.2284, "num_input_tokens_seen": 467578040, "step": 8345 }, { "epoch": 18.58574610244989, "loss": 0.16835345327854156, "loss_ce": 7.953326712595299e-05, "loss_iou": 0.06005859375, "loss_num": 0.00970458984375, "loss_xval": 0.16796875, "num_input_tokens_seen": 467578040, "step": 8345 }, { "epoch": 18.587973273942094, "grad_norm": 42.021976470947266, "learning_rate": 1e-06, "loss": 0.4582, "num_input_tokens_seen": 467633620, "step": 8346 }, { "epoch": 18.587973273942094, "loss": 0.41333815455436707, "loss_ce": 9.962193144019693e-05, "loss_iou": 0.189453125, "loss_num": 0.00701904296875, "loss_xval": 0.4140625, "num_input_tokens_seen": 467633620, "step": 8346 }, { "epoch": 18.5902004454343, "grad_norm": 21.736515045166016, "learning_rate": 1e-06, "loss": 0.374, "num_input_tokens_seen": 467691248, "step": 8347 }, { "epoch": 18.5902004454343, "loss": 0.37519749999046326, "loss_ce": 7.541398372268304e-05, "loss_iou": 0.1708984375, "loss_num": 0.00665283203125, "loss_xval": 0.375, "num_input_tokens_seen": 467691248, "step": 8347 }, { "epoch": 18.592427616926503, "grad_norm": 25.333066940307617, "learning_rate": 1e-06, "loss": 0.4718, "num_input_tokens_seen": 467744092, "step": 8348 }, { "epoch": 18.592427616926503, "loss": 0.39171791076660156, "loss_ce": 0.00011635862028924748, "loss_iou": 0.1748046875, "loss_num": 0.00836181640625, "loss_xval": 0.390625, "num_input_tokens_seen": 467744092, "step": 8348 }, { "epoch": 18.59465478841871, "grad_norm": 19.491870880126953, "learning_rate": 1e-06, "loss": 0.2949, "num_input_tokens_seen": 467800704, "step": 8349 }, { "epoch": 18.59465478841871, "loss": 0.2856776714324951, "loss_ce": 9.417271940037608e-05, "loss_iou": 0.11669921875, "loss_num": 0.0103759765625, "loss_xval": 0.28515625, "num_input_tokens_seen": 467800704, "step": 8349 }, { "epoch": 18.596881959910913, "grad_norm": 11.345683097839355, "learning_rate": 1e-06, "loss": 0.2598, "num_input_tokens_seen": 467858852, "step": 8350 }, { "epoch": 18.596881959910913, "loss": 0.23634421825408936, "loss_ce": 7.713548257015646e-05, "loss_iou": 0.1044921875, "loss_num": 0.005340576171875, "loss_xval": 0.236328125, "num_input_tokens_seen": 467858852, "step": 8350 }, { "epoch": 18.599109131403118, "grad_norm": 16.46609115600586, "learning_rate": 1e-06, "loss": 0.3114, "num_input_tokens_seen": 467914508, "step": 8351 }, { "epoch": 18.599109131403118, "loss": 0.2579895257949829, "loss_ce": 0.00011599328718148172, "loss_iou": 0.109375, "loss_num": 0.007781982421875, "loss_xval": 0.2578125, "num_input_tokens_seen": 467914508, "step": 8351 }, { "epoch": 18.601336302895323, "grad_norm": 21.424537658691406, "learning_rate": 1e-06, "loss": 0.4388, "num_input_tokens_seen": 467969980, "step": 8352 }, { "epoch": 18.601336302895323, "loss": 0.31832072138786316, "loss_ce": 8.341785724041983e-05, "loss_iou": 0.14453125, "loss_num": 0.005767822265625, "loss_xval": 0.318359375, "num_input_tokens_seen": 467969980, "step": 8352 }, { "epoch": 18.603563474387528, "grad_norm": 16.22272491455078, "learning_rate": 1e-06, "loss": 0.3837, "num_input_tokens_seen": 468026936, "step": 8353 }, { "epoch": 18.603563474387528, "loss": 0.42490488290786743, "loss_ce": 0.00010020087211159989, "loss_iou": 0.189453125, "loss_num": 0.009033203125, "loss_xval": 0.42578125, "num_input_tokens_seen": 468026936, "step": 8353 }, { "epoch": 18.605790645879733, "grad_norm": 19.997718811035156, "learning_rate": 1e-06, "loss": 0.3883, "num_input_tokens_seen": 468082576, "step": 8354 }, { "epoch": 18.605790645879733, "loss": 0.41608962416648865, "loss_ce": 7.400992035400122e-05, "loss_iou": 0.1953125, "loss_num": 0.004974365234375, "loss_xval": 0.416015625, "num_input_tokens_seen": 468082576, "step": 8354 }, { "epoch": 18.608017817371937, "grad_norm": 18.959848403930664, "learning_rate": 1e-06, "loss": 0.3883, "num_input_tokens_seen": 468139516, "step": 8355 }, { "epoch": 18.608017817371937, "loss": 0.3627595603466034, "loss_ce": 8.86675261426717e-05, "loss_iou": 0.1630859375, "loss_num": 0.00732421875, "loss_xval": 0.36328125, "num_input_tokens_seen": 468139516, "step": 8355 }, { "epoch": 18.610244988864142, "grad_norm": 10.21224308013916, "learning_rate": 1e-06, "loss": 0.4558, "num_input_tokens_seen": 468196392, "step": 8356 }, { "epoch": 18.610244988864142, "loss": 0.3999983072280884, "loss_ce": 9.597405733074993e-05, "loss_iou": 0.15234375, "loss_num": 0.01904296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 468196392, "step": 8356 }, { "epoch": 18.612472160356347, "grad_norm": 12.426268577575684, "learning_rate": 1e-06, "loss": 0.551, "num_input_tokens_seen": 468252860, "step": 8357 }, { "epoch": 18.612472160356347, "loss": 0.8520680069923401, "loss_ce": 7.824598287697881e-05, "loss_iou": 0.326171875, "loss_num": 0.039306640625, "loss_xval": 0.8515625, "num_input_tokens_seen": 468252860, "step": 8357 }, { "epoch": 18.614699331848552, "grad_norm": 19.070262908935547, "learning_rate": 1e-06, "loss": 0.3966, "num_input_tokens_seen": 468308700, "step": 8358 }, { "epoch": 18.614699331848552, "loss": 0.5038906931877136, "loss_ce": 0.00010653713979991153, "loss_iou": 0.201171875, "loss_num": 0.0203857421875, "loss_xval": 0.50390625, "num_input_tokens_seen": 468308700, "step": 8358 }, { "epoch": 18.616926503340757, "grad_norm": 29.98044776916504, "learning_rate": 1e-06, "loss": 0.6309, "num_input_tokens_seen": 468363020, "step": 8359 }, { "epoch": 18.616926503340757, "loss": 0.703240156173706, "loss_ce": 0.00011515267397044227, "loss_iou": 0.296875, "loss_num": 0.021728515625, "loss_xval": 0.703125, "num_input_tokens_seen": 468363020, "step": 8359 }, { "epoch": 18.619153674832962, "grad_norm": 23.00288963317871, "learning_rate": 1e-06, "loss": 0.5153, "num_input_tokens_seen": 468419740, "step": 8360 }, { "epoch": 18.619153674832962, "loss": 0.5316950678825378, "loss_ce": 7.883799844421446e-05, "loss_iou": 0.220703125, "loss_num": 0.0179443359375, "loss_xval": 0.53125, "num_input_tokens_seen": 468419740, "step": 8360 }, { "epoch": 18.621380846325167, "grad_norm": 26.114784240722656, "learning_rate": 1e-06, "loss": 0.4365, "num_input_tokens_seen": 468475752, "step": 8361 }, { "epoch": 18.621380846325167, "loss": 0.40563201904296875, "loss_ce": 0.00011443370021879673, "loss_iou": 0.1708984375, "loss_num": 0.0126953125, "loss_xval": 0.40625, "num_input_tokens_seen": 468475752, "step": 8361 }, { "epoch": 18.62360801781737, "grad_norm": 25.014408111572266, "learning_rate": 1e-06, "loss": 0.3513, "num_input_tokens_seen": 468529432, "step": 8362 }, { "epoch": 18.62360801781737, "loss": 0.3612205982208252, "loss_ce": 7.560283120255917e-05, "loss_iou": 0.1591796875, "loss_num": 0.0084228515625, "loss_xval": 0.361328125, "num_input_tokens_seen": 468529432, "step": 8362 }, { "epoch": 18.625835189309576, "grad_norm": 16.121501922607422, "learning_rate": 1e-06, "loss": 0.264, "num_input_tokens_seen": 468587328, "step": 8363 }, { "epoch": 18.625835189309576, "loss": 0.21666020154953003, "loss_ce": 9.220950596500188e-05, "loss_iou": 0.09326171875, "loss_num": 0.00592041015625, "loss_xval": 0.216796875, "num_input_tokens_seen": 468587328, "step": 8363 }, { "epoch": 18.62806236080178, "grad_norm": 17.978118896484375, "learning_rate": 1e-06, "loss": 0.4242, "num_input_tokens_seen": 468644720, "step": 8364 }, { "epoch": 18.62806236080178, "loss": 0.6226370334625244, "loss_ce": 7.844380888855085e-05, "loss_iou": 0.2578125, "loss_num": 0.021484375, "loss_xval": 0.62109375, "num_input_tokens_seen": 468644720, "step": 8364 }, { "epoch": 18.630289532293986, "grad_norm": 19.74319076538086, "learning_rate": 1e-06, "loss": 0.3705, "num_input_tokens_seen": 468700752, "step": 8365 }, { "epoch": 18.630289532293986, "loss": 0.3942309021949768, "loss_ce": 6.587227107957006e-05, "loss_iou": 0.14453125, "loss_num": 0.0213623046875, "loss_xval": 0.39453125, "num_input_tokens_seen": 468700752, "step": 8365 }, { "epoch": 18.63251670378619, "grad_norm": 17.06964683532715, "learning_rate": 1e-06, "loss": 0.4573, "num_input_tokens_seen": 468755944, "step": 8366 }, { "epoch": 18.63251670378619, "loss": 0.28583765029907227, "loss_ce": 7.103722600731999e-05, "loss_iou": 0.1318359375, "loss_num": 0.004302978515625, "loss_xval": 0.28515625, "num_input_tokens_seen": 468755944, "step": 8366 }, { "epoch": 18.634743875278396, "grad_norm": 13.701447486877441, "learning_rate": 1e-06, "loss": 0.4467, "num_input_tokens_seen": 468811364, "step": 8367 }, { "epoch": 18.634743875278396, "loss": 0.3969283103942871, "loss_ce": 7.772387471050024e-05, "loss_iou": 0.1806640625, "loss_num": 0.00714111328125, "loss_xval": 0.396484375, "num_input_tokens_seen": 468811364, "step": 8367 }, { "epoch": 18.6369710467706, "grad_norm": 20.248186111450195, "learning_rate": 1e-06, "loss": 0.3801, "num_input_tokens_seen": 468869972, "step": 8368 }, { "epoch": 18.6369710467706, "loss": 0.4675615429878235, "loss_ce": 9.328986925538629e-05, "loss_iou": 0.203125, "loss_num": 0.011962890625, "loss_xval": 0.466796875, "num_input_tokens_seen": 468869972, "step": 8368 }, { "epoch": 18.639198218262806, "grad_norm": 25.159513473510742, "learning_rate": 1e-06, "loss": 0.4678, "num_input_tokens_seen": 468926332, "step": 8369 }, { "epoch": 18.639198218262806, "loss": 0.600066065788269, "loss_ce": 9.047788626048714e-05, "loss_iou": 0.26953125, "loss_num": 0.01239013671875, "loss_xval": 0.6015625, "num_input_tokens_seen": 468926332, "step": 8369 }, { "epoch": 18.64142538975501, "grad_norm": 14.643291473388672, "learning_rate": 1e-06, "loss": 0.3295, "num_input_tokens_seen": 468983940, "step": 8370 }, { "epoch": 18.64142538975501, "loss": 0.25062063336372375, "loss_ce": 7.132052996894345e-05, "loss_iou": 0.09130859375, "loss_num": 0.01361083984375, "loss_xval": 0.25, "num_input_tokens_seen": 468983940, "step": 8370 }, { "epoch": 18.643652561247215, "grad_norm": 15.840713500976562, "learning_rate": 1e-06, "loss": 0.3142, "num_input_tokens_seen": 469040776, "step": 8371 }, { "epoch": 18.643652561247215, "loss": 0.3157404363155365, "loss_ce": 6.659841164946556e-05, "loss_iou": 0.1416015625, "loss_num": 0.006439208984375, "loss_xval": 0.31640625, "num_input_tokens_seen": 469040776, "step": 8371 }, { "epoch": 18.64587973273942, "grad_norm": 40.83860397338867, "learning_rate": 1e-06, "loss": 0.4205, "num_input_tokens_seen": 469098340, "step": 8372 }, { "epoch": 18.64587973273942, "loss": 0.46250006556510925, "loss_ce": 9.772660268936306e-05, "loss_iou": 0.2119140625, "loss_num": 0.0078125, "loss_xval": 0.462890625, "num_input_tokens_seen": 469098340, "step": 8372 }, { "epoch": 18.648106904231625, "grad_norm": 16.642513275146484, "learning_rate": 1e-06, "loss": 0.3309, "num_input_tokens_seen": 469155160, "step": 8373 }, { "epoch": 18.648106904231625, "loss": 0.38674429059028625, "loss_ce": 8.656815043650568e-05, "loss_iou": 0.177734375, "loss_num": 0.006134033203125, "loss_xval": 0.38671875, "num_input_tokens_seen": 469155160, "step": 8373 }, { "epoch": 18.65033407572383, "grad_norm": 16.026294708251953, "learning_rate": 1e-06, "loss": 0.4616, "num_input_tokens_seen": 469211288, "step": 8374 }, { "epoch": 18.65033407572383, "loss": 0.40791958570480347, "loss_ce": 8.267381781479344e-05, "loss_iou": 0.1865234375, "loss_num": 0.00677490234375, "loss_xval": 0.408203125, "num_input_tokens_seen": 469211288, "step": 8374 }, { "epoch": 18.652561247216035, "grad_norm": 26.638465881347656, "learning_rate": 1e-06, "loss": 0.3375, "num_input_tokens_seen": 469267800, "step": 8375 }, { "epoch": 18.652561247216035, "loss": 0.3067091703414917, "loss_ce": 6.856051186332479e-05, "loss_iou": 0.13671875, "loss_num": 0.00653076171875, "loss_xval": 0.306640625, "num_input_tokens_seen": 469267800, "step": 8375 }, { "epoch": 18.65478841870824, "grad_norm": 12.635597229003906, "learning_rate": 1e-06, "loss": 0.4255, "num_input_tokens_seen": 469325572, "step": 8376 }, { "epoch": 18.65478841870824, "loss": 0.5839434266090393, "loss_ce": 8.110229828162119e-05, "loss_iou": 0.259765625, "loss_num": 0.01312255859375, "loss_xval": 0.58203125, "num_input_tokens_seen": 469325572, "step": 8376 }, { "epoch": 18.657015590200444, "grad_norm": 15.969237327575684, "learning_rate": 1e-06, "loss": 0.3131, "num_input_tokens_seen": 469381652, "step": 8377 }, { "epoch": 18.657015590200444, "loss": 0.3782818019390106, "loss_ce": 7.744554022792727e-05, "loss_iou": 0.173828125, "loss_num": 0.006195068359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 469381652, "step": 8377 }, { "epoch": 18.65924276169265, "grad_norm": 16.6336727142334, "learning_rate": 1e-06, "loss": 0.3071, "num_input_tokens_seen": 469437196, "step": 8378 }, { "epoch": 18.65924276169265, "loss": 0.32802021503448486, "loss_ce": 7.833550625946373e-05, "loss_iou": 0.126953125, "loss_num": 0.01507568359375, "loss_xval": 0.328125, "num_input_tokens_seen": 469437196, "step": 8378 }, { "epoch": 18.661469933184854, "grad_norm": 80.5313491821289, "learning_rate": 1e-06, "loss": 0.3585, "num_input_tokens_seen": 469493580, "step": 8379 }, { "epoch": 18.661469933184854, "loss": 0.3814215064048767, "loss_ce": 7.384659693343565e-05, "loss_iou": 0.169921875, "loss_num": 0.0081787109375, "loss_xval": 0.380859375, "num_input_tokens_seen": 469493580, "step": 8379 }, { "epoch": 18.66369710467706, "grad_norm": 16.55701446533203, "learning_rate": 1e-06, "loss": 0.5018, "num_input_tokens_seen": 469551092, "step": 8380 }, { "epoch": 18.66369710467706, "loss": 0.4989837408065796, "loss_ce": 8.234399138018489e-05, "loss_iou": 0.2099609375, "loss_num": 0.0159912109375, "loss_xval": 0.498046875, "num_input_tokens_seen": 469551092, "step": 8380 }, { "epoch": 18.665924276169264, "grad_norm": 23.459165573120117, "learning_rate": 1e-06, "loss": 0.5548, "num_input_tokens_seen": 469611232, "step": 8381 }, { "epoch": 18.665924276169264, "loss": 0.6222162246704102, "loss_ce": 8.488097955705598e-05, "loss_iou": 0.2734375, "loss_num": 0.0150146484375, "loss_xval": 0.62109375, "num_input_tokens_seen": 469611232, "step": 8381 }, { "epoch": 18.66815144766147, "grad_norm": 17.257272720336914, "learning_rate": 1e-06, "loss": 0.3304, "num_input_tokens_seen": 469664948, "step": 8382 }, { "epoch": 18.66815144766147, "loss": 0.2271890938282013, "loss_ce": 7.72664716350846e-05, "loss_iou": 0.1005859375, "loss_num": 0.0052490234375, "loss_xval": 0.2275390625, "num_input_tokens_seen": 469664948, "step": 8382 }, { "epoch": 18.670378619153674, "grad_norm": 20.108436584472656, "learning_rate": 1e-06, "loss": 0.405, "num_input_tokens_seen": 469718928, "step": 8383 }, { "epoch": 18.670378619153674, "loss": 0.42782682180404663, "loss_ce": 9.24727282836102e-05, "loss_iou": 0.189453125, "loss_num": 0.0096435546875, "loss_xval": 0.427734375, "num_input_tokens_seen": 469718928, "step": 8383 }, { "epoch": 18.67260579064588, "grad_norm": 17.33344841003418, "learning_rate": 1e-06, "loss": 0.4995, "num_input_tokens_seen": 469777728, "step": 8384 }, { "epoch": 18.67260579064588, "loss": 0.449005663394928, "loss_ce": 9.207701805280522e-05, "loss_iou": 0.1884765625, "loss_num": 0.014404296875, "loss_xval": 0.44921875, "num_input_tokens_seen": 469777728, "step": 8384 }, { "epoch": 18.674832962138083, "grad_norm": 19.488948822021484, "learning_rate": 1e-06, "loss": 0.5887, "num_input_tokens_seen": 469832624, "step": 8385 }, { "epoch": 18.674832962138083, "loss": 0.3829535245895386, "loss_ce": 7.999550871318206e-05, "loss_iou": 0.1640625, "loss_num": 0.0107421875, "loss_xval": 0.3828125, "num_input_tokens_seen": 469832624, "step": 8385 }, { "epoch": 18.677060133630288, "grad_norm": 13.389050483703613, "learning_rate": 1e-06, "loss": 0.4056, "num_input_tokens_seen": 469891116, "step": 8386 }, { "epoch": 18.677060133630288, "loss": 0.35558634996414185, "loss_ce": 8.708509267307818e-05, "loss_iou": 0.154296875, "loss_num": 0.00946044921875, "loss_xval": 0.35546875, "num_input_tokens_seen": 469891116, "step": 8386 }, { "epoch": 18.679287305122493, "grad_norm": 14.323442459106445, "learning_rate": 1e-06, "loss": 0.3495, "num_input_tokens_seen": 469946560, "step": 8387 }, { "epoch": 18.679287305122493, "loss": 0.21974313259124756, "loss_ce": 7.760837615933269e-05, "loss_iou": 0.095703125, "loss_num": 0.005584716796875, "loss_xval": 0.2197265625, "num_input_tokens_seen": 469946560, "step": 8387 }, { "epoch": 18.681514476614698, "grad_norm": 28.984878540039062, "learning_rate": 1e-06, "loss": 0.2812, "num_input_tokens_seen": 470002268, "step": 8388 }, { "epoch": 18.681514476614698, "loss": 0.24085469543933868, "loss_ce": 7.100608490873128e-05, "loss_iou": 0.107421875, "loss_num": 0.00518798828125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 470002268, "step": 8388 }, { "epoch": 18.683741648106903, "grad_norm": 15.400225639343262, "learning_rate": 1e-06, "loss": 0.4376, "num_input_tokens_seen": 470058180, "step": 8389 }, { "epoch": 18.683741648106903, "loss": 0.5878542065620422, "loss_ce": 8.563668234273791e-05, "loss_iou": 0.25390625, "loss_num": 0.016357421875, "loss_xval": 0.5859375, "num_input_tokens_seen": 470058180, "step": 8389 }, { "epoch": 18.685968819599108, "grad_norm": 23.56451988220215, "learning_rate": 1e-06, "loss": 0.5334, "num_input_tokens_seen": 470113364, "step": 8390 }, { "epoch": 18.685968819599108, "loss": 0.6698175668716431, "loss_ce": 7.881040801294148e-05, "loss_iou": 0.2734375, "loss_num": 0.024658203125, "loss_xval": 0.66796875, "num_input_tokens_seen": 470113364, "step": 8390 }, { "epoch": 18.688195991091312, "grad_norm": 20.264080047607422, "learning_rate": 1e-06, "loss": 0.4449, "num_input_tokens_seen": 470171080, "step": 8391 }, { "epoch": 18.688195991091312, "loss": 0.6437484622001648, "loss_ce": 7.168846786953509e-05, "loss_iou": 0.2578125, "loss_num": 0.025146484375, "loss_xval": 0.64453125, "num_input_tokens_seen": 470171080, "step": 8391 }, { "epoch": 18.690423162583517, "grad_norm": 18.708011627197266, "learning_rate": 1e-06, "loss": 0.4008, "num_input_tokens_seen": 470227556, "step": 8392 }, { "epoch": 18.690423162583517, "loss": 0.3530987799167633, "loss_ce": 7.14488051016815e-05, "loss_iou": 0.1640625, "loss_num": 0.005126953125, "loss_xval": 0.353515625, "num_input_tokens_seen": 470227556, "step": 8392 }, { "epoch": 18.692650334075722, "grad_norm": 33.3140754699707, "learning_rate": 1e-06, "loss": 0.4063, "num_input_tokens_seen": 470282536, "step": 8393 }, { "epoch": 18.692650334075722, "loss": 0.36571255326271057, "loss_ce": 0.0001119803127949126, "loss_iou": 0.166015625, "loss_num": 0.0067138671875, "loss_xval": 0.365234375, "num_input_tokens_seen": 470282536, "step": 8393 }, { "epoch": 18.694877505567927, "grad_norm": 14.095598220825195, "learning_rate": 1e-06, "loss": 0.4011, "num_input_tokens_seen": 470339848, "step": 8394 }, { "epoch": 18.694877505567927, "loss": 0.37506699562072754, "loss_ce": 6.698207289446145e-05, "loss_iou": 0.166015625, "loss_num": 0.00848388671875, "loss_xval": 0.375, "num_input_tokens_seen": 470339848, "step": 8394 }, { "epoch": 18.697104677060132, "grad_norm": 22.200273513793945, "learning_rate": 1e-06, "loss": 0.4252, "num_input_tokens_seen": 470393704, "step": 8395 }, { "epoch": 18.697104677060132, "loss": 0.5692760348320007, "loss_ce": 6.21723011136055e-05, "loss_iou": 0.25390625, "loss_num": 0.01177978515625, "loss_xval": 0.5703125, "num_input_tokens_seen": 470393704, "step": 8395 }, { "epoch": 18.69933184855234, "grad_norm": 19.075931549072266, "learning_rate": 1e-06, "loss": 0.4925, "num_input_tokens_seen": 470450140, "step": 8396 }, { "epoch": 18.69933184855234, "loss": 0.811962902545929, "loss_ce": 7.329651270993054e-05, "loss_iou": 0.318359375, "loss_num": 0.03466796875, "loss_xval": 0.8125, "num_input_tokens_seen": 470450140, "step": 8396 }, { "epoch": 18.70155902004454, "grad_norm": 15.641639709472656, "learning_rate": 1e-06, "loss": 0.4462, "num_input_tokens_seen": 470505936, "step": 8397 }, { "epoch": 18.70155902004454, "loss": 0.47692984342575073, "loss_ce": 0.0001232139766216278, "loss_iou": 0.1982421875, "loss_num": 0.0162353515625, "loss_xval": 0.4765625, "num_input_tokens_seen": 470505936, "step": 8397 }, { "epoch": 18.70378619153675, "grad_norm": 19.46721839904785, "learning_rate": 1e-06, "loss": 0.3472, "num_input_tokens_seen": 470560592, "step": 8398 }, { "epoch": 18.70378619153675, "loss": 0.20130589604377747, "loss_ce": 7.29848543414846e-05, "loss_iou": 0.080078125, "loss_num": 0.00830078125, "loss_xval": 0.201171875, "num_input_tokens_seen": 470560592, "step": 8398 }, { "epoch": 18.706013363028955, "grad_norm": 22.994003295898438, "learning_rate": 1e-06, "loss": 0.3366, "num_input_tokens_seen": 470616532, "step": 8399 }, { "epoch": 18.706013363028955, "loss": 0.40258532762527466, "loss_ce": 0.00011949414329137653, "loss_iou": 0.1923828125, "loss_num": 0.0036773681640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 470616532, "step": 8399 }, { "epoch": 18.70824053452116, "grad_norm": 15.002291679382324, "learning_rate": 1e-06, "loss": 0.3363, "num_input_tokens_seen": 470673684, "step": 8400 }, { "epoch": 18.70824053452116, "loss": 0.3424663245677948, "loss_ce": 0.00012012013758067042, "loss_iou": 0.138671875, "loss_num": 0.0128173828125, "loss_xval": 0.341796875, "num_input_tokens_seen": 470673684, "step": 8400 }, { "epoch": 18.710467706013365, "grad_norm": 17.72799301147461, "learning_rate": 1e-06, "loss": 0.2912, "num_input_tokens_seen": 470732336, "step": 8401 }, { "epoch": 18.710467706013365, "loss": 0.3603472411632538, "loss_ce": 7.197496597655118e-05, "loss_iou": 0.16796875, "loss_num": 0.0047607421875, "loss_xval": 0.359375, "num_input_tokens_seen": 470732336, "step": 8401 }, { "epoch": 18.71269487750557, "grad_norm": 19.49424171447754, "learning_rate": 1e-06, "loss": 0.4016, "num_input_tokens_seen": 470789100, "step": 8402 }, { "epoch": 18.71269487750557, "loss": 0.5133872628211975, "loss_ce": 8.159891876857728e-05, "loss_iou": 0.23828125, "loss_num": 0.00750732421875, "loss_xval": 0.51171875, "num_input_tokens_seen": 470789100, "step": 8402 }, { "epoch": 18.714922048997774, "grad_norm": 18.081327438354492, "learning_rate": 1e-06, "loss": 0.4933, "num_input_tokens_seen": 470845216, "step": 8403 }, { "epoch": 18.714922048997774, "loss": 0.4872537851333618, "loss_ce": 7.114657637430355e-05, "loss_iou": 0.1875, "loss_num": 0.0223388671875, "loss_xval": 0.486328125, "num_input_tokens_seen": 470845216, "step": 8403 }, { "epoch": 18.71714922048998, "grad_norm": 17.4000186920166, "learning_rate": 1e-06, "loss": 0.3173, "num_input_tokens_seen": 470902088, "step": 8404 }, { "epoch": 18.71714922048998, "loss": 0.2885332405567169, "loss_ce": 8.110511407721788e-05, "loss_iou": 0.125, "loss_num": 0.00750732421875, "loss_xval": 0.2890625, "num_input_tokens_seen": 470902088, "step": 8404 }, { "epoch": 18.719376391982184, "grad_norm": 17.23605728149414, "learning_rate": 1e-06, "loss": 0.523, "num_input_tokens_seen": 470956888, "step": 8405 }, { "epoch": 18.719376391982184, "loss": 0.21879133582115173, "loss_ce": 0.0001786548673408106, "loss_iou": 0.09326171875, "loss_num": 0.00640869140625, "loss_xval": 0.21875, "num_input_tokens_seen": 470956888, "step": 8405 }, { "epoch": 18.72160356347439, "grad_norm": 22.435617446899414, "learning_rate": 1e-06, "loss": 0.4289, "num_input_tokens_seen": 471010248, "step": 8406 }, { "epoch": 18.72160356347439, "loss": 0.4550238251686096, "loss_ce": 0.00018985335191246122, "loss_iou": 0.1875, "loss_num": 0.015869140625, "loss_xval": 0.455078125, "num_input_tokens_seen": 471010248, "step": 8406 }, { "epoch": 18.723830734966594, "grad_norm": 16.684246063232422, "learning_rate": 1e-06, "loss": 0.431, "num_input_tokens_seen": 471067932, "step": 8407 }, { "epoch": 18.723830734966594, "loss": 0.3406444191932678, "loss_ce": 6.82445170241408e-05, "loss_iou": 0.1513671875, "loss_num": 0.00762939453125, "loss_xval": 0.33984375, "num_input_tokens_seen": 471067932, "step": 8407 }, { "epoch": 18.7260579064588, "grad_norm": 22.295700073242188, "learning_rate": 1e-06, "loss": 0.3284, "num_input_tokens_seen": 471122864, "step": 8408 }, { "epoch": 18.7260579064588, "loss": 0.30706241726875305, "loss_ce": 8.610197255620733e-05, "loss_iou": 0.123046875, "loss_num": 0.01226806640625, "loss_xval": 0.306640625, "num_input_tokens_seen": 471122864, "step": 8408 }, { "epoch": 18.728285077951004, "grad_norm": 17.58847999572754, "learning_rate": 1e-06, "loss": 0.3384, "num_input_tokens_seen": 471180772, "step": 8409 }, { "epoch": 18.728285077951004, "loss": 0.3137807548046112, "loss_ce": 9.056746785063297e-05, "loss_iou": 0.1240234375, "loss_num": 0.01324462890625, "loss_xval": 0.314453125, "num_input_tokens_seen": 471180772, "step": 8409 }, { "epoch": 18.73051224944321, "grad_norm": 24.846115112304688, "learning_rate": 1e-06, "loss": 0.3556, "num_input_tokens_seen": 471237380, "step": 8410 }, { "epoch": 18.73051224944321, "loss": 0.3991147577762604, "loss_ce": 6.691899034194648e-05, "loss_iou": 0.185546875, "loss_num": 0.00555419921875, "loss_xval": 0.3984375, "num_input_tokens_seen": 471237380, "step": 8410 }, { "epoch": 18.732739420935413, "grad_norm": 18.676179885864258, "learning_rate": 1e-06, "loss": 0.3904, "num_input_tokens_seen": 471293424, "step": 8411 }, { "epoch": 18.732739420935413, "loss": 0.28310564160346985, "loss_ce": 8.561325375922024e-05, "loss_iou": 0.1220703125, "loss_num": 0.00787353515625, "loss_xval": 0.283203125, "num_input_tokens_seen": 471293424, "step": 8411 }, { "epoch": 18.734966592427618, "grad_norm": 20.800790786743164, "learning_rate": 1e-06, "loss": 0.4449, "num_input_tokens_seen": 471349792, "step": 8412 }, { "epoch": 18.734966592427618, "loss": 0.3075633645057678, "loss_ce": 6.822836439823732e-05, "loss_iou": 0.1416015625, "loss_num": 0.004638671875, "loss_xval": 0.306640625, "num_input_tokens_seen": 471349792, "step": 8412 }, { "epoch": 18.737193763919823, "grad_norm": 23.218751907348633, "learning_rate": 1e-06, "loss": 0.3355, "num_input_tokens_seen": 471407216, "step": 8413 }, { "epoch": 18.737193763919823, "loss": 0.4214501678943634, "loss_ce": 6.347508315229788e-05, "loss_iou": 0.1865234375, "loss_num": 0.00970458984375, "loss_xval": 0.421875, "num_input_tokens_seen": 471407216, "step": 8413 }, { "epoch": 18.739420935412028, "grad_norm": 18.05634117126465, "learning_rate": 1e-06, "loss": 0.3551, "num_input_tokens_seen": 471464544, "step": 8414 }, { "epoch": 18.739420935412028, "loss": 0.388986736536026, "loss_ce": 7.072095468174666e-05, "loss_iou": 0.1572265625, "loss_num": 0.01507568359375, "loss_xval": 0.388671875, "num_input_tokens_seen": 471464544, "step": 8414 }, { "epoch": 18.741648106904233, "grad_norm": 22.533185958862305, "learning_rate": 1e-06, "loss": 0.4586, "num_input_tokens_seen": 471520660, "step": 8415 }, { "epoch": 18.741648106904233, "loss": 0.46280378103256226, "loss_ce": 8.86318739503622e-05, "loss_iou": 0.2060546875, "loss_num": 0.0103759765625, "loss_xval": 0.462890625, "num_input_tokens_seen": 471520660, "step": 8415 }, { "epoch": 18.743875278396438, "grad_norm": 20.770368576049805, "learning_rate": 1e-06, "loss": 0.329, "num_input_tokens_seen": 471579084, "step": 8416 }, { "epoch": 18.743875278396438, "loss": 0.31473100185394287, "loss_ce": 9.477273124502972e-05, "loss_iou": 0.1435546875, "loss_num": 0.00567626953125, "loss_xval": 0.314453125, "num_input_tokens_seen": 471579084, "step": 8416 }, { "epoch": 18.746102449888642, "grad_norm": 48.90117645263672, "learning_rate": 1e-06, "loss": 0.2945, "num_input_tokens_seen": 471633636, "step": 8417 }, { "epoch": 18.746102449888642, "loss": 0.25058692693710327, "loss_ce": 6.0503818531287834e-05, "loss_iou": 0.11474609375, "loss_num": 0.004241943359375, "loss_xval": 0.25, "num_input_tokens_seen": 471633636, "step": 8417 }, { "epoch": 18.748329621380847, "grad_norm": 15.029791831970215, "learning_rate": 1e-06, "loss": 0.3173, "num_input_tokens_seen": 471690856, "step": 8418 }, { "epoch": 18.748329621380847, "loss": 0.32165291905403137, "loss_ce": 5.868007428944111e-05, "loss_iou": 0.14453125, "loss_num": 0.006500244140625, "loss_xval": 0.322265625, "num_input_tokens_seen": 471690856, "step": 8418 }, { "epoch": 18.750556792873052, "grad_norm": 18.00300407409668, "learning_rate": 1e-06, "loss": 0.3783, "num_input_tokens_seen": 471749244, "step": 8419 }, { "epoch": 18.750556792873052, "loss": 0.325858473777771, "loss_ce": 0.0001748991635395214, "loss_iou": 0.1455078125, "loss_num": 0.00689697265625, "loss_xval": 0.326171875, "num_input_tokens_seen": 471749244, "step": 8419 }, { "epoch": 18.752783964365257, "grad_norm": 12.98948860168457, "learning_rate": 1e-06, "loss": 0.4111, "num_input_tokens_seen": 471804068, "step": 8420 }, { "epoch": 18.752783964365257, "loss": 0.4239133596420288, "loss_ce": 8.521559357177466e-05, "loss_iou": 0.15625, "loss_num": 0.0223388671875, "loss_xval": 0.423828125, "num_input_tokens_seen": 471804068, "step": 8420 }, { "epoch": 18.755011135857462, "grad_norm": 13.578185081481934, "learning_rate": 1e-06, "loss": 0.3306, "num_input_tokens_seen": 471859564, "step": 8421 }, { "epoch": 18.755011135857462, "loss": 0.285760760307312, "loss_ce": 5.517357203643769e-05, "loss_iou": 0.11083984375, "loss_num": 0.0126953125, "loss_xval": 0.28515625, "num_input_tokens_seen": 471859564, "step": 8421 }, { "epoch": 18.757238307349667, "grad_norm": 13.726969718933105, "learning_rate": 1e-06, "loss": 0.5174, "num_input_tokens_seen": 471918184, "step": 8422 }, { "epoch": 18.757238307349667, "loss": 0.4225544333457947, "loss_ce": 6.908080104039982e-05, "loss_iou": 0.1953125, "loss_num": 0.0064697265625, "loss_xval": 0.421875, "num_input_tokens_seen": 471918184, "step": 8422 }, { "epoch": 18.75946547884187, "grad_norm": 27.957782745361328, "learning_rate": 1e-06, "loss": 0.4461, "num_input_tokens_seen": 471972848, "step": 8423 }, { "epoch": 18.75946547884187, "loss": 0.5910878777503967, "loss_ce": 8.448412700090557e-05, "loss_iou": 0.265625, "loss_num": 0.0118408203125, "loss_xval": 0.58984375, "num_input_tokens_seen": 471972848, "step": 8423 }, { "epoch": 18.761692650334076, "grad_norm": 20.952011108398438, "learning_rate": 1e-06, "loss": 0.6627, "num_input_tokens_seen": 472030196, "step": 8424 }, { "epoch": 18.761692650334076, "loss": 0.3828308880329132, "loss_ce": 7.944751996546984e-05, "loss_iou": 0.171875, "loss_num": 0.007781982421875, "loss_xval": 0.3828125, "num_input_tokens_seen": 472030196, "step": 8424 }, { "epoch": 18.76391982182628, "grad_norm": 17.825992584228516, "learning_rate": 1e-06, "loss": 0.3981, "num_input_tokens_seen": 472086448, "step": 8425 }, { "epoch": 18.76391982182628, "loss": 0.3020787835121155, "loss_ce": 7.681577699258924e-05, "loss_iou": 0.134765625, "loss_num": 0.006439208984375, "loss_xval": 0.302734375, "num_input_tokens_seen": 472086448, "step": 8425 }, { "epoch": 18.766146993318486, "grad_norm": 15.120186805725098, "learning_rate": 1e-06, "loss": 0.6746, "num_input_tokens_seen": 472144096, "step": 8426 }, { "epoch": 18.766146993318486, "loss": 0.4926231801509857, "loss_ce": 6.947731162654236e-05, "loss_iou": 0.236328125, "loss_num": 0.00396728515625, "loss_xval": 0.4921875, "num_input_tokens_seen": 472144096, "step": 8426 }, { "epoch": 18.76837416481069, "grad_norm": 60.89303207397461, "learning_rate": 1e-06, "loss": 0.6013, "num_input_tokens_seen": 472197860, "step": 8427 }, { "epoch": 18.76837416481069, "loss": 0.546029269695282, "loss_ce": 6.977089651627466e-05, "loss_iou": 0.201171875, "loss_num": 0.0286865234375, "loss_xval": 0.546875, "num_input_tokens_seen": 472197860, "step": 8427 }, { "epoch": 18.770601336302896, "grad_norm": 23.926664352416992, "learning_rate": 1e-06, "loss": 0.4401, "num_input_tokens_seen": 472254716, "step": 8428 }, { "epoch": 18.770601336302896, "loss": 0.444293737411499, "loss_ce": 7.984477269928902e-05, "loss_iou": 0.1953125, "loss_num": 0.01055908203125, "loss_xval": 0.443359375, "num_input_tokens_seen": 472254716, "step": 8428 }, { "epoch": 18.7728285077951, "grad_norm": 20.06789779663086, "learning_rate": 1e-06, "loss": 0.6177, "num_input_tokens_seen": 472312756, "step": 8429 }, { "epoch": 18.7728285077951, "loss": 0.5740514993667603, "loss_ce": 7.688780897296965e-05, "loss_iou": 0.2236328125, "loss_num": 0.0255126953125, "loss_xval": 0.57421875, "num_input_tokens_seen": 472312756, "step": 8429 }, { "epoch": 18.775055679287306, "grad_norm": 32.13956069946289, "learning_rate": 1e-06, "loss": 0.3333, "num_input_tokens_seen": 472367428, "step": 8430 }, { "epoch": 18.775055679287306, "loss": 0.2963276207447052, "loss_ce": 6.297710933722556e-05, "loss_iou": 0.1357421875, "loss_num": 0.00482177734375, "loss_xval": 0.296875, "num_input_tokens_seen": 472367428, "step": 8430 }, { "epoch": 18.77728285077951, "grad_norm": 16.847000122070312, "learning_rate": 1e-06, "loss": 0.329, "num_input_tokens_seen": 472424024, "step": 8431 }, { "epoch": 18.77728285077951, "loss": 0.3400919437408447, "loss_ce": 6.50831643724814e-05, "loss_iou": 0.1357421875, "loss_num": 0.01348876953125, "loss_xval": 0.33984375, "num_input_tokens_seen": 472424024, "step": 8431 }, { "epoch": 18.779510022271715, "grad_norm": 27.27733039855957, "learning_rate": 1e-06, "loss": 0.3656, "num_input_tokens_seen": 472478816, "step": 8432 }, { "epoch": 18.779510022271715, "loss": 0.36883819103240967, "loss_ce": 6.378746184054762e-05, "loss_iou": 0.162109375, "loss_num": 0.0087890625, "loss_xval": 0.369140625, "num_input_tokens_seen": 472478816, "step": 8432 }, { "epoch": 18.78173719376392, "grad_norm": 42.84598922729492, "learning_rate": 1e-06, "loss": 0.2843, "num_input_tokens_seen": 472534048, "step": 8433 }, { "epoch": 18.78173719376392, "loss": 0.2845653295516968, "loss_ce": 8.045761933317408e-05, "loss_iou": 0.12158203125, "loss_num": 0.00830078125, "loss_xval": 0.28515625, "num_input_tokens_seen": 472534048, "step": 8433 }, { "epoch": 18.783964365256125, "grad_norm": 13.706661224365234, "learning_rate": 1e-06, "loss": 0.4059, "num_input_tokens_seen": 472588848, "step": 8434 }, { "epoch": 18.783964365256125, "loss": 0.5129237771034241, "loss_ce": 0.00010639386164257303, "loss_iou": 0.2021484375, "loss_num": 0.0218505859375, "loss_xval": 0.51171875, "num_input_tokens_seen": 472588848, "step": 8434 }, { "epoch": 18.78619153674833, "grad_norm": 17.942852020263672, "learning_rate": 1e-06, "loss": 0.3433, "num_input_tokens_seen": 472646472, "step": 8435 }, { "epoch": 18.78619153674833, "loss": 0.323493629693985, "loss_ce": 6.834171654190868e-05, "loss_iou": 0.142578125, "loss_num": 0.007415771484375, "loss_xval": 0.32421875, "num_input_tokens_seen": 472646472, "step": 8435 }, { "epoch": 18.788418708240535, "grad_norm": 19.692903518676758, "learning_rate": 1e-06, "loss": 0.5043, "num_input_tokens_seen": 472705540, "step": 8436 }, { "epoch": 18.788418708240535, "loss": 0.5412243604660034, "loss_ce": 8.667838119436055e-05, "loss_iou": 0.2265625, "loss_num": 0.0174560546875, "loss_xval": 0.54296875, "num_input_tokens_seen": 472705540, "step": 8436 }, { "epoch": 18.79064587973274, "grad_norm": 16.23405647277832, "learning_rate": 1e-06, "loss": 0.4239, "num_input_tokens_seen": 472761744, "step": 8437 }, { "epoch": 18.79064587973274, "loss": 0.44612982869148254, "loss_ce": 8.490896289004013e-05, "loss_iou": 0.1982421875, "loss_num": 0.0101318359375, "loss_xval": 0.4453125, "num_input_tokens_seen": 472761744, "step": 8437 }, { "epoch": 18.792873051224944, "grad_norm": 23.637725830078125, "learning_rate": 1e-06, "loss": 0.3022, "num_input_tokens_seen": 472817648, "step": 8438 }, { "epoch": 18.792873051224944, "loss": 0.30181801319122314, "loss_ce": 6.019488864694722e-05, "loss_iou": 0.1298828125, "loss_num": 0.00848388671875, "loss_xval": 0.30078125, "num_input_tokens_seen": 472817648, "step": 8438 }, { "epoch": 18.79510022271715, "grad_norm": 16.10801887512207, "learning_rate": 1e-06, "loss": 0.5896, "num_input_tokens_seen": 472873080, "step": 8439 }, { "epoch": 18.79510022271715, "loss": 0.7904741168022156, "loss_ce": 6.880288128741086e-05, "loss_iou": 0.33203125, "loss_num": 0.0250244140625, "loss_xval": 0.7890625, "num_input_tokens_seen": 472873080, "step": 8439 }, { "epoch": 18.797327394209354, "grad_norm": 29.073570251464844, "learning_rate": 1e-06, "loss": 0.2354, "num_input_tokens_seen": 472930596, "step": 8440 }, { "epoch": 18.797327394209354, "loss": 0.18952415883541107, "loss_ce": 7.103992538759485e-05, "loss_iou": 0.07666015625, "loss_num": 0.00726318359375, "loss_xval": 0.189453125, "num_input_tokens_seen": 472930596, "step": 8440 }, { "epoch": 18.79955456570156, "grad_norm": 19.053442001342773, "learning_rate": 1e-06, "loss": 0.5383, "num_input_tokens_seen": 472984744, "step": 8441 }, { "epoch": 18.79955456570156, "loss": 0.39138782024383545, "loss_ce": 9.141798363998532e-05, "loss_iou": 0.1591796875, "loss_num": 0.01470947265625, "loss_xval": 0.390625, "num_input_tokens_seen": 472984744, "step": 8441 }, { "epoch": 18.801781737193764, "grad_norm": 16.559202194213867, "learning_rate": 1e-06, "loss": 0.4231, "num_input_tokens_seen": 473041436, "step": 8442 }, { "epoch": 18.801781737193764, "loss": 0.47130343317985535, "loss_ce": 8.148739289026707e-05, "loss_iou": 0.1630859375, "loss_num": 0.0291748046875, "loss_xval": 0.470703125, "num_input_tokens_seen": 473041436, "step": 8442 }, { "epoch": 18.80400890868597, "grad_norm": 25.658018112182617, "learning_rate": 1e-06, "loss": 0.3696, "num_input_tokens_seen": 473093136, "step": 8443 }, { "epoch": 18.80400890868597, "loss": 0.31416118144989014, "loss_ce": 7.427769742207602e-05, "loss_iou": 0.14453125, "loss_num": 0.005096435546875, "loss_xval": 0.314453125, "num_input_tokens_seen": 473093136, "step": 8443 }, { "epoch": 18.806236080178174, "grad_norm": 21.92131996154785, "learning_rate": 1e-06, "loss": 0.3055, "num_input_tokens_seen": 473149008, "step": 8444 }, { "epoch": 18.806236080178174, "loss": 0.33100777864456177, "loss_ce": 7.515733886975795e-05, "loss_iou": 0.150390625, "loss_num": 0.006195068359375, "loss_xval": 0.330078125, "num_input_tokens_seen": 473149008, "step": 8444 }, { "epoch": 18.80846325167038, "grad_norm": 21.062211990356445, "learning_rate": 1e-06, "loss": 0.3811, "num_input_tokens_seen": 473203320, "step": 8445 }, { "epoch": 18.80846325167038, "loss": 0.36316657066345215, "loss_ce": 6.842101720394567e-05, "loss_iou": 0.1669921875, "loss_num": 0.005859375, "loss_xval": 0.36328125, "num_input_tokens_seen": 473203320, "step": 8445 }, { "epoch": 18.810690423162583, "grad_norm": 24.50238800048828, "learning_rate": 1e-06, "loss": 0.3116, "num_input_tokens_seen": 473259040, "step": 8446 }, { "epoch": 18.810690423162583, "loss": 0.31474488973617554, "loss_ce": 7.813816046109423e-05, "loss_iou": 0.1396484375, "loss_num": 0.00726318359375, "loss_xval": 0.314453125, "num_input_tokens_seen": 473259040, "step": 8446 }, { "epoch": 18.812917594654788, "grad_norm": 18.850893020629883, "learning_rate": 1e-06, "loss": 0.5647, "num_input_tokens_seen": 473312764, "step": 8447 }, { "epoch": 18.812917594654788, "loss": 0.6031259298324585, "loss_ce": 9.859535930445418e-05, "loss_iou": 0.236328125, "loss_num": 0.025634765625, "loss_xval": 0.6015625, "num_input_tokens_seen": 473312764, "step": 8447 }, { "epoch": 18.815144766146993, "grad_norm": 122.17961883544922, "learning_rate": 1e-06, "loss": 0.3084, "num_input_tokens_seen": 473371280, "step": 8448 }, { "epoch": 18.815144766146993, "loss": 0.32418200373649597, "loss_ce": 8.531348430551589e-05, "loss_iou": 0.14453125, "loss_num": 0.006805419921875, "loss_xval": 0.32421875, "num_input_tokens_seen": 473371280, "step": 8448 }, { "epoch": 18.817371937639198, "grad_norm": 26.249561309814453, "learning_rate": 1e-06, "loss": 0.336, "num_input_tokens_seen": 473426996, "step": 8449 }, { "epoch": 18.817371937639198, "loss": 0.44048649072647095, "loss_ce": 5.6789031077641994e-05, "loss_iou": 0.1943359375, "loss_num": 0.01025390625, "loss_xval": 0.44140625, "num_input_tokens_seen": 473426996, "step": 8449 }, { "epoch": 18.819599109131403, "grad_norm": 13.715222358703613, "learning_rate": 1e-06, "loss": 0.2667, "num_input_tokens_seen": 473482228, "step": 8450 }, { "epoch": 18.819599109131403, "loss": 0.30467987060546875, "loss_ce": 0.00011442266986705363, "loss_iou": 0.1396484375, "loss_num": 0.005218505859375, "loss_xval": 0.3046875, "num_input_tokens_seen": 473482228, "step": 8450 }, { "epoch": 18.821826280623608, "grad_norm": 37.36369705200195, "learning_rate": 1e-06, "loss": 0.4166, "num_input_tokens_seen": 473535108, "step": 8451 }, { "epoch": 18.821826280623608, "loss": 0.44977593421936035, "loss_ce": 6.89039152348414e-05, "loss_iou": 0.19140625, "loss_num": 0.013427734375, "loss_xval": 0.44921875, "num_input_tokens_seen": 473535108, "step": 8451 }, { "epoch": 18.824053452115812, "grad_norm": 20.77572250366211, "learning_rate": 1e-06, "loss": 0.2944, "num_input_tokens_seen": 473592996, "step": 8452 }, { "epoch": 18.824053452115812, "loss": 0.3387192189693451, "loss_ce": 9.616896568331867e-05, "loss_iou": 0.1416015625, "loss_num": 0.010986328125, "loss_xval": 0.337890625, "num_input_tokens_seen": 473592996, "step": 8452 }, { "epoch": 18.826280623608017, "grad_norm": 17.34552001953125, "learning_rate": 1e-06, "loss": 0.4778, "num_input_tokens_seen": 473649796, "step": 8453 }, { "epoch": 18.826280623608017, "loss": 0.4614381790161133, "loss_ce": 7.342195021919906e-05, "loss_iou": 0.1904296875, "loss_num": 0.016357421875, "loss_xval": 0.4609375, "num_input_tokens_seen": 473649796, "step": 8453 }, { "epoch": 18.828507795100222, "grad_norm": 12.931282997131348, "learning_rate": 1e-06, "loss": 0.3362, "num_input_tokens_seen": 473706892, "step": 8454 }, { "epoch": 18.828507795100222, "loss": 0.2970700263977051, "loss_ce": 7.297511911019683e-05, "loss_iou": 0.1279296875, "loss_num": 0.0081787109375, "loss_xval": 0.296875, "num_input_tokens_seen": 473706892, "step": 8454 }, { "epoch": 18.830734966592427, "grad_norm": 40.271270751953125, "learning_rate": 1e-06, "loss": 0.5246, "num_input_tokens_seen": 473763124, "step": 8455 }, { "epoch": 18.830734966592427, "loss": 0.5076436996459961, "loss_ce": 7.534350879723206e-05, "loss_iou": 0.21875, "loss_num": 0.01416015625, "loss_xval": 0.5078125, "num_input_tokens_seen": 473763124, "step": 8455 }, { "epoch": 18.832962138084632, "grad_norm": 49.25373840332031, "learning_rate": 1e-06, "loss": 0.4866, "num_input_tokens_seen": 473818376, "step": 8456 }, { "epoch": 18.832962138084632, "loss": 0.4683504104614258, "loss_ce": 8.872270700521767e-05, "loss_iou": 0.2138671875, "loss_num": 0.00823974609375, "loss_xval": 0.46875, "num_input_tokens_seen": 473818376, "step": 8456 }, { "epoch": 18.835189309576837, "grad_norm": 16.492551803588867, "learning_rate": 1e-06, "loss": 0.5093, "num_input_tokens_seen": 473873400, "step": 8457 }, { "epoch": 18.835189309576837, "loss": 0.5018711686134338, "loss_ce": 8.585450996179134e-05, "loss_iou": 0.2314453125, "loss_num": 0.00799560546875, "loss_xval": 0.5, "num_input_tokens_seen": 473873400, "step": 8457 }, { "epoch": 18.83741648106904, "grad_norm": 18.73830223083496, "learning_rate": 1e-06, "loss": 0.4191, "num_input_tokens_seen": 473928576, "step": 8458 }, { "epoch": 18.83741648106904, "loss": 0.3809613585472107, "loss_ce": 0.00010200223186984658, "loss_iou": 0.1611328125, "loss_num": 0.01153564453125, "loss_xval": 0.380859375, "num_input_tokens_seen": 473928576, "step": 8458 }, { "epoch": 18.839643652561247, "grad_norm": 21.033058166503906, "learning_rate": 1e-06, "loss": 0.3665, "num_input_tokens_seen": 473982784, "step": 8459 }, { "epoch": 18.839643652561247, "loss": 0.28056997060775757, "loss_ce": 5.241744656814262e-05, "loss_iou": 0.123046875, "loss_num": 0.00689697265625, "loss_xval": 0.28125, "num_input_tokens_seen": 473982784, "step": 8459 }, { "epoch": 18.84187082405345, "grad_norm": 18.85097312927246, "learning_rate": 1e-06, "loss": 0.4517, "num_input_tokens_seen": 474036980, "step": 8460 }, { "epoch": 18.84187082405345, "loss": 0.4336727559566498, "loss_ce": 7.89997138781473e-05, "loss_iou": 0.1767578125, "loss_num": 0.0162353515625, "loss_xval": 0.43359375, "num_input_tokens_seen": 474036980, "step": 8460 }, { "epoch": 18.844097995545656, "grad_norm": 22.325136184692383, "learning_rate": 1e-06, "loss": 0.3886, "num_input_tokens_seen": 474091556, "step": 8461 }, { "epoch": 18.844097995545656, "loss": 0.43452924489974976, "loss_ce": 0.00014204179751686752, "loss_iou": 0.1875, "loss_num": 0.01190185546875, "loss_xval": 0.43359375, "num_input_tokens_seen": 474091556, "step": 8461 }, { "epoch": 18.84632516703786, "grad_norm": 14.574409484863281, "learning_rate": 1e-06, "loss": 0.3643, "num_input_tokens_seen": 474147028, "step": 8462 }, { "epoch": 18.84632516703786, "loss": 0.22980637848377228, "loss_ce": 7.004974031588063e-05, "loss_iou": 0.10302734375, "loss_num": 0.004608154296875, "loss_xval": 0.2294921875, "num_input_tokens_seen": 474147028, "step": 8462 }, { "epoch": 18.848552338530066, "grad_norm": 31.346830368041992, "learning_rate": 1e-06, "loss": 0.4338, "num_input_tokens_seen": 474203072, "step": 8463 }, { "epoch": 18.848552338530066, "loss": 0.4625985324382782, "loss_ce": 7.410830585286021e-05, "loss_iou": 0.2099609375, "loss_num": 0.00860595703125, "loss_xval": 0.462890625, "num_input_tokens_seen": 474203072, "step": 8463 }, { "epoch": 18.85077951002227, "grad_norm": 15.782477378845215, "learning_rate": 1e-06, "loss": 0.3963, "num_input_tokens_seen": 474259464, "step": 8464 }, { "epoch": 18.85077951002227, "loss": 0.4502849578857422, "loss_ce": 8.967139001470059e-05, "loss_iou": 0.1875, "loss_num": 0.01519775390625, "loss_xval": 0.44921875, "num_input_tokens_seen": 474259464, "step": 8464 }, { "epoch": 18.853006681514476, "grad_norm": 19.337142944335938, "learning_rate": 1e-06, "loss": 0.3895, "num_input_tokens_seen": 474316776, "step": 8465 }, { "epoch": 18.853006681514476, "loss": 0.47631609439849854, "loss_ce": 0.00011980824638158083, "loss_iou": 0.1923828125, "loss_num": 0.01806640625, "loss_xval": 0.4765625, "num_input_tokens_seen": 474316776, "step": 8465 }, { "epoch": 18.85523385300668, "grad_norm": 16.090877532958984, "learning_rate": 1e-06, "loss": 0.4098, "num_input_tokens_seen": 474372780, "step": 8466 }, { "epoch": 18.85523385300668, "loss": 0.3744850754737854, "loss_ce": 9.543487976770848e-05, "loss_iou": 0.169921875, "loss_num": 0.006683349609375, "loss_xval": 0.375, "num_input_tokens_seen": 474372780, "step": 8466 }, { "epoch": 18.857461024498885, "grad_norm": 19.818513870239258, "learning_rate": 1e-06, "loss": 0.4536, "num_input_tokens_seen": 474429796, "step": 8467 }, { "epoch": 18.857461024498885, "loss": 0.6125023365020752, "loss_ce": 7.554069452453405e-05, "loss_iou": 0.27734375, "loss_num": 0.01123046875, "loss_xval": 0.61328125, "num_input_tokens_seen": 474429796, "step": 8467 }, { "epoch": 18.85968819599109, "grad_norm": 18.213647842407227, "learning_rate": 1e-06, "loss": 0.3334, "num_input_tokens_seen": 474489184, "step": 8468 }, { "epoch": 18.85968819599109, "loss": 0.26028338074684143, "loss_ce": 7.525723776780069e-05, "loss_iou": 0.12109375, "loss_num": 0.003570556640625, "loss_xval": 0.259765625, "num_input_tokens_seen": 474489184, "step": 8468 }, { "epoch": 18.861915367483295, "grad_norm": 23.33075523376465, "learning_rate": 1e-06, "loss": 0.3492, "num_input_tokens_seen": 474546056, "step": 8469 }, { "epoch": 18.861915367483295, "loss": 0.4007795751094818, "loss_ce": 8.379328937735409e-05, "loss_iou": 0.1708984375, "loss_num": 0.01171875, "loss_xval": 0.400390625, "num_input_tokens_seen": 474546056, "step": 8469 }, { "epoch": 18.8641425389755, "grad_norm": 30.516542434692383, "learning_rate": 1e-06, "loss": 0.3336, "num_input_tokens_seen": 474601092, "step": 8470 }, { "epoch": 18.8641425389755, "loss": 0.2731455862522125, "loss_ce": 7.429896504618227e-05, "loss_iou": 0.126953125, "loss_num": 0.003814697265625, "loss_xval": 0.2734375, "num_input_tokens_seen": 474601092, "step": 8470 }, { "epoch": 18.866369710467705, "grad_norm": 17.464170455932617, "learning_rate": 1e-06, "loss": 0.4543, "num_input_tokens_seen": 474657324, "step": 8471 }, { "epoch": 18.866369710467705, "loss": 0.48732370138168335, "loss_ce": 8.003832772374153e-05, "loss_iou": 0.2119140625, "loss_num": 0.0125732421875, "loss_xval": 0.486328125, "num_input_tokens_seen": 474657324, "step": 8471 }, { "epoch": 18.86859688195991, "grad_norm": 19.651233673095703, "learning_rate": 1e-06, "loss": 0.3527, "num_input_tokens_seen": 474713160, "step": 8472 }, { "epoch": 18.86859688195991, "loss": 0.3799467086791992, "loss_ce": 6.391612987499684e-05, "loss_iou": 0.1689453125, "loss_num": 0.00830078125, "loss_xval": 0.37890625, "num_input_tokens_seen": 474713160, "step": 8472 }, { "epoch": 18.870824053452115, "grad_norm": 41.639102935791016, "learning_rate": 1e-06, "loss": 0.6776, "num_input_tokens_seen": 474771492, "step": 8473 }, { "epoch": 18.870824053452115, "loss": 0.556826651096344, "loss_ce": 6.396578100975603e-05, "loss_iou": 0.259765625, "loss_num": 0.007720947265625, "loss_xval": 0.55859375, "num_input_tokens_seen": 474771492, "step": 8473 }, { "epoch": 18.87305122494432, "grad_norm": 20.428213119506836, "learning_rate": 1e-06, "loss": 0.4747, "num_input_tokens_seen": 474825884, "step": 8474 }, { "epoch": 18.87305122494432, "loss": 0.5070443153381348, "loss_ce": 8.636478742118925e-05, "loss_iou": 0.23828125, "loss_num": 0.006256103515625, "loss_xval": 0.5078125, "num_input_tokens_seen": 474825884, "step": 8474 }, { "epoch": 18.875278396436524, "grad_norm": 16.10317039489746, "learning_rate": 1e-06, "loss": 0.2764, "num_input_tokens_seen": 474880660, "step": 8475 }, { "epoch": 18.875278396436524, "loss": 0.1743912398815155, "loss_ce": 7.483765512006357e-05, "loss_iou": 0.0791015625, "loss_num": 0.0032806396484375, "loss_xval": 0.173828125, "num_input_tokens_seen": 474880660, "step": 8475 }, { "epoch": 18.87750556792873, "grad_norm": 21.885726928710938, "learning_rate": 1e-06, "loss": 0.5585, "num_input_tokens_seen": 474936820, "step": 8476 }, { "epoch": 18.87750556792873, "loss": 0.6800100803375244, "loss_ce": 7.844208448659629e-05, "loss_iou": 0.30078125, "loss_num": 0.0157470703125, "loss_xval": 0.6796875, "num_input_tokens_seen": 474936820, "step": 8476 }, { "epoch": 18.879732739420934, "grad_norm": 26.458858489990234, "learning_rate": 1e-06, "loss": 0.487, "num_input_tokens_seen": 474995968, "step": 8477 }, { "epoch": 18.879732739420934, "loss": 0.4402797818183899, "loss_ce": 9.42522456170991e-05, "loss_iou": 0.1865234375, "loss_num": 0.01336669921875, "loss_xval": 0.439453125, "num_input_tokens_seen": 474995968, "step": 8477 }, { "epoch": 18.88195991091314, "grad_norm": 21.456289291381836, "learning_rate": 1e-06, "loss": 0.4094, "num_input_tokens_seen": 475052128, "step": 8478 }, { "epoch": 18.88195991091314, "loss": 0.3907051384449005, "loss_ce": 8.013312617549673e-05, "loss_iou": 0.173828125, "loss_num": 0.0086669921875, "loss_xval": 0.390625, "num_input_tokens_seen": 475052128, "step": 8478 }, { "epoch": 18.884187082405344, "grad_norm": 19.8070068359375, "learning_rate": 1e-06, "loss": 0.5119, "num_input_tokens_seen": 475109916, "step": 8479 }, { "epoch": 18.884187082405344, "loss": 0.33552417159080505, "loss_ce": 7.495496538467705e-05, "loss_iou": 0.1494140625, "loss_num": 0.007537841796875, "loss_xval": 0.3359375, "num_input_tokens_seen": 475109916, "step": 8479 }, { "epoch": 18.88641425389755, "grad_norm": 18.59421157836914, "learning_rate": 1e-06, "loss": 0.3461, "num_input_tokens_seen": 475165660, "step": 8480 }, { "epoch": 18.88641425389755, "loss": 0.34626448154449463, "loss_ce": 7.30836036382243e-05, "loss_iou": 0.162109375, "loss_num": 0.00445556640625, "loss_xval": 0.345703125, "num_input_tokens_seen": 475165660, "step": 8480 }, { "epoch": 18.888641425389753, "grad_norm": 35.3235969543457, "learning_rate": 1e-06, "loss": 0.3462, "num_input_tokens_seen": 475223024, "step": 8481 }, { "epoch": 18.888641425389753, "loss": 0.3160410225391388, "loss_ce": 6.200766074471176e-05, "loss_iou": 0.1396484375, "loss_num": 0.00726318359375, "loss_xval": 0.31640625, "num_input_tokens_seen": 475223024, "step": 8481 }, { "epoch": 18.89086859688196, "grad_norm": 17.297746658325195, "learning_rate": 1e-06, "loss": 0.3611, "num_input_tokens_seen": 475278500, "step": 8482 }, { "epoch": 18.89086859688196, "loss": 0.2839501202106476, "loss_ce": 7.561447273474187e-05, "loss_iou": 0.134765625, "loss_num": 0.00262451171875, "loss_xval": 0.283203125, "num_input_tokens_seen": 475278500, "step": 8482 }, { "epoch": 18.893095768374163, "grad_norm": 26.896717071533203, "learning_rate": 1e-06, "loss": 0.5017, "num_input_tokens_seen": 475332444, "step": 8483 }, { "epoch": 18.893095768374163, "loss": 0.4822501540184021, "loss_ce": 7.242616266012192e-05, "loss_iou": 0.1796875, "loss_num": 0.0244140625, "loss_xval": 0.482421875, "num_input_tokens_seen": 475332444, "step": 8483 }, { "epoch": 18.895322939866368, "grad_norm": 27.973371505737305, "learning_rate": 1e-06, "loss": 0.3581, "num_input_tokens_seen": 475389244, "step": 8484 }, { "epoch": 18.895322939866368, "loss": 0.31134331226348877, "loss_ce": 6.400147685781121e-05, "loss_iou": 0.134765625, "loss_num": 0.00823974609375, "loss_xval": 0.310546875, "num_input_tokens_seen": 475389244, "step": 8484 }, { "epoch": 18.897550111358576, "grad_norm": 16.82830810546875, "learning_rate": 1e-06, "loss": 0.3729, "num_input_tokens_seen": 475446276, "step": 8485 }, { "epoch": 18.897550111358576, "loss": 0.3148788809776306, "loss_ce": 5.956278619123623e-05, "loss_iou": 0.142578125, "loss_num": 0.006072998046875, "loss_xval": 0.314453125, "num_input_tokens_seen": 475446276, "step": 8485 }, { "epoch": 18.899777282850778, "grad_norm": 16.516233444213867, "learning_rate": 1e-06, "loss": 0.2496, "num_input_tokens_seen": 475501428, "step": 8486 }, { "epoch": 18.899777282850778, "loss": 0.33766478300094604, "loss_ce": 7.93259241618216e-05, "loss_iou": 0.1591796875, "loss_num": 0.00396728515625, "loss_xval": 0.337890625, "num_input_tokens_seen": 475501428, "step": 8486 }, { "epoch": 18.902004454342986, "grad_norm": 19.306852340698242, "learning_rate": 1e-06, "loss": 0.3633, "num_input_tokens_seen": 475557696, "step": 8487 }, { "epoch": 18.902004454342986, "loss": 0.2621913552284241, "loss_ce": 7.588176958961412e-05, "loss_iou": 0.11474609375, "loss_num": 0.006439208984375, "loss_xval": 0.26171875, "num_input_tokens_seen": 475557696, "step": 8487 }, { "epoch": 18.90423162583519, "grad_norm": 17.466773986816406, "learning_rate": 1e-06, "loss": 0.346, "num_input_tokens_seen": 475614528, "step": 8488 }, { "epoch": 18.90423162583519, "loss": 0.5012964010238647, "loss_ce": 7.571464811917394e-05, "loss_iou": 0.2109375, "loss_num": 0.0159912109375, "loss_xval": 0.5, "num_input_tokens_seen": 475614528, "step": 8488 }, { "epoch": 18.906458797327396, "grad_norm": 16.38589096069336, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 475671076, "step": 8489 }, { "epoch": 18.906458797327396, "loss": 0.43280452489852905, "loss_ce": 6.528291123686358e-05, "loss_iou": 0.1982421875, "loss_num": 0.007110595703125, "loss_xval": 0.43359375, "num_input_tokens_seen": 475671076, "step": 8489 }, { "epoch": 18.9086859688196, "grad_norm": 22.471385955810547, "learning_rate": 1e-06, "loss": 0.6548, "num_input_tokens_seen": 475723864, "step": 8490 }, { "epoch": 18.9086859688196, "loss": 0.7026335000991821, "loss_ce": 0.0003630055289249867, "loss_iou": 0.328125, "loss_num": 0.00872802734375, "loss_xval": 0.703125, "num_input_tokens_seen": 475723864, "step": 8490 }, { "epoch": 18.910913140311806, "grad_norm": 16.447473526000977, "learning_rate": 1e-06, "loss": 0.3351, "num_input_tokens_seen": 475779860, "step": 8491 }, { "epoch": 18.910913140311806, "loss": 0.29600396752357483, "loss_ce": 7.501787331420928e-05, "loss_iou": 0.12890625, "loss_num": 0.007537841796875, "loss_xval": 0.296875, "num_input_tokens_seen": 475779860, "step": 8491 }, { "epoch": 18.91314031180401, "grad_norm": 16.438947677612305, "learning_rate": 1e-06, "loss": 0.4427, "num_input_tokens_seen": 475835912, "step": 8492 }, { "epoch": 18.91314031180401, "loss": 0.4535049796104431, "loss_ce": 7.47992453398183e-05, "loss_iou": 0.2001953125, "loss_num": 0.01055908203125, "loss_xval": 0.453125, "num_input_tokens_seen": 475835912, "step": 8492 }, { "epoch": 18.915367483296215, "grad_norm": 16.084348678588867, "learning_rate": 1e-06, "loss": 0.7689, "num_input_tokens_seen": 475892356, "step": 8493 }, { "epoch": 18.915367483296215, "loss": 0.9672378301620483, "loss_ce": 7.476539758499712e-05, "loss_iou": 0.359375, "loss_num": 0.04931640625, "loss_xval": 0.96875, "num_input_tokens_seen": 475892356, "step": 8493 }, { "epoch": 18.91759465478842, "grad_norm": 21.32638168334961, "learning_rate": 1e-06, "loss": 0.4226, "num_input_tokens_seen": 475948980, "step": 8494 }, { "epoch": 18.91759465478842, "loss": 0.461247056722641, "loss_ce": 0.0001875017478596419, "loss_iou": 0.2080078125, "loss_num": 0.00921630859375, "loss_xval": 0.4609375, "num_input_tokens_seen": 475948980, "step": 8494 }, { "epoch": 18.919821826280625, "grad_norm": 36.91980743408203, "learning_rate": 1e-06, "loss": 0.4261, "num_input_tokens_seen": 476005300, "step": 8495 }, { "epoch": 18.919821826280625, "loss": 0.39789801836013794, "loss_ce": 7.085979450494051e-05, "loss_iou": 0.1826171875, "loss_num": 0.006683349609375, "loss_xval": 0.3984375, "num_input_tokens_seen": 476005300, "step": 8495 }, { "epoch": 18.92204899777283, "grad_norm": 28.42441749572754, "learning_rate": 1e-06, "loss": 0.3562, "num_input_tokens_seen": 476059068, "step": 8496 }, { "epoch": 18.92204899777283, "loss": 0.35445064306259155, "loss_ce": 8.05156523711048e-05, "loss_iou": 0.1630859375, "loss_num": 0.00555419921875, "loss_xval": 0.353515625, "num_input_tokens_seen": 476059068, "step": 8496 }, { "epoch": 18.924276169265035, "grad_norm": 15.65682601928711, "learning_rate": 1e-06, "loss": 0.3527, "num_input_tokens_seen": 476116536, "step": 8497 }, { "epoch": 18.924276169265035, "loss": 0.347369909286499, "loss_ce": 7.986459240783006e-05, "loss_iou": 0.15234375, "loss_num": 0.00848388671875, "loss_xval": 0.34765625, "num_input_tokens_seen": 476116536, "step": 8497 }, { "epoch": 18.92650334075724, "grad_norm": 14.354292869567871, "learning_rate": 1e-06, "loss": 0.5194, "num_input_tokens_seen": 476172064, "step": 8498 }, { "epoch": 18.92650334075724, "loss": 0.49591702222824097, "loss_ce": 6.739451055182144e-05, "loss_iou": 0.228515625, "loss_num": 0.007598876953125, "loss_xval": 0.49609375, "num_input_tokens_seen": 476172064, "step": 8498 }, { "epoch": 18.928730512249444, "grad_norm": 26.137008666992188, "learning_rate": 1e-06, "loss": 0.4098, "num_input_tokens_seen": 476226344, "step": 8499 }, { "epoch": 18.928730512249444, "loss": 0.36395373940467834, "loss_ce": 6.214019958861172e-05, "loss_iou": 0.1591796875, "loss_num": 0.00921630859375, "loss_xval": 0.36328125, "num_input_tokens_seen": 476226344, "step": 8499 }, { "epoch": 18.93095768374165, "grad_norm": 15.024909973144531, "learning_rate": 1e-06, "loss": 0.2597, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "eval_seeclick_web_CIoU": 0.5848062932491302, "eval_seeclick_web_GIoU": 0.583672821521759, "eval_seeclick_web_IoU": 0.6038743853569031, "eval_seeclick_web_MAE_all": 0.015243555419147015, "eval_seeclick_web_MAE_h": 0.007093099411576986, "eval_seeclick_web_MAE_w": 0.015268665738403797, "eval_seeclick_web_MAE_x_boxes": 0.00826921034604311, "eval_seeclick_web_MAE_y_boxes": 0.02124078758060932, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9087226986885071, "eval_seeclick_web_loss_ce": 0.00012506780331023037, "eval_seeclick_web_loss_iou": 0.42181396484375, "eval_seeclick_web_loss_num": 0.012174606323242188, "eval_seeclick_web_loss_xval": 0.9036865234375, "eval_seeclick_web_runtime": 22.9412, "eval_seeclick_web_samples_per_second": 2.179, "eval_seeclick_web_steps_per_second": 0.087, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "eval_icons_CIoU": 0.2560933604836464, "eval_icons_GIoU": 0.290867879986763, "eval_icons_IoU": 0.3409384936094284, "eval_icons_MAE_all": 0.06184186786413193, "eval_icons_MAE_h": 0.029924143571406603, "eval_icons_MAE_w": 0.06795705109834671, "eval_icons_MAE_x_boxes": 0.06149038299918175, "eval_icons_MAE_y_boxes": 0.037362379021942616, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7201489210128784, "eval_icons_loss_ce": 0.0001441572530893609, "eval_icons_loss_iou": 0.67724609375, "eval_icons_loss_num": 0.05980682373046875, "eval_icons_loss_xval": 1.65380859375, "eval_icons_runtime": 22.6756, "eval_icons_samples_per_second": 2.205, "eval_icons_steps_per_second": 0.088, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "eval_screenspot_CIoU": 0.3761156400044759, "eval_screenspot_GIoU": 0.3939984142780304, "eval_screenspot_IoU": 0.44694022337595624, "eval_screenspot_MAE_all": 0.055543094873428345, "eval_screenspot_MAE_h": 0.039369805405537285, "eval_screenspot_MAE_w": 0.05557269603013992, "eval_screenspot_MAE_x_boxes": 0.06785313226282597, "eval_screenspot_MAE_y_boxes": 0.04004095122218132, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 1.5496809482574463, "eval_screenspot_loss_ce": 0.00017849090121065578, "eval_screenspot_loss_iou": 0.644775390625, "eval_screenspot_loss_num": 0.0635693868001302, "eval_screenspot_loss_xval": 1.6062825520833333, "eval_screenspot_runtime": 38.5423, "eval_screenspot_samples_per_second": 2.309, "eval_screenspot_steps_per_second": 0.078, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "eval_compot_CIoU": 0.3419719487428665, "eval_compot_GIoU": 0.3519115000963211, "eval_compot_IoU": 0.40101131796836853, "eval_compot_MAE_all": 0.018997764214873314, "eval_compot_MAE_h": 0.011835724115371704, "eval_compot_MAE_w": 0.02075411193072796, "eval_compot_MAE_x_boxes": 0.03001493401825428, "eval_compot_MAE_y_boxes": 0.006946815177798271, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3982415199279785, "eval_compot_loss_ce": 0.00011685657591442578, "eval_compot_loss_iou": 0.646240234375, "eval_compot_loss_num": 0.017612457275390625, "eval_compot_loss_xval": 1.3798828125, "eval_compot_runtime": 24.3361, "eval_compot_samples_per_second": 2.055, "eval_compot_steps_per_second": 0.082, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "eval_custom_ui_val_CIoU": 0.47164858794874615, "eval_custom_ui_val_GIoU": 0.47631729145844776, "eval_custom_ui_val_IoU": 0.5336444907718234, "eval_custom_ui_val_MAE_all": 0.026907082750565477, "eval_custom_ui_val_MAE_h": 0.013476735032680962, "eval_custom_ui_val_MAE_w": 0.03637263929057452, "eval_custom_ui_val_MAE_x_boxes": 0.03278244765371912, "eval_custom_ui_val_MAE_y_boxes": 0.013371242494839761, "eval_custom_ui_val_inside_bbox": 0.7685185207260979, "eval_custom_ui_val_loss": 1.1731735467910767, "eval_custom_ui_val_loss_ce": 0.0001309802391915582, "eval_custom_ui_val_loss_iou": 0.5032552083333334, "eval_custom_ui_val_loss_num": 0.023616578843858507, "eval_custom_ui_val_loss_xval": 1.1244574652777777, "eval_custom_ui_val_runtime": 72.5876, "eval_custom_ui_val_samples_per_second": 3.651, "eval_custom_ui_val_steps_per_second": 0.124, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.93095768374165, "loss": 0.8257832527160645, "loss_ce": 9.972714178729802e-05, "loss_iou": 0.37109375, "loss_num": 0.016357421875, "loss_xval": 0.82421875, "num_input_tokens_seen": 476279788, "step": 8500 }, { "epoch": 18.933184855233854, "grad_norm": 16.40049934387207, "learning_rate": 1e-06, "loss": 0.3533, "num_input_tokens_seen": 476335896, "step": 8501 }, { "epoch": 18.933184855233854, "loss": 0.35964030027389526, "loss_ce": 0.00011269970855209976, "loss_iou": 0.1611328125, "loss_num": 0.007568359375, "loss_xval": 0.359375, "num_input_tokens_seen": 476335896, "step": 8501 }, { "epoch": 18.93541202672606, "grad_norm": 15.26424503326416, "learning_rate": 1e-06, "loss": 0.2743, "num_input_tokens_seen": 476390700, "step": 8502 }, { "epoch": 18.93541202672606, "loss": 0.33460482954978943, "loss_ce": 7.113382162060589e-05, "loss_iou": 0.146484375, "loss_num": 0.00823974609375, "loss_xval": 0.333984375, "num_input_tokens_seen": 476390700, "step": 8502 }, { "epoch": 18.937639198218264, "grad_norm": 19.776226043701172, "learning_rate": 1e-06, "loss": 0.2961, "num_input_tokens_seen": 476444976, "step": 8503 }, { "epoch": 18.937639198218264, "loss": 0.34564700722694397, "loss_ce": 6.596426828764379e-05, "loss_iou": 0.15234375, "loss_num": 0.0084228515625, "loss_xval": 0.345703125, "num_input_tokens_seen": 476444976, "step": 8503 }, { "epoch": 18.93986636971047, "grad_norm": 13.866682052612305, "learning_rate": 1e-06, "loss": 0.3222, "num_input_tokens_seen": 476500844, "step": 8504 }, { "epoch": 18.93986636971047, "loss": 0.31439658999443054, "loss_ce": 6.556001608259976e-05, "loss_iou": 0.1240234375, "loss_num": 0.01312255859375, "loss_xval": 0.314453125, "num_input_tokens_seen": 476500844, "step": 8504 }, { "epoch": 18.942093541202674, "grad_norm": 19.28264045715332, "learning_rate": 1e-06, "loss": 0.3066, "num_input_tokens_seen": 476556968, "step": 8505 }, { "epoch": 18.942093541202674, "loss": 0.3727559447288513, "loss_ce": 7.529569120379165e-05, "loss_iou": 0.166015625, "loss_num": 0.00823974609375, "loss_xval": 0.373046875, "num_input_tokens_seen": 476556968, "step": 8505 }, { "epoch": 18.94432071269488, "grad_norm": 20.56502342224121, "learning_rate": 1e-06, "loss": 0.3223, "num_input_tokens_seen": 476614640, "step": 8506 }, { "epoch": 18.94432071269488, "loss": 0.41475239396095276, "loss_ce": 7.952825399115682e-05, "loss_iou": 0.1787109375, "loss_num": 0.01141357421875, "loss_xval": 0.4140625, "num_input_tokens_seen": 476614640, "step": 8506 }, { "epoch": 18.946547884187083, "grad_norm": 34.558292388916016, "learning_rate": 1e-06, "loss": 0.5155, "num_input_tokens_seen": 476670132, "step": 8507 }, { "epoch": 18.946547884187083, "loss": 0.4488101005554199, "loss_ce": 7.96294043539092e-05, "loss_iou": 0.197265625, "loss_num": 0.0107421875, "loss_xval": 0.44921875, "num_input_tokens_seen": 476670132, "step": 8507 }, { "epoch": 18.948775055679288, "grad_norm": 18.674837112426758, "learning_rate": 1e-06, "loss": 0.4771, "num_input_tokens_seen": 476726448, "step": 8508 }, { "epoch": 18.948775055679288, "loss": 0.5774888396263123, "loss_ce": 9.624622180126607e-05, "loss_iou": 0.267578125, "loss_num": 0.00823974609375, "loss_xval": 0.578125, "num_input_tokens_seen": 476726448, "step": 8508 }, { "epoch": 18.951002227171493, "grad_norm": 15.41340160369873, "learning_rate": 1e-06, "loss": 0.4142, "num_input_tokens_seen": 476784916, "step": 8509 }, { "epoch": 18.951002227171493, "loss": 0.4316607713699341, "loss_ce": 8.12113648862578e-05, "loss_iou": 0.1953125, "loss_num": 0.00811767578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 476784916, "step": 8509 }, { "epoch": 18.953229398663698, "grad_norm": 14.191808700561523, "learning_rate": 1e-06, "loss": 0.422, "num_input_tokens_seen": 476843120, "step": 8510 }, { "epoch": 18.953229398663698, "loss": 0.4266318678855896, "loss_ce": 0.00011820608051493764, "loss_iou": 0.169921875, "loss_num": 0.0174560546875, "loss_xval": 0.42578125, "num_input_tokens_seen": 476843120, "step": 8510 }, { "epoch": 18.955456570155903, "grad_norm": 16.592500686645508, "learning_rate": 1e-06, "loss": 0.4383, "num_input_tokens_seen": 476899824, "step": 8511 }, { "epoch": 18.955456570155903, "loss": 0.40235602855682373, "loss_ce": 7.331261440413073e-05, "loss_iou": 0.166015625, "loss_num": 0.01385498046875, "loss_xval": 0.40234375, "num_input_tokens_seen": 476899824, "step": 8511 }, { "epoch": 18.957683741648108, "grad_norm": 12.043611526489258, "learning_rate": 1e-06, "loss": 0.3217, "num_input_tokens_seen": 476956480, "step": 8512 }, { "epoch": 18.957683741648108, "loss": 0.3974224627017975, "loss_ce": 8.359744242625311e-05, "loss_iou": 0.1669921875, "loss_num": 0.01275634765625, "loss_xval": 0.396484375, "num_input_tokens_seen": 476956480, "step": 8512 }, { "epoch": 18.959910913140313, "grad_norm": 20.1372013092041, "learning_rate": 1e-06, "loss": 0.3755, "num_input_tokens_seen": 477011216, "step": 8513 }, { "epoch": 18.959910913140313, "loss": 0.3341856598854065, "loss_ce": 7.918624032754451e-05, "loss_iou": 0.142578125, "loss_num": 0.0096435546875, "loss_xval": 0.333984375, "num_input_tokens_seen": 477011216, "step": 8513 }, { "epoch": 18.962138084632517, "grad_norm": 23.11919593811035, "learning_rate": 1e-06, "loss": 0.3368, "num_input_tokens_seen": 477068100, "step": 8514 }, { "epoch": 18.962138084632517, "loss": 0.4352511167526245, "loss_ce": 7.042582728900015e-05, "loss_iou": 0.16015625, "loss_num": 0.02294921875, "loss_xval": 0.435546875, "num_input_tokens_seen": 477068100, "step": 8514 }, { "epoch": 18.964365256124722, "grad_norm": 15.498306274414062, "learning_rate": 1e-06, "loss": 0.4073, "num_input_tokens_seen": 477124952, "step": 8515 }, { "epoch": 18.964365256124722, "loss": 0.4004598557949066, "loss_ce": 6.92239118507132e-05, "loss_iou": 0.17578125, "loss_num": 0.00982666015625, "loss_xval": 0.400390625, "num_input_tokens_seen": 477124952, "step": 8515 }, { "epoch": 18.966592427616927, "grad_norm": 16.32062530517578, "learning_rate": 1e-06, "loss": 0.3115, "num_input_tokens_seen": 477178756, "step": 8516 }, { "epoch": 18.966592427616927, "loss": 0.38423728942871094, "loss_ce": 8.200977754313499e-05, "loss_iou": 0.16796875, "loss_num": 0.00958251953125, "loss_xval": 0.384765625, "num_input_tokens_seen": 477178756, "step": 8516 }, { "epoch": 18.968819599109132, "grad_norm": 16.013669967651367, "learning_rate": 1e-06, "loss": 0.4225, "num_input_tokens_seen": 477234552, "step": 8517 }, { "epoch": 18.968819599109132, "loss": 0.45508986711502075, "loss_ce": 7.279004785232246e-05, "loss_iou": 0.1943359375, "loss_num": 0.0133056640625, "loss_xval": 0.455078125, "num_input_tokens_seen": 477234552, "step": 8517 }, { "epoch": 18.971046770601337, "grad_norm": 15.03101921081543, "learning_rate": 1e-06, "loss": 0.5013, "num_input_tokens_seen": 477291364, "step": 8518 }, { "epoch": 18.971046770601337, "loss": 0.4453286826610565, "loss_ce": 7.720896974205971e-05, "loss_iou": 0.2041015625, "loss_num": 0.007476806640625, "loss_xval": 0.4453125, "num_input_tokens_seen": 477291364, "step": 8518 }, { "epoch": 18.97327394209354, "grad_norm": 16.71440887451172, "learning_rate": 1e-06, "loss": 0.3699, "num_input_tokens_seen": 477346348, "step": 8519 }, { "epoch": 18.97327394209354, "loss": 0.3506791889667511, "loss_ce": 9.326588042313233e-05, "loss_iou": 0.16015625, "loss_num": 0.00604248046875, "loss_xval": 0.3515625, "num_input_tokens_seen": 477346348, "step": 8519 }, { "epoch": 18.975501113585747, "grad_norm": 29.49169158935547, "learning_rate": 1e-06, "loss": 0.4555, "num_input_tokens_seen": 477402184, "step": 8520 }, { "epoch": 18.975501113585747, "loss": 0.31228724122047424, "loss_ce": 6.190109706949443e-05, "loss_iou": 0.1298828125, "loss_num": 0.01043701171875, "loss_xval": 0.3125, "num_input_tokens_seen": 477402184, "step": 8520 }, { "epoch": 18.97772828507795, "grad_norm": 17.541460037231445, "learning_rate": 1e-06, "loss": 0.4045, "num_input_tokens_seen": 477458060, "step": 8521 }, { "epoch": 18.97772828507795, "loss": 0.30591824650764465, "loss_ce": 7.108383579179645e-05, "loss_iou": 0.1396484375, "loss_num": 0.005279541015625, "loss_xval": 0.306640625, "num_input_tokens_seen": 477458060, "step": 8521 }, { "epoch": 18.979955456570156, "grad_norm": 17.852941513061523, "learning_rate": 1e-06, "loss": 0.5856, "num_input_tokens_seen": 477511612, "step": 8522 }, { "epoch": 18.979955456570156, "loss": 0.4428853988647461, "loss_ce": 7.535393524449319e-05, "loss_iou": 0.1806640625, "loss_num": 0.0164794921875, "loss_xval": 0.443359375, "num_input_tokens_seen": 477511612, "step": 8522 }, { "epoch": 18.98218262806236, "grad_norm": 18.045700073242188, "learning_rate": 1e-06, "loss": 0.3042, "num_input_tokens_seen": 477566404, "step": 8523 }, { "epoch": 18.98218262806236, "loss": 0.3434655964374542, "loss_ce": 8.181348675861955e-05, "loss_iou": 0.1396484375, "loss_num": 0.01300048828125, "loss_xval": 0.34375, "num_input_tokens_seen": 477566404, "step": 8523 }, { "epoch": 18.984409799554566, "grad_norm": 21.54753875732422, "learning_rate": 1e-06, "loss": 0.4359, "num_input_tokens_seen": 477623140, "step": 8524 }, { "epoch": 18.984409799554566, "loss": 0.4005663990974426, "loss_ce": 0.0004199253162369132, "loss_iou": 0.1640625, "loss_num": 0.014404296875, "loss_xval": 0.400390625, "num_input_tokens_seen": 477623140, "step": 8524 }, { "epoch": 18.98663697104677, "grad_norm": 14.066349983215332, "learning_rate": 1e-06, "loss": 0.3884, "num_input_tokens_seen": 477678528, "step": 8525 }, { "epoch": 18.98663697104677, "loss": 0.42566099762916565, "loss_ce": 6.285572453634813e-05, "loss_iou": 0.197265625, "loss_num": 0.00616455078125, "loss_xval": 0.42578125, "num_input_tokens_seen": 477678528, "step": 8525 }, { "epoch": 18.988864142538976, "grad_norm": 17.08536720275879, "learning_rate": 1e-06, "loss": 0.3323, "num_input_tokens_seen": 477735340, "step": 8526 }, { "epoch": 18.988864142538976, "loss": 0.3974015414714813, "loss_ce": 0.00012369919568300247, "loss_iou": 0.17578125, "loss_num": 0.009033203125, "loss_xval": 0.396484375, "num_input_tokens_seen": 477735340, "step": 8526 }, { "epoch": 18.99109131403118, "grad_norm": 17.082090377807617, "learning_rate": 1e-06, "loss": 0.3237, "num_input_tokens_seen": 477792108, "step": 8527 }, { "epoch": 18.99109131403118, "loss": 0.27058377861976624, "loss_ce": 7.597178046125919e-05, "loss_iou": 0.1103515625, "loss_num": 0.010009765625, "loss_xval": 0.26953125, "num_input_tokens_seen": 477792108, "step": 8527 }, { "epoch": 18.993318485523385, "grad_norm": 13.515477180480957, "learning_rate": 1e-06, "loss": 0.35, "num_input_tokens_seen": 477846252, "step": 8528 }, { "epoch": 18.993318485523385, "loss": 0.24356822669506073, "loss_ce": 6.846075120847672e-05, "loss_iou": 0.10693359375, "loss_num": 0.006011962890625, "loss_xval": 0.2431640625, "num_input_tokens_seen": 477846252, "step": 8528 }, { "epoch": 18.99554565701559, "grad_norm": 12.548789978027344, "learning_rate": 1e-06, "loss": 0.4884, "num_input_tokens_seen": 477901432, "step": 8529 }, { "epoch": 18.99554565701559, "loss": 0.44635307788848877, "loss_ce": 6.401098653441295e-05, "loss_iou": 0.1806640625, "loss_num": 0.01708984375, "loss_xval": 0.4453125, "num_input_tokens_seen": 477901432, "step": 8529 }, { "epoch": 18.997772828507795, "grad_norm": 19.36771011352539, "learning_rate": 1e-06, "loss": 0.4848, "num_input_tokens_seen": 477959164, "step": 8530 }, { "epoch": 18.997772828507795, "loss": 0.4359922409057617, "loss_ce": 0.00014018421643413603, "loss_iou": 0.185546875, "loss_num": 0.012939453125, "loss_xval": 0.435546875, "num_input_tokens_seen": 477959164, "step": 8530 }, { "epoch": 19.0, "grad_norm": 24.555925369262695, "learning_rate": 1e-06, "loss": 0.4317, "num_input_tokens_seen": 478014600, "step": 8531 }, { "epoch": 19.0, "loss": 0.5009192228317261, "loss_ce": 6.472200766438618e-05, "loss_iou": 0.2236328125, "loss_num": 0.0108642578125, "loss_xval": 0.5, "num_input_tokens_seen": 478014600, "step": 8531 }, { "epoch": 19.002227171492205, "grad_norm": 19.39182472229004, "learning_rate": 1e-06, "loss": 0.3046, "num_input_tokens_seen": 478070168, "step": 8532 }, { "epoch": 19.002227171492205, "loss": 0.2748461961746216, "loss_ce": 6.590808334294707e-05, "loss_iou": 0.12255859375, "loss_num": 0.005859375, "loss_xval": 0.275390625, "num_input_tokens_seen": 478070168, "step": 8532 }, { "epoch": 19.00445434298441, "grad_norm": 15.633817672729492, "learning_rate": 1e-06, "loss": 0.2743, "num_input_tokens_seen": 478124420, "step": 8533 }, { "epoch": 19.00445434298441, "loss": 0.2822956442832947, "loss_ce": 6.907560600666329e-05, "loss_iou": 0.11865234375, "loss_num": 0.00897216796875, "loss_xval": 0.28125, "num_input_tokens_seen": 478124420, "step": 8533 }, { "epoch": 19.006681514476615, "grad_norm": 20.823501586914062, "learning_rate": 1e-06, "loss": 0.4119, "num_input_tokens_seen": 478179792, "step": 8534 }, { "epoch": 19.006681514476615, "loss": 0.4808357357978821, "loss_ce": 6.183330697240308e-05, "loss_iou": 0.2001953125, "loss_num": 0.01611328125, "loss_xval": 0.48046875, "num_input_tokens_seen": 478179792, "step": 8534 }, { "epoch": 19.00890868596882, "grad_norm": 23.914974212646484, "learning_rate": 1e-06, "loss": 0.433, "num_input_tokens_seen": 478236068, "step": 8535 }, { "epoch": 19.00890868596882, "loss": 0.5453649759292603, "loss_ce": 7.688709592912346e-05, "loss_iou": 0.2412109375, "loss_num": 0.0125732421875, "loss_xval": 0.546875, "num_input_tokens_seen": 478236068, "step": 8535 }, { "epoch": 19.011135857461024, "grad_norm": 13.552783966064453, "learning_rate": 1e-06, "loss": 0.2469, "num_input_tokens_seen": 478293348, "step": 8536 }, { "epoch": 19.011135857461024, "loss": 0.2078023999929428, "loss_ce": 6.924466288182884e-05, "loss_iou": 0.09375, "loss_num": 0.0040283203125, "loss_xval": 0.2080078125, "num_input_tokens_seen": 478293348, "step": 8536 }, { "epoch": 19.01336302895323, "grad_norm": 16.82329750061035, "learning_rate": 1e-06, "loss": 0.4532, "num_input_tokens_seen": 478352520, "step": 8537 }, { "epoch": 19.01336302895323, "loss": 0.4592329263687134, "loss_ce": 6.543014023918658e-05, "loss_iou": 0.205078125, "loss_num": 0.010009765625, "loss_xval": 0.458984375, "num_input_tokens_seen": 478352520, "step": 8537 }, { "epoch": 19.015590200445434, "grad_norm": 18.402179718017578, "learning_rate": 1e-06, "loss": 0.4807, "num_input_tokens_seen": 478409540, "step": 8538 }, { "epoch": 19.015590200445434, "loss": 0.5504905581474304, "loss_ce": 7.548804569523782e-05, "loss_iou": 0.240234375, "loss_num": 0.0140380859375, "loss_xval": 0.55078125, "num_input_tokens_seen": 478409540, "step": 8538 }, { "epoch": 19.01781737193764, "grad_norm": 18.57283592224121, "learning_rate": 1e-06, "loss": 0.2209, "num_input_tokens_seen": 478465060, "step": 8539 }, { "epoch": 19.01781737193764, "loss": 0.2368859052658081, "loss_ce": 6.950320675969124e-05, "loss_iou": 0.09912109375, "loss_num": 0.00762939453125, "loss_xval": 0.236328125, "num_input_tokens_seen": 478465060, "step": 8539 }, { "epoch": 19.020044543429844, "grad_norm": 20.981054306030273, "learning_rate": 1e-06, "loss": 0.4838, "num_input_tokens_seen": 478520264, "step": 8540 }, { "epoch": 19.020044543429844, "loss": 0.39203161001205444, "loss_ce": 6.387151370290667e-05, "loss_iou": 0.1650390625, "loss_num": 0.01239013671875, "loss_xval": 0.392578125, "num_input_tokens_seen": 478520264, "step": 8540 }, { "epoch": 19.02227171492205, "grad_norm": 22.715524673461914, "learning_rate": 1e-06, "loss": 0.3701, "num_input_tokens_seen": 478577056, "step": 8541 }, { "epoch": 19.02227171492205, "loss": 0.37838345766067505, "loss_ce": 8.756719762459397e-05, "loss_iou": 0.158203125, "loss_num": 0.01239013671875, "loss_xval": 0.37890625, "num_input_tokens_seen": 478577056, "step": 8541 }, { "epoch": 19.024498886414253, "grad_norm": 19.18893051147461, "learning_rate": 1e-06, "loss": 0.3867, "num_input_tokens_seen": 478633512, "step": 8542 }, { "epoch": 19.024498886414253, "loss": 0.3619902729988098, "loss_ce": 6.705922714900225e-05, "loss_iou": 0.1640625, "loss_num": 0.00689697265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 478633512, "step": 8542 }, { "epoch": 19.02672605790646, "grad_norm": 16.043140411376953, "learning_rate": 1e-06, "loss": 0.5777, "num_input_tokens_seen": 478689620, "step": 8543 }, { "epoch": 19.02672605790646, "loss": 0.8731194734573364, "loss_ce": 7.26049329387024e-05, "loss_iou": 0.375, "loss_num": 0.02490234375, "loss_xval": 0.875, "num_input_tokens_seen": 478689620, "step": 8543 }, { "epoch": 19.028953229398663, "grad_norm": 22.03814697265625, "learning_rate": 1e-06, "loss": 0.3668, "num_input_tokens_seen": 478745996, "step": 8544 }, { "epoch": 19.028953229398663, "loss": 0.3608505129814148, "loss_ce": 7.170035678427666e-05, "loss_iou": 0.158203125, "loss_num": 0.00909423828125, "loss_xval": 0.361328125, "num_input_tokens_seen": 478745996, "step": 8544 }, { "epoch": 19.031180400890868, "grad_norm": 18.709949493408203, "learning_rate": 1e-06, "loss": 0.3843, "num_input_tokens_seen": 478802344, "step": 8545 }, { "epoch": 19.031180400890868, "loss": 0.3520601689815521, "loss_ce": 7.041079516056925e-05, "loss_iou": 0.1328125, "loss_num": 0.017333984375, "loss_xval": 0.3515625, "num_input_tokens_seen": 478802344, "step": 8545 }, { "epoch": 19.033407572383073, "grad_norm": 14.813765525817871, "learning_rate": 1e-06, "loss": 0.463, "num_input_tokens_seen": 478860080, "step": 8546 }, { "epoch": 19.033407572383073, "loss": 0.5505503416061401, "loss_ce": 7.423260831274092e-05, "loss_iou": 0.2060546875, "loss_num": 0.027587890625, "loss_xval": 0.55078125, "num_input_tokens_seen": 478860080, "step": 8546 }, { "epoch": 19.035634743875278, "grad_norm": 23.64231300354004, "learning_rate": 1e-06, "loss": 0.3234, "num_input_tokens_seen": 478919644, "step": 8547 }, { "epoch": 19.035634743875278, "loss": 0.3638024926185608, "loss_ce": 9.396575478604063e-05, "loss_iou": 0.1220703125, "loss_num": 0.02392578125, "loss_xval": 0.36328125, "num_input_tokens_seen": 478919644, "step": 8547 }, { "epoch": 19.037861915367483, "grad_norm": 22.59861183166504, "learning_rate": 1e-06, "loss": 0.2975, "num_input_tokens_seen": 478974500, "step": 8548 }, { "epoch": 19.037861915367483, "loss": 0.27411675453186035, "loss_ce": 6.891056546010077e-05, "loss_iou": 0.119140625, "loss_num": 0.00701904296875, "loss_xval": 0.2734375, "num_input_tokens_seen": 478974500, "step": 8548 }, { "epoch": 19.040089086859687, "grad_norm": 21.548479080200195, "learning_rate": 1e-06, "loss": 0.5299, "num_input_tokens_seen": 479029432, "step": 8549 }, { "epoch": 19.040089086859687, "loss": 0.5104723572731018, "loss_ce": 9.636413597036153e-05, "loss_iou": 0.20703125, "loss_num": 0.019287109375, "loss_xval": 0.51171875, "num_input_tokens_seen": 479029432, "step": 8549 }, { "epoch": 19.042316258351892, "grad_norm": 22.90362548828125, "learning_rate": 1e-06, "loss": 0.6133, "num_input_tokens_seen": 479086928, "step": 8550 }, { "epoch": 19.042316258351892, "loss": 0.6397278904914856, "loss_ce": 7.949080463731661e-05, "loss_iou": 0.263671875, "loss_num": 0.0228271484375, "loss_xval": 0.640625, "num_input_tokens_seen": 479086928, "step": 8550 }, { "epoch": 19.044543429844097, "grad_norm": 13.663710594177246, "learning_rate": 1e-06, "loss": 0.4714, "num_input_tokens_seen": 479143420, "step": 8551 }, { "epoch": 19.044543429844097, "loss": 0.41580140590667725, "loss_ce": 9.095711720874533e-05, "loss_iou": 0.1640625, "loss_num": 0.017333984375, "loss_xval": 0.416015625, "num_input_tokens_seen": 479143420, "step": 8551 }, { "epoch": 19.046770601336302, "grad_norm": 26.639122009277344, "learning_rate": 1e-06, "loss": 0.4388, "num_input_tokens_seen": 479199488, "step": 8552 }, { "epoch": 19.046770601336302, "loss": 0.4706590175628662, "loss_ce": 7.797098805895075e-05, "loss_iou": 0.2060546875, "loss_num": 0.01190185546875, "loss_xval": 0.470703125, "num_input_tokens_seen": 479199488, "step": 8552 }, { "epoch": 19.048997772828507, "grad_norm": 37.29257583618164, "learning_rate": 1e-06, "loss": 0.4645, "num_input_tokens_seen": 479255004, "step": 8553 }, { "epoch": 19.048997772828507, "loss": 0.6149426698684692, "loss_ce": 7.449123950209469e-05, "loss_iou": 0.248046875, "loss_num": 0.023681640625, "loss_xval": 0.61328125, "num_input_tokens_seen": 479255004, "step": 8553 }, { "epoch": 19.051224944320712, "grad_norm": 187.8523712158203, "learning_rate": 1e-06, "loss": 0.4137, "num_input_tokens_seen": 479310756, "step": 8554 }, { "epoch": 19.051224944320712, "loss": 0.35520029067993164, "loss_ce": 9.777834929991513e-05, "loss_iou": 0.14453125, "loss_num": 0.01336669921875, "loss_xval": 0.35546875, "num_input_tokens_seen": 479310756, "step": 8554 }, { "epoch": 19.053452115812917, "grad_norm": 73.10868835449219, "learning_rate": 1e-06, "loss": 0.4949, "num_input_tokens_seen": 479365280, "step": 8555 }, { "epoch": 19.053452115812917, "loss": 0.5539067387580872, "loss_ce": 7.372665277216583e-05, "loss_iou": 0.2451171875, "loss_num": 0.01275634765625, "loss_xval": 0.5546875, "num_input_tokens_seen": 479365280, "step": 8555 }, { "epoch": 19.05567928730512, "grad_norm": 25.378646850585938, "learning_rate": 1e-06, "loss": 0.261, "num_input_tokens_seen": 479421220, "step": 8556 }, { "epoch": 19.05567928730512, "loss": 0.2171689122915268, "loss_ce": 6.685940024908632e-05, "loss_iou": 0.09033203125, "loss_num": 0.007293701171875, "loss_xval": 0.216796875, "num_input_tokens_seen": 479421220, "step": 8556 }, { "epoch": 19.057906458797326, "grad_norm": 14.335795402526855, "learning_rate": 1e-06, "loss": 0.289, "num_input_tokens_seen": 479477700, "step": 8557 }, { "epoch": 19.057906458797326, "loss": 0.18457838892936707, "loss_ce": 6.912185926921666e-05, "loss_iou": 0.07666015625, "loss_num": 0.00616455078125, "loss_xval": 0.1845703125, "num_input_tokens_seen": 479477700, "step": 8557 }, { "epoch": 19.06013363028953, "grad_norm": 14.203603744506836, "learning_rate": 1e-06, "loss": 0.4151, "num_input_tokens_seen": 479536192, "step": 8558 }, { "epoch": 19.06013363028953, "loss": 0.47530919313430786, "loss_ce": 8.947977039497346e-05, "loss_iou": 0.208984375, "loss_num": 0.01153564453125, "loss_xval": 0.474609375, "num_input_tokens_seen": 479536192, "step": 8558 }, { "epoch": 19.062360801781736, "grad_norm": 15.753239631652832, "learning_rate": 1e-06, "loss": 0.2125, "num_input_tokens_seen": 479591752, "step": 8559 }, { "epoch": 19.062360801781736, "loss": 0.19928745925426483, "loss_ce": 6.870189099572599e-05, "loss_iou": 0.07568359375, "loss_num": 0.0096435546875, "loss_xval": 0.19921875, "num_input_tokens_seen": 479591752, "step": 8559 }, { "epoch": 19.06458797327394, "grad_norm": 13.365279197692871, "learning_rate": 1e-06, "loss": 0.2772, "num_input_tokens_seen": 479650200, "step": 8560 }, { "epoch": 19.06458797327394, "loss": 0.31562650203704834, "loss_ce": 7.472425932064652e-05, "loss_iou": 0.140625, "loss_num": 0.00689697265625, "loss_xval": 0.31640625, "num_input_tokens_seen": 479650200, "step": 8560 }, { "epoch": 19.066815144766146, "grad_norm": 16.33476448059082, "learning_rate": 1e-06, "loss": 0.3525, "num_input_tokens_seen": 479705436, "step": 8561 }, { "epoch": 19.066815144766146, "loss": 0.3400951027870178, "loss_ce": 6.827242759754881e-05, "loss_iou": 0.140625, "loss_num": 0.01177978515625, "loss_xval": 0.33984375, "num_input_tokens_seen": 479705436, "step": 8561 }, { "epoch": 19.06904231625835, "grad_norm": 11.422565460205078, "learning_rate": 1e-06, "loss": 0.2924, "num_input_tokens_seen": 479760176, "step": 8562 }, { "epoch": 19.06904231625835, "loss": 0.21368908882141113, "loss_ce": 6.604377267649397e-05, "loss_iou": 0.08740234375, "loss_num": 0.00775146484375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 479760176, "step": 8562 }, { "epoch": 19.071269487750556, "grad_norm": 14.924948692321777, "learning_rate": 1e-06, "loss": 0.3429, "num_input_tokens_seen": 479815112, "step": 8563 }, { "epoch": 19.071269487750556, "loss": 0.42559611797332764, "loss_ce": 5.89884803048335e-05, "loss_iou": 0.15234375, "loss_num": 0.0240478515625, "loss_xval": 0.42578125, "num_input_tokens_seen": 479815112, "step": 8563 }, { "epoch": 19.07349665924276, "grad_norm": 26.38053321838379, "learning_rate": 1e-06, "loss": 0.3599, "num_input_tokens_seen": 479870284, "step": 8564 }, { "epoch": 19.07349665924276, "loss": 0.36798515915870667, "loss_ce": 6.524256605189294e-05, "loss_iou": 0.1708984375, "loss_num": 0.00537109375, "loss_xval": 0.3671875, "num_input_tokens_seen": 479870284, "step": 8564 }, { "epoch": 19.075723830734965, "grad_norm": 13.398744583129883, "learning_rate": 1e-06, "loss": 0.3668, "num_input_tokens_seen": 479927696, "step": 8565 }, { "epoch": 19.075723830734965, "loss": 0.4121933877468109, "loss_ce": 8.401382365263999e-05, "loss_iou": 0.1884765625, "loss_num": 0.007171630859375, "loss_xval": 0.412109375, "num_input_tokens_seen": 479927696, "step": 8565 }, { "epoch": 19.07795100222717, "grad_norm": 23.407733917236328, "learning_rate": 1e-06, "loss": 0.5092, "num_input_tokens_seen": 479984008, "step": 8566 }, { "epoch": 19.07795100222717, "loss": 0.536881685256958, "loss_ce": 0.00013851752737537026, "loss_iou": 0.220703125, "loss_num": 0.0189208984375, "loss_xval": 0.53515625, "num_input_tokens_seen": 479984008, "step": 8566 }, { "epoch": 19.080178173719375, "grad_norm": 26.368122100830078, "learning_rate": 1e-06, "loss": 0.4519, "num_input_tokens_seen": 480038472, "step": 8567 }, { "epoch": 19.080178173719375, "loss": 0.5067821741104126, "loss_ce": 0.00031240255339071155, "loss_iou": 0.21484375, "loss_num": 0.01531982421875, "loss_xval": 0.5078125, "num_input_tokens_seen": 480038472, "step": 8567 }, { "epoch": 19.08240534521158, "grad_norm": 16.010215759277344, "learning_rate": 1e-06, "loss": 0.3062, "num_input_tokens_seen": 480091528, "step": 8568 }, { "epoch": 19.08240534521158, "loss": 0.3443114161491394, "loss_ce": 7.312708476092666e-05, "loss_iou": 0.1611328125, "loss_num": 0.0045166015625, "loss_xval": 0.34375, "num_input_tokens_seen": 480091528, "step": 8568 }, { "epoch": 19.084632516703785, "grad_norm": 15.22715950012207, "learning_rate": 1e-06, "loss": 0.3562, "num_input_tokens_seen": 480147780, "step": 8569 }, { "epoch": 19.084632516703785, "loss": 0.4474610388278961, "loss_ce": 7.335421105381101e-05, "loss_iou": 0.19140625, "loss_num": 0.01312255859375, "loss_xval": 0.447265625, "num_input_tokens_seen": 480147780, "step": 8569 }, { "epoch": 19.08685968819599, "grad_norm": 12.806982040405273, "learning_rate": 1e-06, "loss": 0.3955, "num_input_tokens_seen": 480203968, "step": 8570 }, { "epoch": 19.08685968819599, "loss": 0.2964867353439331, "loss_ce": 0.00010003504576161504, "loss_iou": 0.13671875, "loss_num": 0.004669189453125, "loss_xval": 0.296875, "num_input_tokens_seen": 480203968, "step": 8570 }, { "epoch": 19.089086859688194, "grad_norm": 21.179973602294922, "learning_rate": 1e-06, "loss": 0.356, "num_input_tokens_seen": 480260560, "step": 8571 }, { "epoch": 19.089086859688194, "loss": 0.29164212942123413, "loss_ce": 7.720879511907697e-05, "loss_iou": 0.1318359375, "loss_num": 0.005706787109375, "loss_xval": 0.291015625, "num_input_tokens_seen": 480260560, "step": 8571 }, { "epoch": 19.0913140311804, "grad_norm": 17.90711784362793, "learning_rate": 1e-06, "loss": 0.4287, "num_input_tokens_seen": 480317040, "step": 8572 }, { "epoch": 19.0913140311804, "loss": 0.5795639753341675, "loss_ce": 0.00015723146498203278, "loss_iou": 0.2421875, "loss_num": 0.0191650390625, "loss_xval": 0.578125, "num_input_tokens_seen": 480317040, "step": 8572 }, { "epoch": 19.093541202672604, "grad_norm": 26.034122467041016, "learning_rate": 1e-06, "loss": 0.3087, "num_input_tokens_seen": 480371384, "step": 8573 }, { "epoch": 19.093541202672604, "loss": 0.33735281229019165, "loss_ce": 7.252431532833725e-05, "loss_iou": 0.1474609375, "loss_num": 0.00830078125, "loss_xval": 0.337890625, "num_input_tokens_seen": 480371384, "step": 8573 }, { "epoch": 19.09576837416481, "grad_norm": 17.473873138427734, "learning_rate": 1e-06, "loss": 0.3811, "num_input_tokens_seen": 480426792, "step": 8574 }, { "epoch": 19.09576837416481, "loss": 0.4617387652397156, "loss_ce": 6.881446461193264e-05, "loss_iou": 0.1904296875, "loss_num": 0.0159912109375, "loss_xval": 0.4609375, "num_input_tokens_seen": 480426792, "step": 8574 }, { "epoch": 19.097995545657014, "grad_norm": 16.091129302978516, "learning_rate": 1e-06, "loss": 0.4135, "num_input_tokens_seen": 480483268, "step": 8575 }, { "epoch": 19.097995545657014, "loss": 0.29927489161491394, "loss_ce": 8.053722558543086e-05, "loss_iou": 0.1357421875, "loss_num": 0.005462646484375, "loss_xval": 0.298828125, "num_input_tokens_seen": 480483268, "step": 8575 }, { "epoch": 19.100222717149222, "grad_norm": 22.522878646850586, "learning_rate": 1e-06, "loss": 0.595, "num_input_tokens_seen": 480537020, "step": 8576 }, { "epoch": 19.100222717149222, "loss": 0.5641788244247437, "loss_ce": 9.193105506710708e-05, "loss_iou": 0.2470703125, "loss_num": 0.01409912109375, "loss_xval": 0.5625, "num_input_tokens_seen": 480537020, "step": 8576 }, { "epoch": 19.102449888641427, "grad_norm": 24.5418701171875, "learning_rate": 1e-06, "loss": 0.2987, "num_input_tokens_seen": 480592488, "step": 8577 }, { "epoch": 19.102449888641427, "loss": 0.2232351005077362, "loss_ce": 7.150069723138586e-05, "loss_iou": 0.1025390625, "loss_num": 0.003631591796875, "loss_xval": 0.2236328125, "num_input_tokens_seen": 480592488, "step": 8577 }, { "epoch": 19.104677060133632, "grad_norm": 27.310413360595703, "learning_rate": 1e-06, "loss": 0.4059, "num_input_tokens_seen": 480647028, "step": 8578 }, { "epoch": 19.104677060133632, "loss": 0.4668709635734558, "loss_ce": 7.407699013128877e-05, "loss_iou": 0.19921875, "loss_num": 0.013671875, "loss_xval": 0.466796875, "num_input_tokens_seen": 480647028, "step": 8578 }, { "epoch": 19.106904231625837, "grad_norm": 19.787166595458984, "learning_rate": 1e-06, "loss": 0.3002, "num_input_tokens_seen": 480702564, "step": 8579 }, { "epoch": 19.106904231625837, "loss": 0.2308284193277359, "loss_ce": 8.500830153934658e-05, "loss_iou": 0.107421875, "loss_num": 0.0031280517578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 480702564, "step": 8579 }, { "epoch": 19.10913140311804, "grad_norm": 20.583070755004883, "learning_rate": 1e-06, "loss": 0.3846, "num_input_tokens_seen": 480758448, "step": 8580 }, { "epoch": 19.10913140311804, "loss": 0.39584484696388245, "loss_ce": 6.236594344954938e-05, "loss_iou": 0.1591796875, "loss_num": 0.0155029296875, "loss_xval": 0.396484375, "num_input_tokens_seen": 480758448, "step": 8580 }, { "epoch": 19.111358574610247, "grad_norm": 20.13869285583496, "learning_rate": 1e-06, "loss": 0.3753, "num_input_tokens_seen": 480810532, "step": 8581 }, { "epoch": 19.111358574610247, "loss": 0.4490373134613037, "loss_ce": 6.272210157476366e-05, "loss_iou": 0.2060546875, "loss_num": 0.007476806640625, "loss_xval": 0.44921875, "num_input_tokens_seen": 480810532, "step": 8581 }, { "epoch": 19.11358574610245, "grad_norm": 27.062620162963867, "learning_rate": 1e-06, "loss": 0.4051, "num_input_tokens_seen": 480867128, "step": 8582 }, { "epoch": 19.11358574610245, "loss": 0.388627290725708, "loss_ce": 7.747422205284238e-05, "loss_iou": 0.181640625, "loss_num": 0.005279541015625, "loss_xval": 0.388671875, "num_input_tokens_seen": 480867128, "step": 8582 }, { "epoch": 19.115812917594656, "grad_norm": 14.657607078552246, "learning_rate": 1e-06, "loss": 0.2535, "num_input_tokens_seen": 480921476, "step": 8583 }, { "epoch": 19.115812917594656, "loss": 0.35783708095550537, "loss_ce": 0.00017106709128711373, "loss_iou": 0.1572265625, "loss_num": 0.008544921875, "loss_xval": 0.357421875, "num_input_tokens_seen": 480921476, "step": 8583 }, { "epoch": 19.11804008908686, "grad_norm": 21.15728759765625, "learning_rate": 1e-06, "loss": 0.4817, "num_input_tokens_seen": 480977352, "step": 8584 }, { "epoch": 19.11804008908686, "loss": 0.541458010673523, "loss_ce": 7.615622598677874e-05, "loss_iou": 0.212890625, "loss_num": 0.023193359375, "loss_xval": 0.54296875, "num_input_tokens_seen": 480977352, "step": 8584 }, { "epoch": 19.120267260579066, "grad_norm": 20.03176498413086, "learning_rate": 1e-06, "loss": 0.4498, "num_input_tokens_seen": 481031908, "step": 8585 }, { "epoch": 19.120267260579066, "loss": 0.45001912117004395, "loss_ce": 6.798194954171777e-05, "loss_iou": 0.169921875, "loss_num": 0.0218505859375, "loss_xval": 0.44921875, "num_input_tokens_seen": 481031908, "step": 8585 }, { "epoch": 19.12249443207127, "grad_norm": 20.93001937866211, "learning_rate": 1e-06, "loss": 0.3745, "num_input_tokens_seen": 481088708, "step": 8586 }, { "epoch": 19.12249443207127, "loss": 0.41656631231307983, "loss_ce": 6.239269714569673e-05, "loss_iou": 0.189453125, "loss_num": 0.00738525390625, "loss_xval": 0.416015625, "num_input_tokens_seen": 481088708, "step": 8586 }, { "epoch": 19.124721603563476, "grad_norm": 15.11413860321045, "learning_rate": 1e-06, "loss": 0.3466, "num_input_tokens_seen": 481146480, "step": 8587 }, { "epoch": 19.124721603563476, "loss": 0.3888677954673767, "loss_ce": 7.386014476651326e-05, "loss_iou": 0.1630859375, "loss_num": 0.01226806640625, "loss_xval": 0.388671875, "num_input_tokens_seen": 481146480, "step": 8587 }, { "epoch": 19.12694877505568, "grad_norm": 15.431100845336914, "learning_rate": 1e-06, "loss": 0.2699, "num_input_tokens_seen": 481202468, "step": 8588 }, { "epoch": 19.12694877505568, "loss": 0.22169449925422668, "loss_ce": 7.583482511108741e-05, "loss_iou": 0.09765625, "loss_num": 0.00537109375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 481202468, "step": 8588 }, { "epoch": 19.129175946547885, "grad_norm": 14.637504577636719, "learning_rate": 1e-06, "loss": 0.2738, "num_input_tokens_seen": 481258796, "step": 8589 }, { "epoch": 19.129175946547885, "loss": 0.2992823123931885, "loss_ce": 8.799122588243335e-05, "loss_iou": 0.1298828125, "loss_num": 0.008056640625, "loss_xval": 0.298828125, "num_input_tokens_seen": 481258796, "step": 8589 }, { "epoch": 19.13140311804009, "grad_norm": 18.58306312561035, "learning_rate": 1e-06, "loss": 0.4947, "num_input_tokens_seen": 481314720, "step": 8590 }, { "epoch": 19.13140311804009, "loss": 0.6162921786308289, "loss_ce": 8.123279258143157e-05, "loss_iou": 0.279296875, "loss_num": 0.0113525390625, "loss_xval": 0.6171875, "num_input_tokens_seen": 481314720, "step": 8590 }, { "epoch": 19.133630289532295, "grad_norm": 24.67803382873535, "learning_rate": 1e-06, "loss": 0.3293, "num_input_tokens_seen": 481371188, "step": 8591 }, { "epoch": 19.133630289532295, "loss": 0.3968676030635834, "loss_ce": 7.80346745159477e-05, "loss_iou": 0.173828125, "loss_num": 0.0098876953125, "loss_xval": 0.396484375, "num_input_tokens_seen": 481371188, "step": 8591 }, { "epoch": 19.1358574610245, "grad_norm": 10.766166687011719, "learning_rate": 1e-06, "loss": 0.3724, "num_input_tokens_seen": 481427332, "step": 8592 }, { "epoch": 19.1358574610245, "loss": 0.4355580806732178, "loss_ce": 7.223740976769477e-05, "loss_iou": 0.193359375, "loss_num": 0.0096435546875, "loss_xval": 0.435546875, "num_input_tokens_seen": 481427332, "step": 8592 }, { "epoch": 19.138084632516705, "grad_norm": 18.193437576293945, "learning_rate": 1e-06, "loss": 0.3822, "num_input_tokens_seen": 481484316, "step": 8593 }, { "epoch": 19.138084632516705, "loss": 0.3628009557723999, "loss_ce": 6.902994937263429e-05, "loss_iou": 0.1669921875, "loss_num": 0.005859375, "loss_xval": 0.36328125, "num_input_tokens_seen": 481484316, "step": 8593 }, { "epoch": 19.14031180400891, "grad_norm": 20.437171936035156, "learning_rate": 1e-06, "loss": 0.3256, "num_input_tokens_seen": 481538132, "step": 8594 }, { "epoch": 19.14031180400891, "loss": 0.22638912498950958, "loss_ce": 7.076388283167034e-05, "loss_iou": 0.10107421875, "loss_num": 0.0048828125, "loss_xval": 0.2265625, "num_input_tokens_seen": 481538132, "step": 8594 }, { "epoch": 19.142538975501115, "grad_norm": 17.62116050720215, "learning_rate": 1e-06, "loss": 0.414, "num_input_tokens_seen": 481596580, "step": 8595 }, { "epoch": 19.142538975501115, "loss": 0.3615182042121887, "loss_ce": 6.798798858653754e-05, "loss_iou": 0.1572265625, "loss_num": 0.0093994140625, "loss_xval": 0.361328125, "num_input_tokens_seen": 481596580, "step": 8595 }, { "epoch": 19.14476614699332, "grad_norm": 16.3442440032959, "learning_rate": 1e-06, "loss": 0.3613, "num_input_tokens_seen": 481653060, "step": 8596 }, { "epoch": 19.14476614699332, "loss": 0.39320868253707886, "loss_ce": 0.00014228782674763352, "loss_iou": 0.171875, "loss_num": 0.010009765625, "loss_xval": 0.392578125, "num_input_tokens_seen": 481653060, "step": 8596 }, { "epoch": 19.146993318485524, "grad_norm": 11.166417121887207, "learning_rate": 1e-06, "loss": 0.5462, "num_input_tokens_seen": 481709604, "step": 8597 }, { "epoch": 19.146993318485524, "loss": 0.8146294355392456, "loss_ce": 0.00025259278481826186, "loss_iou": 0.271484375, "loss_num": 0.05419921875, "loss_xval": 0.8125, "num_input_tokens_seen": 481709604, "step": 8597 }, { "epoch": 19.14922048997773, "grad_norm": 24.46466827392578, "learning_rate": 1e-06, "loss": 0.5409, "num_input_tokens_seen": 481766076, "step": 8598 }, { "epoch": 19.14922048997773, "loss": 0.37435752153396606, "loss_ce": 8.992976654553786e-05, "loss_iou": 0.1669921875, "loss_num": 0.008056640625, "loss_xval": 0.375, "num_input_tokens_seen": 481766076, "step": 8598 }, { "epoch": 19.151447661469934, "grad_norm": 13.304176330566406, "learning_rate": 1e-06, "loss": 0.3356, "num_input_tokens_seen": 481822160, "step": 8599 }, { "epoch": 19.151447661469934, "loss": 0.24649234116077423, "loss_ce": 7.815843855496496e-05, "loss_iou": 0.11083984375, "loss_num": 0.0050048828125, "loss_xval": 0.24609375, "num_input_tokens_seen": 481822160, "step": 8599 }, { "epoch": 19.15367483296214, "grad_norm": 31.582448959350586, "learning_rate": 1e-06, "loss": 0.2843, "num_input_tokens_seen": 481878668, "step": 8600 }, { "epoch": 19.15367483296214, "loss": 0.27753129601478577, "loss_ce": 6.547504017362371e-05, "loss_iou": 0.1259765625, "loss_num": 0.00518798828125, "loss_xval": 0.27734375, "num_input_tokens_seen": 481878668, "step": 8600 }, { "epoch": 19.155902004454344, "grad_norm": 16.38020896911621, "learning_rate": 1e-06, "loss": 0.3844, "num_input_tokens_seen": 481936496, "step": 8601 }, { "epoch": 19.155902004454344, "loss": 0.43722057342529297, "loss_ce": 8.677801815792918e-05, "loss_iou": 0.193359375, "loss_num": 0.01025390625, "loss_xval": 0.4375, "num_input_tokens_seen": 481936496, "step": 8601 }, { "epoch": 19.15812917594655, "grad_norm": 79.99256896972656, "learning_rate": 1e-06, "loss": 0.4235, "num_input_tokens_seen": 481993192, "step": 8602 }, { "epoch": 19.15812917594655, "loss": 0.4538060426712036, "loss_ce": 7.070847641443834e-05, "loss_iou": 0.1884765625, "loss_num": 0.015380859375, "loss_xval": 0.453125, "num_input_tokens_seen": 481993192, "step": 8602 }, { "epoch": 19.160356347438753, "grad_norm": 13.313328742980957, "learning_rate": 1e-06, "loss": 0.3876, "num_input_tokens_seen": 482048636, "step": 8603 }, { "epoch": 19.160356347438753, "loss": 0.39753034710884094, "loss_ce": 6.941702304175124e-05, "loss_iou": 0.177734375, "loss_num": 0.0084228515625, "loss_xval": 0.3984375, "num_input_tokens_seen": 482048636, "step": 8603 }, { "epoch": 19.16258351893096, "grad_norm": 27.735868453979492, "learning_rate": 1e-06, "loss": 0.5142, "num_input_tokens_seen": 482099436, "step": 8604 }, { "epoch": 19.16258351893096, "loss": 0.3438310921192169, "loss_ce": 8.108046313282102e-05, "loss_iou": 0.1611328125, "loss_num": 0.00421142578125, "loss_xval": 0.34375, "num_input_tokens_seen": 482099436, "step": 8604 }, { "epoch": 19.164810690423163, "grad_norm": 17.522174835205078, "learning_rate": 1e-06, "loss": 0.4765, "num_input_tokens_seen": 482154916, "step": 8605 }, { "epoch": 19.164810690423163, "loss": 0.4795961380004883, "loss_ce": 0.00010396166180726141, "loss_iou": 0.1953125, "loss_num": 0.017578125, "loss_xval": 0.48046875, "num_input_tokens_seen": 482154916, "step": 8605 }, { "epoch": 19.167037861915368, "grad_norm": 17.66177749633789, "learning_rate": 1e-06, "loss": 0.5927, "num_input_tokens_seen": 482211392, "step": 8606 }, { "epoch": 19.167037861915368, "loss": 0.5756547451019287, "loss_ce": 0.00021524931071326137, "loss_iou": 0.259765625, "loss_num": 0.0113525390625, "loss_xval": 0.57421875, "num_input_tokens_seen": 482211392, "step": 8606 }, { "epoch": 19.169265033407573, "grad_norm": 19.507108688354492, "learning_rate": 1e-06, "loss": 0.2847, "num_input_tokens_seen": 482270844, "step": 8607 }, { "epoch": 19.169265033407573, "loss": 0.2603249251842499, "loss_ce": 7.102765084709972e-05, "loss_iou": 0.1220703125, "loss_num": 0.00323486328125, "loss_xval": 0.259765625, "num_input_tokens_seen": 482270844, "step": 8607 }, { "epoch": 19.171492204899778, "grad_norm": 28.656322479248047, "learning_rate": 1e-06, "loss": 0.4528, "num_input_tokens_seen": 482325476, "step": 8608 }, { "epoch": 19.171492204899778, "loss": 0.43023812770843506, "loss_ce": 0.00012339533714111894, "loss_iou": 0.1923828125, "loss_num": 0.00909423828125, "loss_xval": 0.4296875, "num_input_tokens_seen": 482325476, "step": 8608 }, { "epoch": 19.173719376391983, "grad_norm": 23.096620559692383, "learning_rate": 1e-06, "loss": 0.3489, "num_input_tokens_seen": 482380532, "step": 8609 }, { "epoch": 19.173719376391983, "loss": 0.48346471786499023, "loss_ce": 6.630421557929367e-05, "loss_iou": 0.2109375, "loss_num": 0.01226806640625, "loss_xval": 0.484375, "num_input_tokens_seen": 482380532, "step": 8609 }, { "epoch": 19.175946547884188, "grad_norm": 21.371700286865234, "learning_rate": 1e-06, "loss": 0.4853, "num_input_tokens_seen": 482433260, "step": 8610 }, { "epoch": 19.175946547884188, "loss": 0.4420054256916046, "loss_ce": 0.00023296594736166298, "loss_iou": 0.1865234375, "loss_num": 0.01385498046875, "loss_xval": 0.44140625, "num_input_tokens_seen": 482433260, "step": 8610 }, { "epoch": 19.178173719376392, "grad_norm": 22.586750030517578, "learning_rate": 1e-06, "loss": 0.4347, "num_input_tokens_seen": 482489688, "step": 8611 }, { "epoch": 19.178173719376392, "loss": 0.42078396677970886, "loss_ce": 6.861792644485831e-05, "loss_iou": 0.1904296875, "loss_num": 0.008056640625, "loss_xval": 0.419921875, "num_input_tokens_seen": 482489688, "step": 8611 }, { "epoch": 19.180400890868597, "grad_norm": 14.678589820861816, "learning_rate": 1e-06, "loss": 0.2694, "num_input_tokens_seen": 482546336, "step": 8612 }, { "epoch": 19.180400890868597, "loss": 0.22610211372375488, "loss_ce": 8.891787729226053e-05, "loss_iou": 0.099609375, "loss_num": 0.00543212890625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 482546336, "step": 8612 }, { "epoch": 19.182628062360802, "grad_norm": 16.697389602661133, "learning_rate": 1e-06, "loss": 0.3748, "num_input_tokens_seen": 482603824, "step": 8613 }, { "epoch": 19.182628062360802, "loss": 0.37165093421936035, "loss_ce": 6.890263466630131e-05, "loss_iou": 0.1630859375, "loss_num": 0.00909423828125, "loss_xval": 0.37109375, "num_input_tokens_seen": 482603824, "step": 8613 }, { "epoch": 19.184855233853007, "grad_norm": 20.081663131713867, "learning_rate": 1e-06, "loss": 0.259, "num_input_tokens_seen": 482662992, "step": 8614 }, { "epoch": 19.184855233853007, "loss": 0.2634428143501282, "loss_ce": 7.611673936480656e-05, "loss_iou": 0.1103515625, "loss_num": 0.00848388671875, "loss_xval": 0.263671875, "num_input_tokens_seen": 482662992, "step": 8614 }, { "epoch": 19.187082405345212, "grad_norm": 15.964731216430664, "learning_rate": 1e-06, "loss": 0.3832, "num_input_tokens_seen": 482716832, "step": 8615 }, { "epoch": 19.187082405345212, "loss": 0.31769198179244995, "loss_ce": 6.505024066427723e-05, "loss_iou": 0.138671875, "loss_num": 0.00787353515625, "loss_xval": 0.318359375, "num_input_tokens_seen": 482716832, "step": 8615 }, { "epoch": 19.189309576837417, "grad_norm": 17.872699737548828, "learning_rate": 1e-06, "loss": 0.3562, "num_input_tokens_seen": 482771736, "step": 8616 }, { "epoch": 19.189309576837417, "loss": 0.36840832233428955, "loss_ce": 6.113122071838006e-05, "loss_iou": 0.1591796875, "loss_num": 0.00994873046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 482771736, "step": 8616 }, { "epoch": 19.19153674832962, "grad_norm": 17.340930938720703, "learning_rate": 1e-06, "loss": 0.3884, "num_input_tokens_seen": 482826724, "step": 8617 }, { "epoch": 19.19153674832962, "loss": 0.4055081009864807, "loss_ce": 8.206171332858503e-05, "loss_iou": 0.1796875, "loss_num": 0.00946044921875, "loss_xval": 0.40625, "num_input_tokens_seen": 482826724, "step": 8617 }, { "epoch": 19.193763919821826, "grad_norm": 17.151872634887695, "learning_rate": 1e-06, "loss": 0.303, "num_input_tokens_seen": 482883452, "step": 8618 }, { "epoch": 19.193763919821826, "loss": 0.4231913685798645, "loss_ce": 9.566033986629918e-05, "loss_iou": 0.1875, "loss_num": 0.0096435546875, "loss_xval": 0.423828125, "num_input_tokens_seen": 482883452, "step": 8618 }, { "epoch": 19.19599109131403, "grad_norm": 17.975557327270508, "learning_rate": 1e-06, "loss": 0.3269, "num_input_tokens_seen": 482941656, "step": 8619 }, { "epoch": 19.19599109131403, "loss": 0.3718949258327484, "loss_ce": 6.874144310131669e-05, "loss_iou": 0.1611328125, "loss_num": 0.00982666015625, "loss_xval": 0.37109375, "num_input_tokens_seen": 482941656, "step": 8619 }, { "epoch": 19.198218262806236, "grad_norm": 20.601882934570312, "learning_rate": 1e-06, "loss": 0.3974, "num_input_tokens_seen": 482997328, "step": 8620 }, { "epoch": 19.198218262806236, "loss": 0.32132768630981445, "loss_ce": 5.390634396462701e-05, "loss_iou": 0.13671875, "loss_num": 0.00970458984375, "loss_xval": 0.3203125, "num_input_tokens_seen": 482997328, "step": 8620 }, { "epoch": 19.20044543429844, "grad_norm": 23.428096771240234, "learning_rate": 1e-06, "loss": 0.4218, "num_input_tokens_seen": 483051884, "step": 8621 }, { "epoch": 19.20044543429844, "loss": 0.28058868646621704, "loss_ce": 7.111984450602904e-05, "loss_iou": 0.1240234375, "loss_num": 0.006500244140625, "loss_xval": 0.28125, "num_input_tokens_seen": 483051884, "step": 8621 }, { "epoch": 19.202672605790646, "grad_norm": 24.112133026123047, "learning_rate": 1e-06, "loss": 0.3654, "num_input_tokens_seen": 483111104, "step": 8622 }, { "epoch": 19.202672605790646, "loss": 0.35199394822120667, "loss_ce": 6.522829062305391e-05, "loss_iou": 0.16015625, "loss_num": 0.0064697265625, "loss_xval": 0.3515625, "num_input_tokens_seen": 483111104, "step": 8622 }, { "epoch": 19.20489977728285, "grad_norm": 21.558853149414062, "learning_rate": 1e-06, "loss": 0.2897, "num_input_tokens_seen": 483165588, "step": 8623 }, { "epoch": 19.20489977728285, "loss": 0.2628226578235626, "loss_ce": 6.632453005295247e-05, "loss_iou": 0.11669921875, "loss_num": 0.00592041015625, "loss_xval": 0.263671875, "num_input_tokens_seen": 483165588, "step": 8623 }, { "epoch": 19.207126948775056, "grad_norm": 17.29518699645996, "learning_rate": 1e-06, "loss": 0.278, "num_input_tokens_seen": 483217036, "step": 8624 }, { "epoch": 19.207126948775056, "loss": 0.24366407096385956, "loss_ce": 7.275763346115127e-05, "loss_iou": 0.10302734375, "loss_num": 0.007415771484375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 483217036, "step": 8624 }, { "epoch": 19.20935412026726, "grad_norm": 17.489004135131836, "learning_rate": 1e-06, "loss": 0.4856, "num_input_tokens_seen": 483274096, "step": 8625 }, { "epoch": 19.20935412026726, "loss": 0.37572720646858215, "loss_ce": 0.0001168523303931579, "loss_iou": 0.15625, "loss_num": 0.01251220703125, "loss_xval": 0.375, "num_input_tokens_seen": 483274096, "step": 8625 }, { "epoch": 19.211581291759465, "grad_norm": 17.45191192626953, "learning_rate": 1e-06, "loss": 0.5543, "num_input_tokens_seen": 483330112, "step": 8626 }, { "epoch": 19.211581291759465, "loss": 0.36878734827041626, "loss_ce": 7.397578156087548e-05, "loss_iou": 0.158203125, "loss_num": 0.01068115234375, "loss_xval": 0.369140625, "num_input_tokens_seen": 483330112, "step": 8626 }, { "epoch": 19.21380846325167, "grad_norm": 18.69688606262207, "learning_rate": 1e-06, "loss": 0.3022, "num_input_tokens_seen": 483388400, "step": 8627 }, { "epoch": 19.21380846325167, "loss": 0.24286232888698578, "loss_ce": 6.447385385399684e-05, "loss_iou": 0.103515625, "loss_num": 0.00701904296875, "loss_xval": 0.2431640625, "num_input_tokens_seen": 483388400, "step": 8627 }, { "epoch": 19.216035634743875, "grad_norm": 18.915224075317383, "learning_rate": 1e-06, "loss": 0.3235, "num_input_tokens_seen": 483444252, "step": 8628 }, { "epoch": 19.216035634743875, "loss": 0.40314409136772156, "loss_ce": 6.792263593524694e-05, "loss_iou": 0.1669921875, "loss_num": 0.013671875, "loss_xval": 0.40234375, "num_input_tokens_seen": 483444252, "step": 8628 }, { "epoch": 19.21826280623608, "grad_norm": 11.416555404663086, "learning_rate": 1e-06, "loss": 0.2597, "num_input_tokens_seen": 483503672, "step": 8629 }, { "epoch": 19.21826280623608, "loss": 0.2568409740924835, "loss_ce": 6.605684757232666e-05, "loss_iou": 0.1171875, "loss_num": 0.00457763671875, "loss_xval": 0.255859375, "num_input_tokens_seen": 483503672, "step": 8629 }, { "epoch": 19.220489977728285, "grad_norm": 21.57809066772461, "learning_rate": 1e-06, "loss": 0.4147, "num_input_tokens_seen": 483560072, "step": 8630 }, { "epoch": 19.220489977728285, "loss": 0.3244105577468872, "loss_ce": 6.974257121328264e-05, "loss_iou": 0.1298828125, "loss_num": 0.01300048828125, "loss_xval": 0.32421875, "num_input_tokens_seen": 483560072, "step": 8630 }, { "epoch": 19.22271714922049, "grad_norm": 16.324159622192383, "learning_rate": 1e-06, "loss": 0.2693, "num_input_tokens_seen": 483614548, "step": 8631 }, { "epoch": 19.22271714922049, "loss": 0.2832646369934082, "loss_ce": 6.152082642074674e-05, "loss_iou": 0.12060546875, "loss_num": 0.0084228515625, "loss_xval": 0.283203125, "num_input_tokens_seen": 483614548, "step": 8631 }, { "epoch": 19.224944320712694, "grad_norm": 29.336238861083984, "learning_rate": 1e-06, "loss": 0.3302, "num_input_tokens_seen": 483672292, "step": 8632 }, { "epoch": 19.224944320712694, "loss": 0.32135581970214844, "loss_ce": 6.674337782897055e-05, "loss_iou": 0.1494140625, "loss_num": 0.00439453125, "loss_xval": 0.3203125, "num_input_tokens_seen": 483672292, "step": 8632 }, { "epoch": 19.2271714922049, "grad_norm": 18.221179962158203, "learning_rate": 1e-06, "loss": 0.5706, "num_input_tokens_seen": 483729908, "step": 8633 }, { "epoch": 19.2271714922049, "loss": 0.6349157691001892, "loss_ce": 0.00015011939103715122, "loss_iou": 0.27734375, "loss_num": 0.016357421875, "loss_xval": 0.6328125, "num_input_tokens_seen": 483729908, "step": 8633 }, { "epoch": 19.229398663697104, "grad_norm": 18.27716636657715, "learning_rate": 1e-06, "loss": 0.4169, "num_input_tokens_seen": 483786140, "step": 8634 }, { "epoch": 19.229398663697104, "loss": 0.3237491250038147, "loss_ce": 7.969920261530206e-05, "loss_iou": 0.140625, "loss_num": 0.008544921875, "loss_xval": 0.32421875, "num_input_tokens_seen": 483786140, "step": 8634 }, { "epoch": 19.23162583518931, "grad_norm": 33.526123046875, "learning_rate": 1e-06, "loss": 0.3883, "num_input_tokens_seen": 483842292, "step": 8635 }, { "epoch": 19.23162583518931, "loss": 0.27155420184135437, "loss_ce": 6.981080514378846e-05, "loss_iou": 0.1201171875, "loss_num": 0.00616455078125, "loss_xval": 0.271484375, "num_input_tokens_seen": 483842292, "step": 8635 }, { "epoch": 19.233853006681514, "grad_norm": 12.381620407104492, "learning_rate": 1e-06, "loss": 0.3823, "num_input_tokens_seen": 483898136, "step": 8636 }, { "epoch": 19.233853006681514, "loss": 0.510744571685791, "loss_ce": 0.0001244788581971079, "loss_iou": 0.220703125, "loss_num": 0.0137939453125, "loss_xval": 0.51171875, "num_input_tokens_seen": 483898136, "step": 8636 }, { "epoch": 19.23608017817372, "grad_norm": 15.86150074005127, "learning_rate": 1e-06, "loss": 0.4379, "num_input_tokens_seen": 483952128, "step": 8637 }, { "epoch": 19.23608017817372, "loss": 0.4342408776283264, "loss_ce": 8.635166886961088e-05, "loss_iou": 0.189453125, "loss_num": 0.01092529296875, "loss_xval": 0.43359375, "num_input_tokens_seen": 483952128, "step": 8637 }, { "epoch": 19.238307349665924, "grad_norm": 22.681493759155273, "learning_rate": 1e-06, "loss": 0.347, "num_input_tokens_seen": 484007556, "step": 8638 }, { "epoch": 19.238307349665924, "loss": 0.26111987233161926, "loss_ce": 7.25125937606208e-05, "loss_iou": 0.1220703125, "loss_num": 0.0033721923828125, "loss_xval": 0.26171875, "num_input_tokens_seen": 484007556, "step": 8638 }, { "epoch": 19.24053452115813, "grad_norm": 16.731151580810547, "learning_rate": 1e-06, "loss": 0.3764, "num_input_tokens_seen": 484061948, "step": 8639 }, { "epoch": 19.24053452115813, "loss": 0.40643227100372314, "loss_ce": 6.019037391524762e-05, "loss_iou": 0.1884765625, "loss_num": 0.005859375, "loss_xval": 0.40625, "num_input_tokens_seen": 484061948, "step": 8639 }, { "epoch": 19.242761692650333, "grad_norm": 17.2427978515625, "learning_rate": 1e-06, "loss": 0.3552, "num_input_tokens_seen": 484118452, "step": 8640 }, { "epoch": 19.242761692650333, "loss": 0.3023113012313843, "loss_ce": 6.518626469187438e-05, "loss_iou": 0.130859375, "loss_num": 0.00799560546875, "loss_xval": 0.302734375, "num_input_tokens_seen": 484118452, "step": 8640 }, { "epoch": 19.244988864142538, "grad_norm": 48.943878173828125, "learning_rate": 1e-06, "loss": 0.3616, "num_input_tokens_seen": 484172552, "step": 8641 }, { "epoch": 19.244988864142538, "loss": 0.34272468090057373, "loss_ce": 7.331671076826751e-05, "loss_iou": 0.1416015625, "loss_num": 0.0115966796875, "loss_xval": 0.341796875, "num_input_tokens_seen": 484172552, "step": 8641 }, { "epoch": 19.247216035634743, "grad_norm": 14.787582397460938, "learning_rate": 1e-06, "loss": 0.3113, "num_input_tokens_seen": 484228180, "step": 8642 }, { "epoch": 19.247216035634743, "loss": 0.34266364574432373, "loss_ce": 7.330120570259169e-05, "loss_iou": 0.16015625, "loss_num": 0.00457763671875, "loss_xval": 0.341796875, "num_input_tokens_seen": 484228180, "step": 8642 }, { "epoch": 19.249443207126948, "grad_norm": 18.2416934967041, "learning_rate": 1e-06, "loss": 0.4346, "num_input_tokens_seen": 484283708, "step": 8643 }, { "epoch": 19.249443207126948, "loss": 0.37335968017578125, "loss_ce": 6.869265052955598e-05, "loss_iou": 0.1474609375, "loss_num": 0.015625, "loss_xval": 0.373046875, "num_input_tokens_seen": 484283708, "step": 8643 }, { "epoch": 19.251670378619153, "grad_norm": 18.17558479309082, "learning_rate": 1e-06, "loss": 0.3687, "num_input_tokens_seen": 484337716, "step": 8644 }, { "epoch": 19.251670378619153, "loss": 0.259723424911499, "loss_ce": 6.458204006776214e-05, "loss_iou": 0.11962890625, "loss_num": 0.004119873046875, "loss_xval": 0.259765625, "num_input_tokens_seen": 484337716, "step": 8644 }, { "epoch": 19.253897550111358, "grad_norm": 14.284744262695312, "learning_rate": 1e-06, "loss": 0.6006, "num_input_tokens_seen": 484392308, "step": 8645 }, { "epoch": 19.253897550111358, "loss": 0.6434794664382935, "loss_ce": 7.73589126765728e-05, "loss_iou": 0.25390625, "loss_num": 0.02734375, "loss_xval": 0.64453125, "num_input_tokens_seen": 484392308, "step": 8645 }, { "epoch": 19.256124721603562, "grad_norm": 12.745560646057129, "learning_rate": 1e-06, "loss": 0.2475, "num_input_tokens_seen": 484446740, "step": 8646 }, { "epoch": 19.256124721603562, "loss": 0.2596558630466461, "loss_ce": 7.333945541176945e-05, "loss_iou": 0.119140625, "loss_num": 0.0042724609375, "loss_xval": 0.259765625, "num_input_tokens_seen": 484446740, "step": 8646 }, { "epoch": 19.258351893095767, "grad_norm": 19.38439178466797, "learning_rate": 1e-06, "loss": 0.4594, "num_input_tokens_seen": 484501160, "step": 8647 }, { "epoch": 19.258351893095767, "loss": 0.4618569016456604, "loss_ce": 6.492799002444372e-05, "loss_iou": 0.20703125, "loss_num": 0.009765625, "loss_xval": 0.4609375, "num_input_tokens_seen": 484501160, "step": 8647 }, { "epoch": 19.260579064587972, "grad_norm": 14.868861198425293, "learning_rate": 1e-06, "loss": 0.5964, "num_input_tokens_seen": 484556848, "step": 8648 }, { "epoch": 19.260579064587972, "loss": 0.8090330362319946, "loss_ce": 7.303664460778236e-05, "loss_iou": 0.3359375, "loss_num": 0.02783203125, "loss_xval": 0.80859375, "num_input_tokens_seen": 484556848, "step": 8648 }, { "epoch": 19.262806236080177, "grad_norm": 17.061763763427734, "learning_rate": 1e-06, "loss": 0.3803, "num_input_tokens_seen": 484614472, "step": 8649 }, { "epoch": 19.262806236080177, "loss": 0.4142407476902008, "loss_ce": 5.6169934396166354e-05, "loss_iou": 0.1669921875, "loss_num": 0.0159912109375, "loss_xval": 0.4140625, "num_input_tokens_seen": 484614472, "step": 8649 }, { "epoch": 19.265033407572382, "grad_norm": 19.03131675720215, "learning_rate": 1e-06, "loss": 0.283, "num_input_tokens_seen": 484667276, "step": 8650 }, { "epoch": 19.265033407572382, "loss": 0.3287457227706909, "loss_ce": 0.0001019124174490571, "loss_iou": 0.1474609375, "loss_num": 0.0068359375, "loss_xval": 0.328125, "num_input_tokens_seen": 484667276, "step": 8650 }, { "epoch": 19.267260579064587, "grad_norm": 42.60155487060547, "learning_rate": 1e-06, "loss": 0.4224, "num_input_tokens_seen": 484723500, "step": 8651 }, { "epoch": 19.267260579064587, "loss": 0.37189292907714844, "loss_ce": 6.674042379017919e-05, "loss_iou": 0.154296875, "loss_num": 0.01275634765625, "loss_xval": 0.37109375, "num_input_tokens_seen": 484723500, "step": 8651 }, { "epoch": 19.26948775055679, "grad_norm": 17.580562591552734, "learning_rate": 1e-06, "loss": 0.363, "num_input_tokens_seen": 484779672, "step": 8652 }, { "epoch": 19.26948775055679, "loss": 0.2955964207649231, "loss_ce": 6.419201235985383e-05, "loss_iou": 0.130859375, "loss_num": 0.00701904296875, "loss_xval": 0.294921875, "num_input_tokens_seen": 484779672, "step": 8652 }, { "epoch": 19.271714922048996, "grad_norm": 17.12259292602539, "learning_rate": 1e-06, "loss": 0.4862, "num_input_tokens_seen": 484836188, "step": 8653 }, { "epoch": 19.271714922048996, "loss": 0.3255062699317932, "loss_ce": 6.683074752800167e-05, "loss_iou": 0.146484375, "loss_num": 0.00628662109375, "loss_xval": 0.326171875, "num_input_tokens_seen": 484836188, "step": 8653 }, { "epoch": 19.2739420935412, "grad_norm": 21.847190856933594, "learning_rate": 1e-06, "loss": 0.2709, "num_input_tokens_seen": 484895900, "step": 8654 }, { "epoch": 19.2739420935412, "loss": 0.25446265935897827, "loss_ce": 6.814206426497549e-05, "loss_iou": 0.115234375, "loss_num": 0.0047607421875, "loss_xval": 0.25390625, "num_input_tokens_seen": 484895900, "step": 8654 }, { "epoch": 19.276169265033406, "grad_norm": 13.837854385375977, "learning_rate": 1e-06, "loss": 0.3591, "num_input_tokens_seen": 484953396, "step": 8655 }, { "epoch": 19.276169265033406, "loss": 0.28464746475219727, "loss_ce": 0.00010157265205634758, "loss_iou": 0.123046875, "loss_num": 0.00775146484375, "loss_xval": 0.28515625, "num_input_tokens_seen": 484953396, "step": 8655 }, { "epoch": 19.27839643652561, "grad_norm": 18.63173484802246, "learning_rate": 1e-06, "loss": 0.5888, "num_input_tokens_seen": 485004972, "step": 8656 }, { "epoch": 19.27839643652561, "loss": 0.5696535706520081, "loss_ce": 7.348039071075618e-05, "loss_iou": 0.25390625, "loss_num": 0.0126953125, "loss_xval": 0.5703125, "num_input_tokens_seen": 485004972, "step": 8656 }, { "epoch": 19.280623608017816, "grad_norm": 23.69566535949707, "learning_rate": 1e-06, "loss": 0.4021, "num_input_tokens_seen": 485061276, "step": 8657 }, { "epoch": 19.280623608017816, "loss": 0.4415389895439148, "loss_ce": 0.00013270846102386713, "loss_iou": 0.2021484375, "loss_num": 0.00738525390625, "loss_xval": 0.44140625, "num_input_tokens_seen": 485061276, "step": 8657 }, { "epoch": 19.28285077951002, "grad_norm": 16.324665069580078, "learning_rate": 1e-06, "loss": 0.5027, "num_input_tokens_seen": 485116212, "step": 8658 }, { "epoch": 19.28285077951002, "loss": 0.656080961227417, "loss_ce": 7.512497541029006e-05, "loss_iou": 0.283203125, "loss_num": 0.0177001953125, "loss_xval": 0.65625, "num_input_tokens_seen": 485116212, "step": 8658 }, { "epoch": 19.285077951002226, "grad_norm": 25.834796905517578, "learning_rate": 1e-06, "loss": 0.5209, "num_input_tokens_seen": 485171484, "step": 8659 }, { "epoch": 19.285077951002226, "loss": 0.42792704701423645, "loss_ce": 7.059163181111217e-05, "loss_iou": 0.189453125, "loss_num": 0.009765625, "loss_xval": 0.427734375, "num_input_tokens_seen": 485171484, "step": 8659 }, { "epoch": 19.28730512249443, "grad_norm": 14.427014350891113, "learning_rate": 1e-06, "loss": 0.434, "num_input_tokens_seen": 485229056, "step": 8660 }, { "epoch": 19.28730512249443, "loss": 0.596036434173584, "loss_ce": 8.911389159038663e-05, "loss_iou": 0.236328125, "loss_num": 0.0247802734375, "loss_xval": 0.59765625, "num_input_tokens_seen": 485229056, "step": 8660 }, { "epoch": 19.289532293986635, "grad_norm": 56.55402374267578, "learning_rate": 1e-06, "loss": 0.5668, "num_input_tokens_seen": 485286492, "step": 8661 }, { "epoch": 19.289532293986635, "loss": 0.6791332960128784, "loss_ce": 5.614275869447738e-05, "loss_iou": 0.267578125, "loss_num": 0.0284423828125, "loss_xval": 0.6796875, "num_input_tokens_seen": 485286492, "step": 8661 }, { "epoch": 19.29175946547884, "grad_norm": 16.25436019897461, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 485343172, "step": 8662 }, { "epoch": 19.29175946547884, "loss": 0.30635008215904236, "loss_ce": 7.566015119664371e-05, "loss_iou": 0.1357421875, "loss_num": 0.007049560546875, "loss_xval": 0.306640625, "num_input_tokens_seen": 485343172, "step": 8662 }, { "epoch": 19.293986636971045, "grad_norm": 17.356847763061523, "learning_rate": 1e-06, "loss": 0.3913, "num_input_tokens_seen": 485399704, "step": 8663 }, { "epoch": 19.293986636971045, "loss": 0.4219398498535156, "loss_ce": 6.486824713647366e-05, "loss_iou": 0.173828125, "loss_num": 0.01470947265625, "loss_xval": 0.421875, "num_input_tokens_seen": 485399704, "step": 8663 }, { "epoch": 19.29621380846325, "grad_norm": 21.03586769104004, "learning_rate": 1e-06, "loss": 0.2966, "num_input_tokens_seen": 485455500, "step": 8664 }, { "epoch": 19.29621380846325, "loss": 0.22834303975105286, "loss_ce": 7.155879575293511e-05, "loss_iou": 0.103515625, "loss_num": 0.0042724609375, "loss_xval": 0.228515625, "num_input_tokens_seen": 485455500, "step": 8664 }, { "epoch": 19.29844097995546, "grad_norm": 25.01719093322754, "learning_rate": 1e-06, "loss": 0.4601, "num_input_tokens_seen": 485511580, "step": 8665 }, { "epoch": 19.29844097995546, "loss": 0.5397475361824036, "loss_ce": 7.467882824130356e-05, "loss_iou": 0.259765625, "loss_num": 0.004241943359375, "loss_xval": 0.5390625, "num_input_tokens_seen": 485511580, "step": 8665 }, { "epoch": 19.30066815144766, "grad_norm": 15.293524742126465, "learning_rate": 1e-06, "loss": 0.4385, "num_input_tokens_seen": 485567524, "step": 8666 }, { "epoch": 19.30066815144766, "loss": 0.29761672019958496, "loss_ce": 7.03330006217584e-05, "loss_iou": 0.1279296875, "loss_num": 0.00830078125, "loss_xval": 0.296875, "num_input_tokens_seen": 485567524, "step": 8666 }, { "epoch": 19.302895322939868, "grad_norm": 24.937509536743164, "learning_rate": 1e-06, "loss": 0.4014, "num_input_tokens_seen": 485623744, "step": 8667 }, { "epoch": 19.302895322939868, "loss": 0.5748975872993469, "loss_ce": 6.84759725118056e-05, "loss_iou": 0.251953125, "loss_num": 0.013916015625, "loss_xval": 0.57421875, "num_input_tokens_seen": 485623744, "step": 8667 }, { "epoch": 19.305122494432073, "grad_norm": 16.04351234436035, "learning_rate": 1e-06, "loss": 0.4303, "num_input_tokens_seen": 485680980, "step": 8668 }, { "epoch": 19.305122494432073, "loss": 0.310863733291626, "loss_ce": 7.271443610079587e-05, "loss_iou": 0.12890625, "loss_num": 0.01043701171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 485680980, "step": 8668 }, { "epoch": 19.307349665924278, "grad_norm": 34.41288757324219, "learning_rate": 1e-06, "loss": 0.5049, "num_input_tokens_seen": 485738648, "step": 8669 }, { "epoch": 19.307349665924278, "loss": 0.6703619956970215, "loss_ce": 7.39232636988163e-05, "loss_iou": 0.302734375, "loss_num": 0.01287841796875, "loss_xval": 0.671875, "num_input_tokens_seen": 485738648, "step": 8669 }, { "epoch": 19.309576837416483, "grad_norm": 16.024730682373047, "learning_rate": 1e-06, "loss": 0.4571, "num_input_tokens_seen": 485795136, "step": 8670 }, { "epoch": 19.309576837416483, "loss": 0.3097517490386963, "loss_ce": 5.93543445575051e-05, "loss_iou": 0.1318359375, "loss_num": 0.00921630859375, "loss_xval": 0.310546875, "num_input_tokens_seen": 485795136, "step": 8670 }, { "epoch": 19.311804008908688, "grad_norm": 22.233224868774414, "learning_rate": 1e-06, "loss": 0.3574, "num_input_tokens_seen": 485853408, "step": 8671 }, { "epoch": 19.311804008908688, "loss": 0.45661628246307373, "loss_ce": 7.331735105253756e-05, "loss_iou": 0.21875, "loss_num": 0.00396728515625, "loss_xval": 0.45703125, "num_input_tokens_seen": 485853408, "step": 8671 }, { "epoch": 19.314031180400892, "grad_norm": 21.287254333496094, "learning_rate": 1e-06, "loss": 0.2966, "num_input_tokens_seen": 485908808, "step": 8672 }, { "epoch": 19.314031180400892, "loss": 0.31123119592666626, "loss_ce": 7.398641901090741e-05, "loss_iou": 0.146484375, "loss_num": 0.003875732421875, "loss_xval": 0.310546875, "num_input_tokens_seen": 485908808, "step": 8672 }, { "epoch": 19.316258351893097, "grad_norm": 17.1425838470459, "learning_rate": 1e-06, "loss": 0.4107, "num_input_tokens_seen": 485965732, "step": 8673 }, { "epoch": 19.316258351893097, "loss": 0.4722324013710022, "loss_ce": 6.443218444474041e-05, "loss_iou": 0.1982421875, "loss_num": 0.01495361328125, "loss_xval": 0.47265625, "num_input_tokens_seen": 485965732, "step": 8673 }, { "epoch": 19.318485523385302, "grad_norm": 23.668569564819336, "learning_rate": 1e-06, "loss": 0.4553, "num_input_tokens_seen": 486019256, "step": 8674 }, { "epoch": 19.318485523385302, "loss": 0.5221450328826904, "loss_ce": 8.080543193500489e-05, "loss_iou": 0.2138671875, "loss_num": 0.0189208984375, "loss_xval": 0.5234375, "num_input_tokens_seen": 486019256, "step": 8674 }, { "epoch": 19.320712694877507, "grad_norm": 19.682819366455078, "learning_rate": 1e-06, "loss": 0.2918, "num_input_tokens_seen": 486077160, "step": 8675 }, { "epoch": 19.320712694877507, "loss": 0.2836154103279114, "loss_ce": 7.660247501917183e-05, "loss_iou": 0.12158203125, "loss_num": 0.0079345703125, "loss_xval": 0.283203125, "num_input_tokens_seen": 486077160, "step": 8675 }, { "epoch": 19.322939866369712, "grad_norm": 19.56645965576172, "learning_rate": 1e-06, "loss": 0.4519, "num_input_tokens_seen": 486132592, "step": 8676 }, { "epoch": 19.322939866369712, "loss": 0.4211920499801636, "loss_ce": 0.00011051179171772674, "loss_iou": 0.166015625, "loss_num": 0.0177001953125, "loss_xval": 0.421875, "num_input_tokens_seen": 486132592, "step": 8676 }, { "epoch": 19.325167037861917, "grad_norm": 10.575581550598145, "learning_rate": 1e-06, "loss": 0.3449, "num_input_tokens_seen": 486190616, "step": 8677 }, { "epoch": 19.325167037861917, "loss": 0.2473614513874054, "loss_ce": 6.224659591680393e-05, "loss_iou": 0.1123046875, "loss_num": 0.0045166015625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 486190616, "step": 8677 }, { "epoch": 19.32739420935412, "grad_norm": 16.035282135009766, "learning_rate": 1e-06, "loss": 0.4254, "num_input_tokens_seen": 486245220, "step": 8678 }, { "epoch": 19.32739420935412, "loss": 0.3443076014518738, "loss_ce": 6.930766539881006e-05, "loss_iou": 0.15625, "loss_num": 0.006195068359375, "loss_xval": 0.34375, "num_input_tokens_seen": 486245220, "step": 8678 }, { "epoch": 19.329621380846326, "grad_norm": 17.30709457397461, "learning_rate": 1e-06, "loss": 0.4013, "num_input_tokens_seen": 486302496, "step": 8679 }, { "epoch": 19.329621380846326, "loss": 0.3727584779262543, "loss_ce": 7.78008543420583e-05, "loss_iou": 0.16796875, "loss_num": 0.007110595703125, "loss_xval": 0.373046875, "num_input_tokens_seen": 486302496, "step": 8679 }, { "epoch": 19.33184855233853, "grad_norm": 20.775163650512695, "learning_rate": 1e-06, "loss": 0.415, "num_input_tokens_seen": 486356500, "step": 8680 }, { "epoch": 19.33184855233853, "loss": 0.4452681541442871, "loss_ce": 7.77466339059174e-05, "loss_iou": 0.2099609375, "loss_num": 0.005218505859375, "loss_xval": 0.4453125, "num_input_tokens_seen": 486356500, "step": 8680 }, { "epoch": 19.334075723830736, "grad_norm": 35.834449768066406, "learning_rate": 1e-06, "loss": 0.2574, "num_input_tokens_seen": 486413916, "step": 8681 }, { "epoch": 19.334075723830736, "loss": 0.27562782168388367, "loss_ce": 6.935855344636366e-05, "loss_iou": 0.11669921875, "loss_num": 0.0084228515625, "loss_xval": 0.275390625, "num_input_tokens_seen": 486413916, "step": 8681 }, { "epoch": 19.33630289532294, "grad_norm": 14.752124786376953, "learning_rate": 1e-06, "loss": 0.3223, "num_input_tokens_seen": 486471340, "step": 8682 }, { "epoch": 19.33630289532294, "loss": 0.43706992268562317, "loss_ce": 5.819134821649641e-05, "loss_iou": 0.1826171875, "loss_num": 0.01446533203125, "loss_xval": 0.4375, "num_input_tokens_seen": 486471340, "step": 8682 }, { "epoch": 19.338530066815146, "grad_norm": 19.141727447509766, "learning_rate": 1e-06, "loss": 0.3472, "num_input_tokens_seen": 486529184, "step": 8683 }, { "epoch": 19.338530066815146, "loss": 0.2464015781879425, "loss_ce": 6.367703463183716e-05, "loss_iou": 0.109375, "loss_num": 0.005645751953125, "loss_xval": 0.24609375, "num_input_tokens_seen": 486529184, "step": 8683 }, { "epoch": 19.34075723830735, "grad_norm": 19.265857696533203, "learning_rate": 1e-06, "loss": 0.2633, "num_input_tokens_seen": 486584540, "step": 8684 }, { "epoch": 19.34075723830735, "loss": 0.24787020683288574, "loss_ce": 6.746564758941531e-05, "loss_iou": 0.11181640625, "loss_num": 0.0048828125, "loss_xval": 0.248046875, "num_input_tokens_seen": 486584540, "step": 8684 }, { "epoch": 19.342984409799556, "grad_norm": 64.30094909667969, "learning_rate": 1e-06, "loss": 0.4085, "num_input_tokens_seen": 486639632, "step": 8685 }, { "epoch": 19.342984409799556, "loss": 0.3603741526603699, "loss_ce": 8.36373510537669e-05, "loss_iou": 0.1689453125, "loss_num": 0.004547119140625, "loss_xval": 0.359375, "num_input_tokens_seen": 486639632, "step": 8685 }, { "epoch": 19.34521158129176, "grad_norm": 22.54205322265625, "learning_rate": 1e-06, "loss": 0.4647, "num_input_tokens_seen": 486693520, "step": 8686 }, { "epoch": 19.34521158129176, "loss": 0.39545318484306335, "loss_ce": 6.7443739681039e-05, "loss_iou": 0.173828125, "loss_num": 0.00970458984375, "loss_xval": 0.39453125, "num_input_tokens_seen": 486693520, "step": 8686 }, { "epoch": 19.347438752783965, "grad_norm": 19.74683380126953, "learning_rate": 1e-06, "loss": 0.5212, "num_input_tokens_seen": 486747984, "step": 8687 }, { "epoch": 19.347438752783965, "loss": 0.47200244665145874, "loss_ce": 0.00044482320663519204, "loss_iou": 0.1962890625, "loss_num": 0.01556396484375, "loss_xval": 0.470703125, "num_input_tokens_seen": 486747984, "step": 8687 }, { "epoch": 19.34966592427617, "grad_norm": 26.414714813232422, "learning_rate": 1e-06, "loss": 0.3239, "num_input_tokens_seen": 486803160, "step": 8688 }, { "epoch": 19.34966592427617, "loss": 0.30175623297691345, "loss_ce": 5.944071017438546e-05, "loss_iou": 0.1259765625, "loss_num": 0.00982666015625, "loss_xval": 0.30078125, "num_input_tokens_seen": 486803160, "step": 8688 }, { "epoch": 19.351893095768375, "grad_norm": 17.674320220947266, "learning_rate": 1e-06, "loss": 0.4622, "num_input_tokens_seen": 486861400, "step": 8689 }, { "epoch": 19.351893095768375, "loss": 0.45283421874046326, "loss_ce": 7.542921230196953e-05, "loss_iou": 0.181640625, "loss_num": 0.017822265625, "loss_xval": 0.453125, "num_input_tokens_seen": 486861400, "step": 8689 }, { "epoch": 19.35412026726058, "grad_norm": 24.226484298706055, "learning_rate": 1e-06, "loss": 0.5093, "num_input_tokens_seen": 486913828, "step": 8690 }, { "epoch": 19.35412026726058, "loss": 0.6187217235565186, "loss_ce": 6.936366116860881e-05, "loss_iou": 0.283203125, "loss_num": 0.01068115234375, "loss_xval": 0.6171875, "num_input_tokens_seen": 486913828, "step": 8690 }, { "epoch": 19.356347438752785, "grad_norm": 22.290658950805664, "learning_rate": 1e-06, "loss": 0.4345, "num_input_tokens_seen": 486968612, "step": 8691 }, { "epoch": 19.356347438752785, "loss": 0.3384547233581543, "loss_ce": 7.58438982302323e-05, "loss_iou": 0.1552734375, "loss_num": 0.00567626953125, "loss_xval": 0.337890625, "num_input_tokens_seen": 486968612, "step": 8691 }, { "epoch": 19.35857461024499, "grad_norm": 17.18849754333496, "learning_rate": 1e-06, "loss": 0.3679, "num_input_tokens_seen": 487024476, "step": 8692 }, { "epoch": 19.35857461024499, "loss": 0.46119123697280884, "loss_ce": 0.00013169522571843117, "loss_iou": 0.2041015625, "loss_num": 0.01068115234375, "loss_xval": 0.4609375, "num_input_tokens_seen": 487024476, "step": 8692 }, { "epoch": 19.360801781737194, "grad_norm": 21.173423767089844, "learning_rate": 1e-06, "loss": 0.3566, "num_input_tokens_seen": 487081268, "step": 8693 }, { "epoch": 19.360801781737194, "loss": 0.43475276231765747, "loss_ce": 6.03883781877812e-05, "loss_iou": 0.1796875, "loss_num": 0.01519775390625, "loss_xval": 0.435546875, "num_input_tokens_seen": 487081268, "step": 8693 }, { "epoch": 19.3630289532294, "grad_norm": 19.107248306274414, "learning_rate": 1e-06, "loss": 0.3304, "num_input_tokens_seen": 487133808, "step": 8694 }, { "epoch": 19.3630289532294, "loss": 0.30717021226882935, "loss_ce": 5.656223220285028e-05, "loss_iou": 0.1396484375, "loss_num": 0.005706787109375, "loss_xval": 0.306640625, "num_input_tokens_seen": 487133808, "step": 8694 }, { "epoch": 19.365256124721604, "grad_norm": 18.290464401245117, "learning_rate": 1e-06, "loss": 0.3325, "num_input_tokens_seen": 487191508, "step": 8695 }, { "epoch": 19.365256124721604, "loss": 0.41119277477264404, "loss_ce": 5.9931116993539035e-05, "loss_iou": 0.1884765625, "loss_num": 0.0067138671875, "loss_xval": 0.41015625, "num_input_tokens_seen": 487191508, "step": 8695 }, { "epoch": 19.36748329621381, "grad_norm": 20.532798767089844, "learning_rate": 1e-06, "loss": 0.4025, "num_input_tokens_seen": 487248060, "step": 8696 }, { "epoch": 19.36748329621381, "loss": 0.24934418499469757, "loss_ce": 7.660340634174645e-05, "loss_iou": 0.11328125, "loss_num": 0.004486083984375, "loss_xval": 0.2490234375, "num_input_tokens_seen": 487248060, "step": 8696 }, { "epoch": 19.369710467706014, "grad_norm": 21.59139633178711, "learning_rate": 1e-06, "loss": 0.4072, "num_input_tokens_seen": 487304516, "step": 8697 }, { "epoch": 19.369710467706014, "loss": 0.503758430480957, "loss_ce": 9.630977001506835e-05, "loss_iou": 0.2060546875, "loss_num": 0.0181884765625, "loss_xval": 0.50390625, "num_input_tokens_seen": 487304516, "step": 8697 }, { "epoch": 19.37193763919822, "grad_norm": 18.260494232177734, "learning_rate": 1e-06, "loss": 0.3184, "num_input_tokens_seen": 487360040, "step": 8698 }, { "epoch": 19.37193763919822, "loss": 0.3482136130332947, "loss_ce": 6.908161594765261e-05, "loss_iou": 0.1494140625, "loss_num": 0.009765625, "loss_xval": 0.34765625, "num_input_tokens_seen": 487360040, "step": 8698 }, { "epoch": 19.374164810690424, "grad_norm": 13.985121726989746, "learning_rate": 1e-06, "loss": 0.2731, "num_input_tokens_seen": 487413428, "step": 8699 }, { "epoch": 19.374164810690424, "loss": 0.23252707719802856, "loss_ce": 7.46890582377091e-05, "loss_iou": 0.107421875, "loss_num": 0.00347900390625, "loss_xval": 0.232421875, "num_input_tokens_seen": 487413428, "step": 8699 }, { "epoch": 19.37639198218263, "grad_norm": 23.42201805114746, "learning_rate": 1e-06, "loss": 0.4771, "num_input_tokens_seen": 487468832, "step": 8700 }, { "epoch": 19.37639198218263, "loss": 0.4661286175251007, "loss_ce": 6.414036033675075e-05, "loss_iou": 0.2060546875, "loss_num": 0.0107421875, "loss_xval": 0.466796875, "num_input_tokens_seen": 487468832, "step": 8700 }, { "epoch": 19.378619153674833, "grad_norm": 18.14457893371582, "learning_rate": 1e-06, "loss": 0.3369, "num_input_tokens_seen": 487525960, "step": 8701 }, { "epoch": 19.378619153674833, "loss": 0.3606569170951843, "loss_ce": 0.00012223681551404297, "loss_iou": 0.14453125, "loss_num": 0.01446533203125, "loss_xval": 0.361328125, "num_input_tokens_seen": 487525960, "step": 8701 }, { "epoch": 19.380846325167038, "grad_norm": 11.536683082580566, "learning_rate": 1e-06, "loss": 0.3009, "num_input_tokens_seen": 487583988, "step": 8702 }, { "epoch": 19.380846325167038, "loss": 0.3344113230705261, "loss_ce": 6.077022408135235e-05, "loss_iou": 0.1337890625, "loss_num": 0.01318359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 487583988, "step": 8702 }, { "epoch": 19.383073496659243, "grad_norm": 21.613056182861328, "learning_rate": 1e-06, "loss": 0.3386, "num_input_tokens_seen": 487636664, "step": 8703 }, { "epoch": 19.383073496659243, "loss": 0.3694436550140381, "loss_ce": 8.942555723479018e-05, "loss_iou": 0.14453125, "loss_num": 0.0157470703125, "loss_xval": 0.369140625, "num_input_tokens_seen": 487636664, "step": 8703 }, { "epoch": 19.385300668151448, "grad_norm": 22.000041961669922, "learning_rate": 1e-06, "loss": 0.3079, "num_input_tokens_seen": 487694000, "step": 8704 }, { "epoch": 19.385300668151448, "loss": 0.23017969727516174, "loss_ce": 7.716739492025226e-05, "loss_iou": 0.10693359375, "loss_num": 0.003326416015625, "loss_xval": 0.23046875, "num_input_tokens_seen": 487694000, "step": 8704 }, { "epoch": 19.387527839643653, "grad_norm": 13.974677085876465, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 487749452, "step": 8705 }, { "epoch": 19.387527839643653, "loss": 0.27674055099487305, "loss_ce": 6.818344991188496e-05, "loss_iou": 0.12890625, "loss_num": 0.003692626953125, "loss_xval": 0.27734375, "num_input_tokens_seen": 487749452, "step": 8705 }, { "epoch": 19.389755011135858, "grad_norm": 24.663671493530273, "learning_rate": 1e-06, "loss": 0.3754, "num_input_tokens_seen": 487803040, "step": 8706 }, { "epoch": 19.389755011135858, "loss": 0.4255967140197754, "loss_ce": 5.9576763305813074e-05, "loss_iou": 0.19140625, "loss_num": 0.00836181640625, "loss_xval": 0.42578125, "num_input_tokens_seen": 487803040, "step": 8706 }, { "epoch": 19.391982182628063, "grad_norm": 15.723613739013672, "learning_rate": 1e-06, "loss": 0.385, "num_input_tokens_seen": 487861660, "step": 8707 }, { "epoch": 19.391982182628063, "loss": 0.502314031124115, "loss_ce": 0.0001167724549304694, "loss_iou": 0.2216796875, "loss_num": 0.0115966796875, "loss_xval": 0.50390625, "num_input_tokens_seen": 487861660, "step": 8707 }, { "epoch": 19.394209354120267, "grad_norm": 18.70146369934082, "learning_rate": 1e-06, "loss": 0.4727, "num_input_tokens_seen": 487917728, "step": 8708 }, { "epoch": 19.394209354120267, "loss": 0.4696964621543884, "loss_ce": 9.19793383218348e-05, "loss_iou": 0.197265625, "loss_num": 0.0150146484375, "loss_xval": 0.46875, "num_input_tokens_seen": 487917728, "step": 8708 }, { "epoch": 19.396436525612472, "grad_norm": 18.742565155029297, "learning_rate": 1e-06, "loss": 0.5473, "num_input_tokens_seen": 487972684, "step": 8709 }, { "epoch": 19.396436525612472, "loss": 0.47677797079086304, "loss_ce": 9.340511314803734e-05, "loss_iou": 0.2119140625, "loss_num": 0.01068115234375, "loss_xval": 0.4765625, "num_input_tokens_seen": 487972684, "step": 8709 }, { "epoch": 19.398663697104677, "grad_norm": 79.24798583984375, "learning_rate": 1e-06, "loss": 0.477, "num_input_tokens_seen": 488027984, "step": 8710 }, { "epoch": 19.398663697104677, "loss": 0.7484385967254639, "loss_ce": 8.648347284179181e-05, "loss_iou": 0.271484375, "loss_num": 0.040771484375, "loss_xval": 0.75, "num_input_tokens_seen": 488027984, "step": 8710 }, { "epoch": 19.400890868596882, "grad_norm": 23.097761154174805, "learning_rate": 1e-06, "loss": 0.4981, "num_input_tokens_seen": 488084492, "step": 8711 }, { "epoch": 19.400890868596882, "loss": 0.5120543241500854, "loss_ce": 9.138373570749536e-05, "loss_iou": 0.2021484375, "loss_num": 0.021728515625, "loss_xval": 0.51171875, "num_input_tokens_seen": 488084492, "step": 8711 }, { "epoch": 19.403118040089087, "grad_norm": 26.856395721435547, "learning_rate": 1e-06, "loss": 0.5109, "num_input_tokens_seen": 488139008, "step": 8712 }, { "epoch": 19.403118040089087, "loss": 0.4314088821411133, "loss_ce": 7.342306344071403e-05, "loss_iou": 0.193359375, "loss_num": 0.00885009765625, "loss_xval": 0.431640625, "num_input_tokens_seen": 488139008, "step": 8712 }, { "epoch": 19.40534521158129, "grad_norm": 17.314449310302734, "learning_rate": 1e-06, "loss": 0.443, "num_input_tokens_seen": 488193600, "step": 8713 }, { "epoch": 19.40534521158129, "loss": 0.39160478115081787, "loss_ce": 6.424939056159928e-05, "loss_iou": 0.1748046875, "loss_num": 0.00823974609375, "loss_xval": 0.390625, "num_input_tokens_seen": 488193600, "step": 8713 }, { "epoch": 19.407572383073497, "grad_norm": 25.597942352294922, "learning_rate": 1e-06, "loss": 0.2987, "num_input_tokens_seen": 488251452, "step": 8714 }, { "epoch": 19.407572383073497, "loss": 0.3611512780189514, "loss_ce": 6.729191227350384e-05, "loss_iou": 0.16015625, "loss_num": 0.00811767578125, "loss_xval": 0.361328125, "num_input_tokens_seen": 488251452, "step": 8714 }, { "epoch": 19.4097995545657, "grad_norm": 25.324542999267578, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 488307252, "step": 8715 }, { "epoch": 19.4097995545657, "loss": 0.5760635137557983, "loss_ce": 7.472602010238916e-05, "loss_iou": 0.2392578125, "loss_num": 0.019287109375, "loss_xval": 0.57421875, "num_input_tokens_seen": 488307252, "step": 8715 }, { "epoch": 19.412026726057906, "grad_norm": 17.964107513427734, "learning_rate": 1e-06, "loss": 0.3626, "num_input_tokens_seen": 488363092, "step": 8716 }, { "epoch": 19.412026726057906, "loss": 0.3571329712867737, "loss_ce": 7.73169522290118e-05, "loss_iou": 0.1572265625, "loss_num": 0.00836181640625, "loss_xval": 0.357421875, "num_input_tokens_seen": 488363092, "step": 8716 }, { "epoch": 19.41425389755011, "grad_norm": 16.54916763305664, "learning_rate": 1e-06, "loss": 0.2583, "num_input_tokens_seen": 488420924, "step": 8717 }, { "epoch": 19.41425389755011, "loss": 0.1723698377609253, "loss_ce": 6.760148971807212e-05, "loss_iou": 0.068359375, "loss_num": 0.00714111328125, "loss_xval": 0.171875, "num_input_tokens_seen": 488420924, "step": 8717 }, { "epoch": 19.416481069042316, "grad_norm": 31.5208683013916, "learning_rate": 1e-06, "loss": 0.5982, "num_input_tokens_seen": 488473012, "step": 8718 }, { "epoch": 19.416481069042316, "loss": 0.48655349016189575, "loss_ce": 0.00010326504707336426, "loss_iou": 0.20703125, "loss_num": 0.01434326171875, "loss_xval": 0.486328125, "num_input_tokens_seen": 488473012, "step": 8718 }, { "epoch": 19.41870824053452, "grad_norm": 14.7725248336792, "learning_rate": 1e-06, "loss": 0.3455, "num_input_tokens_seen": 488529484, "step": 8719 }, { "epoch": 19.41870824053452, "loss": 0.34601306915283203, "loss_ce": 6.580428453162313e-05, "loss_iou": 0.16015625, "loss_num": 0.005126953125, "loss_xval": 0.345703125, "num_input_tokens_seen": 488529484, "step": 8719 }, { "epoch": 19.420935412026726, "grad_norm": 18.437286376953125, "learning_rate": 1e-06, "loss": 0.384, "num_input_tokens_seen": 488587032, "step": 8720 }, { "epoch": 19.420935412026726, "loss": 0.30788394808769226, "loss_ce": 0.0001446868700440973, "loss_iou": 0.1259765625, "loss_num": 0.0113525390625, "loss_xval": 0.30859375, "num_input_tokens_seen": 488587032, "step": 8720 }, { "epoch": 19.42316258351893, "grad_norm": 18.742013931274414, "learning_rate": 1e-06, "loss": 0.6236, "num_input_tokens_seen": 488643196, "step": 8721 }, { "epoch": 19.42316258351893, "loss": 0.742760181427002, "loss_ce": 8.442148100584745e-05, "loss_iou": 0.298828125, "loss_num": 0.029052734375, "loss_xval": 0.7421875, "num_input_tokens_seen": 488643196, "step": 8721 }, { "epoch": 19.425389755011135, "grad_norm": 20.891321182250977, "learning_rate": 1e-06, "loss": 0.4322, "num_input_tokens_seen": 488699628, "step": 8722 }, { "epoch": 19.425389755011135, "loss": 0.44647514820098877, "loss_ce": 6.399239646270871e-05, "loss_iou": 0.20703125, "loss_num": 0.006561279296875, "loss_xval": 0.447265625, "num_input_tokens_seen": 488699628, "step": 8722 }, { "epoch": 19.42761692650334, "grad_norm": 11.5533447265625, "learning_rate": 1e-06, "loss": 0.4345, "num_input_tokens_seen": 488756332, "step": 8723 }, { "epoch": 19.42761692650334, "loss": 0.4649144411087036, "loss_ce": 7.067194383125752e-05, "loss_iou": 0.1884765625, "loss_num": 0.0174560546875, "loss_xval": 0.46484375, "num_input_tokens_seen": 488756332, "step": 8723 }, { "epoch": 19.429844097995545, "grad_norm": 27.625247955322266, "learning_rate": 1e-06, "loss": 0.3431, "num_input_tokens_seen": 488812856, "step": 8724 }, { "epoch": 19.429844097995545, "loss": 0.4067014753818512, "loss_ce": 8.526656165486202e-05, "loss_iou": 0.17578125, "loss_num": 0.01080322265625, "loss_xval": 0.40625, "num_input_tokens_seen": 488812856, "step": 8724 }, { "epoch": 19.43207126948775, "grad_norm": 17.141159057617188, "learning_rate": 1e-06, "loss": 0.3232, "num_input_tokens_seen": 488866352, "step": 8725 }, { "epoch": 19.43207126948775, "loss": 0.34912025928497314, "loss_ce": 6.021084118401632e-05, "loss_iou": 0.150390625, "loss_num": 0.00982666015625, "loss_xval": 0.349609375, "num_input_tokens_seen": 488866352, "step": 8725 }, { "epoch": 19.434298440979955, "grad_norm": 18.241275787353516, "learning_rate": 1e-06, "loss": 0.36, "num_input_tokens_seen": 488925556, "step": 8726 }, { "epoch": 19.434298440979955, "loss": 0.3274751305580139, "loss_ce": 8.253618580056354e-05, "loss_iou": 0.1513671875, "loss_num": 0.004791259765625, "loss_xval": 0.328125, "num_input_tokens_seen": 488925556, "step": 8726 }, { "epoch": 19.43652561247216, "grad_norm": 61.344459533691406, "learning_rate": 1e-06, "loss": 0.3267, "num_input_tokens_seen": 488983256, "step": 8727 }, { "epoch": 19.43652561247216, "loss": 0.4625932276248932, "loss_ce": 6.8813213147223e-05, "loss_iou": 0.197265625, "loss_num": 0.01336669921875, "loss_xval": 0.462890625, "num_input_tokens_seen": 488983256, "step": 8727 }, { "epoch": 19.438752783964365, "grad_norm": 24.339759826660156, "learning_rate": 1e-06, "loss": 0.4207, "num_input_tokens_seen": 489039964, "step": 8728 }, { "epoch": 19.438752783964365, "loss": 0.4445319175720215, "loss_ce": 7.392082625301555e-05, "loss_iou": 0.2099609375, "loss_num": 0.0047607421875, "loss_xval": 0.4453125, "num_input_tokens_seen": 489039964, "step": 8728 }, { "epoch": 19.44097995545657, "grad_norm": 14.037641525268555, "learning_rate": 1e-06, "loss": 0.4557, "num_input_tokens_seen": 489095200, "step": 8729 }, { "epoch": 19.44097995545657, "loss": 0.36377742886543274, "loss_ce": 6.892348756082356e-05, "loss_iou": 0.140625, "loss_num": 0.016357421875, "loss_xval": 0.36328125, "num_input_tokens_seen": 489095200, "step": 8729 }, { "epoch": 19.443207126948774, "grad_norm": 30.27907943725586, "learning_rate": 1e-06, "loss": 0.3208, "num_input_tokens_seen": 489150700, "step": 8730 }, { "epoch": 19.443207126948774, "loss": 0.3462543785572052, "loss_ce": 6.298153311945498e-05, "loss_iou": 0.1572265625, "loss_num": 0.006317138671875, "loss_xval": 0.345703125, "num_input_tokens_seen": 489150700, "step": 8730 }, { "epoch": 19.44543429844098, "grad_norm": 13.437280654907227, "learning_rate": 1e-06, "loss": 0.512, "num_input_tokens_seen": 489206188, "step": 8731 }, { "epoch": 19.44543429844098, "loss": 0.36061540246009827, "loss_ce": 8.075297955656424e-05, "loss_iou": 0.1552734375, "loss_num": 0.01007080078125, "loss_xval": 0.361328125, "num_input_tokens_seen": 489206188, "step": 8731 }, { "epoch": 19.447661469933184, "grad_norm": 23.991952896118164, "learning_rate": 1e-06, "loss": 0.3874, "num_input_tokens_seen": 489260872, "step": 8732 }, { "epoch": 19.447661469933184, "loss": 0.3317917287349701, "loss_ce": 6.565573858097196e-05, "loss_iou": 0.14453125, "loss_num": 0.00848388671875, "loss_xval": 0.33203125, "num_input_tokens_seen": 489260872, "step": 8732 }, { "epoch": 19.44988864142539, "grad_norm": 14.908693313598633, "learning_rate": 1e-06, "loss": 0.3623, "num_input_tokens_seen": 489317248, "step": 8733 }, { "epoch": 19.44988864142539, "loss": 0.30453062057495117, "loss_ce": 8.725294173927978e-05, "loss_iou": 0.134765625, "loss_num": 0.007080078125, "loss_xval": 0.3046875, "num_input_tokens_seen": 489317248, "step": 8733 }, { "epoch": 19.452115812917594, "grad_norm": 72.13795471191406, "learning_rate": 1e-06, "loss": 0.2718, "num_input_tokens_seen": 489374240, "step": 8734 }, { "epoch": 19.452115812917594, "loss": 0.3349186182022095, "loss_ce": 7.976653432706371e-05, "loss_iou": 0.1572265625, "loss_num": 0.004302978515625, "loss_xval": 0.333984375, "num_input_tokens_seen": 489374240, "step": 8734 }, { "epoch": 19.4543429844098, "grad_norm": 15.769805908203125, "learning_rate": 1e-06, "loss": 0.3651, "num_input_tokens_seen": 489432756, "step": 8735 }, { "epoch": 19.4543429844098, "loss": 0.3257424235343933, "loss_ce": 5.8816825912799686e-05, "loss_iou": 0.1328125, "loss_num": 0.01202392578125, "loss_xval": 0.326171875, "num_input_tokens_seen": 489432756, "step": 8735 }, { "epoch": 19.456570155902003, "grad_norm": 134.32081604003906, "learning_rate": 1e-06, "loss": 0.3294, "num_input_tokens_seen": 489488532, "step": 8736 }, { "epoch": 19.456570155902003, "loss": 0.30285748839378357, "loss_ce": 6.207545811776072e-05, "loss_iou": 0.1337890625, "loss_num": 0.0068359375, "loss_xval": 0.302734375, "num_input_tokens_seen": 489488532, "step": 8736 }, { "epoch": 19.45879732739421, "grad_norm": 19.14018440246582, "learning_rate": 1e-06, "loss": 0.4107, "num_input_tokens_seen": 489541320, "step": 8737 }, { "epoch": 19.45879732739421, "loss": 0.36432066559791565, "loss_ce": 6.283754191827029e-05, "loss_iou": 0.1611328125, "loss_num": 0.00836181640625, "loss_xval": 0.36328125, "num_input_tokens_seen": 489541320, "step": 8737 }, { "epoch": 19.461024498886413, "grad_norm": 17.099512100219727, "learning_rate": 1e-06, "loss": 0.3985, "num_input_tokens_seen": 489597048, "step": 8738 }, { "epoch": 19.461024498886413, "loss": 0.45394012331962585, "loss_ce": 8.270963735412806e-05, "loss_iou": 0.1884765625, "loss_num": 0.01544189453125, "loss_xval": 0.453125, "num_input_tokens_seen": 489597048, "step": 8738 }, { "epoch": 19.463251670378618, "grad_norm": 17.23607063293457, "learning_rate": 1e-06, "loss": 0.44, "num_input_tokens_seen": 489653544, "step": 8739 }, { "epoch": 19.463251670378618, "loss": 0.47176241874694824, "loss_ce": 8.272690320154652e-05, "loss_iou": 0.212890625, "loss_num": 0.009033203125, "loss_xval": 0.47265625, "num_input_tokens_seen": 489653544, "step": 8739 }, { "epoch": 19.465478841870823, "grad_norm": 25.947065353393555, "learning_rate": 1e-06, "loss": 0.5785, "num_input_tokens_seen": 489710516, "step": 8740 }, { "epoch": 19.465478841870823, "loss": 0.40167826414108276, "loss_ce": 6.691856833640486e-05, "loss_iou": 0.1806640625, "loss_num": 0.00811767578125, "loss_xval": 0.40234375, "num_input_tokens_seen": 489710516, "step": 8740 }, { "epoch": 19.467706013363028, "grad_norm": 15.991206169128418, "learning_rate": 1e-06, "loss": 0.2839, "num_input_tokens_seen": 489766984, "step": 8741 }, { "epoch": 19.467706013363028, "loss": 0.25690385699272156, "loss_ce": 6.792375643271953e-05, "loss_iou": 0.1142578125, "loss_num": 0.0057373046875, "loss_xval": 0.2578125, "num_input_tokens_seen": 489766984, "step": 8741 }, { "epoch": 19.469933184855233, "grad_norm": 12.759182929992676, "learning_rate": 1e-06, "loss": 0.3062, "num_input_tokens_seen": 489823144, "step": 8742 }, { "epoch": 19.469933184855233, "loss": 0.3527446389198303, "loss_ce": 0.0001140242675319314, "loss_iou": 0.15625, "loss_num": 0.0079345703125, "loss_xval": 0.353515625, "num_input_tokens_seen": 489823144, "step": 8742 }, { "epoch": 19.472160356347437, "grad_norm": 18.29888343811035, "learning_rate": 1e-06, "loss": 0.336, "num_input_tokens_seen": 489878156, "step": 8743 }, { "epoch": 19.472160356347437, "loss": 0.3941093683242798, "loss_ce": 6.639507773797959e-05, "loss_iou": 0.1796875, "loss_num": 0.007232666015625, "loss_xval": 0.39453125, "num_input_tokens_seen": 489878156, "step": 8743 }, { "epoch": 19.474387527839642, "grad_norm": 27.575145721435547, "learning_rate": 1e-06, "loss": 0.3742, "num_input_tokens_seen": 489932784, "step": 8744 }, { "epoch": 19.474387527839642, "loss": 0.45381438732147217, "loss_ce": 7.904722588136792e-05, "loss_iou": 0.1962890625, "loss_num": 0.01214599609375, "loss_xval": 0.453125, "num_input_tokens_seen": 489932784, "step": 8744 }, { "epoch": 19.476614699331847, "grad_norm": 16.64528465270996, "learning_rate": 1e-06, "loss": 0.3247, "num_input_tokens_seen": 489988008, "step": 8745 }, { "epoch": 19.476614699331847, "loss": 0.37039196491241455, "loss_ce": 7.641837873961776e-05, "loss_iou": 0.12353515625, "loss_num": 0.024658203125, "loss_xval": 0.37109375, "num_input_tokens_seen": 489988008, "step": 8745 }, { "epoch": 19.478841870824052, "grad_norm": 12.457158088684082, "learning_rate": 1e-06, "loss": 0.4706, "num_input_tokens_seen": 490045656, "step": 8746 }, { "epoch": 19.478841870824052, "loss": 0.576514482498169, "loss_ce": 9.850240166997537e-05, "loss_iou": 0.212890625, "loss_num": 0.0301513671875, "loss_xval": 0.578125, "num_input_tokens_seen": 490045656, "step": 8746 }, { "epoch": 19.481069042316257, "grad_norm": 29.269418716430664, "learning_rate": 1e-06, "loss": 0.5014, "num_input_tokens_seen": 490100304, "step": 8747 }, { "epoch": 19.481069042316257, "loss": 0.4481244385242462, "loss_ce": 6.533482519444078e-05, "loss_iou": 0.1982421875, "loss_num": 0.01025390625, "loss_xval": 0.447265625, "num_input_tokens_seen": 490100304, "step": 8747 }, { "epoch": 19.48329621380846, "grad_norm": 17.77977752685547, "learning_rate": 1e-06, "loss": 0.3385, "num_input_tokens_seen": 490155632, "step": 8748 }, { "epoch": 19.48329621380846, "loss": 0.24175959825515747, "loss_ce": 6.037494677002542e-05, "loss_iou": 0.1015625, "loss_num": 0.00762939453125, "loss_xval": 0.2421875, "num_input_tokens_seen": 490155632, "step": 8748 }, { "epoch": 19.485523385300667, "grad_norm": 16.200801849365234, "learning_rate": 1e-06, "loss": 0.3294, "num_input_tokens_seen": 490213296, "step": 8749 }, { "epoch": 19.485523385300667, "loss": 0.3568875789642334, "loss_ce": 7.605206337757409e-05, "loss_iou": 0.1669921875, "loss_num": 0.0047607421875, "loss_xval": 0.357421875, "num_input_tokens_seen": 490213296, "step": 8749 }, { "epoch": 19.48775055679287, "grad_norm": 15.862672805786133, "learning_rate": 1e-06, "loss": 0.3418, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "eval_seeclick_web_CIoU": 0.5831195414066315, "eval_seeclick_web_GIoU": 0.5824593603610992, "eval_seeclick_web_IoU": 0.6025447845458984, "eval_seeclick_web_MAE_all": 0.0153358387760818, "eval_seeclick_web_MAE_h": 0.0069596245884895325, "eval_seeclick_web_MAE_w": 0.015038002282381058, "eval_seeclick_web_MAE_x_boxes": 0.008386612171307206, "eval_seeclick_web_MAE_y_boxes": 0.02156838239170611, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9198112487792969, "eval_seeclick_web_loss_ce": 0.00012025472460663877, "eval_seeclick_web_loss_iou": 0.4251708984375, "eval_seeclick_web_loss_num": 0.012319564819335938, "eval_seeclick_web_loss_xval": 0.912109375, "eval_seeclick_web_runtime": 21.1555, "eval_seeclick_web_samples_per_second": 2.363, "eval_seeclick_web_steps_per_second": 0.095, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "eval_icons_CIoU": 0.26400046050548553, "eval_icons_GIoU": 0.299440860748291, "eval_icons_IoU": 0.34618473052978516, "eval_icons_MAE_all": 0.062128059566020966, "eval_icons_MAE_h": 0.029708989895880222, "eval_icons_MAE_w": 0.06808132492005825, "eval_icons_MAE_x_boxes": 0.06109212152659893, "eval_icons_MAE_y_boxes": 0.03715028055012226, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7176454067230225, "eval_icons_loss_ce": 0.00013828581722918898, "eval_icons_loss_iou": 0.673095703125, "eval_icons_loss_num": 0.06012725830078125, "eval_icons_loss_xval": 1.6474609375, "eval_icons_runtime": 18.377, "eval_icons_samples_per_second": 2.721, "eval_icons_steps_per_second": 0.109, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "eval_screenspot_CIoU": 0.37087904413541156, "eval_screenspot_GIoU": 0.3902689814567566, "eval_screenspot_IoU": 0.4464185933272044, "eval_screenspot_MAE_all": 0.058752829829851784, "eval_screenspot_MAE_h": 0.039602997402350106, "eval_screenspot_MAE_w": 0.06322793414195378, "eval_screenspot_MAE_x_boxes": 0.0708691483984391, "eval_screenspot_MAE_y_boxes": 0.04212713334709406, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.5793300867080688, "eval_screenspot_loss_ce": 0.00015701642648006478, "eval_screenspot_loss_iou": 0.6509602864583334, "eval_screenspot_loss_num": 0.06783040364583333, "eval_screenspot_loss_xval": 1.640625, "eval_screenspot_runtime": 31.0289, "eval_screenspot_samples_per_second": 2.868, "eval_screenspot_steps_per_second": 0.097, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "eval_compot_CIoU": 0.34746964275836945, "eval_compot_GIoU": 0.3572629541158676, "eval_compot_IoU": 0.40467870235443115, "eval_compot_MAE_all": 0.018142362125217915, "eval_compot_MAE_h": 0.009449589531868696, "eval_compot_MAE_w": 0.020473646000027657, "eval_compot_MAE_x_boxes": 0.030018706806004047, "eval_compot_MAE_y_boxes": 0.006989206187427044, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3744853734970093, "eval_compot_loss_ce": 0.00010999454752891324, "eval_compot_loss_iou": 0.6365966796875, "eval_compot_loss_num": 0.016666412353515625, "eval_compot_loss_xval": 1.356689453125, "eval_compot_runtime": 19.0377, "eval_compot_samples_per_second": 2.626, "eval_compot_steps_per_second": 0.105, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "eval_custom_ui_val_CIoU": 0.4767077672812674, "eval_custom_ui_val_GIoU": 0.481793655289544, "eval_custom_ui_val_IoU": 0.5382914178901248, "eval_custom_ui_val_MAE_all": 0.026706857451548178, "eval_custom_ui_val_MAE_h": 0.013321074657142162, "eval_custom_ui_val_MAE_w": 0.03640370096804367, "eval_custom_ui_val_MAE_x_boxes": 0.032115884642634124, "eval_custom_ui_val_MAE_y_boxes": 0.013279020553454757, "eval_custom_ui_val_inside_bbox": 0.7754629651705424, "eval_custom_ui_val_loss": 1.1639938354492188, "eval_custom_ui_val_loss_ce": 0.00012037252761527068, "eval_custom_ui_val_loss_iou": 0.5000542534722222, "eval_custom_ui_val_loss_num": 0.023473739624023438, "eval_custom_ui_val_loss_xval": 1.1174858940972223, "eval_custom_ui_val_runtime": 57.2523, "eval_custom_ui_val_samples_per_second": 4.629, "eval_custom_ui_val_steps_per_second": 0.157, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.48775055679287, "loss": 0.8289517164230347, "loss_ce": 9.427011536899954e-05, "loss_iou": 0.373046875, "loss_num": 0.0164794921875, "loss_xval": 0.828125, "num_input_tokens_seen": 490266248, "step": 8750 }, { "epoch": 19.489977728285076, "grad_norm": 16.564908981323242, "learning_rate": 1e-06, "loss": 0.3765, "num_input_tokens_seen": 490324400, "step": 8751 }, { "epoch": 19.489977728285076, "loss": 0.39094218611717224, "loss_ce": 7.304349128389731e-05, "loss_iou": 0.1787109375, "loss_num": 0.0068359375, "loss_xval": 0.390625, "num_input_tokens_seen": 490324400, "step": 8751 }, { "epoch": 19.49220489977728, "grad_norm": 18.350833892822266, "learning_rate": 1e-06, "loss": 0.4225, "num_input_tokens_seen": 490380904, "step": 8752 }, { "epoch": 19.49220489977728, "loss": 0.43350207805633545, "loss_ce": 6.09189628448803e-05, "loss_iou": 0.1826171875, "loss_num": 0.01361083984375, "loss_xval": 0.43359375, "num_input_tokens_seen": 490380904, "step": 8752 }, { "epoch": 19.494432071269486, "grad_norm": 21.1101016998291, "learning_rate": 1e-06, "loss": 0.5412, "num_input_tokens_seen": 490436388, "step": 8753 }, { "epoch": 19.494432071269486, "loss": 0.5999248027801514, "loss_ce": 7.128326979000121e-05, "loss_iou": 0.25, "loss_num": 0.0196533203125, "loss_xval": 0.6015625, "num_input_tokens_seen": 490436388, "step": 8753 }, { "epoch": 19.49665924276169, "grad_norm": 20.924692153930664, "learning_rate": 1e-06, "loss": 0.3887, "num_input_tokens_seen": 490491856, "step": 8754 }, { "epoch": 19.49665924276169, "loss": 0.5576558113098145, "loss_ce": 0.00028280686819925904, "loss_iou": 0.2470703125, "loss_num": 0.0128173828125, "loss_xval": 0.55859375, "num_input_tokens_seen": 490491856, "step": 8754 }, { "epoch": 19.498886414253896, "grad_norm": 16.07146644592285, "learning_rate": 1e-06, "loss": 0.3163, "num_input_tokens_seen": 490547508, "step": 8755 }, { "epoch": 19.498886414253896, "loss": 0.35533058643341064, "loss_ce": 7.546801498392597e-05, "loss_iou": 0.1630859375, "loss_num": 0.0059814453125, "loss_xval": 0.35546875, "num_input_tokens_seen": 490547508, "step": 8755 }, { "epoch": 19.501113585746104, "grad_norm": 17.8480167388916, "learning_rate": 1e-06, "loss": 0.3092, "num_input_tokens_seen": 490602244, "step": 8756 }, { "epoch": 19.501113585746104, "loss": 0.3217475414276123, "loss_ce": 9.226159454556182e-05, "loss_iou": 0.1416015625, "loss_num": 0.007598876953125, "loss_xval": 0.322265625, "num_input_tokens_seen": 490602244, "step": 8756 }, { "epoch": 19.50334075723831, "grad_norm": 20.913305282592773, "learning_rate": 1e-06, "loss": 0.4527, "num_input_tokens_seen": 490659044, "step": 8757 }, { "epoch": 19.50334075723831, "loss": 0.49548420310020447, "loss_ce": 6.180736818350852e-05, "loss_iou": 0.19140625, "loss_num": 0.0224609375, "loss_xval": 0.49609375, "num_input_tokens_seen": 490659044, "step": 8757 }, { "epoch": 19.505567928730514, "grad_norm": 18.693899154663086, "learning_rate": 1e-06, "loss": 0.2973, "num_input_tokens_seen": 490715388, "step": 8758 }, { "epoch": 19.505567928730514, "loss": 0.19214215874671936, "loss_ce": 6.452086381614208e-05, "loss_iou": 0.08056640625, "loss_num": 0.006256103515625, "loss_xval": 0.1923828125, "num_input_tokens_seen": 490715388, "step": 8758 }, { "epoch": 19.50779510022272, "grad_norm": 22.483903884887695, "learning_rate": 1e-06, "loss": 0.4002, "num_input_tokens_seen": 490771780, "step": 8759 }, { "epoch": 19.50779510022272, "loss": 0.40626299381256104, "loss_ce": 7.403266499750316e-05, "loss_iou": 0.181640625, "loss_num": 0.00860595703125, "loss_xval": 0.40625, "num_input_tokens_seen": 490771780, "step": 8759 }, { "epoch": 19.510022271714924, "grad_norm": 14.928621292114258, "learning_rate": 1e-06, "loss": 0.3059, "num_input_tokens_seen": 490829264, "step": 8760 }, { "epoch": 19.510022271714924, "loss": 0.32245999574661255, "loss_ce": 7.230706978589296e-05, "loss_iou": 0.1376953125, "loss_num": 0.00927734375, "loss_xval": 0.322265625, "num_input_tokens_seen": 490829264, "step": 8760 }, { "epoch": 19.51224944320713, "grad_norm": 40.57841491699219, "learning_rate": 1e-06, "loss": 0.4451, "num_input_tokens_seen": 490885112, "step": 8761 }, { "epoch": 19.51224944320713, "loss": 0.2985847294330597, "loss_ce": 6.179253978189081e-05, "loss_iou": 0.1279296875, "loss_num": 0.00830078125, "loss_xval": 0.298828125, "num_input_tokens_seen": 490885112, "step": 8761 }, { "epoch": 19.514476614699333, "grad_norm": 26.560928344726562, "learning_rate": 1e-06, "loss": 0.376, "num_input_tokens_seen": 490940464, "step": 8762 }, { "epoch": 19.514476614699333, "loss": 0.35675495862960815, "loss_ce": 6.548444798681885e-05, "loss_iou": 0.1640625, "loss_num": 0.00579833984375, "loss_xval": 0.357421875, "num_input_tokens_seen": 490940464, "step": 8762 }, { "epoch": 19.51670378619154, "grad_norm": 14.288774490356445, "learning_rate": 1e-06, "loss": 0.4027, "num_input_tokens_seen": 490995184, "step": 8763 }, { "epoch": 19.51670378619154, "loss": 0.2923039197921753, "loss_ce": 6.761118129361421e-05, "loss_iou": 0.12890625, "loss_num": 0.00677490234375, "loss_xval": 0.29296875, "num_input_tokens_seen": 490995184, "step": 8763 }, { "epoch": 19.518930957683743, "grad_norm": 23.1356201171875, "learning_rate": 1e-06, "loss": 0.3756, "num_input_tokens_seen": 491051012, "step": 8764 }, { "epoch": 19.518930957683743, "loss": 0.41393959522247314, "loss_ce": 6.0199585277587175e-05, "loss_iou": 0.16796875, "loss_num": 0.015380859375, "loss_xval": 0.4140625, "num_input_tokens_seen": 491051012, "step": 8764 }, { "epoch": 19.521158129175948, "grad_norm": 18.624935150146484, "learning_rate": 1e-06, "loss": 0.3166, "num_input_tokens_seen": 491107176, "step": 8765 }, { "epoch": 19.521158129175948, "loss": 0.34882843494415283, "loss_ce": 7.35686844564043e-05, "loss_iou": 0.16015625, "loss_num": 0.005462646484375, "loss_xval": 0.349609375, "num_input_tokens_seen": 491107176, "step": 8765 }, { "epoch": 19.523385300668153, "grad_norm": 20.93644905090332, "learning_rate": 1e-06, "loss": 0.4879, "num_input_tokens_seen": 491162216, "step": 8766 }, { "epoch": 19.523385300668153, "loss": 0.4118029773235321, "loss_ce": 5.981113645248115e-05, "loss_iou": 0.1875, "loss_num": 0.0074462890625, "loss_xval": 0.412109375, "num_input_tokens_seen": 491162216, "step": 8766 }, { "epoch": 19.525612472160358, "grad_norm": 15.544754028320312, "learning_rate": 1e-06, "loss": 0.4786, "num_input_tokens_seen": 491217424, "step": 8767 }, { "epoch": 19.525612472160358, "loss": 0.5115818381309509, "loss_ce": 0.00010719275451265275, "loss_iou": 0.2080078125, "loss_num": 0.0189208984375, "loss_xval": 0.51171875, "num_input_tokens_seen": 491217424, "step": 8767 }, { "epoch": 19.527839643652563, "grad_norm": 16.272380828857422, "learning_rate": 1e-06, "loss": 0.3691, "num_input_tokens_seen": 491269164, "step": 8768 }, { "epoch": 19.527839643652563, "loss": 0.326735258102417, "loss_ce": 7.508813723688945e-05, "loss_iou": 0.1435546875, "loss_num": 0.0078125, "loss_xval": 0.326171875, "num_input_tokens_seen": 491269164, "step": 8768 }, { "epoch": 19.530066815144767, "grad_norm": 19.87215232849121, "learning_rate": 1e-06, "loss": 0.276, "num_input_tokens_seen": 491328140, "step": 8769 }, { "epoch": 19.530066815144767, "loss": 0.25873589515686035, "loss_ce": 6.88969885231927e-05, "loss_iou": 0.1220703125, "loss_num": 0.002838134765625, "loss_xval": 0.2578125, "num_input_tokens_seen": 491328140, "step": 8769 }, { "epoch": 19.532293986636972, "grad_norm": 16.369861602783203, "learning_rate": 1e-06, "loss": 0.5755, "num_input_tokens_seen": 491381900, "step": 8770 }, { "epoch": 19.532293986636972, "loss": 0.7968376874923706, "loss_ce": 8.477165829390287e-05, "loss_iou": 0.310546875, "loss_num": 0.035400390625, "loss_xval": 0.796875, "num_input_tokens_seen": 491381900, "step": 8770 }, { "epoch": 19.534521158129177, "grad_norm": 29.245031356811523, "learning_rate": 1e-06, "loss": 0.4296, "num_input_tokens_seen": 491437320, "step": 8771 }, { "epoch": 19.534521158129177, "loss": 0.2833980321884155, "loss_ce": 7.282086880877614e-05, "loss_iou": 0.1318359375, "loss_num": 0.00396728515625, "loss_xval": 0.283203125, "num_input_tokens_seen": 491437320, "step": 8771 }, { "epoch": 19.536748329621382, "grad_norm": 27.237791061401367, "learning_rate": 1e-06, "loss": 0.4367, "num_input_tokens_seen": 491493612, "step": 8772 }, { "epoch": 19.536748329621382, "loss": 0.3653065860271454, "loss_ce": 7.220754923764616e-05, "loss_iou": 0.1689453125, "loss_num": 0.00543212890625, "loss_xval": 0.365234375, "num_input_tokens_seen": 491493612, "step": 8772 }, { "epoch": 19.538975501113587, "grad_norm": 52.4660758972168, "learning_rate": 1e-06, "loss": 0.3087, "num_input_tokens_seen": 491546616, "step": 8773 }, { "epoch": 19.538975501113587, "loss": 0.2954094111919403, "loss_ce": 6.026863775332458e-05, "loss_iou": 0.134765625, "loss_num": 0.00537109375, "loss_xval": 0.294921875, "num_input_tokens_seen": 491546616, "step": 8773 }, { "epoch": 19.54120267260579, "grad_norm": 20.992326736450195, "learning_rate": 1e-06, "loss": 0.4035, "num_input_tokens_seen": 491598516, "step": 8774 }, { "epoch": 19.54120267260579, "loss": 0.37043464183807373, "loss_ce": 7.328739593503997e-05, "loss_iou": 0.16015625, "loss_num": 0.010009765625, "loss_xval": 0.37109375, "num_input_tokens_seen": 491598516, "step": 8774 }, { "epoch": 19.543429844097997, "grad_norm": 17.51434898376465, "learning_rate": 1e-06, "loss": 0.3259, "num_input_tokens_seen": 491655988, "step": 8775 }, { "epoch": 19.543429844097997, "loss": 0.3194010257720947, "loss_ce": 6.507511716336012e-05, "loss_iou": 0.1455078125, "loss_num": 0.00555419921875, "loss_xval": 0.3203125, "num_input_tokens_seen": 491655988, "step": 8775 }, { "epoch": 19.5456570155902, "grad_norm": 19.03451919555664, "learning_rate": 1e-06, "loss": 0.3281, "num_input_tokens_seen": 491711688, "step": 8776 }, { "epoch": 19.5456570155902, "loss": 0.26713547110557556, "loss_ce": 7.614593778271228e-05, "loss_iou": 0.1220703125, "loss_num": 0.0045166015625, "loss_xval": 0.267578125, "num_input_tokens_seen": 491711688, "step": 8776 }, { "epoch": 19.547884187082406, "grad_norm": 26.791309356689453, "learning_rate": 1e-06, "loss": 0.4318, "num_input_tokens_seen": 491765368, "step": 8777 }, { "epoch": 19.547884187082406, "loss": 0.45965927839279175, "loss_ce": 6.455044058384374e-05, "loss_iou": 0.201171875, "loss_num": 0.011474609375, "loss_xval": 0.458984375, "num_input_tokens_seen": 491765368, "step": 8777 }, { "epoch": 19.55011135857461, "grad_norm": 39.71892166137695, "learning_rate": 1e-06, "loss": 0.4227, "num_input_tokens_seen": 491822976, "step": 8778 }, { "epoch": 19.55011135857461, "loss": 0.439960241317749, "loss_ce": 7.986063428688794e-05, "loss_iou": 0.1962890625, "loss_num": 0.0093994140625, "loss_xval": 0.439453125, "num_input_tokens_seen": 491822976, "step": 8778 }, { "epoch": 19.552338530066816, "grad_norm": 14.03285026550293, "learning_rate": 1e-06, "loss": 0.3264, "num_input_tokens_seen": 491879560, "step": 8779 }, { "epoch": 19.552338530066816, "loss": 0.28052568435668945, "loss_ce": 6.915038829902187e-05, "loss_iou": 0.12451171875, "loss_num": 0.006256103515625, "loss_xval": 0.28125, "num_input_tokens_seen": 491879560, "step": 8779 }, { "epoch": 19.55456570155902, "grad_norm": 18.03692626953125, "learning_rate": 1e-06, "loss": 0.6105, "num_input_tokens_seen": 491932316, "step": 8780 }, { "epoch": 19.55456570155902, "loss": 0.5739569067955017, "loss_ce": 0.00010437377204652876, "loss_iou": 0.23046875, "loss_num": 0.022705078125, "loss_xval": 0.57421875, "num_input_tokens_seen": 491932316, "step": 8780 }, { "epoch": 19.556792873051226, "grad_norm": 32.511112213134766, "learning_rate": 1e-06, "loss": 0.4244, "num_input_tokens_seen": 491988108, "step": 8781 }, { "epoch": 19.556792873051226, "loss": 0.32041823863983154, "loss_ce": 7.521534280385822e-05, "loss_iou": 0.138671875, "loss_num": 0.008544921875, "loss_xval": 0.3203125, "num_input_tokens_seen": 491988108, "step": 8781 }, { "epoch": 19.55902004454343, "grad_norm": 16.41408920288086, "learning_rate": 1e-06, "loss": 0.3266, "num_input_tokens_seen": 492045888, "step": 8782 }, { "epoch": 19.55902004454343, "loss": 0.24403327703475952, "loss_ce": 7.575724157504737e-05, "loss_iou": 0.11083984375, "loss_num": 0.004486083984375, "loss_xval": 0.244140625, "num_input_tokens_seen": 492045888, "step": 8782 }, { "epoch": 19.561247216035635, "grad_norm": 19.35165786743164, "learning_rate": 1e-06, "loss": 0.4494, "num_input_tokens_seen": 492102372, "step": 8783 }, { "epoch": 19.561247216035635, "loss": 0.42511671781539917, "loss_ce": 6.788804603274912e-05, "loss_iou": 0.1845703125, "loss_num": 0.01129150390625, "loss_xval": 0.42578125, "num_input_tokens_seen": 492102372, "step": 8783 }, { "epoch": 19.56347438752784, "grad_norm": 28.635347366333008, "learning_rate": 1e-06, "loss": 0.4119, "num_input_tokens_seen": 492158548, "step": 8784 }, { "epoch": 19.56347438752784, "loss": 0.319175660610199, "loss_ce": 8.385031105717644e-05, "loss_iou": 0.138671875, "loss_num": 0.00823974609375, "loss_xval": 0.318359375, "num_input_tokens_seen": 492158548, "step": 8784 }, { "epoch": 19.565701559020045, "grad_norm": 20.49407386779785, "learning_rate": 1e-06, "loss": 0.3606, "num_input_tokens_seen": 492214608, "step": 8785 }, { "epoch": 19.565701559020045, "loss": 0.32773077487945557, "loss_ce": 9.405831951880828e-05, "loss_iou": 0.1513671875, "loss_num": 0.00482177734375, "loss_xval": 0.328125, "num_input_tokens_seen": 492214608, "step": 8785 }, { "epoch": 19.56792873051225, "grad_norm": 19.137483596801758, "learning_rate": 1e-06, "loss": 0.6018, "num_input_tokens_seen": 492271240, "step": 8786 }, { "epoch": 19.56792873051225, "loss": 0.5808820128440857, "loss_ce": 7.148716395022348e-05, "loss_iou": 0.244140625, "loss_num": 0.0185546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 492271240, "step": 8786 }, { "epoch": 19.570155902004455, "grad_norm": 13.811071395874023, "learning_rate": 1e-06, "loss": 0.3109, "num_input_tokens_seen": 492323760, "step": 8787 }, { "epoch": 19.570155902004455, "loss": 0.32258814573287964, "loss_ce": 7.836205622879788e-05, "loss_iou": 0.1435546875, "loss_num": 0.006988525390625, "loss_xval": 0.322265625, "num_input_tokens_seen": 492323760, "step": 8787 }, { "epoch": 19.57238307349666, "grad_norm": 17.646892547607422, "learning_rate": 1e-06, "loss": 0.3744, "num_input_tokens_seen": 492377292, "step": 8788 }, { "epoch": 19.57238307349666, "loss": 0.48769521713256836, "loss_ce": 8.538780093658715e-05, "loss_iou": 0.203125, "loss_num": 0.0164794921875, "loss_xval": 0.48828125, "num_input_tokens_seen": 492377292, "step": 8788 }, { "epoch": 19.574610244988865, "grad_norm": 32.08982467651367, "learning_rate": 1e-06, "loss": 0.339, "num_input_tokens_seen": 492432440, "step": 8789 }, { "epoch": 19.574610244988865, "loss": 0.32432878017425537, "loss_ce": 6.424468301702291e-05, "loss_iou": 0.1484375, "loss_num": 0.0054931640625, "loss_xval": 0.32421875, "num_input_tokens_seen": 492432440, "step": 8789 }, { "epoch": 19.57683741648107, "grad_norm": 10.528325080871582, "learning_rate": 1e-06, "loss": 0.2973, "num_input_tokens_seen": 492490824, "step": 8790 }, { "epoch": 19.57683741648107, "loss": 0.3674495816230774, "loss_ce": 7.898532930994406e-05, "loss_iou": 0.1513671875, "loss_num": 0.0130615234375, "loss_xval": 0.3671875, "num_input_tokens_seen": 492490824, "step": 8790 }, { "epoch": 19.579064587973274, "grad_norm": 18.546995162963867, "learning_rate": 1e-06, "loss": 0.3658, "num_input_tokens_seen": 492549284, "step": 8791 }, { "epoch": 19.579064587973274, "loss": 0.2795357406139374, "loss_ce": 5.5770447943359613e-05, "loss_iou": 0.11669921875, "loss_num": 0.0091552734375, "loss_xval": 0.279296875, "num_input_tokens_seen": 492549284, "step": 8791 }, { "epoch": 19.58129175946548, "grad_norm": 19.887773513793945, "learning_rate": 1e-06, "loss": 0.376, "num_input_tokens_seen": 492605324, "step": 8792 }, { "epoch": 19.58129175946548, "loss": 0.35456758737564087, "loss_ce": 7.540988008258864e-05, "loss_iou": 0.1572265625, "loss_num": 0.0081787109375, "loss_xval": 0.35546875, "num_input_tokens_seen": 492605324, "step": 8792 }, { "epoch": 19.583518930957684, "grad_norm": 19.090816497802734, "learning_rate": 1e-06, "loss": 0.3357, "num_input_tokens_seen": 492661580, "step": 8793 }, { "epoch": 19.583518930957684, "loss": 0.36834782361984253, "loss_ce": 6.167573155835271e-05, "loss_iou": 0.15234375, "loss_num": 0.0125732421875, "loss_xval": 0.369140625, "num_input_tokens_seen": 492661580, "step": 8793 }, { "epoch": 19.58574610244989, "grad_norm": 18.095928192138672, "learning_rate": 1e-06, "loss": 0.3908, "num_input_tokens_seen": 492719140, "step": 8794 }, { "epoch": 19.58574610244989, "loss": 0.2904747724533081, "loss_ce": 6.948962982278317e-05, "loss_iou": 0.1259765625, "loss_num": 0.007720947265625, "loss_xval": 0.291015625, "num_input_tokens_seen": 492719140, "step": 8794 }, { "epoch": 19.587973273942094, "grad_norm": 15.511141777038574, "learning_rate": 1e-06, "loss": 0.4641, "num_input_tokens_seen": 492775612, "step": 8795 }, { "epoch": 19.587973273942094, "loss": 0.42014381289482117, "loss_ce": 9.987192606786266e-05, "loss_iou": 0.1904296875, "loss_num": 0.00775146484375, "loss_xval": 0.419921875, "num_input_tokens_seen": 492775612, "step": 8795 }, { "epoch": 19.5902004454343, "grad_norm": 19.146825790405273, "learning_rate": 1e-06, "loss": 0.4842, "num_input_tokens_seen": 492833836, "step": 8796 }, { "epoch": 19.5902004454343, "loss": 0.5711435079574585, "loss_ce": 9.86100931186229e-05, "loss_iou": 0.2412109375, "loss_num": 0.0177001953125, "loss_xval": 0.5703125, "num_input_tokens_seen": 492833836, "step": 8796 }, { "epoch": 19.592427616926503, "grad_norm": 14.747178077697754, "learning_rate": 1e-06, "loss": 0.444, "num_input_tokens_seen": 492887604, "step": 8797 }, { "epoch": 19.592427616926503, "loss": 0.29975390434265137, "loss_ce": 7.127322896849364e-05, "loss_iou": 0.140625, "loss_num": 0.00384521484375, "loss_xval": 0.298828125, "num_input_tokens_seen": 492887604, "step": 8797 }, { "epoch": 19.59465478841871, "grad_norm": 18.54738426208496, "learning_rate": 1e-06, "loss": 0.4126, "num_input_tokens_seen": 492944056, "step": 8798 }, { "epoch": 19.59465478841871, "loss": 0.259499728679657, "loss_ce": 6.978299643378705e-05, "loss_iou": 0.1171875, "loss_num": 0.00494384765625, "loss_xval": 0.259765625, "num_input_tokens_seen": 492944056, "step": 8798 }, { "epoch": 19.596881959910913, "grad_norm": 22.788713455200195, "learning_rate": 1e-06, "loss": 0.4753, "num_input_tokens_seen": 493001668, "step": 8799 }, { "epoch": 19.596881959910913, "loss": 0.5575045347213745, "loss_ce": 7.041562639642507e-05, "loss_iou": 0.2333984375, "loss_num": 0.0179443359375, "loss_xval": 0.55859375, "num_input_tokens_seen": 493001668, "step": 8799 }, { "epoch": 19.599109131403118, "grad_norm": 16.851640701293945, "learning_rate": 1e-06, "loss": 0.2476, "num_input_tokens_seen": 493060028, "step": 8800 }, { "epoch": 19.599109131403118, "loss": 0.2806518077850342, "loss_ce": 7.319905853364617e-05, "loss_iou": 0.125, "loss_num": 0.00628662109375, "loss_xval": 0.28125, "num_input_tokens_seen": 493060028, "step": 8800 }, { "epoch": 19.601336302895323, "grad_norm": 17.70904541015625, "learning_rate": 1e-06, "loss": 0.3181, "num_input_tokens_seen": 493117108, "step": 8801 }, { "epoch": 19.601336302895323, "loss": 0.2125520408153534, "loss_ce": 5.813151801703498e-05, "loss_iou": 0.095703125, "loss_num": 0.004150390625, "loss_xval": 0.212890625, "num_input_tokens_seen": 493117108, "step": 8801 }, { "epoch": 19.603563474387528, "grad_norm": 15.63475513458252, "learning_rate": 1e-06, "loss": 0.2397, "num_input_tokens_seen": 493170576, "step": 8802 }, { "epoch": 19.603563474387528, "loss": 0.2356223165988922, "loss_ce": 5.7125496823573485e-05, "loss_iou": 0.1044921875, "loss_num": 0.005218505859375, "loss_xval": 0.2353515625, "num_input_tokens_seen": 493170576, "step": 8802 }, { "epoch": 19.605790645879733, "grad_norm": 12.706445693969727, "learning_rate": 1e-06, "loss": 0.5809, "num_input_tokens_seen": 493227004, "step": 8803 }, { "epoch": 19.605790645879733, "loss": 0.6226192712783813, "loss_ce": 6.07035071880091e-05, "loss_iou": 0.2451171875, "loss_num": 0.0264892578125, "loss_xval": 0.62109375, "num_input_tokens_seen": 493227004, "step": 8803 }, { "epoch": 19.608017817371937, "grad_norm": 12.90841293334961, "learning_rate": 1e-06, "loss": 0.5149, "num_input_tokens_seen": 493284640, "step": 8804 }, { "epoch": 19.608017817371937, "loss": 0.36311158537864685, "loss_ce": 7.448208634741604e-05, "loss_iou": 0.1630859375, "loss_num": 0.00738525390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 493284640, "step": 8804 }, { "epoch": 19.610244988864142, "grad_norm": 20.88025665283203, "learning_rate": 1e-06, "loss": 0.5289, "num_input_tokens_seen": 493343304, "step": 8805 }, { "epoch": 19.610244988864142, "loss": 0.4120563268661499, "loss_ce": 6.90398010192439e-05, "loss_iou": 0.1728515625, "loss_num": 0.0133056640625, "loss_xval": 0.412109375, "num_input_tokens_seen": 493343304, "step": 8805 }, { "epoch": 19.612472160356347, "grad_norm": 18.58097267150879, "learning_rate": 1e-06, "loss": 0.3529, "num_input_tokens_seen": 493401548, "step": 8806 }, { "epoch": 19.612472160356347, "loss": 0.3655468821525574, "loss_ce": 6.83803009451367e-05, "loss_iou": 0.1455078125, "loss_num": 0.01513671875, "loss_xval": 0.365234375, "num_input_tokens_seen": 493401548, "step": 8806 }, { "epoch": 19.614699331848552, "grad_norm": 23.805185317993164, "learning_rate": 1e-06, "loss": 0.3137, "num_input_tokens_seen": 493456364, "step": 8807 }, { "epoch": 19.614699331848552, "loss": 0.414080947637558, "loss_ce": 7.948270649649203e-05, "loss_iou": 0.1796875, "loss_num": 0.0108642578125, "loss_xval": 0.4140625, "num_input_tokens_seen": 493456364, "step": 8807 }, { "epoch": 19.616926503340757, "grad_norm": 14.647207260131836, "learning_rate": 1e-06, "loss": 0.3448, "num_input_tokens_seen": 493514088, "step": 8808 }, { "epoch": 19.616926503340757, "loss": 0.3253961503505707, "loss_ce": 7.876554445829242e-05, "loss_iou": 0.1474609375, "loss_num": 0.005950927734375, "loss_xval": 0.326171875, "num_input_tokens_seen": 493514088, "step": 8808 }, { "epoch": 19.619153674832962, "grad_norm": 16.693212509155273, "learning_rate": 1e-06, "loss": 0.3789, "num_input_tokens_seen": 493571508, "step": 8809 }, { "epoch": 19.619153674832962, "loss": 0.20014190673828125, "loss_ce": 6.866748299216852e-05, "loss_iou": 0.08251953125, "loss_num": 0.00701904296875, "loss_xval": 0.2001953125, "num_input_tokens_seen": 493571508, "step": 8809 }, { "epoch": 19.621380846325167, "grad_norm": 24.3143367767334, "learning_rate": 1e-06, "loss": 0.3707, "num_input_tokens_seen": 493625268, "step": 8810 }, { "epoch": 19.621380846325167, "loss": 0.4873785972595215, "loss_ce": 7.392482802970335e-05, "loss_iou": 0.208984375, "loss_num": 0.01373291015625, "loss_xval": 0.48828125, "num_input_tokens_seen": 493625268, "step": 8810 }, { "epoch": 19.62360801781737, "grad_norm": 23.970718383789062, "learning_rate": 1e-06, "loss": 0.5419, "num_input_tokens_seen": 493682744, "step": 8811 }, { "epoch": 19.62360801781737, "loss": 0.5781893730163574, "loss_ce": 6.439335993491113e-05, "loss_iou": 0.24609375, "loss_num": 0.0172119140625, "loss_xval": 0.578125, "num_input_tokens_seen": 493682744, "step": 8811 }, { "epoch": 19.625835189309576, "grad_norm": 16.08030128479004, "learning_rate": 1e-06, "loss": 0.3674, "num_input_tokens_seen": 493738420, "step": 8812 }, { "epoch": 19.625835189309576, "loss": 0.36957836151123047, "loss_ce": 7.152724720072001e-05, "loss_iou": 0.1494140625, "loss_num": 0.013916015625, "loss_xval": 0.369140625, "num_input_tokens_seen": 493738420, "step": 8812 }, { "epoch": 19.62806236080178, "grad_norm": 16.537208557128906, "learning_rate": 1e-06, "loss": 0.4184, "num_input_tokens_seen": 493792444, "step": 8813 }, { "epoch": 19.62806236080178, "loss": 0.27265116572380066, "loss_ce": 6.81565361446701e-05, "loss_iou": 0.1142578125, "loss_num": 0.0087890625, "loss_xval": 0.2734375, "num_input_tokens_seen": 493792444, "step": 8813 }, { "epoch": 19.630289532293986, "grad_norm": 16.97813606262207, "learning_rate": 1e-06, "loss": 0.3923, "num_input_tokens_seen": 493851536, "step": 8814 }, { "epoch": 19.630289532293986, "loss": 0.22918465733528137, "loss_ce": 5.868993321200833e-05, "loss_iou": 0.10205078125, "loss_num": 0.004974365234375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 493851536, "step": 8814 }, { "epoch": 19.63251670378619, "grad_norm": 18.847007751464844, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 493909260, "step": 8815 }, { "epoch": 19.63251670378619, "loss": 0.3699719309806824, "loss_ce": 9.890169894788414e-05, "loss_iou": 0.1640625, "loss_num": 0.0084228515625, "loss_xval": 0.369140625, "num_input_tokens_seen": 493909260, "step": 8815 }, { "epoch": 19.634743875278396, "grad_norm": 12.239165306091309, "learning_rate": 1e-06, "loss": 0.2959, "num_input_tokens_seen": 493964284, "step": 8816 }, { "epoch": 19.634743875278396, "loss": 0.36634206771850586, "loss_ce": 7.009244291111827e-05, "loss_iou": 0.15625, "loss_num": 0.01080322265625, "loss_xval": 0.3671875, "num_input_tokens_seen": 493964284, "step": 8816 }, { "epoch": 19.6369710467706, "grad_norm": 20.558364868164062, "learning_rate": 1e-06, "loss": 0.4118, "num_input_tokens_seen": 494020864, "step": 8817 }, { "epoch": 19.6369710467706, "loss": 0.4873617887496948, "loss_ce": 5.70821066503413e-05, "loss_iou": 0.2138671875, "loss_num": 0.0120849609375, "loss_xval": 0.48828125, "num_input_tokens_seen": 494020864, "step": 8817 }, { "epoch": 19.639198218262806, "grad_norm": 20.281597137451172, "learning_rate": 1e-06, "loss": 0.3563, "num_input_tokens_seen": 494076352, "step": 8818 }, { "epoch": 19.639198218262806, "loss": 0.2713017463684082, "loss_ce": 6.150568515295163e-05, "loss_iou": 0.125, "loss_num": 0.00408935546875, "loss_xval": 0.271484375, "num_input_tokens_seen": 494076352, "step": 8818 }, { "epoch": 19.64142538975501, "grad_norm": 11.944673538208008, "learning_rate": 1e-06, "loss": 0.2912, "num_input_tokens_seen": 494132992, "step": 8819 }, { "epoch": 19.64142538975501, "loss": 0.2255510687828064, "loss_ce": 8.72044765856117e-05, "loss_iou": 0.09912109375, "loss_num": 0.0054931640625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 494132992, "step": 8819 }, { "epoch": 19.643652561247215, "grad_norm": 17.89927864074707, "learning_rate": 1e-06, "loss": 0.4362, "num_input_tokens_seen": 494187992, "step": 8820 }, { "epoch": 19.643652561247215, "loss": 0.3235388398170471, "loss_ce": 6.778095848858356e-05, "loss_iou": 0.1396484375, "loss_num": 0.0086669921875, "loss_xval": 0.32421875, "num_input_tokens_seen": 494187992, "step": 8820 }, { "epoch": 19.64587973273942, "grad_norm": 22.77928924560547, "learning_rate": 1e-06, "loss": 0.3405, "num_input_tokens_seen": 494243788, "step": 8821 }, { "epoch": 19.64587973273942, "loss": 0.24566447734832764, "loss_ce": 5.902632256038487e-05, "loss_iou": 0.10888671875, "loss_num": 0.005645751953125, "loss_xval": 0.24609375, "num_input_tokens_seen": 494243788, "step": 8821 }, { "epoch": 19.648106904231625, "grad_norm": 17.578380584716797, "learning_rate": 1e-06, "loss": 0.4421, "num_input_tokens_seen": 494300640, "step": 8822 }, { "epoch": 19.648106904231625, "loss": 0.6269207000732422, "loss_ce": 8.963636355474591e-05, "loss_iou": 0.2734375, "loss_num": 0.0162353515625, "loss_xval": 0.625, "num_input_tokens_seen": 494300640, "step": 8822 }, { "epoch": 19.65033407572383, "grad_norm": 18.126558303833008, "learning_rate": 1e-06, "loss": 0.3305, "num_input_tokens_seen": 494357432, "step": 8823 }, { "epoch": 19.65033407572383, "loss": 0.2599582076072693, "loss_ce": 7.050028216326609e-05, "loss_iou": 0.11474609375, "loss_num": 0.006134033203125, "loss_xval": 0.259765625, "num_input_tokens_seen": 494357432, "step": 8823 }, { "epoch": 19.652561247216035, "grad_norm": 14.803417205810547, "learning_rate": 1e-06, "loss": 0.6651, "num_input_tokens_seen": 494412312, "step": 8824 }, { "epoch": 19.652561247216035, "loss": 0.7808718681335449, "loss_ce": 0.00011010345770046115, "loss_iou": 0.3125, "loss_num": 0.031494140625, "loss_xval": 0.78125, "num_input_tokens_seen": 494412312, "step": 8824 }, { "epoch": 19.65478841870824, "grad_norm": 28.97988510131836, "learning_rate": 1e-06, "loss": 0.5272, "num_input_tokens_seen": 494465128, "step": 8825 }, { "epoch": 19.65478841870824, "loss": 0.629551112651825, "loss_ce": 0.0005227710935287178, "loss_iou": 0.287109375, "loss_num": 0.0113525390625, "loss_xval": 0.62890625, "num_input_tokens_seen": 494465128, "step": 8825 }, { "epoch": 19.657015590200444, "grad_norm": 14.80961799621582, "learning_rate": 1e-06, "loss": 0.3384, "num_input_tokens_seen": 494520212, "step": 8826 }, { "epoch": 19.657015590200444, "loss": 0.37103530764579773, "loss_ce": 6.362018757499754e-05, "loss_iou": 0.1640625, "loss_num": 0.0084228515625, "loss_xval": 0.37109375, "num_input_tokens_seen": 494520212, "step": 8826 }, { "epoch": 19.65924276169265, "grad_norm": 11.278867721557617, "learning_rate": 1e-06, "loss": 0.4748, "num_input_tokens_seen": 494576024, "step": 8827 }, { "epoch": 19.65924276169265, "loss": 0.3697161376476288, "loss_ce": 8.720482583157718e-05, "loss_iou": 0.16796875, "loss_num": 0.0067138671875, "loss_xval": 0.369140625, "num_input_tokens_seen": 494576024, "step": 8827 }, { "epoch": 19.661469933184854, "grad_norm": 21.07610321044922, "learning_rate": 1e-06, "loss": 0.4067, "num_input_tokens_seen": 494631812, "step": 8828 }, { "epoch": 19.661469933184854, "loss": 0.46012747287750244, "loss_ce": 7.498678314732388e-05, "loss_iou": 0.1796875, "loss_num": 0.02001953125, "loss_xval": 0.4609375, "num_input_tokens_seen": 494631812, "step": 8828 }, { "epoch": 19.66369710467706, "grad_norm": 29.130268096923828, "learning_rate": 1e-06, "loss": 0.5162, "num_input_tokens_seen": 494688008, "step": 8829 }, { "epoch": 19.66369710467706, "loss": 0.34423893690109253, "loss_ce": 6.168825348140672e-05, "loss_iou": 0.146484375, "loss_num": 0.0103759765625, "loss_xval": 0.34375, "num_input_tokens_seen": 494688008, "step": 8829 }, { "epoch": 19.665924276169264, "grad_norm": 15.006963729858398, "learning_rate": 1e-06, "loss": 0.2145, "num_input_tokens_seen": 494743180, "step": 8830 }, { "epoch": 19.665924276169264, "loss": 0.2853913903236389, "loss_ce": 0.0004792909894604236, "loss_iou": 0.1220703125, "loss_num": 0.008056640625, "loss_xval": 0.28515625, "num_input_tokens_seen": 494743180, "step": 8830 }, { "epoch": 19.66815144766147, "grad_norm": 18.46776580810547, "learning_rate": 1e-06, "loss": 0.3155, "num_input_tokens_seen": 494800836, "step": 8831 }, { "epoch": 19.66815144766147, "loss": 0.3410201072692871, "loss_ce": 7.773660036036745e-05, "loss_iou": 0.15234375, "loss_num": 0.0072021484375, "loss_xval": 0.341796875, "num_input_tokens_seen": 494800836, "step": 8831 }, { "epoch": 19.670378619153674, "grad_norm": 23.41823387145996, "learning_rate": 1e-06, "loss": 0.4599, "num_input_tokens_seen": 494858464, "step": 8832 }, { "epoch": 19.670378619153674, "loss": 0.5210896730422974, "loss_ce": 9.359928662888706e-05, "loss_iou": 0.224609375, "loss_num": 0.01422119140625, "loss_xval": 0.51953125, "num_input_tokens_seen": 494858464, "step": 8832 }, { "epoch": 19.67260579064588, "grad_norm": 17.398420333862305, "learning_rate": 1e-06, "loss": 0.3252, "num_input_tokens_seen": 494912524, "step": 8833 }, { "epoch": 19.67260579064588, "loss": 0.5025025606155396, "loss_ce": 6.115515134297311e-05, "loss_iou": 0.19921875, "loss_num": 0.0208740234375, "loss_xval": 0.50390625, "num_input_tokens_seen": 494912524, "step": 8833 }, { "epoch": 19.674832962138083, "grad_norm": 12.045059204101562, "learning_rate": 1e-06, "loss": 0.4081, "num_input_tokens_seen": 494968308, "step": 8834 }, { "epoch": 19.674832962138083, "loss": 0.38177552819252014, "loss_ce": 6.166419188957661e-05, "loss_iou": 0.173828125, "loss_num": 0.0067138671875, "loss_xval": 0.380859375, "num_input_tokens_seen": 494968308, "step": 8834 }, { "epoch": 19.677060133630288, "grad_norm": 24.122774124145508, "learning_rate": 1e-06, "loss": 0.4293, "num_input_tokens_seen": 495022460, "step": 8835 }, { "epoch": 19.677060133630288, "loss": 0.5443927049636841, "loss_ce": 8.117854304146022e-05, "loss_iou": 0.220703125, "loss_num": 0.0206298828125, "loss_xval": 0.54296875, "num_input_tokens_seen": 495022460, "step": 8835 }, { "epoch": 19.679287305122493, "grad_norm": 30.17648696899414, "learning_rate": 1e-06, "loss": 0.3063, "num_input_tokens_seen": 495079416, "step": 8836 }, { "epoch": 19.679287305122493, "loss": 0.363957941532135, "loss_ce": 6.632816075580195e-05, "loss_iou": 0.1630859375, "loss_num": 0.007720947265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 495079416, "step": 8836 }, { "epoch": 19.681514476614698, "grad_norm": 28.630796432495117, "learning_rate": 1e-06, "loss": 0.4824, "num_input_tokens_seen": 495136332, "step": 8837 }, { "epoch": 19.681514476614698, "loss": 0.3295344114303589, "loss_ce": 6.66461419314146e-05, "loss_iou": 0.1435546875, "loss_num": 0.0084228515625, "loss_xval": 0.330078125, "num_input_tokens_seen": 495136332, "step": 8837 }, { "epoch": 19.683741648106903, "grad_norm": 16.524477005004883, "learning_rate": 1e-06, "loss": 0.3898, "num_input_tokens_seen": 495192408, "step": 8838 }, { "epoch": 19.683741648106903, "loss": 0.47682133316993713, "loss_ce": 7.57328380132094e-05, "loss_iou": 0.1943359375, "loss_num": 0.017578125, "loss_xval": 0.4765625, "num_input_tokens_seen": 495192408, "step": 8838 }, { "epoch": 19.685968819599108, "grad_norm": 15.386173248291016, "learning_rate": 1e-06, "loss": 0.487, "num_input_tokens_seen": 495248052, "step": 8839 }, { "epoch": 19.685968819599108, "loss": 0.6375049948692322, "loss_ce": 0.0001758981088642031, "loss_iou": 0.251953125, "loss_num": 0.027099609375, "loss_xval": 0.63671875, "num_input_tokens_seen": 495248052, "step": 8839 }, { "epoch": 19.688195991091312, "grad_norm": 13.274645805358887, "learning_rate": 1e-06, "loss": 0.4217, "num_input_tokens_seen": 495304220, "step": 8840 }, { "epoch": 19.688195991091312, "loss": 0.40014198422431946, "loss_ce": 5.652975960401818e-05, "loss_iou": 0.1708984375, "loss_num": 0.01177978515625, "loss_xval": 0.400390625, "num_input_tokens_seen": 495304220, "step": 8840 }, { "epoch": 19.690423162583517, "grad_norm": 19.099824905395508, "learning_rate": 1e-06, "loss": 0.4126, "num_input_tokens_seen": 495360180, "step": 8841 }, { "epoch": 19.690423162583517, "loss": 0.408029705286026, "loss_ce": 7.075037865433842e-05, "loss_iou": 0.1865234375, "loss_num": 0.007171630859375, "loss_xval": 0.408203125, "num_input_tokens_seen": 495360180, "step": 8841 }, { "epoch": 19.692650334075722, "grad_norm": 14.30420207977295, "learning_rate": 1e-06, "loss": 0.3627, "num_input_tokens_seen": 495417216, "step": 8842 }, { "epoch": 19.692650334075722, "loss": 0.5124090909957886, "loss_ce": 7.99886038294062e-05, "loss_iou": 0.2197265625, "loss_num": 0.01458740234375, "loss_xval": 0.51171875, "num_input_tokens_seen": 495417216, "step": 8842 }, { "epoch": 19.694877505567927, "grad_norm": 16.062725067138672, "learning_rate": 1e-06, "loss": 0.4026, "num_input_tokens_seen": 495473692, "step": 8843 }, { "epoch": 19.694877505567927, "loss": 0.44783011078834534, "loss_ce": 0.0001982695539481938, "loss_iou": 0.1728515625, "loss_num": 0.0205078125, "loss_xval": 0.447265625, "num_input_tokens_seen": 495473692, "step": 8843 }, { "epoch": 19.697104677060132, "grad_norm": 13.419224739074707, "learning_rate": 1e-06, "loss": 0.3158, "num_input_tokens_seen": 495529692, "step": 8844 }, { "epoch": 19.697104677060132, "loss": 0.41748589277267456, "loss_ce": 6.646426481893286e-05, "loss_iou": 0.1552734375, "loss_num": 0.021484375, "loss_xval": 0.41796875, "num_input_tokens_seen": 495529692, "step": 8844 }, { "epoch": 19.69933184855234, "grad_norm": 20.578535079956055, "learning_rate": 1e-06, "loss": 0.4155, "num_input_tokens_seen": 495583708, "step": 8845 }, { "epoch": 19.69933184855234, "loss": 0.42976486682891846, "loss_ce": 7.74070795159787e-05, "loss_iou": 0.19140625, "loss_num": 0.00946044921875, "loss_xval": 0.4296875, "num_input_tokens_seen": 495583708, "step": 8845 }, { "epoch": 19.70155902004454, "grad_norm": 16.558269500732422, "learning_rate": 1e-06, "loss": 0.4006, "num_input_tokens_seen": 495636772, "step": 8846 }, { "epoch": 19.70155902004454, "loss": 0.49481481313705444, "loss_ce": 6.387023313436657e-05, "loss_iou": 0.185546875, "loss_num": 0.0247802734375, "loss_xval": 0.494140625, "num_input_tokens_seen": 495636772, "step": 8846 }, { "epoch": 19.70378619153675, "grad_norm": 18.609947204589844, "learning_rate": 1e-06, "loss": 0.3799, "num_input_tokens_seen": 495690168, "step": 8847 }, { "epoch": 19.70378619153675, "loss": 0.3060356378555298, "loss_ce": 6.63895916659385e-05, "loss_iou": 0.1357421875, "loss_num": 0.006927490234375, "loss_xval": 0.306640625, "num_input_tokens_seen": 495690168, "step": 8847 }, { "epoch": 19.706013363028955, "grad_norm": 19.02481460571289, "learning_rate": 1e-06, "loss": 0.4229, "num_input_tokens_seen": 495748480, "step": 8848 }, { "epoch": 19.706013363028955, "loss": 0.520011305809021, "loss_ce": 0.00011385796096874401, "loss_iou": 0.220703125, "loss_num": 0.01556396484375, "loss_xval": 0.51953125, "num_input_tokens_seen": 495748480, "step": 8848 }, { "epoch": 19.70824053452116, "grad_norm": 13.990358352661133, "learning_rate": 1e-06, "loss": 0.3866, "num_input_tokens_seen": 495805712, "step": 8849 }, { "epoch": 19.70824053452116, "loss": 0.49600470066070557, "loss_ce": 9.406625758856535e-05, "loss_iou": 0.203125, "loss_num": 0.017822265625, "loss_xval": 0.49609375, "num_input_tokens_seen": 495805712, "step": 8849 }, { "epoch": 19.710467706013365, "grad_norm": 15.833937644958496, "learning_rate": 1e-06, "loss": 0.4138, "num_input_tokens_seen": 495862696, "step": 8850 }, { "epoch": 19.710467706013365, "loss": 0.44215184450149536, "loss_ce": 7.422211638186127e-05, "loss_iou": 0.1982421875, "loss_num": 0.00921630859375, "loss_xval": 0.44140625, "num_input_tokens_seen": 495862696, "step": 8850 }, { "epoch": 19.71269487750557, "grad_norm": 24.503477096557617, "learning_rate": 1e-06, "loss": 0.412, "num_input_tokens_seen": 495913924, "step": 8851 }, { "epoch": 19.71269487750557, "loss": 0.329414427280426, "loss_ce": 6.872846279293299e-05, "loss_iou": 0.146484375, "loss_num": 0.007354736328125, "loss_xval": 0.330078125, "num_input_tokens_seen": 495913924, "step": 8851 }, { "epoch": 19.714922048997774, "grad_norm": 20.467147827148438, "learning_rate": 1e-06, "loss": 0.4762, "num_input_tokens_seen": 495969596, "step": 8852 }, { "epoch": 19.714922048997774, "loss": 0.4412245452404022, "loss_ce": 6.245089753065258e-05, "loss_iou": 0.193359375, "loss_num": 0.01068115234375, "loss_xval": 0.44140625, "num_input_tokens_seen": 495969596, "step": 8852 }, { "epoch": 19.71714922048998, "grad_norm": 16.596433639526367, "learning_rate": 1e-06, "loss": 0.3338, "num_input_tokens_seen": 496022692, "step": 8853 }, { "epoch": 19.71714922048998, "loss": 0.3814437985420227, "loss_ce": 6.563542410731316e-05, "loss_iou": 0.1708984375, "loss_num": 0.007781982421875, "loss_xval": 0.380859375, "num_input_tokens_seen": 496022692, "step": 8853 }, { "epoch": 19.719376391982184, "grad_norm": 14.351813316345215, "learning_rate": 1e-06, "loss": 0.2733, "num_input_tokens_seen": 496077624, "step": 8854 }, { "epoch": 19.719376391982184, "loss": 0.30036044120788574, "loss_ce": 6.749367457814515e-05, "loss_iou": 0.130859375, "loss_num": 0.00775146484375, "loss_xval": 0.30078125, "num_input_tokens_seen": 496077624, "step": 8854 }, { "epoch": 19.72160356347439, "grad_norm": 21.87047004699707, "learning_rate": 1e-06, "loss": 0.5035, "num_input_tokens_seen": 496133564, "step": 8855 }, { "epoch": 19.72160356347439, "loss": 0.36122041940689087, "loss_ce": 7.540646765846759e-05, "loss_iou": 0.16015625, "loss_num": 0.0081787109375, "loss_xval": 0.361328125, "num_input_tokens_seen": 496133564, "step": 8855 }, { "epoch": 19.723830734966594, "grad_norm": 18.896873474121094, "learning_rate": 1e-06, "loss": 0.4041, "num_input_tokens_seen": 496190924, "step": 8856 }, { "epoch": 19.723830734966594, "loss": 0.38140565156936646, "loss_ce": 5.800734652439132e-05, "loss_iou": 0.1572265625, "loss_num": 0.0135498046875, "loss_xval": 0.380859375, "num_input_tokens_seen": 496190924, "step": 8856 }, { "epoch": 19.7260579064588, "grad_norm": 20.9360408782959, "learning_rate": 1e-06, "loss": 0.3515, "num_input_tokens_seen": 496247316, "step": 8857 }, { "epoch": 19.7260579064588, "loss": 0.49714395403862, "loss_ce": 7.367074431385845e-05, "loss_iou": 0.224609375, "loss_num": 0.009521484375, "loss_xval": 0.49609375, "num_input_tokens_seen": 496247316, "step": 8857 }, { "epoch": 19.728285077951004, "grad_norm": 22.762691497802734, "learning_rate": 1e-06, "loss": 0.3882, "num_input_tokens_seen": 496305148, "step": 8858 }, { "epoch": 19.728285077951004, "loss": 0.37469351291656494, "loss_ce": 5.972578583168797e-05, "loss_iou": 0.154296875, "loss_num": 0.013427734375, "loss_xval": 0.375, "num_input_tokens_seen": 496305148, "step": 8858 }, { "epoch": 19.73051224944321, "grad_norm": 17.930313110351562, "learning_rate": 1e-06, "loss": 0.4452, "num_input_tokens_seen": 496360372, "step": 8859 }, { "epoch": 19.73051224944321, "loss": 0.4869701862335205, "loss_ce": 6.224210665095598e-05, "loss_iou": 0.228515625, "loss_num": 0.005859375, "loss_xval": 0.486328125, "num_input_tokens_seen": 496360372, "step": 8859 }, { "epoch": 19.732739420935413, "grad_norm": 20.98902702331543, "learning_rate": 1e-06, "loss": 0.3769, "num_input_tokens_seen": 496418148, "step": 8860 }, { "epoch": 19.732739420935413, "loss": 0.367613285779953, "loss_ce": 5.957091343589127e-05, "loss_iou": 0.16015625, "loss_num": 0.00927734375, "loss_xval": 0.3671875, "num_input_tokens_seen": 496418148, "step": 8860 }, { "epoch": 19.734966592427618, "grad_norm": 27.783918380737305, "learning_rate": 1e-06, "loss": 0.3618, "num_input_tokens_seen": 496475176, "step": 8861 }, { "epoch": 19.734966592427618, "loss": 0.31055858731269836, "loss_ce": 7.274634845089167e-05, "loss_iou": 0.1240234375, "loss_num": 0.012451171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 496475176, "step": 8861 }, { "epoch": 19.737193763919823, "grad_norm": 59.55124282836914, "learning_rate": 1e-06, "loss": 0.3794, "num_input_tokens_seen": 496530672, "step": 8862 }, { "epoch": 19.737193763919823, "loss": 0.5371950268745422, "loss_ce": 8.565981988795102e-05, "loss_iou": 0.251953125, "loss_num": 0.00640869140625, "loss_xval": 0.5390625, "num_input_tokens_seen": 496530672, "step": 8862 }, { "epoch": 19.739420935412028, "grad_norm": 20.648197174072266, "learning_rate": 1e-06, "loss": 0.4529, "num_input_tokens_seen": 496587424, "step": 8863 }, { "epoch": 19.739420935412028, "loss": 0.3034108877182007, "loss_ce": 6.618791667278856e-05, "loss_iou": 0.1357421875, "loss_num": 0.00653076171875, "loss_xval": 0.302734375, "num_input_tokens_seen": 496587424, "step": 8863 }, { "epoch": 19.741648106904233, "grad_norm": 20.388151168823242, "learning_rate": 1e-06, "loss": 0.5532, "num_input_tokens_seen": 496645436, "step": 8864 }, { "epoch": 19.741648106904233, "loss": 0.7695181369781494, "loss_ce": 0.00010891164129134268, "loss_iou": 0.2890625, "loss_num": 0.037841796875, "loss_xval": 0.76953125, "num_input_tokens_seen": 496645436, "step": 8864 }, { "epoch": 19.743875278396438, "grad_norm": 13.303028106689453, "learning_rate": 1e-06, "loss": 0.3442, "num_input_tokens_seen": 496701972, "step": 8865 }, { "epoch": 19.743875278396438, "loss": 0.2887694835662842, "loss_ce": 7.317646668525413e-05, "loss_iou": 0.12890625, "loss_num": 0.006439208984375, "loss_xval": 0.2890625, "num_input_tokens_seen": 496701972, "step": 8865 }, { "epoch": 19.746102449888642, "grad_norm": 63.60599899291992, "learning_rate": 1e-06, "loss": 0.3638, "num_input_tokens_seen": 496755828, "step": 8866 }, { "epoch": 19.746102449888642, "loss": 0.4621775150299072, "loss_ce": 8.034885104279965e-05, "loss_iou": 0.1904296875, "loss_num": 0.0159912109375, "loss_xval": 0.462890625, "num_input_tokens_seen": 496755828, "step": 8866 }, { "epoch": 19.748329621380847, "grad_norm": 22.44493293762207, "learning_rate": 1e-06, "loss": 0.3002, "num_input_tokens_seen": 496812708, "step": 8867 }, { "epoch": 19.748329621380847, "loss": 0.3059879541397095, "loss_ce": 7.977118366397917e-05, "loss_iou": 0.1298828125, "loss_num": 0.00921630859375, "loss_xval": 0.306640625, "num_input_tokens_seen": 496812708, "step": 8867 }, { "epoch": 19.750556792873052, "grad_norm": 21.83323860168457, "learning_rate": 1e-06, "loss": 0.385, "num_input_tokens_seen": 496868560, "step": 8868 }, { "epoch": 19.750556792873052, "loss": 0.30378103256225586, "loss_ce": 7.010510307736695e-05, "loss_iou": 0.1416015625, "loss_num": 0.004364013671875, "loss_xval": 0.3046875, "num_input_tokens_seen": 496868560, "step": 8868 }, { "epoch": 19.752783964365257, "grad_norm": 32.918434143066406, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 496922692, "step": 8869 }, { "epoch": 19.752783964365257, "loss": 0.5124045014381409, "loss_ce": 7.53938511479646e-05, "loss_iou": 0.2333984375, "loss_num": 0.0091552734375, "loss_xval": 0.51171875, "num_input_tokens_seen": 496922692, "step": 8869 }, { "epoch": 19.755011135857462, "grad_norm": 25.483102798461914, "learning_rate": 1e-06, "loss": 0.3786, "num_input_tokens_seen": 496980516, "step": 8870 }, { "epoch": 19.755011135857462, "loss": 0.3339354991912842, "loss_ce": 7.317691051866859e-05, "loss_iou": 0.154296875, "loss_num": 0.004913330078125, "loss_xval": 0.333984375, "num_input_tokens_seen": 496980516, "step": 8870 }, { "epoch": 19.757238307349667, "grad_norm": 15.504528045654297, "learning_rate": 1e-06, "loss": 0.4814, "num_input_tokens_seen": 497037204, "step": 8871 }, { "epoch": 19.757238307349667, "loss": 0.6138589382171631, "loss_ce": 7.417285814881325e-05, "loss_iou": 0.26953125, "loss_num": 0.0145263671875, "loss_xval": 0.61328125, "num_input_tokens_seen": 497037204, "step": 8871 }, { "epoch": 19.75946547884187, "grad_norm": 25.176443099975586, "learning_rate": 1e-06, "loss": 0.3316, "num_input_tokens_seen": 497089592, "step": 8872 }, { "epoch": 19.75946547884187, "loss": 0.2530931532382965, "loss_ce": 7.191546319518238e-05, "loss_iou": 0.1123046875, "loss_num": 0.005584716796875, "loss_xval": 0.25390625, "num_input_tokens_seen": 497089592, "step": 8872 }, { "epoch": 19.761692650334076, "grad_norm": 15.639261245727539, "learning_rate": 1e-06, "loss": 0.3225, "num_input_tokens_seen": 497143680, "step": 8873 }, { "epoch": 19.761692650334076, "loss": 0.2651139497756958, "loss_ce": 9.929385123541579e-05, "loss_iou": 0.11767578125, "loss_num": 0.005950927734375, "loss_xval": 0.265625, "num_input_tokens_seen": 497143680, "step": 8873 }, { "epoch": 19.76391982182628, "grad_norm": 23.403366088867188, "learning_rate": 1e-06, "loss": 0.3117, "num_input_tokens_seen": 497198980, "step": 8874 }, { "epoch": 19.76391982182628, "loss": 0.3706684112548828, "loss_ce": 6.295710045378655e-05, "loss_iou": 0.171875, "loss_num": 0.005462646484375, "loss_xval": 0.37109375, "num_input_tokens_seen": 497198980, "step": 8874 }, { "epoch": 19.766146993318486, "grad_norm": 14.233652114868164, "learning_rate": 1e-06, "loss": 0.3257, "num_input_tokens_seen": 497257808, "step": 8875 }, { "epoch": 19.766146993318486, "loss": 0.41550278663635254, "loss_ce": 0.0003416564140934497, "loss_iou": 0.1669921875, "loss_num": 0.0164794921875, "loss_xval": 0.416015625, "num_input_tokens_seen": 497257808, "step": 8875 }, { "epoch": 19.76837416481069, "grad_norm": 13.950034141540527, "learning_rate": 1e-06, "loss": 0.3255, "num_input_tokens_seen": 497314976, "step": 8876 }, { "epoch": 19.76837416481069, "loss": 0.2907865047454834, "loss_ce": 7.603740959893912e-05, "loss_iou": 0.12890625, "loss_num": 0.006561279296875, "loss_xval": 0.291015625, "num_input_tokens_seen": 497314976, "step": 8876 }, { "epoch": 19.770601336302896, "grad_norm": 12.77400016784668, "learning_rate": 1e-06, "loss": 0.3122, "num_input_tokens_seen": 497371316, "step": 8877 }, { "epoch": 19.770601336302896, "loss": 0.302242249250412, "loss_ce": 5.7184963225154206e-05, "loss_iou": 0.1328125, "loss_num": 0.00732421875, "loss_xval": 0.302734375, "num_input_tokens_seen": 497371316, "step": 8877 }, { "epoch": 19.7728285077951, "grad_norm": 16.136669158935547, "learning_rate": 1e-06, "loss": 0.4607, "num_input_tokens_seen": 497428012, "step": 8878 }, { "epoch": 19.7728285077951, "loss": 0.7113887667655945, "loss_ce": 8.501914271619171e-05, "loss_iou": 0.314453125, "loss_num": 0.0159912109375, "loss_xval": 0.7109375, "num_input_tokens_seen": 497428012, "step": 8878 }, { "epoch": 19.775055679287306, "grad_norm": 26.28825569152832, "learning_rate": 1e-06, "loss": 0.2925, "num_input_tokens_seen": 497485468, "step": 8879 }, { "epoch": 19.775055679287306, "loss": 0.346199095249176, "loss_ce": 6.87175925122574e-05, "loss_iou": 0.1611328125, "loss_num": 0.00469970703125, "loss_xval": 0.345703125, "num_input_tokens_seen": 497485468, "step": 8879 }, { "epoch": 19.77728285077951, "grad_norm": 17.712745666503906, "learning_rate": 1e-06, "loss": 0.3412, "num_input_tokens_seen": 497543128, "step": 8880 }, { "epoch": 19.77728285077951, "loss": 0.32294753193855286, "loss_ce": 7.154815102694556e-05, "loss_iou": 0.1474609375, "loss_num": 0.00537109375, "loss_xval": 0.322265625, "num_input_tokens_seen": 497543128, "step": 8880 }, { "epoch": 19.779510022271715, "grad_norm": 16.877283096313477, "learning_rate": 1e-06, "loss": 0.3093, "num_input_tokens_seen": 497601184, "step": 8881 }, { "epoch": 19.779510022271715, "loss": 0.26191645860671997, "loss_ce": 7.561576785519719e-05, "loss_iou": 0.10791015625, "loss_num": 0.0091552734375, "loss_xval": 0.26171875, "num_input_tokens_seen": 497601184, "step": 8881 }, { "epoch": 19.78173719376392, "grad_norm": 15.67979621887207, "learning_rate": 1e-06, "loss": 0.3683, "num_input_tokens_seen": 497658448, "step": 8882 }, { "epoch": 19.78173719376392, "loss": 0.4387954771518707, "loss_ce": 7.479546184185892e-05, "loss_iou": 0.171875, "loss_num": 0.0189208984375, "loss_xval": 0.439453125, "num_input_tokens_seen": 497658448, "step": 8882 }, { "epoch": 19.783964365256125, "grad_norm": 15.870912551879883, "learning_rate": 1e-06, "loss": 0.4725, "num_input_tokens_seen": 497714064, "step": 8883 }, { "epoch": 19.783964365256125, "loss": 0.47480326890945435, "loss_ce": 7.185728463809937e-05, "loss_iou": 0.212890625, "loss_num": 0.0096435546875, "loss_xval": 0.474609375, "num_input_tokens_seen": 497714064, "step": 8883 }, { "epoch": 19.78619153674833, "grad_norm": 22.020280838012695, "learning_rate": 1e-06, "loss": 0.5039, "num_input_tokens_seen": 497768280, "step": 8884 }, { "epoch": 19.78619153674833, "loss": 0.38886767625808716, "loss_ce": 7.375521818175912e-05, "loss_iou": 0.1865234375, "loss_num": 0.0032501220703125, "loss_xval": 0.388671875, "num_input_tokens_seen": 497768280, "step": 8884 }, { "epoch": 19.788418708240535, "grad_norm": 28.759016036987305, "learning_rate": 1e-06, "loss": 0.4566, "num_input_tokens_seen": 497822296, "step": 8885 }, { "epoch": 19.788418708240535, "loss": 0.5814878940582275, "loss_ce": 6.704413681291044e-05, "loss_iou": 0.236328125, "loss_num": 0.021728515625, "loss_xval": 0.58203125, "num_input_tokens_seen": 497822296, "step": 8885 }, { "epoch": 19.79064587973274, "grad_norm": 19.10303497314453, "learning_rate": 1e-06, "loss": 0.6282, "num_input_tokens_seen": 497880436, "step": 8886 }, { "epoch": 19.79064587973274, "loss": 0.586006760597229, "loss_ce": 6.921241583768278e-05, "loss_iou": 0.2275390625, "loss_num": 0.0263671875, "loss_xval": 0.5859375, "num_input_tokens_seen": 497880436, "step": 8886 }, { "epoch": 19.792873051224944, "grad_norm": 24.136104583740234, "learning_rate": 1e-06, "loss": 0.3318, "num_input_tokens_seen": 497934040, "step": 8887 }, { "epoch": 19.792873051224944, "loss": 0.3225743770599365, "loss_ce": 6.460573058575392e-05, "loss_iou": 0.140625, "loss_num": 0.00830078125, "loss_xval": 0.322265625, "num_input_tokens_seen": 497934040, "step": 8887 }, { "epoch": 19.79510022271715, "grad_norm": 15.562583923339844, "learning_rate": 1e-06, "loss": 0.4173, "num_input_tokens_seen": 497989992, "step": 8888 }, { "epoch": 19.79510022271715, "loss": 0.4295352101325989, "loss_ce": 6.133929127827287e-05, "loss_iou": 0.16796875, "loss_num": 0.0185546875, "loss_xval": 0.4296875, "num_input_tokens_seen": 497989992, "step": 8888 }, { "epoch": 19.797327394209354, "grad_norm": 12.647050857543945, "learning_rate": 1e-06, "loss": 0.3252, "num_input_tokens_seen": 498045484, "step": 8889 }, { "epoch": 19.797327394209354, "loss": 0.26227039098739624, "loss_ce": 6.334840873023495e-05, "loss_iou": 0.107421875, "loss_num": 0.009521484375, "loss_xval": 0.26171875, "num_input_tokens_seen": 498045484, "step": 8889 }, { "epoch": 19.79955456570156, "grad_norm": 17.201618194580078, "learning_rate": 1e-06, "loss": 0.3303, "num_input_tokens_seen": 498102048, "step": 8890 }, { "epoch": 19.79955456570156, "loss": 0.31794556975364685, "loss_ce": 7.44800636311993e-05, "loss_iou": 0.1279296875, "loss_num": 0.01220703125, "loss_xval": 0.318359375, "num_input_tokens_seen": 498102048, "step": 8890 }, { "epoch": 19.801781737193764, "grad_norm": 22.599042892456055, "learning_rate": 1e-06, "loss": 0.3699, "num_input_tokens_seen": 498159040, "step": 8891 }, { "epoch": 19.801781737193764, "loss": 0.2396983951330185, "loss_ce": 7.437313615810126e-05, "loss_iou": 0.107421875, "loss_num": 0.00494384765625, "loss_xval": 0.2392578125, "num_input_tokens_seen": 498159040, "step": 8891 }, { "epoch": 19.80400890868597, "grad_norm": 26.07795524597168, "learning_rate": 1e-06, "loss": 0.4677, "num_input_tokens_seen": 498212312, "step": 8892 }, { "epoch": 19.80400890868597, "loss": 0.5568862557411194, "loss_ce": 0.00012353430793154985, "loss_iou": 0.2333984375, "loss_num": 0.017822265625, "loss_xval": 0.55859375, "num_input_tokens_seen": 498212312, "step": 8892 }, { "epoch": 19.806236080178174, "grad_norm": 22.389713287353516, "learning_rate": 1e-06, "loss": 0.3661, "num_input_tokens_seen": 498267900, "step": 8893 }, { "epoch": 19.806236080178174, "loss": 0.4324456751346588, "loss_ce": 7.261322753038257e-05, "loss_iou": 0.201171875, "loss_num": 0.005950927734375, "loss_xval": 0.431640625, "num_input_tokens_seen": 498267900, "step": 8893 }, { "epoch": 19.80846325167038, "grad_norm": 16.400392532348633, "learning_rate": 1e-06, "loss": 0.2981, "num_input_tokens_seen": 498324320, "step": 8894 }, { "epoch": 19.80846325167038, "loss": 0.27405545115470886, "loss_ce": 6.862355803605169e-05, "loss_iou": 0.1279296875, "loss_num": 0.00360107421875, "loss_xval": 0.2734375, "num_input_tokens_seen": 498324320, "step": 8894 }, { "epoch": 19.810690423162583, "grad_norm": 21.506378173828125, "learning_rate": 1e-06, "loss": 0.3227, "num_input_tokens_seen": 498378944, "step": 8895 }, { "epoch": 19.810690423162583, "loss": 0.3589167296886444, "loss_ce": 6.0540056438185275e-05, "loss_iou": 0.1533203125, "loss_num": 0.01043701171875, "loss_xval": 0.359375, "num_input_tokens_seen": 498378944, "step": 8895 }, { "epoch": 19.812917594654788, "grad_norm": 23.304874420166016, "learning_rate": 1e-06, "loss": 0.2775, "num_input_tokens_seen": 498432504, "step": 8896 }, { "epoch": 19.812917594654788, "loss": 0.27460119128227234, "loss_ce": 6.50603833491914e-05, "loss_iou": 0.1240234375, "loss_num": 0.005218505859375, "loss_xval": 0.275390625, "num_input_tokens_seen": 498432504, "step": 8896 }, { "epoch": 19.815144766146993, "grad_norm": 15.28256893157959, "learning_rate": 1e-06, "loss": 0.334, "num_input_tokens_seen": 498489036, "step": 8897 }, { "epoch": 19.815144766146993, "loss": 0.3256245255470276, "loss_ce": 6.301122630247846e-05, "loss_iou": 0.150390625, "loss_num": 0.00518798828125, "loss_xval": 0.326171875, "num_input_tokens_seen": 498489036, "step": 8897 }, { "epoch": 19.817371937639198, "grad_norm": 37.541343688964844, "learning_rate": 1e-06, "loss": 0.556, "num_input_tokens_seen": 498544072, "step": 8898 }, { "epoch": 19.817371937639198, "loss": 0.7319257259368896, "loss_ce": 0.0001142127366620116, "loss_iou": 0.26953125, "loss_num": 0.038818359375, "loss_xval": 0.73046875, "num_input_tokens_seen": 498544072, "step": 8898 }, { "epoch": 19.819599109131403, "grad_norm": 35.13840866088867, "learning_rate": 1e-06, "loss": 0.4129, "num_input_tokens_seen": 498600592, "step": 8899 }, { "epoch": 19.819599109131403, "loss": 0.2181350737810135, "loss_ce": 5.645793498842977e-05, "loss_iou": 0.08837890625, "loss_num": 0.00823974609375, "loss_xval": 0.2177734375, "num_input_tokens_seen": 498600592, "step": 8899 }, { "epoch": 19.821826280623608, "grad_norm": 18.837566375732422, "learning_rate": 1e-06, "loss": 0.3815, "num_input_tokens_seen": 498655692, "step": 8900 }, { "epoch": 19.821826280623608, "loss": 0.21076907217502594, "loss_ce": 6.046070120646618e-05, "loss_iou": 0.09521484375, "loss_num": 0.004058837890625, "loss_xval": 0.2109375, "num_input_tokens_seen": 498655692, "step": 8900 }, { "epoch": 19.824053452115812, "grad_norm": 14.442410469055176, "learning_rate": 1e-06, "loss": 0.4286, "num_input_tokens_seen": 498710648, "step": 8901 }, { "epoch": 19.824053452115812, "loss": 0.48480677604675293, "loss_ce": 6.559376924997196e-05, "loss_iou": 0.205078125, "loss_num": 0.0150146484375, "loss_xval": 0.484375, "num_input_tokens_seen": 498710648, "step": 8901 }, { "epoch": 19.826280623608017, "grad_norm": 15.92866039276123, "learning_rate": 1e-06, "loss": 0.3134, "num_input_tokens_seen": 498766436, "step": 8902 }, { "epoch": 19.826280623608017, "loss": 0.35358044505119324, "loss_ce": 6.481447053374723e-05, "loss_iou": 0.1328125, "loss_num": 0.017578125, "loss_xval": 0.353515625, "num_input_tokens_seen": 498766436, "step": 8902 }, { "epoch": 19.828507795100222, "grad_norm": 18.898021697998047, "learning_rate": 1e-06, "loss": 0.5045, "num_input_tokens_seen": 498821800, "step": 8903 }, { "epoch": 19.828507795100222, "loss": 0.49323803186416626, "loss_ce": 7.400984031846747e-05, "loss_iou": 0.2119140625, "loss_num": 0.01385498046875, "loss_xval": 0.4921875, "num_input_tokens_seen": 498821800, "step": 8903 }, { "epoch": 19.830734966592427, "grad_norm": 11.007697105407715, "learning_rate": 1e-06, "loss": 0.2899, "num_input_tokens_seen": 498876356, "step": 8904 }, { "epoch": 19.830734966592427, "loss": 0.2739826440811157, "loss_ce": 0.00017892879259306937, "loss_iou": 0.11669921875, "loss_num": 0.0081787109375, "loss_xval": 0.2734375, "num_input_tokens_seen": 498876356, "step": 8904 }, { "epoch": 19.832962138084632, "grad_norm": 11.861038208007812, "learning_rate": 1e-06, "loss": 0.388, "num_input_tokens_seen": 498934096, "step": 8905 }, { "epoch": 19.832962138084632, "loss": 0.34637463092803955, "loss_ce": 6.114893039921299e-05, "loss_iou": 0.154296875, "loss_num": 0.00738525390625, "loss_xval": 0.345703125, "num_input_tokens_seen": 498934096, "step": 8905 }, { "epoch": 19.835189309576837, "grad_norm": 18.919658660888672, "learning_rate": 1e-06, "loss": 0.3537, "num_input_tokens_seen": 498989812, "step": 8906 }, { "epoch": 19.835189309576837, "loss": 0.30976182222366333, "loss_ce": 6.946048233658075e-05, "loss_iou": 0.1435546875, "loss_num": 0.004669189453125, "loss_xval": 0.310546875, "num_input_tokens_seen": 498989812, "step": 8906 }, { "epoch": 19.83741648106904, "grad_norm": 15.281343460083008, "learning_rate": 1e-06, "loss": 0.3773, "num_input_tokens_seen": 499047512, "step": 8907 }, { "epoch": 19.83741648106904, "loss": 0.3993797302246094, "loss_ce": 8.777561015449464e-05, "loss_iou": 0.1650390625, "loss_num": 0.01385498046875, "loss_xval": 0.3984375, "num_input_tokens_seen": 499047512, "step": 8907 }, { "epoch": 19.839643652561247, "grad_norm": 14.046529769897461, "learning_rate": 1e-06, "loss": 0.431, "num_input_tokens_seen": 499105152, "step": 8908 }, { "epoch": 19.839643652561247, "loss": 0.6798744201660156, "loss_ce": 6.483346078312024e-05, "loss_iou": 0.279296875, "loss_num": 0.024169921875, "loss_xval": 0.6796875, "num_input_tokens_seen": 499105152, "step": 8908 }, { "epoch": 19.84187082405345, "grad_norm": 19.07868766784668, "learning_rate": 1e-06, "loss": 0.362, "num_input_tokens_seen": 499160952, "step": 8909 }, { "epoch": 19.84187082405345, "loss": 0.5317122936248779, "loss_ce": 9.61277837632224e-05, "loss_iou": 0.205078125, "loss_num": 0.024169921875, "loss_xval": 0.53125, "num_input_tokens_seen": 499160952, "step": 8909 }, { "epoch": 19.844097995545656, "grad_norm": 17.901517868041992, "learning_rate": 1e-06, "loss": 0.4618, "num_input_tokens_seen": 499217672, "step": 8910 }, { "epoch": 19.844097995545656, "loss": 0.3117324709892273, "loss_ce": 8.69435680215247e-05, "loss_iou": 0.12158203125, "loss_num": 0.0135498046875, "loss_xval": 0.3125, "num_input_tokens_seen": 499217672, "step": 8910 }, { "epoch": 19.84632516703786, "grad_norm": 28.3020076751709, "learning_rate": 1e-06, "loss": 0.433, "num_input_tokens_seen": 499274752, "step": 8911 }, { "epoch": 19.84632516703786, "loss": 0.3874030113220215, "loss_ce": 7.39007446100004e-05, "loss_iou": 0.1787109375, "loss_num": 0.0059814453125, "loss_xval": 0.38671875, "num_input_tokens_seen": 499274752, "step": 8911 }, { "epoch": 19.848552338530066, "grad_norm": 29.61766242980957, "learning_rate": 1e-06, "loss": 0.3846, "num_input_tokens_seen": 499327256, "step": 8912 }, { "epoch": 19.848552338530066, "loss": 0.24732181429862976, "loss_ce": 6.841085996711627e-05, "loss_iou": 0.111328125, "loss_num": 0.004974365234375, "loss_xval": 0.2470703125, "num_input_tokens_seen": 499327256, "step": 8912 }, { "epoch": 19.85077951002227, "grad_norm": 25.11189079284668, "learning_rate": 1e-06, "loss": 0.3364, "num_input_tokens_seen": 499379248, "step": 8913 }, { "epoch": 19.85077951002227, "loss": 0.40423035621643066, "loss_ce": 5.5550888646394014e-05, "loss_iou": 0.1640625, "loss_num": 0.01513671875, "loss_xval": 0.404296875, "num_input_tokens_seen": 499379248, "step": 8913 }, { "epoch": 19.853006681514476, "grad_norm": 18.35516929626465, "learning_rate": 1e-06, "loss": 0.4902, "num_input_tokens_seen": 499436244, "step": 8914 }, { "epoch": 19.853006681514476, "loss": 0.41144680976867676, "loss_ce": 6.984162610024214e-05, "loss_iou": 0.18359375, "loss_num": 0.00885009765625, "loss_xval": 0.412109375, "num_input_tokens_seen": 499436244, "step": 8914 }, { "epoch": 19.85523385300668, "grad_norm": 14.630391120910645, "learning_rate": 1e-06, "loss": 0.3681, "num_input_tokens_seen": 499493472, "step": 8915 }, { "epoch": 19.85523385300668, "loss": 0.3745167851448059, "loss_ce": 6.610387936234474e-05, "loss_iou": 0.16796875, "loss_num": 0.0078125, "loss_xval": 0.375, "num_input_tokens_seen": 499493472, "step": 8915 }, { "epoch": 19.857461024498885, "grad_norm": 15.143143653869629, "learning_rate": 1e-06, "loss": 0.4017, "num_input_tokens_seen": 499548600, "step": 8916 }, { "epoch": 19.857461024498885, "loss": 0.5689959526062012, "loss_ce": 5.67264505662024e-05, "loss_iou": 0.2197265625, "loss_num": 0.026123046875, "loss_xval": 0.5703125, "num_input_tokens_seen": 499548600, "step": 8916 }, { "epoch": 19.85968819599109, "grad_norm": 25.465965270996094, "learning_rate": 1e-06, "loss": 0.3066, "num_input_tokens_seen": 499601500, "step": 8917 }, { "epoch": 19.85968819599109, "loss": 0.31936201453208923, "loss_ce": 8.711648843018338e-05, "loss_iou": 0.1328125, "loss_num": 0.01055908203125, "loss_xval": 0.318359375, "num_input_tokens_seen": 499601500, "step": 8917 }, { "epoch": 19.861915367483295, "grad_norm": 21.30182647705078, "learning_rate": 1e-06, "loss": 0.4865, "num_input_tokens_seen": 499657012, "step": 8918 }, { "epoch": 19.861915367483295, "loss": 0.38416028022766113, "loss_ce": 6.603718065889552e-05, "loss_iou": 0.154296875, "loss_num": 0.0152587890625, "loss_xval": 0.384765625, "num_input_tokens_seen": 499657012, "step": 8918 }, { "epoch": 19.8641425389755, "grad_norm": 19.05086898803711, "learning_rate": 1e-06, "loss": 0.5037, "num_input_tokens_seen": 499713108, "step": 8919 }, { "epoch": 19.8641425389755, "loss": 0.5627080202102661, "loss_ce": 8.592565427534282e-05, "loss_iou": 0.244140625, "loss_num": 0.01513671875, "loss_xval": 0.5625, "num_input_tokens_seen": 499713108, "step": 8919 }, { "epoch": 19.866369710467705, "grad_norm": 21.24616813659668, "learning_rate": 1e-06, "loss": 0.3316, "num_input_tokens_seen": 499771624, "step": 8920 }, { "epoch": 19.866369710467705, "loss": 0.3205050528049469, "loss_ce": 7.047764665912837e-05, "loss_iou": 0.1376953125, "loss_num": 0.0089111328125, "loss_xval": 0.3203125, "num_input_tokens_seen": 499771624, "step": 8920 }, { "epoch": 19.86859688195991, "grad_norm": 18.13334846496582, "learning_rate": 1e-06, "loss": 0.3272, "num_input_tokens_seen": 499825620, "step": 8921 }, { "epoch": 19.86859688195991, "loss": 0.3275926113128662, "loss_ce": 7.797064608894289e-05, "loss_iou": 0.1416015625, "loss_num": 0.0087890625, "loss_xval": 0.328125, "num_input_tokens_seen": 499825620, "step": 8921 }, { "epoch": 19.870824053452115, "grad_norm": 13.680047988891602, "learning_rate": 1e-06, "loss": 0.7128, "num_input_tokens_seen": 499881464, "step": 8922 }, { "epoch": 19.870824053452115, "loss": 0.5840435028076172, "loss_ce": 5.915117799304426e-05, "loss_iou": 0.2431640625, "loss_num": 0.0194091796875, "loss_xval": 0.5859375, "num_input_tokens_seen": 499881464, "step": 8922 }, { "epoch": 19.87305122494432, "grad_norm": 17.974990844726562, "learning_rate": 1e-06, "loss": 0.3335, "num_input_tokens_seen": 499935500, "step": 8923 }, { "epoch": 19.87305122494432, "loss": 0.39403825998306274, "loss_ce": 5.633125692838803e-05, "loss_iou": 0.1533203125, "loss_num": 0.017333984375, "loss_xval": 0.39453125, "num_input_tokens_seen": 499935500, "step": 8923 }, { "epoch": 19.875278396436524, "grad_norm": 16.1640625, "learning_rate": 1e-06, "loss": 0.5226, "num_input_tokens_seen": 499991676, "step": 8924 }, { "epoch": 19.875278396436524, "loss": 0.3693428039550781, "loss_ce": 0.00020217550627421588, "loss_iou": 0.1669921875, "loss_num": 0.0069580078125, "loss_xval": 0.369140625, "num_input_tokens_seen": 499991676, "step": 8924 }, { "epoch": 19.87750556792873, "grad_norm": 17.915504455566406, "learning_rate": 1e-06, "loss": 0.4869, "num_input_tokens_seen": 500049156, "step": 8925 }, { "epoch": 19.87750556792873, "loss": 0.44209641218185425, "loss_ce": 7.978252688189968e-05, "loss_iou": 0.2001953125, "loss_num": 0.00811767578125, "loss_xval": 0.44140625, "num_input_tokens_seen": 500049156, "step": 8925 }, { "epoch": 19.879732739420934, "grad_norm": 14.218931198120117, "learning_rate": 1e-06, "loss": 0.3029, "num_input_tokens_seen": 500105296, "step": 8926 }, { "epoch": 19.879732739420934, "loss": 0.33782947063446045, "loss_ce": 6.0907594161108136e-05, "loss_iou": 0.154296875, "loss_num": 0.005950927734375, "loss_xval": 0.337890625, "num_input_tokens_seen": 500105296, "step": 8926 }, { "epoch": 19.88195991091314, "grad_norm": 14.327959060668945, "learning_rate": 1e-06, "loss": 0.3458, "num_input_tokens_seen": 500161776, "step": 8927 }, { "epoch": 19.88195991091314, "loss": 0.3156734108924866, "loss_ce": 6.0611731896642596e-05, "loss_iou": 0.134765625, "loss_num": 0.00897216796875, "loss_xval": 0.31640625, "num_input_tokens_seen": 500161776, "step": 8927 }, { "epoch": 19.884187082405344, "grad_norm": 19.522199630737305, "learning_rate": 1e-06, "loss": 0.5647, "num_input_tokens_seen": 500216288, "step": 8928 }, { "epoch": 19.884187082405344, "loss": 0.6144946813583374, "loss_ce": 5.3751304221805185e-05, "loss_iou": 0.2158203125, "loss_num": 0.03662109375, "loss_xval": 0.61328125, "num_input_tokens_seen": 500216288, "step": 8928 }, { "epoch": 19.88641425389755, "grad_norm": 17.130699157714844, "learning_rate": 1e-06, "loss": 0.3437, "num_input_tokens_seen": 500271968, "step": 8929 }, { "epoch": 19.88641425389755, "loss": 0.2988947629928589, "loss_ce": 6.664128886768594e-05, "loss_iou": 0.1318359375, "loss_num": 0.006866455078125, "loss_xval": 0.298828125, "num_input_tokens_seen": 500271968, "step": 8929 }, { "epoch": 19.888641425389753, "grad_norm": 29.030588150024414, "learning_rate": 1e-06, "loss": 0.3789, "num_input_tokens_seen": 500329792, "step": 8930 }, { "epoch": 19.888641425389753, "loss": 0.40844833850860596, "loss_ce": 6.208484410308301e-05, "loss_iou": 0.1708984375, "loss_num": 0.013427734375, "loss_xval": 0.408203125, "num_input_tokens_seen": 500329792, "step": 8930 }, { "epoch": 19.89086859688196, "grad_norm": 20.700912475585938, "learning_rate": 1e-06, "loss": 0.3864, "num_input_tokens_seen": 500384792, "step": 8931 }, { "epoch": 19.89086859688196, "loss": 0.46894556283950806, "loss_ce": 7.346599886659533e-05, "loss_iou": 0.203125, "loss_num": 0.0126953125, "loss_xval": 0.46875, "num_input_tokens_seen": 500384792, "step": 8931 }, { "epoch": 19.893095768374163, "grad_norm": 20.326370239257812, "learning_rate": 1e-06, "loss": 0.2839, "num_input_tokens_seen": 500439812, "step": 8932 }, { "epoch": 19.893095768374163, "loss": 0.25635194778442383, "loss_ce": 6.53410388622433e-05, "loss_iou": 0.1162109375, "loss_num": 0.004791259765625, "loss_xval": 0.255859375, "num_input_tokens_seen": 500439812, "step": 8932 }, { "epoch": 19.895322939866368, "grad_norm": 18.51610565185547, "learning_rate": 1e-06, "loss": 0.4605, "num_input_tokens_seen": 500497212, "step": 8933 }, { "epoch": 19.895322939866368, "loss": 0.3897075355052948, "loss_ce": 5.907195736654103e-05, "loss_iou": 0.1669921875, "loss_num": 0.01123046875, "loss_xval": 0.390625, "num_input_tokens_seen": 500497212, "step": 8933 }, { "epoch": 19.897550111358576, "grad_norm": 15.211495399475098, "learning_rate": 1e-06, "loss": 0.3421, "num_input_tokens_seen": 500552316, "step": 8934 }, { "epoch": 19.897550111358576, "loss": 0.3298904299736023, "loss_ce": 5.644091288559139e-05, "loss_iou": 0.1376953125, "loss_num": 0.01080322265625, "loss_xval": 0.330078125, "num_input_tokens_seen": 500552316, "step": 8934 }, { "epoch": 19.899777282850778, "grad_norm": 20.869205474853516, "learning_rate": 1e-06, "loss": 0.2746, "num_input_tokens_seen": 500606956, "step": 8935 }, { "epoch": 19.899777282850778, "loss": 0.28754234313964844, "loss_ce": 6.67754648020491e-05, "loss_iou": 0.126953125, "loss_num": 0.006866455078125, "loss_xval": 0.287109375, "num_input_tokens_seen": 500606956, "step": 8935 }, { "epoch": 19.902004454342986, "grad_norm": 24.170188903808594, "learning_rate": 1e-06, "loss": 0.389, "num_input_tokens_seen": 500662136, "step": 8936 }, { "epoch": 19.902004454342986, "loss": 0.32582342624664307, "loss_ce": 7.880674093030393e-05, "loss_iou": 0.15234375, "loss_num": 0.004119873046875, "loss_xval": 0.326171875, "num_input_tokens_seen": 500662136, "step": 8936 }, { "epoch": 19.90423162583519, "grad_norm": 26.785816192626953, "learning_rate": 1e-06, "loss": 0.3826, "num_input_tokens_seen": 500716584, "step": 8937 }, { "epoch": 19.90423162583519, "loss": 0.41937169432640076, "loss_ce": 6.017117266310379e-05, "loss_iou": 0.1787109375, "loss_num": 0.0125732421875, "loss_xval": 0.419921875, "num_input_tokens_seen": 500716584, "step": 8937 }, { "epoch": 19.906458797327396, "grad_norm": 15.786996841430664, "learning_rate": 1e-06, "loss": 0.3471, "num_input_tokens_seen": 500772304, "step": 8938 }, { "epoch": 19.906458797327396, "loss": 0.26117777824401855, "loss_ce": 6.9360678025987e-05, "loss_iou": 0.11962890625, "loss_num": 0.004364013671875, "loss_xval": 0.26171875, "num_input_tokens_seen": 500772304, "step": 8938 }, { "epoch": 19.9086859688196, "grad_norm": 32.816749572753906, "learning_rate": 1e-06, "loss": 0.542, "num_input_tokens_seen": 500825560, "step": 8939 }, { "epoch": 19.9086859688196, "loss": 0.6478848457336426, "loss_ce": 5.768300252384506e-05, "loss_iou": 0.27734375, "loss_num": 0.0189208984375, "loss_xval": 0.6484375, "num_input_tokens_seen": 500825560, "step": 8939 }, { "epoch": 19.910913140311806, "grad_norm": 23.55243492126465, "learning_rate": 1e-06, "loss": 0.3671, "num_input_tokens_seen": 500881272, "step": 8940 }, { "epoch": 19.910913140311806, "loss": 0.28195321559906006, "loss_ce": 9.285648411605507e-05, "loss_iou": 0.1298828125, "loss_num": 0.004364013671875, "loss_xval": 0.28125, "num_input_tokens_seen": 500881272, "step": 8940 }, { "epoch": 19.91314031180401, "grad_norm": 19.66559600830078, "learning_rate": 1e-06, "loss": 0.49, "num_input_tokens_seen": 500937444, "step": 8941 }, { "epoch": 19.91314031180401, "loss": 0.621654748916626, "loss_ce": 7.26881917216815e-05, "loss_iou": 0.267578125, "loss_num": 0.0177001953125, "loss_xval": 0.62109375, "num_input_tokens_seen": 500937444, "step": 8941 }, { "epoch": 19.915367483296215, "grad_norm": 18.567909240722656, "learning_rate": 1e-06, "loss": 0.4054, "num_input_tokens_seen": 500995428, "step": 8942 }, { "epoch": 19.915367483296215, "loss": 0.36586493253707886, "loss_ce": 0.0001422582136001438, "loss_iou": 0.1630859375, "loss_num": 0.00799560546875, "loss_xval": 0.365234375, "num_input_tokens_seen": 500995428, "step": 8942 }, { "epoch": 19.91759465478842, "grad_norm": 37.899871826171875, "learning_rate": 1e-06, "loss": 0.3518, "num_input_tokens_seen": 501051864, "step": 8943 }, { "epoch": 19.91759465478842, "loss": 0.374164342880249, "loss_ce": 7.989482401171699e-05, "loss_iou": 0.1708984375, "loss_num": 0.00653076171875, "loss_xval": 0.375, "num_input_tokens_seen": 501051864, "step": 8943 }, { "epoch": 19.919821826280625, "grad_norm": 14.561594009399414, "learning_rate": 1e-06, "loss": 0.3799, "num_input_tokens_seen": 501107716, "step": 8944 }, { "epoch": 19.919821826280625, "loss": 0.3280653953552246, "loss_ce": 6.246700650081038e-05, "loss_iou": 0.146484375, "loss_num": 0.006866455078125, "loss_xval": 0.328125, "num_input_tokens_seen": 501107716, "step": 8944 }, { "epoch": 19.92204899777283, "grad_norm": 23.4508056640625, "learning_rate": 1e-06, "loss": 0.5593, "num_input_tokens_seen": 501162352, "step": 8945 }, { "epoch": 19.92204899777283, "loss": 0.4726704955101013, "loss_ce": 7.527978596044704e-05, "loss_iou": 0.1806640625, "loss_num": 0.02197265625, "loss_xval": 0.47265625, "num_input_tokens_seen": 501162352, "step": 8945 }, { "epoch": 19.924276169265035, "grad_norm": 17.324832916259766, "learning_rate": 1e-06, "loss": 0.3169, "num_input_tokens_seen": 501219728, "step": 8946 }, { "epoch": 19.924276169265035, "loss": 0.36799356341362, "loss_ce": 7.363170152530074e-05, "loss_iou": 0.1689453125, "loss_num": 0.00592041015625, "loss_xval": 0.3671875, "num_input_tokens_seen": 501219728, "step": 8946 }, { "epoch": 19.92650334075724, "grad_norm": 21.649171829223633, "learning_rate": 1e-06, "loss": 0.244, "num_input_tokens_seen": 501272792, "step": 8947 }, { "epoch": 19.92650334075724, "loss": 0.23100095987319946, "loss_ce": 0.00016598933143541217, "loss_iou": 0.103515625, "loss_num": 0.00482177734375, "loss_xval": 0.23046875, "num_input_tokens_seen": 501272792, "step": 8947 }, { "epoch": 19.928730512249444, "grad_norm": 14.748282432556152, "learning_rate": 1e-06, "loss": 0.326, "num_input_tokens_seen": 501330904, "step": 8948 }, { "epoch": 19.928730512249444, "loss": 0.3653227686882019, "loss_ce": 8.841823728289455e-05, "loss_iou": 0.1669921875, "loss_num": 0.006134033203125, "loss_xval": 0.365234375, "num_input_tokens_seen": 501330904, "step": 8948 }, { "epoch": 19.93095768374165, "grad_norm": 30.326812744140625, "learning_rate": 1e-06, "loss": 0.4558, "num_input_tokens_seen": 501387276, "step": 8949 }, { "epoch": 19.93095768374165, "loss": 0.4978707432746887, "loss_ce": 6.799891707487404e-05, "loss_iou": 0.17578125, "loss_num": 0.029052734375, "loss_xval": 0.498046875, "num_input_tokens_seen": 501387276, "step": 8949 }, { "epoch": 19.933184855233854, "grad_norm": 25.002771377563477, "learning_rate": 1e-06, "loss": 0.291, "num_input_tokens_seen": 501445204, "step": 8950 }, { "epoch": 19.933184855233854, "loss": 0.24606147408485413, "loss_ce": 5.927299935137853e-05, "loss_iou": 0.10595703125, "loss_num": 0.0068359375, "loss_xval": 0.24609375, "num_input_tokens_seen": 501445204, "step": 8950 }, { "epoch": 19.93541202672606, "grad_norm": 15.425994873046875, "learning_rate": 1e-06, "loss": 0.3445, "num_input_tokens_seen": 501501064, "step": 8951 }, { "epoch": 19.93541202672606, "loss": 0.29687991738319397, "loss_ce": 6.595243030460551e-05, "loss_iou": 0.1328125, "loss_num": 0.006072998046875, "loss_xval": 0.296875, "num_input_tokens_seen": 501501064, "step": 8951 }, { "epoch": 19.937639198218264, "grad_norm": 18.998098373413086, "learning_rate": 1e-06, "loss": 0.526, "num_input_tokens_seen": 501558712, "step": 8952 }, { "epoch": 19.937639198218264, "loss": 0.7495875358581543, "loss_ce": 7.577068754471838e-05, "loss_iou": 0.302734375, "loss_num": 0.028564453125, "loss_xval": 0.75, "num_input_tokens_seen": 501558712, "step": 8952 }, { "epoch": 19.93986636971047, "grad_norm": 13.607953071594238, "learning_rate": 1e-06, "loss": 0.2764, "num_input_tokens_seen": 501614280, "step": 8953 }, { "epoch": 19.93986636971047, "loss": 0.32002097368240356, "loss_ce": 7.468256808351725e-05, "loss_iou": 0.1416015625, "loss_num": 0.00750732421875, "loss_xval": 0.3203125, "num_input_tokens_seen": 501614280, "step": 8953 }, { "epoch": 19.942093541202674, "grad_norm": 17.92012596130371, "learning_rate": 1e-06, "loss": 0.5354, "num_input_tokens_seen": 501667972, "step": 8954 }, { "epoch": 19.942093541202674, "loss": 0.3914256989955902, "loss_ce": 6.828343612141907e-05, "loss_iou": 0.1767578125, "loss_num": 0.0078125, "loss_xval": 0.390625, "num_input_tokens_seen": 501667972, "step": 8954 }, { "epoch": 19.94432071269488, "grad_norm": 15.410776138305664, "learning_rate": 1e-06, "loss": 0.6504, "num_input_tokens_seen": 501724172, "step": 8955 }, { "epoch": 19.94432071269488, "loss": 0.7376750707626343, "loss_ce": 0.00012627900287043303, "loss_iou": 0.31640625, "loss_num": 0.0205078125, "loss_xval": 0.73828125, "num_input_tokens_seen": 501724172, "step": 8955 }, { "epoch": 19.946547884187083, "grad_norm": 14.882509231567383, "learning_rate": 1e-06, "loss": 0.2864, "num_input_tokens_seen": 501778848, "step": 8956 }, { "epoch": 19.946547884187083, "loss": 0.330025315284729, "loss_ce": 6.925308844074607e-05, "loss_iou": 0.1298828125, "loss_num": 0.0140380859375, "loss_xval": 0.330078125, "num_input_tokens_seen": 501778848, "step": 8956 }, { "epoch": 19.948775055679288, "grad_norm": 26.640323638916016, "learning_rate": 1e-06, "loss": 0.2967, "num_input_tokens_seen": 501833784, "step": 8957 }, { "epoch": 19.948775055679288, "loss": 0.23518048226833344, "loss_ce": 7.305956387426704e-05, "loss_iou": 0.0986328125, "loss_num": 0.00750732421875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 501833784, "step": 8957 }, { "epoch": 19.951002227171493, "grad_norm": 23.93553352355957, "learning_rate": 1e-06, "loss": 0.4451, "num_input_tokens_seen": 501889872, "step": 8958 }, { "epoch": 19.951002227171493, "loss": 0.29492634534835815, "loss_ce": 6.549932732013986e-05, "loss_iou": 0.1328125, "loss_num": 0.006011962890625, "loss_xval": 0.294921875, "num_input_tokens_seen": 501889872, "step": 8958 }, { "epoch": 19.953229398663698, "grad_norm": 26.314388275146484, "learning_rate": 1e-06, "loss": 0.3367, "num_input_tokens_seen": 501946172, "step": 8959 }, { "epoch": 19.953229398663698, "loss": 0.3797050416469574, "loss_ce": 6.637527258135378e-05, "loss_iou": 0.17578125, "loss_num": 0.00543212890625, "loss_xval": 0.37890625, "num_input_tokens_seen": 501946172, "step": 8959 }, { "epoch": 19.955456570155903, "grad_norm": 12.816929817199707, "learning_rate": 1e-06, "loss": 0.4444, "num_input_tokens_seen": 502002296, "step": 8960 }, { "epoch": 19.955456570155903, "loss": 0.5041437149047852, "loss_ce": 6.582704372704029e-05, "loss_iou": 0.1904296875, "loss_num": 0.0244140625, "loss_xval": 0.50390625, "num_input_tokens_seen": 502002296, "step": 8960 }, { "epoch": 19.957683741648108, "grad_norm": 22.86423110961914, "learning_rate": 1e-06, "loss": 0.2911, "num_input_tokens_seen": 502056424, "step": 8961 }, { "epoch": 19.957683741648108, "loss": 0.2608652412891388, "loss_ce": 6.202118674991652e-05, "loss_iou": 0.1025390625, "loss_num": 0.01116943359375, "loss_xval": 0.26171875, "num_input_tokens_seen": 502056424, "step": 8961 }, { "epoch": 19.959910913140313, "grad_norm": 14.811808586120605, "learning_rate": 1e-06, "loss": 0.3608, "num_input_tokens_seen": 502112304, "step": 8962 }, { "epoch": 19.959910913140313, "loss": 0.4184660315513611, "loss_ce": 7.004072540439665e-05, "loss_iou": 0.1708984375, "loss_num": 0.01519775390625, "loss_xval": 0.41796875, "num_input_tokens_seen": 502112304, "step": 8962 }, { "epoch": 19.962138084632517, "grad_norm": 19.64674949645996, "learning_rate": 1e-06, "loss": 0.2859, "num_input_tokens_seen": 502167776, "step": 8963 }, { "epoch": 19.962138084632517, "loss": 0.2639721632003784, "loss_ce": 5.613587563857436e-05, "loss_iou": 0.10546875, "loss_num": 0.01055908203125, "loss_xval": 0.263671875, "num_input_tokens_seen": 502167776, "step": 8963 }, { "epoch": 19.964365256124722, "grad_norm": 14.231873512268066, "learning_rate": 1e-06, "loss": 0.3257, "num_input_tokens_seen": 502224072, "step": 8964 }, { "epoch": 19.964365256124722, "loss": 0.29962459206581116, "loss_ce": 6.405658496078104e-05, "loss_iou": 0.1328125, "loss_num": 0.00689697265625, "loss_xval": 0.298828125, "num_input_tokens_seen": 502224072, "step": 8964 }, { "epoch": 19.966592427616927, "grad_norm": 14.061171531677246, "learning_rate": 1e-06, "loss": 0.3948, "num_input_tokens_seen": 502281656, "step": 8965 }, { "epoch": 19.966592427616927, "loss": 0.36503365635871887, "loss_ce": 0.0002570544893387705, "loss_iou": 0.146484375, "loss_num": 0.01416015625, "loss_xval": 0.365234375, "num_input_tokens_seen": 502281656, "step": 8965 }, { "epoch": 19.968819599109132, "grad_norm": 39.687034606933594, "learning_rate": 1e-06, "loss": 0.2981, "num_input_tokens_seen": 502338012, "step": 8966 }, { "epoch": 19.968819599109132, "loss": 0.26977571845054626, "loss_ce": 6.136279262136668e-05, "loss_iou": 0.11865234375, "loss_num": 0.006622314453125, "loss_xval": 0.26953125, "num_input_tokens_seen": 502338012, "step": 8966 }, { "epoch": 19.971046770601337, "grad_norm": 17.423816680908203, "learning_rate": 1e-06, "loss": 0.3524, "num_input_tokens_seen": 502393216, "step": 8967 }, { "epoch": 19.971046770601337, "loss": 0.3833910822868347, "loss_ce": 0.00015138789603952318, "loss_iou": 0.16015625, "loss_num": 0.01263427734375, "loss_xval": 0.3828125, "num_input_tokens_seen": 502393216, "step": 8967 }, { "epoch": 19.97327394209354, "grad_norm": 15.628920555114746, "learning_rate": 1e-06, "loss": 0.6525, "num_input_tokens_seen": 502450296, "step": 8968 }, { "epoch": 19.97327394209354, "loss": 0.8214820027351379, "loss_ce": 7.082229421939701e-05, "loss_iou": 0.3046875, "loss_num": 0.04248046875, "loss_xval": 0.8203125, "num_input_tokens_seen": 502450296, "step": 8968 }, { "epoch": 19.975501113585747, "grad_norm": 19.731292724609375, "learning_rate": 1e-06, "loss": 0.3995, "num_input_tokens_seen": 502504404, "step": 8969 }, { "epoch": 19.975501113585747, "loss": 0.3412543833255768, "loss_ce": 6.785897130612284e-05, "loss_iou": 0.1435546875, "loss_num": 0.0107421875, "loss_xval": 0.341796875, "num_input_tokens_seen": 502504404, "step": 8969 }, { "epoch": 19.97772828507795, "grad_norm": 17.818803787231445, "learning_rate": 1e-06, "loss": 0.5062, "num_input_tokens_seen": 502561012, "step": 8970 }, { "epoch": 19.97772828507795, "loss": 0.46796897053718567, "loss_ce": 7.346458733081818e-05, "loss_iou": 0.1923828125, "loss_num": 0.0166015625, "loss_xval": 0.46875, "num_input_tokens_seen": 502561012, "step": 8970 }, { "epoch": 19.979955456570156, "grad_norm": 22.212177276611328, "learning_rate": 1e-06, "loss": 0.6205, "num_input_tokens_seen": 502615788, "step": 8971 }, { "epoch": 19.979955456570156, "loss": 0.680034875869751, "loss_ce": 0.00022534003073815256, "loss_iou": 0.28515625, "loss_num": 0.0220947265625, "loss_xval": 0.6796875, "num_input_tokens_seen": 502615788, "step": 8971 }, { "epoch": 19.98218262806236, "grad_norm": 28.786479949951172, "learning_rate": 1e-06, "loss": 0.4148, "num_input_tokens_seen": 502669668, "step": 8972 }, { "epoch": 19.98218262806236, "loss": 0.234740749001503, "loss_ce": 6.057398422854021e-05, "loss_iou": 0.1083984375, "loss_num": 0.0035400390625, "loss_xval": 0.234375, "num_input_tokens_seen": 502669668, "step": 8972 }, { "epoch": 19.984409799554566, "grad_norm": 22.56906509399414, "learning_rate": 1e-06, "loss": 0.3357, "num_input_tokens_seen": 502727112, "step": 8973 }, { "epoch": 19.984409799554566, "loss": 0.32478001713752747, "loss_ce": 7.298550917766988e-05, "loss_iou": 0.1435546875, "loss_num": 0.007415771484375, "loss_xval": 0.32421875, "num_input_tokens_seen": 502727112, "step": 8973 }, { "epoch": 19.98663697104677, "grad_norm": 12.906547546386719, "learning_rate": 1e-06, "loss": 0.6115, "num_input_tokens_seen": 502781448, "step": 8974 }, { "epoch": 19.98663697104677, "loss": 0.7082492113113403, "loss_ce": 5.82421307626646e-05, "loss_iou": 0.265625, "loss_num": 0.035400390625, "loss_xval": 0.70703125, "num_input_tokens_seen": 502781448, "step": 8974 }, { "epoch": 19.988864142538976, "grad_norm": 20.916061401367188, "learning_rate": 1e-06, "loss": 0.5555, "num_input_tokens_seen": 502834424, "step": 8975 }, { "epoch": 19.988864142538976, "loss": 0.562214195728302, "loss_ce": 8.040100510697812e-05, "loss_iou": 0.251953125, "loss_num": 0.01190185546875, "loss_xval": 0.5625, "num_input_tokens_seen": 502834424, "step": 8975 }, { "epoch": 19.99109131403118, "grad_norm": 27.778152465820312, "learning_rate": 1e-06, "loss": 0.5195, "num_input_tokens_seen": 502888060, "step": 8976 }, { "epoch": 19.99109131403118, "loss": 0.708865761756897, "loss_ce": 6.44870669930242e-05, "loss_iou": 0.3203125, "loss_num": 0.0135498046875, "loss_xval": 0.70703125, "num_input_tokens_seen": 502888060, "step": 8976 }, { "epoch": 19.993318485523385, "grad_norm": 16.12858772277832, "learning_rate": 1e-06, "loss": 0.3678, "num_input_tokens_seen": 502947760, "step": 8977 }, { "epoch": 19.993318485523385, "loss": 0.4400678873062134, "loss_ce": 6.544531788676977e-05, "loss_iou": 0.189453125, "loss_num": 0.01251220703125, "loss_xval": 0.439453125, "num_input_tokens_seen": 502947760, "step": 8977 }, { "epoch": 19.99554565701559, "grad_norm": 14.300686836242676, "learning_rate": 1e-06, "loss": 0.4194, "num_input_tokens_seen": 503003892, "step": 8978 }, { "epoch": 19.99554565701559, "loss": 0.554030179977417, "loss_ce": 7.511145668104291e-05, "loss_iou": 0.240234375, "loss_num": 0.0147705078125, "loss_xval": 0.5546875, "num_input_tokens_seen": 503003892, "step": 8978 }, { "epoch": 19.997772828507795, "grad_norm": 48.166603088378906, "learning_rate": 1e-06, "loss": 0.3181, "num_input_tokens_seen": 503060696, "step": 8979 }, { "epoch": 19.997772828507795, "loss": 0.36199867725372314, "loss_ce": 6.0221587773412466e-05, "loss_iou": 0.1533203125, "loss_num": 0.01116943359375, "loss_xval": 0.361328125, "num_input_tokens_seen": 503060696, "step": 8979 }, { "epoch": 20.0, "grad_norm": 19.31943130493164, "learning_rate": 1e-06, "loss": 0.3377, "num_input_tokens_seen": 503116608, "step": 8980 }, { "epoch": 20.0, "loss": 0.40692806243896484, "loss_ce": 6.770974141545594e-05, "loss_iou": 0.1796875, "loss_num": 0.00946044921875, "loss_xval": 0.40625, "num_input_tokens_seen": 503116608, "step": 8980 }, { "epoch": 20.002227171492205, "grad_norm": 19.004310607910156, "learning_rate": 1e-06, "loss": 0.3899, "num_input_tokens_seen": 503171700, "step": 8981 }, { "epoch": 20.002227171492205, "loss": 0.31977057456970215, "loss_ce": 6.842036236776039e-05, "loss_iou": 0.150390625, "loss_num": 0.0036163330078125, "loss_xval": 0.3203125, "num_input_tokens_seen": 503171700, "step": 8981 }, { "epoch": 20.00445434298441, "grad_norm": 16.16302490234375, "learning_rate": 1e-06, "loss": 0.3615, "num_input_tokens_seen": 503227328, "step": 8982 }, { "epoch": 20.00445434298441, "loss": 0.43536823987960815, "loss_ce": 6.547638622578233e-05, "loss_iou": 0.193359375, "loss_num": 0.00970458984375, "loss_xval": 0.435546875, "num_input_tokens_seen": 503227328, "step": 8982 }, { "epoch": 20.006681514476615, "grad_norm": 28.54294204711914, "learning_rate": 1e-06, "loss": 0.4652, "num_input_tokens_seen": 503280804, "step": 8983 }, { "epoch": 20.006681514476615, "loss": 0.3693348169326782, "loss_ce": 7.21029209671542e-05, "loss_iou": 0.16015625, "loss_num": 0.00982666015625, "loss_xval": 0.369140625, "num_input_tokens_seen": 503280804, "step": 8983 }, { "epoch": 20.00890868596882, "grad_norm": 13.636581420898438, "learning_rate": 1e-06, "loss": 0.3736, "num_input_tokens_seen": 503337904, "step": 8984 }, { "epoch": 20.00890868596882, "loss": 0.45746809244155884, "loss_ce": 7.06176069797948e-05, "loss_iou": 0.1962890625, "loss_num": 0.01312255859375, "loss_xval": 0.45703125, "num_input_tokens_seen": 503337904, "step": 8984 }, { "epoch": 20.011135857461024, "grad_norm": 23.064783096313477, "learning_rate": 1e-06, "loss": 0.4764, "num_input_tokens_seen": 503393424, "step": 8985 }, { "epoch": 20.011135857461024, "loss": 0.5955382585525513, "loss_ce": 7.930370338726789e-05, "loss_iou": 0.255859375, "loss_num": 0.0169677734375, "loss_xval": 0.59375, "num_input_tokens_seen": 503393424, "step": 8985 }, { "epoch": 20.01336302895323, "grad_norm": 20.41514015197754, "learning_rate": 1e-06, "loss": 0.3805, "num_input_tokens_seen": 503448520, "step": 8986 }, { "epoch": 20.01336302895323, "loss": 0.3952462673187256, "loss_ce": 0.00010465873492648825, "loss_iou": 0.1787109375, "loss_num": 0.007537841796875, "loss_xval": 0.39453125, "num_input_tokens_seen": 503448520, "step": 8986 }, { "epoch": 20.015590200445434, "grad_norm": 24.895496368408203, "learning_rate": 1e-06, "loss": 0.4096, "num_input_tokens_seen": 503504024, "step": 8987 }, { "epoch": 20.015590200445434, "loss": 0.43148207664489746, "loss_ce": 8.557141700293869e-05, "loss_iou": 0.189453125, "loss_num": 0.01031494140625, "loss_xval": 0.431640625, "num_input_tokens_seen": 503504024, "step": 8987 }, { "epoch": 20.01781737193764, "grad_norm": 23.964984893798828, "learning_rate": 1e-06, "loss": 0.3723, "num_input_tokens_seen": 503561476, "step": 8988 }, { "epoch": 20.01781737193764, "loss": 0.49469244480133057, "loss_ce": 6.352404307108372e-05, "loss_iou": 0.22265625, "loss_num": 0.010009765625, "loss_xval": 0.494140625, "num_input_tokens_seen": 503561476, "step": 8988 }, { "epoch": 20.020044543429844, "grad_norm": 23.97158432006836, "learning_rate": 1e-06, "loss": 0.6079, "num_input_tokens_seen": 503618636, "step": 8989 }, { "epoch": 20.020044543429844, "loss": 0.6602572202682495, "loss_ce": 0.00010094826575368643, "loss_iou": 0.255859375, "loss_num": 0.02978515625, "loss_xval": 0.66015625, "num_input_tokens_seen": 503618636, "step": 8989 }, { "epoch": 20.02227171492205, "grad_norm": 28.734207153320312, "learning_rate": 1e-06, "loss": 0.3533, "num_input_tokens_seen": 503674208, "step": 8990 }, { "epoch": 20.02227171492205, "loss": 0.34147197008132935, "loss_ce": 0.00019389839144423604, "loss_iou": 0.138671875, "loss_num": 0.01287841796875, "loss_xval": 0.341796875, "num_input_tokens_seen": 503674208, "step": 8990 }, { "epoch": 20.024498886414253, "grad_norm": 24.618515014648438, "learning_rate": 1e-06, "loss": 0.4778, "num_input_tokens_seen": 503728444, "step": 8991 }, { "epoch": 20.024498886414253, "loss": 0.24243876338005066, "loss_ce": 6.816858513047919e-05, "loss_iou": 0.10205078125, "loss_num": 0.00762939453125, "loss_xval": 0.2421875, "num_input_tokens_seen": 503728444, "step": 8991 }, { "epoch": 20.02672605790646, "grad_norm": 17.8887996673584, "learning_rate": 1e-06, "loss": 0.3836, "num_input_tokens_seen": 503784444, "step": 8992 }, { "epoch": 20.02672605790646, "loss": 0.4087654948234558, "loss_ce": 7.405409269267693e-05, "loss_iou": 0.177734375, "loss_num": 0.01043701171875, "loss_xval": 0.408203125, "num_input_tokens_seen": 503784444, "step": 8992 }, { "epoch": 20.028953229398663, "grad_norm": 19.861093521118164, "learning_rate": 1e-06, "loss": 0.5203, "num_input_tokens_seen": 503837876, "step": 8993 }, { "epoch": 20.028953229398663, "loss": 0.7142700552940369, "loss_ce": 6.721451791236177e-05, "loss_iou": 0.265625, "loss_num": 0.036865234375, "loss_xval": 0.71484375, "num_input_tokens_seen": 503837876, "step": 8993 }, { "epoch": 20.031180400890868, "grad_norm": 37.61943817138672, "learning_rate": 1e-06, "loss": 0.4567, "num_input_tokens_seen": 503894880, "step": 8994 }, { "epoch": 20.031180400890868, "loss": 0.4224729835987091, "loss_ce": 0.00010970777657348663, "loss_iou": 0.181640625, "loss_num": 0.01177978515625, "loss_xval": 0.421875, "num_input_tokens_seen": 503894880, "step": 8994 }, { "epoch": 20.033407572383073, "grad_norm": 21.20833969116211, "learning_rate": 1e-06, "loss": 0.4741, "num_input_tokens_seen": 503952424, "step": 8995 }, { "epoch": 20.033407572383073, "loss": 0.4196312725543976, "loss_ce": 7.561713573522866e-05, "loss_iou": 0.1640625, "loss_num": 0.0181884765625, "loss_xval": 0.419921875, "num_input_tokens_seen": 503952424, "step": 8995 }, { "epoch": 20.035634743875278, "grad_norm": 22.4307861328125, "learning_rate": 1e-06, "loss": 0.4745, "num_input_tokens_seen": 504008156, "step": 8996 }, { "epoch": 20.035634743875278, "loss": 0.4712446331977844, "loss_ce": 5.3244577429722995e-05, "loss_iou": 0.21875, "loss_num": 0.00677490234375, "loss_xval": 0.470703125, "num_input_tokens_seen": 504008156, "step": 8996 }, { "epoch": 20.037861915367483, "grad_norm": 35.790035247802734, "learning_rate": 1e-06, "loss": 0.3054, "num_input_tokens_seen": 504066160, "step": 8997 }, { "epoch": 20.037861915367483, "loss": 0.4005992114543915, "loss_ce": 8.653838449390605e-05, "loss_iou": 0.18359375, "loss_num": 0.006683349609375, "loss_xval": 0.400390625, "num_input_tokens_seen": 504066160, "step": 8997 }, { "epoch": 20.040089086859687, "grad_norm": 30.35886001586914, "learning_rate": 1e-06, "loss": 0.4581, "num_input_tokens_seen": 504123536, "step": 8998 }, { "epoch": 20.040089086859687, "loss": 0.4870605170726776, "loss_ce": 0.00012202416110085323, "loss_iou": 0.1865234375, "loss_num": 0.022705078125, "loss_xval": 0.486328125, "num_input_tokens_seen": 504123536, "step": 8998 }, { "epoch": 20.042316258351892, "grad_norm": 11.284721374511719, "learning_rate": 1e-06, "loss": 0.3595, "num_input_tokens_seen": 504179652, "step": 8999 }, { "epoch": 20.042316258351892, "loss": 0.37006866931915283, "loss_ce": 7.358190487138927e-05, "loss_iou": 0.158203125, "loss_num": 0.01092529296875, "loss_xval": 0.369140625, "num_input_tokens_seen": 504179652, "step": 8999 }, { "epoch": 20.044543429844097, "grad_norm": 14.649779319763184, "learning_rate": 1e-06, "loss": 0.4503, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "eval_seeclick_web_CIoU": 0.5833241045475006, "eval_seeclick_web_GIoU": 0.5827835202217102, "eval_seeclick_web_IoU": 0.602398693561554, "eval_seeclick_web_MAE_all": 0.015278099570423365, "eval_seeclick_web_MAE_h": 0.007092589279636741, "eval_seeclick_web_MAE_w": 0.015399211086332798, "eval_seeclick_web_MAE_x_boxes": 0.007994883926585317, "eval_seeclick_web_MAE_y_boxes": 0.021222305251285434, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9251190423965454, "eval_seeclick_web_loss_ce": 0.00011575160169741139, "eval_seeclick_web_loss_iou": 0.428955078125, "eval_seeclick_web_loss_num": 0.012242317199707031, "eval_seeclick_web_loss_xval": 0.9189453125, "eval_seeclick_web_runtime": 24.8776, "eval_seeclick_web_samples_per_second": 2.01, "eval_seeclick_web_steps_per_second": 0.08, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "eval_icons_CIoU": 0.26180562376976013, "eval_icons_GIoU": 0.2955218553543091, "eval_icons_IoU": 0.3424162417650223, "eval_icons_MAE_all": 0.057900117710232735, "eval_icons_MAE_h": 0.029276233166456223, "eval_icons_MAE_w": 0.06031078938394785, "eval_icons_MAE_x_boxes": 0.058521781116724014, "eval_icons_MAE_y_boxes": 0.03748843166977167, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7113921642303467, "eval_icons_loss_ce": 0.0001341397837677505, "eval_icons_loss_iou": 0.6754150390625, "eval_icons_loss_num": 0.057514190673828125, "eval_icons_loss_xval": 1.6396484375, "eval_icons_runtime": 24.3524, "eval_icons_samples_per_second": 2.053, "eval_icons_steps_per_second": 0.082, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "eval_screenspot_CIoU": 0.3642461995283763, "eval_screenspot_GIoU": 0.38147173325220746, "eval_screenspot_IoU": 0.4438290496667226, "eval_screenspot_MAE_all": 0.058075872560342155, "eval_screenspot_MAE_h": 0.039860475808382034, "eval_screenspot_MAE_w": 0.060762856155633926, "eval_screenspot_MAE_x_boxes": 0.07081380176047485, "eval_screenspot_MAE_y_boxes": 0.04531909463306268, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.5968748331069946, "eval_screenspot_loss_ce": 0.00014579095780694237, "eval_screenspot_loss_iou": 0.6578776041666666, "eval_screenspot_loss_num": 0.06808090209960938, "eval_screenspot_loss_xval": 1.6564127604166667, "eval_screenspot_runtime": 41.1503, "eval_screenspot_samples_per_second": 2.163, "eval_screenspot_steps_per_second": 0.073, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "eval_compot_CIoU": 0.3458597809076309, "eval_compot_GIoU": 0.35780732333660126, "eval_compot_IoU": 0.40467947721481323, "eval_compot_MAE_all": 0.01937184017151594, "eval_compot_MAE_h": 0.01292520109564066, "eval_compot_MAE_w": 0.02040032297372818, "eval_compot_MAE_x_boxes": 0.030146288685500622, "eval_compot_MAE_y_boxes": 0.006630459800362587, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.361356258392334, "eval_compot_loss_ce": 0.0001064865973603446, "eval_compot_loss_iou": 0.6224365234375, "eval_compot_loss_num": 0.017818450927734375, "eval_compot_loss_xval": 1.33349609375, "eval_compot_runtime": 25.482, "eval_compot_samples_per_second": 1.962, "eval_compot_steps_per_second": 0.078, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "eval_custom_ui_val_CIoU": 0.4760332836045159, "eval_custom_ui_val_GIoU": 0.48059673772917855, "eval_custom_ui_val_IoU": 0.5367237561278873, "eval_custom_ui_val_MAE_all": 0.026100213070296578, "eval_custom_ui_val_MAE_h": 0.013512549611429373, "eval_custom_ui_val_MAE_w": 0.036708953945587076, "eval_custom_ui_val_MAE_x_boxes": 0.03128902590833604, "eval_custom_ui_val_MAE_y_boxes": 0.012070265650335286, "eval_custom_ui_val_inside_bbox": 0.7719907429483202, "eval_custom_ui_val_loss": 1.1528477668762207, "eval_custom_ui_val_loss_ce": 0.00011129721032274474, "eval_custom_ui_val_loss_iou": 0.4967041015625, "eval_custom_ui_val_loss_num": 0.022805955674913194, "eval_custom_ui_val_loss_xval": 1.1072048611111112, "eval_custom_ui_val_runtime": 76.4297, "eval_custom_ui_val_samples_per_second": 3.467, "eval_custom_ui_val_steps_per_second": 0.118, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.044543429844097, "loss": 0.8328539729118347, "loss_ce": 9.030340879689902e-05, "loss_iou": 0.375, "loss_num": 0.0166015625, "loss_xval": 0.83203125, "num_input_tokens_seen": 504234948, "step": 9000 }, { "epoch": 20.046770601336302, "grad_norm": 13.546182632446289, "learning_rate": 1e-06, "loss": 0.3243, "num_input_tokens_seen": 504290540, "step": 9001 }, { "epoch": 20.046770601336302, "loss": 0.26092007756233215, "loss_ce": 5.582950689131394e-05, "loss_iou": 0.1171875, "loss_num": 0.005340576171875, "loss_xval": 0.26171875, "num_input_tokens_seen": 504290540, "step": 9001 }, { "epoch": 20.048997772828507, "grad_norm": 11.733550071716309, "learning_rate": 1e-06, "loss": 0.3229, "num_input_tokens_seen": 504346220, "step": 9002 }, { "epoch": 20.048997772828507, "loss": 0.3032246530056, "loss_ce": 6.304614362306893e-05, "loss_iou": 0.1201171875, "loss_num": 0.01251220703125, "loss_xval": 0.302734375, "num_input_tokens_seen": 504346220, "step": 9002 }, { "epoch": 20.051224944320712, "grad_norm": 22.342601776123047, "learning_rate": 1e-06, "loss": 0.3593, "num_input_tokens_seen": 504402204, "step": 9003 }, { "epoch": 20.051224944320712, "loss": 0.26656997203826904, "loss_ce": 9.050035441759974e-05, "loss_iou": 0.119140625, "loss_num": 0.005706787109375, "loss_xval": 0.265625, "num_input_tokens_seen": 504402204, "step": 9003 }, { "epoch": 20.053452115812917, "grad_norm": 17.765623092651367, "learning_rate": 1e-06, "loss": 0.5938, "num_input_tokens_seen": 504458700, "step": 9004 }, { "epoch": 20.053452115812917, "loss": 0.5125864148139954, "loss_ce": 7.418861787300557e-05, "loss_iou": 0.2060546875, "loss_num": 0.0201416015625, "loss_xval": 0.51171875, "num_input_tokens_seen": 504458700, "step": 9004 }, { "epoch": 20.05567928730512, "grad_norm": 18.589075088500977, "learning_rate": 1e-06, "loss": 0.3828, "num_input_tokens_seen": 504514400, "step": 9005 }, { "epoch": 20.05567928730512, "loss": 0.3567635416984558, "loss_ce": 7.408991950796917e-05, "loss_iou": 0.14453125, "loss_num": 0.0135498046875, "loss_xval": 0.357421875, "num_input_tokens_seen": 504514400, "step": 9005 }, { "epoch": 20.057906458797326, "grad_norm": 19.300148010253906, "learning_rate": 1e-06, "loss": 0.5256, "num_input_tokens_seen": 504570592, "step": 9006 }, { "epoch": 20.057906458797326, "loss": 0.47822603583335876, "loss_ce": 7.661766721867025e-05, "loss_iou": 0.193359375, "loss_num": 0.0181884765625, "loss_xval": 0.478515625, "num_input_tokens_seen": 504570592, "step": 9006 }, { "epoch": 20.06013363028953, "grad_norm": 17.056005477905273, "learning_rate": 1e-06, "loss": 0.5382, "num_input_tokens_seen": 504623976, "step": 9007 }, { "epoch": 20.06013363028953, "loss": 0.6177802085876465, "loss_ce": 0.00010440404003020376, "loss_iou": 0.267578125, "loss_num": 0.0166015625, "loss_xval": 0.6171875, "num_input_tokens_seen": 504623976, "step": 9007 }, { "epoch": 20.062360801781736, "grad_norm": 12.326020240783691, "learning_rate": 1e-06, "loss": 0.2692, "num_input_tokens_seen": 504680516, "step": 9008 }, { "epoch": 20.062360801781736, "loss": 0.2412145882844925, "loss_ce": 6.467088678618893e-05, "loss_iou": 0.109375, "loss_num": 0.00439453125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 504680516, "step": 9008 }, { "epoch": 20.06458797327394, "grad_norm": 15.348241806030273, "learning_rate": 1e-06, "loss": 0.2844, "num_input_tokens_seen": 504732616, "step": 9009 }, { "epoch": 20.06458797327394, "loss": 0.3364948332309723, "loss_ce": 6.905219197506085e-05, "loss_iou": 0.1328125, "loss_num": 0.01416015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 504732616, "step": 9009 }, { "epoch": 20.066815144766146, "grad_norm": 274.50628662109375, "learning_rate": 1e-06, "loss": 0.3171, "num_input_tokens_seen": 504789792, "step": 9010 }, { "epoch": 20.066815144766146, "loss": 0.3943229615688324, "loss_ce": 6.636053149122745e-05, "loss_iou": 0.173828125, "loss_num": 0.00946044921875, "loss_xval": 0.39453125, "num_input_tokens_seen": 504789792, "step": 9010 }, { "epoch": 20.06904231625835, "grad_norm": 12.248230934143066, "learning_rate": 1e-06, "loss": 0.2955, "num_input_tokens_seen": 504845860, "step": 9011 }, { "epoch": 20.06904231625835, "loss": 0.2785142660140991, "loss_ce": 7.186854054452851e-05, "loss_iou": 0.10595703125, "loss_num": 0.0133056640625, "loss_xval": 0.279296875, "num_input_tokens_seen": 504845860, "step": 9011 }, { "epoch": 20.071269487750556, "grad_norm": 16.346586227416992, "learning_rate": 1e-06, "loss": 0.3333, "num_input_tokens_seen": 504897560, "step": 9012 }, { "epoch": 20.071269487750556, "loss": 0.25494903326034546, "loss_ce": 6.623686931561679e-05, "loss_iou": 0.10546875, "loss_num": 0.0086669921875, "loss_xval": 0.25390625, "num_input_tokens_seen": 504897560, "step": 9012 }, { "epoch": 20.07349665924276, "grad_norm": 14.807296752929688, "learning_rate": 1e-06, "loss": 0.2808, "num_input_tokens_seen": 504953152, "step": 9013 }, { "epoch": 20.07349665924276, "loss": 0.27936726808547974, "loss_ce": 7.039310003165156e-05, "loss_iou": 0.11865234375, "loss_num": 0.00836181640625, "loss_xval": 0.279296875, "num_input_tokens_seen": 504953152, "step": 9013 }, { "epoch": 20.075723830734965, "grad_norm": 17.577438354492188, "learning_rate": 1e-06, "loss": 0.3015, "num_input_tokens_seen": 505011868, "step": 9014 }, { "epoch": 20.075723830734965, "loss": 0.23732471466064453, "loss_ce": 5.0541042583063245e-05, "loss_iou": 0.10546875, "loss_num": 0.005279541015625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 505011868, "step": 9014 }, { "epoch": 20.07795100222717, "grad_norm": 14.317474365234375, "learning_rate": 1e-06, "loss": 0.44, "num_input_tokens_seen": 505068340, "step": 9015 }, { "epoch": 20.07795100222717, "loss": 0.4671054482460022, "loss_ce": 6.44456158624962e-05, "loss_iou": 0.1884765625, "loss_num": 0.017822265625, "loss_xval": 0.466796875, "num_input_tokens_seen": 505068340, "step": 9015 }, { "epoch": 20.080178173719375, "grad_norm": 16.756528854370117, "learning_rate": 1e-06, "loss": 0.2364, "num_input_tokens_seen": 505126536, "step": 9016 }, { "epoch": 20.080178173719375, "loss": 0.22590871155261993, "loss_ce": 7.862491474952549e-05, "loss_iou": 0.10205078125, "loss_num": 0.004302978515625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 505126536, "step": 9016 }, { "epoch": 20.08240534521158, "grad_norm": 13.3543062210083, "learning_rate": 1e-06, "loss": 0.3849, "num_input_tokens_seen": 505183652, "step": 9017 }, { "epoch": 20.08240534521158, "loss": 0.4101763963699341, "loss_ce": 8.116650133160874e-05, "loss_iou": 0.185546875, "loss_num": 0.007720947265625, "loss_xval": 0.41015625, "num_input_tokens_seen": 505183652, "step": 9017 }, { "epoch": 20.084632516703785, "grad_norm": 15.893471717834473, "learning_rate": 1e-06, "loss": 0.3159, "num_input_tokens_seen": 505238776, "step": 9018 }, { "epoch": 20.084632516703785, "loss": 0.3582761287689209, "loss_ce": 6.079444574424997e-05, "loss_iou": 0.162109375, "loss_num": 0.0067138671875, "loss_xval": 0.357421875, "num_input_tokens_seen": 505238776, "step": 9018 }, { "epoch": 20.08685968819599, "grad_norm": 26.983814239501953, "learning_rate": 1e-06, "loss": 0.502, "num_input_tokens_seen": 505295268, "step": 9019 }, { "epoch": 20.08685968819599, "loss": 0.48191970586776733, "loss_ce": 0.00010815928544616327, "loss_iou": 0.2158203125, "loss_num": 0.009765625, "loss_xval": 0.482421875, "num_input_tokens_seen": 505295268, "step": 9019 }, { "epoch": 20.089086859688194, "grad_norm": 13.190807342529297, "learning_rate": 1e-06, "loss": 0.327, "num_input_tokens_seen": 505351820, "step": 9020 }, { "epoch": 20.089086859688194, "loss": 0.3194577693939209, "loss_ce": 6.07890251558274e-05, "loss_iou": 0.1416015625, "loss_num": 0.007110595703125, "loss_xval": 0.3203125, "num_input_tokens_seen": 505351820, "step": 9020 }, { "epoch": 20.0913140311804, "grad_norm": 18.049283981323242, "learning_rate": 1e-06, "loss": 0.3301, "num_input_tokens_seen": 505409524, "step": 9021 }, { "epoch": 20.0913140311804, "loss": 0.3330759108066559, "loss_ce": 6.808601028751582e-05, "loss_iou": 0.1513671875, "loss_num": 0.006072998046875, "loss_xval": 0.33203125, "num_input_tokens_seen": 505409524, "step": 9021 }, { "epoch": 20.093541202672604, "grad_norm": 20.265382766723633, "learning_rate": 1e-06, "loss": 0.3425, "num_input_tokens_seen": 505466160, "step": 9022 }, { "epoch": 20.093541202672604, "loss": 0.2662951648235321, "loss_ce": 5.9811318351421505e-05, "loss_iou": 0.1171875, "loss_num": 0.006439208984375, "loss_xval": 0.265625, "num_input_tokens_seen": 505466160, "step": 9022 }, { "epoch": 20.09576837416481, "grad_norm": 18.630359649658203, "learning_rate": 1e-06, "loss": 0.459, "num_input_tokens_seen": 505521684, "step": 9023 }, { "epoch": 20.09576837416481, "loss": 0.34796595573425293, "loss_ce": 6.557940650964156e-05, "loss_iou": 0.1640625, "loss_num": 0.004180908203125, "loss_xval": 0.34765625, "num_input_tokens_seen": 505521684, "step": 9023 }, { "epoch": 20.097995545657014, "grad_norm": 18.471445083618164, "learning_rate": 1e-06, "loss": 0.44, "num_input_tokens_seen": 505579020, "step": 9024 }, { "epoch": 20.097995545657014, "loss": 0.4241413474082947, "loss_ce": 6.90808956278488e-05, "loss_iou": 0.185546875, "loss_num": 0.0107421875, "loss_xval": 0.423828125, "num_input_tokens_seen": 505579020, "step": 9024 }, { "epoch": 20.100222717149222, "grad_norm": 13.083913803100586, "learning_rate": 1e-06, "loss": 0.4995, "num_input_tokens_seen": 505636064, "step": 9025 }, { "epoch": 20.100222717149222, "loss": 0.3323401212692261, "loss_ce": 6.473006214946508e-05, "loss_iou": 0.15234375, "loss_num": 0.00537109375, "loss_xval": 0.33203125, "num_input_tokens_seen": 505636064, "step": 9025 }, { "epoch": 20.102449888641427, "grad_norm": 13.375629425048828, "learning_rate": 1e-06, "loss": 0.3271, "num_input_tokens_seen": 505694548, "step": 9026 }, { "epoch": 20.102449888641427, "loss": 0.261178195476532, "loss_ce": 6.979504541959614e-05, "loss_iou": 0.12255859375, "loss_num": 0.003265380859375, "loss_xval": 0.26171875, "num_input_tokens_seen": 505694548, "step": 9026 }, { "epoch": 20.104677060133632, "grad_norm": 15.511109352111816, "learning_rate": 1e-06, "loss": 0.4012, "num_input_tokens_seen": 505752036, "step": 9027 }, { "epoch": 20.104677060133632, "loss": 0.5342494249343872, "loss_ce": 6.970732647459954e-05, "loss_iou": 0.240234375, "loss_num": 0.0106201171875, "loss_xval": 0.53515625, "num_input_tokens_seen": 505752036, "step": 9027 }, { "epoch": 20.106904231625837, "grad_norm": 18.12278175354004, "learning_rate": 1e-06, "loss": 0.3609, "num_input_tokens_seen": 505806088, "step": 9028 }, { "epoch": 20.106904231625837, "loss": 0.21917679905891418, "loss_ce": 6.0579972341656685e-05, "loss_iou": 0.0859375, "loss_num": 0.0093994140625, "loss_xval": 0.21875, "num_input_tokens_seen": 505806088, "step": 9028 }, { "epoch": 20.10913140311804, "grad_norm": 19.46178436279297, "learning_rate": 1e-06, "loss": 0.3676, "num_input_tokens_seen": 505863524, "step": 9029 }, { "epoch": 20.10913140311804, "loss": 0.2694757878780365, "loss_ce": 6.661626684945077e-05, "loss_iou": 0.109375, "loss_num": 0.01007080078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 505863524, "step": 9029 }, { "epoch": 20.111358574610247, "grad_norm": 25.316984176635742, "learning_rate": 1e-06, "loss": 0.3969, "num_input_tokens_seen": 505919056, "step": 9030 }, { "epoch": 20.111358574610247, "loss": 0.39313435554504395, "loss_ce": 6.794696673750877e-05, "loss_iou": 0.173828125, "loss_num": 0.00909423828125, "loss_xval": 0.392578125, "num_input_tokens_seen": 505919056, "step": 9030 }, { "epoch": 20.11358574610245, "grad_norm": 17.775178909301758, "learning_rate": 1e-06, "loss": 0.5433, "num_input_tokens_seen": 505973188, "step": 9031 }, { "epoch": 20.11358574610245, "loss": 0.5688655972480774, "loss_ce": 0.00014000045484863222, "loss_iou": 0.24609375, "loss_num": 0.0152587890625, "loss_xval": 0.5703125, "num_input_tokens_seen": 505973188, "step": 9031 }, { "epoch": 20.115812917594656, "grad_norm": 18.306743621826172, "learning_rate": 1e-06, "loss": 0.3148, "num_input_tokens_seen": 506026908, "step": 9032 }, { "epoch": 20.115812917594656, "loss": 0.25726398825645447, "loss_ce": 6.184067751746625e-05, "loss_iou": 0.107421875, "loss_num": 0.00848388671875, "loss_xval": 0.2578125, "num_input_tokens_seen": 506026908, "step": 9032 }, { "epoch": 20.11804008908686, "grad_norm": 18.657054901123047, "learning_rate": 1e-06, "loss": 0.4274, "num_input_tokens_seen": 506084560, "step": 9033 }, { "epoch": 20.11804008908686, "loss": 0.4233429729938507, "loss_ce": 6.415171810658649e-05, "loss_iou": 0.1865234375, "loss_num": 0.0101318359375, "loss_xval": 0.423828125, "num_input_tokens_seen": 506084560, "step": 9033 }, { "epoch": 20.120267260579066, "grad_norm": 20.036205291748047, "learning_rate": 1e-06, "loss": 0.3367, "num_input_tokens_seen": 506141540, "step": 9034 }, { "epoch": 20.120267260579066, "loss": 0.2808334231376648, "loss_ce": 7.170556636992842e-05, "loss_iou": 0.130859375, "loss_num": 0.0036773681640625, "loss_xval": 0.28125, "num_input_tokens_seen": 506141540, "step": 9034 }, { "epoch": 20.12249443207127, "grad_norm": 14.675041198730469, "learning_rate": 1e-06, "loss": 0.4575, "num_input_tokens_seen": 506195448, "step": 9035 }, { "epoch": 20.12249443207127, "loss": 0.43460237979888916, "loss_ce": 9.311149187851697e-05, "loss_iou": 0.2060546875, "loss_num": 0.00439453125, "loss_xval": 0.43359375, "num_input_tokens_seen": 506195448, "step": 9035 }, { "epoch": 20.124721603563476, "grad_norm": 22.29570770263672, "learning_rate": 1e-06, "loss": 0.4673, "num_input_tokens_seen": 506249464, "step": 9036 }, { "epoch": 20.124721603563476, "loss": 0.30451393127441406, "loss_ce": 7.058979826979339e-05, "loss_iou": 0.130859375, "loss_num": 0.008544921875, "loss_xval": 0.3046875, "num_input_tokens_seen": 506249464, "step": 9036 }, { "epoch": 20.12694877505568, "grad_norm": 18.38787841796875, "learning_rate": 1e-06, "loss": 0.3275, "num_input_tokens_seen": 506305972, "step": 9037 }, { "epoch": 20.12694877505568, "loss": 0.2641042470932007, "loss_ce": 6.616647442569956e-05, "loss_iou": 0.11328125, "loss_num": 0.007476806640625, "loss_xval": 0.263671875, "num_input_tokens_seen": 506305972, "step": 9037 }, { "epoch": 20.129175946547885, "grad_norm": 24.06353759765625, "learning_rate": 1e-06, "loss": 0.3628, "num_input_tokens_seen": 506363376, "step": 9038 }, { "epoch": 20.129175946547885, "loss": 0.3895137310028076, "loss_ce": 0.00010943982488242909, "loss_iou": 0.177734375, "loss_num": 0.006561279296875, "loss_xval": 0.388671875, "num_input_tokens_seen": 506363376, "step": 9038 }, { "epoch": 20.13140311804009, "grad_norm": 24.388277053833008, "learning_rate": 1e-06, "loss": 0.2923, "num_input_tokens_seen": 506419872, "step": 9039 }, { "epoch": 20.13140311804009, "loss": 0.33582985401153564, "loss_ce": 0.00013651110930368304, "loss_iou": 0.15234375, "loss_num": 0.00634765625, "loss_xval": 0.3359375, "num_input_tokens_seen": 506419872, "step": 9039 }, { "epoch": 20.133630289532295, "grad_norm": 12.003957748413086, "learning_rate": 1e-06, "loss": 0.3221, "num_input_tokens_seen": 506474320, "step": 9040 }, { "epoch": 20.133630289532295, "loss": 0.19839268922805786, "loss_ce": 5.894018613616936e-05, "loss_iou": 0.09130859375, "loss_num": 0.0030670166015625, "loss_xval": 0.1982421875, "num_input_tokens_seen": 506474320, "step": 9040 }, { "epoch": 20.1358574610245, "grad_norm": 20.20316505432129, "learning_rate": 1e-06, "loss": 0.4102, "num_input_tokens_seen": 506531432, "step": 9041 }, { "epoch": 20.1358574610245, "loss": 0.3831365704536438, "loss_ce": 6.466210470534861e-05, "loss_iou": 0.169921875, "loss_num": 0.008544921875, "loss_xval": 0.3828125, "num_input_tokens_seen": 506531432, "step": 9041 }, { "epoch": 20.138084632516705, "grad_norm": 19.908742904663086, "learning_rate": 1e-06, "loss": 0.4735, "num_input_tokens_seen": 506586268, "step": 9042 }, { "epoch": 20.138084632516705, "loss": 0.5597177743911743, "loss_ce": 0.0001474099699407816, "loss_iou": 0.24609375, "loss_num": 0.0135498046875, "loss_xval": 0.55859375, "num_input_tokens_seen": 506586268, "step": 9042 }, { "epoch": 20.14031180400891, "grad_norm": 15.677102088928223, "learning_rate": 1e-06, "loss": 0.3571, "num_input_tokens_seen": 506642036, "step": 9043 }, { "epoch": 20.14031180400891, "loss": 0.41627195477485657, "loss_ce": 7.322065357584506e-05, "loss_iou": 0.1884765625, "loss_num": 0.0078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 506642036, "step": 9043 }, { "epoch": 20.142538975501115, "grad_norm": 21.689987182617188, "learning_rate": 1e-06, "loss": 0.4071, "num_input_tokens_seen": 506698484, "step": 9044 }, { "epoch": 20.142538975501115, "loss": 0.40728476643562317, "loss_ce": 5.823103856528178e-05, "loss_iou": 0.181640625, "loss_num": 0.008544921875, "loss_xval": 0.40625, "num_input_tokens_seen": 506698484, "step": 9044 }, { "epoch": 20.14476614699332, "grad_norm": 27.1544132232666, "learning_rate": 1e-06, "loss": 0.3355, "num_input_tokens_seen": 506752068, "step": 9045 }, { "epoch": 20.14476614699332, "loss": 0.3848227858543396, "loss_ce": 5.7126497267745435e-05, "loss_iou": 0.1591796875, "loss_num": 0.0133056640625, "loss_xval": 0.384765625, "num_input_tokens_seen": 506752068, "step": 9045 }, { "epoch": 20.146993318485524, "grad_norm": 15.306382179260254, "learning_rate": 1e-06, "loss": 0.4094, "num_input_tokens_seen": 506810520, "step": 9046 }, { "epoch": 20.146993318485524, "loss": 0.44698041677474976, "loss_ce": 8.10260244179517e-05, "loss_iou": 0.18359375, "loss_num": 0.0159912109375, "loss_xval": 0.447265625, "num_input_tokens_seen": 506810520, "step": 9046 }, { "epoch": 20.14922048997773, "grad_norm": 17.21749496459961, "learning_rate": 1e-06, "loss": 0.5436, "num_input_tokens_seen": 506865016, "step": 9047 }, { "epoch": 20.14922048997773, "loss": 0.689301609992981, "loss_ce": 9.259382932214066e-05, "loss_iou": 0.291015625, "loss_num": 0.0216064453125, "loss_xval": 0.6875, "num_input_tokens_seen": 506865016, "step": 9047 }, { "epoch": 20.151447661469934, "grad_norm": 28.808147430419922, "learning_rate": 1e-06, "loss": 0.477, "num_input_tokens_seen": 506918928, "step": 9048 }, { "epoch": 20.151447661469934, "loss": 0.46686720848083496, "loss_ce": 7.035446469672024e-05, "loss_iou": 0.19140625, "loss_num": 0.016845703125, "loss_xval": 0.466796875, "num_input_tokens_seen": 506918928, "step": 9048 }, { "epoch": 20.15367483296214, "grad_norm": 32.62353515625, "learning_rate": 1e-06, "loss": 0.3442, "num_input_tokens_seen": 506975208, "step": 9049 }, { "epoch": 20.15367483296214, "loss": 0.505931556224823, "loss_ce": 7.217634993139654e-05, "loss_iou": 0.2138671875, "loss_num": 0.0157470703125, "loss_xval": 0.5078125, "num_input_tokens_seen": 506975208, "step": 9049 }, { "epoch": 20.155902004454344, "grad_norm": 21.908447265625, "learning_rate": 1e-06, "loss": 0.4323, "num_input_tokens_seen": 507031872, "step": 9050 }, { "epoch": 20.155902004454344, "loss": 0.5479224324226379, "loss_ce": 7.087649282766506e-05, "loss_iou": 0.21875, "loss_num": 0.0220947265625, "loss_xval": 0.546875, "num_input_tokens_seen": 507031872, "step": 9050 }, { "epoch": 20.15812917594655, "grad_norm": 20.468429565429688, "learning_rate": 1e-06, "loss": 0.3044, "num_input_tokens_seen": 507089620, "step": 9051 }, { "epoch": 20.15812917594655, "loss": 0.3056070804595947, "loss_ce": 6.508109072456136e-05, "loss_iou": 0.142578125, "loss_num": 0.003997802734375, "loss_xval": 0.3046875, "num_input_tokens_seen": 507089620, "step": 9051 }, { "epoch": 20.160356347438753, "grad_norm": 22.60247802734375, "learning_rate": 1e-06, "loss": 0.3752, "num_input_tokens_seen": 507147488, "step": 9052 }, { "epoch": 20.160356347438753, "loss": 0.27081358432769775, "loss_ce": 6.161894270917401e-05, "loss_iou": 0.11962890625, "loss_num": 0.006317138671875, "loss_xval": 0.271484375, "num_input_tokens_seen": 507147488, "step": 9052 }, { "epoch": 20.16258351893096, "grad_norm": 27.35213851928711, "learning_rate": 1e-06, "loss": 0.6975, "num_input_tokens_seen": 507203384, "step": 9053 }, { "epoch": 20.16258351893096, "loss": 0.46700623631477356, "loss_ce": 8.729432738618925e-05, "loss_iou": 0.20703125, "loss_num": 0.0103759765625, "loss_xval": 0.466796875, "num_input_tokens_seen": 507203384, "step": 9053 }, { "epoch": 20.164810690423163, "grad_norm": 23.201065063476562, "learning_rate": 1e-06, "loss": 0.2998, "num_input_tokens_seen": 507256920, "step": 9054 }, { "epoch": 20.164810690423163, "loss": 0.1558593213558197, "loss_ce": 6.707788270432502e-05, "loss_iou": 0.06640625, "loss_num": 0.004638671875, "loss_xval": 0.15625, "num_input_tokens_seen": 507256920, "step": 9054 }, { "epoch": 20.167037861915368, "grad_norm": 17.185827255249023, "learning_rate": 1e-06, "loss": 0.2036, "num_input_tokens_seen": 507311420, "step": 9055 }, { "epoch": 20.167037861915368, "loss": 0.24561916291713715, "loss_ce": 7.47331214370206e-05, "loss_iou": 0.11474609375, "loss_num": 0.003173828125, "loss_xval": 0.2451171875, "num_input_tokens_seen": 507311420, "step": 9055 }, { "epoch": 20.169265033407573, "grad_norm": 12.79721450805664, "learning_rate": 1e-06, "loss": 0.3995, "num_input_tokens_seen": 507364720, "step": 9056 }, { "epoch": 20.169265033407573, "loss": 0.397408664226532, "loss_ce": 6.977266457397491e-05, "loss_iou": 0.1611328125, "loss_num": 0.014892578125, "loss_xval": 0.396484375, "num_input_tokens_seen": 507364720, "step": 9056 }, { "epoch": 20.171492204899778, "grad_norm": 21.816648483276367, "learning_rate": 1e-06, "loss": 0.4521, "num_input_tokens_seen": 507420248, "step": 9057 }, { "epoch": 20.171492204899778, "loss": 0.3353777527809143, "loss_ce": 6.583896174561232e-05, "loss_iou": 0.154296875, "loss_num": 0.005523681640625, "loss_xval": 0.3359375, "num_input_tokens_seen": 507420248, "step": 9057 }, { "epoch": 20.173719376391983, "grad_norm": 18.13716697692871, "learning_rate": 1e-06, "loss": 0.2414, "num_input_tokens_seen": 507477140, "step": 9058 }, { "epoch": 20.173719376391983, "loss": 0.22375068068504333, "loss_ce": 5.682064875145443e-05, "loss_iou": 0.09375, "loss_num": 0.0072021484375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 507477140, "step": 9058 }, { "epoch": 20.175946547884188, "grad_norm": 15.739980697631836, "learning_rate": 1e-06, "loss": 0.3639, "num_input_tokens_seen": 507534024, "step": 9059 }, { "epoch": 20.175946547884188, "loss": 0.436111718416214, "loss_ce": 7.6580501627177e-05, "loss_iou": 0.201171875, "loss_num": 0.00665283203125, "loss_xval": 0.435546875, "num_input_tokens_seen": 507534024, "step": 9059 }, { "epoch": 20.178173719376392, "grad_norm": 25.104169845581055, "learning_rate": 1e-06, "loss": 0.5474, "num_input_tokens_seen": 507590760, "step": 9060 }, { "epoch": 20.178173719376392, "loss": 0.6292134523391724, "loss_ce": 0.00018516569980420172, "loss_iou": 0.2265625, "loss_num": 0.03515625, "loss_xval": 0.62890625, "num_input_tokens_seen": 507590760, "step": 9060 }, { "epoch": 20.180400890868597, "grad_norm": 14.639026641845703, "learning_rate": 1e-06, "loss": 0.3374, "num_input_tokens_seen": 507647528, "step": 9061 }, { "epoch": 20.180400890868597, "loss": 0.26568013429641724, "loss_ce": 5.5125230574049056e-05, "loss_iou": 0.1142578125, "loss_num": 0.007415771484375, "loss_xval": 0.265625, "num_input_tokens_seen": 507647528, "step": 9061 }, { "epoch": 20.182628062360802, "grad_norm": 25.42153549194336, "learning_rate": 1e-06, "loss": 0.4638, "num_input_tokens_seen": 507699664, "step": 9062 }, { "epoch": 20.182628062360802, "loss": 0.4531900882720947, "loss_ce": 6.505924829980358e-05, "loss_iou": 0.208984375, "loss_num": 0.007110595703125, "loss_xval": 0.453125, "num_input_tokens_seen": 507699664, "step": 9062 }, { "epoch": 20.184855233853007, "grad_norm": 20.597827911376953, "learning_rate": 1e-06, "loss": 0.2994, "num_input_tokens_seen": 507755092, "step": 9063 }, { "epoch": 20.184855233853007, "loss": 0.3056027889251709, "loss_ce": 6.079179729567841e-05, "loss_iou": 0.1328125, "loss_num": 0.0079345703125, "loss_xval": 0.3046875, "num_input_tokens_seen": 507755092, "step": 9063 }, { "epoch": 20.187082405345212, "grad_norm": 20.325477600097656, "learning_rate": 1e-06, "loss": 0.4699, "num_input_tokens_seen": 507810656, "step": 9064 }, { "epoch": 20.187082405345212, "loss": 0.5754441022872925, "loss_ce": 0.00012668846466112882, "loss_iou": 0.232421875, "loss_num": 0.0220947265625, "loss_xval": 0.57421875, "num_input_tokens_seen": 507810656, "step": 9064 }, { "epoch": 20.189309576837417, "grad_norm": 14.4949369430542, "learning_rate": 1e-06, "loss": 0.2323, "num_input_tokens_seen": 507868900, "step": 9065 }, { "epoch": 20.189309576837417, "loss": 0.29057085514068604, "loss_ce": 0.0002876527141779661, "loss_iou": 0.1201171875, "loss_num": 0.010009765625, "loss_xval": 0.291015625, "num_input_tokens_seen": 507868900, "step": 9065 }, { "epoch": 20.19153674832962, "grad_norm": 20.822650909423828, "learning_rate": 1e-06, "loss": 0.5267, "num_input_tokens_seen": 507927360, "step": 9066 }, { "epoch": 20.19153674832962, "loss": 0.4537545144557953, "loss_ce": 0.0001412302954122424, "loss_iou": 0.2109375, "loss_num": 0.00616455078125, "loss_xval": 0.453125, "num_input_tokens_seen": 507927360, "step": 9066 }, { "epoch": 20.193763919821826, "grad_norm": 25.914817810058594, "learning_rate": 1e-06, "loss": 0.4188, "num_input_tokens_seen": 507981276, "step": 9067 }, { "epoch": 20.193763919821826, "loss": 0.4693218767642975, "loss_ce": 8.358562627108768e-05, "loss_iou": 0.1845703125, "loss_num": 0.020263671875, "loss_xval": 0.46875, "num_input_tokens_seen": 507981276, "step": 9067 }, { "epoch": 20.19599109131403, "grad_norm": 21.08448028564453, "learning_rate": 1e-06, "loss": 0.2869, "num_input_tokens_seen": 508037936, "step": 9068 }, { "epoch": 20.19599109131403, "loss": 0.3300858736038208, "loss_ce": 6.879281136207283e-05, "loss_iou": 0.142578125, "loss_num": 0.00885009765625, "loss_xval": 0.330078125, "num_input_tokens_seen": 508037936, "step": 9068 }, { "epoch": 20.198218262806236, "grad_norm": 28.94850730895996, "learning_rate": 1e-06, "loss": 0.3374, "num_input_tokens_seen": 508094604, "step": 9069 }, { "epoch": 20.198218262806236, "loss": 0.311705619096756, "loss_ce": 6.010363722452894e-05, "loss_iou": 0.1416015625, "loss_num": 0.00555419921875, "loss_xval": 0.3125, "num_input_tokens_seen": 508094604, "step": 9069 }, { "epoch": 20.20044543429844, "grad_norm": 27.5219783782959, "learning_rate": 1e-06, "loss": 0.491, "num_input_tokens_seen": 508152580, "step": 9070 }, { "epoch": 20.20044543429844, "loss": 0.6100476384162903, "loss_ce": 6.228317215573043e-05, "loss_iou": 0.25390625, "loss_num": 0.02001953125, "loss_xval": 0.609375, "num_input_tokens_seen": 508152580, "step": 9070 }, { "epoch": 20.202672605790646, "grad_norm": 23.4427433013916, "learning_rate": 1e-06, "loss": 0.2314, "num_input_tokens_seen": 508206192, "step": 9071 }, { "epoch": 20.202672605790646, "loss": 0.2371232807636261, "loss_ce": 6.273827602853999e-05, "loss_iou": 0.10302734375, "loss_num": 0.006103515625, "loss_xval": 0.2373046875, "num_input_tokens_seen": 508206192, "step": 9071 }, { "epoch": 20.20489977728285, "grad_norm": 17.554397583007812, "learning_rate": 1e-06, "loss": 0.3968, "num_input_tokens_seen": 508262040, "step": 9072 }, { "epoch": 20.20489977728285, "loss": 0.4413501024246216, "loss_ce": 6.59393408568576e-05, "loss_iou": 0.19921875, "loss_num": 0.00872802734375, "loss_xval": 0.44140625, "num_input_tokens_seen": 508262040, "step": 9072 }, { "epoch": 20.207126948775056, "grad_norm": 15.516806602478027, "learning_rate": 1e-06, "loss": 0.3465, "num_input_tokens_seen": 508317992, "step": 9073 }, { "epoch": 20.207126948775056, "loss": 0.41605615615844727, "loss_ce": 0.0001626217272132635, "loss_iou": 0.1923828125, "loss_num": 0.00616455078125, "loss_xval": 0.416015625, "num_input_tokens_seen": 508317992, "step": 9073 }, { "epoch": 20.20935412026726, "grad_norm": 26.189729690551758, "learning_rate": 1e-06, "loss": 0.3646, "num_input_tokens_seen": 508373568, "step": 9074 }, { "epoch": 20.20935412026726, "loss": 0.3610216975212097, "loss_ce": 5.978911212878302e-05, "loss_iou": 0.15234375, "loss_num": 0.0111083984375, "loss_xval": 0.361328125, "num_input_tokens_seen": 508373568, "step": 9074 }, { "epoch": 20.211581291759465, "grad_norm": 16.33150863647461, "learning_rate": 1e-06, "loss": 0.4738, "num_input_tokens_seen": 508429440, "step": 9075 }, { "epoch": 20.211581291759465, "loss": 0.3367946445941925, "loss_ce": 6.368501635733992e-05, "loss_iou": 0.1337890625, "loss_num": 0.013916015625, "loss_xval": 0.3359375, "num_input_tokens_seen": 508429440, "step": 9075 }, { "epoch": 20.21380846325167, "grad_norm": 16.59500503540039, "learning_rate": 1e-06, "loss": 0.5425, "num_input_tokens_seen": 508486396, "step": 9076 }, { "epoch": 20.21380846325167, "loss": 0.4951022267341614, "loss_ce": 0.00010707708133850247, "loss_iou": 0.2158203125, "loss_num": 0.0125732421875, "loss_xval": 0.494140625, "num_input_tokens_seen": 508486396, "step": 9076 }, { "epoch": 20.216035634743875, "grad_norm": 16.87767791748047, "learning_rate": 1e-06, "loss": 0.6097, "num_input_tokens_seen": 508540776, "step": 9077 }, { "epoch": 20.216035634743875, "loss": 0.6077404022216797, "loss_ce": 7.441250636475161e-05, "loss_iou": 0.248046875, "loss_num": 0.0224609375, "loss_xval": 0.609375, "num_input_tokens_seen": 508540776, "step": 9077 }, { "epoch": 20.21826280623608, "grad_norm": 12.694762229919434, "learning_rate": 1e-06, "loss": 0.3561, "num_input_tokens_seen": 508597792, "step": 9078 }, { "epoch": 20.21826280623608, "loss": 0.22015753388404846, "loss_ce": 6.474574183812365e-05, "loss_iou": 0.099609375, "loss_num": 0.004119873046875, "loss_xval": 0.2197265625, "num_input_tokens_seen": 508597792, "step": 9078 }, { "epoch": 20.220489977728285, "grad_norm": 20.9544734954834, "learning_rate": 1e-06, "loss": 0.4849, "num_input_tokens_seen": 508655992, "step": 9079 }, { "epoch": 20.220489977728285, "loss": 0.7146784067153931, "loss_ce": 7.875564915593714e-05, "loss_iou": 0.3203125, "loss_num": 0.0147705078125, "loss_xval": 0.71484375, "num_input_tokens_seen": 508655992, "step": 9079 }, { "epoch": 20.22271714922049, "grad_norm": 9.648091316223145, "learning_rate": 1e-06, "loss": 0.401, "num_input_tokens_seen": 508711672, "step": 9080 }, { "epoch": 20.22271714922049, "loss": 0.2577533423900604, "loss_ce": 6.290937017183751e-05, "loss_iou": 0.1162109375, "loss_num": 0.005035400390625, "loss_xval": 0.2578125, "num_input_tokens_seen": 508711672, "step": 9080 }, { "epoch": 20.224944320712694, "grad_norm": 21.80451202392578, "learning_rate": 1e-06, "loss": 0.2682, "num_input_tokens_seen": 508769188, "step": 9081 }, { "epoch": 20.224944320712694, "loss": 0.2104550302028656, "loss_ce": 6.68463617330417e-05, "loss_iou": 0.087890625, "loss_num": 0.006866455078125, "loss_xval": 0.2099609375, "num_input_tokens_seen": 508769188, "step": 9081 }, { "epoch": 20.2271714922049, "grad_norm": 21.215343475341797, "learning_rate": 1e-06, "loss": 0.3385, "num_input_tokens_seen": 508824548, "step": 9082 }, { "epoch": 20.2271714922049, "loss": 0.4378107190132141, "loss_ce": 6.65808329358697e-05, "loss_iou": 0.1923828125, "loss_num": 0.01031494140625, "loss_xval": 0.4375, "num_input_tokens_seen": 508824548, "step": 9082 }, { "epoch": 20.229398663697104, "grad_norm": 17.058391571044922, "learning_rate": 1e-06, "loss": 0.2433, "num_input_tokens_seen": 508879452, "step": 9083 }, { "epoch": 20.229398663697104, "loss": 0.23303891718387604, "loss_ce": 6.771212792955339e-05, "loss_iou": 0.10302734375, "loss_num": 0.00543212890625, "loss_xval": 0.2333984375, "num_input_tokens_seen": 508879452, "step": 9083 }, { "epoch": 20.23162583518931, "grad_norm": 16.908109664916992, "learning_rate": 1e-06, "loss": 0.5078, "num_input_tokens_seen": 508935480, "step": 9084 }, { "epoch": 20.23162583518931, "loss": 0.29596656560897827, "loss_ce": 6.8145505792927e-05, "loss_iou": 0.1376953125, "loss_num": 0.00433349609375, "loss_xval": 0.296875, "num_input_tokens_seen": 508935480, "step": 9084 }, { "epoch": 20.233853006681514, "grad_norm": 20.34218978881836, "learning_rate": 1e-06, "loss": 0.3869, "num_input_tokens_seen": 508993000, "step": 9085 }, { "epoch": 20.233853006681514, "loss": 0.3470456302165985, "loss_ce": 6.076861609471962e-05, "loss_iou": 0.1513671875, "loss_num": 0.0089111328125, "loss_xval": 0.34765625, "num_input_tokens_seen": 508993000, "step": 9085 }, { "epoch": 20.23608017817372, "grad_norm": 23.703197479248047, "learning_rate": 1e-06, "loss": 0.3962, "num_input_tokens_seen": 509047736, "step": 9086 }, { "epoch": 20.23608017817372, "loss": 0.3868297338485718, "loss_ce": 0.00011096424714196473, "loss_iou": 0.1748046875, "loss_num": 0.007568359375, "loss_xval": 0.38671875, "num_input_tokens_seen": 509047736, "step": 9086 }, { "epoch": 20.238307349665924, "grad_norm": 17.429872512817383, "learning_rate": 1e-06, "loss": 0.3348, "num_input_tokens_seen": 509102768, "step": 9087 }, { "epoch": 20.238307349665924, "loss": 0.4313143193721771, "loss_ce": 0.00010093137825606391, "loss_iou": 0.1953125, "loss_num": 0.00823974609375, "loss_xval": 0.431640625, "num_input_tokens_seen": 509102768, "step": 9087 }, { "epoch": 20.24053452115813, "grad_norm": 24.075519561767578, "learning_rate": 1e-06, "loss": 0.3229, "num_input_tokens_seen": 509159036, "step": 9088 }, { "epoch": 20.24053452115813, "loss": 0.3190591335296631, "loss_ce": 5.887117004022002e-05, "loss_iou": 0.140625, "loss_num": 0.007568359375, "loss_xval": 0.318359375, "num_input_tokens_seen": 509159036, "step": 9088 }, { "epoch": 20.242761692650333, "grad_norm": 15.556453704833984, "learning_rate": 1e-06, "loss": 0.2766, "num_input_tokens_seen": 509215604, "step": 9089 }, { "epoch": 20.242761692650333, "loss": 0.2935373783111572, "loss_ce": 8.033675840124488e-05, "loss_iou": 0.1337890625, "loss_num": 0.005035400390625, "loss_xval": 0.29296875, "num_input_tokens_seen": 509215604, "step": 9089 }, { "epoch": 20.244988864142538, "grad_norm": 26.041152954101562, "learning_rate": 1e-06, "loss": 0.4012, "num_input_tokens_seen": 509271624, "step": 9090 }, { "epoch": 20.244988864142538, "loss": 0.28717702627182007, "loss_ce": 6.765717989765108e-05, "loss_iou": 0.1103515625, "loss_num": 0.0133056640625, "loss_xval": 0.287109375, "num_input_tokens_seen": 509271624, "step": 9090 }, { "epoch": 20.247216035634743, "grad_norm": 15.528565406799316, "learning_rate": 1e-06, "loss": 0.5706, "num_input_tokens_seen": 509327216, "step": 9091 }, { "epoch": 20.247216035634743, "loss": 0.7028372287750244, "loss_ce": 7.844123319955543e-05, "loss_iou": 0.30078125, "loss_num": 0.0203857421875, "loss_xval": 0.703125, "num_input_tokens_seen": 509327216, "step": 9091 }, { "epoch": 20.249443207126948, "grad_norm": 31.673660278320312, "learning_rate": 1e-06, "loss": 0.2802, "num_input_tokens_seen": 509384560, "step": 9092 }, { "epoch": 20.249443207126948, "loss": 0.2221912145614624, "loss_ce": 5.375898035708815e-05, "loss_iou": 0.09814453125, "loss_num": 0.005157470703125, "loss_xval": 0.2216796875, "num_input_tokens_seen": 509384560, "step": 9092 }, { "epoch": 20.251670378619153, "grad_norm": 17.960433959960938, "learning_rate": 1e-06, "loss": 0.2977, "num_input_tokens_seen": 509439108, "step": 9093 }, { "epoch": 20.251670378619153, "loss": 0.3416157364845276, "loss_ce": 6.299982487689704e-05, "loss_iou": 0.14453125, "loss_num": 0.010498046875, "loss_xval": 0.341796875, "num_input_tokens_seen": 509439108, "step": 9093 }, { "epoch": 20.253897550111358, "grad_norm": 15.661357879638672, "learning_rate": 1e-06, "loss": 0.3546, "num_input_tokens_seen": 509495464, "step": 9094 }, { "epoch": 20.253897550111358, "loss": 0.4557662904262543, "loss_ce": 7.779739098623395e-05, "loss_iou": 0.193359375, "loss_num": 0.01373291015625, "loss_xval": 0.455078125, "num_input_tokens_seen": 509495464, "step": 9094 }, { "epoch": 20.256124721603562, "grad_norm": 20.898950576782227, "learning_rate": 1e-06, "loss": 0.3629, "num_input_tokens_seen": 509552144, "step": 9095 }, { "epoch": 20.256124721603562, "loss": 0.3690887987613678, "loss_ce": 7.023525540716946e-05, "loss_iou": 0.1689453125, "loss_num": 0.006195068359375, "loss_xval": 0.369140625, "num_input_tokens_seen": 509552144, "step": 9095 }, { "epoch": 20.258351893095767, "grad_norm": 14.072615623474121, "learning_rate": 1e-06, "loss": 0.3833, "num_input_tokens_seen": 509607736, "step": 9096 }, { "epoch": 20.258351893095767, "loss": 0.41329166293144226, "loss_ce": 5.3150470193941146e-05, "loss_iou": 0.1845703125, "loss_num": 0.0087890625, "loss_xval": 0.4140625, "num_input_tokens_seen": 509607736, "step": 9096 }, { "epoch": 20.260579064587972, "grad_norm": 23.365612030029297, "learning_rate": 1e-06, "loss": 0.3804, "num_input_tokens_seen": 509662676, "step": 9097 }, { "epoch": 20.260579064587972, "loss": 0.42209792137145996, "loss_ce": 7.031915447441861e-05, "loss_iou": 0.158203125, "loss_num": 0.0211181640625, "loss_xval": 0.421875, "num_input_tokens_seen": 509662676, "step": 9097 }, { "epoch": 20.262806236080177, "grad_norm": 29.634624481201172, "learning_rate": 1e-06, "loss": 0.3842, "num_input_tokens_seen": 509720920, "step": 9098 }, { "epoch": 20.262806236080177, "loss": 0.4519707262516022, "loss_ce": 6.642582593485713e-05, "loss_iou": 0.1787109375, "loss_num": 0.01904296875, "loss_xval": 0.451171875, "num_input_tokens_seen": 509720920, "step": 9098 }, { "epoch": 20.265033407572382, "grad_norm": 15.21391773223877, "learning_rate": 1e-06, "loss": 0.4699, "num_input_tokens_seen": 509777720, "step": 9099 }, { "epoch": 20.265033407572382, "loss": 0.5803358554840088, "loss_ce": 7.468961121048778e-05, "loss_iou": 0.2431640625, "loss_num": 0.0185546875, "loss_xval": 0.58203125, "num_input_tokens_seen": 509777720, "step": 9099 }, { "epoch": 20.267260579064587, "grad_norm": 19.919464111328125, "learning_rate": 1e-06, "loss": 0.2832, "num_input_tokens_seen": 509834440, "step": 9100 }, { "epoch": 20.267260579064587, "loss": 0.2911492586135864, "loss_ce": 7.257152174133807e-05, "loss_iou": 0.12060546875, "loss_num": 0.01007080078125, "loss_xval": 0.291015625, "num_input_tokens_seen": 509834440, "step": 9100 }, { "epoch": 20.26948775055679, "grad_norm": 17.073530197143555, "learning_rate": 1e-06, "loss": 0.4515, "num_input_tokens_seen": 509891612, "step": 9101 }, { "epoch": 20.26948775055679, "loss": 0.4402480721473694, "loss_ce": 6.253210449358448e-05, "loss_iou": 0.169921875, "loss_num": 0.02001953125, "loss_xval": 0.439453125, "num_input_tokens_seen": 509891612, "step": 9101 }, { "epoch": 20.271714922048996, "grad_norm": 14.897684097290039, "learning_rate": 1e-06, "loss": 0.376, "num_input_tokens_seen": 509949728, "step": 9102 }, { "epoch": 20.271714922048996, "loss": 0.4393659234046936, "loss_ce": 6.541327456943691e-05, "loss_iou": 0.203125, "loss_num": 0.00640869140625, "loss_xval": 0.439453125, "num_input_tokens_seen": 509949728, "step": 9102 }, { "epoch": 20.2739420935412, "grad_norm": 17.08948516845703, "learning_rate": 1e-06, "loss": 0.2489, "num_input_tokens_seen": 510003660, "step": 9103 }, { "epoch": 20.2739420935412, "loss": 0.2572115659713745, "loss_ce": 7.0465452154167e-05, "loss_iou": 0.10009765625, "loss_num": 0.0113525390625, "loss_xval": 0.2578125, "num_input_tokens_seen": 510003660, "step": 9103 }, { "epoch": 20.276169265033406, "grad_norm": 19.830888748168945, "learning_rate": 1e-06, "loss": 0.3028, "num_input_tokens_seen": 510059112, "step": 9104 }, { "epoch": 20.276169265033406, "loss": 0.4022126793861389, "loss_ce": 0.00011306728993076831, "loss_iou": 0.1875, "loss_num": 0.0054931640625, "loss_xval": 0.40234375, "num_input_tokens_seen": 510059112, "step": 9104 }, { "epoch": 20.27839643652561, "grad_norm": 21.524456024169922, "learning_rate": 1e-06, "loss": 0.3969, "num_input_tokens_seen": 510113856, "step": 9105 }, { "epoch": 20.27839643652561, "loss": 0.2504650354385376, "loss_ce": 5.308254185365513e-05, "loss_iou": 0.10791015625, "loss_num": 0.006988525390625, "loss_xval": 0.25, "num_input_tokens_seen": 510113856, "step": 9105 }, { "epoch": 20.280623608017816, "grad_norm": 20.933208465576172, "learning_rate": 1e-06, "loss": 0.5396, "num_input_tokens_seen": 510171268, "step": 9106 }, { "epoch": 20.280623608017816, "loss": 0.566622257232666, "loss_ce": 9.393271466251463e-05, "loss_iou": 0.255859375, "loss_num": 0.0111083984375, "loss_xval": 0.56640625, "num_input_tokens_seen": 510171268, "step": 9106 }, { "epoch": 20.28285077951002, "grad_norm": 20.48546028137207, "learning_rate": 1e-06, "loss": 0.3572, "num_input_tokens_seen": 510224736, "step": 9107 }, { "epoch": 20.28285077951002, "loss": 0.33814841508865356, "loss_ce": 6.32288574706763e-05, "loss_iou": 0.146484375, "loss_num": 0.00897216796875, "loss_xval": 0.337890625, "num_input_tokens_seen": 510224736, "step": 9107 }, { "epoch": 20.285077951002226, "grad_norm": 11.746217727661133, "learning_rate": 1e-06, "loss": 0.3038, "num_input_tokens_seen": 510279584, "step": 9108 }, { "epoch": 20.285077951002226, "loss": 0.21575751900672913, "loss_ce": 5.92783690080978e-05, "loss_iou": 0.08740234375, "loss_num": 0.00811767578125, "loss_xval": 0.2158203125, "num_input_tokens_seen": 510279584, "step": 9108 }, { "epoch": 20.28730512249443, "grad_norm": 14.660599708557129, "learning_rate": 1e-06, "loss": 0.3486, "num_input_tokens_seen": 510337512, "step": 9109 }, { "epoch": 20.28730512249443, "loss": 0.3341745138168335, "loss_ce": 6.808016041759402e-05, "loss_iou": 0.150390625, "loss_num": 0.006591796875, "loss_xval": 0.333984375, "num_input_tokens_seen": 510337512, "step": 9109 }, { "epoch": 20.289532293986635, "grad_norm": 13.905699729919434, "learning_rate": 1e-06, "loss": 0.3323, "num_input_tokens_seen": 510393672, "step": 9110 }, { "epoch": 20.289532293986635, "loss": 0.4626014828681946, "loss_ce": 7.70436818129383e-05, "loss_iou": 0.1787109375, "loss_num": 0.02099609375, "loss_xval": 0.462890625, "num_input_tokens_seen": 510393672, "step": 9110 }, { "epoch": 20.29175946547884, "grad_norm": 22.228431701660156, "learning_rate": 1e-06, "loss": 0.3862, "num_input_tokens_seen": 510449288, "step": 9111 }, { "epoch": 20.29175946547884, "loss": 0.3664637804031372, "loss_ce": 6.975741416681558e-05, "loss_iou": 0.1630859375, "loss_num": 0.0081787109375, "loss_xval": 0.3671875, "num_input_tokens_seen": 510449288, "step": 9111 }, { "epoch": 20.293986636971045, "grad_norm": 21.240266799926758, "learning_rate": 1e-06, "loss": 0.323, "num_input_tokens_seen": 510504764, "step": 9112 }, { "epoch": 20.293986636971045, "loss": 0.31800100207328796, "loss_ce": 6.887991185067222e-05, "loss_iou": 0.1376953125, "loss_num": 0.0084228515625, "loss_xval": 0.318359375, "num_input_tokens_seen": 510504764, "step": 9112 }, { "epoch": 20.29621380846325, "grad_norm": 13.980697631835938, "learning_rate": 1e-06, "loss": 0.4736, "num_input_tokens_seen": 510560660, "step": 9113 }, { "epoch": 20.29621380846325, "loss": 0.4085160195827484, "loss_ce": 6.87609426677227e-05, "loss_iou": 0.181640625, "loss_num": 0.00909423828125, "loss_xval": 0.408203125, "num_input_tokens_seen": 510560660, "step": 9113 }, { "epoch": 20.29844097995546, "grad_norm": 13.33857536315918, "learning_rate": 1e-06, "loss": 0.3764, "num_input_tokens_seen": 510613008, "step": 9114 }, { "epoch": 20.29844097995546, "loss": 0.36590009927749634, "loss_ce": 8.587302727391943e-05, "loss_iou": 0.1552734375, "loss_num": 0.0111083984375, "loss_xval": 0.365234375, "num_input_tokens_seen": 510613008, "step": 9114 }, { "epoch": 20.30066815144766, "grad_norm": 18.67300033569336, "learning_rate": 1e-06, "loss": 0.2536, "num_input_tokens_seen": 510670700, "step": 9115 }, { "epoch": 20.30066815144766, "loss": 0.20209243893623352, "loss_ce": 6.605993257835507e-05, "loss_iou": 0.08935546875, "loss_num": 0.004608154296875, "loss_xval": 0.2021484375, "num_input_tokens_seen": 510670700, "step": 9115 }, { "epoch": 20.302895322939868, "grad_norm": 21.35281753540039, "learning_rate": 1e-06, "loss": 0.5114, "num_input_tokens_seen": 510721468, "step": 9116 }, { "epoch": 20.302895322939868, "loss": 0.388251394033432, "loss_ce": 6.779546674806625e-05, "loss_iou": 0.1728515625, "loss_num": 0.00848388671875, "loss_xval": 0.388671875, "num_input_tokens_seen": 510721468, "step": 9116 }, { "epoch": 20.305122494432073, "grad_norm": 17.087766647338867, "learning_rate": 1e-06, "loss": 0.5297, "num_input_tokens_seen": 510776316, "step": 9117 }, { "epoch": 20.305122494432073, "loss": 0.5188660621643066, "loss_ce": 6.725148705299944e-05, "loss_iou": 0.228515625, "loss_num": 0.01251220703125, "loss_xval": 0.51953125, "num_input_tokens_seen": 510776316, "step": 9117 }, { "epoch": 20.307349665924278, "grad_norm": 14.230613708496094, "learning_rate": 1e-06, "loss": 0.3324, "num_input_tokens_seen": 510833816, "step": 9118 }, { "epoch": 20.307349665924278, "loss": 0.2973046898841858, "loss_ce": 6.350380135700107e-05, "loss_iou": 0.138671875, "loss_num": 0.00390625, "loss_xval": 0.296875, "num_input_tokens_seen": 510833816, "step": 9118 }, { "epoch": 20.309576837416483, "grad_norm": 25.817169189453125, "learning_rate": 1e-06, "loss": 0.3926, "num_input_tokens_seen": 510890252, "step": 9119 }, { "epoch": 20.309576837416483, "loss": 0.3149741291999817, "loss_ce": 6.32612791378051e-05, "loss_iou": 0.1396484375, "loss_num": 0.007110595703125, "loss_xval": 0.314453125, "num_input_tokens_seen": 510890252, "step": 9119 }, { "epoch": 20.311804008908688, "grad_norm": 17.687280654907227, "learning_rate": 1e-06, "loss": 0.4531, "num_input_tokens_seen": 510946368, "step": 9120 }, { "epoch": 20.311804008908688, "loss": 0.5778282284736633, "loss_ce": 6.946115900063887e-05, "loss_iou": 0.234375, "loss_num": 0.02197265625, "loss_xval": 0.578125, "num_input_tokens_seen": 510946368, "step": 9120 }, { "epoch": 20.314031180400892, "grad_norm": 17.3331356048584, "learning_rate": 1e-06, "loss": 0.5977, "num_input_tokens_seen": 511002084, "step": 9121 }, { "epoch": 20.314031180400892, "loss": 0.4655322730541229, "loss_ce": 7.816165452823043e-05, "loss_iou": 0.201171875, "loss_num": 0.01275634765625, "loss_xval": 0.46484375, "num_input_tokens_seen": 511002084, "step": 9121 }, { "epoch": 20.316258351893097, "grad_norm": 28.032072067260742, "learning_rate": 1e-06, "loss": 0.3289, "num_input_tokens_seen": 511060036, "step": 9122 }, { "epoch": 20.316258351893097, "loss": 0.4448232352733612, "loss_ce": 6.0018668591510504e-05, "loss_iou": 0.19921875, "loss_num": 0.00933837890625, "loss_xval": 0.4453125, "num_input_tokens_seen": 511060036, "step": 9122 }, { "epoch": 20.318485523385302, "grad_norm": 31.768814086914062, "learning_rate": 1e-06, "loss": 0.407, "num_input_tokens_seen": 511115920, "step": 9123 }, { "epoch": 20.318485523385302, "loss": 0.32061654329299927, "loss_ce": 5.989862256683409e-05, "loss_iou": 0.1435546875, "loss_num": 0.0067138671875, "loss_xval": 0.3203125, "num_input_tokens_seen": 511115920, "step": 9123 }, { "epoch": 20.320712694877507, "grad_norm": 20.859983444213867, "learning_rate": 1e-06, "loss": 0.3574, "num_input_tokens_seen": 511173348, "step": 9124 }, { "epoch": 20.320712694877507, "loss": 0.36715149879455566, "loss_ce": 8.607155177742243e-05, "loss_iou": 0.15234375, "loss_num": 0.01239013671875, "loss_xval": 0.3671875, "num_input_tokens_seen": 511173348, "step": 9124 }, { "epoch": 20.322939866369712, "grad_norm": 14.776050567626953, "learning_rate": 1e-06, "loss": 0.4476, "num_input_tokens_seen": 511230864, "step": 9125 }, { "epoch": 20.322939866369712, "loss": 0.49763861298561096, "loss_ce": 6.477968418039382e-05, "loss_iou": 0.21875, "loss_num": 0.01220703125, "loss_xval": 0.498046875, "num_input_tokens_seen": 511230864, "step": 9125 }, { "epoch": 20.325167037861917, "grad_norm": 25.55546760559082, "learning_rate": 1e-06, "loss": 0.3811, "num_input_tokens_seen": 511285816, "step": 9126 }, { "epoch": 20.325167037861917, "loss": 0.32178765535354614, "loss_ce": 7.134034967748448e-05, "loss_iou": 0.130859375, "loss_num": 0.0118408203125, "loss_xval": 0.322265625, "num_input_tokens_seen": 511285816, "step": 9126 }, { "epoch": 20.32739420935412, "grad_norm": 21.569284439086914, "learning_rate": 1e-06, "loss": 0.2597, "num_input_tokens_seen": 511343148, "step": 9127 }, { "epoch": 20.32739420935412, "loss": 0.262149453163147, "loss_ce": 6.447016494348645e-05, "loss_iou": 0.11767578125, "loss_num": 0.00531005859375, "loss_xval": 0.26171875, "num_input_tokens_seen": 511343148, "step": 9127 }, { "epoch": 20.329621380846326, "grad_norm": 14.305514335632324, "learning_rate": 1e-06, "loss": 0.3907, "num_input_tokens_seen": 511400796, "step": 9128 }, { "epoch": 20.329621380846326, "loss": 0.32069259881973267, "loss_ce": 7.491336873499677e-05, "loss_iou": 0.142578125, "loss_num": 0.0072021484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 511400796, "step": 9128 }, { "epoch": 20.33184855233853, "grad_norm": 15.63278865814209, "learning_rate": 1e-06, "loss": 0.2036, "num_input_tokens_seen": 511456720, "step": 9129 }, { "epoch": 20.33184855233853, "loss": 0.2301052063703537, "loss_ce": 6.369603215716779e-05, "loss_iou": 0.10693359375, "loss_num": 0.0032501220703125, "loss_xval": 0.23046875, "num_input_tokens_seen": 511456720, "step": 9129 }, { "epoch": 20.334075723830736, "grad_norm": 93.94779205322266, "learning_rate": 1e-06, "loss": 0.3359, "num_input_tokens_seen": 511513624, "step": 9130 }, { "epoch": 20.334075723830736, "loss": 0.24669964611530304, "loss_ce": 5.657020301441662e-05, "loss_iou": 0.11083984375, "loss_num": 0.00506591796875, "loss_xval": 0.2470703125, "num_input_tokens_seen": 511513624, "step": 9130 }, { "epoch": 20.33630289532294, "grad_norm": 33.43038558959961, "learning_rate": 1e-06, "loss": 0.3538, "num_input_tokens_seen": 511570664, "step": 9131 }, { "epoch": 20.33630289532294, "loss": 0.4840731620788574, "loss_ce": 0.00012543509365059435, "loss_iou": 0.21875, "loss_num": 0.00921630859375, "loss_xval": 0.484375, "num_input_tokens_seen": 511570664, "step": 9131 }, { "epoch": 20.338530066815146, "grad_norm": 11.925836563110352, "learning_rate": 1e-06, "loss": 0.2765, "num_input_tokens_seen": 511626512, "step": 9132 }, { "epoch": 20.338530066815146, "loss": 0.27660778164863586, "loss_ce": 5.7503631978761405e-05, "loss_iou": 0.12451171875, "loss_num": 0.00543212890625, "loss_xval": 0.27734375, "num_input_tokens_seen": 511626512, "step": 9132 }, { "epoch": 20.34075723830735, "grad_norm": 19.892284393310547, "learning_rate": 1e-06, "loss": 0.2753, "num_input_tokens_seen": 511684156, "step": 9133 }, { "epoch": 20.34075723830735, "loss": 0.2991905212402344, "loss_ce": 5.724587390432134e-05, "loss_iou": 0.13671875, "loss_num": 0.005279541015625, "loss_xval": 0.298828125, "num_input_tokens_seen": 511684156, "step": 9133 }, { "epoch": 20.342984409799556, "grad_norm": 14.584566116333008, "learning_rate": 1e-06, "loss": 0.3131, "num_input_tokens_seen": 511743004, "step": 9134 }, { "epoch": 20.342984409799556, "loss": 0.31488001346588135, "loss_ce": 6.067375943530351e-05, "loss_iou": 0.1435546875, "loss_num": 0.005615234375, "loss_xval": 0.314453125, "num_input_tokens_seen": 511743004, "step": 9134 }, { "epoch": 20.34521158129176, "grad_norm": 24.601272583007812, "learning_rate": 1e-06, "loss": 0.5077, "num_input_tokens_seen": 511794760, "step": 9135 }, { "epoch": 20.34521158129176, "loss": 0.6481292247772217, "loss_ce": 5.792171214125119e-05, "loss_iou": 0.263671875, "loss_num": 0.0238037109375, "loss_xval": 0.6484375, "num_input_tokens_seen": 511794760, "step": 9135 }, { "epoch": 20.347438752783965, "grad_norm": 13.779706954956055, "learning_rate": 1e-06, "loss": 0.4595, "num_input_tokens_seen": 511852632, "step": 9136 }, { "epoch": 20.347438752783965, "loss": 0.4468442499637604, "loss_ce": 6.692019815091044e-05, "loss_iou": 0.1884765625, "loss_num": 0.01409912109375, "loss_xval": 0.447265625, "num_input_tokens_seen": 511852632, "step": 9136 }, { "epoch": 20.34966592427617, "grad_norm": 17.18348503112793, "learning_rate": 1e-06, "loss": 0.3995, "num_input_tokens_seen": 511910288, "step": 9137 }, { "epoch": 20.34966592427617, "loss": 0.5219651460647583, "loss_ce": 0.00011460046516731381, "loss_iou": 0.1943359375, "loss_num": 0.026611328125, "loss_xval": 0.5234375, "num_input_tokens_seen": 511910288, "step": 9137 }, { "epoch": 20.351893095768375, "grad_norm": 16.120128631591797, "learning_rate": 1e-06, "loss": 0.434, "num_input_tokens_seen": 511965500, "step": 9138 }, { "epoch": 20.351893095768375, "loss": 0.4480791389942169, "loss_ce": 8.108362089842558e-05, "loss_iou": 0.1650390625, "loss_num": 0.0235595703125, "loss_xval": 0.447265625, "num_input_tokens_seen": 511965500, "step": 9138 }, { "epoch": 20.35412026726058, "grad_norm": 14.766048431396484, "learning_rate": 1e-06, "loss": 0.3227, "num_input_tokens_seen": 512020400, "step": 9139 }, { "epoch": 20.35412026726058, "loss": 0.17573511600494385, "loss_ce": 6.067653885111213e-05, "loss_iou": 0.07080078125, "loss_num": 0.006805419921875, "loss_xval": 0.17578125, "num_input_tokens_seen": 512020400, "step": 9139 }, { "epoch": 20.356347438752785, "grad_norm": 23.5654354095459, "learning_rate": 1e-06, "loss": 0.3828, "num_input_tokens_seen": 512079228, "step": 9140 }, { "epoch": 20.356347438752785, "loss": 0.4413471221923828, "loss_ce": 6.295171624515206e-05, "loss_iou": 0.19140625, "loss_num": 0.01177978515625, "loss_xval": 0.44140625, "num_input_tokens_seen": 512079228, "step": 9140 }, { "epoch": 20.35857461024499, "grad_norm": 17.19645118713379, "learning_rate": 1e-06, "loss": 0.4033, "num_input_tokens_seen": 512136932, "step": 9141 }, { "epoch": 20.35857461024499, "loss": 0.3532071113586426, "loss_ce": 5.770741336164065e-05, "loss_iou": 0.1435546875, "loss_num": 0.01312255859375, "loss_xval": 0.353515625, "num_input_tokens_seen": 512136932, "step": 9141 }, { "epoch": 20.360801781737194, "grad_norm": 15.467480659484863, "learning_rate": 1e-06, "loss": 0.3074, "num_input_tokens_seen": 512194392, "step": 9142 }, { "epoch": 20.360801781737194, "loss": 0.2540320158004761, "loss_ce": 0.00012579177564475685, "loss_iou": 0.10791015625, "loss_num": 0.007568359375, "loss_xval": 0.25390625, "num_input_tokens_seen": 512194392, "step": 9142 }, { "epoch": 20.3630289532294, "grad_norm": 21.110055923461914, "learning_rate": 1e-06, "loss": 0.4215, "num_input_tokens_seen": 512251156, "step": 9143 }, { "epoch": 20.3630289532294, "loss": 0.4504120349884033, "loss_ce": 0.00021672958973795176, "loss_iou": 0.2099609375, "loss_num": 0.00592041015625, "loss_xval": 0.44921875, "num_input_tokens_seen": 512251156, "step": 9143 }, { "epoch": 20.365256124721604, "grad_norm": 13.63264274597168, "learning_rate": 1e-06, "loss": 0.356, "num_input_tokens_seen": 512307892, "step": 9144 }, { "epoch": 20.365256124721604, "loss": 0.4380814731121063, "loss_ce": 6.268207653192803e-05, "loss_iou": 0.1953125, "loss_num": 0.00921630859375, "loss_xval": 0.4375, "num_input_tokens_seen": 512307892, "step": 9144 }, { "epoch": 20.36748329621381, "grad_norm": 37.342010498046875, "learning_rate": 1e-06, "loss": 0.3701, "num_input_tokens_seen": 512361460, "step": 9145 }, { "epoch": 20.36748329621381, "loss": 0.4177306294441223, "loss_ce": 6.703598774038255e-05, "loss_iou": 0.1748046875, "loss_num": 0.013671875, "loss_xval": 0.41796875, "num_input_tokens_seen": 512361460, "step": 9145 }, { "epoch": 20.369710467706014, "grad_norm": 17.764732360839844, "learning_rate": 1e-06, "loss": 0.3428, "num_input_tokens_seen": 512418536, "step": 9146 }, { "epoch": 20.369710467706014, "loss": 0.33477145433425903, "loss_ce": 5.4668213124386966e-05, "loss_iou": 0.150390625, "loss_num": 0.0068359375, "loss_xval": 0.333984375, "num_input_tokens_seen": 512418536, "step": 9146 }, { "epoch": 20.37193763919822, "grad_norm": 25.832767486572266, "learning_rate": 1e-06, "loss": 0.5058, "num_input_tokens_seen": 512473128, "step": 9147 }, { "epoch": 20.37193763919822, "loss": 0.4780276417732239, "loss_ce": 6.131519330665469e-05, "loss_iou": 0.185546875, "loss_num": 0.0213623046875, "loss_xval": 0.478515625, "num_input_tokens_seen": 512473128, "step": 9147 }, { "epoch": 20.374164810690424, "grad_norm": 21.50016212463379, "learning_rate": 1e-06, "loss": 0.4721, "num_input_tokens_seen": 512527572, "step": 9148 }, { "epoch": 20.374164810690424, "loss": 0.42840418219566345, "loss_ce": 5.943184805801138e-05, "loss_iou": 0.1845703125, "loss_num": 0.0115966796875, "loss_xval": 0.427734375, "num_input_tokens_seen": 512527572, "step": 9148 }, { "epoch": 20.37639198218263, "grad_norm": 13.383490562438965, "learning_rate": 1e-06, "loss": 0.3591, "num_input_tokens_seen": 512584280, "step": 9149 }, { "epoch": 20.37639198218263, "loss": 0.345126748085022, "loss_ce": 6.446812767535448e-05, "loss_iou": 0.15625, "loss_num": 0.006500244140625, "loss_xval": 0.345703125, "num_input_tokens_seen": 512584280, "step": 9149 }, { "epoch": 20.378619153674833, "grad_norm": 17.600801467895508, "learning_rate": 1e-06, "loss": 0.3535, "num_input_tokens_seen": 512640636, "step": 9150 }, { "epoch": 20.378619153674833, "loss": 0.45195910334587097, "loss_ce": 5.4796357289887965e-05, "loss_iou": 0.1806640625, "loss_num": 0.01806640625, "loss_xval": 0.451171875, "num_input_tokens_seen": 512640636, "step": 9150 }, { "epoch": 20.380846325167038, "grad_norm": 15.987733840942383, "learning_rate": 1e-06, "loss": 0.3294, "num_input_tokens_seen": 512697364, "step": 9151 }, { "epoch": 20.380846325167038, "loss": 0.42148369550704956, "loss_ce": 9.698521898826584e-05, "loss_iou": 0.18359375, "loss_num": 0.0107421875, "loss_xval": 0.421875, "num_input_tokens_seen": 512697364, "step": 9151 }, { "epoch": 20.383073496659243, "grad_norm": 17.254024505615234, "learning_rate": 1e-06, "loss": 0.4233, "num_input_tokens_seen": 512753732, "step": 9152 }, { "epoch": 20.383073496659243, "loss": 0.43444541096687317, "loss_ce": 0.00011922699195565656, "loss_iou": 0.2001953125, "loss_num": 0.00665283203125, "loss_xval": 0.43359375, "num_input_tokens_seen": 512753732, "step": 9152 }, { "epoch": 20.385300668151448, "grad_norm": 32.762237548828125, "learning_rate": 1e-06, "loss": 0.5156, "num_input_tokens_seen": 512808448, "step": 9153 }, { "epoch": 20.385300668151448, "loss": 0.7034420371055603, "loss_ce": 7.288139750016853e-05, "loss_iou": 0.283203125, "loss_num": 0.02734375, "loss_xval": 0.703125, "num_input_tokens_seen": 512808448, "step": 9153 }, { "epoch": 20.387527839643653, "grad_norm": 34.91286087036133, "learning_rate": 1e-06, "loss": 0.4527, "num_input_tokens_seen": 512864528, "step": 9154 }, { "epoch": 20.387527839643653, "loss": 0.48030704259872437, "loss_ce": 8.245596836786717e-05, "loss_iou": 0.181640625, "loss_num": 0.023193359375, "loss_xval": 0.48046875, "num_input_tokens_seen": 512864528, "step": 9154 }, { "epoch": 20.389755011135858, "grad_norm": 16.32798194885254, "learning_rate": 1e-06, "loss": 0.3353, "num_input_tokens_seen": 512921932, "step": 9155 }, { "epoch": 20.389755011135858, "loss": 0.3125593066215515, "loss_ce": 5.93056101934053e-05, "loss_iou": 0.1435546875, "loss_num": 0.005279541015625, "loss_xval": 0.3125, "num_input_tokens_seen": 512921932, "step": 9155 }, { "epoch": 20.391982182628063, "grad_norm": 22.385419845581055, "learning_rate": 1e-06, "loss": 0.4722, "num_input_tokens_seen": 512977824, "step": 9156 }, { "epoch": 20.391982182628063, "loss": 0.4710025191307068, "loss_ce": 5.522385981748812e-05, "loss_iou": 0.2109375, "loss_num": 0.00958251953125, "loss_xval": 0.470703125, "num_input_tokens_seen": 512977824, "step": 9156 }, { "epoch": 20.394209354120267, "grad_norm": 22.299945831298828, "learning_rate": 1e-06, "loss": 0.332, "num_input_tokens_seen": 513036572, "step": 9157 }, { "epoch": 20.394209354120267, "loss": 0.27536553144454956, "loss_ce": 6.644198583671823e-05, "loss_iou": 0.1259765625, "loss_num": 0.00482177734375, "loss_xval": 0.275390625, "num_input_tokens_seen": 513036572, "step": 9157 }, { "epoch": 20.396436525612472, "grad_norm": 18.22890853881836, "learning_rate": 1e-06, "loss": 0.5649, "num_input_tokens_seen": 513092372, "step": 9158 }, { "epoch": 20.396436525612472, "loss": 0.5173371434211731, "loss_ce": 6.420467980206013e-05, "loss_iou": 0.21875, "loss_num": 0.0162353515625, "loss_xval": 0.515625, "num_input_tokens_seen": 513092372, "step": 9158 }, { "epoch": 20.398663697104677, "grad_norm": 13.26450252532959, "learning_rate": 1e-06, "loss": 0.2702, "num_input_tokens_seen": 513146088, "step": 9159 }, { "epoch": 20.398663697104677, "loss": 0.24542546272277832, "loss_ce": 6.411702634068206e-05, "loss_iou": 0.111328125, "loss_num": 0.004547119140625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 513146088, "step": 9159 }, { "epoch": 20.400890868596882, "grad_norm": 13.41020393371582, "learning_rate": 1e-06, "loss": 0.2748, "num_input_tokens_seen": 513201040, "step": 9160 }, { "epoch": 20.400890868596882, "loss": 0.32405906915664673, "loss_ce": 8.446039282716811e-05, "loss_iou": 0.1435546875, "loss_num": 0.007537841796875, "loss_xval": 0.32421875, "num_input_tokens_seen": 513201040, "step": 9160 }, { "epoch": 20.403118040089087, "grad_norm": 26.860549926757812, "learning_rate": 1e-06, "loss": 0.4289, "num_input_tokens_seen": 513259616, "step": 9161 }, { "epoch": 20.403118040089087, "loss": 0.5175533294677734, "loss_ce": 9.72962225205265e-05, "loss_iou": 0.21484375, "loss_num": 0.017822265625, "loss_xval": 0.515625, "num_input_tokens_seen": 513259616, "step": 9161 }, { "epoch": 20.40534521158129, "grad_norm": 16.444419860839844, "learning_rate": 1e-06, "loss": 0.3839, "num_input_tokens_seen": 513315256, "step": 9162 }, { "epoch": 20.40534521158129, "loss": 0.21068307757377625, "loss_ce": 5.074591172160581e-05, "loss_iou": 0.0888671875, "loss_num": 0.006622314453125, "loss_xval": 0.2109375, "num_input_tokens_seen": 513315256, "step": 9162 }, { "epoch": 20.407572383073497, "grad_norm": 118.25879669189453, "learning_rate": 1e-06, "loss": 0.3403, "num_input_tokens_seen": 513371288, "step": 9163 }, { "epoch": 20.407572383073497, "loss": 0.3745216727256775, "loss_ce": 7.099170761648566e-05, "loss_iou": 0.1513671875, "loss_num": 0.0145263671875, "loss_xval": 0.375, "num_input_tokens_seen": 513371288, "step": 9163 }, { "epoch": 20.4097995545657, "grad_norm": 23.073387145996094, "learning_rate": 1e-06, "loss": 0.298, "num_input_tokens_seen": 513429104, "step": 9164 }, { "epoch": 20.4097995545657, "loss": 0.2354094237089157, "loss_ce": 8.837871428113431e-05, "loss_iou": 0.1064453125, "loss_num": 0.004364013671875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 513429104, "step": 9164 }, { "epoch": 20.412026726057906, "grad_norm": 16.92924690246582, "learning_rate": 1e-06, "loss": 0.4139, "num_input_tokens_seen": 513486220, "step": 9165 }, { "epoch": 20.412026726057906, "loss": 0.30585503578186035, "loss_ce": 6.89055013936013e-05, "loss_iou": 0.1337890625, "loss_num": 0.007476806640625, "loss_xval": 0.306640625, "num_input_tokens_seen": 513486220, "step": 9165 }, { "epoch": 20.41425389755011, "grad_norm": 19.239347457885742, "learning_rate": 1e-06, "loss": 0.2636, "num_input_tokens_seen": 513540880, "step": 9166 }, { "epoch": 20.41425389755011, "loss": 0.24502648413181305, "loss_ce": 6.187942926771939e-05, "loss_iou": 0.103515625, "loss_num": 0.007598876953125, "loss_xval": 0.2451171875, "num_input_tokens_seen": 513540880, "step": 9166 }, { "epoch": 20.416481069042316, "grad_norm": 18.908649444580078, "learning_rate": 1e-06, "loss": 0.2436, "num_input_tokens_seen": 513596724, "step": 9167 }, { "epoch": 20.416481069042316, "loss": 0.274596244096756, "loss_ce": 6.010765355313197e-05, "loss_iou": 0.12060546875, "loss_num": 0.0067138671875, "loss_xval": 0.275390625, "num_input_tokens_seen": 513596724, "step": 9167 }, { "epoch": 20.41870824053452, "grad_norm": 15.285927772521973, "learning_rate": 1e-06, "loss": 0.3752, "num_input_tokens_seen": 513652904, "step": 9168 }, { "epoch": 20.41870824053452, "loss": 0.34748250246047974, "loss_ce": 7.040443597361445e-05, "loss_iou": 0.1513671875, "loss_num": 0.0089111328125, "loss_xval": 0.34765625, "num_input_tokens_seen": 513652904, "step": 9168 }, { "epoch": 20.420935412026726, "grad_norm": 20.112258911132812, "learning_rate": 1e-06, "loss": 0.3643, "num_input_tokens_seen": 513708628, "step": 9169 }, { "epoch": 20.420935412026726, "loss": 0.43531090021133423, "loss_ce": 6.918917642906308e-05, "loss_iou": 0.2021484375, "loss_num": 0.00628662109375, "loss_xval": 0.435546875, "num_input_tokens_seen": 513708628, "step": 9169 }, { "epoch": 20.42316258351893, "grad_norm": 12.775040626525879, "learning_rate": 1e-06, "loss": 0.357, "num_input_tokens_seen": 513766376, "step": 9170 }, { "epoch": 20.42316258351893, "loss": 0.4500158429145813, "loss_ce": 6.470449443440884e-05, "loss_iou": 0.1591796875, "loss_num": 0.0263671875, "loss_xval": 0.44921875, "num_input_tokens_seen": 513766376, "step": 9170 }, { "epoch": 20.425389755011135, "grad_norm": 22.044282913208008, "learning_rate": 1e-06, "loss": 0.413, "num_input_tokens_seen": 513821636, "step": 9171 }, { "epoch": 20.425389755011135, "loss": 0.5979858040809631, "loss_ce": 8.540972339687869e-05, "loss_iou": 0.251953125, "loss_num": 0.0184326171875, "loss_xval": 0.59765625, "num_input_tokens_seen": 513821636, "step": 9171 }, { "epoch": 20.42761692650334, "grad_norm": 16.4615478515625, "learning_rate": 1e-06, "loss": 0.4864, "num_input_tokens_seen": 513878332, "step": 9172 }, { "epoch": 20.42761692650334, "loss": 0.3729918897151947, "loss_ce": 6.709150329697877e-05, "loss_iou": 0.17578125, "loss_num": 0.004119873046875, "loss_xval": 0.373046875, "num_input_tokens_seen": 513878332, "step": 9172 }, { "epoch": 20.429844097995545, "grad_norm": 17.841854095458984, "learning_rate": 1e-06, "loss": 0.329, "num_input_tokens_seen": 513934092, "step": 9173 }, { "epoch": 20.429844097995545, "loss": 0.3353223204612732, "loss_ce": 5.619797593681142e-05, "loss_iou": 0.154296875, "loss_num": 0.00537109375, "loss_xval": 0.3359375, "num_input_tokens_seen": 513934092, "step": 9173 }, { "epoch": 20.43207126948775, "grad_norm": 18.19709587097168, "learning_rate": 1e-06, "loss": 0.3155, "num_input_tokens_seen": 513989020, "step": 9174 }, { "epoch": 20.43207126948775, "loss": 0.2181463986635208, "loss_ce": 6.778672832297161e-05, "loss_iou": 0.09375, "loss_num": 0.00616455078125, "loss_xval": 0.2177734375, "num_input_tokens_seen": 513989020, "step": 9174 }, { "epoch": 20.434298440979955, "grad_norm": 35.7059440612793, "learning_rate": 1e-06, "loss": 0.5612, "num_input_tokens_seen": 514040600, "step": 9175 }, { "epoch": 20.434298440979955, "loss": 0.6007704734802246, "loss_ce": 6.243239477043971e-05, "loss_iou": 0.25390625, "loss_num": 0.018310546875, "loss_xval": 0.6015625, "num_input_tokens_seen": 514040600, "step": 9175 }, { "epoch": 20.43652561247216, "grad_norm": 17.803630828857422, "learning_rate": 1e-06, "loss": 0.2915, "num_input_tokens_seen": 514095804, "step": 9176 }, { "epoch": 20.43652561247216, "loss": 0.27951860427856445, "loss_ce": 9.964508353732526e-05, "loss_iou": 0.10791015625, "loss_num": 0.0128173828125, "loss_xval": 0.279296875, "num_input_tokens_seen": 514095804, "step": 9176 }, { "epoch": 20.438752783964365, "grad_norm": 15.95635986328125, "learning_rate": 1e-06, "loss": 0.3912, "num_input_tokens_seen": 514152436, "step": 9177 }, { "epoch": 20.438752783964365, "loss": 0.45222991704940796, "loss_ce": 8.150480425683782e-05, "loss_iou": 0.16796875, "loss_num": 0.0234375, "loss_xval": 0.453125, "num_input_tokens_seen": 514152436, "step": 9177 }, { "epoch": 20.44097995545657, "grad_norm": 30.868133544921875, "learning_rate": 1e-06, "loss": 0.5745, "num_input_tokens_seen": 514210436, "step": 9178 }, { "epoch": 20.44097995545657, "loss": 0.45710474252700806, "loss_ce": 7.350958185270429e-05, "loss_iou": 0.2177734375, "loss_num": 0.00421142578125, "loss_xval": 0.45703125, "num_input_tokens_seen": 514210436, "step": 9178 }, { "epoch": 20.443207126948774, "grad_norm": 28.9227352142334, "learning_rate": 1e-06, "loss": 0.3003, "num_input_tokens_seen": 514268436, "step": 9179 }, { "epoch": 20.443207126948774, "loss": 0.27464067935943604, "loss_ce": 4.350075323600322e-05, "loss_iou": 0.1181640625, "loss_num": 0.00762939453125, "loss_xval": 0.275390625, "num_input_tokens_seen": 514268436, "step": 9179 }, { "epoch": 20.44543429844098, "grad_norm": 18.41109848022461, "learning_rate": 1e-06, "loss": 0.2609, "num_input_tokens_seen": 514326408, "step": 9180 }, { "epoch": 20.44543429844098, "loss": 0.2086607664823532, "loss_ce": 0.00010363436012994498, "loss_iou": 0.08642578125, "loss_num": 0.007080078125, "loss_xval": 0.208984375, "num_input_tokens_seen": 514326408, "step": 9180 }, { "epoch": 20.447661469933184, "grad_norm": 22.37228012084961, "learning_rate": 1e-06, "loss": 0.2734, "num_input_tokens_seen": 514381776, "step": 9181 }, { "epoch": 20.447661469933184, "loss": 0.2675103545188904, "loss_ce": 5.431023964774795e-05, "loss_iou": 0.111328125, "loss_num": 0.00897216796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 514381776, "step": 9181 }, { "epoch": 20.44988864142539, "grad_norm": 20.06549072265625, "learning_rate": 1e-06, "loss": 0.2565, "num_input_tokens_seen": 514435556, "step": 9182 }, { "epoch": 20.44988864142539, "loss": 0.2752624452114105, "loss_ce": 0.00011597707634791732, "loss_iou": 0.10546875, "loss_num": 0.012939453125, "loss_xval": 0.275390625, "num_input_tokens_seen": 514435556, "step": 9182 }, { "epoch": 20.452115812917594, "grad_norm": 14.442758560180664, "learning_rate": 1e-06, "loss": 0.4209, "num_input_tokens_seen": 514491956, "step": 9183 }, { "epoch": 20.452115812917594, "loss": 0.4563639163970947, "loss_ce": 6.508714432129636e-05, "loss_iou": 0.189453125, "loss_num": 0.0155029296875, "loss_xval": 0.45703125, "num_input_tokens_seen": 514491956, "step": 9183 }, { "epoch": 20.4543429844098, "grad_norm": 18.204957962036133, "learning_rate": 1e-06, "loss": 0.3314, "num_input_tokens_seen": 514548608, "step": 9184 }, { "epoch": 20.4543429844098, "loss": 0.2957186698913574, "loss_ce": 6.437343108700588e-05, "loss_iou": 0.1376953125, "loss_num": 0.00408935546875, "loss_xval": 0.294921875, "num_input_tokens_seen": 514548608, "step": 9184 }, { "epoch": 20.456570155902003, "grad_norm": 19.535131454467773, "learning_rate": 1e-06, "loss": 0.3625, "num_input_tokens_seen": 514604368, "step": 9185 }, { "epoch": 20.456570155902003, "loss": 0.4076521098613739, "loss_ce": 5.9345908084651455e-05, "loss_iou": 0.1796875, "loss_num": 0.00946044921875, "loss_xval": 0.408203125, "num_input_tokens_seen": 514604368, "step": 9185 }, { "epoch": 20.45879732739421, "grad_norm": 19.040164947509766, "learning_rate": 1e-06, "loss": 0.3052, "num_input_tokens_seen": 514661100, "step": 9186 }, { "epoch": 20.45879732739421, "loss": 0.29315710067749023, "loss_ce": 6.629424751736224e-05, "loss_iou": 0.130859375, "loss_num": 0.006103515625, "loss_xval": 0.29296875, "num_input_tokens_seen": 514661100, "step": 9186 }, { "epoch": 20.461024498886413, "grad_norm": 26.235673904418945, "learning_rate": 1e-06, "loss": 0.6357, "num_input_tokens_seen": 514715804, "step": 9187 }, { "epoch": 20.461024498886413, "loss": 0.4242648184299469, "loss_ce": 7.046835526125506e-05, "loss_iou": 0.1884765625, "loss_num": 0.00927734375, "loss_xval": 0.423828125, "num_input_tokens_seen": 514715804, "step": 9187 }, { "epoch": 20.463251670378618, "grad_norm": 20.59581756591797, "learning_rate": 1e-06, "loss": 0.2953, "num_input_tokens_seen": 514772648, "step": 9188 }, { "epoch": 20.463251670378618, "loss": 0.297730028629303, "loss_ce": 6.155186565592885e-05, "loss_iou": 0.1416015625, "loss_num": 0.0030364990234375, "loss_xval": 0.296875, "num_input_tokens_seen": 514772648, "step": 9188 }, { "epoch": 20.465478841870823, "grad_norm": 21.08500099182129, "learning_rate": 1e-06, "loss": 0.4619, "num_input_tokens_seen": 514827056, "step": 9189 }, { "epoch": 20.465478841870823, "loss": 0.5222786664962769, "loss_ce": 6.190245039761066e-05, "loss_iou": 0.2294921875, "loss_num": 0.01251220703125, "loss_xval": 0.5234375, "num_input_tokens_seen": 514827056, "step": 9189 }, { "epoch": 20.467706013363028, "grad_norm": 24.425458908081055, "learning_rate": 1e-06, "loss": 0.4498, "num_input_tokens_seen": 514878540, "step": 9190 }, { "epoch": 20.467706013363028, "loss": 0.27809709310531616, "loss_ce": 8.197416900657117e-05, "loss_iou": 0.115234375, "loss_num": 0.00933837890625, "loss_xval": 0.27734375, "num_input_tokens_seen": 514878540, "step": 9190 }, { "epoch": 20.469933184855233, "grad_norm": 20.29898452758789, "learning_rate": 1e-06, "loss": 0.3158, "num_input_tokens_seen": 514933080, "step": 9191 }, { "epoch": 20.469933184855233, "loss": 0.2902224063873291, "loss_ce": 6.12850344623439e-05, "loss_iou": 0.130859375, "loss_num": 0.005889892578125, "loss_xval": 0.291015625, "num_input_tokens_seen": 514933080, "step": 9191 }, { "epoch": 20.472160356347437, "grad_norm": 23.6481876373291, "learning_rate": 1e-06, "loss": 0.2375, "num_input_tokens_seen": 514987504, "step": 9192 }, { "epoch": 20.472160356347437, "loss": 0.16320902109146118, "loss_ce": 9.255468467017636e-05, "loss_iou": 0.06982421875, "loss_num": 0.004638671875, "loss_xval": 0.1630859375, "num_input_tokens_seen": 514987504, "step": 9192 }, { "epoch": 20.474387527839642, "grad_norm": 16.435392379760742, "learning_rate": 1e-06, "loss": 0.3885, "num_input_tokens_seen": 515043248, "step": 9193 }, { "epoch": 20.474387527839642, "loss": 0.4120558798313141, "loss_ce": 6.857035623397678e-05, "loss_iou": 0.1787109375, "loss_num": 0.01092529296875, "loss_xval": 0.412109375, "num_input_tokens_seen": 515043248, "step": 9193 }, { "epoch": 20.476614699331847, "grad_norm": 18.2564697265625, "learning_rate": 1e-06, "loss": 0.3537, "num_input_tokens_seen": 515098600, "step": 9194 }, { "epoch": 20.476614699331847, "loss": 0.3193051218986511, "loss_ce": 9.127189696300775e-05, "loss_iou": 0.1484375, "loss_num": 0.004547119140625, "loss_xval": 0.318359375, "num_input_tokens_seen": 515098600, "step": 9194 }, { "epoch": 20.478841870824052, "grad_norm": 47.8402099609375, "learning_rate": 1e-06, "loss": 0.2802, "num_input_tokens_seen": 515157696, "step": 9195 }, { "epoch": 20.478841870824052, "loss": 0.31334012746810913, "loss_ce": 0.00010771671804832295, "loss_iou": 0.1435546875, "loss_num": 0.005035400390625, "loss_xval": 0.3125, "num_input_tokens_seen": 515157696, "step": 9195 }, { "epoch": 20.481069042316257, "grad_norm": 16.30150604248047, "learning_rate": 1e-06, "loss": 0.2492, "num_input_tokens_seen": 515213144, "step": 9196 }, { "epoch": 20.481069042316257, "loss": 0.20818519592285156, "loss_ce": 5.53104946448002e-05, "loss_iou": 0.080078125, "loss_num": 0.00946044921875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 515213144, "step": 9196 }, { "epoch": 20.48329621380846, "grad_norm": 12.833916664123535, "learning_rate": 1e-06, "loss": 0.2618, "num_input_tokens_seen": 515271576, "step": 9197 }, { "epoch": 20.48329621380846, "loss": 0.3264831006526947, "loss_ce": 6.709274020977318e-05, "loss_iou": 0.1357421875, "loss_num": 0.01092529296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 515271576, "step": 9197 }, { "epoch": 20.485523385300667, "grad_norm": 124.69937896728516, "learning_rate": 1e-06, "loss": 0.3763, "num_input_tokens_seen": 515327292, "step": 9198 }, { "epoch": 20.485523385300667, "loss": 0.25111645460128784, "loss_ce": 7.887819083407521e-05, "loss_iou": 0.10595703125, "loss_num": 0.0079345703125, "loss_xval": 0.251953125, "num_input_tokens_seen": 515327292, "step": 9198 }, { "epoch": 20.48775055679287, "grad_norm": 30.976198196411133, "learning_rate": 1e-06, "loss": 0.5357, "num_input_tokens_seen": 515382716, "step": 9199 }, { "epoch": 20.48775055679287, "loss": 0.6795259118080139, "loss_ce": 9.781922562979162e-05, "loss_iou": 0.298828125, "loss_num": 0.016357421875, "loss_xval": 0.6796875, "num_input_tokens_seen": 515382716, "step": 9199 }, { "epoch": 20.489977728285076, "grad_norm": 37.82209396362305, "learning_rate": 1e-06, "loss": 0.3135, "num_input_tokens_seen": 515438296, "step": 9200 }, { "epoch": 20.489977728285076, "loss": 0.38873451948165894, "loss_ce": 6.264903640840203e-05, "loss_iou": 0.177734375, "loss_num": 0.006622314453125, "loss_xval": 0.388671875, "num_input_tokens_seen": 515438296, "step": 9200 }, { "epoch": 20.49220489977728, "grad_norm": 15.100608825683594, "learning_rate": 1e-06, "loss": 0.2894, "num_input_tokens_seen": 515495380, "step": 9201 }, { "epoch": 20.49220489977728, "loss": 0.3832995295524597, "loss_ce": 5.9795893321279436e-05, "loss_iou": 0.17578125, "loss_num": 0.00653076171875, "loss_xval": 0.3828125, "num_input_tokens_seen": 515495380, "step": 9201 }, { "epoch": 20.494432071269486, "grad_norm": 26.354969024658203, "learning_rate": 1e-06, "loss": 0.3353, "num_input_tokens_seen": 515552492, "step": 9202 }, { "epoch": 20.494432071269486, "loss": 0.3944174647331238, "loss_ce": 6.930494419066235e-05, "loss_iou": 0.162109375, "loss_num": 0.01409912109375, "loss_xval": 0.39453125, "num_input_tokens_seen": 515552492, "step": 9202 }, { "epoch": 20.49665924276169, "grad_norm": 19.706466674804688, "learning_rate": 1e-06, "loss": 0.3413, "num_input_tokens_seen": 515607424, "step": 9203 }, { "epoch": 20.49665924276169, "loss": 0.3399580419063568, "loss_ce": 5.32609956280794e-05, "loss_iou": 0.1298828125, "loss_num": 0.0159912109375, "loss_xval": 0.33984375, "num_input_tokens_seen": 515607424, "step": 9203 }, { "epoch": 20.498886414253896, "grad_norm": 24.811115264892578, "learning_rate": 1e-06, "loss": 0.4147, "num_input_tokens_seen": 515661964, "step": 9204 }, { "epoch": 20.498886414253896, "loss": 0.4160168766975403, "loss_ce": 9.280447557102889e-05, "loss_iou": 0.1806640625, "loss_num": 0.0107421875, "loss_xval": 0.416015625, "num_input_tokens_seen": 515661964, "step": 9204 }, { "epoch": 20.501113585746104, "grad_norm": 27.92363929748535, "learning_rate": 1e-06, "loss": 0.3021, "num_input_tokens_seen": 515717596, "step": 9205 }, { "epoch": 20.501113585746104, "loss": 0.39837294816970825, "loss_ce": 5.751060962211341e-05, "loss_iou": 0.169921875, "loss_num": 0.01171875, "loss_xval": 0.3984375, "num_input_tokens_seen": 515717596, "step": 9205 }, { "epoch": 20.50334075723831, "grad_norm": 14.264836311340332, "learning_rate": 1e-06, "loss": 0.4384, "num_input_tokens_seen": 515773496, "step": 9206 }, { "epoch": 20.50334075723831, "loss": 0.35908016562461853, "loss_ce": 7.139322406146675e-05, "loss_iou": 0.1494140625, "loss_num": 0.0120849609375, "loss_xval": 0.359375, "num_input_tokens_seen": 515773496, "step": 9206 }, { "epoch": 20.505567928730514, "grad_norm": 17.64116668701172, "learning_rate": 1e-06, "loss": 0.432, "num_input_tokens_seen": 515827180, "step": 9207 }, { "epoch": 20.505567928730514, "loss": 0.4668075442314148, "loss_ce": 7.171083416324109e-05, "loss_iou": 0.1962890625, "loss_num": 0.01495361328125, "loss_xval": 0.466796875, "num_input_tokens_seen": 515827180, "step": 9207 }, { "epoch": 20.50779510022272, "grad_norm": 19.3404541015625, "learning_rate": 1e-06, "loss": 0.3338, "num_input_tokens_seen": 515881772, "step": 9208 }, { "epoch": 20.50779510022272, "loss": 0.31878164410591125, "loss_ce": 5.6054937886074185e-05, "loss_iou": 0.146484375, "loss_num": 0.0050048828125, "loss_xval": 0.318359375, "num_input_tokens_seen": 515881772, "step": 9208 }, { "epoch": 20.510022271714924, "grad_norm": 19.894765853881836, "learning_rate": 1e-06, "loss": 0.3582, "num_input_tokens_seen": 515938532, "step": 9209 }, { "epoch": 20.510022271714924, "loss": 0.3078060746192932, "loss_ce": 6.68221473461017e-05, "loss_iou": 0.1279296875, "loss_num": 0.0106201171875, "loss_xval": 0.30859375, "num_input_tokens_seen": 515938532, "step": 9209 }, { "epoch": 20.51224944320713, "grad_norm": 20.052772521972656, "learning_rate": 1e-06, "loss": 0.4125, "num_input_tokens_seen": 515996532, "step": 9210 }, { "epoch": 20.51224944320713, "loss": 0.5830047130584717, "loss_ce": 5.793360833195038e-05, "loss_iou": 0.248046875, "loss_num": 0.017333984375, "loss_xval": 0.58203125, "num_input_tokens_seen": 515996532, "step": 9210 }, { "epoch": 20.514476614699333, "grad_norm": 24.21659278869629, "learning_rate": 1e-06, "loss": 0.313, "num_input_tokens_seen": 516052064, "step": 9211 }, { "epoch": 20.514476614699333, "loss": 0.323422372341156, "loss_ce": 5.810863513033837e-05, "loss_iou": 0.1396484375, "loss_num": 0.00872802734375, "loss_xval": 0.32421875, "num_input_tokens_seen": 516052064, "step": 9211 }, { "epoch": 20.51670378619154, "grad_norm": 18.89595603942871, "learning_rate": 1e-06, "loss": 0.2474, "num_input_tokens_seen": 516109116, "step": 9212 }, { "epoch": 20.51670378619154, "loss": 0.18618465960025787, "loss_ce": 5.79521874897182e-05, "loss_iou": 0.08203125, "loss_num": 0.004364013671875, "loss_xval": 0.1865234375, "num_input_tokens_seen": 516109116, "step": 9212 }, { "epoch": 20.518930957683743, "grad_norm": 22.48140525817871, "learning_rate": 1e-06, "loss": 0.3139, "num_input_tokens_seen": 516162824, "step": 9213 }, { "epoch": 20.518930957683743, "loss": 0.28497621417045593, "loss_ce": 6.412078801076859e-05, "loss_iou": 0.1259765625, "loss_num": 0.0067138671875, "loss_xval": 0.28515625, "num_input_tokens_seen": 516162824, "step": 9213 }, { "epoch": 20.521158129175948, "grad_norm": 20.399208068847656, "learning_rate": 1e-06, "loss": 0.4476, "num_input_tokens_seen": 516216872, "step": 9214 }, { "epoch": 20.521158129175948, "loss": 0.6123678684234619, "loss_ce": 6.322791159618646e-05, "loss_iou": 0.2734375, "loss_num": 0.012939453125, "loss_xval": 0.61328125, "num_input_tokens_seen": 516216872, "step": 9214 }, { "epoch": 20.523385300668153, "grad_norm": 23.317928314208984, "learning_rate": 1e-06, "loss": 0.4519, "num_input_tokens_seen": 516269896, "step": 9215 }, { "epoch": 20.523385300668153, "loss": 0.4913950562477112, "loss_ce": 6.203198427101597e-05, "loss_iou": 0.2099609375, "loss_num": 0.01434326171875, "loss_xval": 0.4921875, "num_input_tokens_seen": 516269896, "step": 9215 }, { "epoch": 20.525612472160358, "grad_norm": 24.348512649536133, "learning_rate": 1e-06, "loss": 0.5282, "num_input_tokens_seen": 516324752, "step": 9216 }, { "epoch": 20.525612472160358, "loss": 0.30804580450057983, "loss_ce": 6.23964297119528e-05, "loss_iou": 0.146484375, "loss_num": 0.0029449462890625, "loss_xval": 0.30859375, "num_input_tokens_seen": 516324752, "step": 9216 }, { "epoch": 20.527839643652563, "grad_norm": 19.019367218017578, "learning_rate": 1e-06, "loss": 0.3523, "num_input_tokens_seen": 516381168, "step": 9217 }, { "epoch": 20.527839643652563, "loss": 0.34173130989074707, "loss_ce": 5.6528966524638236e-05, "loss_iou": 0.15625, "loss_num": 0.005767822265625, "loss_xval": 0.341796875, "num_input_tokens_seen": 516381168, "step": 9217 }, { "epoch": 20.530066815144767, "grad_norm": 14.334078788757324, "learning_rate": 1e-06, "loss": 0.4507, "num_input_tokens_seen": 516438516, "step": 9218 }, { "epoch": 20.530066815144767, "loss": 0.6355735063552856, "loss_ce": 7.543759420514107e-05, "loss_iou": 0.26953125, "loss_num": 0.0196533203125, "loss_xval": 0.63671875, "num_input_tokens_seen": 516438516, "step": 9218 }, { "epoch": 20.532293986636972, "grad_norm": 24.67681312561035, "learning_rate": 1e-06, "loss": 0.5983, "num_input_tokens_seen": 516492580, "step": 9219 }, { "epoch": 20.532293986636972, "loss": 0.5057117938995361, "loss_ce": 0.00015756976790726185, "loss_iou": 0.1962890625, "loss_num": 0.0224609375, "loss_xval": 0.50390625, "num_input_tokens_seen": 516492580, "step": 9219 }, { "epoch": 20.534521158129177, "grad_norm": 25.80486297607422, "learning_rate": 1e-06, "loss": 0.4122, "num_input_tokens_seen": 516546048, "step": 9220 }, { "epoch": 20.534521158129177, "loss": 0.5275466442108154, "loss_ce": 8.086032175924629e-05, "loss_iou": 0.2333984375, "loss_num": 0.01214599609375, "loss_xval": 0.52734375, "num_input_tokens_seen": 516546048, "step": 9220 }, { "epoch": 20.536748329621382, "grad_norm": 19.290876388549805, "learning_rate": 1e-06, "loss": 0.3358, "num_input_tokens_seen": 516600172, "step": 9221 }, { "epoch": 20.536748329621382, "loss": 0.40956592559814453, "loss_ce": 6.581825437024236e-05, "loss_iou": 0.1904296875, "loss_num": 0.005767822265625, "loss_xval": 0.41015625, "num_input_tokens_seen": 516600172, "step": 9221 }, { "epoch": 20.538975501113587, "grad_norm": 14.446426391601562, "learning_rate": 1e-06, "loss": 0.3625, "num_input_tokens_seen": 516655008, "step": 9222 }, { "epoch": 20.538975501113587, "loss": 0.4629659950733185, "loss_ce": 7.537858618889004e-05, "loss_iou": 0.19140625, "loss_num": 0.0162353515625, "loss_xval": 0.462890625, "num_input_tokens_seen": 516655008, "step": 9222 }, { "epoch": 20.54120267260579, "grad_norm": 20.61759376525879, "learning_rate": 1e-06, "loss": 0.3554, "num_input_tokens_seen": 516713012, "step": 9223 }, { "epoch": 20.54120267260579, "loss": 0.2745492458343506, "loss_ce": 7.413580169668421e-05, "loss_iou": 0.12109375, "loss_num": 0.006561279296875, "loss_xval": 0.275390625, "num_input_tokens_seen": 516713012, "step": 9223 }, { "epoch": 20.543429844097997, "grad_norm": 15.4387788772583, "learning_rate": 1e-06, "loss": 0.3891, "num_input_tokens_seen": 516771928, "step": 9224 }, { "epoch": 20.543429844097997, "loss": 0.21673524379730225, "loss_ce": 6.0440983361331746e-05, "loss_iou": 0.09912109375, "loss_num": 0.003662109375, "loss_xval": 0.216796875, "num_input_tokens_seen": 516771928, "step": 9224 }, { "epoch": 20.5456570155902, "grad_norm": 73.51461791992188, "learning_rate": 1e-06, "loss": 0.3232, "num_input_tokens_seen": 516827052, "step": 9225 }, { "epoch": 20.5456570155902, "loss": 0.37042248249053955, "loss_ce": 6.116791337262839e-05, "loss_iou": 0.17578125, "loss_num": 0.0037994384765625, "loss_xval": 0.37109375, "num_input_tokens_seen": 516827052, "step": 9225 }, { "epoch": 20.547884187082406, "grad_norm": 18.046146392822266, "learning_rate": 1e-06, "loss": 0.3615, "num_input_tokens_seen": 516883920, "step": 9226 }, { "epoch": 20.547884187082406, "loss": 0.335880845785141, "loss_ce": 6.541566108353436e-05, "loss_iou": 0.1513671875, "loss_num": 0.0064697265625, "loss_xval": 0.3359375, "num_input_tokens_seen": 516883920, "step": 9226 }, { "epoch": 20.55011135857461, "grad_norm": 24.70758628845215, "learning_rate": 1e-06, "loss": 0.3517, "num_input_tokens_seen": 516939448, "step": 9227 }, { "epoch": 20.55011135857461, "loss": 0.3225664794445038, "loss_ce": 5.6713775848038495e-05, "loss_iou": 0.134765625, "loss_num": 0.01055908203125, "loss_xval": 0.322265625, "num_input_tokens_seen": 516939448, "step": 9227 }, { "epoch": 20.552338530066816, "grad_norm": 18.065723419189453, "learning_rate": 1e-06, "loss": 0.4389, "num_input_tokens_seen": 516994964, "step": 9228 }, { "epoch": 20.552338530066816, "loss": 0.35278570652008057, "loss_ce": 6.352636410156265e-05, "loss_iou": 0.16015625, "loss_num": 0.0064697265625, "loss_xval": 0.353515625, "num_input_tokens_seen": 516994964, "step": 9228 }, { "epoch": 20.55456570155902, "grad_norm": 17.124879837036133, "learning_rate": 1e-06, "loss": 0.4484, "num_input_tokens_seen": 517051364, "step": 9229 }, { "epoch": 20.55456570155902, "loss": 0.5044651031494141, "loss_ce": 7.060293137328699e-05, "loss_iou": 0.2265625, "loss_num": 0.01043701171875, "loss_xval": 0.50390625, "num_input_tokens_seen": 517051364, "step": 9229 }, { "epoch": 20.556792873051226, "grad_norm": 19.566333770751953, "learning_rate": 1e-06, "loss": 0.3415, "num_input_tokens_seen": 517106692, "step": 9230 }, { "epoch": 20.556792873051226, "loss": 0.3661907911300659, "loss_ce": 7.13969930075109e-05, "loss_iou": 0.1640625, "loss_num": 0.007415771484375, "loss_xval": 0.365234375, "num_input_tokens_seen": 517106692, "step": 9230 }, { "epoch": 20.55902004454343, "grad_norm": 16.50565528869629, "learning_rate": 1e-06, "loss": 0.3832, "num_input_tokens_seen": 517162332, "step": 9231 }, { "epoch": 20.55902004454343, "loss": 0.3715935945510864, "loss_ce": 7.258763071149588e-05, "loss_iou": 0.1630859375, "loss_num": 0.00933837890625, "loss_xval": 0.37109375, "num_input_tokens_seen": 517162332, "step": 9231 }, { "epoch": 20.561247216035635, "grad_norm": 26.36023712158203, "learning_rate": 1e-06, "loss": 0.3381, "num_input_tokens_seen": 517219120, "step": 9232 }, { "epoch": 20.561247216035635, "loss": 0.26105207204818726, "loss_ce": 6.5757536503952e-05, "loss_iou": 0.119140625, "loss_num": 0.004547119140625, "loss_xval": 0.26171875, "num_input_tokens_seen": 517219120, "step": 9232 }, { "epoch": 20.56347438752784, "grad_norm": 40.432926177978516, "learning_rate": 1e-06, "loss": 0.5472, "num_input_tokens_seen": 517273304, "step": 9233 }, { "epoch": 20.56347438752784, "loss": 0.3474746346473694, "loss_ce": 6.25449392828159e-05, "loss_iou": 0.162109375, "loss_num": 0.00482177734375, "loss_xval": 0.34765625, "num_input_tokens_seen": 517273304, "step": 9233 }, { "epoch": 20.565701559020045, "grad_norm": 37.93396759033203, "learning_rate": 1e-06, "loss": 0.3976, "num_input_tokens_seen": 517327196, "step": 9234 }, { "epoch": 20.565701559020045, "loss": 0.4561777412891388, "loss_ce": 6.202464282978326e-05, "loss_iou": 0.1962890625, "loss_num": 0.01263427734375, "loss_xval": 0.45703125, "num_input_tokens_seen": 517327196, "step": 9234 }, { "epoch": 20.56792873051225, "grad_norm": 16.984933853149414, "learning_rate": 1e-06, "loss": 0.3733, "num_input_tokens_seen": 517384080, "step": 9235 }, { "epoch": 20.56792873051225, "loss": 0.4556872844696045, "loss_ce": 5.983790470054373e-05, "loss_iou": 0.1826171875, "loss_num": 0.0179443359375, "loss_xval": 0.455078125, "num_input_tokens_seen": 517384080, "step": 9235 }, { "epoch": 20.570155902004455, "grad_norm": 9.283608436584473, "learning_rate": 1e-06, "loss": 0.3774, "num_input_tokens_seen": 517438320, "step": 9236 }, { "epoch": 20.570155902004455, "loss": 0.4296599328517914, "loss_ce": 0.00011740654008463025, "loss_iou": 0.18359375, "loss_num": 0.01220703125, "loss_xval": 0.4296875, "num_input_tokens_seen": 517438320, "step": 9236 }, { "epoch": 20.57238307349666, "grad_norm": 16.504417419433594, "learning_rate": 1e-06, "loss": 0.3546, "num_input_tokens_seen": 517496136, "step": 9237 }, { "epoch": 20.57238307349666, "loss": 0.3683554530143738, "loss_ce": 6.933379336260259e-05, "loss_iou": 0.16015625, "loss_num": 0.00958251953125, "loss_xval": 0.369140625, "num_input_tokens_seen": 517496136, "step": 9237 }, { "epoch": 20.574610244988865, "grad_norm": 19.25970458984375, "learning_rate": 1e-06, "loss": 0.2776, "num_input_tokens_seen": 517553096, "step": 9238 }, { "epoch": 20.574610244988865, "loss": 0.3049623966217041, "loss_ce": 6.127238884801045e-05, "loss_iou": 0.14453125, "loss_num": 0.0030517578125, "loss_xval": 0.3046875, "num_input_tokens_seen": 517553096, "step": 9238 }, { "epoch": 20.57683741648107, "grad_norm": 19.565448760986328, "learning_rate": 1e-06, "loss": 0.3708, "num_input_tokens_seen": 517608736, "step": 9239 }, { "epoch": 20.57683741648107, "loss": 0.23846621811389923, "loss_ce": 6.2903571233619e-05, "loss_iou": 0.09814453125, "loss_num": 0.0084228515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 517608736, "step": 9239 }, { "epoch": 20.579064587973274, "grad_norm": 15.315186500549316, "learning_rate": 1e-06, "loss": 0.4186, "num_input_tokens_seen": 517664492, "step": 9240 }, { "epoch": 20.579064587973274, "loss": 0.5499961972236633, "loss_ce": 6.946978101041168e-05, "loss_iou": 0.2060546875, "loss_num": 0.0277099609375, "loss_xval": 0.55078125, "num_input_tokens_seen": 517664492, "step": 9240 }, { "epoch": 20.58129175946548, "grad_norm": 19.74601173400879, "learning_rate": 1e-06, "loss": 0.4442, "num_input_tokens_seen": 517723296, "step": 9241 }, { "epoch": 20.58129175946548, "loss": 0.39636969566345215, "loss_ce": 6.84402184560895e-05, "loss_iou": 0.1552734375, "loss_num": 0.0169677734375, "loss_xval": 0.396484375, "num_input_tokens_seen": 517723296, "step": 9241 }, { "epoch": 20.583518930957684, "grad_norm": 13.046077728271484, "learning_rate": 1e-06, "loss": 0.2947, "num_input_tokens_seen": 517783636, "step": 9242 }, { "epoch": 20.583518930957684, "loss": 0.2621099352836609, "loss_ce": 5.5477234127465636e-05, "loss_iou": 0.1201171875, "loss_num": 0.004302978515625, "loss_xval": 0.26171875, "num_input_tokens_seen": 517783636, "step": 9242 }, { "epoch": 20.58574610244989, "grad_norm": 15.847582817077637, "learning_rate": 1e-06, "loss": 0.2867, "num_input_tokens_seen": 517837348, "step": 9243 }, { "epoch": 20.58574610244989, "loss": 0.36224034428596497, "loss_ce": 5.771276482846588e-05, "loss_iou": 0.1591796875, "loss_num": 0.00860595703125, "loss_xval": 0.361328125, "num_input_tokens_seen": 517837348, "step": 9243 }, { "epoch": 20.587973273942094, "grad_norm": 18.27277374267578, "learning_rate": 1e-06, "loss": 0.4209, "num_input_tokens_seen": 517892936, "step": 9244 }, { "epoch": 20.587973273942094, "loss": 0.3434508144855499, "loss_ce": 6.703373946947977e-05, "loss_iou": 0.158203125, "loss_num": 0.005523681640625, "loss_xval": 0.34375, "num_input_tokens_seen": 517892936, "step": 9244 }, { "epoch": 20.5902004454343, "grad_norm": 18.745811462402344, "learning_rate": 1e-06, "loss": 0.3061, "num_input_tokens_seen": 517948732, "step": 9245 }, { "epoch": 20.5902004454343, "loss": 0.3197273313999176, "loss_ce": 0.0001472493022447452, "loss_iou": 0.134765625, "loss_num": 0.010009765625, "loss_xval": 0.3203125, "num_input_tokens_seen": 517948732, "step": 9245 }, { "epoch": 20.592427616926503, "grad_norm": 25.77054214477539, "learning_rate": 1e-06, "loss": 0.5403, "num_input_tokens_seen": 518005172, "step": 9246 }, { "epoch": 20.592427616926503, "loss": 0.6770814657211304, "loss_ce": 7.9466997704003e-05, "loss_iou": 0.291015625, "loss_num": 0.0189208984375, "loss_xval": 0.67578125, "num_input_tokens_seen": 518005172, "step": 9246 }, { "epoch": 20.59465478841871, "grad_norm": 14.424224853515625, "learning_rate": 1e-06, "loss": 0.5092, "num_input_tokens_seen": 518059824, "step": 9247 }, { "epoch": 20.59465478841871, "loss": 0.6334977746009827, "loss_ce": 7.492607983294874e-05, "loss_iou": 0.283203125, "loss_num": 0.0135498046875, "loss_xval": 0.6328125, "num_input_tokens_seen": 518059824, "step": 9247 }, { "epoch": 20.596881959910913, "grad_norm": 22.720426559448242, "learning_rate": 1e-06, "loss": 0.3461, "num_input_tokens_seen": 518116568, "step": 9248 }, { "epoch": 20.596881959910913, "loss": 0.3957892954349518, "loss_ce": 9.837795369094238e-05, "loss_iou": 0.1611328125, "loss_num": 0.01470947265625, "loss_xval": 0.396484375, "num_input_tokens_seen": 518116568, "step": 9248 }, { "epoch": 20.599109131403118, "grad_norm": 24.825748443603516, "learning_rate": 1e-06, "loss": 0.4304, "num_input_tokens_seen": 518171416, "step": 9249 }, { "epoch": 20.599109131403118, "loss": 0.44866153597831726, "loss_ce": 5.314835289027542e-05, "loss_iou": 0.197265625, "loss_num": 0.0108642578125, "loss_xval": 0.44921875, "num_input_tokens_seen": 518171416, "step": 9249 }, { "epoch": 20.601336302895323, "grad_norm": 16.588918685913086, "learning_rate": 1e-06, "loss": 0.2945, "num_input_tokens_seen": 518230356, "step": 9250 }, { "epoch": 20.601336302895323, "eval_seeclick_web_CIoU": 0.583829253911972, "eval_seeclick_web_GIoU": 0.5839782357215881, "eval_seeclick_web_IoU": 0.6033029854297638, "eval_seeclick_web_MAE_all": 0.015166566707193851, "eval_seeclick_web_MAE_h": 0.006818295689299703, "eval_seeclick_web_MAE_w": 0.015144134871661663, "eval_seeclick_web_MAE_x_boxes": 0.008378135273233056, "eval_seeclick_web_MAE_y_boxes": 0.021190070547163486, "eval_seeclick_web_inside_bbox": 0.9010416567325592, "eval_seeclick_web_loss": 0.9226139783859253, "eval_seeclick_web_loss_ce": 0.00011066030492656864, "eval_seeclick_web_loss_iou": 0.42724609375, "eval_seeclick_web_loss_num": 0.0122528076171875, "eval_seeclick_web_loss_xval": 0.9150390625, "eval_seeclick_web_runtime": 20.8356, "eval_seeclick_web_samples_per_second": 2.4, "eval_seeclick_web_steps_per_second": 0.096, "num_input_tokens_seen": 518230356, "step": 9250 }, { "epoch": 20.601336302895323, "eval_icons_CIoU": 0.25548499822616577, "eval_icons_GIoU": 0.2894558012485504, "eval_icons_IoU": 0.33956870436668396, "eval_icons_MAE_all": 0.062356239184737206, "eval_icons_MAE_h": 0.029669910669326782, "eval_icons_MAE_w": 0.07730034179985523, "eval_icons_MAE_x_boxes": 0.05503878928720951, "eval_icons_MAE_y_boxes": 0.037660352885723114, "eval_icons_inside_bbox": 0.59375, "eval_icons_loss": 1.7357631921768188, "eval_icons_loss_ce": 0.00013154443513485603, "eval_icons_loss_iou": 0.6806640625, "eval_icons_loss_num": 0.06060218811035156, "eval_icons_loss_xval": 1.6640625, "eval_icons_runtime": 20.933, "eval_icons_samples_per_second": 2.389, "eval_icons_steps_per_second": 0.096, "num_input_tokens_seen": 518230356, "step": 9250 }, { "epoch": 20.601336302895323, "eval_screenspot_CIoU": 0.3744088610013326, "eval_screenspot_GIoU": 0.3934180239836375, "eval_screenspot_IoU": 0.44850159684817, "eval_screenspot_MAE_all": 0.058480776846408844, "eval_screenspot_MAE_h": 0.03973913627366225, "eval_screenspot_MAE_w": 0.06190794085462888, "eval_screenspot_MAE_x_boxes": 0.06778114537398021, "eval_screenspot_MAE_y_boxes": 0.043080691869060196, "eval_screenspot_inside_bbox": 0.7041666706403097, "eval_screenspot_loss": 1.5582458972930908, "eval_screenspot_loss_ce": 0.00013941208211084208, "eval_screenspot_loss_iou": 0.640625, "eval_screenspot_loss_num": 0.06737772623697917, "eval_screenspot_loss_xval": 1.6188151041666667, "eval_screenspot_runtime": 29.263, "eval_screenspot_samples_per_second": 3.041, "eval_screenspot_steps_per_second": 0.103, "num_input_tokens_seen": 518230356, "step": 9250 }, { "epoch": 20.601336302895323, "eval_compot_CIoU": 0.3450569659471512, "eval_compot_GIoU": 0.3522755652666092, "eval_compot_IoU": 0.40288856625556946, "eval_compot_MAE_all": 0.018017619848251343, "eval_compot_MAE_h": 0.008548983139917254, "eval_compot_MAE_w": 0.020043439231812954, "eval_compot_MAE_x_boxes": 0.03034346178174019, "eval_compot_MAE_y_boxes": 0.007265899330377579, "eval_compot_inside_bbox": 0.6302083432674408, "eval_compot_loss": 1.3769763708114624, "eval_compot_loss_ce": 0.00010162792386836372, "eval_compot_loss_iou": 0.633544921875, "eval_compot_loss_num": 0.016551971435546875, "eval_compot_loss_xval": 1.34912109375, "eval_compot_runtime": 22.9916, "eval_compot_samples_per_second": 2.175, "eval_compot_steps_per_second": 0.087, "num_input_tokens_seen": 518230356, "step": 9250 }, { "epoch": 20.601336302895323, "eval_custom_ui_val_CIoU": 0.47359417213333976, "eval_custom_ui_val_GIoU": 0.4787476923730638, "eval_custom_ui_val_IoU": 0.5349448025226593, "eval_custom_ui_val_MAE_all": 0.02652052231132984, "eval_custom_ui_val_MAE_h": 0.013495955590365661, "eval_custom_ui_val_MAE_w": 0.036366066274543606, "eval_custom_ui_val_MAE_x_boxes": 0.03154980060127047, "eval_custom_ui_val_MAE_y_boxes": 0.012846555560827255, "eval_custom_ui_val_inside_bbox": 0.7596450646718343, "eval_custom_ui_val_loss": 1.1579389572143555, "eval_custom_ui_val_loss_ce": 0.00010260268269727628, "eval_custom_ui_val_loss_iou": 0.4972601996527778, "eval_custom_ui_val_loss_num": 0.023241043090820312, "eval_custom_ui_val_loss_xval": 1.1111111111111112, "eval_custom_ui_val_runtime": 58.4246, "eval_custom_ui_val_samples_per_second": 4.536, "eval_custom_ui_val_steps_per_second": 0.154, "num_input_tokens_seen": 518230356, "step": 9250 } ], "logging_steps": 1.0, "max_steps": 22450, "num_input_tokens_seen": 518230356, "num_train_epochs": 50, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.410904613476211e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }