{
  "best_metric": 2.0196783542633057,
  "best_model_checkpoint": "/home/sunggeunan/data/ICL/outputs/lora/SKIML-ICL_mrqa_nq_v3/Meta-Llama-3-8B-Instruct-unanswerable-5Q-0U-0C-qa_first/checkpoint-1025",
  "epoch": 1.9985376553741165,
  "eval_steps": 500,
  "global_step": 1025,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0019497928345113332,
      "grad_norm": 0.2671431005001068,
      "learning_rate": 6.493506493506494e-09,
      "loss": 2.0238,
      "step": 1
    },
    {
      "epoch": 0.0038995856690226664,
      "grad_norm": 0.26295146346092224,
      "learning_rate": 1.2987012987012988e-08,
      "loss": 2.0792,
      "step": 2
    },
    {
      "epoch": 0.005849378503534,
      "grad_norm": 0.26511502265930176,
      "learning_rate": 1.9480519480519478e-08,
      "loss": 2.1298,
      "step": 3
    },
    {
      "epoch": 0.007799171338045333,
      "grad_norm": 0.268216997385025,
      "learning_rate": 2.5974025974025976e-08,
      "loss": 2.0854,
      "step": 4
    },
    {
      "epoch": 0.009748964172556666,
      "grad_norm": 0.2698403000831604,
      "learning_rate": 3.246753246753246e-08,
      "loss": 2.0665,
      "step": 5
    },
    {
      "epoch": 0.011698757007068,
      "grad_norm": 0.2657904624938965,
      "learning_rate": 3.8961038961038956e-08,
      "loss": 2.0213,
      "step": 6
    },
    {
      "epoch": 0.013648549841579332,
      "grad_norm": 0.2607410252094269,
      "learning_rate": 4.545454545454545e-08,
      "loss": 2.0425,
      "step": 7
    },
    {
      "epoch": 0.015598342676090666,
      "grad_norm": 0.28946036100387573,
      "learning_rate": 5.194805194805195e-08,
      "loss": 2.0742,
      "step": 8
    },
    {
      "epoch": 0.017548135510601998,
      "grad_norm": 0.250527948141098,
      "learning_rate": 5.844155844155844e-08,
      "loss": 2.1037,
      "step": 9
    },
    {
      "epoch": 0.01949792834511333,
      "grad_norm": 0.29370346665382385,
      "learning_rate": 6.493506493506492e-08,
      "loss": 2.1355,
      "step": 10
    },
    {
      "epoch": 0.021447721179624665,
      "grad_norm": 0.2751532196998596,
      "learning_rate": 7.142857142857142e-08,
      "loss": 2.1219,
      "step": 11
    },
    {
      "epoch": 0.023397514014136,
      "grad_norm": 0.2966114282608032,
      "learning_rate": 7.792207792207791e-08,
      "loss": 2.1788,
      "step": 12
    },
    {
      "epoch": 0.02534730684864733,
      "grad_norm": 0.24350005388259888,
      "learning_rate": 8.441558441558441e-08,
      "loss": 2.0531,
      "step": 13
    },
    {
      "epoch": 0.027297099683158663,
      "grad_norm": 0.2536744177341461,
      "learning_rate": 9.09090909090909e-08,
      "loss": 2.0485,
      "step": 14
    },
    {
      "epoch": 0.029246892517669997,
      "grad_norm": 0.2583434581756592,
      "learning_rate": 9.74025974025974e-08,
      "loss": 2.0712,
      "step": 15
    },
    {
      "epoch": 0.03119668535218133,
      "grad_norm": 0.25572890043258667,
      "learning_rate": 1.038961038961039e-07,
      "loss": 2.0674,
      "step": 16
    },
    {
      "epoch": 0.03314647818669266,
      "grad_norm": 0.24798272550106049,
      "learning_rate": 1.1038961038961038e-07,
      "loss": 1.9777,
      "step": 17
    },
    {
      "epoch": 0.035096271021203995,
      "grad_norm": 0.25968796014785767,
      "learning_rate": 1.1688311688311688e-07,
      "loss": 2.1233,
      "step": 18
    },
    {
      "epoch": 0.03704606385571533,
      "grad_norm": 0.2510642111301422,
      "learning_rate": 1.2337662337662337e-07,
      "loss": 2.0819,
      "step": 19
    },
    {
      "epoch": 0.03899585669022666,
      "grad_norm": 0.2636696696281433,
      "learning_rate": 1.2987012987012984e-07,
      "loss": 2.1369,
      "step": 20
    },
    {
      "epoch": 0.040945649524738,
      "grad_norm": 0.26741182804107666,
      "learning_rate": 1.3636363636363635e-07,
      "loss": 2.0973,
      "step": 21
    },
    {
      "epoch": 0.04289544235924933,
      "grad_norm": 0.2516593933105469,
      "learning_rate": 1.4285714285714285e-07,
      "loss": 2.1089,
      "step": 22
    },
    {
      "epoch": 0.044845235193760664,
      "grad_norm": 0.2642120122909546,
      "learning_rate": 1.4935064935064935e-07,
      "loss": 2.069,
      "step": 23
    },
    {
      "epoch": 0.046795028028272,
      "grad_norm": 0.2595269978046417,
      "learning_rate": 1.5584415584415582e-07,
      "loss": 2.1304,
      "step": 24
    },
    {
      "epoch": 0.04874482086278333,
      "grad_norm": 0.2557779848575592,
      "learning_rate": 1.6233766233766232e-07,
      "loss": 2.0084,
      "step": 25
    },
    {
      "epoch": 0.05069461369729466,
      "grad_norm": 0.26405468583106995,
      "learning_rate": 1.6883116883116883e-07,
      "loss": 2.0683,
      "step": 26
    },
    {
      "epoch": 0.05264440653180599,
      "grad_norm": 0.2540312111377716,
      "learning_rate": 1.7532467532467533e-07,
      "loss": 2.1389,
      "step": 27
    },
    {
      "epoch": 0.05459419936631733,
      "grad_norm": 0.2732296586036682,
      "learning_rate": 1.818181818181818e-07,
      "loss": 2.0663,
      "step": 28
    },
    {
      "epoch": 0.05654399220082866,
      "grad_norm": 0.2802280783653259,
      "learning_rate": 1.883116883116883e-07,
      "loss": 2.0758,
      "step": 29
    },
    {
      "epoch": 0.058493785035339994,
      "grad_norm": 0.2741639018058777,
      "learning_rate": 1.948051948051948e-07,
      "loss": 2.0638,
      "step": 30
    },
    {
      "epoch": 0.06044357786985133,
      "grad_norm": 0.2648272216320038,
      "learning_rate": 2.012987012987013e-07,
      "loss": 2.0978,
      "step": 31
    },
    {
      "epoch": 0.06239337070436266,
      "grad_norm": 0.2700302004814148,
      "learning_rate": 2.077922077922078e-07,
      "loss": 2.1145,
      "step": 32
    },
    {
      "epoch": 0.064343163538874,
      "grad_norm": 0.24180686473846436,
      "learning_rate": 2.1428571428571426e-07,
      "loss": 2.0752,
      "step": 33
    },
    {
      "epoch": 0.06629295637338532,
      "grad_norm": 0.27451491355895996,
      "learning_rate": 2.2077922077922076e-07,
      "loss": 2.0719,
      "step": 34
    },
    {
      "epoch": 0.06824274920789666,
      "grad_norm": 0.2594657838344574,
      "learning_rate": 2.2727272727272726e-07,
      "loss": 2.0107,
      "step": 35
    },
    {
      "epoch": 0.07019254204240799,
      "grad_norm": 0.26720282435417175,
      "learning_rate": 2.3376623376623376e-07,
      "loss": 2.1045,
      "step": 36
    },
    {
      "epoch": 0.07214233487691933,
      "grad_norm": 0.2727048695087433,
      "learning_rate": 2.4025974025974024e-07,
      "loss": 2.0983,
      "step": 37
    },
    {
      "epoch": 0.07409212771143066,
      "grad_norm": 0.2821039855480194,
      "learning_rate": 2.4675324675324674e-07,
      "loss": 2.1199,
      "step": 38
    },
    {
      "epoch": 0.076041920545942,
      "grad_norm": 0.2540994882583618,
      "learning_rate": 2.532467532467532e-07,
      "loss": 2.0925,
      "step": 39
    },
    {
      "epoch": 0.07799171338045333,
      "grad_norm": 0.2766543924808502,
      "learning_rate": 2.597402597402597e-07,
      "loss": 2.1259,
      "step": 40
    },
    {
      "epoch": 0.07994150621496467,
      "grad_norm": 0.28683698177337646,
      "learning_rate": 2.662337662337662e-07,
      "loss": 2.135,
      "step": 41
    },
    {
      "epoch": 0.081891299049476,
      "grad_norm": 0.25892165303230286,
      "learning_rate": 2.727272727272727e-07,
      "loss": 2.0734,
      "step": 42
    },
    {
      "epoch": 0.08384109188398732,
      "grad_norm": 0.2723507881164551,
      "learning_rate": 2.792207792207792e-07,
      "loss": 2.0313,
      "step": 43
    },
    {
      "epoch": 0.08579088471849866,
      "grad_norm": 0.25262904167175293,
      "learning_rate": 2.857142857142857e-07,
      "loss": 2.0777,
      "step": 44
    },
    {
      "epoch": 0.08774067755300999,
      "grad_norm": 0.26076266169548035,
      "learning_rate": 2.922077922077922e-07,
      "loss": 2.0877,
      "step": 45
    },
    {
      "epoch": 0.08969047038752133,
      "grad_norm": 0.2711774408817291,
      "learning_rate": 2.987012987012987e-07,
      "loss": 2.063,
      "step": 46
    },
    {
      "epoch": 0.09164026322203266,
      "grad_norm": 0.24715273082256317,
      "learning_rate": 3.0519480519480515e-07,
      "loss": 2.0698,
      "step": 47
    },
    {
      "epoch": 0.093590056056544,
      "grad_norm": 0.2721501588821411,
      "learning_rate": 3.1168831168831165e-07,
      "loss": 2.0192,
      "step": 48
    },
    {
      "epoch": 0.09553984889105532,
      "grad_norm": 0.2476457953453064,
      "learning_rate": 3.1818181818181815e-07,
      "loss": 2.0208,
      "step": 49
    },
    {
      "epoch": 0.09748964172556666,
      "grad_norm": 0.26186031103134155,
      "learning_rate": 3.2467532467532465e-07,
      "loss": 2.1028,
      "step": 50
    },
    {
      "epoch": 0.09943943456007799,
      "grad_norm": 0.263841450214386,
      "learning_rate": 3.3116883116883115e-07,
      "loss": 2.071,
      "step": 51
    },
    {
      "epoch": 0.10138922739458932,
      "grad_norm": 0.27216637134552,
      "learning_rate": 3.3766233766233765e-07,
      "loss": 2.0743,
      "step": 52
    },
    {
      "epoch": 0.10333902022910066,
      "grad_norm": 0.25524261593818665,
      "learning_rate": 3.4415584415584415e-07,
      "loss": 2.0426,
      "step": 53
    },
    {
      "epoch": 0.10528881306361199,
      "grad_norm": 0.2809346914291382,
      "learning_rate": 3.5064935064935066e-07,
      "loss": 2.049,
      "step": 54
    },
    {
      "epoch": 0.10723860589812333,
      "grad_norm": 0.25672242045402527,
      "learning_rate": 3.5714285714285716e-07,
      "loss": 2.0213,
      "step": 55
    },
    {
      "epoch": 0.10918839873263465,
      "grad_norm": 0.2544190585613251,
      "learning_rate": 3.636363636363636e-07,
      "loss": 2.0663,
      "step": 56
    },
    {
      "epoch": 0.111138191567146,
      "grad_norm": 0.26028168201446533,
      "learning_rate": 3.701298701298701e-07,
      "loss": 2.0947,
      "step": 57
    },
    {
      "epoch": 0.11308798440165732,
      "grad_norm": 0.26112449169158936,
      "learning_rate": 3.766233766233766e-07,
      "loss": 2.0611,
      "step": 58
    },
    {
      "epoch": 0.11503777723616866,
      "grad_norm": 0.29020223021507263,
      "learning_rate": 3.831168831168831e-07,
      "loss": 2.1048,
      "step": 59
    },
    {
      "epoch": 0.11698757007067999,
      "grad_norm": 0.269167959690094,
      "learning_rate": 3.896103896103896e-07,
      "loss": 2.0392,
      "step": 60
    },
    {
      "epoch": 0.11893736290519133,
      "grad_norm": 0.2823875844478607,
      "learning_rate": 3.961038961038961e-07,
      "loss": 2.1341,
      "step": 61
    },
    {
      "epoch": 0.12088715573970266,
      "grad_norm": 0.27546533942222595,
      "learning_rate": 4.025974025974026e-07,
      "loss": 2.0903,
      "step": 62
    },
    {
      "epoch": 0.12283694857421398,
      "grad_norm": 0.2821657657623291,
      "learning_rate": 4.090909090909091e-07,
      "loss": 2.1028,
      "step": 63
    },
    {
      "epoch": 0.12478674140872532,
      "grad_norm": 0.2886088788509369,
      "learning_rate": 4.155844155844156e-07,
      "loss": 2.0685,
      "step": 64
    },
    {
      "epoch": 0.12673653424323666,
      "grad_norm": 0.3001558482646942,
      "learning_rate": 4.22077922077922e-07,
      "loss": 2.0996,
      "step": 65
    },
    {
      "epoch": 0.128686327077748,
      "grad_norm": 0.24933473765850067,
      "learning_rate": 4.285714285714285e-07,
      "loss": 2.0242,
      "step": 66
    },
    {
      "epoch": 0.13063611991225932,
      "grad_norm": 0.27868619561195374,
      "learning_rate": 4.35064935064935e-07,
      "loss": 2.0535,
      "step": 67
    },
    {
      "epoch": 0.13258591274677065,
      "grad_norm": 0.29242217540740967,
      "learning_rate": 4.415584415584415e-07,
      "loss": 2.0379,
      "step": 68
    },
    {
      "epoch": 0.134535705581282,
      "grad_norm": 0.2707277536392212,
      "learning_rate": 4.48051948051948e-07,
      "loss": 2.0922,
      "step": 69
    },
    {
      "epoch": 0.13648549841579333,
      "grad_norm": 0.2940627336502075,
      "learning_rate": 4.545454545454545e-07,
      "loss": 2.0857,
      "step": 70
    },
    {
      "epoch": 0.13843529125030465,
      "grad_norm": 0.25989463925361633,
      "learning_rate": 4.61038961038961e-07,
      "loss": 2.0664,
      "step": 71
    },
    {
      "epoch": 0.14038508408481598,
      "grad_norm": 0.2827669382095337,
      "learning_rate": 4.675324675324675e-07,
      "loss": 2.0804,
      "step": 72
    },
    {
      "epoch": 0.1423348769193273,
      "grad_norm": 0.2898445725440979,
      "learning_rate": 4.7402597402597397e-07,
      "loss": 2.1116,
      "step": 73
    },
    {
      "epoch": 0.14428466975383866,
      "grad_norm": 0.2953305244445801,
      "learning_rate": 4.805194805194805e-07,
      "loss": 2.0997,
      "step": 74
    },
    {
      "epoch": 0.14623446258835,
      "grad_norm": 0.28880831599235535,
      "learning_rate": 4.87012987012987e-07,
      "loss": 2.0695,
      "step": 75
    },
    {
      "epoch": 0.14818425542286132,
      "grad_norm": 0.2893301844596863,
      "learning_rate": 4.935064935064935e-07,
      "loss": 2.1663,
      "step": 76
    },
    {
      "epoch": 0.15013404825737264,
      "grad_norm": 0.27863314747810364,
      "learning_rate": 5e-07,
      "loss": 2.0468,
      "step": 77
    },
    {
      "epoch": 0.152083841091884,
      "grad_norm": 0.27849143743515015,
      "learning_rate": 4.996572995202193e-07,
      "loss": 2.0909,
      "step": 78
    },
    {
      "epoch": 0.15403363392639532,
      "grad_norm": 0.2688325345516205,
      "learning_rate": 4.993145990404387e-07,
      "loss": 2.1058,
      "step": 79
    },
    {
      "epoch": 0.15598342676090665,
      "grad_norm": 0.2714349627494812,
      "learning_rate": 4.989718985606579e-07,
      "loss": 2.0719,
      "step": 80
    },
    {
      "epoch": 0.15793321959541798,
      "grad_norm": 0.267674058675766,
      "learning_rate": 4.986291980808773e-07,
      "loss": 2.003,
      "step": 81
    },
    {
      "epoch": 0.15988301242992933,
      "grad_norm": 0.26871585845947266,
      "learning_rate": 4.982864976010966e-07,
      "loss": 2.0506,
      "step": 82
    },
    {
      "epoch": 0.16183280526444066,
      "grad_norm": 0.27725961804389954,
      "learning_rate": 4.97943797121316e-07,
      "loss": 2.0908,
      "step": 83
    },
    {
      "epoch": 0.163782598098952,
      "grad_norm": 0.26912689208984375,
      "learning_rate": 4.976010966415353e-07,
      "loss": 2.1065,
      "step": 84
    },
    {
      "epoch": 0.1657323909334633,
      "grad_norm": 0.26862508058547974,
      "learning_rate": 4.972583961617545e-07,
      "loss": 2.0017,
      "step": 85
    },
    {
      "epoch": 0.16768218376797464,
      "grad_norm": 0.2780780792236328,
      "learning_rate": 4.969156956819739e-07,
      "loss": 2.0812,
      "step": 86
    },
    {
      "epoch": 0.169631976602486,
      "grad_norm": 0.2691902816295624,
      "learning_rate": 4.965729952021932e-07,
      "loss": 2.108,
      "step": 87
    },
    {
      "epoch": 0.17158176943699732,
      "grad_norm": 0.25564315915107727,
      "learning_rate": 4.962302947224126e-07,
      "loss": 2.0141,
      "step": 88
    },
    {
      "epoch": 0.17353156227150865,
      "grad_norm": 0.29978710412979126,
      "learning_rate": 4.958875942426319e-07,
      "loss": 2.1087,
      "step": 89
    },
    {
      "epoch": 0.17548135510601998,
      "grad_norm": 0.26945438981056213,
      "learning_rate": 4.955448937628513e-07,
      "loss": 2.0654,
      "step": 90
    },
    {
      "epoch": 0.17743114794053133,
      "grad_norm": 0.2857602834701538,
      "learning_rate": 4.952021932830705e-07,
      "loss": 2.0258,
      "step": 91
    },
    {
      "epoch": 0.17938094077504266,
      "grad_norm": 0.3205603063106537,
      "learning_rate": 4.948594928032899e-07,
      "loss": 2.0839,
      "step": 92
    },
    {
      "epoch": 0.18133073360955398,
      "grad_norm": 0.29022127389907837,
      "learning_rate": 4.945167923235092e-07,
      "loss": 2.063,
      "step": 93
    },
    {
      "epoch": 0.1832805264440653,
      "grad_norm": 0.2677106559276581,
      "learning_rate": 4.941740918437286e-07,
      "loss": 2.0257,
      "step": 94
    },
    {
      "epoch": 0.18523031927857664,
      "grad_norm": 0.2686716318130493,
      "learning_rate": 4.938313913639479e-07,
      "loss": 2.053,
      "step": 95
    },
    {
      "epoch": 0.187180112113088,
      "grad_norm": 0.3096849322319031,
      "learning_rate": 4.934886908841673e-07,
      "loss": 2.0954,
      "step": 96
    },
    {
      "epoch": 0.18912990494759932,
      "grad_norm": 0.29678693413734436,
      "learning_rate": 4.931459904043865e-07,
      "loss": 2.0984,
      "step": 97
    },
    {
      "epoch": 0.19107969778211065,
      "grad_norm": 0.29280567169189453,
      "learning_rate": 4.928032899246059e-07,
      "loss": 2.1523,
      "step": 98
    },
    {
      "epoch": 0.19302949061662197,
      "grad_norm": 0.33339405059814453,
      "learning_rate": 4.924605894448252e-07,
      "loss": 2.1537,
      "step": 99
    },
    {
      "epoch": 0.19497928345113333,
      "grad_norm": 0.2959805727005005,
      "learning_rate": 4.921178889650445e-07,
      "loss": 2.07,
      "step": 100
    },
    {
      "epoch": 0.19692907628564466,
      "grad_norm": 0.2850833535194397,
      "learning_rate": 4.917751884852638e-07,
      "loss": 2.0565,
      "step": 101
    },
    {
      "epoch": 0.19887886912015598,
      "grad_norm": 0.27677983045578003,
      "learning_rate": 4.914324880054832e-07,
      "loss": 2.0252,
      "step": 102
    },
    {
      "epoch": 0.2008286619546673,
      "grad_norm": 0.2881922423839569,
      "learning_rate": 4.910897875257025e-07,
      "loss": 2.1085,
      "step": 103
    },
    {
      "epoch": 0.20277845478917864,
      "grad_norm": 0.28352612257003784,
      "learning_rate": 4.907470870459218e-07,
      "loss": 2.0758,
      "step": 104
    },
    {
      "epoch": 0.20472824762369,
      "grad_norm": 0.2815571427345276,
      "learning_rate": 4.904043865661412e-07,
      "loss": 2.0588,
      "step": 105
    },
    {
      "epoch": 0.20667804045820132,
      "grad_norm": 0.2817777395248413,
      "learning_rate": 4.900616860863605e-07,
      "loss": 2.0751,
      "step": 106
    },
    {
      "epoch": 0.20862783329271264,
      "grad_norm": 0.29829949140548706,
      "learning_rate": 4.897189856065798e-07,
      "loss": 2.0505,
      "step": 107
    },
    {
      "epoch": 0.21057762612722397,
      "grad_norm": 0.2886929214000702,
      "learning_rate": 4.893762851267992e-07,
      "loss": 2.028,
      "step": 108
    },
    {
      "epoch": 0.21252741896173533,
      "grad_norm": 0.28375059366226196,
      "learning_rate": 4.890335846470185e-07,
      "loss": 2.0282,
      "step": 109
    },
    {
      "epoch": 0.21447721179624665,
      "grad_norm": 0.27930572628974915,
      "learning_rate": 4.886908841672378e-07,
      "loss": 2.1027,
      "step": 110
    },
    {
      "epoch": 0.21642700463075798,
      "grad_norm": 0.27910512685775757,
      "learning_rate": 4.883481836874572e-07,
      "loss": 2.1146,
      "step": 111
    },
    {
      "epoch": 0.2183767974652693,
      "grad_norm": 0.286739319562912,
      "learning_rate": 4.880054832076765e-07,
      "loss": 2.0727,
      "step": 112
    },
    {
      "epoch": 0.22032659029978066,
      "grad_norm": 0.2716750502586365,
      "learning_rate": 4.876627827278957e-07,
      "loss": 2.02,
      "step": 113
    },
    {
      "epoch": 0.222276383134292,
      "grad_norm": 0.28050121665000916,
      "learning_rate": 4.873200822481151e-07,
      "loss": 1.9912,
      "step": 114
    },
    {
      "epoch": 0.22422617596880332,
      "grad_norm": 0.31914082169532776,
      "learning_rate": 4.869773817683344e-07,
      "loss": 2.0654,
      "step": 115
    },
    {
      "epoch": 0.22617596880331464,
      "grad_norm": 0.3212663233280182,
      "learning_rate": 4.866346812885538e-07,
      "loss": 2.1145,
      "step": 116
    },
    {
      "epoch": 0.22812576163782597,
      "grad_norm": 0.3040018081665039,
      "learning_rate": 4.862919808087731e-07,
      "loss": 2.1285,
      "step": 117
    },
    {
      "epoch": 0.23007555447233732,
      "grad_norm": 0.3013773560523987,
      "learning_rate": 4.859492803289925e-07,
      "loss": 2.0631,
      "step": 118
    },
    {
      "epoch": 0.23202534730684865,
      "grad_norm": 0.2854544520378113,
      "learning_rate": 4.856065798492117e-07,
      "loss": 2.0701,
      "step": 119
    },
    {
      "epoch": 0.23397514014135998,
      "grad_norm": 0.27997076511383057,
      "learning_rate": 4.852638793694311e-07,
      "loss": 1.9768,
      "step": 120
    },
    {
      "epoch": 0.2359249329758713,
      "grad_norm": 0.2790175974369049,
      "learning_rate": 4.849211788896504e-07,
      "loss": 2.0499,
      "step": 121
    },
    {
      "epoch": 0.23787472581038266,
      "grad_norm": 0.28126639127731323,
      "learning_rate": 4.845784784098698e-07,
      "loss": 2.0691,
      "step": 122
    },
    {
      "epoch": 0.23982451864489399,
      "grad_norm": 0.32007864117622375,
      "learning_rate": 4.842357779300891e-07,
      "loss": 2.0886,
      "step": 123
    },
    {
      "epoch": 0.2417743114794053,
      "grad_norm": 0.3017228841781616,
      "learning_rate": 4.838930774503084e-07,
      "loss": 2.0796,
      "step": 124
    },
    {
      "epoch": 0.24372410431391664,
      "grad_norm": 0.28364625573158264,
      "learning_rate": 4.835503769705277e-07,
      "loss": 2.0737,
      "step": 125
    },
    {
      "epoch": 0.24567389714842797,
      "grad_norm": 0.3120713233947754,
      "learning_rate": 4.83207676490747e-07,
      "loss": 2.0741,
      "step": 126
    },
    {
      "epoch": 0.24762368998293932,
      "grad_norm": 0.293863445520401,
      "learning_rate": 4.828649760109664e-07,
      "loss": 1.9777,
      "step": 127
    },
    {
      "epoch": 0.24957348281745065,
      "grad_norm": 0.2932412326335907,
      "learning_rate": 4.825222755311857e-07,
      "loss": 2.0567,
      "step": 128
    },
    {
      "epoch": 0.251523275651962,
      "grad_norm": 0.29689502716064453,
      "learning_rate": 4.821795750514051e-07,
      "loss": 2.0251,
      "step": 129
    },
    {
      "epoch": 0.25347306848647333,
      "grad_norm": 0.2953934669494629,
      "learning_rate": 4.818368745716243e-07,
      "loss": 2.0826,
      "step": 130
    },
    {
      "epoch": 0.25542286132098463,
      "grad_norm": 0.29008495807647705,
      "learning_rate": 4.814941740918437e-07,
      "loss": 1.9974,
      "step": 131
    },
    {
      "epoch": 0.257372654155496,
      "grad_norm": 0.29402440786361694,
      "learning_rate": 4.81151473612063e-07,
      "loss": 2.1115,
      "step": 132
    },
    {
      "epoch": 0.25932244699000734,
      "grad_norm": 0.313650906085968,
      "learning_rate": 4.808087731322824e-07,
      "loss": 2.0834,
      "step": 133
    },
    {
      "epoch": 0.26127223982451864,
      "grad_norm": 0.2968846261501312,
      "learning_rate": 4.804660726525017e-07,
      "loss": 2.0786,
      "step": 134
    },
    {
      "epoch": 0.26322203265903,
      "grad_norm": 0.30427923798561096,
      "learning_rate": 4.801233721727211e-07,
      "loss": 1.9974,
      "step": 135
    },
    {
      "epoch": 0.2651718254935413,
      "grad_norm": 0.3112437129020691,
      "learning_rate": 4.797806716929403e-07,
      "loss": 2.0837,
      "step": 136
    },
    {
      "epoch": 0.26712161832805265,
      "grad_norm": 0.30960723757743835,
      "learning_rate": 4.794379712131597e-07,
      "loss": 2.1307,
      "step": 137
    },
    {
      "epoch": 0.269071411162564,
      "grad_norm": 0.3101617097854614,
      "learning_rate": 4.79095270733379e-07,
      "loss": 2.0395,
      "step": 138
    },
    {
      "epoch": 0.2710212039970753,
      "grad_norm": 0.2995094358921051,
      "learning_rate": 4.787525702535984e-07,
      "loss": 2.0844,
      "step": 139
    },
    {
      "epoch": 0.27297099683158665,
      "grad_norm": 0.29981735348701477,
      "learning_rate": 4.784098697738176e-07,
      "loss": 2.0474,
      "step": 140
    },
    {
      "epoch": 0.27492078966609795,
      "grad_norm": 0.29965049028396606,
      "learning_rate": 4.78067169294037e-07,
      "loss": 2.0664,
      "step": 141
    },
    {
      "epoch": 0.2768705825006093,
      "grad_norm": 0.31631559133529663,
      "learning_rate": 4.777244688142563e-07,
      "loss": 2.0932,
      "step": 142
    },
    {
      "epoch": 0.27882037533512066,
      "grad_norm": 0.32392817735671997,
      "learning_rate": 4.773817683344756e-07,
      "loss": 2.0404,
      "step": 143
    },
    {
      "epoch": 0.28077016816963196,
      "grad_norm": 0.2919900715351105,
      "learning_rate": 4.77039067854695e-07,
      "loss": 2.0367,
      "step": 144
    },
    {
      "epoch": 0.2827199610041433,
      "grad_norm": 0.3037238121032715,
      "learning_rate": 4.7669636737491434e-07,
      "loss": 2.0741,
      "step": 145
    },
    {
      "epoch": 0.2846697538386546,
      "grad_norm": 0.2894318997859955,
      "learning_rate": 4.7635366689513363e-07,
      "loss": 2.0676,
      "step": 146
    },
    {
      "epoch": 0.28661954667316597,
      "grad_norm": 0.3007095158100128,
      "learning_rate": 4.760109664153529e-07,
      "loss": 2.051,
      "step": 147
    },
    {
      "epoch": 0.2885693395076773,
      "grad_norm": 0.31736671924591064,
      "learning_rate": 4.756682659355723e-07,
      "loss": 2.0587,
      "step": 148
    },
    {
      "epoch": 0.2905191323421886,
      "grad_norm": 0.3223492503166199,
      "learning_rate": 4.753255654557916e-07,
      "loss": 2.0884,
      "step": 149
    },
    {
      "epoch": 0.2924689251767,
      "grad_norm": 0.31644171476364136,
      "learning_rate": 4.749828649760109e-07,
      "loss": 2.128,
      "step": 150
    },
    {
      "epoch": 0.29441871801121133,
      "grad_norm": 0.3055993914604187,
      "learning_rate": 4.746401644962303e-07,
      "loss": 2.0597,
      "step": 151
    },
    {
      "epoch": 0.29636851084572263,
      "grad_norm": 0.3014571964740753,
      "learning_rate": 4.742974640164496e-07,
      "loss": 2.0674,
      "step": 152
    },
    {
      "epoch": 0.298318303680234,
      "grad_norm": 0.33088865876197815,
      "learning_rate": 4.739547635366689e-07,
      "loss": 2.0636,
      "step": 153
    },
    {
      "epoch": 0.3002680965147453,
      "grad_norm": 0.3139593005180359,
      "learning_rate": 4.736120630568883e-07,
      "loss": 2.0674,
      "step": 154
    },
    {
      "epoch": 0.30221788934925664,
      "grad_norm": 0.31804022192955017,
      "learning_rate": 4.732693625771076e-07,
      "loss": 2.1092,
      "step": 155
    },
    {
      "epoch": 0.304167682183768,
      "grad_norm": 0.34043845534324646,
      "learning_rate": 4.729266620973269e-07,
      "loss": 2.0391,
      "step": 156
    },
    {
      "epoch": 0.3061174750182793,
      "grad_norm": 0.34768176078796387,
      "learning_rate": 4.725839616175463e-07,
      "loss": 2.0984,
      "step": 157
    },
    {
      "epoch": 0.30806726785279065,
      "grad_norm": 0.30159029364585876,
      "learning_rate": 4.722412611377656e-07,
      "loss": 2.0085,
      "step": 158
    },
    {
      "epoch": 0.31001706068730195,
      "grad_norm": 0.3267905116081238,
      "learning_rate": 4.718985606579849e-07,
      "loss": 2.0719,
      "step": 159
    },
    {
      "epoch": 0.3119668535218133,
      "grad_norm": 0.3086291551589966,
      "learning_rate": 4.715558601782042e-07,
      "loss": 2.0928,
      "step": 160
    },
    {
      "epoch": 0.31391664635632466,
      "grad_norm": 0.30459094047546387,
      "learning_rate": 4.712131596984236e-07,
      "loss": 2.1044,
      "step": 161
    },
    {
      "epoch": 0.31586643919083596,
      "grad_norm": 0.2868260443210602,
      "learning_rate": 4.7087045921864287e-07,
      "loss": 2.0631,
      "step": 162
    },
    {
      "epoch": 0.3178162320253473,
      "grad_norm": 0.3526155650615692,
      "learning_rate": 4.7052775873886217e-07,
      "loss": 2.0573,
      "step": 163
    },
    {
      "epoch": 0.31976602485985867,
      "grad_norm": 0.3164813220500946,
      "learning_rate": 4.7018505825908157e-07,
      "loss": 2.1207,
      "step": 164
    },
    {
      "epoch": 0.32171581769436997,
      "grad_norm": 0.3223491907119751,
      "learning_rate": 4.6984235777930086e-07,
      "loss": 2.089,
      "step": 165
    },
    {
      "epoch": 0.3236656105288813,
      "grad_norm": 0.3313138484954834,
      "learning_rate": 4.6949965729952016e-07,
      "loss": 2.0777,
      "step": 166
    },
    {
      "epoch": 0.3256154033633926,
      "grad_norm": 0.3372494876384735,
      "learning_rate": 4.6915695681973956e-07,
      "loss": 2.0185,
      "step": 167
    },
    {
      "epoch": 0.327565196197904,
      "grad_norm": 0.3191705346107483,
      "learning_rate": 4.6881425633995885e-07,
      "loss": 2.0505,
      "step": 168
    },
    {
      "epoch": 0.32951498903241533,
      "grad_norm": 0.32238319516181946,
      "learning_rate": 4.6847155586017815e-07,
      "loss": 2.126,
      "step": 169
    },
    {
      "epoch": 0.3314647818669266,
      "grad_norm": 0.31298163533210754,
      "learning_rate": 4.6812885538039755e-07,
      "loss": 2.1064,
      "step": 170
    },
    {
      "epoch": 0.333414574701438,
      "grad_norm": 0.3096555471420288,
      "learning_rate": 4.6778615490061684e-07,
      "loss": 2.0649,
      "step": 171
    },
    {
      "epoch": 0.3353643675359493,
      "grad_norm": 0.3024272620677948,
      "learning_rate": 4.6744345442083614e-07,
      "loss": 2.0508,
      "step": 172
    },
    {
      "epoch": 0.33731416037046064,
      "grad_norm": 0.3325616419315338,
      "learning_rate": 4.671007539410555e-07,
      "loss": 2.1431,
      "step": 173
    },
    {
      "epoch": 0.339263953204972,
      "grad_norm": 0.3665126860141754,
      "learning_rate": 4.6675805346127483e-07,
      "loss": 2.1174,
      "step": 174
    },
    {
      "epoch": 0.3412137460394833,
      "grad_norm": 0.3292168378829956,
      "learning_rate": 4.664153529814941e-07,
      "loss": 2.1029,
      "step": 175
    },
    {
      "epoch": 0.34316353887399464,
      "grad_norm": 0.3286147713661194,
      "learning_rate": 4.6607265250171347e-07,
      "loss": 2.1042,
      "step": 176
    },
    {
      "epoch": 0.34511333170850594,
      "grad_norm": 0.32417264580726624,
      "learning_rate": 4.657299520219328e-07,
      "loss": 2.0901,
      "step": 177
    },
    {
      "epoch": 0.3470631245430173,
      "grad_norm": 0.31667739152908325,
      "learning_rate": 4.653872515421521e-07,
      "loss": 2.0895,
      "step": 178
    },
    {
      "epoch": 0.34901291737752865,
      "grad_norm": 0.3280418813228607,
      "learning_rate": 4.6504455106237146e-07,
      "loss": 2.1237,
      "step": 179
    },
    {
      "epoch": 0.35096271021203995,
      "grad_norm": 0.32828444242477417,
      "learning_rate": 4.647018505825908e-07,
      "loss": 2.0933,
      "step": 180
    },
    {
      "epoch": 0.3529125030465513,
      "grad_norm": 0.3365094065666199,
      "learning_rate": 4.643591501028101e-07,
      "loss": 2.1049,
      "step": 181
    },
    {
      "epoch": 0.35486229588106266,
      "grad_norm": 0.3169403076171875,
      "learning_rate": 4.6401644962302945e-07,
      "loss": 2.0636,
      "step": 182
    },
    {
      "epoch": 0.35681208871557396,
      "grad_norm": 0.31843212246894836,
      "learning_rate": 4.636737491432488e-07,
      "loss": 2.0744,
      "step": 183
    },
    {
      "epoch": 0.3587618815500853,
      "grad_norm": 0.34016114473342896,
      "learning_rate": 4.633310486634681e-07,
      "loss": 2.0572,
      "step": 184
    },
    {
      "epoch": 0.3607116743845966,
      "grad_norm": 0.3435775935649872,
      "learning_rate": 4.6298834818368744e-07,
      "loss": 2.0702,
      "step": 185
    },
    {
      "epoch": 0.36266146721910797,
      "grad_norm": 0.32756081223487854,
      "learning_rate": 4.6264564770390674e-07,
      "loss": 2.0219,
      "step": 186
    },
    {
      "epoch": 0.3646112600536193,
      "grad_norm": 0.3173263370990753,
      "learning_rate": 4.623029472241261e-07,
      "loss": 2.0134,
      "step": 187
    },
    {
      "epoch": 0.3665610528881306,
      "grad_norm": 0.33062443137168884,
      "learning_rate": 4.6196024674434543e-07,
      "loss": 2.0508,
      "step": 188
    },
    {
      "epoch": 0.368510845722642,
      "grad_norm": 0.3294820785522461,
      "learning_rate": 4.616175462645647e-07,
      "loss": 1.9935,
      "step": 189
    },
    {
      "epoch": 0.3704606385571533,
      "grad_norm": 0.3417966663837433,
      "learning_rate": 4.6127484578478407e-07,
      "loss": 2.0486,
      "step": 190
    },
    {
      "epoch": 0.37241043139166463,
      "grad_norm": 0.35238054394721985,
      "learning_rate": 4.609321453050034e-07,
      "loss": 2.0854,
      "step": 191
    },
    {
      "epoch": 0.374360224226176,
      "grad_norm": 0.3305458426475525,
      "learning_rate": 4.605894448252227e-07,
      "loss": 2.0449,
      "step": 192
    },
    {
      "epoch": 0.3763100170606873,
      "grad_norm": 0.324318528175354,
      "learning_rate": 4.6024674434544206e-07,
      "loss": 2.1153,
      "step": 193
    },
    {
      "epoch": 0.37825980989519864,
      "grad_norm": 0.3373543322086334,
      "learning_rate": 4.599040438656614e-07,
      "loss": 2.0677,
      "step": 194
    },
    {
      "epoch": 0.38020960272971,
      "grad_norm": 0.345115602016449,
      "learning_rate": 4.595613433858807e-07,
      "loss": 2.0312,
      "step": 195
    },
    {
      "epoch": 0.3821593955642213,
      "grad_norm": 0.3340489864349365,
      "learning_rate": 4.5921864290610005e-07,
      "loss": 1.9848,
      "step": 196
    },
    {
      "epoch": 0.38410918839873265,
      "grad_norm": 0.3615861237049103,
      "learning_rate": 4.588759424263194e-07,
      "loss": 2.0471,
      "step": 197
    },
    {
      "epoch": 0.38605898123324395,
      "grad_norm": 0.3380940854549408,
      "learning_rate": 4.585332419465387e-07,
      "loss": 2.0481,
      "step": 198
    },
    {
      "epoch": 0.3880087740677553,
      "grad_norm": 0.3478194773197174,
      "learning_rate": 4.58190541466758e-07,
      "loss": 2.0324,
      "step": 199
    },
    {
      "epoch": 0.38995856690226666,
      "grad_norm": 0.34738266468048096,
      "learning_rate": 4.578478409869774e-07,
      "loss": 2.0864,
      "step": 200
    },
    {
      "epoch": 0.39190835973677796,
      "grad_norm": 0.3694723844528198,
      "learning_rate": 4.575051405071967e-07,
      "loss": 2.1574,
      "step": 201
    },
    {
      "epoch": 0.3938581525712893,
      "grad_norm": 0.3413209617137909,
      "learning_rate": 4.57162440027416e-07,
      "loss": 2.067,
      "step": 202
    },
    {
      "epoch": 0.3958079454058006,
      "grad_norm": 0.3256085515022278,
      "learning_rate": 4.568197395476354e-07,
      "loss": 2.0749,
      "step": 203
    },
    {
      "epoch": 0.39775773824031196,
      "grad_norm": 0.3281763792037964,
      "learning_rate": 4.5647703906785467e-07,
      "loss": 2.0431,
      "step": 204
    },
    {
      "epoch": 0.3997075310748233,
      "grad_norm": 0.3446051776409149,
      "learning_rate": 4.5613433858807397e-07,
      "loss": 2.011,
      "step": 205
    },
    {
      "epoch": 0.4016573239093346,
      "grad_norm": 0.3425387442111969,
      "learning_rate": 4.5579163810829337e-07,
      "loss": 2.0987,
      "step": 206
    },
    {
      "epoch": 0.403607116743846,
      "grad_norm": 0.33923473954200745,
      "learning_rate": 4.5544893762851266e-07,
      "loss": 2.0777,
      "step": 207
    },
    {
      "epoch": 0.40555690957835727,
      "grad_norm": 0.34710973501205444,
      "learning_rate": 4.5510623714873196e-07,
      "loss": 2.0662,
      "step": 208
    },
    {
      "epoch": 0.4075067024128686,
      "grad_norm": 0.33852049708366394,
      "learning_rate": 4.5476353666895136e-07,
      "loss": 2.0872,
      "step": 209
    },
    {
      "epoch": 0.40945649524738,
      "grad_norm": 0.342153400182724,
      "learning_rate": 4.5442083618917065e-07,
      "loss": 2.0414,
      "step": 210
    },
    {
      "epoch": 0.4114062880818913,
      "grad_norm": 0.34867721796035767,
      "learning_rate": 4.5407813570938995e-07,
      "loss": 2.1128,
      "step": 211
    },
    {
      "epoch": 0.41335608091640264,
      "grad_norm": 0.33942094445228577,
      "learning_rate": 4.537354352296093e-07,
      "loss": 2.0786,
      "step": 212
    },
    {
      "epoch": 0.415305873750914,
      "grad_norm": 0.33538249135017395,
      "learning_rate": 4.5339273474982864e-07,
      "loss": 2.0332,
      "step": 213
    },
    {
      "epoch": 0.4172556665854253,
      "grad_norm": 0.34453144669532776,
      "learning_rate": 4.5305003427004794e-07,
      "loss": 2.0629,
      "step": 214
    },
    {
      "epoch": 0.41920545941993664,
      "grad_norm": 0.35166001319885254,
      "learning_rate": 4.527073337902673e-07,
      "loss": 2.0881,
      "step": 215
    },
    {
      "epoch": 0.42115525225444794,
      "grad_norm": 0.3170466721057892,
      "learning_rate": 4.5236463331048663e-07,
      "loss": 2.0508,
      "step": 216
    },
    {
      "epoch": 0.4231050450889593,
      "grad_norm": 0.3201327919960022,
      "learning_rate": 4.520219328307059e-07,
      "loss": 2.0147,
      "step": 217
    },
    {
      "epoch": 0.42505483792347065,
      "grad_norm": 0.34361732006073,
      "learning_rate": 4.5167923235092527e-07,
      "loss": 2.084,
      "step": 218
    },
    {
      "epoch": 0.42700463075798195,
      "grad_norm": 0.3500427305698395,
      "learning_rate": 4.513365318711446e-07,
      "loss": 2.0568,
      "step": 219
    },
    {
      "epoch": 0.4289544235924933,
      "grad_norm": 0.34151604771614075,
      "learning_rate": 4.509938313913639e-07,
      "loss": 2.0366,
      "step": 220
    },
    {
      "epoch": 0.4309042164270046,
      "grad_norm": 0.3297358751296997,
      "learning_rate": 4.5065113091158326e-07,
      "loss": 2.0639,
      "step": 221
    },
    {
      "epoch": 0.43285400926151596,
      "grad_norm": 0.3623073995113373,
      "learning_rate": 4.503084304318026e-07,
      "loss": 2.0477,
      "step": 222
    },
    {
      "epoch": 0.4348038020960273,
      "grad_norm": 0.34618520736694336,
      "learning_rate": 4.499657299520219e-07,
      "loss": 2.1036,
      "step": 223
    },
    {
      "epoch": 0.4367535949305386,
      "grad_norm": 0.3289443850517273,
      "learning_rate": 4.4962302947224125e-07,
      "loss": 2.0026,
      "step": 224
    },
    {
      "epoch": 0.43870338776504997,
      "grad_norm": 0.3390786349773407,
      "learning_rate": 4.4928032899246055e-07,
      "loss": 2.0208,
      "step": 225
    },
    {
      "epoch": 0.4406531805995613,
      "grad_norm": 0.3597511351108551,
      "learning_rate": 4.489376285126799e-07,
      "loss": 2.1259,
      "step": 226
    },
    {
      "epoch": 0.4426029734340726,
      "grad_norm": 0.3647196888923645,
      "learning_rate": 4.4859492803289924e-07,
      "loss": 2.1048,
      "step": 227
    },
    {
      "epoch": 0.444552766268584,
      "grad_norm": 0.35180747509002686,
      "learning_rate": 4.4825222755311854e-07,
      "loss": 2.0439,
      "step": 228
    },
    {
      "epoch": 0.4465025591030953,
      "grad_norm": 0.35504230856895447,
      "learning_rate": 4.479095270733379e-07,
      "loss": 2.0845,
      "step": 229
    },
    {
      "epoch": 0.44845235193760663,
      "grad_norm": 0.3500707447528839,
      "learning_rate": 4.4756682659355723e-07,
      "loss": 2.0717,
      "step": 230
    },
    {
      "epoch": 0.450402144772118,
      "grad_norm": 0.34788116812705994,
      "learning_rate": 4.472241261137765e-07,
      "loss": 2.1076,
      "step": 231
    },
    {
      "epoch": 0.4523519376066293,
      "grad_norm": 0.3553301990032196,
      "learning_rate": 4.4688142563399587e-07,
      "loss": 2.0512,
      "step": 232
    },
    {
      "epoch": 0.45430173044114064,
      "grad_norm": 0.3606579005718231,
      "learning_rate": 4.465387251542152e-07,
      "loss": 2.1154,
      "step": 233
    },
    {
      "epoch": 0.45625152327565194,
      "grad_norm": 0.3678739368915558,
      "learning_rate": 4.461960246744345e-07,
      "loss": 2.0755,
      "step": 234
    },
    {
      "epoch": 0.4582013161101633,
      "grad_norm": 0.3320152461528778,
      "learning_rate": 4.4585332419465386e-07,
      "loss": 2.0402,
      "step": 235
    },
    {
      "epoch": 0.46015110894467465,
      "grad_norm": 0.3439280688762665,
      "learning_rate": 4.455106237148732e-07,
      "loss": 2.0674,
      "step": 236
    },
    {
      "epoch": 0.46210090177918595,
      "grad_norm": 0.34789469838142395,
      "learning_rate": 4.451679232350925e-07,
      "loss": 2.0616,
      "step": 237
    },
    {
      "epoch": 0.4640506946136973,
      "grad_norm": 0.35700955986976624,
      "learning_rate": 4.448252227553118e-07,
      "loss": 2.0678,
      "step": 238
    },
    {
      "epoch": 0.4660004874482086,
      "grad_norm": 0.33981651067733765,
      "learning_rate": 4.444825222755312e-07,
      "loss": 2.0552,
      "step": 239
    },
    {
      "epoch": 0.46795028028271995,
      "grad_norm": 0.36125004291534424,
      "learning_rate": 4.441398217957505e-07,
      "loss": 2.0739,
      "step": 240
    },
    {
      "epoch": 0.4699000731172313,
      "grad_norm": 0.3675917088985443,
      "learning_rate": 4.437971213159698e-07,
      "loss": 2.0341,
      "step": 241
    },
    {
      "epoch": 0.4718498659517426,
      "grad_norm": 0.36773043870925903,
      "learning_rate": 4.434544208361892e-07,
      "loss": 2.1091,
      "step": 242
    },
    {
      "epoch": 0.47379965878625396,
      "grad_norm": 0.34321659803390503,
      "learning_rate": 4.431117203564085e-07,
      "loss": 2.0189,
      "step": 243
    },
    {
      "epoch": 0.4757494516207653,
      "grad_norm": 0.36672836542129517,
      "learning_rate": 4.427690198766278e-07,
      "loss": 2.064,
      "step": 244
    },
    {
      "epoch": 0.4776992444552766,
      "grad_norm": 0.3681386411190033,
      "learning_rate": 4.424263193968472e-07,
      "loss": 2.0895,
      "step": 245
    },
    {
      "epoch": 0.47964903728978797,
      "grad_norm": 0.36538165807724,
      "learning_rate": 4.4208361891706647e-07,
      "loss": 2.0361,
      "step": 246
    },
    {
      "epoch": 0.48159883012429927,
      "grad_norm": 0.3780750036239624,
      "learning_rate": 4.4174091843728577e-07,
      "loss": 2.053,
      "step": 247
    },
    {
      "epoch": 0.4835486229588106,
      "grad_norm": 0.3471691310405731,
      "learning_rate": 4.4139821795750517e-07,
      "loss": 2.0051,
      "step": 248
    },
    {
      "epoch": 0.485498415793322,
      "grad_norm": 0.36653193831443787,
      "learning_rate": 4.4105551747772446e-07,
      "loss": 2.1492,
      "step": 249
    },
    {
      "epoch": 0.4874482086278333,
      "grad_norm": 0.37775489687919617,
      "learning_rate": 4.4071281699794376e-07,
      "loss": 2.0406,
      "step": 250
    },
    {
      "epoch": 0.48939800146234463,
      "grad_norm": 0.3678765892982483,
      "learning_rate": 4.403701165181631e-07,
      "loss": 2.0804,
      "step": 251
    },
    {
      "epoch": 0.49134779429685593,
      "grad_norm": 0.3415094316005707,
      "learning_rate": 4.4002741603838245e-07,
      "loss": 2.0187,
      "step": 252
    },
    {
      "epoch": 0.4932975871313673,
      "grad_norm": 0.3463176190853119,
      "learning_rate": 4.3968471555860175e-07,
      "loss": 2.0618,
      "step": 253
    },
    {
      "epoch": 0.49524737996587864,
      "grad_norm": 0.3565087616443634,
      "learning_rate": 4.393420150788211e-07,
      "loss": 2.0809,
      "step": 254
    },
    {
      "epoch": 0.49719717280038994,
      "grad_norm": 0.3863977789878845,
      "learning_rate": 4.3899931459904044e-07,
      "loss": 2.038,
      "step": 255
    },
    {
      "epoch": 0.4991469656349013,
      "grad_norm": 0.3344396948814392,
      "learning_rate": 4.3865661411925974e-07,
      "loss": 2.071,
      "step": 256
    },
    {
      "epoch": 0.5010967584694126,
      "grad_norm": 0.3676479160785675,
      "learning_rate": 4.383139136394791e-07,
      "loss": 2.0469,
      "step": 257
    },
    {
      "epoch": 0.503046551303924,
      "grad_norm": 0.36381298303604126,
      "learning_rate": 4.3797121315969843e-07,
      "loss": 2.0795,
      "step": 258
    },
    {
      "epoch": 0.5049963441384353,
      "grad_norm": 0.3515491783618927,
      "learning_rate": 4.376285126799177e-07,
      "loss": 1.9912,
      "step": 259
    },
    {
      "epoch": 0.5069461369729467,
      "grad_norm": 0.3699260354042053,
      "learning_rate": 4.3728581220013707e-07,
      "loss": 2.0829,
      "step": 260
    },
    {
      "epoch": 0.5088959298074579,
      "grad_norm": 0.39030641317367554,
      "learning_rate": 4.3694311172035637e-07,
      "loss": 2.0917,
      "step": 261
    },
    {
      "epoch": 0.5108457226419693,
      "grad_norm": 0.35085543990135193,
      "learning_rate": 4.366004112405757e-07,
      "loss": 2.0517,
      "step": 262
    },
    {
      "epoch": 0.5127955154764806,
      "grad_norm": 0.3542785048484802,
      "learning_rate": 4.3625771076079506e-07,
      "loss": 2.0519,
      "step": 263
    },
    {
      "epoch": 0.514745308310992,
      "grad_norm": 0.36474236845970154,
      "learning_rate": 4.3591501028101436e-07,
      "loss": 1.9739,
      "step": 264
    },
    {
      "epoch": 0.5166951011455033,
      "grad_norm": 0.37260621786117554,
      "learning_rate": 4.355723098012337e-07,
      "loss": 1.9897,
      "step": 265
    },
    {
      "epoch": 0.5186448939800147,
      "grad_norm": 0.3556238114833832,
      "learning_rate": 4.3522960932145305e-07,
      "loss": 2.0196,
      "step": 266
    },
    {
      "epoch": 0.5205946868145259,
      "grad_norm": 0.36310216784477234,
      "learning_rate": 4.3488690884167235e-07,
      "loss": 2.0151,
      "step": 267
    },
    {
      "epoch": 0.5225444796490373,
      "grad_norm": 0.37483158707618713,
      "learning_rate": 4.345442083618917e-07,
      "loss": 2.0929,
      "step": 268
    },
    {
      "epoch": 0.5244942724835486,
      "grad_norm": 0.3717723786830902,
      "learning_rate": 4.3420150788211104e-07,
      "loss": 2.1377,
      "step": 269
    },
    {
      "epoch": 0.52644406531806,
      "grad_norm": 0.34406736493110657,
      "learning_rate": 4.3385880740233034e-07,
      "loss": 2.0109,
      "step": 270
    },
    {
      "epoch": 0.5283938581525713,
      "grad_norm": 0.37034499645233154,
      "learning_rate": 4.335161069225497e-07,
      "loss": 2.0867,
      "step": 271
    },
    {
      "epoch": 0.5303436509870826,
      "grad_norm": 0.3672201931476593,
      "learning_rate": 4.3317340644276903e-07,
      "loss": 2.0828,
      "step": 272
    },
    {
      "epoch": 0.5322934438215939,
      "grad_norm": 0.3954712152481079,
      "learning_rate": 4.328307059629883e-07,
      "loss": 2.0625,
      "step": 273
    },
    {
      "epoch": 0.5342432366561053,
      "grad_norm": 0.35529398918151855,
      "learning_rate": 4.324880054832076e-07,
      "loss": 2.1149,
      "step": 274
    },
    {
      "epoch": 0.5361930294906166,
      "grad_norm": 0.34687867760658264,
      "learning_rate": 4.32145305003427e-07,
      "loss": 2.0161,
      "step": 275
    },
    {
      "epoch": 0.538142822325128,
      "grad_norm": 0.3687521815299988,
      "learning_rate": 4.318026045236463e-07,
      "loss": 2.0979,
      "step": 276
    },
    {
      "epoch": 0.5400926151596392,
      "grad_norm": 0.36186617612838745,
      "learning_rate": 4.314599040438656e-07,
      "loss": 2.0323,
      "step": 277
    },
    {
      "epoch": 0.5420424079941506,
      "grad_norm": 0.34530189633369446,
      "learning_rate": 4.31117203564085e-07,
      "loss": 2.0252,
      "step": 278
    },
    {
      "epoch": 0.543992200828662,
      "grad_norm": 0.36403632164001465,
      "learning_rate": 4.307745030843043e-07,
      "loss": 2.0518,
      "step": 279
    },
    {
      "epoch": 0.5459419936631733,
      "grad_norm": 0.4035261869430542,
      "learning_rate": 4.304318026045236e-07,
      "loss": 2.1648,
      "step": 280
    },
    {
      "epoch": 0.5478917864976847,
      "grad_norm": 0.36672019958496094,
      "learning_rate": 4.30089102124743e-07,
      "loss": 2.0564,
      "step": 281
    },
    {
      "epoch": 0.5498415793321959,
      "grad_norm": 0.386877179145813,
      "learning_rate": 4.297464016449623e-07,
      "loss": 2.0859,
      "step": 282
    },
    {
      "epoch": 0.5517913721667073,
      "grad_norm": 0.38155534863471985,
      "learning_rate": 4.294037011651816e-07,
      "loss": 2.0828,
      "step": 283
    },
    {
      "epoch": 0.5537411650012186,
      "grad_norm": 0.3724847435951233,
      "learning_rate": 4.29061000685401e-07,
      "loss": 2.167,
      "step": 284
    },
    {
      "epoch": 0.55569095783573,
      "grad_norm": 0.3812715411186218,
      "learning_rate": 4.287183002056203e-07,
      "loss": 2.0624,
      "step": 285
    },
    {
      "epoch": 0.5576407506702413,
      "grad_norm": 0.365509569644928,
      "learning_rate": 4.283755997258396e-07,
      "loss": 2.0324,
      "step": 286
    },
    {
      "epoch": 0.5595905435047526,
      "grad_norm": 0.3624550700187683,
      "learning_rate": 4.2803289924605887e-07,
      "loss": 2.0274,
      "step": 287
    },
    {
      "epoch": 0.5615403363392639,
      "grad_norm": 0.38429534435272217,
      "learning_rate": 4.2769019876627827e-07,
      "loss": 2.0031,
      "step": 288
    },
    {
      "epoch": 0.5634901291737753,
      "grad_norm": 0.3589562773704529,
      "learning_rate": 4.2734749828649757e-07,
      "loss": 2.0547,
      "step": 289
    },
    {
      "epoch": 0.5654399220082866,
      "grad_norm": 0.3625582158565521,
      "learning_rate": 4.2700479780671686e-07,
      "loss": 2.0044,
      "step": 290
    },
    {
      "epoch": 0.567389714842798,
      "grad_norm": 0.37126410007476807,
      "learning_rate": 4.2666209732693626e-07,
      "loss": 2.0788,
      "step": 291
    },
    {
      "epoch": 0.5693395076773092,
      "grad_norm": 0.36942729353904724,
      "learning_rate": 4.2631939684715556e-07,
      "loss": 2.0676,
      "step": 292
    },
    {
      "epoch": 0.5712893005118206,
      "grad_norm": 0.3787277936935425,
      "learning_rate": 4.2597669636737485e-07,
      "loss": 2.0491,
      "step": 293
    },
    {
      "epoch": 0.5732390933463319,
      "grad_norm": 0.3843463957309723,
      "learning_rate": 4.2563399588759425e-07,
      "loss": 2.0657,
      "step": 294
    },
    {
      "epoch": 0.5751888861808433,
      "grad_norm": 0.384347140789032,
      "learning_rate": 4.2529129540781355e-07,
      "loss": 2.042,
      "step": 295
    },
    {
      "epoch": 0.5771386790153546,
      "grad_norm": 0.38822734355926514,
      "learning_rate": 4.2494859492803284e-07,
      "loss": 2.1084,
      "step": 296
    },
    {
      "epoch": 0.579088471849866,
      "grad_norm": 0.3850004971027374,
      "learning_rate": 4.2460589444825224e-07,
      "loss": 2.0527,
      "step": 297
    },
    {
      "epoch": 0.5810382646843772,
      "grad_norm": 0.3730074167251587,
      "learning_rate": 4.2426319396847154e-07,
      "loss": 2.0665,
      "step": 298
    },
    {
      "epoch": 0.5829880575188886,
      "grad_norm": 0.3895587623119354,
      "learning_rate": 4.2392049348869083e-07,
      "loss": 2.1166,
      "step": 299
    },
    {
      "epoch": 0.5849378503534,
      "grad_norm": 0.3875929117202759,
      "learning_rate": 4.235777930089102e-07,
      "loss": 2.1165,
      "step": 300
    },
    {
      "epoch": 0.5868876431879113,
      "grad_norm": 0.36664247512817383,
      "learning_rate": 4.232350925291295e-07,
      "loss": 2.0039,
      "step": 301
    },
    {
      "epoch": 0.5888374360224227,
      "grad_norm": 0.3771498501300812,
      "learning_rate": 4.228923920493488e-07,
      "loss": 2.0727,
      "step": 302
    },
    {
      "epoch": 0.5907872288569339,
      "grad_norm": 0.3995096981525421,
      "learning_rate": 4.2254969156956817e-07,
      "loss": 2.0836,
      "step": 303
    },
    {
      "epoch": 0.5927370216914453,
      "grad_norm": 0.3781261444091797,
      "learning_rate": 4.222069910897875e-07,
      "loss": 2.0797,
      "step": 304
    },
    {
      "epoch": 0.5946868145259566,
      "grad_norm": 0.37572017312049866,
      "learning_rate": 4.218642906100068e-07,
      "loss": 2.0363,
      "step": 305
    },
    {
      "epoch": 0.596636607360468,
      "grad_norm": 0.38773536682128906,
      "learning_rate": 4.2152159013022616e-07,
      "loss": 2.0423,
      "step": 306
    },
    {
      "epoch": 0.5985864001949793,
      "grad_norm": 0.37952083349227905,
      "learning_rate": 4.211788896504455e-07,
      "loss": 2.0966,
      "step": 307
    },
    {
      "epoch": 0.6005361930294906,
      "grad_norm": 0.39403635263442993,
      "learning_rate": 4.208361891706648e-07,
      "loss": 2.1212,
      "step": 308
    },
    {
      "epoch": 0.6024859858640019,
      "grad_norm": 0.382625013589859,
      "learning_rate": 4.2049348869088415e-07,
      "loss": 2.0363,
      "step": 309
    },
    {
      "epoch": 0.6044357786985133,
      "grad_norm": 0.3843761682510376,
      "learning_rate": 4.201507882111035e-07,
      "loss": 1.9995,
      "step": 310
    },
    {
      "epoch": 0.6063855715330246,
      "grad_norm": 0.4082648754119873,
      "learning_rate": 4.198080877313228e-07,
      "loss": 2.1265,
      "step": 311
    },
    {
      "epoch": 0.608335364367536,
      "grad_norm": 0.3746339678764343,
      "learning_rate": 4.1946538725154214e-07,
      "loss": 2.0025,
      "step": 312
    },
    {
      "epoch": 0.6102851572020472,
      "grad_norm": 0.38548338413238525,
      "learning_rate": 4.1912268677176143e-07,
      "loss": 2.0764,
      "step": 313
    },
    {
      "epoch": 0.6122349500365586,
      "grad_norm": 0.3702864944934845,
      "learning_rate": 4.187799862919808e-07,
      "loss": 2.0788,
      "step": 314
    },
    {
      "epoch": 0.6141847428710699,
      "grad_norm": 0.3946288824081421,
      "learning_rate": 4.184372858122001e-07,
      "loss": 2.0877,
      "step": 315
    },
    {
      "epoch": 0.6161345357055813,
      "grad_norm": 0.3777286410331726,
      "learning_rate": 4.180945853324194e-07,
      "loss": 1.9863,
      "step": 316
    },
    {
      "epoch": 0.6180843285400927,
      "grad_norm": 0.40816164016723633,
      "learning_rate": 4.1775188485263877e-07,
      "loss": 2.0987,
      "step": 317
    },
    {
      "epoch": 0.6200341213746039,
      "grad_norm": 0.39065074920654297,
      "learning_rate": 4.174091843728581e-07,
      "loss": 2.0629,
      "step": 318
    },
    {
      "epoch": 0.6219839142091153,
      "grad_norm": 0.38007447123527527,
      "learning_rate": 4.170664838930774e-07,
      "loss": 2.0544,
      "step": 319
    },
    {
      "epoch": 0.6239337070436266,
      "grad_norm": 0.3953652083873749,
      "learning_rate": 4.1672378341329676e-07,
      "loss": 2.07,
      "step": 320
    },
    {
      "epoch": 0.625883499878138,
      "grad_norm": 0.38142332434654236,
      "learning_rate": 4.163810829335161e-07,
      "loss": 2.0495,
      "step": 321
    },
    {
      "epoch": 0.6278332927126493,
      "grad_norm": 0.40484854578971863,
      "learning_rate": 4.160383824537354e-07,
      "loss": 2.0341,
      "step": 322
    },
    {
      "epoch": 0.6297830855471606,
      "grad_norm": 0.4031660556793213,
      "learning_rate": 4.1569568197395475e-07,
      "loss": 2.0168,
      "step": 323
    },
    {
      "epoch": 0.6317328783816719,
      "grad_norm": 0.3859906792640686,
      "learning_rate": 4.153529814941741e-07,
      "loss": 2.051,
      "step": 324
    },
    {
      "epoch": 0.6336826712161833,
      "grad_norm": 0.37458735704421997,
      "learning_rate": 4.150102810143934e-07,
      "loss": 2.038,
      "step": 325
    },
    {
      "epoch": 0.6356324640506946,
      "grad_norm": 0.39573705196380615,
      "learning_rate": 4.146675805346127e-07,
      "loss": 2.0308,
      "step": 326
    },
    {
      "epoch": 0.637582256885206,
      "grad_norm": 0.39273601770401,
      "learning_rate": 4.143248800548321e-07,
      "loss": 2.0746,
      "step": 327
    },
    {
      "epoch": 0.6395320497197173,
      "grad_norm": 0.39438948035240173,
      "learning_rate": 4.139821795750514e-07,
      "loss": 2.0568,
      "step": 328
    },
    {
      "epoch": 0.6414818425542286,
      "grad_norm": 0.3938084840774536,
      "learning_rate": 4.1363947909527067e-07,
      "loss": 2.0643,
      "step": 329
    },
    {
      "epoch": 0.6434316353887399,
      "grad_norm": 0.4020846486091614,
      "learning_rate": 4.1329677861549007e-07,
      "loss": 2.0737,
      "step": 330
    },
    {
      "epoch": 0.6453814282232513,
      "grad_norm": 0.413841187953949,
      "learning_rate": 4.1295407813570937e-07,
      "loss": 2.019,
      "step": 331
    },
    {
      "epoch": 0.6473312210577626,
      "grad_norm": 0.39189133048057556,
      "learning_rate": 4.1261137765592866e-07,
      "loss": 2.0795,
      "step": 332
    },
    {
      "epoch": 0.649281013892274,
      "grad_norm": 0.4119293987751007,
      "learning_rate": 4.1226867717614806e-07,
      "loss": 2.0794,
      "step": 333
    },
    {
      "epoch": 0.6512308067267852,
      "grad_norm": 0.40321430563926697,
      "learning_rate": 4.1192597669636736e-07,
      "loss": 2.0249,
      "step": 334
    },
    {
      "epoch": 0.6531805995612966,
      "grad_norm": 0.39300522208213806,
      "learning_rate": 4.1158327621658665e-07,
      "loss": 2.0667,
      "step": 335
    },
    {
      "epoch": 0.655130392395808,
      "grad_norm": 0.39836639165878296,
      "learning_rate": 4.1124057573680605e-07,
      "loss": 2.037,
      "step": 336
    },
    {
      "epoch": 0.6570801852303193,
      "grad_norm": 0.41594526171684265,
      "learning_rate": 4.1089787525702535e-07,
      "loss": 2.0795,
      "step": 337
    },
    {
      "epoch": 0.6590299780648307,
      "grad_norm": 0.3934768736362457,
      "learning_rate": 4.1055517477724464e-07,
      "loss": 2.0045,
      "step": 338
    },
    {
      "epoch": 0.6609797708993419,
      "grad_norm": 0.3954453766345978,
      "learning_rate": 4.10212474297464e-07,
      "loss": 2.0005,
      "step": 339
    },
    {
      "epoch": 0.6629295637338533,
      "grad_norm": 0.42002055048942566,
      "learning_rate": 4.0986977381768334e-07,
      "loss": 2.0781,
      "step": 340
    },
    {
      "epoch": 0.6648793565683646,
      "grad_norm": 0.3964640200138092,
      "learning_rate": 4.0952707333790263e-07,
      "loss": 2.1088,
      "step": 341
    },
    {
      "epoch": 0.666829149402876,
      "grad_norm": 0.3742097318172455,
      "learning_rate": 4.09184372858122e-07,
      "loss": 2.0201,
      "step": 342
    },
    {
      "epoch": 0.6687789422373873,
      "grad_norm": 0.40264692902565,
      "learning_rate": 4.088416723783413e-07,
      "loss": 2.0927,
      "step": 343
    },
    {
      "epoch": 0.6707287350718986,
      "grad_norm": 0.39995405077934265,
      "learning_rate": 4.084989718985606e-07,
      "loss": 2.0783,
      "step": 344
    },
    {
      "epoch": 0.6726785279064099,
      "grad_norm": 0.39974457025527954,
      "learning_rate": 4.0815627141877997e-07,
      "loss": 2.0613,
      "step": 345
    },
    {
      "epoch": 0.6746283207409213,
      "grad_norm": 0.39440110325813293,
      "learning_rate": 4.078135709389993e-07,
      "loss": 2.0963,
      "step": 346
    },
    {
      "epoch": 0.6765781135754326,
      "grad_norm": 0.40362536907196045,
      "learning_rate": 4.074708704592186e-07,
      "loss": 2.138,
      "step": 347
    },
    {
      "epoch": 0.678527906409944,
      "grad_norm": 0.4271102547645569,
      "learning_rate": 4.0712816997943796e-07,
      "loss": 2.0668,
      "step": 348
    },
    {
      "epoch": 0.6804776992444552,
      "grad_norm": 0.3873864412307739,
      "learning_rate": 4.067854694996573e-07,
      "loss": 2.0236,
      "step": 349
    },
    {
      "epoch": 0.6824274920789666,
      "grad_norm": 0.39676573872566223,
      "learning_rate": 4.064427690198766e-07,
      "loss": 2.0723,
      "step": 350
    },
    {
      "epoch": 0.6843772849134779,
      "grad_norm": 0.3926120102405548,
      "learning_rate": 4.0610006854009595e-07,
      "loss": 2.0193,
      "step": 351
    },
    {
      "epoch": 0.6863270777479893,
      "grad_norm": 0.3857557773590088,
      "learning_rate": 4.0575736806031524e-07,
      "loss": 2.0574,
      "step": 352
    },
    {
      "epoch": 0.6882768705825006,
      "grad_norm": 0.4042007327079773,
      "learning_rate": 4.054146675805346e-07,
      "loss": 2.0196,
      "step": 353
    },
    {
      "epoch": 0.6902266634170119,
      "grad_norm": 0.3976573944091797,
      "learning_rate": 4.0507196710075394e-07,
      "loss": 1.9201,
      "step": 354
    },
    {
      "epoch": 0.6921764562515232,
      "grad_norm": 0.38179242610931396,
      "learning_rate": 4.0472926662097323e-07,
      "loss": 2.0551,
      "step": 355
    },
    {
      "epoch": 0.6941262490860346,
      "grad_norm": 0.4144536256790161,
      "learning_rate": 4.043865661411926e-07,
      "loss": 2.0633,
      "step": 356
    },
    {
      "epoch": 0.696076041920546,
      "grad_norm": 0.42070674896240234,
      "learning_rate": 4.040438656614119e-07,
      "loss": 2.1222,
      "step": 357
    },
    {
      "epoch": 0.6980258347550573,
      "grad_norm": 0.394010066986084,
      "learning_rate": 4.037011651816312e-07,
      "loss": 2.0497,
      "step": 358
    },
    {
      "epoch": 0.6999756275895687,
      "grad_norm": 0.40751656889915466,
      "learning_rate": 4.0335846470185057e-07,
      "loss": 2.0554,
      "step": 359
    },
    {
      "epoch": 0.7019254204240799,
      "grad_norm": 0.3723933696746826,
      "learning_rate": 4.030157642220699e-07,
      "loss": 1.9727,
      "step": 360
    },
    {
      "epoch": 0.7038752132585913,
      "grad_norm": 0.3941795825958252,
      "learning_rate": 4.026730637422892e-07,
      "loss": 2.0793,
      "step": 361
    },
    {
      "epoch": 0.7058250060931026,
      "grad_norm": 0.3988247513771057,
      "learning_rate": 4.0233036326250856e-07,
      "loss": 2.1244,
      "step": 362
    },
    {
      "epoch": 0.707774798927614,
      "grad_norm": 0.409525603055954,
      "learning_rate": 4.019876627827279e-07,
      "loss": 2.0778,
      "step": 363
    },
    {
      "epoch": 0.7097245917621253,
      "grad_norm": 0.37638112902641296,
      "learning_rate": 4.016449623029472e-07,
      "loss": 1.9827,
      "step": 364
    },
    {
      "epoch": 0.7116743845966366,
      "grad_norm": 0.41931676864624023,
      "learning_rate": 4.013022618231665e-07,
      "loss": 2.0805,
      "step": 365
    },
    {
      "epoch": 0.7136241774311479,
      "grad_norm": 0.391668438911438,
      "learning_rate": 4.009595613433859e-07,
      "loss": 2.0695,
      "step": 366
    },
    {
      "epoch": 0.7155739702656593,
      "grad_norm": 0.4082440733909607,
      "learning_rate": 4.006168608636052e-07,
      "loss": 2.0232,
      "step": 367
    },
    {
      "epoch": 0.7175237631001706,
|
"grad_norm": 0.41394224762916565, |
|
"learning_rate": 4.002741603838245e-07, |
|
"loss": 2.024, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.719473555934682, |
|
"grad_norm": 0.41648924350738525, |
|
"learning_rate": 3.999314599040439e-07, |
|
"loss": 2.0108, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7214233487691932, |
|
"grad_norm": 0.408218652009964, |
|
"learning_rate": 3.995887594242632e-07, |
|
"loss": 2.0712, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7233731416037046, |
|
"grad_norm": 0.39029547572135925, |
|
"learning_rate": 3.9924605894448247e-07, |
|
"loss": 2.0475, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7253229344382159, |
|
"grad_norm": 0.4242095649242401, |
|
"learning_rate": 3.9890335846470187e-07, |
|
"loss": 2.0507, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.3876708745956421, |
|
"learning_rate": 3.9856065798492117e-07, |
|
"loss": 2.0161, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7292225201072386, |
|
"grad_norm": 0.41027507185935974, |
|
"learning_rate": 3.9821795750514046e-07, |
|
"loss": 2.0544, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7311723129417499, |
|
"grad_norm": 0.4173310697078705, |
|
"learning_rate": 3.9787525702535986e-07, |
|
"loss": 2.0615, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7331221057762612, |
|
"grad_norm": 0.40106937289237976, |
|
"learning_rate": 3.9753255654557916e-07, |
|
"loss": 2.0189, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7350718986107726, |
|
"grad_norm": 0.40185120701789856, |
|
"learning_rate": 3.9718985606579845e-07, |
|
"loss": 2.0624, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.737021691445284, |
|
"grad_norm": 0.39821675419807434, |
|
"learning_rate": 3.968471555860178e-07, |
|
"loss": 2.0664, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7389714842797953, |
|
"grad_norm": 0.4365295171737671, |
|
"learning_rate": 3.9650445510623715e-07, |
|
"loss": 2.065, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7409212771143066, |
|
"grad_norm": 0.40240806341171265, |
|
"learning_rate": 3.9616175462645644e-07, |
|
"loss": 2.0526, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7428710699488179, |
|
"grad_norm": 0.4148831069469452, |
|
"learning_rate": 3.958190541466758e-07, |
|
"loss": 2.1255, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7448208627833293, |
|
"grad_norm": 0.4301227033138275, |
|
"learning_rate": 3.9547635366689514e-07, |
|
"loss": 2.0715, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7467706556178406, |
|
"grad_norm": 0.42958423495292664, |
|
"learning_rate": 3.9513365318711443e-07, |
|
"loss": 2.0762, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.748720448452352, |
|
"grad_norm": 0.40311166644096375, |
|
"learning_rate": 3.947909527073338e-07, |
|
"loss": 2.0102, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7506702412868632, |
|
"grad_norm": 0.41303250193595886, |
|
"learning_rate": 3.944482522275531e-07, |
|
"loss": 2.0435, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7526200341213746, |
|
"grad_norm": 0.4167964458465576, |
|
"learning_rate": 3.941055517477724e-07, |
|
"loss": 2.0648, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7545698269558859, |
|
"grad_norm": 0.39250755310058594, |
|
"learning_rate": 3.9376285126799177e-07, |
|
"loss": 2.032, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7565196197903973, |
|
"grad_norm": 0.41534167528152466, |
|
"learning_rate": 3.9342015078821106e-07, |
|
"loss": 2.023, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7584694126249086, |
|
"grad_norm": 0.4158441424369812, |
|
"learning_rate": 3.930774503084304e-07, |
|
"loss": 2.1015, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.76041920545942, |
|
"grad_norm": 0.39154303073883057, |
|
"learning_rate": 3.9273474982864976e-07, |
|
"loss": 2.0166, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7623689982939312, |
|
"grad_norm": 0.3865329325199127, |
|
"learning_rate": 3.9239204934886905e-07, |
|
"loss": 2.0209, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7643187911284426, |
|
"grad_norm": 0.4046148955821991, |
|
"learning_rate": 3.920493488690884e-07, |
|
"loss": 2.0501, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7662685839629539, |
|
"grad_norm": 0.4096246659755707, |
|
"learning_rate": 3.9170664838930775e-07, |
|
"loss": 2.0377, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7682183767974653, |
|
"grad_norm": 0.40363749861717224, |
|
"learning_rate": 3.9136394790952704e-07, |
|
"loss": 2.0315, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7701681696319767, |
|
"grad_norm": 0.4038202166557312, |
|
"learning_rate": 3.910212474297464e-07, |
|
"loss": 1.9516, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7721179624664879, |
|
"grad_norm": 0.3979615271091461, |
|
"learning_rate": 3.9067854694996574e-07, |
|
"loss": 2.02, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.7740677553009992, |
|
"grad_norm": 0.4166601896286011, |
|
"learning_rate": 3.9033584647018503e-07, |
|
"loss": 2.0672, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7760175481355106, |
|
"grad_norm": 0.4038446545600891, |
|
"learning_rate": 3.899931459904044e-07, |
|
"loss": 2.0183, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.777967340970022, |
|
"grad_norm": 0.4230453670024872, |
|
"learning_rate": 3.896504455106237e-07, |
|
"loss": 2.0234, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7799171338045333, |
|
"grad_norm": 0.4244215190410614, |
|
"learning_rate": 3.89307745030843e-07, |
|
"loss": 2.0863, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7818669266390446, |
|
"grad_norm": 0.42174607515335083, |
|
"learning_rate": 3.889650445510623e-07, |
|
"loss": 2.0775, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.7838167194735559, |
|
"grad_norm": 0.4019846022129059, |
|
"learning_rate": 3.886223440712817e-07, |
|
"loss": 2.0445, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7857665123080673, |
|
"grad_norm": 0.4168083965778351, |
|
"learning_rate": 3.88279643591501e-07, |
|
"loss": 2.0457, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7877163051425786, |
|
"grad_norm": 0.4132064878940582, |
|
"learning_rate": 3.879369431117203e-07, |
|
"loss": 2.0637, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.78966609797709, |
|
"grad_norm": 0.4239768981933594, |
|
"learning_rate": 3.875942426319397e-07, |
|
"loss": 2.0512, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7916158908116012, |
|
"grad_norm": 0.4192203879356384, |
|
"learning_rate": 3.87251542152159e-07, |
|
"loss": 2.0766, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7935656836461126, |
|
"grad_norm": 0.4393591582775116, |
|
"learning_rate": 3.869088416723783e-07, |
|
"loss": 2.0497, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7955154764806239, |
|
"grad_norm": 0.417614221572876, |
|
"learning_rate": 3.865661411925977e-07, |
|
"loss": 2.0518, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.7974652693151353, |
|
"grad_norm": 0.4034237563610077, |
|
"learning_rate": 3.86223440712817e-07, |
|
"loss": 2.0604, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.7994150621496466, |
|
"grad_norm": 0.4287107586860657, |
|
"learning_rate": 3.858807402330363e-07, |
|
"loss": 2.0386, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8013648549841579, |
|
"grad_norm": 0.4140661656856537, |
|
"learning_rate": 3.855380397532557e-07, |
|
"loss": 2.108, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8033146478186692, |
|
"grad_norm": 0.4189471900463104, |
|
"learning_rate": 3.85195339273475e-07, |
|
"loss": 2.0894, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8052644406531806, |
|
"grad_norm": 0.4111238121986389, |
|
"learning_rate": 3.8485263879369427e-07, |
|
"loss": 2.051, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.807214233487692, |
|
"grad_norm": 0.4296090006828308, |
|
"learning_rate": 3.845099383139136e-07, |
|
"loss": 2.0484, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8091640263222033, |
|
"grad_norm": 0.4000217020511627, |
|
"learning_rate": 3.8416723783413297e-07, |
|
"loss": 2.0449, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8111138191567145, |
|
"grad_norm": 0.44013938307762146, |
|
"learning_rate": 3.8382453735435226e-07, |
|
"loss": 2.1467, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8130636119912259, |
|
"grad_norm": 0.4252108633518219, |
|
"learning_rate": 3.834818368745716e-07, |
|
"loss": 2.0725, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8150134048257373, |
|
"grad_norm": 0.41153863072395325, |
|
"learning_rate": 3.8313913639479096e-07, |
|
"loss": 2.0829, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8169631976602486, |
|
"grad_norm": 0.417043536901474, |
|
"learning_rate": 3.8279643591501025e-07, |
|
"loss": 1.9899, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.81891299049476, |
|
"grad_norm": 0.41520485281944275, |
|
"learning_rate": 3.824537354352296e-07, |
|
"loss": 1.9941, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8208627833292713, |
|
"grad_norm": 0.4316999912261963, |
|
"learning_rate": 3.8211103495544895e-07, |
|
"loss": 2.051, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8228125761637826, |
|
"grad_norm": 0.4300172030925751, |
|
"learning_rate": 3.8176833447566824e-07, |
|
"loss": 2.025, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8247623689982939, |
|
"grad_norm": 0.4366534650325775, |
|
"learning_rate": 3.814256339958876e-07, |
|
"loss": 2.1326, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8267121618328053, |
|
"grad_norm": 0.412256121635437, |
|
"learning_rate": 3.8108293351610694e-07, |
|
"loss": 1.9799, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8286619546673166, |
|
"grad_norm": 0.4404711425304413, |
|
"learning_rate": 3.8074023303632623e-07, |
|
"loss": 2.0618, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.830611747501828, |
|
"grad_norm": 0.41743820905685425, |
|
"learning_rate": 3.803975325565456e-07, |
|
"loss": 2.0293, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.8325615403363392, |
|
"grad_norm": 0.40452542901039124, |
|
"learning_rate": 3.8005483207676487e-07, |
|
"loss": 2.0561, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8345113331708506, |
|
"grad_norm": 0.41732680797576904, |
|
"learning_rate": 3.797121315969842e-07, |
|
"loss": 1.9826, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8364611260053619, |
|
"grad_norm": 0.43309998512268066, |
|
"learning_rate": 3.7936943111720357e-07, |
|
"loss": 2.0313, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8384109188398733, |
|
"grad_norm": 0.43594348430633545, |
|
"learning_rate": 3.7902673063742286e-07, |
|
"loss": 2.0437, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8403607116743846, |
|
"grad_norm": 0.43290477991104126, |
|
"learning_rate": 3.786840301576422e-07, |
|
"loss": 2.1213, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8423105045088959, |
|
"grad_norm": 0.4143589735031128, |
|
"learning_rate": 3.7834132967786156e-07, |
|
"loss": 2.0327, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8442602973434072, |
|
"grad_norm": 0.4311947226524353, |
|
"learning_rate": 3.7799862919808085e-07, |
|
"loss": 2.0604, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8462100901779186, |
|
"grad_norm": 0.4119859039783478, |
|
"learning_rate": 3.776559287183002e-07, |
|
"loss": 2.0091, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.84815988301243, |
|
"grad_norm": 0.4251650869846344, |
|
"learning_rate": 3.7731322823851955e-07, |
|
"loss": 2.05, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8501096758469413, |
|
"grad_norm": 0.4295788109302521, |
|
"learning_rate": 3.7697052775873884e-07, |
|
"loss": 2.0231, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8520594686814525, |
|
"grad_norm": 0.4099411964416504, |
|
"learning_rate": 3.766278272789582e-07, |
|
"loss": 2.1037, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8540092615159639, |
|
"grad_norm": 0.41294169425964355, |
|
"learning_rate": 3.7628512679917754e-07, |
|
"loss": 2.0535, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8559590543504753, |
|
"grad_norm": 0.4004737138748169, |
|
"learning_rate": 3.7594242631939683e-07, |
|
"loss": 2.0395, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8579088471849866, |
|
"grad_norm": 0.40913403034210205, |
|
"learning_rate": 3.755997258396161e-07, |
|
"loss": 1.9947, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.859858640019498, |
|
"grad_norm": 0.41119128465652466, |
|
"learning_rate": 3.752570253598355e-07, |
|
"loss": 1.9859, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.8618084328540092, |
|
"grad_norm": 0.44417282938957214, |
|
"learning_rate": 3.749143248800548e-07, |
|
"loss": 2.0712, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.8637582256885206, |
|
"grad_norm": 0.41587620973587036, |
|
"learning_rate": 3.745716244002741e-07, |
|
"loss": 1.9921, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.8657080185230319, |
|
"grad_norm": 0.4235389530658722, |
|
"learning_rate": 3.742289239204935e-07, |
|
"loss": 1.9941, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.8676578113575433, |
|
"grad_norm": 0.4219055771827698, |
|
"learning_rate": 3.738862234407128e-07, |
|
"loss": 2.0621, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8696076041920546, |
|
"grad_norm": 0.42184367775917053, |
|
"learning_rate": 3.735435229609321e-07, |
|
"loss": 2.0307, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.8715573970265659, |
|
"grad_norm": 0.39649975299835205, |
|
"learning_rate": 3.732008224811515e-07, |
|
"loss": 2.0264, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.8735071898610772, |
|
"grad_norm": 0.4187317490577698, |
|
"learning_rate": 3.728581220013708e-07, |
|
"loss": 1.9778, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.8754569826955886, |
|
"grad_norm": 0.41368138790130615, |
|
"learning_rate": 3.725154215215901e-07, |
|
"loss": 1.9953, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.8774067755300999, |
|
"grad_norm": 0.4397999942302704, |
|
"learning_rate": 3.721727210418095e-07, |
|
"loss": 2.0835, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8793565683646113, |
|
"grad_norm": 0.41927337646484375, |
|
"learning_rate": 3.718300205620288e-07, |
|
"loss": 2.0307, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.8813063611991226, |
|
"grad_norm": 0.43216344714164734, |
|
"learning_rate": 3.714873200822481e-07, |
|
"loss": 2.0669, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.8832561540336339, |
|
"grad_norm": 0.4566250741481781, |
|
"learning_rate": 3.711446196024674e-07, |
|
"loss": 2.0423, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.8852059468681452, |
|
"grad_norm": 0.4399709701538086, |
|
"learning_rate": 3.708019191226868e-07, |
|
"loss": 2.0859, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.8871557397026566, |
|
"grad_norm": 0.44788333773612976, |
|
"learning_rate": 3.7045921864290607e-07, |
|
"loss": 2.0349, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.889105532537168, |
|
"grad_norm": 0.4182490110397339, |
|
"learning_rate": 3.7011651816312537e-07, |
|
"loss": 1.9921, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.8910553253716793, |
|
"grad_norm": 0.4325038194656372, |
|
"learning_rate": 3.6977381768334477e-07, |
|
"loss": 2.0419, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.8930051182061906, |
|
"grad_norm": 0.48611199855804443, |
|
"learning_rate": 3.6943111720356406e-07, |
|
"loss": 2.1572, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.8949549110407019, |
|
"grad_norm": 0.4303911030292511, |
|
"learning_rate": 3.6908841672378336e-07, |
|
"loss": 2.0137, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.8969047038752133, |
|
"grad_norm": 0.4397573173046112, |
|
"learning_rate": 3.6874571624400276e-07, |
|
"loss": 2.0199, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8988544967097246, |
|
"grad_norm": 0.4570363163948059, |
|
"learning_rate": 3.6840301576422205e-07, |
|
"loss": 2.0648, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.900804289544236, |
|
"grad_norm": 0.43259698152542114, |
|
"learning_rate": 3.6806031528444135e-07, |
|
"loss": 2.0121, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9027540823787472, |
|
"grad_norm": 0.44078147411346436, |
|
"learning_rate": 3.6771761480466075e-07, |
|
"loss": 2.0422, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9047038752132586, |
|
"grad_norm": 0.4169975519180298, |
|
"learning_rate": 3.6737491432488004e-07, |
|
"loss": 2.0453, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9066536680477699, |
|
"grad_norm": 0.44096165895462036, |
|
"learning_rate": 3.6703221384509934e-07, |
|
"loss": 2.0722, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9086034608822813, |
|
"grad_norm": 0.4220427870750427, |
|
"learning_rate": 3.666895133653187e-07, |
|
"loss": 2.052, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9105532537167926, |
|
"grad_norm": 0.41613534092903137, |
|
"learning_rate": 3.6634681288553803e-07, |
|
"loss": 2.0031, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9125030465513039, |
|
"grad_norm": 0.4290630519390106, |
|
"learning_rate": 3.660041124057573e-07, |
|
"loss": 2.108, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9144528393858152, |
|
"grad_norm": 0.41508668661117554, |
|
"learning_rate": 3.6566141192597667e-07, |
|
"loss": 2.0369, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9164026322203266, |
|
"grad_norm": 0.4051671326160431, |
|
"learning_rate": 3.65318711446196e-07, |
|
"loss": 2.0593, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9183524250548379, |
|
"grad_norm": 0.427229642868042, |
|
"learning_rate": 3.649760109664153e-07, |
|
"loss": 2.0303, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9203022178893493, |
|
"grad_norm": 0.408236026763916, |
|
"learning_rate": 3.6463331048663466e-07, |
|
"loss": 2.0537, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9222520107238605, |
|
"grad_norm": 0.4055333435535431, |
|
"learning_rate": 3.64290610006854e-07, |
|
"loss": 1.9684, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9242018035583719, |
|
"grad_norm": 0.4198017418384552, |
|
"learning_rate": 3.639479095270733e-07, |
|
"loss": 2.0429, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9261515963928832, |
|
"grad_norm": 0.4309008717536926, |
|
"learning_rate": 3.6360520904729265e-07, |
|
"loss": 2.0844, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9281013892273946, |
|
"grad_norm": 0.4177336096763611, |
|
"learning_rate": 3.63262508567512e-07, |
|
"loss": 2.0082, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.930051182061906, |
|
"grad_norm": 0.42606329917907715, |
|
"learning_rate": 3.629198080877313e-07, |
|
"loss": 2.0371, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9320009748964172, |
|
"grad_norm": 0.4223528504371643, |
|
"learning_rate": 3.6257710760795064e-07, |
|
"loss": 2.0128, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9339507677309286, |
|
"grad_norm": 0.43999001383781433, |
|
"learning_rate": 3.6223440712816994e-07, |
|
"loss": 1.9984, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.9359005605654399, |
|
"grad_norm": 0.44352471828460693, |
|
"learning_rate": 3.618917066483893e-07, |
|
"loss": 2.0501, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9378503533999513, |
|
"grad_norm": 0.4229583740234375, |
|
"learning_rate": 3.6154900616860863e-07, |
|
"loss": 2.0403, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9398001462344626, |
|
"grad_norm": 0.4202549457550049, |
|
"learning_rate": 3.612063056888279e-07, |
|
"loss": 1.9893, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.941749939068974, |
|
"grad_norm": 0.4364420771598816, |
|
"learning_rate": 3.6086360520904727e-07, |
|
"loss": 1.9953, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9436997319034852, |
|
"grad_norm": 0.4317263662815094, |
|
"learning_rate": 3.605209047292666e-07, |
|
"loss": 2.0787, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9456495247379966, |
|
"grad_norm": 0.44858187437057495, |
|
"learning_rate": 3.601782042494859e-07, |
|
"loss": 2.1139, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9475993175725079, |
|
"grad_norm": 0.4311455488204956, |
|
"learning_rate": 3.5983550376970526e-07, |
|
"loss": 2.0409, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9495491104070193, |
|
"grad_norm": 0.42990413308143616, |
|
"learning_rate": 3.594928032899246e-07, |
|
"loss": 2.0478, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9514989032415306, |
|
"grad_norm": 0.4484078288078308, |
|
"learning_rate": 3.591501028101439e-07, |
|
"loss": 1.9989, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9534486960760419, |
|
"grad_norm": 0.438047856092453, |
|
"learning_rate": 3.5880740233036325e-07, |
|
"loss": 2.0468, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.9553984889105532, |
|
"grad_norm": 0.4557168483734131, |
|
"learning_rate": 3.584647018505826e-07, |
|
"loss": 2.1145, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9573482817450646, |
|
"grad_norm": 0.41166436672210693, |
|
"learning_rate": 3.581220013708019e-07, |
|
"loss": 2.0639, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9592980745795759, |
|
"grad_norm": 0.4612530767917633, |
|
"learning_rate": 3.577793008910212e-07, |
|
"loss": 2.0139, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.9612478674140873, |
|
"grad_norm": 0.4352019429206848, |
|
"learning_rate": 3.574366004112406e-07, |
|
"loss": 2.0984, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.9631976602485985, |
|
"grad_norm": 0.4246942400932312, |
|
"learning_rate": 3.570938999314599e-07, |
|
"loss": 2.054, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.9651474530831099, |
|
"grad_norm": 0.4309667646884918, |
|
"learning_rate": 3.567511994516792e-07, |
|
"loss": 1.9942, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9670972459176213, |
|
"grad_norm": 0.4459112584590912, |
|
"learning_rate": 3.564084989718986e-07, |
|
"loss": 2.0221, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.9690470387521326, |
|
"grad_norm": 0.44149142503738403, |
|
"learning_rate": 3.5606579849211787e-07, |
|
"loss": 2.0181, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.970996831586644, |
|
"grad_norm": 0.4406503736972809, |
|
"learning_rate": 3.5572309801233717e-07, |
|
"loss": 2.0666, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.9729466244211552, |
|
"grad_norm": 0.4117674231529236, |
|
"learning_rate": 3.5538039753255657e-07, |
|
"loss": 1.982, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.9748964172556666, |
|
"grad_norm": 0.43600788712501526, |
|
"learning_rate": 3.5503769705277586e-07, |
|
"loss": 1.9772, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9768462100901779, |
|
"grad_norm": 0.42391106486320496, |
|
"learning_rate": 3.5469499657299516e-07, |
|
"loss": 2.0304, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.9787960029246893, |
|
"grad_norm": 0.44462934136390686, |
|
"learning_rate": 3.5435229609321456e-07, |
|
"loss": 2.0374, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.9807457957592006, |
|
"grad_norm": 0.45238927006721497, |
|
"learning_rate": 3.5400959561343385e-07, |
|
"loss": 2.057, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.9826955885937119, |
|
"grad_norm": 0.43034645915031433, |
|
"learning_rate": 3.5366689513365315e-07, |
|
"loss": 2.0392, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.9846453814282232, |
|
"grad_norm": 0.42902877926826477, |
|
"learning_rate": 3.533241946538725e-07, |
|
"loss": 2.045, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9865951742627346, |
|
"grad_norm": 0.4340520203113556, |
|
"learning_rate": 3.5298149417409184e-07, |
|
"loss": 2.0439, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.9885449670972459, |
|
"grad_norm": 0.45374131202697754, |
|
"learning_rate": 3.5263879369431114e-07, |
|
"loss": 2.0431, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.9904947599317573, |
|
"grad_norm": 0.44037064909935, |
|
"learning_rate": 3.522960932145305e-07, |
|
"loss": 2.0123, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.9924445527662685, |
|
"grad_norm": 0.42846593260765076, |
|
"learning_rate": 3.5195339273474983e-07, |
|
"loss": 1.9661, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.9943943456007799, |
|
"grad_norm": 0.4789009392261505, |
|
"learning_rate": 3.516106922549691e-07, |
|
"loss": 2.0753, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9963441384352912, |
|
"grad_norm": 0.44283124804496765, |
|
"learning_rate": 3.5126799177518847e-07, |
|
"loss": 2.0581, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.9982939312698026, |
|
"grad_norm": 0.43828728795051575, |
|
"learning_rate": 3.509252912954078e-07, |
|
"loss": 2.05, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.9982939312698026, |
|
"eval_loss": 2.046032667160034, |
|
"eval_runtime": 481.0273, |
|
"eval_samples_per_second": 1.293, |
|
"eval_steps_per_second": 0.324, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0002437241043138, |
|
"grad_norm": 0.41433945298194885, |
|
"learning_rate": 3.505825908156271e-07, |
|
"loss": 2.0366, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0021935169388252, |
|
"grad_norm": 0.42399510741233826, |
|
"learning_rate": 3.5023989033584646e-07, |
|
"loss": 1.991, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0041433097733365, |
|
"grad_norm": 0.45652541518211365, |
|
"learning_rate": 3.4989718985606576e-07, |
|
"loss": 2.0066, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.006093102607848, |
|
"grad_norm": 0.43585795164108276, |
|
"learning_rate": 3.495544893762851e-07, |
|
"loss": 2.0025, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.0080428954423593, |
|
"grad_norm": 0.43803489208221436, |
|
"learning_rate": 3.4921178889650445e-07, |
|
"loss": 2.0654, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.0099926882768706, |
|
"grad_norm": 0.43803176283836365, |
|
"learning_rate": 3.4886908841672375e-07, |
|
"loss": 2.0896, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.011942481111382, |
|
"grad_norm": 0.41983944177627563, |
|
"learning_rate": 3.485263879369431e-07, |
|
"loss": 2.0335, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.0138922739458933, |
|
"grad_norm": 0.4354363977909088, |
|
"learning_rate": 3.4818368745716244e-07, |
|
"loss": 2.0699, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.0158420667804047, |
|
"grad_norm": 0.42140671610832214, |
|
"learning_rate": 3.4784098697738174e-07, |
|
"loss": 1.9646, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.0177918596149158, |
|
"grad_norm": 0.4265493154525757, |
|
"learning_rate": 3.474982864976011e-07, |
|
"loss": 2.0735, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.0197416524494272, |
|
"grad_norm": 0.43847259879112244, |
|
"learning_rate": 3.4715558601782043e-07, |
|
"loss": 2.0986, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.0216914452839385, |
|
"grad_norm": 0.4600801467895508, |
|
"learning_rate": 3.468128855380397e-07, |
|
"loss": 2.0643, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.0236412381184499, |
|
"grad_norm": 0.42904648184776306, |
|
"learning_rate": 3.4647018505825907e-07, |
|
"loss": 2.0056, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.0255910309529612, |
|
"grad_norm": 0.46431151032447815, |
|
"learning_rate": 3.461274845784784e-07, |
|
"loss": 2.1056, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0275408237874726, |
|
"grad_norm": 0.455836683511734, |
|
"learning_rate": 3.457847840986977e-07, |
|
"loss": 2.0187, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.029490616621984, |
|
"grad_norm": 0.4192461669445038, |
|
"learning_rate": 3.45442083618917e-07, |
|
"loss": 2.0832, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0314404094564953, |
|
"grad_norm": 0.4513595402240753, |
|
"learning_rate": 3.450993831391364e-07, |
|
"loss": 2.058, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0333902022910066, |
|
"grad_norm": 0.4370152950286865, |
|
"learning_rate": 3.447566826593557e-07, |
|
"loss": 2.0537, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.035339995125518, |
|
"grad_norm": 0.4199161231517792, |
|
"learning_rate": 3.44413982179575e-07, |
|
"loss": 1.9518, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.0372897879600294, |
|
"grad_norm": 0.43688762187957764, |
|
"learning_rate": 3.440712816997944e-07, |
|
"loss": 2.0444, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.0392395807945405, |
|
"grad_norm": 0.49809253215789795, |
|
"learning_rate": 3.437285812200137e-07, |
|
"loss": 2.0401, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.0411893736290518, |
|
"grad_norm": 0.4518781900405884, |
|
"learning_rate": 3.43385880740233e-07, |
|
"loss": 2.0605, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.0431391664635632, |
|
"grad_norm": 0.45353132486343384, |
|
"learning_rate": 3.430431802604524e-07, |
|
"loss": 2.0402, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.0450889592980745, |
|
"grad_norm": 0.4396359622478485, |
|
"learning_rate": 3.427004797806717e-07, |
|
"loss": 2.0643, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.047038752132586, |
|
"grad_norm": 0.4434252083301544, |
|
"learning_rate": 3.42357779300891e-07, |
|
"loss": 2.0188, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.0489885449670973, |
|
"grad_norm": 0.4241044819355011, |
|
"learning_rate": 3.420150788211104e-07, |
|
"loss": 1.9556, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.0509383378016086, |
|
"grad_norm": 0.4382232129573822, |
|
"learning_rate": 3.4167237834132967e-07, |
|
"loss": 1.9855, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.05288813063612, |
|
"grad_norm": 0.4357564151287079, |
|
"learning_rate": 3.4132967786154897e-07, |
|
"loss": 2.0524, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.0548379234706313, |
|
"grad_norm": 0.46050140261650085, |
|
"learning_rate": 3.409869773817683e-07, |
|
"loss": 2.0461, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.0567877163051427, |
|
"grad_norm": 0.44581982493400574, |
|
"learning_rate": 3.4064427690198766e-07, |
|
"loss": 1.9955, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.0587375091396538, |
|
"grad_norm": 0.4502599835395813, |
|
"learning_rate": 3.4030157642220696e-07, |
|
"loss": 2.0864, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.0606873019741652, |
|
"grad_norm": 0.44767019152641296, |
|
"learning_rate": 3.399588759424263e-07, |
|
"loss": 2.0447, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.0626370948086765, |
|
"grad_norm": 0.44603490829467773, |
|
"learning_rate": 3.3961617546264565e-07, |
|
"loss": 2.0709, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.0645868876431879, |
|
"grad_norm": 0.4321264922618866, |
|
"learning_rate": 3.3927347498286495e-07, |
|
"loss": 2.0157, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.0665366804776992, |
|
"grad_norm": 0.4479556083679199, |
|
"learning_rate": 3.389307745030843e-07, |
|
"loss": 2.1088, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.0684864733122106, |
|
"grad_norm": 0.4292636513710022, |
|
"learning_rate": 3.3858807402330364e-07, |
|
"loss": 2.0847, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.070436266146722, |
|
"grad_norm": 0.43631821870803833, |
|
"learning_rate": 3.3824537354352294e-07, |
|
"loss": 2.034, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.0723860589812333, |
|
"grad_norm": 0.43201327323913574, |
|
"learning_rate": 3.379026730637423e-07, |
|
"loss": 1.9633, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.0743358518157446, |
|
"grad_norm": 0.4389747679233551, |
|
"learning_rate": 3.3755997258396163e-07, |
|
"loss": 2.0331, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.076285644650256, |
|
"grad_norm": 0.46588924527168274, |
|
"learning_rate": 3.372172721041809e-07, |
|
"loss": 2.0748, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.0782354374847674, |
|
"grad_norm": 0.45190852880477905, |
|
"learning_rate": 3.3687457162440027e-07, |
|
"loss": 1.9639, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.0801852303192785, |
|
"grad_norm": 0.4458979070186615, |
|
"learning_rate": 3.3653187114461957e-07, |
|
"loss": 2.1124, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.0821350231537898, |
|
"grad_norm": 0.40400832891464233, |
|
"learning_rate": 3.361891706648389e-07, |
|
"loss": 1.9776, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.0840848159883012, |
|
"grad_norm": 0.4538462460041046, |
|
"learning_rate": 3.3584647018505826e-07, |
|
"loss": 1.9962, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.0860346088228126, |
|
"grad_norm": 0.44181132316589355, |
|
"learning_rate": 3.3550376970527756e-07, |
|
"loss": 2.0973, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.087984401657324, |
|
"grad_norm": 0.43516308069229126, |
|
"learning_rate": 3.351610692254969e-07, |
|
"loss": 1.9923, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.0899341944918353, |
|
"grad_norm": 0.4485546052455902, |
|
"learning_rate": 3.3481836874571625e-07, |
|
"loss": 2.0242, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.0918839873263466, |
|
"grad_norm": 0.45358070731163025, |
|
"learning_rate": 3.3447566826593555e-07, |
|
"loss": 2.0603, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.093833780160858, |
|
"grad_norm": 0.43879690766334534, |
|
"learning_rate": 3.341329677861549e-07, |
|
"loss": 1.9869, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.0957835729953693, |
|
"grad_norm": 0.4376320242881775, |
|
"learning_rate": 3.3379026730637424e-07, |
|
"loss": 2.0447, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.0977333658298805, |
|
"grad_norm": 0.4591986835002899, |
|
"learning_rate": 3.3344756682659354e-07, |
|
"loss": 2.0188, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.0996831586643918, |
|
"grad_norm": 0.4306589961051941, |
|
"learning_rate": 3.331048663468129e-07, |
|
"loss": 2.0223, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1016329514989032, |
|
"grad_norm": 0.43692710995674133, |
|
"learning_rate": 3.3276216586703223e-07, |
|
"loss": 2.0507, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1035827443334145, |
|
"grad_norm": 0.4663935601711273, |
|
"learning_rate": 3.324194653872515e-07, |
|
"loss": 2.0444, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.1055325371679259, |
|
"grad_norm": 0.45090562105178833, |
|
"learning_rate": 3.320767649074708e-07, |
|
"loss": 1.9944, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1074823300024372, |
|
"grad_norm": 0.4450632631778717, |
|
"learning_rate": 3.317340644276902e-07, |
|
"loss": 2.0264, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.1094321228369486, |
|
"grad_norm": 0.45126745104789734, |
|
"learning_rate": 3.313913639479095e-07, |
|
"loss": 2.081, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.11138191567146, |
|
"grad_norm": 0.44254472851753235, |
|
"learning_rate": 3.310486634681288e-07, |
|
"loss": 2.0223, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.1133317085059713, |
|
"grad_norm": 0.43211621046066284, |
|
"learning_rate": 3.307059629883482e-07, |
|
"loss": 2.0363, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.1152815013404827, |
|
"grad_norm": 0.4256265163421631, |
|
"learning_rate": 3.303632625085675e-07, |
|
"loss": 2.0363, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.117231294174994, |
|
"grad_norm": 0.4462417960166931, |
|
"learning_rate": 3.300205620287868e-07, |
|
"loss": 2.0394, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1191810870095051, |
|
"grad_norm": 0.4583437442779541, |
|
"learning_rate": 3.296778615490062e-07, |
|
"loss": 2.0878, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1211308798440165, |
|
"grad_norm": 0.4595088064670563, |
|
"learning_rate": 3.293351610692255e-07, |
|
"loss": 2.111, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.1230806726785278, |
|
"grad_norm": 0.4117080569267273, |
|
"learning_rate": 3.289924605894448e-07, |
|
"loss": 1.999, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.1250304655130392, |
|
"grad_norm": 0.4381641149520874, |
|
"learning_rate": 3.286497601096642e-07, |
|
"loss": 2.044, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.1269802583475506, |
|
"grad_norm": 0.43854039907455444, |
|
"learning_rate": 3.283070596298835e-07, |
|
"loss": 2.0272, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.128930051182062, |
|
"grad_norm": 0.4721965789794922, |
|
"learning_rate": 3.279643591501028e-07, |
|
"loss": 2.0697, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.1308798440165733, |
|
"grad_norm": 0.4373783767223358, |
|
"learning_rate": 3.2762165867032207e-07, |
|
"loss": 2.0102, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.1328296368510846, |
|
"grad_norm": 0.4286502003669739, |
|
"learning_rate": 3.2727895819054147e-07, |
|
"loss": 1.9695, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.134779429685596, |
|
"grad_norm": 0.4373305141925812, |
|
"learning_rate": 3.2693625771076077e-07, |
|
"loss": 1.9823, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.1367292225201073, |
|
"grad_norm": 0.4659106433391571, |
|
"learning_rate": 3.2659355723098006e-07, |
|
"loss": 2.081, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.1386790153546187, |
|
"grad_norm": 0.4315546154975891, |
|
"learning_rate": 3.2625085675119946e-07, |
|
"loss": 2.0336, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.1406288081891298, |
|
"grad_norm": 0.4512901306152344, |
|
"learning_rate": 3.2590815627141876e-07, |
|
"loss": 2.0642, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.1425786010236412, |
|
"grad_norm": 0.4398232400417328, |
|
"learning_rate": 3.2556545579163805e-07, |
|
"loss": 2.0401, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.1445283938581525, |
|
"grad_norm": 0.45262405276298523, |
|
"learning_rate": 3.2522275531185745e-07, |
|
"loss": 2.0999, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.1464781866926639, |
|
"grad_norm": 0.4210640490055084, |
|
"learning_rate": 3.2488005483207675e-07, |
|
"loss": 1.992, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.1484279795271752, |
|
"grad_norm": 0.4530121386051178, |
|
"learning_rate": 3.2453735435229604e-07, |
|
"loss": 2.0119, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.1503777723616866, |
|
"grad_norm": 0.43637722730636597, |
|
"learning_rate": 3.2419465387251544e-07, |
|
"loss": 2.0022, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.152327565196198, |
|
"grad_norm": 0.46872228384017944, |
|
"learning_rate": 3.2385195339273474e-07, |
|
"loss": 2.0545, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.1542773580307093, |
|
"grad_norm": 0.45964333415031433, |
|
"learning_rate": 3.2350925291295403e-07, |
|
"loss": 2.0313, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.1562271508652207, |
|
"grad_norm": 0.4444529414176941, |
|
"learning_rate": 3.231665524331734e-07, |
|
"loss": 2.0463, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.1581769436997318, |
|
"grad_norm": 0.4702310264110565, |
|
"learning_rate": 3.228238519533927e-07, |
|
"loss": 2.0055, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.1601267365342431, |
|
"grad_norm": 0.4435891807079315, |
|
"learning_rate": 3.22481151473612e-07, |
|
"loss": 2.1027, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.1620765293687545, |
|
"grad_norm": 0.4557732343673706, |
|
"learning_rate": 3.2213845099383137e-07, |
|
"loss": 2.0307, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.1640263222032659, |
|
"grad_norm": 0.4286348819732666, |
|
"learning_rate": 3.217957505140507e-07, |
|
"loss": 2.0196, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.1659761150377772, |
|
"grad_norm": 0.4475346803665161, |
|
"learning_rate": 3.2145305003427e-07, |
|
"loss": 2.1014, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.1679259078722886, |
|
"grad_norm": 0.418293297290802, |
|
"learning_rate": 3.2111034955448936e-07, |
|
"loss": 2.078, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.1698757007068, |
|
"grad_norm": 0.42740973830223083, |
|
"learning_rate": 3.207676490747087e-07, |
|
"loss": 1.9695, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1718254935413113, |
|
"grad_norm": 0.41325512528419495, |
|
"learning_rate": 3.20424948594928e-07, |
|
"loss": 2.0297, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.1737752863758226, |
|
"grad_norm": 0.4326270818710327, |
|
"learning_rate": 3.2008224811514735e-07, |
|
"loss": 2.0059, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.175725079210334, |
|
"grad_norm": 0.44774889945983887, |
|
"learning_rate": 3.197395476353667e-07, |
|
"loss": 2.0427, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.1776748720448453, |
|
"grad_norm": 0.446158766746521, |
|
"learning_rate": 3.19396847155586e-07, |
|
"loss": 2.0748, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.1796246648793565, |
|
"grad_norm": 0.4654727280139923, |
|
"learning_rate": 3.1905414667580534e-07, |
|
"loss": 1.9297, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.1815744577138678, |
|
"grad_norm": 0.45213672518730164, |
|
"learning_rate": 3.1871144619602463e-07, |
|
"loss": 2.1087, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.1835242505483792, |
|
"grad_norm": 0.45228397846221924, |
|
"learning_rate": 3.18368745716244e-07, |
|
"loss": 2.0961, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.1854740433828905, |
|
"grad_norm": 0.4470541477203369, |
|
"learning_rate": 3.180260452364633e-07, |
|
"loss": 2.0073, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.1874238362174019, |
|
"grad_norm": 0.4324132204055786, |
|
"learning_rate": 3.176833447566826e-07, |
|
"loss": 2.0334, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.1893736290519132, |
|
"grad_norm": 0.47044241428375244, |
|
"learning_rate": 3.1734064427690197e-07, |
|
"loss": 2.1086, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.1913234218864246, |
|
"grad_norm": 0.43018707633018494, |
|
"learning_rate": 3.169979437971213e-07, |
|
"loss": 2.0289, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.193273214720936, |
|
"grad_norm": 0.44133853912353516, |
|
"learning_rate": 3.166552433173406e-07, |
|
"loss": 2.0333, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.1952230075554473, |
|
"grad_norm": 0.45557719469070435, |
|
"learning_rate": 3.1631254283755996e-07, |
|
"loss": 2.0399, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.1971728003899587, |
|
"grad_norm": 0.4350452125072479, |
|
"learning_rate": 3.159698423577793e-07, |
|
"loss": 2.0224, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.19912259322447, |
|
"grad_norm": 0.4687999188899994, |
|
"learning_rate": 3.156271418779986e-07, |
|
"loss": 2.0228, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.2010723860589811, |
|
"grad_norm": 0.43684178590774536, |
|
"learning_rate": 3.1528444139821795e-07, |
|
"loss": 2.0776, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.2030221788934925, |
|
"grad_norm": 0.45561161637306213, |
|
"learning_rate": 3.149417409184373e-07, |
|
"loss": 2.0022, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.2049719717280039, |
|
"grad_norm": 0.4689810276031494, |
|
"learning_rate": 3.145990404386566e-07, |
|
"loss": 2.0173, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.2069217645625152, |
|
"grad_norm": 0.4293496310710907, |
|
"learning_rate": 3.142563399588759e-07, |
|
"loss": 1.9824, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2088715573970266, |
|
"grad_norm": 0.4662802219390869, |
|
"learning_rate": 3.139136394790953e-07, |
|
"loss": 2.0784, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.210821350231538, |
|
"grad_norm": 0.45310187339782715, |
|
"learning_rate": 3.135709389993146e-07, |
|
"loss": 1.9844, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2127711430660493, |
|
"grad_norm": 0.4419795870780945, |
|
"learning_rate": 3.1322823851953387e-07, |
|
"loss": 2.0515, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2147209359005606, |
|
"grad_norm": 0.4516865611076355, |
|
"learning_rate": 3.1288553803975327e-07, |
|
"loss": 2.0879, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.216670728735072, |
|
"grad_norm": 0.46178489923477173, |
|
"learning_rate": 3.1254283755997257e-07, |
|
"loss": 2.0498, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.2186205215695831, |
|
"grad_norm": 0.4678952097892761, |
|
"learning_rate": 3.1220013708019186e-07, |
|
"loss": 2.0408, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.2205703144040945, |
|
"grad_norm": 0.4456236660480499, |
|
"learning_rate": 3.1185743660041126e-07, |
|
"loss": 1.9694, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.2225201072386058, |
|
"grad_norm": 0.4397581219673157, |
|
"learning_rate": 3.1151473612063056e-07, |
|
"loss": 2.0048, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.2244699000731172, |
|
"grad_norm": 0.4338027238845825, |
|
"learning_rate": 3.1117203564084985e-07, |
|
"loss": 2.0194, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.2264196929076285, |
|
"grad_norm": 0.4413823187351227, |
|
"learning_rate": 3.108293351610692e-07, |
|
"loss": 2.025, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.2283694857421399, |
|
"grad_norm": 0.43685299158096313, |
|
"learning_rate": 3.1048663468128855e-07, |
|
"loss": 2.0051, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.2303192785766512, |
|
"grad_norm": 0.4644426107406616, |
|
"learning_rate": 3.1014393420150784e-07, |
|
"loss": 2.0313, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.2322690714111626, |
|
"grad_norm": 0.4478755593299866, |
|
"learning_rate": 3.098012337217272e-07, |
|
"loss": 1.9669, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.234218864245674, |
|
"grad_norm": 0.43452218174934387, |
|
"learning_rate": 3.0945853324194654e-07, |
|
"loss": 1.9927, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.2361686570801853, |
|
"grad_norm": 0.4408141076564789, |
|
"learning_rate": 3.0911583276216583e-07, |
|
"loss": 2.136, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.2381184499146967, |
|
"grad_norm": 0.42754924297332764, |
|
"learning_rate": 3.087731322823852e-07, |
|
"loss": 2.0247, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.2400682427492078, |
|
"grad_norm": 0.4387798607349396, |
|
"learning_rate": 3.084304318026045e-07, |
|
"loss": 1.9643, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.2420180355837191, |
|
"grad_norm": 0.46978920698165894, |
|
"learning_rate": 3.080877313228238e-07, |
|
"loss": 2.0776, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.2439678284182305, |
|
"grad_norm": 0.41821563243865967, |
|
"learning_rate": 3.0774503084304317e-07, |
|
"loss": 2.0355, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.2459176212527419, |
|
"grad_norm": 0.4664837419986725, |
|
"learning_rate": 3.074023303632625e-07, |
|
"loss": 2.0328, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.2478674140872532, |
|
"grad_norm": 0.4467378258705139, |
|
"learning_rate": 3.070596298834818e-07, |
|
"loss": 2.0058, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.2498172069217646, |
|
"grad_norm": 0.442058265209198, |
|
"learning_rate": 3.0671692940370116e-07, |
|
"loss": 2.0565, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.251766999756276, |
|
"grad_norm": 0.4655166268348694, |
|
"learning_rate": 3.0637422892392045e-07, |
|
"loss": 2.0628, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.2537167925907873, |
|
"grad_norm": 0.4388466477394104, |
|
"learning_rate": 3.060315284441398e-07, |
|
"loss": 2.0716, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.2556665854252986, |
|
"grad_norm": 0.48705416917800903, |
|
"learning_rate": 3.0568882796435915e-07, |
|
"loss": 1.9872, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.2576163782598098, |
|
"grad_norm": 0.4618842899799347, |
|
"learning_rate": 3.0534612748457844e-07, |
|
"loss": 2.0306, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.2595661710943213, |
|
"grad_norm": 0.46533843874931335, |
|
"learning_rate": 3.050034270047978e-07, |
|
"loss": 2.0827, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.2615159639288325, |
|
"grad_norm": 0.4898700714111328, |
|
"learning_rate": 3.0466072652501714e-07, |
|
"loss": 1.9585, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.2634657567633438, |
|
"grad_norm": 0.4561532735824585, |
|
"learning_rate": 3.0431802604523643e-07, |
|
"loss": 2.0689, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.2654155495978552, |
|
"grad_norm": 0.4628736078739166, |
|
"learning_rate": 3.039753255654558e-07, |
|
"loss": 2.0307, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.2673653424323665, |
|
"grad_norm": 0.4475798010826111, |
|
"learning_rate": 3.036326250856751e-07, |
|
"loss": 2.0372, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.269315135266878, |
|
"grad_norm": 0.44448035955429077, |
|
"learning_rate": 3.032899246058944e-07, |
|
"loss": 2.0334, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.2712649281013892, |
|
"grad_norm": 0.4554859697818756, |
|
"learning_rate": 3.0294722412611377e-07, |
|
"loss": 2.0487, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.2732147209359006, |
|
"grad_norm": 0.44150403141975403, |
|
"learning_rate": 3.026045236463331e-07, |
|
"loss": 2.085, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.275164513770412, |
|
"grad_norm": 0.4476960301399231, |
|
"learning_rate": 3.022618231665524e-07, |
|
"loss": 1.9762, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.2771143066049233, |
|
"grad_norm": 0.4773290753364563, |
|
"learning_rate": 3.019191226867717e-07, |
|
"loss": 2.0565, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.2790640994394344, |
|
"grad_norm": 0.43788987398147583, |
|
"learning_rate": 3.015764222069911e-07, |
|
"loss": 2.0629, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.281013892273946, |
|
"grad_norm": 0.4314157962799072, |
|
"learning_rate": 3.012337217272104e-07, |
|
"loss": 2.0554, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.2829636851084572, |
|
"grad_norm": 0.45381680130958557, |
|
"learning_rate": 3.008910212474297e-07, |
|
"loss": 2.0514, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.2849134779429685, |
|
"grad_norm": 0.47213441133499146, |
|
"learning_rate": 3.005483207676491e-07, |
|
"loss": 2.0267, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.2868632707774799, |
|
"grad_norm": 0.4460486173629761, |
|
"learning_rate": 3.002056202878684e-07, |
|
"loss": 2.0717, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.2888130636119912, |
|
"grad_norm": 0.452747642993927, |
|
"learning_rate": 2.998629198080877e-07, |
|
"loss": 2.0634, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.2907628564465026, |
|
"grad_norm": 0.4495120942592621, |
|
"learning_rate": 2.995202193283071e-07, |
|
"loss": 2.042, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.292712649281014, |
|
"grad_norm": 0.433224081993103, |
|
"learning_rate": 2.991775188485264e-07, |
|
"loss": 2.0565, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.2946624421155253, |
|
"grad_norm": 0.4596520960330963, |
|
"learning_rate": 2.9883481836874567e-07, |
|
"loss": 2.0272, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.2966122349500366, |
|
"grad_norm": 0.433887243270874, |
|
"learning_rate": 2.9849211788896507e-07, |
|
"loss": 1.965, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.298562027784548, |
|
"grad_norm": 0.44755810499191284, |
|
"learning_rate": 2.9814941740918437e-07, |
|
"loss": 1.9915, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3005118206190591, |
|
"grad_norm": 0.48203861713409424, |
|
"learning_rate": 2.9780671692940366e-07, |
|
"loss": 2.0296, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.3024616134535705, |
|
"grad_norm": 0.4314959943294525, |
|
"learning_rate": 2.97464016449623e-07, |
|
"loss": 2.0282, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.3044114062880818, |
|
"grad_norm": 0.4476211369037628, |
|
"learning_rate": 2.9712131596984236e-07, |
|
"loss": 2.0348, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.3063611991225932, |
|
"grad_norm": 0.45356854796409607, |
|
"learning_rate": 2.9677861549006165e-07, |
|
"loss": 2.0369, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.3083109919571045, |
|
"grad_norm": 0.4637032747268677, |
|
"learning_rate": 2.96435915010281e-07, |
|
"loss": 2.1002, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.310260784791616, |
|
"grad_norm": 0.4258365333080292, |
|
"learning_rate": 2.9609321453050035e-07, |
|
"loss": 2.0184, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.3122105776261273, |
|
"grad_norm": 0.4571716785430908, |
|
"learning_rate": 2.9575051405071964e-07, |
|
"loss": 2.0711, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.3141603704606386, |
|
"grad_norm": 0.4479144215583801, |
|
"learning_rate": 2.95407813570939e-07, |
|
"loss": 2.1037, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.31611016329515, |
|
"grad_norm": 0.463773638010025, |
|
"learning_rate": 2.9506511309115834e-07, |
|
"loss": 2.087, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.318059956129661, |
|
"grad_norm": 0.4595959782600403, |
|
"learning_rate": 2.9472241261137763e-07, |
|
"loss": 2.0246, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.3200097489641727, |
|
"grad_norm": 0.41977226734161377, |
|
"learning_rate": 2.94379712131597e-07, |
|
"loss": 2.0132, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3219595417986838, |
|
"grad_norm": 0.4429217576980591, |
|
"learning_rate": 2.940370116518163e-07, |
|
"loss": 2.0414, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3239093346331952, |
|
"grad_norm": 0.46036285161972046, |
|
"learning_rate": 2.936943111720356e-07, |
|
"loss": 2.0474, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.3258591274677065, |
|
"grad_norm": 0.4518478512763977, |
|
"learning_rate": 2.9335161069225497e-07, |
|
"loss": 1.991, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.3278089203022179, |
|
"grad_norm": 0.4507528841495514, |
|
"learning_rate": 2.9300891021247426e-07, |
|
"loss": 2.0038, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.3297587131367292, |
|
"grad_norm": 0.45446595549583435, |
|
"learning_rate": 2.926662097326936e-07, |
|
"loss": 1.9257, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.3317085059712406, |
|
"grad_norm": 0.45073091983795166, |
|
"learning_rate": 2.9232350925291296e-07, |
|
"loss": 2.0667, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.333658298805752, |
|
"grad_norm": 0.43848779797554016, |
|
"learning_rate": 2.9198080877313225e-07, |
|
"loss": 2.0127, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.3356080916402633, |
|
"grad_norm": 0.44587504863739014, |
|
"learning_rate": 2.916381082933516e-07, |
|
"loss": 2.0694, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.3375578844747746, |
|
"grad_norm": 0.46157652139663696, |
|
"learning_rate": 2.9129540781357095e-07, |
|
"loss": 2.112, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.3395076773092858, |
|
"grad_norm": 0.461897075176239, |
|
"learning_rate": 2.9095270733379024e-07, |
|
"loss": 2.0431, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.3414574701437973, |
|
"grad_norm": 0.42506590485572815, |
|
"learning_rate": 2.906100068540096e-07, |
|
"loss": 2.0612, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.3434072629783085, |
|
"grad_norm": 0.43368127942085266, |
|
"learning_rate": 2.9026730637422894e-07, |
|
"loss": 2.0253, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.3453570558128198, |
|
"grad_norm": 0.4484082758426666, |
|
"learning_rate": 2.8992460589444823e-07, |
|
"loss": 1.9962, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.3473068486473312, |
|
"grad_norm": 0.44570791721343994, |
|
"learning_rate": 2.895819054146676e-07, |
|
"loss": 2.018, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.3492566414818425, |
|
"grad_norm": 0.4472144842147827, |
|
"learning_rate": 2.892392049348869e-07, |
|
"loss": 2.0254, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.351206434316354, |
|
"grad_norm": 0.4680030047893524, |
|
"learning_rate": 2.888965044551062e-07, |
|
"loss": 2.1265, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.3531562271508653, |
|
"grad_norm": 0.44323253631591797, |
|
"learning_rate": 2.885538039753255e-07, |
|
"loss": 2.0222, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.3551060199853766, |
|
"grad_norm": 0.4732964038848877, |
|
"learning_rate": 2.882111034955449e-07, |
|
"loss": 2.0219, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.357055812819888, |
|
"grad_norm": 0.4392209053039551, |
|
"learning_rate": 2.878684030157642e-07, |
|
"loss": 1.9841, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.3590056056543993, |
|
"grad_norm": 0.46177539229393005, |
|
"learning_rate": 2.875257025359835e-07, |
|
"loss": 2.0461, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.3609553984889105, |
|
"grad_norm": 0.4625999927520752, |
|
"learning_rate": 2.871830020562029e-07, |
|
"loss": 2.0137, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.3629051913234218, |
|
"grad_norm": 0.43552806973457336, |
|
"learning_rate": 2.868403015764222e-07, |
|
"loss": 2.0408, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.3648549841579332, |
|
"grad_norm": 0.47674480080604553, |
|
"learning_rate": 2.864976010966415e-07, |
|
"loss": 2.0021, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.3668047769924445, |
|
"grad_norm": 0.46479421854019165, |
|
"learning_rate": 2.861549006168609e-07, |
|
"loss": 1.9898, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.3687545698269559, |
|
"grad_norm": 0.4399622976779938, |
|
"learning_rate": 2.858122001370802e-07, |
|
"loss": 1.9638, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.3707043626614672, |
|
"grad_norm": 0.442557692527771, |
|
"learning_rate": 2.854694996572995e-07, |
|
"loss": 2.0099, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.3726541554959786, |
|
"grad_norm": 0.4601743817329407, |
|
"learning_rate": 2.851267991775189e-07, |
|
"loss": 2.057, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.37460394833049, |
|
"grad_norm": 0.4959220290184021, |
|
"learning_rate": 2.847840986977382e-07, |
|
"loss": 2.092, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.3765537411650013, |
|
"grad_norm": 0.40172404050827026, |
|
"learning_rate": 2.8444139821795747e-07, |
|
"loss": 2.0074, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.3785035339995124, |
|
"grad_norm": 0.4572814404964447, |
|
"learning_rate": 2.840986977381768e-07, |
|
"loss": 1.9777, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.380453326834024, |
|
"grad_norm": 0.4464624524116516, |
|
"learning_rate": 2.8375599725839617e-07, |
|
"loss": 2.0183, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.3824031196685351, |
|
"grad_norm": 0.4498922526836395, |
|
"learning_rate": 2.8341329677861546e-07, |
|
"loss": 2.0975, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.3843529125030465, |
|
"grad_norm": 0.4430985748767853, |
|
"learning_rate": 2.830705962988348e-07, |
|
"loss": 2.027, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.3863027053375578, |
|
"grad_norm": 0.4422641694545746, |
|
"learning_rate": 2.8272789581905416e-07, |
|
"loss": 2.0625, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.3882524981720692, |
|
"grad_norm": 0.46121206879615784, |
|
"learning_rate": 2.8238519533927345e-07, |
|
"loss": 2.0135, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.3902022910065805, |
|
"grad_norm": 0.4685353934764862, |
|
"learning_rate": 2.820424948594928e-07, |
|
"loss": 2.071, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.392152083841092, |
|
"grad_norm": 0.43733134865760803, |
|
"learning_rate": 2.8169979437971215e-07, |
|
"loss": 2.0531, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.3941018766756033, |
|
"grad_norm": 0.4479463994503021, |
|
"learning_rate": 2.8135709389993144e-07, |
|
"loss": 2.0192, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.3960516695101146, |
|
"grad_norm": 0.4477840065956116, |
|
"learning_rate": 2.810143934201508e-07, |
|
"loss": 2.0408, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.398001462344626, |
|
"grad_norm": 0.44232964515686035, |
|
"learning_rate": 2.8067169294037014e-07, |
|
"loss": 2.0992, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.399951255179137, |
|
"grad_norm": 0.4573095142841339, |
|
"learning_rate": 2.8032899246058943e-07, |
|
"loss": 1.9958, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4019010480136487, |
|
"grad_norm": 0.4734794497489929, |
|
"learning_rate": 2.799862919808088e-07, |
|
"loss": 2.0268, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.4038508408481598, |
|
"grad_norm": 0.4753987193107605, |
|
"learning_rate": 2.7964359150102807e-07, |
|
"loss": 2.0436, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.4058006336826712, |
|
"grad_norm": 0.4515923261642456, |
|
"learning_rate": 2.793008910212474e-07, |
|
"loss": 2.0018, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.4077504265171825, |
|
"grad_norm": 0.45925289392471313, |
|
"learning_rate": 2.7895819054146677e-07, |
|
"loss": 2.0454, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.4097002193516939, |
|
"grad_norm": 0.4684261083602905, |
|
"learning_rate": 2.7861549006168606e-07, |
|
"loss": 2.0355, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.4116500121862052, |
|
"grad_norm": 0.4723130464553833, |
|
"learning_rate": 2.782727895819054e-07, |
|
"loss": 2.0189, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.4135998050207166, |
|
"grad_norm": 0.43946054577827454, |
|
"learning_rate": 2.7793008910212476e-07, |
|
"loss": 2.0165, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.415549597855228, |
|
"grad_norm": 0.45172879099845886, |
|
"learning_rate": 2.7758738862234405e-07, |
|
"loss": 1.9966, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.4174993906897393, |
|
"grad_norm": 0.4361145496368408, |
|
"learning_rate": 2.772446881425634e-07, |
|
"loss": 1.982, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.4194491835242506, |
|
"grad_norm": 0.4422454237937927, |
|
"learning_rate": 2.7690198766278275e-07, |
|
"loss": 2.0032, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.4213989763587618, |
|
"grad_norm": 0.4438495934009552, |
|
"learning_rate": 2.7655928718300204e-07, |
|
"loss": 2.0198, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.4233487691932731, |
|
"grad_norm": 0.4422749876976013, |
|
"learning_rate": 2.762165867032214e-07, |
|
"loss": 1.992, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.4252985620277845, |
|
"grad_norm": 0.4652174115180969, |
|
"learning_rate": 2.7587388622344074e-07, |
|
"loss": 2.0345, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.4272483548622958, |
|
"grad_norm": 0.46277597546577454, |
|
"learning_rate": 2.7553118574366003e-07, |
|
"loss": 2.0406, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.4291981476968072, |
|
"grad_norm": 0.45579442381858826, |
|
"learning_rate": 2.751884852638793e-07, |
|
"loss": 2.0671, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.4311479405313186, |
|
"grad_norm": 0.43527230620384216, |
|
"learning_rate": 2.748457847840987e-07, |
|
"loss": 2.0433, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.43309773336583, |
|
"grad_norm": 0.4699551463127136, |
|
"learning_rate": 2.74503084304318e-07, |
|
"loss": 2.0366, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.4350475262003413, |
|
"grad_norm": 0.4446089565753937, |
|
"learning_rate": 2.741603838245373e-07, |
|
"loss": 1.9986, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.4369973190348526, |
|
"grad_norm": 0.4645906686782837, |
|
"learning_rate": 2.738176833447567e-07, |
|
"loss": 2.1331, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.4389471118693637, |
|
"grad_norm": 0.46871501207351685, |
|
"learning_rate": 2.73474982864976e-07, |
|
"loss": 2.0402, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.4408969047038753, |
|
"grad_norm": 0.4507101774215698, |
|
"learning_rate": 2.731322823851953e-07, |
|
"loss": 2.0027, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.4428466975383865, |
|
"grad_norm": 0.4642309546470642, |
|
"learning_rate": 2.727895819054147e-07, |
|
"loss": 2.0613, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.4447964903728978, |
|
"grad_norm": 0.4762292206287384, |
|
"learning_rate": 2.72446881425634e-07, |
|
"loss": 2.0315, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.4467462832074092, |
|
"grad_norm": 0.4549463391304016, |
|
"learning_rate": 2.721041809458533e-07, |
|
"loss": 2.0492, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.4486960760419205, |
|
"grad_norm": 0.4566596448421478, |
|
"learning_rate": 2.717614804660727e-07, |
|
"loss": 1.9571, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.4506458688764319, |
|
"grad_norm": 0.4666212797164917, |
|
"learning_rate": 2.71418779986292e-07, |
|
"loss": 1.9897, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.4525956617109432, |
|
"grad_norm": 0.45651644468307495, |
|
"learning_rate": 2.710760795065113e-07, |
|
"loss": 2.0471, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 0.43935099244117737, |
|
"learning_rate": 2.707333790267306e-07, |
|
"loss": 1.9525, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.456495247379966, |
|
"grad_norm": 0.4813799560070038, |
|
"learning_rate": 2.7039067854695e-07, |
|
"loss": 2.0396, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.4584450402144773, |
|
"grad_norm": 0.4743799567222595, |
|
"learning_rate": 2.7004797806716927e-07, |
|
"loss": 2.0824, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.4603948330489884, |
|
"grad_norm": 0.4927983283996582, |
|
"learning_rate": 2.6970527758738857e-07, |
|
"loss": 2.0257, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.4623446258835, |
|
"grad_norm": 0.4711035192012787, |
|
"learning_rate": 2.6936257710760797e-07, |
|
"loss": 2.0487, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.4642944187180111, |
|
"grad_norm": 0.4515864849090576, |
|
"learning_rate": 2.6901987662782726e-07, |
|
"loss": 2.0244, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.4662442115525225, |
|
"grad_norm": 0.46076542139053345, |
|
"learning_rate": 2.6867717614804656e-07, |
|
"loss": 2.07, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.4681940043870338, |
|
"grad_norm": 0.44762691855430603, |
|
"learning_rate": 2.6833447566826596e-07, |
|
"loss": 2.0297, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.4701437972215452, |
|
"grad_norm": 0.4801499843597412, |
|
"learning_rate": 2.6799177518848525e-07, |
|
"loss": 2.0683, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.4720935900560566, |
|
"grad_norm": 0.45053598284721375, |
|
"learning_rate": 2.6764907470870455e-07, |
|
"loss": 1.9783, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.474043382890568, |
|
"grad_norm": 0.45730066299438477, |
|
"learning_rate": 2.673063742289239e-07, |
|
"loss": 2.0548, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.4759931757250793, |
|
"grad_norm": 0.4543995261192322, |
|
"learning_rate": 2.6696367374914324e-07, |
|
"loss": 2.0306, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.4779429685595906, |
|
"grad_norm": 0.4372531473636627, |
|
"learning_rate": 2.6662097326936254e-07, |
|
"loss": 2.0164, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.479892761394102, |
|
"grad_norm": 0.44617414474487305, |
|
"learning_rate": 2.662782727895819e-07, |
|
"loss": 1.9891, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.481842554228613, |
|
"grad_norm": 0.4605617821216583, |
|
"learning_rate": 2.6593557230980123e-07, |
|
"loss": 2.01, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.4837923470631245, |
|
"grad_norm": 0.4638999402523041, |
|
"learning_rate": 2.655928718300205e-07, |
|
"loss": 2.0685, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.4857421398976358, |
|
"grad_norm": 0.4548538327217102, |
|
"learning_rate": 2.6525017135023987e-07, |
|
"loss": 2.0665, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.4876919327321472, |
|
"grad_norm": 0.44948044419288635, |
|
"learning_rate": 2.649074708704592e-07, |
|
"loss": 1.9921, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.4896417255666585, |
|
"grad_norm": 0.4577581286430359, |
|
"learning_rate": 2.645647703906785e-07, |
|
"loss": 2.0392, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.4915915184011699, |
|
"grad_norm": 0.4821256101131439, |
|
"learning_rate": 2.6422206991089786e-07, |
|
"loss": 2.1304, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.4935413112356812, |
|
"grad_norm": 0.48839786648750305, |
|
"learning_rate": 2.638793694311172e-07, |
|
"loss": 2.0773, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.4954911040701926, |
|
"grad_norm": 0.43702590465545654, |
|
"learning_rate": 2.635366689513365e-07, |
|
"loss": 2.02, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.497440896904704, |
|
"grad_norm": 0.45477136969566345, |
|
"learning_rate": 2.6319396847155585e-07, |
|
"loss": 1.9962, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.499390689739215, |
|
"grad_norm": 0.47229456901550293, |
|
"learning_rate": 2.6285126799177515e-07, |
|
"loss": 2.0281, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.5013404825737267, |
|
"grad_norm": 0.4817400276660919, |
|
"learning_rate": 2.625085675119945e-07, |
|
"loss": 2.1009, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.5032902754082378, |
|
"grad_norm": 0.4645569324493408, |
|
"learning_rate": 2.6216586703221384e-07, |
|
"loss": 2.083, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.5052400682427494, |
|
"grad_norm": 0.44810667634010315, |
|
"learning_rate": 2.6182316655243314e-07, |
|
"loss": 2.09, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5071898610772605, |
|
"grad_norm": 0.44432902336120605, |
|
"learning_rate": 2.614804660726525e-07, |
|
"loss": 2.0126, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.5091396539117719, |
|
"grad_norm": 0.4630286991596222, |
|
"learning_rate": 2.6113776559287183e-07, |
|
"loss": 2.0136, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.5110894467462832, |
|
"grad_norm": 0.44443148374557495, |
|
"learning_rate": 2.607950651130911e-07, |
|
"loss": 1.9979, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.5130392395807946, |
|
"grad_norm": 0.44903403520584106, |
|
"learning_rate": 2.6045236463331047e-07, |
|
"loss": 1.9788, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.514989032415306, |
|
"grad_norm": 0.45394134521484375, |
|
"learning_rate": 2.601096641535298e-07, |
|
"loss": 1.9529, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.516938825249817, |
|
"grad_norm": 0.46713778376579285, |
|
"learning_rate": 2.597669636737491e-07, |
|
"loss": 2.0212, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.5188886180843286, |
|
"grad_norm": 0.45262840390205383, |
|
"learning_rate": 2.5942426319396846e-07, |
|
"loss": 2.0723, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.5208384109188398, |
|
"grad_norm": 0.4648626446723938, |
|
"learning_rate": 2.590815627141878e-07, |
|
"loss": 2.0046, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5227882037533513, |
|
"grad_norm": 0.4754423201084137, |
|
"learning_rate": 2.587388622344071e-07, |
|
"loss": 2.0434, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.5247379965878625, |
|
"grad_norm": 0.4271760880947113, |
|
"learning_rate": 2.583961617546264e-07, |
|
"loss": 2.0843, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.5266877894223738, |
|
"grad_norm": 0.48139727115631104, |
|
"learning_rate": 2.580534612748458e-07, |
|
"loss": 2.098, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.5286375822568852, |
|
"grad_norm": 0.473366379737854, |
|
"learning_rate": 2.577107607950651e-07, |
|
"loss": 2.0422, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.5305873750913965, |
|
"grad_norm": 0.4580918848514557, |
|
"learning_rate": 2.573680603152844e-07, |
|
"loss": 2.006, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.5325371679259079, |
|
"grad_norm": 0.4635441303253174, |
|
"learning_rate": 2.570253598355038e-07, |
|
"loss": 1.9736, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.5344869607604192, |
|
"grad_norm": 0.4621422290802002, |
|
"learning_rate": 2.566826593557231e-07, |
|
"loss": 2.1078, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.5364367535949306, |
|
"grad_norm": 0.4151935279369354, |
|
"learning_rate": 2.563399588759424e-07, |
|
"loss": 2.0092, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.5383865464294417, |
|
"grad_norm": 0.4793336093425751, |
|
"learning_rate": 2.559972583961618e-07, |
|
"loss": 2.0173, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.5403363392639533, |
|
"grad_norm": 0.4768364429473877, |
|
"learning_rate": 2.5565455791638107e-07, |
|
"loss": 2.0813, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.5422861320984644, |
|
"grad_norm": 0.452411949634552, |
|
"learning_rate": 2.5531185743660037e-07, |
|
"loss": 2.0527, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.544235924932976, |
|
"grad_norm": 0.44334676861763, |
|
"learning_rate": 2.5496915695681977e-07, |
|
"loss": 1.9701, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.5461857177674871, |
|
"grad_norm": 0.4465942978858948, |
|
"learning_rate": 2.5462645647703906e-07, |
|
"loss": 1.9905, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.5481355106019985, |
|
"grad_norm": 0.4681743085384369, |
|
"learning_rate": 2.5428375599725836e-07, |
|
"loss": 2.0654, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.5500853034365099, |
|
"grad_norm": 0.46780961751937866, |
|
"learning_rate": 2.539410555174777e-07, |
|
"loss": 2.0336, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.5520350962710212, |
|
"grad_norm": 0.44133254885673523, |
|
"learning_rate": 2.5359835503769705e-07, |
|
"loss": 1.9668, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.5539848891055326, |
|
"grad_norm": 0.45011645555496216, |
|
"learning_rate": 2.5325565455791635e-07, |
|
"loss": 2.0099, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.555934681940044, |
|
"grad_norm": 0.41162246465682983, |
|
"learning_rate": 2.529129540781357e-07, |
|
"loss": 1.9684, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.5578844747745553, |
|
"grad_norm": 0.438760906457901, |
|
"learning_rate": 2.5257025359835504e-07, |
|
"loss": 1.9934, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.5598342676090664, |
|
"grad_norm": 0.45921608805656433, |
|
"learning_rate": 2.5222755311857434e-07, |
|
"loss": 2.0447, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.561784060443578, |
|
"grad_norm": 0.4474433958530426, |
|
"learning_rate": 2.518848526387937e-07, |
|
"loss": 2.0508, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.5637338532780891, |
|
"grad_norm": 0.42901015281677246, |
|
"learning_rate": 2.5154215215901303e-07, |
|
"loss": 2.0607, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.5656836461126007, |
|
"grad_norm": 0.4604319632053375, |
|
"learning_rate": 2.511994516792323e-07, |
|
"loss": 2.0142, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.5676334389471118, |
|
"grad_norm": 0.4305102527141571, |
|
"learning_rate": 2.5085675119945167e-07, |
|
"loss": 1.9828, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.5695832317816232, |
|
"grad_norm": 0.4656990170478821, |
|
"learning_rate": 2.50514050719671e-07, |
|
"loss": 2.0302, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.5715330246161345, |
|
"grad_norm": 0.4602496325969696, |
|
"learning_rate": 2.501713502398903e-07, |
|
"loss": 2.0412, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.5734828174506459, |
|
"grad_norm": 0.4626891314983368, |
|
"learning_rate": 2.4982864976010966e-07, |
|
"loss": 2.0513, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.5754326102851572, |
|
"grad_norm": 0.4671951234340668, |
|
"learning_rate": 2.4948594928032896e-07, |
|
"loss": 2.003, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.5773824031196684, |
|
"grad_norm": 0.4399751126766205, |
|
"learning_rate": 2.491432488005483e-07, |
|
"loss": 2.0532, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.57933219595418, |
|
"grad_norm": 0.4228038191795349, |
|
"learning_rate": 2.4880054832076765e-07, |
|
"loss": 2.0078, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.581281988788691, |
|
"grad_norm": 0.4445479214191437, |
|
"learning_rate": 2.4845784784098695e-07, |
|
"loss": 2.0142, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.5832317816232027, |
|
"grad_norm": 0.4397488534450531, |
|
"learning_rate": 2.481151473612063e-07, |
|
"loss": 2.0468, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.5851815744577138, |
|
"grad_norm": 0.48187440633773804, |
|
"learning_rate": 2.4777244688142564e-07, |
|
"loss": 2.0444, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.5871313672922251, |
|
"grad_norm": 0.4355807304382324, |
|
"learning_rate": 2.4742974640164494e-07, |
|
"loss": 1.9955, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.5890811601267365, |
|
"grad_norm": 0.4219972491264343, |
|
"learning_rate": 2.470870459218643e-07, |
|
"loss": 1.9971, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.5910309529612479, |
|
"grad_norm": 0.44700267910957336, |
|
"learning_rate": 2.4674434544208363e-07, |
|
"loss": 2.0297, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.5929807457957592, |
|
"grad_norm": 0.45433923602104187, |
|
"learning_rate": 2.464016449623029e-07, |
|
"loss": 2.0064, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.5949305386302706, |
|
"grad_norm": 0.4188825488090515, |
|
"learning_rate": 2.4605894448252227e-07, |
|
"loss": 2.0236, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.596880331464782, |
|
"grad_norm": 0.4635048508644104, |
|
"learning_rate": 2.457162440027416e-07, |
|
"loss": 2.0652, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.598830124299293, |
|
"grad_norm": 0.4555036127567291, |
|
"learning_rate": 2.453735435229609e-07, |
|
"loss": 2.079, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6007799171338046, |
|
"grad_norm": 0.45152541995048523, |
|
"learning_rate": 2.4503084304318026e-07, |
|
"loss": 1.9724, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.6027297099683158, |
|
"grad_norm": 0.4355667233467102, |
|
"learning_rate": 2.446881425633996e-07, |
|
"loss": 2.0444, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.6046795028028273, |
|
"grad_norm": 0.42853429913520813, |
|
"learning_rate": 2.443454420836189e-07, |
|
"loss": 1.9451, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6066292956373385, |
|
"grad_norm": 0.4546351134777069, |
|
"learning_rate": 2.4400274160383825e-07, |
|
"loss": 2.015, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.6085790884718498, |
|
"grad_norm": 0.45015424489974976, |
|
"learning_rate": 2.4366004112405755e-07, |
|
"loss": 2.0171, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.6105288813063612, |
|
"grad_norm": 0.446065217256546, |
|
"learning_rate": 2.433173406442769e-07, |
|
"loss": 2.0085, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.6124786741408725, |
|
"grad_norm": 0.46771183609962463, |
|
"learning_rate": 2.4297464016449624e-07, |
|
"loss": 1.9844, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.614428466975384, |
|
"grad_norm": 0.4590853452682495, |
|
"learning_rate": 2.4263193968471554e-07, |
|
"loss": 2.0031, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.6163782598098952, |
|
"grad_norm": 0.4465842545032501, |
|
"learning_rate": 2.422892392049349e-07, |
|
"loss": 2.0344, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6183280526444066, |
|
"grad_norm": 0.40251830220222473, |
|
"learning_rate": 2.419465387251542e-07, |
|
"loss": 2.0129, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6202778454789177, |
|
"grad_norm": 0.45284631848335266, |
|
"learning_rate": 2.416038382453735e-07, |
|
"loss": 2.0354, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.6222276383134293, |
|
"grad_norm": 0.4733079969882965, |
|
"learning_rate": 2.4126113776559287e-07, |
|
"loss": 1.993, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.6241774311479404, |
|
"grad_norm": 0.4264031946659088, |
|
"learning_rate": 2.4091843728581217e-07, |
|
"loss": 2.007, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.626127223982452, |
|
"grad_norm": 0.46400555968284607, |
|
"learning_rate": 2.405757368060315e-07, |
|
"loss": 1.9825, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.6280770168169632, |
|
"grad_norm": 0.4408418834209442, |
|
"learning_rate": 2.4023303632625086e-07, |
|
"loss": 2.0199, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.6300268096514745, |
|
"grad_norm": 0.4353219270706177, |
|
"learning_rate": 2.3989033584647016e-07, |
|
"loss": 1.9767, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.6319766024859859, |
|
"grad_norm": 0.47256654500961304, |
|
"learning_rate": 2.395476353666895e-07, |
|
"loss": 2.0708, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.6339263953204972, |
|
"grad_norm": 0.44208547472953796, |
|
"learning_rate": 2.392049348869088e-07, |
|
"loss": 2.0518, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.6358761881550086, |
|
"grad_norm": 0.4937672019004822, |
|
"learning_rate": 2.3886223440712815e-07, |
|
"loss": 2.043, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.6378259809895197, |
|
"grad_norm": 0.46095776557922363, |
|
"learning_rate": 2.385195339273475e-07, |
|
"loss": 2.0421, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.6397757738240313, |
|
"grad_norm": 0.4658643901348114, |
|
"learning_rate": 2.3817683344756682e-07, |
|
"loss": 2.0225, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.6417255666585424, |
|
"grad_norm": 0.4451207220554352, |
|
"learning_rate": 2.3783413296778616e-07, |
|
"loss": 2.0244, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.643675359493054, |
|
"grad_norm": 0.43841567635536194, |
|
"learning_rate": 2.3749143248800546e-07, |
|
"loss": 1.9797, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.6456251523275651, |
|
"grad_norm": 0.45495790243148804, |
|
"learning_rate": 2.371487320082248e-07, |
|
"loss": 2.039, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.6475749451620765, |
|
"grad_norm": 0.4694961607456207, |
|
"learning_rate": 2.3680603152844415e-07, |
|
"loss": 2.0232, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.6495247379965878, |
|
"grad_norm": 0.4593546986579895, |
|
"learning_rate": 2.3646333104866345e-07, |
|
"loss": 2.0495, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.6514745308310992, |
|
"grad_norm": 0.4738862216472626, |
|
"learning_rate": 2.361206305688828e-07, |
|
"loss": 2.0105, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.6534243236656105, |
|
"grad_norm": 0.45088139176368713, |
|
"learning_rate": 2.357779300891021e-07, |
|
"loss": 2.0418, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.655374116500122, |
|
"grad_norm": 0.4501790702342987, |
|
"learning_rate": 2.3543522960932144e-07, |
|
"loss": 2.0531, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.6573239093346332, |
|
"grad_norm": 0.47187909483909607, |
|
"learning_rate": 2.3509252912954078e-07, |
|
"loss": 1.9907, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.6592737021691444, |
|
"grad_norm": 0.46769675612449646, |
|
"learning_rate": 2.3474982864976008e-07, |
|
"loss": 2.0145, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.661223495003656, |
|
"grad_norm": 0.44854676723480225, |
|
"learning_rate": 2.3440712816997943e-07, |
|
"loss": 2.0381, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.663173287838167, |
|
"grad_norm": 0.4576641023159027, |
|
"learning_rate": 2.3406442769019877e-07, |
|
"loss": 1.9722, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.6651230806726787, |
|
"grad_norm": 0.4568294584751129, |
|
"learning_rate": 2.3372172721041807e-07, |
|
"loss": 1.9744, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.6670728735071898, |
|
"grad_norm": 0.4591883718967438, |
|
"learning_rate": 2.3337902673063742e-07, |
|
"loss": 1.9666, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.6690226663417012, |
|
"grad_norm": 0.44672197103500366, |
|
"learning_rate": 2.3303632625085674e-07, |
|
"loss": 1.9944, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.6709724591762125, |
|
"grad_norm": 0.4896506667137146, |
|
"learning_rate": 2.3269362577107606e-07, |
|
"loss": 2.0492, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.6729222520107239, |
|
"grad_norm": 0.4453061521053314, |
|
"learning_rate": 2.323509252912954e-07, |
|
"loss": 1.9757, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.6748720448452352, |
|
"grad_norm": 0.4569021761417389, |
|
"learning_rate": 2.3200822481151473e-07, |
|
"loss": 2.0523, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.6768218376797466, |
|
"grad_norm": 0.4553905427455902, |
|
"learning_rate": 2.3166552433173405e-07, |
|
"loss": 2.0189, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.678771630514258, |
|
"grad_norm": 0.4560829699039459, |
|
"learning_rate": 2.3132282385195337e-07, |
|
"loss": 2.0833, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.680721423348769, |
|
"grad_norm": 0.4487151503562927, |
|
"learning_rate": 2.3098012337217272e-07, |
|
"loss": 1.9806, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.6826712161832806, |
|
"grad_norm": 0.440891832113266, |
|
"learning_rate": 2.3063742289239204e-07, |
|
"loss": 1.9989, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.6846210090177918, |
|
"grad_norm": 0.469881534576416, |
|
"learning_rate": 2.3029472241261136e-07, |
|
"loss": 2.0626, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.6865708018523033, |
|
"grad_norm": 0.43621349334716797, |
|
"learning_rate": 2.299520219328307e-07, |
|
"loss": 2.063, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.6885205946868145, |
|
"grad_norm": 0.45750436186790466, |
|
"learning_rate": 2.2960932145305003e-07, |
|
"loss": 2.0164, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.6904703875213258, |
|
"grad_norm": 0.46832090616226196, |
|
"learning_rate": 2.2926662097326935e-07, |
|
"loss": 2.0459, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.6924201803558372, |
|
"grad_norm": 0.4424852728843689, |
|
"learning_rate": 2.289239204934887e-07, |
|
"loss": 2.0148, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.6943699731903485, |
|
"grad_norm": 0.4639265239238739, |
|
"learning_rate": 2.28581220013708e-07, |
|
"loss": 2.0453, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.69631976602486, |
|
"grad_norm": 0.42720574140548706, |
|
"learning_rate": 2.2823851953392734e-07, |
|
"loss": 2.0164, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.698269558859371, |
|
"grad_norm": 0.46615973114967346, |
|
"learning_rate": 2.2789581905414668e-07, |
|
"loss": 2.0235, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.7002193516938826, |
|
"grad_norm": 0.46956273913383484, |
|
"learning_rate": 2.2755311857436598e-07, |
|
"loss": 2.0668, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.7021691445283937, |
|
"grad_norm": 0.45590096712112427, |
|
"learning_rate": 2.2721041809458533e-07, |
|
"loss": 2.0767, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.7041189373629053, |
|
"grad_norm": 0.4419032037258148, |
|
"learning_rate": 2.2686771761480465e-07, |
|
"loss": 2.0298, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.7060687301974164, |
|
"grad_norm": 0.48438993096351624, |
|
"learning_rate": 2.2652501713502397e-07, |
|
"loss": 2.0881, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.7080185230319278, |
|
"grad_norm": 0.4674246609210968, |
|
"learning_rate": 2.2618231665524332e-07, |
|
"loss": 1.9858, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.7099683158664392, |
|
"grad_norm": 0.4731968641281128, |
|
"learning_rate": 2.2583961617546264e-07, |
|
"loss": 2.0684, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.7119181087009505, |
|
"grad_norm": 0.44370540976524353, |
|
"learning_rate": 2.2549691569568196e-07, |
|
"loss": 2.0222, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.7138679015354619, |
|
"grad_norm": 0.43057727813720703, |
|
"learning_rate": 2.251542152159013e-07, |
|
"loss": 2.0054, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.7158176943699732, |
|
"grad_norm": 0.4575825035572052, |
|
"learning_rate": 2.2481151473612063e-07, |
|
"loss": 2.0194, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.7177674872044846, |
|
"grad_norm": 0.46100616455078125, |
|
"learning_rate": 2.2446881425633995e-07, |
|
"loss": 2.0362, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.7197172800389957, |
|
"grad_norm": 0.46780040860176086, |
|
"learning_rate": 2.2412611377655927e-07, |
|
"loss": 2.0458, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.7216670728735073, |
|
"grad_norm": 0.4316709339618683, |
|
"learning_rate": 2.2378341329677862e-07, |
|
"loss": 2.0401, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.7236168657080184, |
|
"grad_norm": 0.43883568048477173, |
|
"learning_rate": 2.2344071281699794e-07, |
|
"loss": 2.0407, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.72556665854253, |
|
"grad_norm": 0.44989317655563354, |
|
"learning_rate": 2.2309801233721726e-07, |
|
"loss": 2.0253, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.7275164513770411, |
|
"grad_norm": 0.4468737840652466, |
|
"learning_rate": 2.227553118574366e-07, |
|
"loss": 2.0336, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.7294662442115525, |
|
"grad_norm": 0.45126405358314514, |
|
"learning_rate": 2.224126113776559e-07, |
|
"loss": 2.0259, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.7314160370460638, |
|
"grad_norm": 0.43270209431648254, |
|
"learning_rate": 2.2206991089787525e-07, |
|
"loss": 2.0071, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.7333658298805752, |
|
"grad_norm": 0.4503726363182068, |
|
"learning_rate": 2.217272104180946e-07, |
|
"loss": 2.1025, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.7353156227150865, |
|
"grad_norm": 0.44900792837142944, |
|
"learning_rate": 2.213845099383139e-07, |
|
"loss": 1.9883, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.737265415549598, |
|
"grad_norm": 0.4531221091747284, |
|
"learning_rate": 2.2104180945853324e-07, |
|
"loss": 2.0095, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.7392152083841093, |
|
"grad_norm": 0.46359124779701233, |
|
"learning_rate": 2.2069910897875258e-07, |
|
"loss": 2.003, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.7411650012186204, |
|
"grad_norm": 0.4506163001060486, |
|
"learning_rate": 2.2035640849897188e-07, |
|
"loss": 1.9438, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.743114794053132, |
|
"grad_norm": 0.4618943929672241, |
|
"learning_rate": 2.2001370801919123e-07, |
|
"loss": 2.0772, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.745064586887643, |
|
"grad_norm": 0.4341379404067993, |
|
"learning_rate": 2.1967100753941055e-07, |
|
"loss": 1.9443, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.7470143797221547, |
|
"grad_norm": 0.4800126254558563, |
|
"learning_rate": 2.1932830705962987e-07, |
|
"loss": 1.9994, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.7489641725566658, |
|
"grad_norm": 0.45474764704704285, |
|
"learning_rate": 2.1898560657984922e-07, |
|
"loss": 2.0635, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.7509139653911772, |
|
"grad_norm": 0.44301092624664307, |
|
"learning_rate": 2.1864290610006854e-07, |
|
"loss": 1.9752, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.7528637582256885, |
|
"grad_norm": 0.4428479075431824, |
|
"learning_rate": 2.1830020562028786e-07, |
|
"loss": 1.9371, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.7548135510601999, |
|
"grad_norm": 0.4576126039028168, |
|
"learning_rate": 2.1795750514050718e-07, |
|
"loss": 2.063, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.7567633438947112, |
|
"grad_norm": 0.47722387313842773, |
|
"learning_rate": 2.1761480466072653e-07, |
|
"loss": 2.0743, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.7587131367292224, |
|
"grad_norm": 0.4575481712818146, |
|
"learning_rate": 2.1727210418094585e-07, |
|
"loss": 1.9873, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.760662929563734, |
|
"grad_norm": 0.4340214729309082, |
|
"learning_rate": 2.1692940370116517e-07, |
|
"loss": 1.9459, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.762612722398245, |
|
"grad_norm": 0.41616639494895935, |
|
"learning_rate": 2.1658670322138452e-07, |
|
"loss": 1.9505, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.7645625152327566, |
|
"grad_norm": 0.472650408744812, |
|
"learning_rate": 2.162440027416038e-07, |
|
"loss": 2.0594, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.7665123080672678, |
|
"grad_norm": 0.4756447374820709, |
|
"learning_rate": 2.1590130226182316e-07, |
|
"loss": 1.9695, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.7684621009017791, |
|
"grad_norm": 0.44738152623176575, |
|
"learning_rate": 2.155586017820425e-07, |
|
"loss": 2.0771, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.7704118937362905, |
|
"grad_norm": 0.4602157771587372, |
|
"learning_rate": 2.152159013022618e-07, |
|
"loss": 2.0813, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.7723616865708018, |
|
"grad_norm": 0.46765050292015076, |
|
"learning_rate": 2.1487320082248115e-07, |
|
"loss": 2.0801, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.7743114794053132, |
|
"grad_norm": 0.4703747034072876, |
|
"learning_rate": 2.145305003427005e-07, |
|
"loss": 2.0093, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.7762612722398246, |
|
"grad_norm": 0.48457059264183044, |
|
"learning_rate": 2.141877998629198e-07, |
|
"loss": 2.0528, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.778211065074336, |
|
"grad_norm": 0.478710412979126, |
|
"learning_rate": 2.1384509938313914e-07, |
|
"loss": 2.1099, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.780160857908847, |
|
"grad_norm": 0.4458109438419342, |
|
"learning_rate": 2.1350239890335843e-07, |
|
"loss": 2.0592, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.7821106507433586, |
|
"grad_norm": 0.4474625885486603, |
|
"learning_rate": 2.1315969842357778e-07, |
|
"loss": 2.0055, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.7840604435778697, |
|
"grad_norm": 0.4586813151836395, |
|
"learning_rate": 2.1281699794379713e-07, |
|
"loss": 2.0131, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.7860102364123813, |
|
"grad_norm": 0.45083218812942505, |
|
"learning_rate": 2.1247429746401642e-07, |
|
"loss": 2.0437, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.7879600292468925, |
|
"grad_norm": 0.44078171253204346, |
|
"learning_rate": 2.1213159698423577e-07, |
|
"loss": 1.9792, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.7899098220814038, |
|
"grad_norm": 0.4346940219402313, |
|
"learning_rate": 2.117888965044551e-07, |
|
"loss": 1.9933, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.7918596149159152, |
|
"grad_norm": 0.45846906304359436, |
|
"learning_rate": 2.114461960246744e-07, |
|
"loss": 1.9682, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.7938094077504265, |
|
"grad_norm": 0.4335155785083771, |
|
"learning_rate": 2.1110349554489376e-07, |
|
"loss": 2.03, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.7957592005849379, |
|
"grad_norm": 0.4618023633956909, |
|
"learning_rate": 2.1076079506511308e-07, |
|
"loss": 2.0966, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.7977089934194492, |
|
"grad_norm": 0.46044906973838806, |
|
"learning_rate": 2.104180945853324e-07, |
|
"loss": 2.0873, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.7996587862539606, |
|
"grad_norm": 0.4635170102119446, |
|
"learning_rate": 2.1007539410555175e-07, |
|
"loss": 1.9897, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.8016085790884717, |
|
"grad_norm": 0.4335494637489319, |
|
"learning_rate": 2.0973269362577107e-07, |
|
"loss": 2.0228, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.8035583719229833, |
|
"grad_norm": 0.44605642557144165, |
|
"learning_rate": 2.093899931459904e-07, |
|
"loss": 2.0561, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8055081647574944, |
|
"grad_norm": 0.4611765146255493, |
|
"learning_rate": 2.090472926662097e-07, |
|
"loss": 2.0329, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.807457957592006, |
|
"grad_norm": 0.443036288022995, |
|
"learning_rate": 2.0870459218642906e-07, |
|
"loss": 1.9565, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.8094077504265171, |
|
"grad_norm": 0.4552265405654907, |
|
"learning_rate": 2.0836189170664838e-07, |
|
"loss": 2.0842, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.8113575432610285, |
|
"grad_norm": 0.41511160135269165, |
|
"learning_rate": 2.080191912268677e-07, |
|
"loss": 2.0043, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.8133073360955398, |
|
"grad_norm": 0.44421470165252686, |
|
"learning_rate": 2.0767649074708705e-07, |
|
"loss": 2.0433, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.8152571289300512, |
|
"grad_norm": 0.43709036707878113, |
|
"learning_rate": 2.0733379026730634e-07, |
|
"loss": 2.0405, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.8172069217645626, |
|
"grad_norm": 0.429074227809906, |
|
"learning_rate": 2.069910897875257e-07, |
|
"loss": 1.964, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.8191567145990737, |
|
"grad_norm": 0.4392930269241333, |
|
"learning_rate": 2.0664838930774504e-07, |
|
"loss": 1.9819, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.8211065074335853, |
|
"grad_norm": 0.41590166091918945, |
|
"learning_rate": 2.0630568882796433e-07, |
|
"loss": 1.9821, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.8230563002680964, |
|
"grad_norm": 0.445362389087677, |
|
"learning_rate": 2.0596298834818368e-07, |
|
"loss": 2.092, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.825006093102608, |
|
"grad_norm": 0.43674713373184204, |
|
"learning_rate": 2.0562028786840303e-07, |
|
"loss": 2.0371, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.826955885937119, |
|
"grad_norm": 0.4520663022994995, |
|
"learning_rate": 2.0527758738862232e-07, |
|
"loss": 2.0329, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.8289056787716305, |
|
"grad_norm": 0.4744395613670349, |
|
"learning_rate": 2.0493488690884167e-07, |
|
"loss": 2.0828, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.8308554716061418, |
|
"grad_norm": 0.45714208483695984, |
|
"learning_rate": 2.04592186429061e-07, |
|
"loss": 2.017, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.8328052644406532, |
|
"grad_norm": 0.4604392647743225, |
|
"learning_rate": 2.042494859492803e-07, |
|
"loss": 1.9813, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.8347550572751645, |
|
"grad_norm": 0.43890222907066345, |
|
"learning_rate": 2.0390678546949966e-07, |
|
"loss": 1.9902, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.8367048501096759, |
|
"grad_norm": 0.44383513927459717, |
|
"learning_rate": 2.0356408498971898e-07, |
|
"loss": 2.0434, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.8386546429441872, |
|
"grad_norm": 0.43706512451171875, |
|
"learning_rate": 2.032213845099383e-07, |
|
"loss": 2.052, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.8406044357786984, |
|
"grad_norm": 0.427843302488327, |
|
"learning_rate": 2.0287868403015762e-07, |
|
"loss": 1.8841, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.84255422861321, |
|
"grad_norm": 0.4639602601528168, |
|
"learning_rate": 2.0253598355037697e-07, |
|
"loss": 2.0831, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.844504021447721, |
|
"grad_norm": 0.44139614701271057, |
|
"learning_rate": 2.021932830705963e-07, |
|
"loss": 1.9867, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.8464538142822327, |
|
"grad_norm": 0.4408351182937622, |
|
"learning_rate": 2.018505825908156e-07, |
|
"loss": 2.0199, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.8484036071167438, |
|
"grad_norm": 0.49647897481918335, |
|
"learning_rate": 2.0150788211103496e-07, |
|
"loss": 2.0877, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.8503533999512551, |
|
"grad_norm": 0.46033725142478943, |
|
"learning_rate": 2.0116518163125428e-07, |
|
"loss": 2.0584, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.8523031927857665, |
|
"grad_norm": 0.4471881687641144, |
|
"learning_rate": 2.008224811514736e-07, |
|
"loss": 1.9694, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.8542529856202778, |
|
"grad_norm": 0.435660183429718, |
|
"learning_rate": 2.0047978067169295e-07, |
|
"loss": 2.0025, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.8562027784547892, |
|
"grad_norm": 0.4504587650299072, |
|
"learning_rate": 2.0013708019191224e-07, |
|
"loss": 2.0403, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.8581525712893006, |
|
"grad_norm": 0.446451336145401, |
|
"learning_rate": 1.997943797121316e-07, |
|
"loss": 1.9817, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.860102364123812, |
|
"grad_norm": 0.46191105246543884, |
|
"learning_rate": 1.9945167923235094e-07, |
|
"loss": 2.0329, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.862052156958323, |
|
"grad_norm": 0.4477747976779938, |
|
"learning_rate": 1.9910897875257023e-07, |
|
"loss": 2.0113, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.8640019497928346, |
|
"grad_norm": 0.46400219202041626, |
|
"learning_rate": 1.9876627827278958e-07, |
|
"loss": 2.0142, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.8659517426273458, |
|
"grad_norm": 0.45763564109802246, |
|
"learning_rate": 1.984235777930089e-07, |
|
"loss": 2.0555, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.8679015354618573, |
|
"grad_norm": 0.4603627920150757, |
|
"learning_rate": 1.9808087731322822e-07, |
|
"loss": 2.0022, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.8698513282963685, |
|
"grad_norm": 0.5134696364402771, |
|
"learning_rate": 1.9773817683344757e-07, |
|
"loss": 2.0396, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.8718011211308798, |
|
"grad_norm": 0.46097123622894287, |
|
"learning_rate": 1.973954763536669e-07, |
|
"loss": 2.0887, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.8737509139653912, |
|
"grad_norm": 0.45269545912742615, |
|
"learning_rate": 1.970527758738862e-07, |
|
"loss": 2.0184, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.8757007067999025, |
|
"grad_norm": 0.463885635137558, |
|
"learning_rate": 1.9671007539410553e-07, |
|
"loss": 2.0701, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.8776504996344139, |
|
"grad_norm": 0.4765574634075165, |
|
"learning_rate": 1.9636737491432488e-07, |
|
"loss": 1.9951, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.879600292468925, |
|
"grad_norm": 0.48183631896972656, |
|
"learning_rate": 1.960246744345442e-07, |
|
"loss": 2.0723, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.8815500853034366, |
|
"grad_norm": 0.44266360998153687, |
|
"learning_rate": 1.9568197395476352e-07, |
|
"loss": 2.0134, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.8834998781379477, |
|
"grad_norm": 0.4508133828639984, |
|
"learning_rate": 1.9533927347498287e-07, |
|
"loss": 1.9951, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.8854496709724593, |
|
"grad_norm": 0.4255620539188385, |
|
"learning_rate": 1.949965729952022e-07, |
|
"loss": 1.9663, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.8873994638069704, |
|
"grad_norm": 0.45423394441604614, |
|
"learning_rate": 1.946538725154215e-07, |
|
"loss": 2.0072, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.8893492566414818, |
|
"grad_norm": 0.4226663112640381, |
|
"learning_rate": 1.9431117203564086e-07, |
|
"loss": 1.9598, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.8912990494759931, |
|
"grad_norm": 0.47366762161254883, |
|
"learning_rate": 1.9396847155586015e-07, |
|
"loss": 1.9927, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.8932488423105045, |
|
"grad_norm": 0.44758790731430054, |
|
"learning_rate": 1.936257710760795e-07, |
|
"loss": 1.9628, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.8951986351450159, |
|
"grad_norm": 0.48197463154792786, |
|
"learning_rate": 1.9328307059629885e-07, |
|
"loss": 2.1004, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.8971484279795272, |
|
"grad_norm": 0.4538448750972748, |
|
"learning_rate": 1.9294037011651814e-07, |
|
"loss": 2.0199, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.8990982208140386, |
|
"grad_norm": 0.47362738847732544, |
|
"learning_rate": 1.925976696367375e-07, |
|
"loss": 2.0746, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.9010480136485497, |
|
"grad_norm": 0.47095638513565063, |
|
"learning_rate": 1.922549691569568e-07, |
|
"loss": 1.9897, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.9029978064830613, |
|
"grad_norm": 0.4763641059398651, |
|
"learning_rate": 1.9191226867717613e-07, |
|
"loss": 2.0156, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.9049475993175724, |
|
"grad_norm": 0.4224942922592163, |
|
"learning_rate": 1.9156956819739548e-07, |
|
"loss": 2.0114, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.906897392152084, |
|
"grad_norm": 0.44930440187454224, |
|
"learning_rate": 1.912268677176148e-07, |
|
"loss": 2.0121, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.9088471849865951, |
|
"grad_norm": 0.45916110277175903, |
|
"learning_rate": 1.9088416723783412e-07, |
|
"loss": 2.0053, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.9107969778211065, |
|
"grad_norm": 0.42759600281715393, |
|
"learning_rate": 1.9054146675805347e-07, |
|
"loss": 2.0109, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.9127467706556178, |
|
"grad_norm": 0.49347975850105286, |
|
"learning_rate": 1.901987662782728e-07, |
|
"loss": 2.0657, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.9146965634901292, |
|
"grad_norm": 0.4315294027328491, |
|
"learning_rate": 1.898560657984921e-07, |
|
"loss": 1.9473, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.9166463563246405, |
|
"grad_norm": 0.42915600538253784, |
|
"learning_rate": 1.8951336531871143e-07, |
|
"loss": 1.9958, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.9185961491591519, |
|
"grad_norm": 0.48152124881744385, |
|
"learning_rate": 1.8917066483893078e-07, |
|
"loss": 2.0815, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.9205459419936632, |
|
"grad_norm": 0.44423532485961914, |
|
"learning_rate": 1.888279643591501e-07, |
|
"loss": 2.0227, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.9224957348281744, |
|
"grad_norm": 0.4499359130859375, |
|
"learning_rate": 1.8848526387936942e-07, |
|
"loss": 1.961, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.924445527662686, |
|
"grad_norm": 0.4560549855232239, |
|
"learning_rate": 1.8814256339958877e-07, |
|
"loss": 2.03, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.926395320497197, |
|
"grad_norm": 0.48396381735801697, |
|
"learning_rate": 1.8779986291980806e-07, |
|
"loss": 1.985, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.9283451133317087, |
|
"grad_norm": 0.456910103559494, |
|
"learning_rate": 1.874571624400274e-07, |
|
"loss": 1.9802, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.9302949061662198, |
|
"grad_norm": 0.46041303873062134, |
|
"learning_rate": 1.8711446196024676e-07, |
|
"loss": 1.9507, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.9322446990007311, |
|
"grad_norm": 0.4496663510799408, |
|
"learning_rate": 1.8677176148046605e-07, |
|
"loss": 2.0329, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.9341944918352425, |
|
"grad_norm": 0.4381345212459564, |
|
"learning_rate": 1.864290610006854e-07, |
|
"loss": 1.9643, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.9361442846697539, |
|
"grad_norm": 0.43699464201927185, |
|
"learning_rate": 1.8608636052090475e-07, |
|
"loss": 2.026, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.9380940775042652, |
|
"grad_norm": 0.4496040344238281, |
|
"learning_rate": 1.8574366004112404e-07, |
|
"loss": 1.9318, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.9400438703387763, |
|
"grad_norm": 0.45028945803642273, |
|
"learning_rate": 1.854009595613434e-07, |
|
"loss": 2.0254, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.941993663173288, |
|
"grad_norm": 0.46241873502731323, |
|
"learning_rate": 1.8505825908156268e-07, |
|
"loss": 2.0224, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.943943456007799, |
|
"grad_norm": 0.4494277238845825, |
|
"learning_rate": 1.8471555860178203e-07, |
|
"loss": 2.0734, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.9458932488423106, |
|
"grad_norm": 0.44225579500198364, |
|
"learning_rate": 1.8437285812200138e-07, |
|
"loss": 2.0548, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.9478430416768218, |
|
"grad_norm": 0.4850820004940033, |
|
"learning_rate": 1.8403015764222067e-07, |
|
"loss": 1.9961, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.9497928345113331, |
|
"grad_norm": 0.46442610025405884, |
|
"learning_rate": 1.8368745716244002e-07, |
|
"loss": 1.9777, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9517426273458445, |
|
"grad_norm": 0.457109272480011, |
|
"learning_rate": 1.8334475668265934e-07, |
|
"loss": 2.0949, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.9536924201803558, |
|
"grad_norm": 0.4514349699020386, |
|
"learning_rate": 1.8300205620287866e-07, |
|
"loss": 2.0933, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.9556422130148672, |
|
"grad_norm": 0.4601777195930481, |
|
"learning_rate": 1.82659355723098e-07, |
|
"loss": 1.9975, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.9575920058493785, |
|
"grad_norm": 0.4604569673538208, |
|
"learning_rate": 1.8231665524331733e-07, |
|
"loss": 2.0364, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.95954179868389, |
|
"grad_norm": 0.4434170424938202, |
|
"learning_rate": 1.8197395476353665e-07, |
|
"loss": 1.9835, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.961491591518401, |
|
"grad_norm": 0.45063334703445435, |
|
"learning_rate": 1.81631254283756e-07, |
|
"loss": 1.9904, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.9634413843529126, |
|
"grad_norm": 0.45276153087615967, |
|
"learning_rate": 1.8128855380397532e-07, |
|
"loss": 2.021, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.9653911771874237, |
|
"grad_norm": 0.44774502515792847, |
|
"learning_rate": 1.8094585332419464e-07, |
|
"loss": 2.0024, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.9673409700219353, |
|
"grad_norm": 0.43734362721443176, |
|
"learning_rate": 1.8060315284441396e-07, |
|
"loss": 2.0261, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.9692907628564464, |
|
"grad_norm": 0.45293501019477844, |
|
"learning_rate": 1.802604523646333e-07, |
|
"loss": 2.0781, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.9712405556909578, |
|
"grad_norm": 0.4538004994392395, |
|
"learning_rate": 1.7991775188485263e-07, |
|
"loss": 2.0081, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.9731903485254692, |
|
"grad_norm": 0.45042964816093445, |
|
"learning_rate": 1.7957505140507195e-07, |
|
"loss": 2.0121, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.9751401413599805, |
|
"grad_norm": 0.4721399247646332, |
|
"learning_rate": 1.792323509252913e-07, |
|
"loss": 2.0071, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.9770899341944919, |
|
"grad_norm": 0.4297287166118622, |
|
"learning_rate": 1.788896504455106e-07, |
|
"loss": 2.0213, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.9790397270290032, |
|
"grad_norm": 0.4454828202724457, |
|
"learning_rate": 1.7854694996572994e-07, |
|
"loss": 2.0093, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.9809895198635146, |
|
"grad_norm": 0.4550788700580597, |
|
"learning_rate": 1.782042494859493e-07, |
|
"loss": 2.0599, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.9829393126980257, |
|
"grad_norm": 0.44854849576950073, |
|
"learning_rate": 1.7786154900616858e-07, |
|
"loss": 2.0262, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.9848891055325373, |
|
"grad_norm": 0.4477459192276001, |
|
"learning_rate": 1.7751884852638793e-07, |
|
"loss": 1.9533, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.9868388983670484, |
|
"grad_norm": 0.43663471937179565, |
|
"learning_rate": 1.7717614804660728e-07, |
|
"loss": 2.0122, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.98878869120156, |
|
"grad_norm": 0.45281800627708435, |
|
"learning_rate": 1.7683344756682657e-07, |
|
"loss": 2.0711, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.9907384840360711, |
|
"grad_norm": 0.44143861532211304, |
|
"learning_rate": 1.7649074708704592e-07, |
|
"loss": 2.0198, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.9926882768705825, |
|
"grad_norm": 0.4464763402938843, |
|
"learning_rate": 1.7614804660726524e-07, |
|
"loss": 2.0117, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.9946380697050938, |
|
"grad_norm": 0.42707762122154236, |
|
"learning_rate": 1.7580534612748456e-07, |
|
"loss": 1.9629, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.9965878625396052, |
|
"grad_norm": 0.4683617949485779, |
|
"learning_rate": 1.754626456477039e-07, |
|
"loss": 2.0467, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.9985376553741165, |
|
"grad_norm": 0.4215565025806427, |
|
"learning_rate": 1.7511994516792323e-07, |
|
"loss": 1.9545, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.9985376553741165, |
|
"eval_loss": 2.0196783542633057, |
|
"eval_runtime": 480.5583, |
|
"eval_samples_per_second": 1.294, |
|
"eval_steps_per_second": 0.325, |
|
"step": 1025 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1536, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0284206992292577e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|