|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 367, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 3.2173, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2e-05, |
|
"loss": 3.2031, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 3.1896, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-05, |
|
"loss": 3.5399, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.999925099660787e-05, |
|
"loss": 3.249, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.999700404253208e-05, |
|
"loss": 4.346, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9993259306070256e-05, |
|
"loss": 4.1291, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.998801706770442e-05, |
|
"loss": 3.7064, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9981277720080015e-05, |
|
"loss": 3.5643, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.9973041767976466e-05, |
|
"loss": 3.5031, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.996330982826937e-05, |
|
"loss": 3.4195, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.995208262988431e-05, |
|
"loss": 3.3598, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9939361013742275e-05, |
|
"loss": 3.3177, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.99251459326966e-05, |
|
"loss": 3.2443, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9909438451461695e-05, |
|
"loss": 3.2215, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.989223974653323e-05, |
|
"loss": 3.149, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.9873551106100035e-05, |
|
"loss": 3.1167, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.985337392994763e-05, |
|
"loss": 3.0538, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.983170972935333e-05, |
|
"loss": 3.029, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.9808560126973126e-05, |
|
"loss": 3.0055, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.9783926856720085e-05, |
|
"loss": 2.9598, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.975781176363451e-05, |
|
"loss": 2.9572, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.973021680374571e-05, |
|
"loss": 2.9004, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.9701144043925576e-05, |
|
"loss": 2.8984, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.9670595661733654e-05, |
|
"loss": 2.874, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.963857394525413e-05, |
|
"loss": 2.8689, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.960508129292446e-05, |
|
"loss": 2.8885, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9570120213355636e-05, |
|
"loss": 2.8331, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.953369332514438e-05, |
|
"loss": 2.8093, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.949580335667699e-05, |
|
"loss": 2.7919, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.945645314592495e-05, |
|
"loss": 2.7876, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.9415645640232386e-05, |
|
"loss": 2.791, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.937338389609533e-05, |
|
"loss": 2.7751, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.932967107893274e-05, |
|
"loss": 2.7673, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.928451046284946e-05, |
|
"loss": 2.74, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.923790543039095e-05, |
|
"loss": 2.7354, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9189859472289956e-05, |
|
"loss": 2.699, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9140376187205025e-05, |
|
"loss": 2.7079, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.9089459281451e-05, |
|
"loss": 2.6673, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.903711256872139e-05, |
|
"loss": 2.6733, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.898333996980275e-05, |
|
"loss": 2.659, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.8928145512280973e-05, |
|
"loss": 2.6657, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.8871533330239646e-05, |
|
"loss": 2.6348, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.8813507663950404e-05, |
|
"loss": 2.6497, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.8754072859555346e-05, |
|
"loss": 2.5951, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.869323336874146e-05, |
|
"loss": 2.6479, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.8630993748407274e-05, |
|
"loss": 2.6018, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.856735866032145e-05, |
|
"loss": 2.5817, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.8502332870773675e-05, |
|
"loss": 2.5903, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.843592125021764e-05, |
|
"loss": 2.5757, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.8368128772906254e-05, |
|
"loss": 2.5705, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.829896051651907e-05, |
|
"loss": 2.5914, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.822842166178194e-05, |
|
"loss": 2.5694, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.815651749207902e-05, |
|
"loss": 2.5826, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.8083253393057006e-05, |
|
"loss": 2.5435, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.8008634852221777e-05, |
|
"loss": 2.5286, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.793266745852735e-05, |
|
"loss": 2.5152, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.785535690195728e-05, |
|
"loss": 2.4879, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.7776708973098476e-05, |
|
"loss": 2.5058, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.769672956270749e-05, |
|
"loss": 2.5437, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.761542466126929e-05, |
|
"loss": 2.4668, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.753280035854857e-05, |
|
"loss": 2.501, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7448862843133644e-05, |
|
"loss": 2.4697, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.736361840197288e-05, |
|
"loss": 2.4651, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.727707341990383e-05, |
|
"loss": 2.4394, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.718923437917503e-05, |
|
"loss": 2.4239, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.7100107858960404e-05, |
|
"loss": 2.4541, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.7009700534866557e-05, |
|
"loss": 2.445, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.691801917843273e-05, |
|
"loss": 2.422, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6825070656623626e-05, |
|
"loss": 2.4274, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6730861931315054e-05, |
|
"loss": 2.4062, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.663540005877249e-05, |
|
"loss": 2.4, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.653869218912258e-05, |
|
"loss": 2.4229, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.6440745565817556e-05, |
|
"loss": 2.3909, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.6341567525092727e-05, |
|
"loss": 2.3607, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.6241165495417006e-05, |
|
"loss": 2.399, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.613954699693645e-05, |
|
"loss": 2.3902, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.603671964091107e-05, |
|
"loss": 2.3692, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.593269112914472e-05, |
|
"loss": 2.3436, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.582746925340822e-05, |
|
"loss": 2.3629, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.5721061894855756e-05, |
|
"loss": 2.3287, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.561347702343456e-05, |
|
"loss": 2.3376, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.5504722697288025e-05, |
|
"loss": 2.3418, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.539480706215204e-05, |
|
"loss": 2.3182, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.5283738350744986e-05, |
|
"loss": 2.3287, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.517152488215101e-05, |
|
"loss": 2.3222, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.505817506119698e-05, |
|
"loss": 2.3393, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.494369737782293e-05, |
|
"loss": 2.322, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.4828100406446184e-05, |
|
"loss": 2.2907, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.47113928053191e-05, |
|
"loss": 2.3132, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.45935833158806e-05, |
|
"loss": 2.2972, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.44746807621014e-05, |
|
"loss": 2.2643, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.4354694049823124e-05, |
|
"loss": 2.2527, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.4233632166091205e-05, |
|
"loss": 2.2746, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.4111504178481813e-05, |
|
"loss": 2.2479, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.3988319234422636e-05, |
|
"loss": 2.2954, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.3864086560507785e-05, |
|
"loss": 2.2455, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.373881546180666e-05, |
|
"loss": 2.2442, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.361251532116707e-05, |
|
"loss": 2.2035, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.3485195598512365e-05, |
|
"loss": 2.2234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.3356865830132976e-05, |
|
"loss": 2.2226, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.322753562797209e-05, |
|
"loss": 2.2167, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.309721467890571e-05, |
|
"loss": 2.2481, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.296591274401712e-05, |
|
"loss": 2.1965, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.28336396578658e-05, |
|
"loss": 2.1935, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.270040532775077e-05, |
|
"loss": 2.196, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.256621973296854e-05, |
|
"loss": 2.1733, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.243109292406568e-05, |
|
"loss": 2.1735, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.229503502208602e-05, |
|
"loss": 2.1915, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.215805621781256e-05, |
|
"loss": 2.1795, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.202016677100422e-05, |
|
"loss": 2.2021, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.188137700962733e-05, |
|
"loss": 2.201, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.174169732908209e-05, |
|
"loss": 2.1857, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.1601138191423966e-05, |
|
"loss": 2.1818, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.145971012458005e-05, |
|
"loss": 2.1438, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.13174237215605e-05, |
|
"loss": 2.1359, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.11742896396652e-05, |
|
"loss": 2.1668, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.103031859968542e-05, |
|
"loss": 2.1769, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.0885521385100885e-05, |
|
"loss": 2.1445, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.0739908841272095e-05, |
|
"loss": 2.1193, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.059349187462798e-05, |
|
"loss": 2.1285, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.044628145184899e-05, |
|
"loss": 2.1411, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.0298288599045747e-05, |
|
"loss": 2.1321, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.0149524400933114e-05, |
|
"loss": 2.1008, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 2.1437, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.9849726595674756e-05, |
|
"loss": 2.1224, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.9698715443486338e-05, |
|
"loss": 2.083, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.9546977854221266e-05, |
|
"loss": 2.1156, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.9394525193076454e-05, |
|
"loss": 2.1127, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.9241368878807925e-05, |
|
"loss": 2.0949, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.908752038287558e-05, |
|
"loss": 2.0821, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.8932991228583954e-05, |
|
"loss": 2.0735, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.877779299021912e-05, |
|
"loss": 2.0996, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8621937292181768e-05, |
|
"loss": 2.0967, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.846543580811656e-05, |
|
"loss": 2.0313, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8308300260037734e-05, |
|
"loss": 2.0891, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8150542417451144e-05, |
|
"loss": 2.0816, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.7992174096472714e-05, |
|
"loss": 2.0765, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.783320715894341e-05, |
|
"loss": 2.026, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.767365351154077e-05, |
|
"loss": 2.0424, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.751352510488711e-05, |
|
"loss": 2.074, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.7352833932654402e-05, |
|
"loss": 2.0189, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.719159203066597e-05, |
|
"loss": 2.0283, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.702981147599495e-05, |
|
"loss": 2.0373, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6867504386059776e-05, |
|
"loss": 2.0141, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6704682917716528e-05, |
|
"loss": 2.0197, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6541359266348437e-05, |
|
"loss": 2.0168, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.637754566495238e-05, |
|
"loss": 2.0032, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6213254383222665e-05, |
|
"loss": 2.0038, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6048497726632023e-05, |
|
"loss": 1.9901, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.588328803550993e-05, |
|
"loss": 1.9917, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.571763768411829e-05, |
|
"loss": 1.9718, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.555155907972461e-05, |
|
"loss": 2.0155, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5385064661672692e-05, |
|
"loss": 2.0072, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5218166900450937e-05, |
|
"loss": 1.9935, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5050878296758255e-05, |
|
"loss": 1.9879, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.488321138056783e-05, |
|
"loss": 2.0028, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.471517871018855e-05, |
|
"loss": 1.9669, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.4546792871324424e-05, |
|
"loss": 1.9854, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.43780664761319e-05, |
|
"loss": 1.9613, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.4209012162275217e-05, |
|
"loss": 1.9474, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.4039642591979825e-05, |
|
"loss": 1.9424, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.3869970451083996e-05, |
|
"loss": 1.9656, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.370000844808863e-05, |
|
"loss": 1.9686, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.3529769313205423e-05, |
|
"loss": 1.9655, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.3359265797403297e-05, |
|
"loss": 1.97, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.318851067145345e-05, |
|
"loss": 1.9435, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3017516724972716e-05, |
|
"loss": 1.972, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2846296765465708e-05, |
|
"loss": 1.9789, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.267486361736546e-05, |
|
"loss": 1.9466, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.250323012107292e-05, |
|
"loss": 1.9231, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2331409131995186e-05, |
|
"loss": 1.9672, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2159413519582623e-05, |
|
"loss": 1.9265, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.1987256166364937e-05, |
|
"loss": 1.911, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.1814949966986288e-05, |
|
"loss": 1.9236, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.1642507827239455e-05, |
|
"loss": 1.9543, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.1469942663099208e-05, |
|
"loss": 1.9296, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.129726739975486e-05, |
|
"loss": 1.9292, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.112449497064223e-05, |
|
"loss": 1.9132, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.095163831647485e-05, |
|
"loss": 1.9087, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.0778710384274757e-05, |
|
"loss": 1.9137, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0605724126402734e-05, |
|
"loss": 1.8797, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0432692499588174e-05, |
|
"loss": 1.8937, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.025962846395862e-05, |
|
"loss": 1.9124, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0086544982069046e-05, |
|
"loss": 1.8763, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9913455017930964e-05, |
|
"loss": 1.8699, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9740371536041388e-05, |
|
"loss": 1.8841, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.9567307500411833e-05, |
|
"loss": 1.9116, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.939427587359727e-05, |
|
"loss": 1.8997, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.922128961572525e-05, |
|
"loss": 1.8716, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9048361683525155e-05, |
|
"loss": 1.8651, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.8875505029357775e-05, |
|
"loss": 1.8827, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8702732600245138e-05, |
|
"loss": 1.871, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8530057336900805e-05, |
|
"loss": 1.8788, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.835749217276055e-05, |
|
"loss": 1.8641, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8185050033013715e-05, |
|
"loss": 1.8856, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8012743833635067e-05, |
|
"loss": 1.8524, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7840586480417387e-05, |
|
"loss": 1.8142, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.766859086800482e-05, |
|
"loss": 1.8361, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7496769878927085e-05, |
|
"loss": 1.8197, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7325136382634547e-05, |
|
"loss": 1.8723, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7153703234534302e-05, |
|
"loss": 1.8088, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6982483275027287e-05, |
|
"loss": 1.8264, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6811489328546557e-05, |
|
"loss": 1.7955, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6640734202596702e-05, |
|
"loss": 1.8337, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.647023068679459e-05, |
|
"loss": 1.8571, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6299991551911373e-05, |
|
"loss": 1.8662, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6130029548916007e-05, |
|
"loss": 1.8593, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.5960357408020178e-05, |
|
"loss": 1.8387, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.579098783772479e-05, |
|
"loss": 1.8395, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.5621933523868106e-05, |
|
"loss": 1.8271, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.5453207128675583e-05, |
|
"loss": 1.8738, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.5284821289811453e-05, |
|
"loss": 1.8598, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.5116788619432177e-05, |
|
"loss": 1.832, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4949121703241747e-05, |
|
"loss": 1.8176, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4781833099549072e-05, |
|
"loss": 1.8302, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.461493533832731e-05, |
|
"loss": 1.848, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4448440920275402e-05, |
|
"loss": 1.7919, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4282362315881719e-05, |
|
"loss": 1.8084, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4116711964490076e-05, |
|
"loss": 1.7979, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.395150227336798e-05, |
|
"loss": 1.8167, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.3786745616777348e-05, |
|
"loss": 1.8015, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3622454335047631e-05, |
|
"loss": 1.8359, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.345864073365157e-05, |
|
"loss": 1.8111, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.329531708228347e-05, |
|
"loss": 1.814, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3132495613940237e-05, |
|
"loss": 1.7876, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.2970188524005058e-05, |
|
"loss": 1.7877, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.2808407969334037e-05, |
|
"loss": 1.7683, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.2647166067345598e-05, |
|
"loss": 1.8017, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.24864748951129e-05, |
|
"loss": 1.7989, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2326346488459237e-05, |
|
"loss": 1.752, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2166792841056596e-05, |
|
"loss": 1.7771, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2007825903527287e-05, |
|
"loss": 1.7739, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.1849457582548864e-05, |
|
"loss": 1.7821, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.1691699739962275e-05, |
|
"loss": 1.7745, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.153456419188345e-05, |
|
"loss": 1.7759, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.137806270781824e-05, |
|
"loss": 1.795, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1222207009780888e-05, |
|
"loss": 1.7348, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1067008771416047e-05, |
|
"loss": 1.7385, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.091247961712442e-05, |
|
"loss": 1.798, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0758631121192075e-05, |
|
"loss": 1.7615, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0605474806923556e-05, |
|
"loss": 1.7768, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0453022145778742e-05, |
|
"loss": 1.735, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0301284556513669e-05, |
|
"loss": 1.7635, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0150273404325244e-05, |
|
"loss": 1.7395, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0000000000000006e-05, |
|
"loss": 1.7238, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.85047559906689e-06, |
|
"loss": 1.7633, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.70171140095426e-06, |
|
"loss": 1.7585, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.553718548151011e-06, |
|
"loss": 1.7379, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.406508125372034e-06, |
|
"loss": 1.7427, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.260091158727913e-06, |
|
"loss": 1.7205, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.114478614899123e-06, |
|
"loss": 1.7576, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.969681400314589e-06, |
|
"loss": 1.7559, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.825710360334812e-06, |
|
"loss": 1.7465, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.682576278439504e-06, |
|
"loss": 1.7571, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.540289875419962e-06, |
|
"loss": 1.7239, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.39886180857604e-06, |
|
"loss": 1.737, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.258302670917915e-06, |
|
"loss": 1.7449, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.118622990372676e-06, |
|
"loss": 1.7399, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.979833228995782e-06, |
|
"loss": 1.7411, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.841943782187435e-06, |
|
"loss": 1.7333, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.704964977913984e-06, |
|
"loss": 1.7641, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.568907075934322e-06, |
|
"loss": 1.7311, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.433780267031463e-06, |
|
"loss": 1.7022, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.299594672249231e-06, |
|
"loss": 1.7178, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.166360342134202e-06, |
|
"loss": 1.7497, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.034087255982882e-06, |
|
"loss": 1.7196, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.902785321094301e-06, |
|
"loss": 1.707, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.7724643720279156e-06, |
|
"loss": 1.7525, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.643134169867031e-06, |
|
"loss": 1.6995, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.514804401487642e-06, |
|
"loss": 1.7229, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.38748467883294e-06, |
|
"loss": 1.7021, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.261184538193341e-06, |
|
"loss": 1.6753, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.135913439492227e-06, |
|
"loss": 1.7111, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.01168076557737e-06, |
|
"loss": 1.7046, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.888495821518194e-06, |
|
"loss": 1.7362, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.7663678339087995e-06, |
|
"loss": 1.7811, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.6453059501768806e-06, |
|
"loss": 1.6819, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.5253192378985966e-06, |
|
"loss": 1.7154, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.4064166841194e-06, |
|
"loss": 1.7069, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.288607194680899e-06, |
|
"loss": 1.715, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.171899593553824e-06, |
|
"loss": 1.7173, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.056302622177074e-06, |
|
"loss": 1.6873, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.941824938803024e-06, |
|
"loss": 1.7291, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.828475117848992e-06, |
|
"loss": 1.6928, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.716261649255021e-06, |
|
"loss": 1.6815, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.605192937847962e-06, |
|
"loss": 1.6701, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.495277302711982e-06, |
|
"loss": 1.7119, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.386522976565439e-06, |
|
"loss": 1.6813, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.278938105144255e-06, |
|
"loss": 1.6945, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.172530746591783e-06, |
|
"loss": 1.7221, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.06730887085528e-06, |
|
"loss": 1.7254, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.963280359088933e-06, |
|
"loss": 1.6873, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.86045300306356e-06, |
|
"loss": 1.6773, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7588345045830044e-06, |
|
"loss": 1.7121, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.658432474907274e-06, |
|
"loss": 1.6798, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.559254434182451e-06, |
|
"loss": 1.6762, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.461307810877428e-06, |
|
"loss": 1.6934, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.364599941227513e-06, |
|
"loss": 1.6862, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.2691380686849517e-06, |
|
"loss": 1.6708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.174929343376374e-06, |
|
"loss": 1.6764, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.081980821567272e-06, |
|
"loss": 1.6886, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.990299465133446e-06, |
|
"loss": 1.6679, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.8998921410396e-06, |
|
"loss": 1.6854, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.8107656208249733e-06, |
|
"loss": 1.681, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.72292658009617e-06, |
|
"loss": 1.6987, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.6363815980271248e-06, |
|
"loss": 1.6574, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.551137156866357e-06, |
|
"loss": 1.7037, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.4671996414514276e-06, |
|
"loss": 1.702, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.384575338730717e-06, |
|
"loss": 1.6788, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.3032704372925176e-06, |
|
"loss": 1.6624, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.223291026901533e-06, |
|
"loss": 1.693, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.144643098042727e-06, |
|
"loss": 1.6687, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.0673325414726574e-06, |
|
"loss": 1.6914, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.991365147778228e-06, |
|
"loss": 1.6708, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.9167466069429964e-06, |
|
"loss": 1.6752, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.8434825079209884e-06, |
|
"loss": 1.6893, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7715783382180672e-06, |
|
"loss": 1.6681, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7010394834809373e-06, |
|
"loss": 1.6982, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6318712270937442e-06, |
|
"loss": 1.6421, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.5640787497823585e-06, |
|
"loss": 1.7007, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4976671292263257e-06, |
|
"loss": 1.6832, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4326413396785488e-06, |
|
"loss": 1.6584, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.3690062515927239e-06, |
|
"loss": 1.7009, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.306766631258536e-06, |
|
"loss": 1.6755, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.245927140444665e-06, |
|
"loss": 1.686, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1864923360496028e-06, |
|
"loss": 1.6916, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.128466669760362e-06, |
|
"loss": 1.6923, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0718544877190306e-06, |
|
"loss": 1.7079, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0166600301972517e-06, |
|
"loss": 1.6865, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.628874312786096e-07, |
|
"loss": 1.6624, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.105407185490067e-07, |
|
"loss": 1.6489, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.59623812794983e-07, |
|
"loss": 1.6923, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.101405277100549e-07, |
|
"loss": 1.678, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.620945696090532e-07, |
|
"loss": 1.6302, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.154895371505421e-07, |
|
"loss": 1.6555, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.703289210672603e-07, |
|
"loss": 1.6805, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.266161039046737e-07, |
|
"loss": 1.6744, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.843543597676138e-07, |
|
"loss": 1.6585, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.435468540750544e-07, |
|
"loss": 1.6951, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.041966433230094e-07, |
|
"loss": 1.6666, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.6630667485561885e-07, |
|
"loss": 1.6757, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.2987978664436936e-07, |
|
"loss": 1.6719, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.9491870707554445e-07, |
|
"loss": 1.6575, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.614260547458659e-07, |
|
"loss": 1.6398, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2940433826635257e-07, |
|
"loss": 1.6523, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9885595607443086e-07, |
|
"loss": 1.6521, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.697831962542874e-07, |
|
"loss": 1.6598, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4218823636549703e-07, |
|
"loss": 1.6721, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.1607314327991791e-07, |
|
"loss": 1.6975, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9143987302687738e-07, |
|
"loss": 1.6652, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.682902706466738e-07, |
|
"loss": 1.6554, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.4662607005237805e-07, |
|
"loss": 1.6603, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.264488938999664e-07, |
|
"loss": 1.6843, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0776025346677321e-07, |
|
"loss": 1.69, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.056154853830823e-08, |
|
"loss": 1.6755, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.485406730340483e-08, |
|
"loss": 1.6577, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.06389862577328e-08, |
|
"loss": 1.7059, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.7917370115688756e-08, |
|
"loss": 1.7009, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.66901717306356e-08, |
|
"loss": 1.6863, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.6958232023539532e-08, |
|
"loss": 1.6557, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.8722279919987098e-08, |
|
"loss": 1.6717, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1982932295582227e-08, |
|
"loss": 1.6401, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.7406939297520734e-09, |
|
"loss": 1.6846, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.995957467923916e-09, |
|
"loss": 1.6775, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.490033921331296e-10, |
|
"loss": 1.675, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.6419, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 367, |
|
"total_flos": 3.839475069608788e+18, |
|
"train_loss": 2.069092570598509, |
|
"train_runtime": 4018.2223, |
|
"train_samples_per_second": 174.946, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"max_steps": 367, |
|
"num_train_epochs": 1, |
|
"total_flos": 3.839475069608788e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|