[
  {
    "loss": 0.2659,
    "grad_norm": 3.4286012649536133,
    "learning_rate": 1.9715261958997724e-05,
    "epoch": 0.2847380410022779,
    "step": 500
  },
  {
    "loss": 0.1081,
    "grad_norm": 0.7319045066833496,
    "learning_rate": 1.9430523917995446e-05,
    "epoch": 0.5694760820045558,
    "step": 1000
  },
  {
    "loss": 0.0777,
    "grad_norm": 3.7819042205810547,
    "learning_rate": 1.9145785876993168e-05,
    "epoch": 0.8542141230068337,
    "step": 1500
  },
  {
    "eval_loss": 0.07203580439090729,
    "eval_overall": {
      "precision": 0.8912693298969072,
      "recall": 0.931167956916863,
      "f1": 0.9107818930041152,
      "accuracy": 0.9814269735680226
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9239187076602398,
        "recall": 0.965160587915079,
        "f1": 0.9440894568690097,
        "number": 1837
      },
      "MISC": {
        "precision": 0.7727272727272727,
        "recall": 0.8297180043383948,
        "f1": 0.8002092050209204,
        "number": 922
      },
      "ORG": {
        "precision": 0.8624823695345557,
        "recall": 0.9120059656972409,
        "f1": 0.8865530989488946,
        "number": 1341
      },
      "PER": {
        "precision": 0.9420520999468368,
        "recall": 0.9619978284473398,
        "f1": 0.9519204942250873,
        "number": 1842
      }
    },
    "eval_runtime": 7.4533,
    "eval_samples_per_second": 436.049,
    "eval_steps_per_second": 54.607,
    "epoch": 1.0,
    "step": 1756
  },
  {
    "loss": 0.0631,
    "grad_norm": 0.11871737241744995,
    "learning_rate": 1.886104783599089e-05,
    "epoch": 1.1389521640091116,
    "step": 2000
  },
  {
    "loss": 0.0464,
    "grad_norm": 0.2318667322397232,
    "learning_rate": 1.8576309794988612e-05,
    "epoch": 1.4236902050113895,
    "step": 2500
  },
  {
    "loss": 0.0442,
    "grad_norm": 0.35079020261764526,
    "learning_rate": 1.8291571753986334e-05,
    "epoch": 1.7084282460136673,
    "step": 3000
  },
  {
    "loss": 0.0411,
    "grad_norm": 0.06829982250928879,
    "learning_rate": 1.8006833712984056e-05,
    "epoch": 1.9931662870159452,
    "step": 3500
  },
  {
    "eval_loss": 0.07787470519542694,
    "eval_overall": {
      "precision": 0.9304723885562209,
      "recall": 0.9414338606529788,
      "f1": 0.9359210306173666,
      "accuracy": 0.9842968152116324
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9511015583019882,
        "recall": 0.9635274904735982,
        "f1": 0.9572742022714981,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8725910064239829,
        "recall": 0.8839479392624728,
        "f1": 0.8782327586206896,
        "number": 922
      },
      "ORG": {
        "precision": 0.9050822122571002,
        "recall": 0.9030574198359433,
        "f1": 0.9040686823441583,
        "number": 1341
      },
      "PER": {
        "precision": 0.9568919638105375,
        "recall": 0.9761129207383279,
        "f1": 0.9664068798710024,
        "number": 1842
      }
    },
    "eval_runtime": 7.5011,
    "eval_samples_per_second": 433.267,
    "eval_steps_per_second": 54.258,
    "epoch": 2.0,
    "step": 3512
  },
  {
    "loss": 0.0258,
    "grad_norm": 4.330985069274902,
    "learning_rate": 1.7722095671981778e-05,
    "epoch": 2.277904328018223,
    "step": 4000
  },
  {
    "loss": 0.0284,
    "grad_norm": 0.06637139618396759,
    "learning_rate": 1.74373576309795e-05,
    "epoch": 2.562642369020501,
    "step": 4500
  },
  {
    "loss": 0.0269,
    "grad_norm": 11.263030052185059,
    "learning_rate": 1.7152619589977222e-05,
    "epoch": 2.847380410022779,
    "step": 5000
  },
  {
    "eval_loss": 0.07202505320310593,
    "eval_overall": {
      "precision": 0.9318106587222774,
      "recall": 0.9474924267923258,
      "f1": 0.9395861148197597,
      "accuracy": 0.9857685288750221
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9626218851570965,
        "recall": 0.9673380511703865,
        "f1": 0.9649742058104807,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8943355119825708,
        "recall": 0.8904555314533622,
        "f1": 0.8923913043478261,
        "number": 922
      },
      "ORG": {
        "precision": 0.8772791023842917,
        "recall": 0.9328859060402684,
        "f1": 0.9042284062161186,
        "number": 1341
      },
      "PER": {
        "precision": 0.9616630669546437,
        "recall": 0.9668838219326819,
        "f1": 0.9642663779101246,
        "number": 1842
      }
    },
    "eval_runtime": 6.5043,
    "eval_samples_per_second": 499.669,
    "eval_steps_per_second": 62.574,
    "epoch": 3.0,
    "step": 5268
  },
  {
    "loss": 0.0226,
    "grad_norm": 0.7242124676704407,
    "learning_rate": 1.6867881548974945e-05,
    "epoch": 3.132118451025057,
    "step": 5500
  },
  {
    "loss": 0.0165,
    "grad_norm": 0.006902824155986309,
    "learning_rate": 1.6583143507972667e-05,
    "epoch": 3.416856492027335,
    "step": 6000
  },
  {
    "loss": 0.0176,
    "grad_norm": 0.031127002090215683,
    "learning_rate": 1.629840546697039e-05,
    "epoch": 3.7015945330296125,
    "step": 6500
  },
  {
    "loss": 0.0196,
    "grad_norm": 0.0038718737196177244,
    "learning_rate": 1.601366742596811e-05,
    "epoch": 3.9863325740318905,
    "step": 7000
  },
  {
    "eval_loss": 0.08569859713315964,
    "eval_overall": {
      "precision": 0.930635838150289,
      "recall": 0.9483338943116796,
      "f1": 0.9394015170459282,
      "accuracy": 0.9850621063165951
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9533011272141707,
        "recall": 0.9667936853565596,
        "f1": 0.96,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8623188405797102,
        "recall": 0.903470715835141,
        "f1": 0.8824152542372882,
        "number": 922
      },
      "ORG": {
        "precision": 0.8995664739884393,
        "recall": 0.9284116331096197,
        "f1": 0.9137614678899083,
        "number": 1341
      },
      "PER": {
        "precision": 0.9668838219326819,
        "recall": 0.9668838219326819,
        "f1": 0.9668838219326819,
        "number": 1842
      }
    },
    "eval_runtime": 7.5249,
    "eval_samples_per_second": 431.899,
    "eval_steps_per_second": 54.087,
    "epoch": 4.0,
    "step": 7024
  },
  {
    "loss": 0.0138,
    "grad_norm": 0.17987537384033203,
    "learning_rate": 1.5728929384965833e-05,
    "epoch": 4.271070615034168,
    "step": 7500
  },
  {
    "loss": 0.0134,
    "grad_norm": 1.4385559558868408,
    "learning_rate": 1.5444191343963555e-05,
    "epoch": 4.555808656036446,
    "step": 8000
  },
  {
    "loss": 0.0139,
    "grad_norm": 0.02356315404176712,
    "learning_rate": 1.5159453302961277e-05,
    "epoch": 4.840546697038724,
    "step": 8500
  },
  {
    "eval_loss": 0.08417785912752151,
    "eval_overall": {
      "precision": 0.9310686015831134,
      "recall": 0.9501851228542578,
      "f1": 0.9405297351324339,
      "accuracy": 0.9851062577264967
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9627228525121556,
        "recall": 0.9700598802395209,
        "f1": 0.9663774403470715,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8386454183266933,
        "recall": 0.913232104121475,
        "f1": 0.8743509865005192,
        "number": 922
      },
      "ORG": {
        "precision": 0.9070110701107011,
        "recall": 0.9164802386278896,
        "f1": 0.9117210682492581,
        "number": 1341
      },
      "PER": {
        "precision": 0.9670981661272924,
        "recall": 0.9733984799131379,
        "f1": 0.9702380952380952,
        "number": 1842
      }
    },
    "eval_runtime": 7.5038,
    "eval_samples_per_second": 433.114,
    "eval_steps_per_second": 54.239,
    "epoch": 5.0,
    "step": 8780
  },
  {
    "loss": 0.0092,
    "grad_norm": 0.7477717399597168,
    "learning_rate": 1.4874715261958999e-05,
    "epoch": 5.125284738041002,
    "step": 9000
  },
  {
    "loss": 0.006,
    "grad_norm": 0.04322722181677818,
    "learning_rate": 1.4589977220956721e-05,
    "epoch": 5.41002277904328,
    "step": 9500
  },
  {
    "loss": 0.0073,
    "grad_norm": 0.051729559898376465,
    "learning_rate": 1.4305239179954442e-05,
    "epoch": 5.694760820045558,
    "step": 10000
  },
  {
    "loss": 0.0079,
    "grad_norm": 0.012189100496470928,
    "learning_rate": 1.4020501138952165e-05,
    "epoch": 5.979498861047836,
    "step": 10500
  },
  {
    "eval_loss": 0.09725591540336609,
    "eval_overall": {
      "precision": 0.9291845493562232,
      "recall": 0.9473241332884551,
      "f1": 0.9381666666666667,
      "accuracy": 0.9844734208512392
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9546424759871932,
        "recall": 0.9738704409363091,
        "f1": 0.9641606036108865,
        "number": 1837
      },
      "MISC": {
        "precision": 0.858739837398374,
        "recall": 0.9164859002169198,
        "f1": 0.8866736621196223,
        "number": 922
      },
      "ORG": {
        "precision": 0.8921852387843705,
        "recall": 0.9194630872483222,
        "f1": 0.9056188027910393,
        "number": 1341
      },
      "PER": {
        "precision": 0.9691969196919692,
        "recall": 0.9565689467969598,
        "f1": 0.9628415300546448,
        "number": 1842
      }
    },
    "eval_runtime": 7.4822,
    "eval_samples_per_second": 434.362,
    "eval_steps_per_second": 54.395,
    "epoch": 6.0,
    "step": 10536
  },
  {
    "loss": 0.008,
    "grad_norm": 0.0029869996942579746,
    "learning_rate": 1.3735763097949887e-05,
    "epoch": 6.264236902050114,
    "step": 11000
  },
  {
    "loss": 0.0066,
    "grad_norm": 0.018263721838593483,
    "learning_rate": 1.3451025056947608e-05,
    "epoch": 6.548974943052392,
    "step": 11500
  },
  {
    "loss": 0.0057,
    "grad_norm": 0.020874306559562683,
    "learning_rate": 1.3166287015945332e-05,
    "epoch": 6.83371298405467,
    "step": 12000
  },
  {
    "eval_loss": 0.097112737596035,
    "eval_overall": {
      "precision": 0.9343584656084656,
      "recall": 0.9510265903736116,
      "f1": 0.9426188490408675,
      "accuracy": 0.9859745687878966
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9689373297002725,
        "recall": 0.9678824169842134,
        "f1": 0.968409586056645,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8805031446540881,
        "recall": 0.911062906724512,
        "f1": 0.8955223880597015,
        "number": 922
      },
      "ORG": {
        "precision": 0.8851063829787233,
        "recall": 0.930648769574944,
        "f1": 0.9073064340239914,
        "number": 1341
      },
      "PER": {
        "precision": 0.965386695511087,
        "recall": 0.9690553745928339,
        "f1": 0.967217556217827,
        "number": 1842
      }
    },
    "eval_runtime": 7.4789,
    "eval_samples_per_second": 434.555,
    "eval_steps_per_second": 54.42,
    "epoch": 7.0,
    "step": 12292
  },
  {
    "loss": 0.0031,
    "grad_norm": 0.0007487820694223046,
    "learning_rate": 1.2881548974943054e-05,
    "epoch": 7.118451025056948,
    "step": 12500
  },
  {
    "loss": 0.0037,
    "grad_norm": 15.724783897399902,
    "learning_rate": 1.2596810933940776e-05,
    "epoch": 7.403189066059226,
    "step": 13000
  },
  {
    "loss": 0.0061,
    "grad_norm": 0.614613950252533,
    "learning_rate": 1.2312072892938498e-05,
    "epoch": 7.687927107061503,
    "step": 13500
  },
  {
    "loss": 0.0036,
    "grad_norm": 0.0012391641503199935,
    "learning_rate": 1.2027334851936218e-05,
    "epoch": 7.972665148063781,
    "step": 14000
  },
  {
    "eval_loss": 0.10294844955205917,
    "eval_overall": {
      "precision": 0.9285831285831286,
      "recall": 0.9540558734432851,
      "f1": 0.9411471735701834,
      "accuracy": 0.9852387119562018
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9664864864864865,
        "recall": 0.9733260751224823,
        "f1": 0.9698942229454841,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8497983870967742,
        "recall": 0.9143167028199566,
        "f1": 0.8808777429467085,
        "number": 922
      },
      "ORG": {
        "precision": 0.9019607843137255,
        "recall": 0.9261744966442953,
        "f1": 0.9139072847682119,
        "number": 1341
      },
      "PER": {
        "precision": 0.9522799575821845,
        "recall": 0.9750271444082519,
        "f1": 0.9635193133047211,
        "number": 1842
      }
    },
    "eval_runtime": 7.6218,
    "eval_samples_per_second": 426.41,
    "eval_steps_per_second": 53.4,
    "epoch": 8.0,
    "step": 14048
  },
  {
    "loss": 0.0042,
    "grad_norm": 0.15165293216705322,
    "learning_rate": 1.1742596810933942e-05,
    "epoch": 8.257403189066059,
    "step": 14500
  },
  {
    "loss": 0.0038,
    "grad_norm": 0.01020512543618679,
    "learning_rate": 1.1457858769931664e-05,
    "epoch": 8.542141230068337,
    "step": 15000
  },
  {
    "loss": 0.0028,
    "grad_norm": 0.00046127362293191254,
    "learning_rate": 1.1173120728929384e-05,
    "epoch": 8.826879271070615,
    "step": 15500
  },
  {
    "eval_loss": 0.11473368108272552,
    "eval_overall": {
      "precision": 0.9368072787427626,
      "recall": 0.9530461124200605,
      "f1": 0.944856928339034,
      "accuracy": 0.9860187201977983
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9632034632034632,
        "recall": 0.9689711486118672,
        "f1": 0.966078697421981,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8518145161290323,
        "recall": 0.9164859002169198,
        "f1": 0.8829676071055382,
        "number": 922
      },
      "ORG": {
        "precision": 0.9313207547169812,
        "recall": 0.9202087994034303,
        "f1": 0.9257314328582145,
        "number": 1341
      },
      "PER": {
        "precision": 0.9595744680851064,
        "recall": 0.9793702497285559,
        "f1": 0.969371305749597,
        "number": 1842
      }
    },
    "eval_runtime": 7.5249,
    "eval_samples_per_second": 431.9,
    "eval_steps_per_second": 54.087,
    "epoch": 9.0,
    "step": 15804
  },
  {
    "loss": 0.0049,
    "grad_norm": 0.17741906642913818,
    "learning_rate": 1.0888382687927108e-05,
    "epoch": 9.111617312072893,
    "step": 16000
  },
  {
    "loss": 0.003,
    "grad_norm": 0.0018998866435140371,
    "learning_rate": 1.060364464692483e-05,
    "epoch": 9.39635535307517,
    "step": 16500
  },
  {
    "loss": 0.0042,
    "grad_norm": 0.0021700740326195955,
    "learning_rate": 1.0318906605922552e-05,
    "epoch": 9.681093394077449,
    "step": 17000
  },
  {
    "loss": 0.0026,
    "grad_norm": 0.014039441011846066,
    "learning_rate": 1.0034168564920275e-05,
    "epoch": 9.965831435079727,
    "step": 17500
  },
  {
    "eval_loss": 0.1184961125254631,
    "eval_overall": {
      "precision": 0.9358889623265036,
      "recall": 0.9532144059239314,
      "f1": 0.9444722361180591,
      "accuracy": 0.9852681462294696
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9669197396963124,
        "recall": 0.9706042460533478,
        "f1": 0.9687584895408857,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8826638477801269,
        "recall": 0.9056399132321041,
        "f1": 0.8940042826552462,
        "number": 922
      },
      "ORG": {
        "precision": 0.8961318051575932,
        "recall": 0.9328859060402684,
        "f1": 0.9141395688710267,
        "number": 1341
      },
      "PER": {
        "precision": 0.9619506966773848,
        "recall": 0.9744842562432139,
        "f1": 0.9681769147788565,
        "number": 1842
      }
    },
    "eval_runtime": 7.5803,
    "eval_samples_per_second": 428.744,
    "eval_steps_per_second": 53.692,
    "epoch": 10.0,
    "step": 17560
  },
  {
    "loss": 0.0011,
    "grad_norm": 0.00040622701635584235,
    "learning_rate": 9.749430523917997e-06,
    "epoch": 10.250569476082005,
    "step": 18000
  },
  {
    "loss": 0.001,
    "grad_norm": 0.0004215097869746387,
    "learning_rate": 9.464692482915719e-06,
    "epoch": 10.535307517084282,
    "step": 18500
  },
  {
    "loss": 0.002,
    "grad_norm": 0.0020023963879793882,
    "learning_rate": 9.17995444191344e-06,
    "epoch": 10.82004555808656,
    "step": 19000
  },
  {
    "eval_loss": 0.11947210878133774,
    "eval_overall": {
      "precision": 0.9384717168375786,
      "recall": 0.9548973409626389,
      "f1": 0.9466132799466134,
      "accuracy": 0.9857685288750221
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9658906334596643,
        "recall": 0.9711486118671747,
        "f1": 0.9685124864277959,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8954108858057631,
        "recall": 0.9099783080260304,
        "f1": 0.9026358257127487,
        "number": 922
      },
      "ORG": {
        "precision": 0.9075812274368231,
        "recall": 0.9373601789709173,
        "f1": 0.922230374174615,
        "number": 1341
      },
      "PER": {
        "precision": 0.9557805007991476,
        "recall": 0.9739413680781759,
        "f1": 0.9647754772788384,
        "number": 1842
      }
    },
    "eval_runtime": 7.5556,
    "eval_samples_per_second": 430.142,
    "eval_steps_per_second": 53.867,
    "epoch": 11.0,
    "step": 19316
  },
  {
    "loss": 0.0028,
    "grad_norm": 0.030722877010703087,
    "learning_rate": 8.895216400911163e-06,
    "epoch": 11.104783599088838,
    "step": 19500
  },
  {
    "loss": 0.0033,
    "grad_norm": 0.004747629631310701,
    "learning_rate": 8.610478359908885e-06,
    "epoch": 11.389521640091116,
    "step": 20000
  },
  {
    "loss": 0.0018,
    "grad_norm": 0.0014466517604887486,
    "learning_rate": 8.325740318906607e-06,
    "epoch": 11.674259681093394,
    "step": 20500
  },
  {
    "loss": 0.0004,
    "grad_norm": 0.005812987219542265,
    "learning_rate": 8.041002277904329e-06,
    "epoch": 11.958997722095672,
    "step": 21000
  },
  {
    "eval_loss": 0.12146918475627899,
    "eval_overall": {
      "precision": 0.9431799302209669,
      "recall": 0.9554022214742511,
      "f1": 0.9492517348047823,
      "accuracy": 0.9866074056631542
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.971195652173913,
        "recall": 0.9727817093086554,
        "f1": 0.9719880337231438,
        "number": 1837
      },
      "MISC": {
        "precision": 0.9096844396082698,
        "recall": 0.9067245119305857,
        "f1": 0.9082020640956002,
        "number": 922
      },
      "ORG": {
        "precision": 0.9121522693997072,
        "recall": 0.9291573452647278,
        "f1": 0.9205762837089029,
        "number": 1341
      },
      "PER": {
        "precision": 0.9545934530095037,
        "recall": 0.9815418023887079,
        "f1": 0.9678800856531049,
        "number": 1842
      }
    },
    "eval_runtime": 7.7732,
    "eval_samples_per_second": 418.106,
    "eval_steps_per_second": 52.36,
    "epoch": 12.0,
    "step": 21072
  },
  {
    "loss": 0.0006,
    "grad_norm": 0.0022160038352012634,
    "learning_rate": 7.75626423690205e-06,
    "epoch": 12.24373576309795,
    "step": 21500
  },
  {
    "loss": 0.0008,
    "grad_norm": 0.00019432637782301754,
    "learning_rate": 7.471526195899773e-06,
    "epoch": 12.528473804100228,
    "step": 22000
  },
  {
    "loss": 0.0011,
    "grad_norm": 1.6397913694381714,
    "learning_rate": 7.186788154897495e-06,
    "epoch": 12.813211845102506,
    "step": 22500
  },
  {
    "eval_loss": 0.12505799531936646,
    "eval_overall": {
      "precision": 0.9386808087504143,
      "recall": 0.9532144059239314,
      "f1": 0.9458917835671342,
      "accuracy": 0.9860923058809677
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9675148890092041,
        "recall": 0.9727817093086554,
        "f1": 0.9701411509229099,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8782791185729276,
        "recall": 0.9078091106290672,
        "f1": 0.8927999999999999,
        "number": 922
      },
      "ORG": {
        "precision": 0.9241741741741741,
        "recall": 0.9179716629381058,
        "f1": 0.9210624766180322,
        "number": 1341
      },
      "PER": {
        "precision": 0.9511041009463722,
        "recall": 0.9820846905537459,
        "f1": 0.9663461538461537,
        "number": 1842
      }
    },
    "eval_runtime": 7.5826,
    "eval_samples_per_second": 428.613,
    "eval_steps_per_second": 53.676,
    "epoch": 13.0,
    "step": 22828
  },
  {
    "loss": 0.0015,
    "grad_norm": 0.00018138765881303698,
    "learning_rate": 6.9020501138952166e-06,
    "epoch": 13.097949886104784,
    "step": 23000
  },
  {
    "loss": 0.0007,
    "grad_norm": 0.00034025911008939147,
    "learning_rate": 6.617312072892939e-06,
    "epoch": 13.382687927107062,
    "step": 23500
  },
  {
    "loss": 0.0006,
    "grad_norm": 0.00041584973223507404,
    "learning_rate": 6.3325740318906616e-06,
    "epoch": 13.66742596810934,
    "step": 24000
  },
  {
    "loss": 0.0008,
    "grad_norm": 0.0006968477973714471,
    "learning_rate": 6.047835990888384e-06,
    "epoch": 13.952164009111618,
    "step": 24500
  },
  {
    "eval_loss": 0.12853111326694489,
    "eval_overall": {
      "precision": 0.9430043203722167,
      "recall": 0.9550656344665096,
      "f1": 0.9489966555183946,
      "accuracy": 0.986342497203744
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9736986301369863,
        "recall": 0.9673380511703865,
        "f1": 0.9705079191698526,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8892438764643238,
        "recall": 0.9056399132321041,
        "f1": 0.8973670069854918,
        "number": 922
      },
      "ORG": {
        "precision": 0.9200293470286134,
        "recall": 0.9351230425055929,
        "f1": 0.9275147928994084,
        "number": 1341
      },
      "PER": {
        "precision": 0.9566367001586462,
        "recall": 0.9820846905537459,
        "f1": 0.9691936780069649,
        "number": 1842
      }
    },
    "eval_runtime": 7.5688,
    "eval_samples_per_second": 429.396,
    "eval_steps_per_second": 53.774,
    "epoch": 14.0,
    "step": 24584
  },
  {
    "loss": 0.0006,
    "grad_norm": 0.00020477738871704787,
    "learning_rate": 5.763097949886105e-06,
    "epoch": 14.236902050113896,
    "step": 25000
  },
  {
    "loss": 0.0007,
    "grad_norm": 0.00032146400189958513,
    "learning_rate": 5.478359908883827e-06,
    "epoch": 14.521640091116174,
    "step": 25500
  },
  {
    "loss": 0.0014,
    "grad_norm": 0.000293695367872715,
    "learning_rate": 5.19362186788155e-06,
    "epoch": 14.806378132118452,
    "step": 26000
  },
  {
    "eval_loss": 0.1253676861524582,
    "eval_overall": {
      "precision": 0.938937613767996,
      "recall": 0.9548973409626389,
      "f1": 0.9468502294534834,
      "accuracy": 0.9861364572908695
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9607948442534908,
        "recall": 0.9738704409363091,
        "f1": 0.9672884563395511,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8742203742203742,
        "recall": 0.9121475054229935,
        "f1": 0.8927813163481952,
        "number": 922
      },
      "ORG": {
        "precision": 0.9322289156626506,
        "recall": 0.9231916480238628,
        "f1": 0.9276882727613338,
        "number": 1341
      },
      "PER": {
        "precision": 0.9550502379693284,
        "recall": 0.9804560260586319,
        "f1": 0.9675863916421109,
        "number": 1842
      }
    },
    "eval_runtime": 8.6163,
    "eval_samples_per_second": 377.191,
    "eval_steps_per_second": 47.236,
    "epoch": 15.0,
    "step": 26340
  },
  {
    "loss": 0.0008,
    "grad_norm": 0.00019819244334939867,
    "learning_rate": 4.908883826879272e-06,
    "epoch": 15.09111617312073,
    "step": 26500
  },
  {
    "loss": 0.0008,
    "grad_norm": 0.00021079520229250193,
    "learning_rate": 4.624145785876993e-06,
    "epoch": 15.375854214123008,
    "step": 27000
  },
  {
    "loss": 0.0003,
    "grad_norm": 0.00012124140630476177,
    "learning_rate": 4.339407744874715e-06,
    "epoch": 15.660592255125284,
    "step": 27500
  },
  {
    "loss": 0.0008,
    "grad_norm": 0.000182148942258209,
    "learning_rate": 4.054669703872437e-06,
    "epoch": 15.945330296127562,
    "step": 28000
  },
  {
    "eval_loss": 0.12416187673807144,
    "eval_overall": {
      "precision": 0.9447862963578912,
      "recall": 0.9560753954897341,
      "f1": 0.9503973232956922,
      "accuracy": 0.9866957084829575
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9680043383947939,
        "recall": 0.9716929776810016,
        "f1": 0.969845150774246,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8954108858057631,
        "recall": 0.9099783080260304,
        "f1": 0.9026358257127487,
        "number": 922
      },
      "ORG": {
        "precision": 0.9301634472511144,
        "recall": 0.9336316181953765,
        "f1": 0.9318943059173799,
        "number": 1341
      },
      "PER": {
        "precision": 0.957051961823966,
        "recall": 0.9799131378935939,
        "f1": 0.9683476394849786,
        "number": 1842
      }
    },
    "eval_runtime": 8.6628,
    "eval_samples_per_second": 375.168,
    "eval_steps_per_second": 46.983,
    "epoch": 16.0,
    "step": 28096
  },
  {
    "loss": 0.001,
    "grad_norm": 0.019741835072636604,
    "learning_rate": 3.76993166287016e-06,
    "epoch": 16.23006833712984,
    "step": 28500
  },
  {
    "loss": 0.0003,
    "grad_norm": 0.00012148160021752119,
    "learning_rate": 3.4851936218678815e-06,
    "epoch": 16.514806378132118,
    "step": 29000
  },
  {
    "loss": 0.0005,
    "grad_norm": 0.00011341737263137475,
    "learning_rate": 3.200455580865604e-06,
    "epoch": 16.799544419134396,
    "step": 29500
  },
  {
    "eval_loss": 0.12274094671010971,
    "eval_overall": {
      "precision": 0.9438631456568676,
      "recall": 0.9564119824974756,
      "f1": 0.9500961297333445,
      "accuracy": 0.9868428798492965
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.971195652173913,
        "recall": 0.9727817093086554,
        "f1": 0.9719880337231438,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8761609907120743,
        "recall": 0.920824295010846,
        "f1": 0.8979375991538868,
        "number": 922
      },
      "ORG": {
        "precision": 0.9307004470938898,
        "recall": 0.9313944817300522,
        "f1": 0.9310473350726798,
        "number": 1341
      },
      "PER": {
        "precision": 0.9614973262032086,
        "recall": 0.9761129207383279,
        "f1": 0.9687500000000001,
        "number": 1842
      }
    },
    "eval_runtime": 8.9038,
    "eval_samples_per_second": 365.012,
    "eval_steps_per_second": 45.711,
    "epoch": 17.0,
    "step": 29852
  },
  {
    "loss": 0.0007,
    "grad_norm": 0.00015434053784701973,
    "learning_rate": 2.9157175398633257e-06,
    "epoch": 17.084282460136674,
    "step": 30000
  },
  {
    "loss": 0.0,
    "grad_norm": 0.00021848917822353542,
    "learning_rate": 2.6309794988610482e-06,
    "epoch": 17.36902050113895,
    "step": 30500
  },
  {
    "loss": 0.0002,
    "grad_norm": 0.0001007779865176417,
    "learning_rate": 2.34624145785877e-06,
    "epoch": 17.65375854214123,
    "step": 31000
  },
  {
    "loss": 0.0002,
    "grad_norm": 0.00010259783448418602,
    "learning_rate": 2.061503416856492e-06,
    "epoch": 17.938496583143507,
    "step": 31500
  },
  {
    "eval_loss": 0.12293359637260437,
    "eval_overall": {
      "precision": 0.9440199335548173,
      "recall": 0.9564119824974756,
      "f1": 0.9501755559271025,
      "accuracy": 0.9870636368988049
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9701249321021184,
        "recall": 0.9722373434948285,
        "f1": 0.9711799891245242,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8810020876826722,
        "recall": 0.9154013015184381,
        "f1": 0.8978723404255319,
        "number": 922
      },
      "ORG": {
        "precision": 0.926829268292683,
        "recall": 0.9351230425055929,
        "f1": 0.9309576837416481,
        "number": 1341
      },
      "PER": {
        "precision": 0.9630620985010707,
        "recall": 0.9766558089033659,
        "f1": 0.969811320754717,
        "number": 1842
      }
    },
    "eval_runtime": 8.7313,
    "eval_samples_per_second": 372.226,
    "eval_steps_per_second": 46.614,
    "epoch": 18.0,
    "step": 31608
  },
  {
    "loss": 0.0001,
    "grad_norm": 0.0010326108895242214,
    "learning_rate": 1.7767653758542143e-06,
    "epoch": 18.223234624145785,
    "step": 32000
  },
  {
    "loss": 0.0007,
    "grad_norm": 0.00016551795124541968,
    "learning_rate": 1.4920273348519363e-06,
    "epoch": 18.507972665148063,
    "step": 32500
  },
  {
    "loss": 0.0004,
    "grad_norm": 0.00024008983746170998,
    "learning_rate": 1.2072892938496584e-06,
    "epoch": 18.79271070615034,
    "step": 33000
  },
  {
    "eval_loss": 0.12236841022968292,
    "eval_overall": {
      "precision": 0.944813829787234,
      "recall": 0.9565802760013463,
      "f1": 0.9506606455929084,
      "accuracy": 0.9872255254017779
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.971677559912854,
        "recall": 0.9711486118671747,
        "f1": 0.9714130138851075,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8712667353244078,
        "recall": 0.9175704989154013,
        "f1": 0.8938193343898574,
        "number": 922
      },
      "ORG": {
        "precision": 0.9383458646616541,
        "recall": 0.930648769574944,
        "f1": 0.9344814676151254,
        "number": 1341
      },
      "PER": {
        "precision": 0.961149547631719,
        "recall": 0.9804560260586319,
        "f1": 0.9707067992475142,
        "number": 1842
      }
    },
    "eval_runtime": 6.5081,
    "eval_samples_per_second": 499.381,
    "eval_steps_per_second": 62.538,
    "epoch": 19.0,
    "step": 33364
  },
  {
    "loss": 0.0001,
    "grad_norm": 0.00016071839490905404,
    "learning_rate": 9.225512528473805e-07,
    "epoch": 19.07744874715262,
    "step": 33500
  },
  {
    "loss": 0.0001,
    "grad_norm": 9.665234392741695e-05,
    "learning_rate": 6.378132118451026e-07,
    "epoch": 19.362186788154897,
    "step": 34000
  },
  {
    "loss": 0.0001,
    "grad_norm": 0.004949676804244518,
    "learning_rate": 3.530751708428246e-07,
    "epoch": 19.646924829157175,
    "step": 34500
  },
  {
    "loss": 0.0005,
    "grad_norm": 0.00014002641546539962,
    "learning_rate": 6.83371298405467e-08,
    "epoch": 19.931662870159453,
    "step": 35000
  },
  {
    "eval_loss": 0.12258101999759674,
    "eval_overall": {
      "precision": 0.9443891102257637,
      "recall": 0.9574217435207001,
      "f1": 0.9508607721878657,
      "accuracy": 0.9872549596750456
    },
    "eval_per_label": {
      "LOC": {
        "precision": 0.9727371864776445,
        "recall": 0.9711486118671747,
        "f1": 0.9719422500681013,
        "number": 1837
      },
      "MISC": {
        "precision": 0.8746113989637305,
        "recall": 0.9154013015184381,
        "f1": 0.8945416004239534,
        "number": 922
      },
      "ORG": {
        "precision": 0.9336810730253353,
        "recall": 0.9343773303504848,
        "f1": 0.9340290719344019,
        "number": 1341
      },
      "PER": {
        "precision": 0.960169941582581,
        "recall": 0.9815418023887079,
        "f1": 0.970738255033557,
        "number": 1842
      }
    },
    "eval_runtime": 6.7405,
    "eval_samples_per_second": 482.16,
    "eval_steps_per_second": 60.381,
    "epoch": 20.0,
    "step": 35120
  },
  {
    "train_runtime": 3604.2086,
    "train_samples_per_second": 77.914,
    "train_steps_per_second": 9.744,
    "total_flos": 6145780901050062.0,
    "train_loss": 0.013757273165407275,
    "epoch": 20.0,
    "step": 35120
  }
]