BiCodeGen-MNTP-CodeSearchNet / trainer_state.json
Denis641's picture
Upload 14 files
c8e329f verified
raw
history blame
16.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.928870292887029,
"eval_steps": 100,
"global_step": 5600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05230125523012552,
"eval_accuracy": 0.10215735262383098,
"eval_loss": 5.751355171203613,
"eval_runtime": 184.0542,
"eval_samples_per_second": 80.194,
"eval_steps_per_second": 0.63,
"step": 100
},
{
"epoch": 0.10460251046025104,
"eval_accuracy": 0.12205444695787844,
"eval_loss": 5.327404975891113,
"eval_runtime": 185.0066,
"eval_samples_per_second": 79.781,
"eval_steps_per_second": 0.627,
"step": 200
},
{
"epoch": 0.15690376569037656,
"eval_accuracy": 0.13593891242440487,
"eval_loss": 5.167089939117432,
"eval_runtime": 184.2672,
"eval_samples_per_second": 80.101,
"eval_steps_per_second": 0.63,
"step": 300
},
{
"epoch": 0.20920502092050208,
"eval_accuracy": 0.14769799376050313,
"eval_loss": 5.03983736038208,
"eval_runtime": 184.4381,
"eval_samples_per_second": 80.027,
"eval_steps_per_second": 0.629,
"step": 400
},
{
"epoch": 0.2615062761506276,
"grad_norm": 1.4375,
"learning_rate": 4.564156206415621e-05,
"loss": 5.3792,
"step": 500
},
{
"epoch": 0.2615062761506276,
"eval_accuracy": 0.15595240798761018,
"eval_loss": 4.976524353027344,
"eval_runtime": 184.12,
"eval_samples_per_second": 80.165,
"eval_steps_per_second": 0.63,
"step": 500
},
{
"epoch": 0.3138075313807531,
"eval_accuracy": 0.16177388065723808,
"eval_loss": 4.927117347717285,
"eval_runtime": 184.3296,
"eval_samples_per_second": 80.074,
"eval_steps_per_second": 0.629,
"step": 600
},
{
"epoch": 0.36610878661087864,
"eval_accuracy": 0.16636529077842815,
"eval_loss": 4.898035049438477,
"eval_runtime": 184.3142,
"eval_samples_per_second": 80.081,
"eval_steps_per_second": 0.629,
"step": 700
},
{
"epoch": 0.41841004184100417,
"eval_accuracy": 0.1694151293866953,
"eval_loss": 4.875,
"eval_runtime": 185.3011,
"eval_samples_per_second": 79.654,
"eval_steps_per_second": 0.626,
"step": 800
},
{
"epoch": 0.4707112970711297,
"eval_accuracy": 0.17219956637036357,
"eval_loss": 4.854589939117432,
"eval_runtime": 184.8498,
"eval_samples_per_second": 79.849,
"eval_steps_per_second": 0.628,
"step": 900
},
{
"epoch": 0.5230125523012552,
"grad_norm": 1.2265625,
"learning_rate": 4.128312412831242e-05,
"loss": 4.8385,
"step": 1000
},
{
"epoch": 0.5230125523012552,
"eval_accuracy": 0.17474116808932638,
"eval_loss": 4.833265781402588,
"eval_runtime": 184.5693,
"eval_samples_per_second": 79.97,
"eval_steps_per_second": 0.628,
"step": 1000
},
{
"epoch": 0.5753138075313807,
"eval_accuracy": 0.17643840085242252,
"eval_loss": 4.817920207977295,
"eval_runtime": 184.1405,
"eval_samples_per_second": 80.156,
"eval_steps_per_second": 0.63,
"step": 1100
},
{
"epoch": 0.6276150627615062,
"eval_accuracy": 0.1773740269532399,
"eval_loss": 4.811416149139404,
"eval_runtime": 184.0435,
"eval_samples_per_second": 80.198,
"eval_steps_per_second": 0.63,
"step": 1200
},
{
"epoch": 0.6799163179916318,
"eval_accuracy": 0.17846844188652472,
"eval_loss": 4.802201747894287,
"eval_runtime": 185.0497,
"eval_samples_per_second": 79.762,
"eval_steps_per_second": 0.627,
"step": 1300
},
{
"epoch": 0.7322175732217573,
"eval_accuracy": 0.1789691379773809,
"eval_loss": 4.7901930809021,
"eval_runtime": 184.9855,
"eval_samples_per_second": 79.79,
"eval_steps_per_second": 0.627,
"step": 1400
},
{
"epoch": 0.7845188284518828,
"grad_norm": 1.5703125,
"learning_rate": 3.6924686192468624e-05,
"loss": 4.7486,
"step": 1500
},
{
"epoch": 0.7845188284518828,
"eval_accuracy": 0.18001693513769368,
"eval_loss": 4.785585880279541,
"eval_runtime": 183.9922,
"eval_samples_per_second": 80.221,
"eval_steps_per_second": 0.63,
"step": 1500
},
{
"epoch": 0.8368200836820083,
"eval_accuracy": 0.18058760065826654,
"eval_loss": 4.779539108276367,
"eval_runtime": 184.1103,
"eval_samples_per_second": 80.169,
"eval_steps_per_second": 0.63,
"step": 1600
},
{
"epoch": 0.8891213389121339,
"eval_accuracy": 0.18107188047030814,
"eval_loss": 4.776101112365723,
"eval_runtime": 184.1674,
"eval_samples_per_second": 80.144,
"eval_steps_per_second": 0.63,
"step": 1700
},
{
"epoch": 0.9414225941422594,
"eval_accuracy": 0.18138128706525738,
"eval_loss": 4.7754740715026855,
"eval_runtime": 184.197,
"eval_samples_per_second": 80.132,
"eval_steps_per_second": 0.63,
"step": 1800
},
{
"epoch": 0.9937238493723849,
"eval_accuracy": 0.18191716908996425,
"eval_loss": 4.767343997955322,
"eval_runtime": 184.0497,
"eval_samples_per_second": 80.196,
"eval_steps_per_second": 0.63,
"step": 1900
},
{
"epoch": 1.0460251046025104,
"grad_norm": 1.203125,
"learning_rate": 3.2566248256624825e-05,
"loss": 4.7159,
"step": 2000
},
{
"epoch": 1.0460251046025104,
"eval_accuracy": 0.18160189904678178,
"eval_loss": 4.769783020019531,
"eval_runtime": 185.2615,
"eval_samples_per_second": 79.671,
"eval_steps_per_second": 0.626,
"step": 2000
},
{
"epoch": 1.098326359832636,
"eval_accuracy": 0.18223365637995678,
"eval_loss": 4.763906002044678,
"eval_runtime": 184.3991,
"eval_samples_per_second": 80.044,
"eval_steps_per_second": 0.629,
"step": 2100
},
{
"epoch": 1.1506276150627615,
"eval_accuracy": 0.18256457647135646,
"eval_loss": 4.761280536651611,
"eval_runtime": 185.0808,
"eval_samples_per_second": 79.749,
"eval_steps_per_second": 0.627,
"step": 2200
},
{
"epoch": 1.202928870292887,
"eval_accuracy": 0.18278985816542134,
"eval_loss": 4.7557759284973145,
"eval_runtime": 184.6355,
"eval_samples_per_second": 79.941,
"eval_steps_per_second": 0.628,
"step": 2300
},
{
"epoch": 1.2552301255230125,
"eval_accuracy": 0.18294602847055624,
"eval_loss": 4.75867223739624,
"eval_runtime": 184.9626,
"eval_samples_per_second": 79.8,
"eval_steps_per_second": 0.627,
"step": 2400
},
{
"epoch": 1.3075313807531381,
"grad_norm": 1.75,
"learning_rate": 2.8207810320781032e-05,
"loss": 4.6997,
"step": 2500
},
{
"epoch": 1.3075313807531381,
"eval_accuracy": 0.18342178332552747,
"eval_loss": 4.754149913787842,
"eval_runtime": 185.1377,
"eval_samples_per_second": 79.724,
"eval_steps_per_second": 0.627,
"step": 2500
},
{
"epoch": 1.3598326359832635,
"eval_accuracy": 0.1834138543952502,
"eval_loss": 4.75181245803833,
"eval_runtime": 184.6159,
"eval_samples_per_second": 79.95,
"eval_steps_per_second": 0.628,
"step": 2600
},
{
"epoch": 1.4121338912133892,
"eval_accuracy": 0.1836629457159783,
"eval_loss": 4.746477127075195,
"eval_runtime": 185.4196,
"eval_samples_per_second": 79.603,
"eval_steps_per_second": 0.626,
"step": 2700
},
{
"epoch": 1.4644351464435146,
"eval_accuracy": 0.18383634667304455,
"eval_loss": 4.750728130340576,
"eval_runtime": 184.604,
"eval_samples_per_second": 79.955,
"eval_steps_per_second": 0.628,
"step": 2800
},
{
"epoch": 1.5167364016736402,
"eval_accuracy": 0.1834985251891202,
"eval_loss": 4.751083850860596,
"eval_runtime": 183.9829,
"eval_samples_per_second": 80.225,
"eval_steps_per_second": 0.63,
"step": 2900
},
{
"epoch": 1.5690376569037658,
"grad_norm": 1.1328125,
"learning_rate": 2.3849372384937242e-05,
"loss": 4.6905,
"step": 3000
},
{
"epoch": 1.5690376569037658,
"eval_accuracy": 0.18385013375825832,
"eval_loss": 4.750813007354736,
"eval_runtime": 184.3904,
"eval_samples_per_second": 80.048,
"eval_steps_per_second": 0.629,
"step": 3000
},
{
"epoch": 1.6213389121338913,
"eval_accuracy": 0.18415784707342428,
"eval_loss": 4.746849536895752,
"eval_runtime": 184.2103,
"eval_samples_per_second": 80.126,
"eval_steps_per_second": 0.63,
"step": 3100
},
{
"epoch": 1.6736401673640167,
"eval_accuracy": 0.1842263748450887,
"eval_loss": 4.746747970581055,
"eval_runtime": 184.1204,
"eval_samples_per_second": 80.165,
"eval_steps_per_second": 0.63,
"step": 3200
},
{
"epoch": 1.7259414225941423,
"eval_accuracy": 0.18430792521293346,
"eval_loss": 4.745037078857422,
"eval_runtime": 184.2802,
"eval_samples_per_second": 80.095,
"eval_steps_per_second": 0.629,
"step": 3300
},
{
"epoch": 1.778242677824268,
"eval_accuracy": 0.184430838994751,
"eval_loss": 4.746375560760498,
"eval_runtime": 184.234,
"eval_samples_per_second": 80.116,
"eval_steps_per_second": 0.63,
"step": 3400
},
{
"epoch": 1.8305439330543933,
"grad_norm": 2.46875,
"learning_rate": 1.9490934449093446e-05,
"loss": 4.687,
"step": 3500
},
{
"epoch": 1.8305439330543933,
"eval_accuracy": 0.18449599939000436,
"eval_loss": 4.7423272132873535,
"eval_runtime": 184.1596,
"eval_samples_per_second": 80.148,
"eval_steps_per_second": 0.63,
"step": 3500
},
{
"epoch": 1.8828451882845187,
"eval_accuracy": 0.18466320271445863,
"eval_loss": 4.74322509765625,
"eval_runtime": 183.9907,
"eval_samples_per_second": 80.221,
"eval_steps_per_second": 0.63,
"step": 3600
},
{
"epoch": 1.9351464435146444,
"eval_accuracy": 0.18432753476168612,
"eval_loss": 4.744410514831543,
"eval_runtime": 183.9473,
"eval_samples_per_second": 80.24,
"eval_steps_per_second": 0.631,
"step": 3700
},
{
"epoch": 1.98744769874477,
"eval_accuracy": 0.18470474137931034,
"eval_loss": 4.74097204208374,
"eval_runtime": 184.0059,
"eval_samples_per_second": 80.215,
"eval_steps_per_second": 0.63,
"step": 3800
},
{
"epoch": 2.0397489539748954,
"eval_accuracy": 0.1845896528685144,
"eval_loss": 4.744495391845703,
"eval_runtime": 184.1792,
"eval_samples_per_second": 80.139,
"eval_steps_per_second": 0.63,
"step": 3900
},
{
"epoch": 2.092050209205021,
"grad_norm": 1.015625,
"learning_rate": 1.5132496513249652e-05,
"loss": 4.6822,
"step": 4000
},
{
"epoch": 2.092050209205021,
"eval_accuracy": 0.1840621610356637,
"eval_loss": 4.743766784667969,
"eval_runtime": 184.0079,
"eval_samples_per_second": 80.214,
"eval_steps_per_second": 0.63,
"step": 4000
},
{
"epoch": 2.1443514644351462,
"eval_accuracy": 0.18438914008407487,
"eval_loss": 4.742242336273193,
"eval_runtime": 184.052,
"eval_samples_per_second": 80.195,
"eval_steps_per_second": 0.63,
"step": 4100
},
{
"epoch": 2.196652719665272,
"eval_accuracy": 0.18475792298529636,
"eval_loss": 4.741429328918457,
"eval_runtime": 184.719,
"eval_samples_per_second": 79.905,
"eval_steps_per_second": 0.628,
"step": 4200
},
{
"epoch": 2.2489539748953975,
"eval_accuracy": 0.1848885987251326,
"eval_loss": 4.740514755249023,
"eval_runtime": 184.0592,
"eval_samples_per_second": 80.192,
"eval_steps_per_second": 0.63,
"step": 4300
},
{
"epoch": 2.301255230125523,
"eval_accuracy": 0.18497058180948975,
"eval_loss": 4.738888740539551,
"eval_runtime": 184.7818,
"eval_samples_per_second": 79.878,
"eval_steps_per_second": 0.628,
"step": 4400
},
{
"epoch": 2.3535564853556483,
"grad_norm": 1.5546875,
"learning_rate": 1.0774058577405859e-05,
"loss": 4.6787,
"step": 4500
},
{
"epoch": 2.3535564853556483,
"eval_accuracy": 0.18458716879639453,
"eval_loss": 4.743495941162109,
"eval_runtime": 185.012,
"eval_samples_per_second": 79.779,
"eval_steps_per_second": 0.627,
"step": 4500
},
{
"epoch": 2.405857740585774,
"eval_accuracy": 0.18485314383095383,
"eval_loss": 4.742412090301514,
"eval_runtime": 184.018,
"eval_samples_per_second": 80.21,
"eval_steps_per_second": 0.63,
"step": 4600
},
{
"epoch": 2.4581589958158996,
"eval_accuracy": 0.18451762653729054,
"eval_loss": 4.744495391845703,
"eval_runtime": 184.1989,
"eval_samples_per_second": 80.131,
"eval_steps_per_second": 0.63,
"step": 4700
},
{
"epoch": 2.510460251046025,
"eval_accuracy": 0.18499375944599727,
"eval_loss": 4.742056369781494,
"eval_runtime": 184.9544,
"eval_samples_per_second": 79.803,
"eval_steps_per_second": 0.627,
"step": 4800
},
{
"epoch": 2.562761506276151,
"eval_accuracy": 0.1845635125940072,
"eval_loss": 4.74495267868042,
"eval_runtime": 184.45,
"eval_samples_per_second": 80.022,
"eval_steps_per_second": 0.629,
"step": 4900
},
{
"epoch": 2.6150627615062763,
"grad_norm": 1.25,
"learning_rate": 6.415620641562065e-06,
"loss": 4.6809,
"step": 5000
},
{
"epoch": 2.6150627615062763,
"eval_accuracy": 0.18461182940869522,
"eval_loss": 4.739973068237305,
"eval_runtime": 185.0928,
"eval_samples_per_second": 79.744,
"eval_steps_per_second": 0.627,
"step": 5000
},
{
"epoch": 2.6673640167364017,
"eval_accuracy": 0.18471778348337312,
"eval_loss": 4.740243911743164,
"eval_runtime": 184.3979,
"eval_samples_per_second": 80.044,
"eval_steps_per_second": 0.629,
"step": 5100
},
{
"epoch": 2.719665271966527,
"eval_accuracy": 0.18489124970131737,
"eval_loss": 4.738804340362549,
"eval_runtime": 184.6067,
"eval_samples_per_second": 79.954,
"eval_steps_per_second": 0.628,
"step": 5200
},
{
"epoch": 2.7719665271966525,
"eval_accuracy": 0.18462476994845917,
"eval_loss": 4.741514205932617,
"eval_runtime": 184.2661,
"eval_samples_per_second": 80.102,
"eval_steps_per_second": 0.63,
"step": 5300
},
{
"epoch": 2.8242677824267783,
"eval_accuracy": 0.18472434773026122,
"eval_loss": 4.739160060882568,
"eval_runtime": 185.2276,
"eval_samples_per_second": 79.686,
"eval_steps_per_second": 0.626,
"step": 5400
},
{
"epoch": 2.8765690376569037,
"grad_norm": 1.6796875,
"learning_rate": 2.057182705718271e-06,
"loss": 4.6819,
"step": 5500
},
{
"epoch": 2.8765690376569037,
"eval_accuracy": 0.18477532080101736,
"eval_loss": 4.742056369781494,
"eval_runtime": 184.4871,
"eval_samples_per_second": 80.006,
"eval_steps_per_second": 0.629,
"step": 5500
},
{
"epoch": 2.928870292887029,
"eval_accuracy": 0.18478108276943064,
"eval_loss": 4.740701198577881,
"eval_runtime": 184.224,
"eval_samples_per_second": 80.12,
"eval_steps_per_second": 0.63,
"step": 5600
}
],
"logging_steps": 500,
"max_steps": 5736,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 6.743272090868122e+17,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}