test-codegpt / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 1000,
"global_step": 31479,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047650814828933574,
"grad_norm": 2.0100979804992676,
"learning_rate": 0.00019682327901140442,
"loss": 2.3566,
"step": 500
},
{
"epoch": 0.09530162965786715,
"grad_norm": 1.877261996269226,
"learning_rate": 0.00019364655802280888,
"loss": 2.2178,
"step": 1000
},
{
"epoch": 0.09530162965786715,
"eval_loss": 2.3788223266601562,
"eval_runtime": 80.302,
"eval_samples_per_second": 173.271,
"eval_steps_per_second": 7.223,
"step": 1000
},
{
"epoch": 0.14295244448680072,
"grad_norm": 1.7388309240341187,
"learning_rate": 0.00019046983703421329,
"loss": 2.1744,
"step": 1500
},
{
"epoch": 0.1906032593157343,
"grad_norm": 1.8366143703460693,
"learning_rate": 0.00018729311604561772,
"loss": 2.163,
"step": 2000
},
{
"epoch": 0.1906032593157343,
"eval_loss": 2.3654611110687256,
"eval_runtime": 80.3504,
"eval_samples_per_second": 173.166,
"eval_steps_per_second": 7.218,
"step": 2000
},
{
"epoch": 0.23825407414466787,
"grad_norm": 1.6628751754760742,
"learning_rate": 0.00018411639505702213,
"loss": 2.1515,
"step": 2500
},
{
"epoch": 0.28590488897360145,
"grad_norm": 1.6291817426681519,
"learning_rate": 0.0001809396740684266,
"loss": 2.1196,
"step": 3000
},
{
"epoch": 0.28590488897360145,
"eval_loss": 2.3521649837493896,
"eval_runtime": 80.224,
"eval_samples_per_second": 173.439,
"eval_steps_per_second": 7.23,
"step": 3000
},
{
"epoch": 0.333555703802535,
"grad_norm": 1.7604336738586426,
"learning_rate": 0.000177762953079831,
"loss": 2.1074,
"step": 3500
},
{
"epoch": 0.3812065186314686,
"grad_norm": 1.34886634349823,
"learning_rate": 0.00017458623209123543,
"loss": 2.0926,
"step": 4000
},
{
"epoch": 0.3812065186314686,
"eval_loss": 2.3420486450195312,
"eval_runtime": 80.1942,
"eval_samples_per_second": 173.504,
"eval_steps_per_second": 7.232,
"step": 4000
},
{
"epoch": 0.42885733346040217,
"grad_norm": 1.3510360717773438,
"learning_rate": 0.00017140951110263986,
"loss": 2.074,
"step": 4500
},
{
"epoch": 0.47650814828933574,
"grad_norm": 1.272275447845459,
"learning_rate": 0.0001682327901140443,
"loss": 2.0752,
"step": 5000
},
{
"epoch": 0.47650814828933574,
"eval_loss": 2.3270885944366455,
"eval_runtime": 80.2619,
"eval_samples_per_second": 173.358,
"eval_steps_per_second": 7.226,
"step": 5000
},
{
"epoch": 0.5241589631182694,
"grad_norm": 1.289753794670105,
"learning_rate": 0.0001650560691254487,
"loss": 2.0487,
"step": 5500
},
{
"epoch": 0.5718097779472029,
"grad_norm": 1.1615971326828003,
"learning_rate": 0.00016187934813685314,
"loss": 2.0437,
"step": 6000
},
{
"epoch": 0.5718097779472029,
"eval_loss": 2.3274528980255127,
"eval_runtime": 80.2214,
"eval_samples_per_second": 173.445,
"eval_steps_per_second": 7.23,
"step": 6000
},
{
"epoch": 0.6194605927761365,
"grad_norm": 1.3484673500061035,
"learning_rate": 0.00015870262714825757,
"loss": 2.0134,
"step": 6500
},
{
"epoch": 0.66711140760507,
"grad_norm": 1.4737777709960938,
"learning_rate": 0.000155525906159662,
"loss": 2.0379,
"step": 7000
},
{
"epoch": 0.66711140760507,
"eval_loss": 2.3164169788360596,
"eval_runtime": 80.2177,
"eval_samples_per_second": 173.453,
"eval_steps_per_second": 7.23,
"step": 7000
},
{
"epoch": 0.7147622224340037,
"grad_norm": 1.1502068042755127,
"learning_rate": 0.00015234918517106642,
"loss": 1.9916,
"step": 7500
},
{
"epoch": 0.7624130372629372,
"grad_norm": 1.2299320697784424,
"learning_rate": 0.00014917246418247085,
"loss": 2.0068,
"step": 8000
},
{
"epoch": 0.7624130372629372,
"eval_loss": 2.311408042907715,
"eval_runtime": 80.2576,
"eval_samples_per_second": 173.367,
"eval_steps_per_second": 7.227,
"step": 8000
},
{
"epoch": 0.8100638520918708,
"grad_norm": 1.2537345886230469,
"learning_rate": 0.00014599574319387528,
"loss": 1.9886,
"step": 8500
},
{
"epoch": 0.8577146669208043,
"grad_norm": 1.0486429929733276,
"learning_rate": 0.00014281902220527972,
"loss": 1.9882,
"step": 9000
},
{
"epoch": 0.8577146669208043,
"eval_loss": 2.304290294647217,
"eval_runtime": 80.1372,
"eval_samples_per_second": 173.627,
"eval_steps_per_second": 7.238,
"step": 9000
},
{
"epoch": 0.905365481749738,
"grad_norm": 1.1815516948699951,
"learning_rate": 0.00013964230121668413,
"loss": 1.9732,
"step": 9500
},
{
"epoch": 0.9530162965786715,
"grad_norm": 1.2301689386367798,
"learning_rate": 0.0001364655802280886,
"loss": 1.9787,
"step": 10000
},
{
"epoch": 0.9530162965786715,
"eval_loss": 2.2939772605895996,
"eval_runtime": 80.1592,
"eval_samples_per_second": 173.579,
"eval_steps_per_second": 7.236,
"step": 10000
},
{
"epoch": 1.0006671114076051,
"grad_norm": 1.497831106185913,
"learning_rate": 0.000133288859239493,
"loss": 1.9557,
"step": 10500
},
{
"epoch": 1.0483179262365387,
"grad_norm": 1.3323341608047485,
"learning_rate": 0.00013011213825089743,
"loss": 1.7231,
"step": 11000
},
{
"epoch": 1.0483179262365387,
"eval_loss": 2.313231945037842,
"eval_runtime": 80.1199,
"eval_samples_per_second": 173.665,
"eval_steps_per_second": 7.239,
"step": 11000
},
{
"epoch": 1.0959687410654722,
"grad_norm": 1.8000659942626953,
"learning_rate": 0.00012693541726230184,
"loss": 1.714,
"step": 11500
},
{
"epoch": 1.1436195558944058,
"grad_norm": 1.2369180917739868,
"learning_rate": 0.0001237586962737063,
"loss": 1.7114,
"step": 12000
},
{
"epoch": 1.1436195558944058,
"eval_loss": 2.313917875289917,
"eval_runtime": 80.1492,
"eval_samples_per_second": 173.601,
"eval_steps_per_second": 7.237,
"step": 12000
},
{
"epoch": 1.1912703707233394,
"grad_norm": 1.431038498878479,
"learning_rate": 0.0001205819752851107,
"loss": 1.7283,
"step": 12500
},
{
"epoch": 1.238921185552273,
"grad_norm": 1.4570106267929077,
"learning_rate": 0.00011740525429651514,
"loss": 1.7033,
"step": 13000
},
{
"epoch": 1.238921185552273,
"eval_loss": 2.310853958129883,
"eval_runtime": 80.0945,
"eval_samples_per_second": 173.72,
"eval_steps_per_second": 7.241,
"step": 13000
},
{
"epoch": 1.2865720003812064,
"grad_norm": 1.557187795639038,
"learning_rate": 0.00011422853330791956,
"loss": 1.7289,
"step": 13500
},
{
"epoch": 1.33422281521014,
"grad_norm": 1.5775034427642822,
"learning_rate": 0.000111051812319324,
"loss": 1.7151,
"step": 14000
},
{
"epoch": 1.33422281521014,
"eval_loss": 2.300920009613037,
"eval_runtime": 80.1537,
"eval_samples_per_second": 173.591,
"eval_steps_per_second": 7.236,
"step": 14000
},
{
"epoch": 1.3818736300390737,
"grad_norm": 1.2451566457748413,
"learning_rate": 0.00010787509133072841,
"loss": 1.7218,
"step": 14500
},
{
"epoch": 1.4295244448680071,
"grad_norm": 1.650688886642456,
"learning_rate": 0.00010469837034213286,
"loss": 1.7202,
"step": 15000
},
{
"epoch": 1.4295244448680071,
"eval_loss": 2.290478467941284,
"eval_runtime": 80.1852,
"eval_samples_per_second": 173.523,
"eval_steps_per_second": 7.233,
"step": 15000
},
{
"epoch": 1.4771752596969407,
"grad_norm": 1.4705020189285278,
"learning_rate": 0.00010152164935353727,
"loss": 1.721,
"step": 15500
},
{
"epoch": 1.5248260745258744,
"grad_norm": 1.530394434928894,
"learning_rate": 9.834492836494172e-05,
"loss": 1.7261,
"step": 16000
},
{
"epoch": 1.5248260745258744,
"eval_loss": 2.2944624423980713,
"eval_runtime": 80.1122,
"eval_samples_per_second": 173.682,
"eval_steps_per_second": 7.24,
"step": 16000
},
{
"epoch": 1.572476889354808,
"grad_norm": 1.667024850845337,
"learning_rate": 9.516820737634614e-05,
"loss": 1.7072,
"step": 16500
},
{
"epoch": 1.6201277041837416,
"grad_norm": 1.4624521732330322,
"learning_rate": 9.199148638775057e-05,
"loss": 1.7091,
"step": 17000
},
{
"epoch": 1.6201277041837416,
"eval_loss": 2.2861549854278564,
"eval_runtime": 80.0947,
"eval_samples_per_second": 173.719,
"eval_steps_per_second": 7.241,
"step": 17000
},
{
"epoch": 1.6677785190126753,
"grad_norm": 1.7141919136047363,
"learning_rate": 8.881476539915499e-05,
"loss": 1.7281,
"step": 17500
},
{
"epoch": 1.7154293338416087,
"grad_norm": 1.367767333984375,
"learning_rate": 8.563804441055943e-05,
"loss": 1.7098,
"step": 18000
},
{
"epoch": 1.7154293338416087,
"eval_loss": 2.2811758518218994,
"eval_runtime": 80.1424,
"eval_samples_per_second": 173.616,
"eval_steps_per_second": 7.237,
"step": 18000
},
{
"epoch": 1.7630801486705423,
"grad_norm": 1.530991792678833,
"learning_rate": 8.246132342196385e-05,
"loss": 1.6994,
"step": 18500
},
{
"epoch": 1.8107309634994757,
"grad_norm": 1.4421322345733643,
"learning_rate": 7.928460243336828e-05,
"loss": 1.6943,
"step": 19000
},
{
"epoch": 1.8107309634994757,
"eval_loss": 2.273425579071045,
"eval_runtime": 80.1385,
"eval_samples_per_second": 173.624,
"eval_steps_per_second": 7.237,
"step": 19000
},
{
"epoch": 1.8583817783284093,
"grad_norm": 1.5695687532424927,
"learning_rate": 7.610788144477272e-05,
"loss": 1.7,
"step": 19500
},
{
"epoch": 1.906032593157343,
"grad_norm": 1.6507039070129395,
"learning_rate": 7.293116045617714e-05,
"loss": 1.7035,
"step": 20000
},
{
"epoch": 1.906032593157343,
"eval_loss": 2.266268730163574,
"eval_runtime": 80.1631,
"eval_samples_per_second": 173.571,
"eval_steps_per_second": 7.235,
"step": 20000
},
{
"epoch": 1.9536834079862766,
"grad_norm": 1.41545832157135,
"learning_rate": 6.975443946758157e-05,
"loss": 1.6948,
"step": 20500
},
{
"epoch": 2.0013342228152102,
"grad_norm": 1.3855451345443726,
"learning_rate": 6.657771847898599e-05,
"loss": 1.6776,
"step": 21000
},
{
"epoch": 2.0013342228152102,
"eval_loss": 2.302978515625,
"eval_runtime": 80.1675,
"eval_samples_per_second": 173.562,
"eval_steps_per_second": 7.235,
"step": 21000
},
{
"epoch": 2.048985037644144,
"grad_norm": 1.3997050523757935,
"learning_rate": 6.340099749039043e-05,
"loss": 1.438,
"step": 21500
},
{
"epoch": 2.0966358524730775,
"grad_norm": 1.4828859567642212,
"learning_rate": 6.0224276501794854e-05,
"loss": 1.4406,
"step": 22000
},
{
"epoch": 2.0966358524730775,
"eval_loss": 2.3172175884246826,
"eval_runtime": 80.1748,
"eval_samples_per_second": 173.546,
"eval_steps_per_second": 7.234,
"step": 22000
},
{
"epoch": 2.1442866673020107,
"grad_norm": 1.8176885843276978,
"learning_rate": 5.704755551319928e-05,
"loss": 1.4555,
"step": 22500
},
{
"epoch": 2.1919374821309443,
"grad_norm": 1.48106050491333,
"learning_rate": 5.387083452460371e-05,
"loss": 1.4659,
"step": 23000
},
{
"epoch": 2.1919374821309443,
"eval_loss": 2.3182783126831055,
"eval_runtime": 80.2101,
"eval_samples_per_second": 173.47,
"eval_steps_per_second": 7.231,
"step": 23000
},
{
"epoch": 2.239588296959878,
"grad_norm": 1.6957001686096191,
"learning_rate": 5.0694113536008136e-05,
"loss": 1.448,
"step": 23500
},
{
"epoch": 2.2872391117888116,
"grad_norm": 1.3845641613006592,
"learning_rate": 4.7517392547412564e-05,
"loss": 1.4608,
"step": 24000
},
{
"epoch": 2.2872391117888116,
"eval_loss": 2.318488836288452,
"eval_runtime": 80.1689,
"eval_samples_per_second": 173.559,
"eval_steps_per_second": 7.235,
"step": 24000
},
{
"epoch": 2.334889926617745,
"grad_norm": 1.9913188219070435,
"learning_rate": 4.434067155881699e-05,
"loss": 1.439,
"step": 24500
},
{
"epoch": 2.382540741446679,
"grad_norm": 1.8244202136993408,
"learning_rate": 4.116395057022142e-05,
"loss": 1.4423,
"step": 25000
},
{
"epoch": 2.382540741446679,
"eval_loss": 2.3121349811553955,
"eval_runtime": 80.1537,
"eval_samples_per_second": 173.591,
"eval_steps_per_second": 7.236,
"step": 25000
},
{
"epoch": 2.4301915562756125,
"grad_norm": 1.347023606300354,
"learning_rate": 3.7987229581625846e-05,
"loss": 1.4506,
"step": 25500
},
{
"epoch": 2.477842371104546,
"grad_norm": 1.49163019657135,
"learning_rate": 3.481050859303028e-05,
"loss": 1.4378,
"step": 26000
},
{
"epoch": 2.477842371104546,
"eval_loss": 2.3090391159057617,
"eval_runtime": 80.1708,
"eval_samples_per_second": 173.554,
"eval_steps_per_second": 7.235,
"step": 26000
},
{
"epoch": 2.5254931859334793,
"grad_norm": 1.7945301532745361,
"learning_rate": 3.163378760443471e-05,
"loss": 1.4436,
"step": 26500
},
{
"epoch": 2.573144000762413,
"grad_norm": 1.5082517862319946,
"learning_rate": 2.8457066615839136e-05,
"loss": 1.4277,
"step": 27000
},
{
"epoch": 2.573144000762413,
"eval_loss": 2.3082542419433594,
"eval_runtime": 80.1802,
"eval_samples_per_second": 173.534,
"eval_steps_per_second": 7.234,
"step": 27000
},
{
"epoch": 2.6207948155913465,
"grad_norm": 1.4329321384429932,
"learning_rate": 2.5280345627243563e-05,
"loss": 1.4301,
"step": 27500
},
{
"epoch": 2.66844563042028,
"grad_norm": 1.2606436014175415,
"learning_rate": 2.2103624638647987e-05,
"loss": 1.4251,
"step": 28000
},
{
"epoch": 2.66844563042028,
"eval_loss": 2.2960703372955322,
"eval_runtime": 80.1531,
"eval_samples_per_second": 173.593,
"eval_steps_per_second": 7.236,
"step": 28000
},
{
"epoch": 2.716096445249214,
"grad_norm": 1.4542068243026733,
"learning_rate": 1.8926903650052415e-05,
"loss": 1.4248,
"step": 28500
},
{
"epoch": 2.7637472600781474,
"grad_norm": 1.6642916202545166,
"learning_rate": 1.5750182661456846e-05,
"loss": 1.4219,
"step": 29000
},
{
"epoch": 2.7637472600781474,
"eval_loss": 2.296442985534668,
"eval_runtime": 80.1753,
"eval_samples_per_second": 173.545,
"eval_steps_per_second": 7.234,
"step": 29000
},
{
"epoch": 2.811398074907081,
"grad_norm": 2.0301756858825684,
"learning_rate": 1.2573461672861273e-05,
"loss": 1.4281,
"step": 29500
},
{
"epoch": 2.8590488897360142,
"grad_norm": 1.6031594276428223,
"learning_rate": 9.3967406842657e-06,
"loss": 1.434,
"step": 30000
},
{
"epoch": 2.8590488897360142,
"eval_loss": 2.2933690547943115,
"eval_runtime": 80.1482,
"eval_samples_per_second": 173.603,
"eval_steps_per_second": 7.237,
"step": 30000
},
{
"epoch": 2.9066997045649483,
"grad_norm": 1.6658378839492798,
"learning_rate": 6.22001969567013e-06,
"loss": 1.4291,
"step": 30500
},
{
"epoch": 2.9543505193938815,
"grad_norm": 1.589982032775879,
"learning_rate": 3.0432987070745578e-06,
"loss": 1.4279,
"step": 31000
},
{
"epoch": 2.9543505193938815,
"eval_loss": 2.2906086444854736,
"eval_runtime": 80.2746,
"eval_samples_per_second": 173.33,
"eval_steps_per_second": 7.225,
"step": 31000
}
],
"logging_steps": 500,
"max_steps": 31479,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.934891962368e+16,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}
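
Note: the log_history above interleaves training records (loss, grad_norm, and learning_rate logged every 500 steps, per logging_steps) with evaluation records (eval_loss every 1000 steps, per eval_steps). A minimal sketch of how one might load this file and plot the training and evaluation loss curves, assuming it is saved locally as trainer_state.json and that matplotlib is installed:

import json

import matplotlib.pyplot as plt

# Load the trainer state written by the Hugging Face Trainer.
# The path is an assumption; point it at wherever this file lives locally.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

plt.plot([e["step"] for e in train_logs],
         [e["loss"] for e in train_logs],
         label="train loss")
plt.plot([e["step"] for e in eval_logs],
         [e["eval_loss"] for e in eval_logs],
         label="eval loss")
plt.xlabel("global step")
plt.ylabel("loss")
plt.title("test-codegpt training curves")
plt.legend()
plt.show()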