{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8635578583765112,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008635578583765112,
"grad_norm": 0.6623008251190186,
"learning_rate": 0.0002,
"loss": 1.7783,
"step": 10
},
{
"epoch": 0.017271157167530225,
"grad_norm": 0.5188003182411194,
"learning_rate": 0.00019994965423831854,
"loss": 1.3036,
"step": 20
},
{
"epoch": 0.025906735751295335,
"grad_norm": 0.5021234154701233,
"learning_rate": 0.00019979866764718843,
"loss": 0.9562,
"step": 30
},
{
"epoch": 0.03454231433506045,
"grad_norm": 0.4302387833595276,
"learning_rate": 0.00019954719225730847,
"loss": 0.9219,
"step": 40
},
{
"epoch": 0.04317789291882556,
"grad_norm": 0.5690401792526245,
"learning_rate": 0.00019919548128307954,
"loss": 0.9422,
"step": 50
},
{
"epoch": 0.05181347150259067,
"grad_norm": 0.5356280207633972,
"learning_rate": 0.00019874388886763944,
"loss": 0.8915,
"step": 60
},
{
"epoch": 0.06044905008635579,
"grad_norm": 0.4261893332004547,
"learning_rate": 0.00019819286972627066,
"loss": 0.8435,
"step": 70
},
{
"epoch": 0.0690846286701209,
"grad_norm": 0.5050117373466492,
"learning_rate": 0.00019754297868854073,
"loss": 0.8846,
"step": 80
},
{
"epoch": 0.07772020725388601,
"grad_norm": 0.39581066370010376,
"learning_rate": 0.00019679487013963564,
"loss": 0.8297,
"step": 90
},
{
"epoch": 0.08635578583765112,
"grad_norm": 0.45784541964530945,
"learning_rate": 0.00019594929736144976,
"loss": 0.8334,
"step": 100
},
{
"epoch": 0.09499136442141623,
"grad_norm": 0.43146270513534546,
"learning_rate": 0.00019500711177409454,
"loss": 0.8614,
"step": 110
},
{
"epoch": 0.10362694300518134,
"grad_norm": 0.38489606976509094,
"learning_rate": 0.00019396926207859084,
"loss": 0.7923,
"step": 120
},
{
"epoch": 0.11226252158894647,
"grad_norm": 0.4087812006473541,
"learning_rate": 0.00019283679330160726,
"loss": 0.8122,
"step": 130
},
{
"epoch": 0.12089810017271158,
"grad_norm": 0.579022228717804,
"learning_rate": 0.00019161084574320696,
"loss": 0.7754,
"step": 140
},
{
"epoch": 0.12953367875647667,
"grad_norm": 0.4284251928329468,
"learning_rate": 0.00019029265382866214,
"loss": 0.8205,
"step": 150
},
{
"epoch": 0.1381692573402418,
"grad_norm": 0.4063330590724945,
"learning_rate": 0.00018888354486549237,
"loss": 0.8301,
"step": 160
},
{
"epoch": 0.14680483592400692,
"grad_norm": 0.42471617460250854,
"learning_rate": 0.00018738493770697852,
"loss": 0.8253,
"step": 170
},
{
"epoch": 0.15544041450777202,
"grad_norm": 0.44914358854293823,
"learning_rate": 0.00018579834132349772,
"loss": 0.8211,
"step": 180
},
{
"epoch": 0.16407599309153714,
"grad_norm": 0.35791847109794617,
"learning_rate": 0.00018412535328311814,
"loss": 0.8241,
"step": 190
},
{
"epoch": 0.17271157167530224,
"grad_norm": 0.41551169753074646,
"learning_rate": 0.0001823676581429833,
"loss": 0.8187,
"step": 200
},
{
"epoch": 0.18134715025906736,
"grad_norm": 0.3767964243888855,
"learning_rate": 0.00018052702575310588,
"loss": 0.7815,
"step": 210
},
{
"epoch": 0.18998272884283246,
"grad_norm": 0.3870415687561035,
"learning_rate": 0.00017860530947427875,
"loss": 0.7907,
"step": 220
},
{
"epoch": 0.19861830742659758,
"grad_norm": 0.4934289753437042,
"learning_rate": 0.0001766044443118978,
"loss": 0.7788,
"step": 230
},
{
"epoch": 0.20725388601036268,
"grad_norm": 0.40702807903289795,
"learning_rate": 0.0001745264449675755,
"loss": 0.7932,
"step": 240
},
{
"epoch": 0.2158894645941278,
"grad_norm": 0.40032950043678284,
"learning_rate": 0.00017237340381050703,
"loss": 0.7682,
"step": 250
},
{
"epoch": 0.22452504317789293,
"grad_norm": 0.4420917332172394,
"learning_rate": 0.00017014748877063214,
"loss": 0.7663,
"step": 260
},
{
"epoch": 0.23316062176165803,
"grad_norm": 0.3428107500076294,
"learning_rate": 0.00016785094115571322,
"loss": 0.7693,
"step": 270
},
{
"epoch": 0.24179620034542315,
"grad_norm": 0.7044374942779541,
"learning_rate": 0.00016548607339452853,
"loss": 0.7809,
"step": 280
},
{
"epoch": 0.2504317789291883,
"grad_norm": 0.37226754426956177,
"learning_rate": 0.00016305526670845226,
"loss": 0.8263,
"step": 290
},
{
"epoch": 0.25906735751295334,
"grad_norm": 0.40080901980400085,
"learning_rate": 0.00016056096871376667,
"loss": 0.8182,
"step": 300
},
{
"epoch": 0.26770293609671847,
"grad_norm": 0.37441566586494446,
"learning_rate": 0.00015800569095711982,
"loss": 0.7597,
"step": 310
},
{
"epoch": 0.2763385146804836,
"grad_norm": 0.4305630624294281,
"learning_rate": 0.00015539200638661104,
"loss": 0.801,
"step": 320
},
{
"epoch": 0.2849740932642487,
"grad_norm": 0.482138991355896,
"learning_rate": 0.00015272254676105025,
"loss": 0.7541,
"step": 330
},
{
"epoch": 0.29360967184801384,
"grad_norm": 0.40154337882995605,
"learning_rate": 0.00015000000000000001,
"loss": 0.8057,
"step": 340
},
{
"epoch": 0.3022452504317789,
"grad_norm": 0.3598850667476654,
"learning_rate": 0.0001472271074772683,
"loss": 0.756,
"step": 350
},
{
"epoch": 0.31088082901554404,
"grad_norm": 0.46537917852401733,
"learning_rate": 0.00014440666126057744,
"loss": 0.7563,
"step": 360
},
{
"epoch": 0.31951640759930916,
"grad_norm": 0.44803386926651,
"learning_rate": 0.00014154150130018866,
"loss": 0.7908,
"step": 370
},
{
"epoch": 0.3281519861830743,
"grad_norm": 0.4054509401321411,
"learning_rate": 0.00013863451256931287,
"loss": 0.7768,
"step": 380
},
{
"epoch": 0.33678756476683935,
"grad_norm": 0.3694852292537689,
"learning_rate": 0.00013568862215918717,
"loss": 0.7508,
"step": 390
},
{
"epoch": 0.3454231433506045,
"grad_norm": 0.44693148136138916,
"learning_rate": 0.00013270679633174218,
"loss": 0.7723,
"step": 400
},
{
"epoch": 0.3540587219343696,
"grad_norm": 0.47046658396720886,
"learning_rate": 0.0001296920375328275,
"loss": 0.7662,
"step": 410
},
{
"epoch": 0.3626943005181347,
"grad_norm": 0.4129233658313751,
"learning_rate": 0.00012664738136900348,
"loss": 0.7546,
"step": 420
},
{
"epoch": 0.37132987910189985,
"grad_norm": 0.34191545844078064,
"learning_rate": 0.00012357589355094275,
"loss": 0.7586,
"step": 430
},
{
"epoch": 0.3799654576856649,
"grad_norm": 0.37853655219078064,
"learning_rate": 0.00012048066680651908,
"loss": 0.7964,
"step": 440
},
{
"epoch": 0.38860103626943004,
"grad_norm": 0.4268665313720703,
"learning_rate": 0.00011736481776669306,
"loss": 0.758,
"step": 450
},
{
"epoch": 0.39723661485319517,
"grad_norm": 0.3980403542518616,
"learning_rate": 0.00011423148382732853,
"loss": 0.7618,
"step": 460
},
{
"epoch": 0.4058721934369603,
"grad_norm": 0.3414579927921295,
"learning_rate": 0.00011108381999010111,
"loss": 0.7867,
"step": 470
},
{
"epoch": 0.41450777202072536,
"grad_norm": 0.3817692697048187,
"learning_rate": 0.00010792499568567884,
"loss": 0.738,
"step": 480
},
{
"epoch": 0.4231433506044905,
"grad_norm": 0.43348562717437744,
"learning_rate": 0.00010475819158237425,
"loss": 0.8304,
"step": 490
},
{
"epoch": 0.4317789291882556,
"grad_norm": 0.31839361786842346,
"learning_rate": 0.00010158659638348081,
"loss": 0.7905,
"step": 500
},
{
"epoch": 0.44041450777202074,
"grad_norm": 0.3816024661064148,
"learning_rate": 9.84134036165192e-05,
"loss": 0.7286,
"step": 510
},
{
"epoch": 0.44905008635578586,
"grad_norm": 0.39195144176483154,
"learning_rate": 9.524180841762577e-05,
"loss": 0.7773,
"step": 520
},
{
"epoch": 0.45768566493955093,
"grad_norm": 0.4253169298171997,
"learning_rate": 9.207500431432115e-05,
"loss": 0.7905,
"step": 530
},
{
"epoch": 0.46632124352331605,
"grad_norm": 0.4840407967567444,
"learning_rate": 8.891618000989891e-05,
"loss": 0.7924,
"step": 540
},
{
"epoch": 0.4749568221070812,
"grad_norm": 0.36509501934051514,
"learning_rate": 8.57685161726715e-05,
"loss": 0.772,
"step": 550
},
{
"epoch": 0.4835924006908463,
"grad_norm": 0.3916187584400177,
"learning_rate": 8.263518223330697e-05,
"loss": 0.7499,
"step": 560
},
{
"epoch": 0.49222797927461137,
"grad_norm": 0.39139890670776367,
"learning_rate": 7.951933319348095e-05,
"loss": 0.7694,
"step": 570
},
{
"epoch": 0.5008635578583766,
"grad_norm": 0.4077214300632477,
"learning_rate": 7.642410644905726e-05,
"loss": 0.7335,
"step": 580
},
{
"epoch": 0.5094991364421416,
"grad_norm": 0.39956483244895935,
"learning_rate": 7.335261863099651e-05,
"loss": 0.7627,
"step": 590
},
{
"epoch": 0.5181347150259067,
"grad_norm": 0.5192585587501526,
"learning_rate": 7.030796246717255e-05,
"loss": 0.7224,
"step": 600
},
{
"epoch": 0.5267702936096719,
"grad_norm": 0.3494277000427246,
"learning_rate": 6.729320366825784e-05,
"loss": 0.7576,
"step": 610
},
{
"epoch": 0.5354058721934369,
"grad_norm": 0.3863460123538971,
"learning_rate": 6.431137784081282e-05,
"loss": 0.7628,
"step": 620
},
{
"epoch": 0.5440414507772021,
"grad_norm": 0.3836175501346588,
"learning_rate": 6.136548743068713e-05,
"loss": 0.7852,
"step": 630
},
{
"epoch": 0.5526770293609672,
"grad_norm": 0.3461022973060608,
"learning_rate": 5.845849869981137e-05,
"loss": 0.7745,
"step": 640
},
{
"epoch": 0.5613126079447323,
"grad_norm": 0.4282170534133911,
"learning_rate": 5.559333873942259e-05,
"loss": 0.7755,
"step": 650
},
{
"epoch": 0.5699481865284974,
"grad_norm": 0.457685261964798,
"learning_rate": 5.277289252273174e-05,
"loss": 0.7358,
"step": 660
},
{
"epoch": 0.5785837651122625,
"grad_norm": 0.37797123193740845,
"learning_rate": 5.000000000000002e-05,
"loss": 0.7377,
"step": 670
},
{
"epoch": 0.5872193436960277,
"grad_norm": 0.3950762450695038,
"learning_rate": 4.727745323894976e-05,
"loss": 0.7795,
"step": 680
},
{
"epoch": 0.5958549222797928,
"grad_norm": 0.3642215132713318,
"learning_rate": 4.4607993613388976e-05,
"loss": 0.7442,
"step": 690
},
{
"epoch": 0.6044905008635578,
"grad_norm": 0.3860316574573517,
"learning_rate": 4.19943090428802e-05,
"loss": 0.7364,
"step": 700
},
{
"epoch": 0.613126079447323,
"grad_norm": 0.36184850335121155,
"learning_rate": 3.943903128623335e-05,
"loss": 0.7424,
"step": 710
},
{
"epoch": 0.6217616580310881,
"grad_norm": 0.3922266662120819,
"learning_rate": 3.694473329154778e-05,
"loss": 0.7364,
"step": 720
},
{
"epoch": 0.6303972366148531,
"grad_norm": 0.38554486632347107,
"learning_rate": 3.45139266054715e-05,
"loss": 0.7684,
"step": 730
},
{
"epoch": 0.6390328151986183,
"grad_norm": 0.3428330421447754,
"learning_rate": 3.21490588442868e-05,
"loss": 0.7346,
"step": 740
},
{
"epoch": 0.6476683937823834,
"grad_norm": 0.41248151659965515,
"learning_rate": 2.9852511229367865e-05,
"loss": 0.7606,
"step": 750
},
{
"epoch": 0.6563039723661486,
"grad_norm": 0.36212557554244995,
"learning_rate": 2.7626596189492983e-05,
"loss": 0.7511,
"step": 760
},
{
"epoch": 0.6649395509499136,
"grad_norm": 0.36411234736442566,
"learning_rate": 2.5473555032424533e-05,
"loss": 0.7316,
"step": 770
},
{
"epoch": 0.6735751295336787,
"grad_norm": 0.34422898292541504,
"learning_rate": 2.339555568810221e-05,
"loss": 0.7304,
"step": 780
},
{
"epoch": 0.6822107081174439,
"grad_norm": 0.4062047302722931,
"learning_rate": 2.139469052572127e-05,
"loss": 0.7369,
"step": 790
},
{
"epoch": 0.690846286701209,
"grad_norm": 0.3605830669403076,
"learning_rate": 1.947297424689414e-05,
"loss": 0.7435,
"step": 800
},
{
"epoch": 0.6994818652849741,
"grad_norm": 0.39452221989631653,
"learning_rate": 1.763234185701673e-05,
"loss": 0.733,
"step": 810
},
{
"epoch": 0.7081174438687392,
"grad_norm": 0.5000078082084656,
"learning_rate": 1.587464671688187e-05,
"loss": 0.7747,
"step": 820
},
{
"epoch": 0.7167530224525043,
"grad_norm": 0.35579192638397217,
"learning_rate": 1.4201658676502294e-05,
"loss": 0.7468,
"step": 830
},
{
"epoch": 0.7253886010362695,
"grad_norm": 0.36388713121414185,
"learning_rate": 1.2615062293021507e-05,
"loss": 0.74,
"step": 840
},
{
"epoch": 0.7340241796200345,
"grad_norm": 0.352222740650177,
"learning_rate": 1.1116455134507664e-05,
"loss": 0.735,
"step": 850
},
{
"epoch": 0.7426597582037997,
"grad_norm": 0.42218002676963806,
"learning_rate": 9.707346171337894e-06,
"loss": 0.7086,
"step": 860
},
{
"epoch": 0.7512953367875648,
"grad_norm": 0.38099607825279236,
"learning_rate": 8.38915425679304e-06,
"loss": 0.7615,
"step": 870
},
{
"epoch": 0.7599309153713298,
"grad_norm": 0.35596323013305664,
"learning_rate": 7.163206698392744e-06,
"loss": 0.7398,
"step": 880
},
{
"epoch": 0.768566493955095,
"grad_norm": 0.38860464096069336,
"learning_rate": 6.030737921409169e-06,
"loss": 0.7362,
"step": 890
},
{
"epoch": 0.7772020725388601,
"grad_norm": 0.4052976965904236,
"learning_rate": 4.992888225905468e-06,
"loss": 0.7543,
"step": 900
},
{
"epoch": 0.7858376511226253,
"grad_norm": 0.3824405372142792,
"learning_rate": 4.050702638550275e-06,
"loss": 0.724,
"step": 910
},
{
"epoch": 0.7944732297063903,
"grad_norm": 0.40496277809143066,
"learning_rate": 3.2051298603643753e-06,
"loss": 0.706,
"step": 920
},
{
"epoch": 0.8031088082901554,
"grad_norm": 0.3866259455680847,
"learning_rate": 2.4570213114592954e-06,
"loss": 0.741,
"step": 930
},
{
"epoch": 0.8117443868739206,
"grad_norm": 0.35403546690940857,
"learning_rate": 1.8071302737293295e-06,
"loss": 0.7306,
"step": 940
},
{
"epoch": 0.8203799654576857,
"grad_norm": 0.4546634554862976,
"learning_rate": 1.2561111323605712e-06,
"loss": 0.7597,
"step": 950
},
{
"epoch": 0.8290155440414507,
"grad_norm": 0.3442727029323578,
"learning_rate": 8.04518716920466e-07,
"loss": 0.7845,
"step": 960
},
{
"epoch": 0.8376511226252159,
"grad_norm": 0.37662404775619507,
"learning_rate": 4.5280774269154115e-07,
"loss": 0.7295,
"step": 970
},
{
"epoch": 0.846286701208981,
"grad_norm": 0.34127819538116455,
"learning_rate": 2.0133235281156736e-07,
"loss": 0.7542,
"step": 980
},
{
"epoch": 0.8549222797927462,
"grad_norm": 0.38656285405158997,
"learning_rate": 5.0345761681491746e-08,
"loss": 0.7498,
"step": 990
},
{
"epoch": 0.8635578583765112,
"grad_norm": 0.3828294575214386,
"learning_rate": 0.0,
"loss": 0.74,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3420339184869376e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}