hchcsuim
/

batch-size16_FFPP-raw_opencv-1FPS_faces-expand0-aligned_unaugmentation_seeds-42_226_2080S

hchcsuim committed on Jul 9, 2024

Commit

3e5b690

verified ·

1 Parent(s): 5b64c5d

Model save

Browse files

Files changed (7) hide show

all_results.json +8 -0
runs/Jul09_18-04-18_Lab4/events.out.tfevents.1720519464.Lab4.13264.0 +3 -0
train_results.json +8 -0
trained_weights_biases.pkl +3 -0
trainer_state.json +1014 -0
training_args.bin +1 -1
weights_biases.pkl +1 -1

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 0.9996370235934664,
+    "total_flos": 2.1905085973455176e+18,
+    "train_loss": 0.1390944759354006,
+    "train_runtime": 1416.7601,
+    "train_samples_per_second": 62.218,
+    "train_steps_per_second": 0.972
+}

runs/Jul09_18-04-18_Lab4/events.out.tfevents.1720519464.Lab4.13264.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ec253cbddd6c65bf4511e228797606fde0bac321b06dbbcdd5beeea8e1d99f
+size 35077

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 0.9996370235934664,
+    "total_flos": 2.1905085973455176e+18,
+    "train_loss": 0.1390944759354006,
+    "train_runtime": 1416.7601,
+    "train_samples_per_second": 62.218,
+    "train_steps_per_second": 0.972
+}

trained_weights_biases.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f22d75b488550312fd2044eb4352d14520cbbbe261dfcb6fab117e5a3418556
+size 6858

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1014 @@

+{
+  "best_metric": 0.9838793846712347,
+  "best_model_checkpoint": "batch-size16_FFPP-raw_opencv-1FPS_faces-expand0-aligned_unaugmentation_seeds-42_226_2080S\\checkpoint-1377",
+  "epoch": 0.9996370235934664,
+  "eval_steps": 500,
+  "global_step": 1377,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.007259528130671506,
+      "grad_norm": 2.737785577774048,
+      "learning_rate": 3.6231884057971017e-06,
+      "loss": 0.5456,
+      "step": 10
+    },
+    {
+      "epoch": 0.014519056261343012,
+      "grad_norm": 5.670254707336426,
+      "learning_rate": 7.246376811594203e-06,
+      "loss": 0.5189,
+      "step": 20
+    },
+    {
+      "epoch": 0.021778584392014518,
+      "grad_norm": 3.771057367324829,
+      "learning_rate": 1.0869565217391305e-05,
+      "loss": 0.5151,
+      "step": 30
+    },
+    {
+      "epoch": 0.029038112522686024,
+      "grad_norm": 3.4408185482025146,
+      "learning_rate": 1.4492753623188407e-05,
+      "loss": 0.4985,
+      "step": 40
+    },
+    {
+      "epoch": 0.036297640653357534,
+      "grad_norm": 3.975008726119995,
+      "learning_rate": 1.8115942028985507e-05,
+      "loss": 0.4928,
+      "step": 50
+    },
+    {
+      "epoch": 0.043557168784029036,
+      "grad_norm": 5.987420558929443,
+      "learning_rate": 2.173913043478261e-05,
+      "loss": 0.5283,
+      "step": 60
+    },
+    {
+      "epoch": 0.050816696914700546,
+      "grad_norm": 5.085023403167725,
+      "learning_rate": 2.5362318840579714e-05,
+      "loss": 0.4647,
+      "step": 70
+    },
+    {
+      "epoch": 0.05807622504537205,
+      "grad_norm": 4.82670783996582,
+      "learning_rate": 2.8985507246376814e-05,
+      "loss": 0.4624,
+      "step": 80
+    },
+    {
+      "epoch": 0.06533575317604355,
+      "grad_norm": 22.98508071899414,
+      "learning_rate": 3.260869565217392e-05,
+      "loss": 0.3943,
+      "step": 90
+    },
+    {
+      "epoch": 0.07259528130671507,
+      "grad_norm": 14.131559371948242,
+      "learning_rate": 3.6231884057971014e-05,
+      "loss": 0.3991,
+      "step": 100
+    },
+    {
+      "epoch": 0.07985480943738657,
+      "grad_norm": 17.21990394592285,
+      "learning_rate": 3.985507246376812e-05,
+      "loss": 0.3497,
+      "step": 110
+    },
+    {
+      "epoch": 0.08711433756805807,
+      "grad_norm": 16.338533401489258,
+      "learning_rate": 4.347826086956522e-05,
+      "loss": 0.3352,
+      "step": 120
+    },
+    {
+      "epoch": 0.09437386569872959,
+      "grad_norm": 22.675607681274414,
+      "learning_rate": 4.710144927536232e-05,
+      "loss": 0.2831,
+      "step": 130
+    },
+    {
+      "epoch": 0.10163339382940109,
+      "grad_norm": 37.350643157958984,
+      "learning_rate": 4.9919289749798225e-05,
+      "loss": 0.2436,
+      "step": 140
+    },
+    {
+      "epoch": 0.1088929219600726,
+      "grad_norm": 37.97492980957031,
+      "learning_rate": 4.951573849878935e-05,
+      "loss": 0.2268,
+      "step": 150
+    },
+    {
+      "epoch": 0.1161524500907441,
+      "grad_norm": 16.116291046142578,
+      "learning_rate": 4.911218724778047e-05,
+      "loss": 0.222,
+      "step": 160
+    },
+    {
+      "epoch": 0.12341197822141561,
+      "grad_norm": 12.323293685913086,
+      "learning_rate": 4.870863599677159e-05,
+      "loss": 0.2354,
+      "step": 170
+    },
+    {
+      "epoch": 0.1306715063520871,
+      "grad_norm": 41.4226188659668,
+      "learning_rate": 4.8305084745762714e-05,
+      "loss": 0.2426,
+      "step": 180
+    },
+    {
+      "epoch": 0.13793103448275862,
+      "grad_norm": 8.462140083312988,
+      "learning_rate": 4.7901533494753834e-05,
+      "loss": 0.2723,
+      "step": 190
+    },
+    {
+      "epoch": 0.14519056261343014,
+      "grad_norm": 19.180883407592773,
+      "learning_rate": 4.749798224374496e-05,
+      "loss": 0.1473,
+      "step": 200
+    },
+    {
+      "epoch": 0.15245009074410162,
+      "grad_norm": 27.919723510742188,
+      "learning_rate": 4.7094430992736075e-05,
+      "loss": 0.2545,
+      "step": 210
+    },
+    {
+      "epoch": 0.15970961887477314,
+      "grad_norm": 11.533125877380371,
+      "learning_rate": 4.66908797417272e-05,
+      "loss": 0.2211,
+      "step": 220
+    },
+    {
+      "epoch": 0.16696914700544466,
+      "grad_norm": 8.824727058410645,
+      "learning_rate": 4.628732849071832e-05,
+      "loss": 0.2145,
+      "step": 230
+    },
+    {
+      "epoch": 0.17422867513611615,
+      "grad_norm": 6.202546119689941,
+      "learning_rate": 4.588377723970945e-05,
+      "loss": 0.1939,
+      "step": 240
+    },
+    {
+      "epoch": 0.18148820326678766,
+      "grad_norm": 35.62083435058594,
+      "learning_rate": 4.548022598870056e-05,
+      "loss": 0.1966,
+      "step": 250
+    },
+    {
+      "epoch": 0.18874773139745918,
+      "grad_norm": 32.47185134887695,
+      "learning_rate": 4.507667473769169e-05,
+      "loss": 0.201,
+      "step": 260
+    },
+    {
+      "epoch": 0.19600725952813067,
+      "grad_norm": 5.475787162780762,
+      "learning_rate": 4.467312348668281e-05,
+      "loss": 0.1356,
+      "step": 270
+    },
+    {
+      "epoch": 0.20326678765880218,
+      "grad_norm": 30.500520706176758,
+      "learning_rate": 4.426957223567393e-05,
+      "loss": 0.1653,
+      "step": 280
+    },
+    {
+      "epoch": 0.21052631578947367,
+      "grad_norm": 10.784961700439453,
+      "learning_rate": 4.386602098466506e-05,
+      "loss": 0.2356,
+      "step": 290
+    },
+    {
+      "epoch": 0.2177858439201452,
+      "grad_norm": 3.632953405380249,
+      "learning_rate": 4.346246973365617e-05,
+      "loss": 0.142,
+      "step": 300
+    },
+    {
+      "epoch": 0.2250453720508167,
+      "grad_norm": 17.169273376464844,
+      "learning_rate": 4.30589184826473e-05,
+      "loss": 0.1331,
+      "step": 310
+    },
+    {
+      "epoch": 0.2323049001814882,
+      "grad_norm": 13.25188159942627,
+      "learning_rate": 4.265536723163842e-05,
+      "loss": 0.1813,
+      "step": 320
+    },
+    {
+      "epoch": 0.2395644283121597,
+      "grad_norm": 16.338443756103516,
+      "learning_rate": 4.225181598062955e-05,
+      "loss": 0.1602,
+      "step": 330
+    },
+    {
+      "epoch": 0.24682395644283123,
+      "grad_norm": 5.392543315887451,
+      "learning_rate": 4.184826472962066e-05,
+      "loss": 0.1341,
+      "step": 340
+    },
+    {
+      "epoch": 0.2540834845735027,
+      "grad_norm": 5.405388832092285,
+      "learning_rate": 4.144471347861179e-05,
+      "loss": 0.1379,
+      "step": 350
+    },
+    {
+      "epoch": 0.2613430127041742,
+      "grad_norm": 17.226938247680664,
+      "learning_rate": 4.104116222760291e-05,
+      "loss": 0.1228,
+      "step": 360
+    },
+    {
+      "epoch": 0.26860254083484575,
+      "grad_norm": 7.700235843658447,
+      "learning_rate": 4.063761097659403e-05,
+      "loss": 0.1375,
+      "step": 370
+    },
+    {
+      "epoch": 0.27586206896551724,
+      "grad_norm": 7.433803558349609,
+      "learning_rate": 4.023405972558515e-05,
+      "loss": 0.1781,
+      "step": 380
+    },
+    {
+      "epoch": 0.2831215970961887,
+      "grad_norm": 5.178743839263916,
+      "learning_rate": 3.983050847457627e-05,
+      "loss": 0.1278,
+      "step": 390
+    },
+    {
+      "epoch": 0.29038112522686027,
+      "grad_norm": 4.961187362670898,
+      "learning_rate": 3.94269572235674e-05,
+      "loss": 0.1389,
+      "step": 400
+    },
+    {
+      "epoch": 0.29764065335753176,
+      "grad_norm": 11.25297737121582,
+      "learning_rate": 3.902340597255852e-05,
+      "loss": 0.1479,
+      "step": 410
+    },
+    {
+      "epoch": 0.30490018148820325,
+      "grad_norm": 11.958194732666016,
+      "learning_rate": 3.861985472154964e-05,
+      "loss": 0.1526,
+      "step": 420
+    },
+    {
+      "epoch": 0.3121597096188748,
+      "grad_norm": 7.577833652496338,
+      "learning_rate": 3.821630347054076e-05,
+      "loss": 0.1142,
+      "step": 430
+    },
+    {
+      "epoch": 0.3194192377495463,
+      "grad_norm": 21.478981018066406,
+      "learning_rate": 3.7812752219531885e-05,
+      "loss": 0.1274,
+      "step": 440
+    },
+    {
+      "epoch": 0.32667876588021777,
+      "grad_norm": 9.360047340393066,
+      "learning_rate": 3.7409200968523006e-05,
+      "loss": 0.1214,
+      "step": 450
+    },
+    {
+      "epoch": 0.3339382940108893,
+      "grad_norm": 11.478937149047852,
+      "learning_rate": 3.7005649717514126e-05,
+      "loss": 0.1418,
+      "step": 460
+    },
+    {
+      "epoch": 0.3411978221415608,
+      "grad_norm": 9.956938743591309,
+      "learning_rate": 3.6602098466505247e-05,
+      "loss": 0.1241,
+      "step": 470
+    },
+    {
+      "epoch": 0.3484573502722323,
+      "grad_norm": 14.257845878601074,
+      "learning_rate": 3.619854721549637e-05,
+      "loss": 0.1162,
+      "step": 480
+    },
+    {
+      "epoch": 0.35571687840290384,
+      "grad_norm": 11.823905944824219,
+      "learning_rate": 3.5794995964487494e-05,
+      "loss": 0.1542,
+      "step": 490
+    },
+    {
+      "epoch": 0.3629764065335753,
+      "grad_norm": 4.8425421714782715,
+      "learning_rate": 3.539144471347861e-05,
+      "loss": 0.1201,
+      "step": 500
+    },
+    {
+      "epoch": 0.3702359346642468,
+      "grad_norm": 13.574287414550781,
+      "learning_rate": 3.4987893462469735e-05,
+      "loss": 0.0932,
+      "step": 510
+    },
+    {
+      "epoch": 0.37749546279491836,
+      "grad_norm": 15.155635833740234,
+      "learning_rate": 3.4584342211460856e-05,
+      "loss": 0.1096,
+      "step": 520
+    },
+    {
+      "epoch": 0.38475499092558985,
+      "grad_norm": 13.314964294433594,
+      "learning_rate": 3.418079096045198e-05,
+      "loss": 0.1278,
+      "step": 530
+    },
+    {
+      "epoch": 0.39201451905626133,
+      "grad_norm": 9.03641128540039,
+      "learning_rate": 3.3777239709443096e-05,
+      "loss": 0.1082,
+      "step": 540
+    },
+    {
+      "epoch": 0.3992740471869328,
+      "grad_norm": 9.013300895690918,
+      "learning_rate": 3.3373688458434224e-05,
+      "loss": 0.1242,
+      "step": 550
+    },
+    {
+      "epoch": 0.40653357531760437,
+      "grad_norm": 18.238086700439453,
+      "learning_rate": 3.2970137207425344e-05,
+      "loss": 0.1175,
+      "step": 560
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 3.2947638034820557,
+      "learning_rate": 3.2566585956416464e-05,
+      "loss": 0.1004,
+      "step": 570
+    },
+    {
+      "epoch": 0.42105263157894735,
+      "grad_norm": 9.514063835144043,
+      "learning_rate": 3.216303470540759e-05,
+      "loss": 0.0834,
+      "step": 580
+    },
+    {
+      "epoch": 0.4283121597096189,
+      "grad_norm": 16.605690002441406,
+      "learning_rate": 3.1759483454398705e-05,
+      "loss": 0.1197,
+      "step": 590
+    },
+    {
+      "epoch": 0.4355716878402904,
+      "grad_norm": 14.686426162719727,
+      "learning_rate": 3.135593220338983e-05,
+      "loss": 0.084,
+      "step": 600
+    },
+    {
+      "epoch": 0.44283121597096187,
+      "grad_norm": 5.132232189178467,
+      "learning_rate": 3.095238095238095e-05,
+      "loss": 0.0838,
+      "step": 610
+    },
+    {
+      "epoch": 0.4500907441016334,
+      "grad_norm": 6.26077127456665,
+      "learning_rate": 3.054882970137208e-05,
+      "loss": 0.0998,
+      "step": 620
+    },
+    {
+      "epoch": 0.4573502722323049,
+      "grad_norm": 2.3136749267578125,
+      "learning_rate": 3.0145278450363197e-05,
+      "loss": 0.0875,
+      "step": 630
+    },
+    {
+      "epoch": 0.4646098003629764,
+      "grad_norm": 9.352468490600586,
+      "learning_rate": 2.9741727199354318e-05,
+      "loss": 0.1028,
+      "step": 640
+    },
+    {
+      "epoch": 0.47186932849364793,
+      "grad_norm": 10.247867584228516,
+      "learning_rate": 2.933817594834544e-05,
+      "loss": 0.0902,
+      "step": 650
+    },
+    {
+      "epoch": 0.4791288566243194,
+      "grad_norm": 7.45835542678833,
+      "learning_rate": 2.8934624697336565e-05,
+      "loss": 0.1026,
+      "step": 660
+    },
+    {
+      "epoch": 0.4863883847549909,
+      "grad_norm": 8.243184089660645,
+      "learning_rate": 2.8531073446327682e-05,
+      "loss": 0.1103,
+      "step": 670
+    },
+    {
+      "epoch": 0.49364791288566245,
+      "grad_norm": 8.296549797058105,
+      "learning_rate": 2.8127522195318806e-05,
+      "loss": 0.0843,
+      "step": 680
+    },
+    {
+      "epoch": 0.5009074410163339,
+      "grad_norm": 1.9985827207565308,
+      "learning_rate": 2.772397094430993e-05,
+      "loss": 0.08,
+      "step": 690
+    },
+    {
+      "epoch": 0.5081669691470054,
+      "grad_norm": 5.689640045166016,
+      "learning_rate": 2.7320419693301054e-05,
+      "loss": 0.1065,
+      "step": 700
+    },
+    {
+      "epoch": 0.515426497277677,
+      "grad_norm": 4.976999759674072,
+      "learning_rate": 2.691686844229217e-05,
+      "loss": 0.0894,
+      "step": 710
+    },
+    {
+      "epoch": 0.5226860254083484,
+      "grad_norm": 4.71283483505249,
+      "learning_rate": 2.6513317191283295e-05,
+      "loss": 0.0964,
+      "step": 720
+    },
+    {
+      "epoch": 0.52994555353902,
+      "grad_norm": 12.80367660522461,
+      "learning_rate": 2.6109765940274415e-05,
+      "loss": 0.1052,
+      "step": 730
+    },
+    {
+      "epoch": 0.5372050816696915,
+      "grad_norm": 3.522590398788452,
+      "learning_rate": 2.570621468926554e-05,
+      "loss": 0.1431,
+      "step": 740
+    },
+    {
+      "epoch": 0.5444646098003629,
+      "grad_norm": 11.088656425476074,
+      "learning_rate": 2.5302663438256656e-05,
+      "loss": 0.1191,
+      "step": 750
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 8.015061378479004,
+      "learning_rate": 2.489911218724778e-05,
+      "loss": 0.0802,
+      "step": 760
+    },
+    {
+      "epoch": 0.558983666061706,
+      "grad_norm": 7.619071960449219,
+      "learning_rate": 2.4495560936238903e-05,
+      "loss": 0.0915,
+      "step": 770
+    },
+    {
+      "epoch": 0.5662431941923775,
+      "grad_norm": 16.504602432250977,
+      "learning_rate": 2.4092009685230024e-05,
+      "loss": 0.0815,
+      "step": 780
+    },
+    {
+      "epoch": 0.573502722323049,
+      "grad_norm": 11.828579902648926,
+      "learning_rate": 2.3688458434221148e-05,
+      "loss": 0.0853,
+      "step": 790
+    },
+    {
+      "epoch": 0.5807622504537205,
+      "grad_norm": 29.000469207763672,
+      "learning_rate": 2.3284907183212268e-05,
+      "loss": 0.1209,
+      "step": 800
+    },
+    {
+      "epoch": 0.588021778584392,
+      "grad_norm": 8.757672309875488,
+      "learning_rate": 2.2881355932203392e-05,
+      "loss": 0.0898,
+      "step": 810
+    },
+    {
+      "epoch": 0.5952813067150635,
+      "grad_norm": 10.0402193069458,
+      "learning_rate": 2.2477804681194512e-05,
+      "loss": 0.1202,
+      "step": 820
+    },
+    {
+      "epoch": 0.6025408348457351,
+      "grad_norm": 19.456077575683594,
+      "learning_rate": 2.2074253430185636e-05,
+      "loss": 0.0748,
+      "step": 830
+    },
+    {
+      "epoch": 0.6098003629764065,
+      "grad_norm": 13.841584205627441,
+      "learning_rate": 2.1670702179176757e-05,
+      "loss": 0.1123,
+      "step": 840
+    },
+    {
+      "epoch": 0.617059891107078,
+      "grad_norm": 6.337653636932373,
+      "learning_rate": 2.1267150928167877e-05,
+      "loss": 0.1086,
+      "step": 850
+    },
+    {
+      "epoch": 0.6243194192377496,
+      "grad_norm": 3.7396390438079834,
+      "learning_rate": 2.0863599677159e-05,
+      "loss": 0.0811,
+      "step": 860
+    },
+    {
+      "epoch": 0.631578947368421,
+      "grad_norm": 2.1864776611328125,
+      "learning_rate": 2.046004842615012e-05,
+      "loss": 0.077,
+      "step": 870
+    },
+    {
+      "epoch": 0.6388384754990926,
+      "grad_norm": 2.572352886199951,
+      "learning_rate": 2.0056497175141245e-05,
+      "loss": 0.06,
+      "step": 880
+    },
+    {
+      "epoch": 0.6460980036297641,
+      "grad_norm": 10.233570098876953,
+      "learning_rate": 1.9652945924132365e-05,
+      "loss": 0.0846,
+      "step": 890
+    },
+    {
+      "epoch": 0.6533575317604355,
+      "grad_norm": 7.469882965087891,
+      "learning_rate": 1.924939467312349e-05,
+      "loss": 0.057,
+      "step": 900
+    },
+    {
+      "epoch": 0.6606170598911071,
+      "grad_norm": 8.410418510437012,
+      "learning_rate": 1.884584342211461e-05,
+      "loss": 0.0452,
+      "step": 910
+    },
+    {
+      "epoch": 0.6678765880217786,
+      "grad_norm": 15.23531436920166,
+      "learning_rate": 1.8442292171105734e-05,
+      "loss": 0.085,
+      "step": 920
+    },
+    {
+      "epoch": 0.6751361161524501,
+      "grad_norm": 8.20494270324707,
+      "learning_rate": 1.8038740920096854e-05,
+      "loss": 0.1351,
+      "step": 930
+    },
+    {
+      "epoch": 0.6823956442831216,
+      "grad_norm": 3.9129507541656494,
+      "learning_rate": 1.7635189669087974e-05,
+      "loss": 0.0997,
+      "step": 940
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 13.237354278564453,
+      "learning_rate": 1.7231638418079095e-05,
+      "loss": 0.0568,
+      "step": 950
+    },
+    {
+      "epoch": 0.6969147005444646,
+      "grad_norm": 4.347463607788086,
+      "learning_rate": 1.682808716707022e-05,
+      "loss": 0.0566,
+      "step": 960
+    },
+    {
+      "epoch": 0.7041742286751361,
+      "grad_norm": 12.865084648132324,
+      "learning_rate": 1.642453591606134e-05,
+      "loss": 0.0621,
+      "step": 970
+    },
+    {
+      "epoch": 0.7114337568058077,
+      "grad_norm": 2.132204294204712,
+      "learning_rate": 1.6020984665052463e-05,
+      "loss": 0.0699,
+      "step": 980
+    },
+    {
+      "epoch": 0.7186932849364791,
+      "grad_norm": 17.504295349121094,
+      "learning_rate": 1.5617433414043583e-05,
+      "loss": 0.111,
+      "step": 990
+    },
+    {
+      "epoch": 0.7259528130671506,
+      "grad_norm": 6.843140125274658,
+      "learning_rate": 1.5213882163034707e-05,
+      "loss": 0.1505,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7332123411978222,
+      "grad_norm": 3.8472251892089844,
+      "learning_rate": 1.4810330912025828e-05,
+      "loss": 0.0588,
+      "step": 1010
+    },
+    {
+      "epoch": 0.7404718693284936,
+      "grad_norm": 7.102182865142822,
+      "learning_rate": 1.440677966101695e-05,
+      "loss": 0.0617,
+      "step": 1020
+    },
+    {
+      "epoch": 0.7477313974591652,
+      "grad_norm": 3.76487135887146,
+      "learning_rate": 1.400322841000807e-05,
+      "loss": 0.0919,
+      "step": 1030
+    },
+    {
+      "epoch": 0.7549909255898367,
+      "grad_norm": 6.440709590911865,
+      "learning_rate": 1.3599677158999194e-05,
+      "loss": 0.1049,
+      "step": 1040
+    },
+    {
+      "epoch": 0.7622504537205081,
+      "grad_norm": 7.21970272064209,
+      "learning_rate": 1.3196125907990314e-05,
+      "loss": 0.0648,
+      "step": 1050
+    },
+    {
+      "epoch": 0.7695099818511797,
+      "grad_norm": 6.058024883270264,
+      "learning_rate": 1.2792574656981438e-05,
+      "loss": 0.0494,
+      "step": 1060
+    },
+    {
+      "epoch": 0.7767695099818511,
+      "grad_norm": 12.306318283081055,
+      "learning_rate": 1.2389023405972559e-05,
+      "loss": 0.0613,
+      "step": 1070
+    },
+    {
+      "epoch": 0.7840290381125227,
+      "grad_norm": 2.709543228149414,
+      "learning_rate": 1.198547215496368e-05,
+      "loss": 0.0926,
+      "step": 1080
+    },
+    {
+      "epoch": 0.7912885662431942,
+      "grad_norm": 12.46874713897705,
+      "learning_rate": 1.1581920903954803e-05,
+      "loss": 0.0616,
+      "step": 1090
+    },
+    {
+      "epoch": 0.7985480943738656,
+      "grad_norm": 2.520782232284546,
+      "learning_rate": 1.1178369652945925e-05,
+      "loss": 0.0718,
+      "step": 1100
+    },
+    {
+      "epoch": 0.8058076225045372,
+      "grad_norm": 12.071261405944824,
+      "learning_rate": 1.0774818401937047e-05,
+      "loss": 0.0438,
+      "step": 1110
+    },
+    {
+      "epoch": 0.8130671506352087,
+      "grad_norm": 15.032535552978516,
+      "learning_rate": 1.0371267150928169e-05,
+      "loss": 0.1217,
+      "step": 1120
+    },
+    {
+      "epoch": 0.8203266787658802,
+      "grad_norm": 5.926509380340576,
+      "learning_rate": 9.96771589991929e-06,
+      "loss": 0.0475,
+      "step": 1130
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 20.453245162963867,
+      "learning_rate": 9.564164648910412e-06,
+      "loss": 0.0533,
+      "step": 1140
+    },
+    {
+      "epoch": 0.8348457350272233,
+      "grad_norm": 5.995026588439941,
+      "learning_rate": 9.160613397901534e-06,
+      "loss": 0.0845,
+      "step": 1150
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 8.493058204650879,
+      "learning_rate": 8.757062146892656e-06,
+      "loss": 0.0577,
+      "step": 1160
+    },
+    {
+      "epoch": 0.8493647912885662,
+      "grad_norm": 14.51004409790039,
+      "learning_rate": 8.353510895883778e-06,
+      "loss": 0.0735,
+      "step": 1170
+    },
+    {
+      "epoch": 0.8566243194192378,
+      "grad_norm": 10.728328704833984,
+      "learning_rate": 7.949959644874898e-06,
+      "loss": 0.062,
+      "step": 1180
+    },
+    {
+      "epoch": 0.8638838475499092,
+      "grad_norm": 4.626131057739258,
+      "learning_rate": 7.546408393866021e-06,
+      "loss": 0.0746,
+      "step": 1190
+    },
+    {
+      "epoch": 0.8711433756805808,
+      "grad_norm": 2.4815566539764404,
+      "learning_rate": 7.142857142857143e-06,
+      "loss": 0.0418,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8784029038112523,
+      "grad_norm": 3.1350157260894775,
+      "learning_rate": 6.739305891848265e-06,
+      "loss": 0.0403,
+      "step": 1210
+    },
+    {
+      "epoch": 0.8856624319419237,
+      "grad_norm": 13.391355514526367,
+      "learning_rate": 6.335754640839386e-06,
+      "loss": 0.0456,
+      "step": 1220
+    },
+    {
+      "epoch": 0.8929219600725953,
+      "grad_norm": 10.643239974975586,
+      "learning_rate": 5.932203389830509e-06,
+      "loss": 0.0497,
+      "step": 1230
+    },
+    {
+      "epoch": 0.9001814882032668,
+      "grad_norm": 8.388955116271973,
+      "learning_rate": 5.52865213882163e-06,
+      "loss": 0.0466,
+      "step": 1240
+    },
+    {
+      "epoch": 0.9074410163339383,
+      "grad_norm": 6.461310386657715,
+      "learning_rate": 5.1251008878127525e-06,
+      "loss": 0.0576,
+      "step": 1250
+    },
+    {
+      "epoch": 0.9147005444646098,
+      "grad_norm": 10.813868522644043,
+      "learning_rate": 4.721549636803875e-06,
+      "loss": 0.0565,
+      "step": 1260
+    },
+    {
+      "epoch": 0.9219600725952813,
+      "grad_norm": 2.3280351161956787,
+      "learning_rate": 4.317998385794996e-06,
+      "loss": 0.047,
+      "step": 1270
+    },
+    {
+      "epoch": 0.9292196007259528,
+      "grad_norm": 6.7660441398620605,
+      "learning_rate": 3.914447134786118e-06,
+      "loss": 0.0384,
+      "step": 1280
+    },
+    {
+      "epoch": 0.9364791288566243,
+      "grad_norm": 8.67406940460205,
+      "learning_rate": 3.5108958837772397e-06,
+      "loss": 0.0709,
+      "step": 1290
+    },
+    {
+      "epoch": 0.9437386569872959,
+      "grad_norm": 9.94025993347168,
+      "learning_rate": 3.107344632768362e-06,
+      "loss": 0.0598,
+      "step": 1300
+    },
+    {
+      "epoch": 0.9509981851179673,
+      "grad_norm": 4.01815938949585,
+      "learning_rate": 2.7037933817594835e-06,
+      "loss": 0.0299,
+      "step": 1310
+    },
+    {
+      "epoch": 0.9582577132486388,
+      "grad_norm": 19.582164764404297,
+      "learning_rate": 2.3002421307506056e-06,
+      "loss": 0.0595,
+      "step": 1320
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 10.374151229858398,
+      "learning_rate": 1.8966908797417273e-06,
+      "loss": 0.0577,
+      "step": 1330
+    },
+    {
+      "epoch": 0.9727767695099818,
+      "grad_norm": 1.8138954639434814,
+      "learning_rate": 1.4931396287328492e-06,
+      "loss": 0.0507,
+      "step": 1340
+    },
+    {
+      "epoch": 0.9800362976406534,
+      "grad_norm": 7.8535308837890625,
+      "learning_rate": 1.089588377723971e-06,
+      "loss": 0.0456,
+      "step": 1350
+    },
+    {
+      "epoch": 0.9872958257713249,
+      "grad_norm": 7.184234142303467,
+      "learning_rate": 6.860371267150928e-07,
+      "loss": 0.0536,
+      "step": 1360
+    },
+    {
+      "epoch": 0.9945553539019963,
+      "grad_norm": 11.848790168762207,
+      "learning_rate": 2.8248587570621473e-07,
+      "loss": 0.0508,
+      "step": 1370
+    },
+    {
+      "epoch": 0.9996370235934664,
+      "eval_accuracy": 0.9838793846712347,
+      "eval_f1": 0.9897581894843057,
+      "eval_loss": 0.04314929246902466,
+      "eval_precision": 0.9843590956661362,
+      "eval_recall": 0.9952168367346939,
+      "eval_roc_auc": 0.9989303794647093,
+      "eval_runtime": 372.429,
+      "eval_samples_per_second": 236.684,
+      "eval_steps_per_second": 14.795,
+      "step": 1377
+    },
+    {
+      "epoch": 0.9996370235934664,
+      "step": 1377,
+      "total_flos": 2.1905085973455176e+18,
+      "train_loss": 0.1390944759354006,
+      "train_runtime": 1416.7601,
+      "train_samples_per_second": 62.218,
+      "train_steps_per_second": 0.972
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1377,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.1905085973455176e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e9ef9c40a6c3c05721a165439580392ac32f6c29ae10c1d349676c87a07461a
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d7ee2f072bbca6437d00588419d5a0be31d48e361dc2dac438015c3caa01497
 size 5240

weights_biases.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd18c6e2cc32804fb9afadf2a5e1af9d5d65377ac9396920c1941e06e655da61
 size 6858

 version https://git-lfs.github.com/spec/v1
+oid sha256:550cc073bf9de5b51bd34ec57fd42da48e1e5b9cabbb3f28fd6c1230a7e86b45
 size 6858