Upload 3 files

Browse files

Files changed (3) hide show

events.out.tfevents.1728228710.c51c281a98ce.2055.0 +3 -0
events.out.tfevents.1728233913.c51c281a98ce.2055.1 +3 -0
trainer_state.json +441 -0

events.out.tfevents.1728228710.c51c281a98ce.2055.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:782e98157db2efa561b1eacff6f1f9d70c8e0ebafc4c2eac592e8476242cf765
+size 18372

events.out.tfevents.1728233913.c51c281a98ce.2055.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7824addfc89ce5164e956f24f899b329793a5027481996a7ce134e1553cffec
+size 359

trainer_state.json ADDED Viewed

	@@ -0,0 +1,441 @@

+{
+  "best_metric": 0.8340622782707214,
+  "best_model_checkpoint": "./results/checkpoint-1000",
+  "epoch": 2.9940436796823295,
+  "eval_steps": 500,
+  "global_step": 1131,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05294506949040371,
+      "grad_norm": 62.576904296875,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 16.7099,
+      "step": 20
+    },
+    {
+      "epoch": 0.10589013898080742,
+      "grad_norm": 35.65088653564453,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 14.7891,
+      "step": 40
+    },
+    {
+      "epoch": 0.15883520847121113,
+      "grad_norm": 30.480260848999023,
+      "learning_rate": 6e-06,
+      "loss": 11.6833,
+      "step": 60
+    },
+    {
+      "epoch": 0.21178027796161483,
+      "grad_norm": 24.958763122558594,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 7.878,
+      "step": 80
+    },
+    {
+      "epoch": 0.26472534745201853,
+      "grad_norm": 4.896631717681885,
+      "learning_rate": 1e-05,
+      "loss": 4.1299,
+      "step": 100
+    },
+    {
+      "epoch": 0.31767041694242226,
+      "grad_norm": 1.8821851015090942,
+      "learning_rate": 1.2e-05,
+      "loss": 2.512,
+      "step": 120
+    },
+    {
+      "epoch": 0.37061548643282594,
+      "grad_norm": 0.7400406002998352,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 1.8807,
+      "step": 140
+    },
+    {
+      "epoch": 0.42356055592322966,
+      "grad_norm": 0.601634681224823,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 1.5617,
+      "step": 160
+    },
+    {
+      "epoch": 0.47650562541363334,
+      "grad_norm": 0.5250083208084106,
+      "learning_rate": 1.8e-05,
+      "loss": 1.461,
+      "step": 180
+    },
+    {
+      "epoch": 0.5294506949040371,
+      "grad_norm": 0.5037821531295776,
+      "learning_rate": 2e-05,
+      "loss": 1.3703,
+      "step": 200
+    },
+    {
+      "epoch": 0.5823957643944407,
+      "grad_norm": 0.47873854637145996,
+      "learning_rate": 2.2000000000000003e-05,
+      "loss": 1.2966,
+      "step": 220
+    },
+    {
+      "epoch": 0.6353408338848445,
+      "grad_norm": 3.9664485454559326,
+      "learning_rate": 2.4e-05,
+      "loss": 1.2427,
+      "step": 240
+    },
+    {
+      "epoch": 0.6882859033752482,
+      "grad_norm": 0.45016345381736755,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 1.2139,
+      "step": 260
+    },
+    {
+      "epoch": 0.7412309728656519,
+      "grad_norm": 0.5136398077011108,
+      "learning_rate": 2.8000000000000003e-05,
+      "loss": 1.1679,
+      "step": 280
+    },
+    {
+      "epoch": 0.7941760423560555,
+      "grad_norm": 0.3736862242221832,
+      "learning_rate": 3e-05,
+      "loss": 1.1604,
+      "step": 300
+    },
+    {
+      "epoch": 0.8471211118464593,
+      "grad_norm": 0.38698282837867737,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 1.1407,
+      "step": 320
+    },
+    {
+      "epoch": 0.900066181336863,
+      "grad_norm": 0.4257580637931824,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 1.1177,
+      "step": 340
+    },
+    {
+      "epoch": 0.9530112508272667,
+      "grad_norm": 0.3982521891593933,
+      "learning_rate": 3.6e-05,
+      "loss": 1.1199,
+      "step": 360
+    },
+    {
+      "epoch": 1.0059563203176705,
+      "grad_norm": 0.3849237859249115,
+      "learning_rate": 3.8e-05,
+      "loss": 1.0925,
+      "step": 380
+    },
+    {
+      "epoch": 1.0589013898080741,
+      "grad_norm": 0.3753887414932251,
+      "learning_rate": 4e-05,
+      "loss": 1.0605,
+      "step": 400
+    },
+    {
+      "epoch": 1.1118464592984778,
+      "grad_norm": 0.3810591697692871,
+      "learning_rate": 4.2e-05,
+      "loss": 1.0541,
+      "step": 420
+    },
+    {
+      "epoch": 1.1647915287888815,
+      "grad_norm": 0.3707886040210724,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 1.0334,
+      "step": 440
+    },
+    {
+      "epoch": 1.2177365982792852,
+      "grad_norm": 0.36902502179145813,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 1.0249,
+      "step": 460
+    },
+    {
+      "epoch": 1.270681667769689,
+      "grad_norm": 0.3862062692642212,
+      "learning_rate": 4.8e-05,
+      "loss": 1.0141,
+      "step": 480
+    },
+    {
+      "epoch": 1.3236267372600927,
+      "grad_norm": 0.36468520760536194,
+      "learning_rate": 5e-05,
+      "loss": 1.063,
+      "step": 500
+    },
+    {
+      "epoch": 1.3236267372600927,
+      "eval_loss": 0.9045532941818237,
+      "eval_runtime": 74.2771,
+      "eval_samples_per_second": 10.165,
+      "eval_steps_per_second": 2.545,
+      "step": 500
+    },
+    {
+      "epoch": 1.3765718067504964,
+      "grad_norm": 0.3426459729671478,
+      "learning_rate": 4.8415213946117275e-05,
+      "loss": 1.01,
+      "step": 520
+    },
+    {
+      "epoch": 1.4295168762409,
+      "grad_norm": 0.3736313581466675,
+      "learning_rate": 4.6830427892234554e-05,
+      "loss": 0.984,
+      "step": 540
+    },
+    {
+      "epoch": 1.4824619457313037,
+      "grad_norm": 0.36285898089408875,
+      "learning_rate": 4.524564183835183e-05,
+      "loss": 0.9816,
+      "step": 560
+    },
+    {
+      "epoch": 1.5354070152217076,
+      "grad_norm": 0.37838441133499146,
+      "learning_rate": 4.36608557844691e-05,
+      "loss": 0.9807,
+      "step": 580
+    },
+    {
+      "epoch": 1.588352084712111,
+      "grad_norm": 0.3449678421020508,
+      "learning_rate": 4.207606973058637e-05,
+      "loss": 0.982,
+      "step": 600
+    },
+    {
+      "epoch": 1.641297154202515,
+      "grad_norm": 0.3467804789543152,
+      "learning_rate": 4.0491283676703644e-05,
+      "loss": 0.9553,
+      "step": 620
+    },
+    {
+      "epoch": 1.6942422236929184,
+      "grad_norm": 0.3551880419254303,
+      "learning_rate": 3.8906497622820917e-05,
+      "loss": 0.9701,
+      "step": 640
+    },
+    {
+      "epoch": 1.7471872931833223,
+      "grad_norm": 0.3425547182559967,
+      "learning_rate": 3.7321711568938196e-05,
+      "loss": 0.973,
+      "step": 660
+    },
+    {
+      "epoch": 1.800132362673726,
+      "grad_norm": 0.32189810276031494,
+      "learning_rate": 3.573692551505547e-05,
+      "loss": 0.9599,
+      "step": 680
+    },
+    {
+      "epoch": 1.8530774321641297,
+      "grad_norm": 0.34214696288108826,
+      "learning_rate": 3.415213946117274e-05,
+      "loss": 0.952,
+      "step": 700
+    },
+    {
+      "epoch": 1.9060225016545336,
+      "grad_norm": 0.33412784337997437,
+      "learning_rate": 3.256735340729002e-05,
+      "loss": 0.9453,
+      "step": 720
+    },
+    {
+      "epoch": 1.958967571144937,
+      "grad_norm": 0.33273905515670776,
+      "learning_rate": 3.098256735340729e-05,
+      "loss": 0.9387,
+      "step": 740
+    },
+    {
+      "epoch": 2.011912640635341,
+      "grad_norm": 0.32698702812194824,
+      "learning_rate": 2.939778129952457e-05,
+      "loss": 0.9456,
+      "step": 760
+    },
+    {
+      "epoch": 2.0648577101257444,
+      "grad_norm": 0.36428529024124146,
+      "learning_rate": 2.7812995245641837e-05,
+      "loss": 0.9445,
+      "step": 780
+    },
+    {
+      "epoch": 2.1178027796161483,
+      "grad_norm": 0.3233266770839691,
+      "learning_rate": 2.6228209191759113e-05,
+      "loss": 0.9185,
+      "step": 800
+    },
+    {
+      "epoch": 2.1707478491065517,
+      "grad_norm": 0.3173067569732666,
+      "learning_rate": 2.4643423137876386e-05,
+      "loss": 0.9146,
+      "step": 820
+    },
+    {
+      "epoch": 2.2236929185969556,
+      "grad_norm": 0.33917301893234253,
+      "learning_rate": 2.305863708399366e-05,
+      "loss": 0.9195,
+      "step": 840
+    },
+    {
+      "epoch": 2.2766379880873595,
+      "grad_norm": 0.3438282608985901,
+      "learning_rate": 2.1473851030110938e-05,
+      "loss": 0.9356,
+      "step": 860
+    },
+    {
+      "epoch": 2.329583057577763,
+      "grad_norm": 0.33590319752693176,
+      "learning_rate": 1.988906497622821e-05,
+      "loss": 0.9319,
+      "step": 880
+    },
+    {
+      "epoch": 2.382528127068167,
+      "grad_norm": 0.5414553880691528,
+      "learning_rate": 1.8304278922345483e-05,
+      "loss": 0.9208,
+      "step": 900
+    },
+    {
+      "epoch": 2.4354731965585703,
+      "grad_norm": 0.34509792923927307,
+      "learning_rate": 1.671949286846276e-05,
+      "loss": 0.9026,
+      "step": 920
+    },
+    {
+      "epoch": 2.488418266048974,
+      "grad_norm": 0.30984020233154297,
+      "learning_rate": 1.5134706814580033e-05,
+      "loss": 0.9066,
+      "step": 940
+    },
+    {
+      "epoch": 2.541363335539378,
+      "grad_norm": 0.31895536184310913,
+      "learning_rate": 1.3549920760697307e-05,
+      "loss": 0.9275,
+      "step": 960
+    },
+    {
+      "epoch": 2.5943084050297816,
+      "grad_norm": 0.3005692660808563,
+      "learning_rate": 1.1965134706814581e-05,
+      "loss": 0.9241,
+      "step": 980
+    },
+    {
+      "epoch": 2.6472534745201854,
+      "grad_norm": 0.325959712266922,
+      "learning_rate": 1.0380348652931855e-05,
+      "loss": 0.9181,
+      "step": 1000
+    },
+    {
+      "epoch": 2.6472534745201854,
+      "eval_loss": 0.8340622782707214,
+      "eval_runtime": 74.3137,
+      "eval_samples_per_second": 10.16,
+      "eval_steps_per_second": 2.543,
+      "step": 1000
+    },
+    {
+      "epoch": 2.700198544010589,
+      "grad_norm": 0.37024152278900146,
+      "learning_rate": 8.79556259904913e-06,
+      "loss": 0.9156,
+      "step": 1020
+    },
+    {
+      "epoch": 2.753143613500993,
+      "grad_norm": 0.3021298050880432,
+      "learning_rate": 7.2107765451664034e-06,
+      "loss": 0.9204,
+      "step": 1040
+    },
+    {
+      "epoch": 2.8060886829913967,
+      "grad_norm": 0.35478419065475464,
+      "learning_rate": 5.625990491283677e-06,
+      "loss": 0.9342,
+      "step": 1060
+    },
+    {
+      "epoch": 2.8590337524818,
+      "grad_norm": 0.34113916754722595,
+      "learning_rate": 4.041204437400952e-06,
+      "loss": 0.9187,
+      "step": 1080
+    },
+    {
+      "epoch": 2.9119788219722036,
+      "grad_norm": 0.33516696095466614,
+      "learning_rate": 2.456418383518225e-06,
+      "loss": 0.9238,
+      "step": 1100
+    },
+    {
+      "epoch": 2.9649238914626075,
+      "grad_norm": 0.2989369034767151,
+      "learning_rate": 8.716323296354993e-07,
+      "loss": 0.9188,
+      "step": 1120
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 1131,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.101970198757376e+16,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}