WangXFng committed on
Commit 3ea0682 · verified · 1 Parent(s): 5f5cedf

Model save

adapter_config.json CHANGED
@@ -19,17 +19,17 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
-    "o_proj",
+    "q_proj",
     "gate_proj",
-    "v_proj",
     "up_proj",
     "down_proj",
-    "q_proj"
+    "v_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
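This change halves the LoRA rank (r: 64 → 32) and reorders target_modules; the module set itself is unchanged, and its order has no effect. For reference, a minimal sketch of a peft LoraConfig matching the new file is shown below — lora_alpha and lora_dropout are not visible in this diff, so the values used here are placeholders, not the author's settings.

```python
# Minimal sketch of a peft LoraConfig matching the new adapter_config.json.
# lora_alpha and lora_dropout are NOT in this diff; the values below are placeholders.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,  # lowered from 64 in this commit
    target_modules=[
        "k_proj", "q_proj", "gate_proj", "up_proj",
        "down_proj", "v_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
    lora_alpha=16,      # placeholder: not shown in the diff
    lora_dropout=0.05,  # placeholder: not shown in the diff
)
```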
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07f3924e34badcecbf387a22b15f0befc53d9e7aba5a94ed23b9289307716fd2
-size 1239447000
+oid sha256:3c1085cc31aaf24ecb9fa39dfa434995d07da84f55dfdaedba2a0f0b282223a5
+size 1149269232
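The adapter checkpoint shrinks from roughly 1.24 GB to 1.15 GB. A quick way to confirm the new rank from the checkpoint itself is to inspect the lora_A tensor shapes — a hedged sketch, assuming the file has been downloaded to the working directory:

```python
# Sketch: confirm the LoRA rank stored in the checkpoint without loading full tensors.
# Assumes adapter_model.safetensors is available locally.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in f.keys():
        if "lora_A" in name:
            shape = f.get_slice(name).get_shape()
            print(name, shape)  # first dimension should now be 32
            break
```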
trainer_state.json CHANGED
@@ -10,110 +10,110 @@
   "log_history": [
     {
       "epoch": 0.28435345134001566,
-      "grad_norm": 0.49568846821784973,
+      "grad_norm": 0.656255841255188,
       "learning_rate": 9.294251565167901e-05,
-      "loss": 1.2539,
+      "loss": 1.2378,
       "step": 250
     },
     {
       "epoch": 0.5687069026800313,
-      "grad_norm": 0.45915305614471436,
+      "grad_norm": 0.543454647064209,
       "learning_rate": 8.582811610700058e-05,
-      "loss": 0.6466,
+      "loss": 0.6375,
       "step": 500
     },
     {
       "epoch": 0.8530603540200469,
-      "grad_norm": 0.3644406795501709,
+      "grad_norm": 0.43407657742500305,
       "learning_rate": 7.871371656232215e-05,
-      "loss": 0.5166,
+      "loss": 0.5158,
       "step": 750
     },
     {
       "epoch": 1.1374138053600626,
-      "grad_norm": 0.3468233644962311,
+      "grad_norm": 0.4036562740802765,
       "learning_rate": 7.159931701764372e-05,
       "loss": 0.4885,
       "step": 1000
     },
     {
       "epoch": 1.421767256700078,
-      "grad_norm": 0.3474404513835907,
+      "grad_norm": 0.40085434913635254,
       "learning_rate": 6.448491747296529e-05,
-      "loss": 0.4758,
+      "loss": 0.4759,
       "step": 1250
     },
     {
       "epoch": 1.706120708040094,
-      "grad_norm": 0.3625764548778534,
+      "grad_norm": 0.4041031002998352,
       "learning_rate": 5.737051792828686e-05,
-      "loss": 0.4624,
+      "loss": 0.4629,
       "step": 1500
     },
     {
       "epoch": 1.9904741593801094,
-      "grad_norm": 0.3761649429798126,
+      "grad_norm": 0.4215051233768463,
       "learning_rate": 5.025611838360843e-05,
-      "loss": 0.4549,
+      "loss": 0.4555,
       "step": 1750
     },
     {
       "epoch": 2.2748276107201253,
-      "grad_norm": 0.3799266815185547,
+      "grad_norm": 0.4411364495754242,
       "learning_rate": 4.3141718838929996e-05,
-      "loss": 0.4438,
+      "loss": 0.4451,
       "step": 2000
     },
     {
       "epoch": 2.5591810620601407,
-      "grad_norm": 0.4071201682090759,
+      "grad_norm": 0.4689200818538666,
       "learning_rate": 3.602731929425157e-05,
-      "loss": 0.431,
+      "loss": 0.433,
       "step": 2250
     },
     {
       "epoch": 2.843534513400156,
-      "grad_norm": 0.3974260985851288,
+      "grad_norm": 0.4492100775241852,
       "learning_rate": 2.8912919749573137e-05,
-      "loss": 0.4193,
+      "loss": 0.4217,
       "step": 2500
     },
     {
       "epoch": 3.127887964740172,
-      "grad_norm": 0.4286128580570221,
+      "grad_norm": 0.4875541627407074,
       "learning_rate": 2.1798520204894708e-05,
-      "loss": 0.4081,
+      "loss": 0.4116,
       "step": 2750
     },
     {
       "epoch": 3.4122414160801875,
-      "grad_norm": 0.4309234917163849,
+      "grad_norm": 0.48542124032974243,
       "learning_rate": 1.4684120660216277e-05,
-      "loss": 0.3971,
+      "loss": 0.4012,
       "step": 3000
     },
     {
       "epoch": 3.6965948674202034,
-      "grad_norm": 0.4457033574581146,
+      "grad_norm": 0.5120503306388855,
       "learning_rate": 7.569721115537849e-06,
-      "loss": 0.3893,
+      "loss": 0.3939,
       "step": 3250
     },
     {
       "epoch": 3.980948318760219,
-      "grad_norm": 0.45286279916763306,
+      "grad_norm": 0.510686993598938,
       "learning_rate": 4.553215708594195e-07,
-      "loss": 0.3847,
+      "loss": 0.3892,
       "step": 3500
     },
     {
       "epoch": 3.9991469396459802,
       "step": 3516,
-      "total_flos": 9.688385912636621e+17,
-      "train_loss": 0.51168809517523,
-      "train_runtime": 22604.8542,
-      "train_samples_per_second": 39.825,
-      "train_steps_per_second": 0.156
+      "total_flos": 9.473878732962447e+17,
+      "train_loss": 0.5115500152856959,
+      "train_runtime": 22624.5633,
+      "train_samples_per_second": 39.791,
+      "train_steps_per_second": 0.155
     }
   ],
   "logging_steps": 250,
@@ -133,7 +133,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.688385912636621e+17,
+  "total_flos": 9.473878732962447e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
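The new run's log history tracks the previous one closely: training loss falls from about 1.24 at step 250 to about 0.39 at step 3500, and the final train_loss is 0.5116 versus 0.5117 before. To compare runs, the loss curve can be pulled straight out of trainer_state.json — a minimal sketch, assuming the file written by transformers.Trainer is available locally:

```python
# Sketch: extract the per-step loss from trainer_state.json to compare two runs.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # the final summary entry carries train_loss instead
        print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}")
```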
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d440338186a609f27bd06ffc3c0ece6d78660db22740828600577e50e4528def
+oid sha256:d25eb557ad70264fc715b2aa13435242b988886d53c26e60542469af6e08bf28
 size 5176
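training_args.bin stores the pickled TrainingArguments; only its hash changes here (the size stays 5176 bytes), so the hyperparameters behind the r change have to be read from the file itself. A hedged sketch for inspecting it, assuming you trust the checkpoint and have the matching transformers version installed:

```python
# Sketch: inspect the saved TrainingArguments. training_args.bin is a pickled object,
# so only load it from a source you trust; weights_only=False is required on recent torch.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```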