sanduntg/llama_2_dpo_with_reward_1000
README.md
CHANGED
@@ -41,7 +41,7 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs:
+- num_epochs: 1
 
 ### Training results
 
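The hunk above completes the listed training hyperparameters: a total train batch size of 64, default Adam settings, a linear schedule, and now one epoch. As a rough illustration only, here is a minimal sketch of how those values might be expressed as `transformers` `TrainingArguments` for a trl-style DPO run; the output directory, the per-device/accumulation split behind the total batch size of 64, and anything else not shown in this hunk are assumptions, not values taken from the commit.

```python
# Hypothetical sketch only: maps the README hyperparameters onto TrainingArguments.
# The per-device batch size / accumulation split and output_dir are assumptions.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llama_2_dpo_with_reward_1000",  # assumption
    num_train_epochs=1,                   # "num_epochs: 1" added in this commit
    per_device_train_batch_size=4,        # assumption: 4 * 16 accumulation steps = 64 total
    gradient_accumulation_steps=16,       # assumption; only the total (64) appears in the README
    lr_scheduler_type="linear",           # "lr_scheduler_type: linear"
    optim="adamw_torch",                  # Adam with betas=(0.9,0.999) and epsilon=1e-08
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```

These arguments would then typically be passed to a trainer (for a DPO run, e.g. trl's `DPOTrainer` via `args=training_args`), but the actual training script is not part of this commit.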
adapter_config.json
CHANGED
@@ -6,6 +6,7 @@
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
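The only change to adapter_config.json is the new `"layer_replication": null` entry, which recent peft releases include when serializing a `LoraConfig`; its appearance here suggests the adapter was re-saved with a newer peft version. A minimal sketch of writing a config that carries this field follows; the rank, alpha, and target modules are assumptions, not the values used by this adapter.

```python
# Hypothetical sketch: regenerate an adapter_config.json containing the new
# "layer_replication" field (requires a sufficiently recent peft release).
# r, lora_alpha, and target_modules below are assumptions, not this repo's values.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                                  # assumption
    lora_alpha=32,                         # assumption
    target_modules=["q_proj", "v_proj"],   # assumption
    layer_replication=None,                # serialized as "layer_replication": null
    task_type="CAUSAL_LM",
)
lora_config.save_pretrained("adapter_out")  # writes adapter_out/adapter_config.json
```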
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b81da75c8efc20cb169e645668a633395dc6cbb114b8a343ddbd7160b51df37a
 size 134235048
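This hunk only swaps the git-lfs pointer for the adapter weights: a new sha256 oid with the same 134,235,048-byte size. A minimal sketch of checking a downloaded copy against that pointer; the local file path is an assumption.

```python
# Hypothetical sketch: verify a downloaded file against its git-lfs pointer
# (oid sha256 and size as shown in the diff). The local path is an assumption.
import hashlib
import os

def verify_lfs_pointer(path, expected_sha256, expected_size):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return (
        digest.hexdigest() == expected_sha256
        and os.path.getsize(path) == expected_size
    )

print(verify_lfs_pointer(
    "adapter_model.safetensors",
    "b81da75c8efc20cb169e645668a633395dc6cbb114b8a343ddbd7160b51df37a",
    134235048,
))
```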
runs/Mar20_16-41-22_d12c3011894c/events.out.tfevents.1710952897.d12c3011894c.1141.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b149ebb3e748c46d748529eebcf4be8c38f18f7ea0416aedf0096999a86c46be
+size 7404
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04e50e81ce114fffd92cc882f3345c51ab5afad372b5e8f0d3c7e7bd0a80f6cf
 size 4475
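With the pointers updated, the repository holds the refreshed LoRA adapter, the pickled training arguments, and a new TensorBoard event file. A minimal sketch of loading the adapter on top of a base model with peft; the base checkpoint id is an assumption, since the actual base is recorded in the unchanged `base_model_name_or_path` field of adapter_config.json, which this diff does not show.

```python
# Hypothetical sketch: load this repo's LoRA adapter onto a base model.
# The base checkpoint id is an assumption, not taken from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-hf"  # assumption
base = AutoModelForCausalLM.from_pretrained(base_id)
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = PeftModel.from_pretrained(base, "sanduntg/llama_2_dpo_with_reward_1000")
```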