Muhammad Khalifa committed
Commit bacaabd • Parent(s): ed42aba

add low-shot models

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- low-shot-task-specific/coin_flip/best_model/adapter_config.json +21 -0
- low-shot-task-specific/coin_flip/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/coin_flip/best_model/optimizer.pt +3 -0
- low-shot-task-specific/coin_flip/best_model/rng_state.pth +3 -0
- low-shot-task-specific/coin_flip/best_model/scheduler.pt +3 -0
- low-shot-task-specific/coin_flip/best_model/trainer_state.json +121 -0
- low-shot-task-specific/coin_flip/best_model/training_args.bin +3 -0
- low-shot-task-specific/cola/best_model/adapter_config.json +21 -0
- low-shot-task-specific/cola/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/cola/best_model/optimizer.pt +3 -0
- low-shot-task-specific/cola/best_model/rng_state.pth +3 -0
- low-shot-task-specific/cola/best_model/scheduler.pt +3 -0
- low-shot-task-specific/cola/best_model/trainer_state.json +85 -0
- low-shot-task-specific/cola/best_model/training_args.bin +3 -0
- low-shot-task-specific/commonsense_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/commonsense_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific/commonsense_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific/commonsense_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific/commonsense_qa/best_model/trainer_state.json +111 -0
- low-shot-task-specific/commonsense_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific/emotion/best_model/adapter_config.json +21 -0
- low-shot-task-specific/emotion/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/emotion/best_model/optimizer.pt +3 -0
- low-shot-task-specific/emotion/best_model/rng_state.pth +3 -0
- low-shot-task-specific/emotion/best_model/scheduler.pt +3 -0
- low-shot-task-specific/emotion/best_model/trainer_state.json +157 -0
- low-shot-task-specific/emotion/best_model/training_args.bin +3 -0
- low-shot-task-specific/social_i_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific/social_i_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/social_i_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific/social_i_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific/social_i_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific/social_i_qa/best_model/trainer_state.json +111 -0
- low-shot-task-specific/social_i_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific/sst/best_model/adapter_config.json +21 -0
- low-shot-task-specific/sst/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/sst/best_model/optimizer.pt +3 -0
- low-shot-task-specific/sst/best_model/rng_state.pth +3 -0
- low-shot-task-specific/sst/best_model/scheduler.pt +3 -0
- low-shot-task-specific/sst/best_model/trainer_state.json +85 -0
- low-shot-task-specific/sst/best_model/training_args.bin +3 -0
- low-shot-task-specific/sum/best_model/adapter_config.json +21 -0
- low-shot-task-specific/sum/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/sum/best_model/optimizer.pt +3 -0
- low-shot-task-specific/sum/best_model/rng_state.pth +3 -0
- low-shot-task-specific/sum/best_model/scheduler.pt +3 -0
- low-shot-task-specific/sum/best_model/trainer_state.json +85 -0
- low-shot-task-specific/sum/best_model/training_args.bin +3 -0
- low-shot-task-specific/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific/coin_flip/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
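Each best_model folder added in this commit is a PEFT LoRA adapter (rank 16, alpha 16, dropout 0.05 on the q/k/v/o projection matrices) on top of meta-llama/Llama-2-13b-hf, as the adapter_config.json above shows. A minimal sketch of how such an adapter could be loaded, assuming the peft and transformers libraries and access to the gated base model; the local adapter path is illustrative, taken from this repository's layout:

# Sketch (not part of the commit): load one of these LoRA adapters with PEFT.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-13b-hf"                          # base model named in adapter_config.json
adapter_dir = "low-shot-task-specific/coin_flip/best_model"    # folder added by this commit

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16, device_map="auto")

# Wrap the base model with the LoRA weights (r=16, alpha=16, q/k/v/o projections).
model = PeftModel.from_pretrained(base, adapter_dir)
model.eval()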
low-shot-task-specific/coin_flip/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b
+size 104973389
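The .bin, .pt, and .pth entries in this commit are Git LFS pointer files rather than the binaries themselves: each records the LFS spec version, a sha256 object id, and the byte size of the real file, which Git LFS (or the Hub client) resolves at download time. A purely illustrative sketch of parsing such a pointer, using only the standard library:

# Sketch: parse the three key/value lines of a Git LFS pointer file (spec v1).
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("low-shot-task-specific/coin_flip/best_model/adapter_model.bin")
print(pointer["oid"])        # sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b
print(int(pointer["size"]))  # 104973389 bytes (~100 MB of adapter weights)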
low-shot-task-specific/coin_flip/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d92f8e6b0b40ecda9624a1676867dbcea32bdbef5e0eecbcac5106784ec8465b
+size 209984517
low-shot-task-specific/coin_flip/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c980b24b62e2109d15363aa73d40fa6fafc88b732c285e1b6fab92db69ce36b
+size 14575
low-shot-task-specific/coin_flip/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7da15a993b502c23d3f1c3380001efcd3dd910c920a088c178a788bdf015b29
+size 627
low-shot-task-specific/coin_flip/best_model/trainer_state.json
ADDED
@@ -0,0 +1,121 @@
+{
+  "best_metric": 0.14907684922218323,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/coin_flip/checkpoint-55",
+  "epoch": 8.979591836734693,
+  "eval_steps": 500,
+  "global_step": 55,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.98,
+      "eval_loss": 4.5179572105407715,
+      "eval_runtime": 1.8312,
+      "eval_samples_per_second": 26.758,
+      "eval_steps_per_second": 3.823,
+      "step": 6
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 4.5562,
+      "step": 10
+    },
+    {
+      "epoch": 1.96,
+      "eval_loss": 3.3285250663757324,
+      "eval_runtime": 1.8387,
+      "eval_samples_per_second": 26.649,
+      "eval_steps_per_second": 3.807,
+      "step": 12
+    },
+    {
+      "epoch": 2.94,
+      "eval_loss": 1.0093011856079102,
+      "eval_runtime": 1.8346,
+      "eval_samples_per_second": 26.709,
+      "eval_steps_per_second": 3.816,
+      "step": 18
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 2.3848,
+      "step": 20
+    },
+    {
+      "epoch": 3.92,
+      "eval_loss": 0.2200772613286972,
+      "eval_runtime": 1.8356,
+      "eval_samples_per_second": 26.694,
+      "eval_steps_per_second": 3.813,
+      "step": 24
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3144,
+      "step": 30
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.21126192808151245,
+      "eval_runtime": 1.8403,
+      "eval_samples_per_second": 26.626,
+      "eval_steps_per_second": 3.804,
+      "step": 30
+    },
+    {
+      "epoch": 5.88,
+      "eval_loss": 0.18616808950901031,
+      "eval_runtime": 1.8423,
+      "eval_samples_per_second": 26.598,
+      "eval_steps_per_second": 3.8,
+      "step": 36
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2066,
+      "step": 40
+    },
+    {
+      "epoch": 6.86,
+      "eval_loss": 0.1662234216928482,
+      "eval_runtime": 1.8364,
+      "eval_samples_per_second": 26.683,
+      "eval_steps_per_second": 3.812,
+      "step": 42
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.2262299805879593,
+      "eval_runtime": 1.8315,
+      "eval_samples_per_second": 26.754,
+      "eval_steps_per_second": 3.822,
+      "step": 49
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.0003,
+      "loss": 0.1856,
+      "step": 50
+    },
+    {
+      "epoch": 8.98,
+      "eval_loss": 0.14907684922218323,
+      "eval_runtime": 1.8356,
+      "eval_samples_per_second": 26.694,
+      "eval_steps_per_second": 3.813,
+      "step": 55
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 60,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 6584591944581120.0,
+  "trial_name": null,
+  "trial_params": null
+}
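trainer_state.json is the standard Hugging Face Trainer state: it records the best eval metric, which checkpoint produced it, and the per-step log history (here, eval_loss drops from about 4.52 to 0.149 over roughly 9 epochs of the 60-step coin_flip run). A small sketch, using only the standard library, of pulling the best-checkpoint information out of such a file; the path refers to the file added above:

# Sketch: read best-checkpoint info from a Trainer trainer_state.json.
import json

with open("low-shot-task-specific/coin_flip/best_model/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.14907684922218323 (best eval loss)
print(state["best_model_checkpoint"])  # .../low-shot-task-specific/coin_flip/checkpoint-55

# Entries in log_history that carry an "eval_loss" are evaluation steps.
evals = [e for e in state["log_history"] if "eval_loss" in e]
for entry in evals[-3:]:
    print(entry["step"], entry["eval_loss"])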
low-shot-task-specific/coin_flip/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f450808ed0897cbe91d86e09bf730b8688854884991e7216373c40ee768a0c9b
+size 4091
low-shot-task-specific/cola/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/cola/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:411c8f5252840aa1fc66fe6e846a855cc0c9826eb0e8a5e7e8ba168ffdeded3d
+size 104973389
low-shot-task-specific/cola/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3ebebf0b8ce1d3bed95e63794192866c76d3ed6e03bc0d928dc945817500540
+size 209984517
low-shot-task-specific/cola/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef953e6438f145b783f6ca5f8d6d997cb169a9ddb6824cf4f2f9e126b56b09b7
+size 14575
low-shot-task-specific/cola/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbabbf26b9b37d257cc72f404a441c985e21acf5c3e6fb7626e5104e04ff3282
+size 627
low-shot-task-specific/cola/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.1422310322523117,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/cola/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.399999999999999e-05,
+      "loss": 7.2579,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011399999999999999,
+      "loss": 6.0871,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.0100170373916626,
+      "eval_runtime": 5.8549,
+      "eval_samples_per_second": 34.159,
+      "eval_steps_per_second": 4.27,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017399999999999997,
+      "loss": 1.808,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.000234,
+      "loss": 0.2533,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.000294,
+      "loss": 0.2083,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.19681957364082336,
+      "eval_runtime": 5.8474,
+      "eval_samples_per_second": 34.203,
+      "eval_steps_per_second": 4.275,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.00028649999999999997,
+      "loss": 0.1663,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.0002715,
+      "loss": 0.1771,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.1422310322523117,
+      "eval_runtime": 5.848,
+      "eval_samples_per_second": 34.2,
+      "eval_steps_per_second": 4.275,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 4495226494648320.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/cola/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:432e4f00d89268eb8a2e942ba35b41ff6bef5e5193df86888baa8dbedf03e4e1
+size 4091
low-shot-task-specific/commonsense_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5911565f25237e1e4a88d364af20dbdd3c53aa306935a116cdb82f52cba3baa8
+size 104973389
low-shot-task-specific/commonsense_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:488383f8f47eb284f498b258cf82a60fcc881827248f5358d43805942165bc4d
+size 209984517
low-shot-task-specific/commonsense_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:329449cd1278f022b5239a2bd97e216a89c73ae3d215b6a1bfc73b69c537d4a3
+size 14575
low-shot-task-specific/commonsense_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
+size 627
low-shot-task-specific/commonsense_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,111 @@
+{
+  "best_metric": 0.28430670499801636,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/commonsense_qa/checkpoint-100",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 5.5323,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 3.1134,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6229318976402283,
+      "eval_runtime": 7.9588,
+      "eval_samples_per_second": 25.129,
+      "eval_steps_per_second": 3.141,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.6745,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.3959,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.3388,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.32543906569480896,
+      "eval_runtime": 7.9767,
+      "eval_samples_per_second": 25.073,
+      "eval_steps_per_second": 3.134,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.2496,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1963,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.28735020756721497,
+      "eval_runtime": 7.9596,
+      "eval_samples_per_second": 25.127,
+      "eval_steps_per_second": 3.141,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1475,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.087,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.0841,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.28430670499801636,
+      "eval_runtime": 7.9315,
+      "eval_samples_per_second": 25.216,
+      "eval_steps_per_second": 3.152,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.532283750678528e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/commonsense_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac62dd2138b26a82acced238425ed68cca2c7eb6c44552fa9843fba2e1d0cf34
+size 4091
low-shot-task-specific/emotion/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/emotion/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55526193b5417dd0e6ec1a050c8c68add7ac57e4f9adc8f5523fb7b6109cb1d5
+size 104973389
low-shot-task-specific/emotion/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c619e22fe8786bb47d576383db1964d04ec5895cf49c7967a06ea21ba69e24
+size 209984517
low-shot-task-specific/emotion/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a33d07fcc46ed21e10fb24f0266332833c17c1787ecf4b21b90883591a74c17a
+size 14575
low-shot-task-specific/emotion/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24a1a40a49a3444d450b508a337be12226511f236bc6c3d4905032050bc15d21
+size 627
low-shot-task-specific/emotion/best_model/trainer_state.json
ADDED
@@ -0,0 +1,157 @@
+{
+  "best_metric": 0.13983282446861267,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/emotion/checkpoint-150",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 6.4494,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 5.0703,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.654482126235962,
+      "eval_runtime": 6.8374,
+      "eval_samples_per_second": 29.251,
+      "eval_steps_per_second": 3.656,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 1.9694,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.4062,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.248,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.22630518674850464,
+      "eval_runtime": 6.8055,
+      "eval_samples_per_second": 29.388,
+      "eval_steps_per_second": 3.674,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.1644,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1532,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.17524582147598267,
+      "eval_runtime": 6.7943,
+      "eval_samples_per_second": 29.437,
+      "eval_steps_per_second": 3.68,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1291,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.082,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.0672,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.14034521579742432,
+      "eval_runtime": 6.8294,
+      "eval_samples_per_second": 29.285,
+      "eval_steps_per_second": 3.661,
+      "step": 100
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0443,
+      "step": 110
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 0.000195,
+      "loss": 0.0505,
+      "step": 120
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.187747061252594,
+      "eval_runtime": 6.8658,
+      "eval_samples_per_second": 29.13,
+      "eval_steps_per_second": 3.641,
+      "step": 125
+    },
+    {
+      "epoch": 5.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.042,
+      "step": 130
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 0.000165,
+      "loss": 0.0225,
+      "step": 140
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 0.00015,
+      "loss": 0.0131,
+      "step": 150
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.13983282446861267,
+      "eval_runtime": 6.8662,
+      "eval_samples_per_second": 29.128,
+      "eval_steps_per_second": 3.641,
+      "step": 150
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.692015121170432e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/emotion/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43e1dfa83a1cabba6524b8aef4b9cf9f06ad12b54c1b84b046d4557a3bea2b51
+size 4091
low-shot-task-specific/social_i_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/social_i_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21047febd6ad48a886cf9d5fcaed091ec146720fa2ace5db287e7337cbf7a46a
+size 104973389
low-shot-task-specific/social_i_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51615aa1f8c3316a2d9f1d9b34cd7bd25fa7d1fd75182407da214a4549fcc3fe
+size 209984517
low-shot-task-specific/social_i_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d184eb9d6f950ca5fa7558982747687291171db4b5d64ca0e406118be389e9f5
+size 14575
low-shot-task-specific/social_i_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
+size 627
low-shot-task-specific/social_i_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,111 @@
+{
+  "best_metric": 0.21922020614147186,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/social_i_qa/checkpoint-100",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 6.145,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 3.2951,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.47254127264022827,
+      "eval_runtime": 8.3699,
+      "eval_samples_per_second": 23.895,
+      "eval_steps_per_second": 2.987,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.5553,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.3415,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.3055,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.29911044239997864,
+      "eval_runtime": 8.3444,
+      "eval_samples_per_second": 23.968,
+      "eval_steps_per_second": 2.996,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.2157,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1871,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.2219252735376358,
+      "eval_runtime": 8.3281,
+      "eval_samples_per_second": 24.015,
+      "eval_steps_per_second": 3.002,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1417,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.12,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.1079,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.21922020614147186,
+      "eval_runtime": 8.385,
+      "eval_samples_per_second": 23.852,
+      "eval_steps_per_second": 2.981,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.656903891124224e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/social_i_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1df6d236b24d8ccd4f73c811e7a410d0eabcb7077106cde9555c3305ab36be9c
+size 4091
low-shot-task-specific/sst/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/sst/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f159b31b434f4cbc2859fe1a9d310fe6ff28774a227edc490206f028896a6c4b
+size 104973389
low-shot-task-specific/sst/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a3f8fa515dd79da7e74337497fcd24225c380d188278a17aecea3bbdbdab20b
+size 209984517
low-shot-task-specific/sst/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb7ddc07ac1c6b830dbc025657118a71cc05bef3beda9880d700dfe72a190a1
+size 14575
low-shot-task-specific/sst/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:472963d9d147c2cd377a0a377de820bc06cc3f0119cb01d2dc8c5a02c4d14738
+size 627
low-shot-task-specific/sst/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.0313660129904747,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sst/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 4.2e-05,
+      "loss": 7.604,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.000102,
+      "loss": 6.408,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.78858482837677,
+      "eval_runtime": 6.2379,
+      "eval_samples_per_second": 32.062,
+      "eval_steps_per_second": 4.008,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.000162,
+      "loss": 1.6353,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00022199999999999998,
+      "loss": 0.1518,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.00028199999999999997,
+      "loss": 0.0807,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.06099913269281387,
+      "eval_runtime": 6.2244,
+      "eval_samples_per_second": 32.132,
+      "eval_steps_per_second": 4.016,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.0002895,
+      "loss": 0.0667,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.0002745,
+      "loss": 0.0418,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.0313660129904747,
+      "eval_runtime": 6.2691,
+      "eval_samples_per_second": 31.902,
+      "eval_steps_per_second": 3.988,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 6295295189975040.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/sst/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c336d21dc0b7346d796426a4797d56084a81a08ea200e8c91411fa3449b6e06
+size 4091
low-shot-task-specific/sum/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/sum/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0a0cb9b7eade6b441f997e152af520540514264d51b1c267e30e7c500669bc
+size 104973389
low-shot-task-specific/sum/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a812e3aab6e78e990e7ca36e1b9e85917fdaca9d531c7bcdc41b82d6c982d1
+size 209984517
low-shot-task-specific/sum/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc7dd2e3f40ac0c046da2f233e18f2314fea538368fd7bd263fa95f95f7fbef
+size 14575
low-shot-task-specific/sum/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71df4de1094152c023456a0f4f7f28571d7f0bd29b962a097a17dff09a83bd7
+size 627
low-shot-task-specific/sum/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.024566762149333954,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sum/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 3.5065,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 2.4397,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4209679365158081,
+      "eval_runtime": 5.755,
+      "eval_samples_per_second": 34.753,
+      "eval_steps_per_second": 4.344,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.8483,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.1766,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.0503,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.045773524791002274,
+      "eval_runtime": 5.7905,
+      "eval_samples_per_second": 34.539,
+      "eval_steps_per_second": 4.317,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.0382,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.0355,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.024566762149333954,
+      "eval_runtime": 5.7985,
+      "eval_samples_per_second": 34.492,
+      "eval_steps_per_second": 4.311,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 4450719301632000.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/sum/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2d99ceccff158e6a2e2efb1d0072c3a4d6419ea8c9ba1122915df6fece215d
+size 4091
low-shot-task-specific/svamp/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}