mkhalifa
/

instrucode

Model card Files Files and versions Community

Muhammad Khalifa committed on May 22, 2024

Commit

e5d11d8

•

1 Parent(s): 70d9848

add 500-shot models

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json +135 -0
low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/cola/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/cola/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/cola/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/cola/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/cola/best_model/trainer_state.json +171 -0
low-shot-task-specific-500-ex/cola/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json +171 -0
low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json +123 -0
low-shot-task-specific-500-ex/emotion/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json +109 -0
low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/sst/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/sst/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/sst/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/sst/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/sst/best_model/trainer_state.json +123 -0
low-shot-task-specific-500-ex/sst/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/sum/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin +3 -0
low-shot-task-specific-500-ex/sum/best_model/optimizer.pt +3 -0
low-shot-task-specific-500-ex/sum/best_model/rng_state.pth +3 -0
low-shot-task-specific-500-ex/sum/best_model/scheduler.pt +3 -0
low-shot-task-specific-500-ex/sum/best_model/trainer_state.json +143 -0
low-shot-task-specific-500-ex/sum/best_model/training_args.bin +3 -0
low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json +21 -0

low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:206bdaf3cf034deec6de2394ba6a7b29d0b637ab2d1925332e8f1abb76025dd6
+size 104973389

low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d53c7e4694c2cfcf4dd0eb9bfe3b05a7cd7809c6a8e6ff871c4c99d6ddfefaf4
+size 209984517

low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91
+size 14575

low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666
+size 627

low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "best_metric": 0.17182409763336182,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-60",
+  "epoch": 9.795918367346939,
+  "eval_steps": 500,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.98,
+      "eval_loss": 3.2136309146881104,
+      "eval_runtime": 1.7971,
+      "eval_samples_per_second": 27.266,
+      "eval_steps_per_second": 3.895,
+      "step": 6
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 3.5659,
+      "step": 10
+    },
+    {
+      "epoch": 1.96,
+      "eval_loss": 1.1381325721740723,
+      "eval_runtime": 1.8028,
+      "eval_samples_per_second": 27.18,
+      "eval_steps_per_second": 3.883,
+      "step": 12
+    },
+    {
+      "epoch": 2.94,
+      "eval_loss": 0.39599937200546265,
+      "eval_runtime": 1.7938,
+      "eval_samples_per_second": 27.316,
+      "eval_steps_per_second": 3.902,
+      "step": 18
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.8239,
+      "step": 20
+    },
+    {
+      "epoch": 3.92,
+      "eval_loss": 0.23788291215896606,
+      "eval_runtime": 1.8071,
+      "eval_samples_per_second": 27.115,
+      "eval_steps_per_second": 3.874,
+      "step": 24
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 4e-05,
+      "loss": 0.2375,
+      "step": 30
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.1869448572397232,
+      "eval_runtime": 1.8046,
+      "eval_samples_per_second": 27.154,
+      "eval_steps_per_second": 3.879,
+      "step": 30
+    },
+    {
+      "epoch": 5.88,
+      "eval_loss": 0.1762770116329193,
+      "eval_runtime": 1.7955,
+      "eval_samples_per_second": 27.291,
+      "eval_steps_per_second": 3.899,
+      "step": 36
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.1756,
+      "step": 40
+    },
+    {
+      "epoch": 6.86,
+      "eval_loss": 0.17334015667438507,
+      "eval_runtime": 1.7994,
+      "eval_samples_per_second": 27.231,
+      "eval_steps_per_second": 3.89,
+      "step": 42
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.17443998157978058,
+      "eval_runtime": 1.7969,
+      "eval_samples_per_second": 27.269,
+      "eval_steps_per_second": 3.896,
+      "step": 49
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 0.1626,
+      "step": 50
+    },
+    {
+      "epoch": 8.98,
+      "eval_loss": 0.17577075958251953,
+      "eval_runtime": 1.7999,
+      "eval_samples_per_second": 27.223,
+      "eval_steps_per_second": 3.889,
+      "step": 55
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 0.0,
+      "loss": 0.1535,
+      "step": 60
+    },
+    {
+      "epoch": 9.8,
+      "eval_loss": 0.17182409763336182,
+      "eval_runtime": 1.7992,
+      "eval_samples_per_second": 27.235,
+      "eval_steps_per_second": 3.891,
+      "step": 60
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 60,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 7168130697461760.0,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5173d21d2a38d1cd1cd4daa45bed5a9f6f0d64b0897c6366683a240cd58f864
+size 4091

low-shot-task-specific-500-ex/cola/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88db25f61b79433f848f0788c44a69c4f0e655ee09f0508b3af035fc7e02179e
+size 104973389

low-shot-task-specific-500-ex/cola/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43508585f5b8ebfc9532b38cb5a03b32bd704e2b5ebaf34c0b503292c13d7c3f
+size 209984517

low-shot-task-specific-500-ex/cola/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df43d0030d9a94c82d1f09bcf5abbca157094e974c01f4c6b0214cfabe62d21a
+size 14575

low-shot-task-specific-500-ex/cola/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd5a1245e45cfb0cd08e1aaad686b01aa603042a02b323bff0c30b6b0eaca154
+size 627

low-shot-task-specific-500-ex/cola/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,171 @@

+{
+  "best_metric": 0.16061067581176758,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/cola/checkpoint-120",
+  "epoch": 9.6,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 6.6687,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 4.879603385925293,
+      "eval_runtime": 2.9872,
+      "eval_samples_per_second": 33.476,
+      "eval_steps_per_second": 4.352,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 4.1857,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6735175251960754,
+      "eval_runtime": 2.9786,
+      "eval_samples_per_second": 33.573,
+      "eval_steps_per_second": 4.364,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.0666666666666666e-05,
+      "loss": 1.1578,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.22044576704502106,
+      "eval_runtime": 2.9802,
+      "eval_samples_per_second": 33.554,
+      "eval_steps_per_second": 4.362,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.2691,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.2011,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.18446393311023712,
+      "eval_runtime": 2.9788,
+      "eval_samples_per_second": 33.571,
+      "eval_steps_per_second": 4.364,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.1782,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.17961610853672028,
+      "eval_runtime": 2.9749,
+      "eval_samples_per_second": 33.615,
+      "eval_steps_per_second": 4.37,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.4e-05,
+      "loss": 0.1609,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.1864309310913086,
+      "eval_runtime": 2.9806,
+      "eval_samples_per_second": 33.55,
+      "eval_steps_per_second": 4.362,
+      "step": 75
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 2.7333333333333335e-05,
+      "loss": 0.1644,
+      "step": 80
+    },
+    {
+      "epoch": 6.96,
+      "eval_loss": 0.16424360871315002,
+      "eval_runtime": 2.9883,
+      "eval_samples_per_second": 33.464,
+      "eval_steps_per_second": 4.35,
+      "step": 87
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2.066666666666667e-05,
+      "loss": 0.1389,
+      "step": 90
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 1.4e-05,
+      "loss": 0.1294,
+      "step": 100
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.16847126185894012,
+      "eval_runtime": 2.9824,
+      "eval_samples_per_second": 33.53,
+      "eval_steps_per_second": 4.359,
+      "step": 100
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 7.333333333333333e-06,
+      "loss": 0.1189,
+      "step": 110
+    },
+    {
+      "epoch": 8.96,
+      "eval_loss": 0.16718144714832306,
+      "eval_runtime": 2.9865,
+      "eval_samples_per_second": 33.485,
+      "eval_steps_per_second": 4.353,
+      "step": 112
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 6.666666666666667e-07,
+      "loss": 0.1159,
+      "step": 120
+    },
+    {
+      "epoch": 9.6,
+      "eval_loss": 0.16061067581176758,
+      "eval_runtime": 3.0082,
+      "eval_samples_per_second": 33.243,
+      "eval_steps_per_second": 4.322,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 7598366896619520.0,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/cola/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8fd855aa267cbdcebda3428b287cf8b570b4df8a9e36df6feb7196098250a51
+size 4091

low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5152d962ae2befcfe0aedba6ab58b8db4d23257a65d5616e0d250964461d934
+size 104973389

low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1d6a49094ccff938ab86e6209f355a618f8701ca301da56250d93f602c172c6
+size 209984517

low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c30c2a2ce0908cdf3fafe95df8bab394435e84155013c948e02ec0288e93b6fe
+size 14575

low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b328efc508129bc7f57f4b7996c1bcd196558a43b1355a8510ec55800cd250a4
+size 627

low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,171 @@

+{
+  "best_metric": 0.315158873796463,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/commonsense_qa/checkpoint-120",
+  "epoch": 9.6,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 4.3959,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 2.007786750793457,
+      "eval_runtime": 3.9225,
+      "eval_samples_per_second": 25.494,
+      "eval_steps_per_second": 3.314,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 1.4138,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5842701196670532,
+      "eval_runtime": 3.8952,
+      "eval_samples_per_second": 25.673,
+      "eval_steps_per_second": 3.337,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.000000000000001e-05,
+      "loss": 0.5802,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.48449742794036865,
+      "eval_runtime": 3.9041,
+      "eval_samples_per_second": 25.614,
+      "eval_steps_per_second": 3.33,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.4476,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.3758,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.3487338721752167,
+      "eval_runtime": 3.9136,
+      "eval_samples_per_second": 25.552,
+      "eval_steps_per_second": 3.322,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4e-05,
+      "loss": 0.3099,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.3447181284427643,
+      "eval_runtime": 3.9145,
+      "eval_samples_per_second": 25.546,
+      "eval_steps_per_second": 3.321,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.2785,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.3341815173625946,
+      "eval_runtime": 3.9058,
+      "eval_samples_per_second": 25.603,
+      "eval_steps_per_second": 3.328,
+      "step": 75
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.2473,
+      "step": 80
+    },
+    {
+      "epoch": 6.96,
+      "eval_loss": 0.32787469029426575,
+      "eval_runtime": 3.9132,
+      "eval_samples_per_second": 25.555,
+      "eval_steps_per_second": 3.322,
+      "step": 87
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2e-05,
+      "loss": 0.2096,
+      "step": 90
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 0.2026,
+      "step": 100
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.36058053374290466,
+      "eval_runtime": 3.9049,
+      "eval_samples_per_second": 25.609,
+      "eval_steps_per_second": 3.329,
+      "step": 100
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 0.1703,
+      "step": 110
+    },
+    {
+      "epoch": 8.96,
+      "eval_loss": 0.32292404770851135,
+      "eval_runtime": 3.9053,
+      "eval_samples_per_second": 25.606,
+      "eval_steps_per_second": 3.329,
+      "step": 112
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 0.0,
+      "loss": 0.174,
+      "step": 120
+    },
+    {
+      "epoch": 9.6,
+      "eval_loss": 0.315158873796463,
+      "eval_runtime": 3.9182,
+      "eval_samples_per_second": 25.522,
+      "eval_steps_per_second": 3.318,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.884385099874304e+16,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45bff2219beb51a82849d4d07eba12e3cd594f77292977d4c4572844ac5cbf0b
+size 4091

low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd259302fbc3589e465552374ac7ef975db55d5443842d0886e31d2e84eafd63
+size 104973389

low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc44b0176a9bcc2ff66801d67ece43987fa19edcaffeda2d47329715c2eca0b
+size 209984517

low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:052af8166da591bdc27e359bc7d7771179713f7891b6826f85f597392b9ae762
+size 14575

low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc5e65f6f4846aebdaab8b704e9eeffb8f1787e8b333c20c764dad3451c8daf1
+size 627

low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+  "best_metric": 0.27617308497428894,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/emotion/checkpoint-87",
+  "epoch": 6.96,
+  "eval_steps": 500,
+  "global_step": 87,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 5.8573,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 4.265514373779297,
+      "eval_runtime": 3.4719,
+      "eval_samples_per_second": 28.803,
+      "eval_steps_per_second": 3.744,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 3.8105,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.5850574970245361,
+      "eval_runtime": 3.4571,
+      "eval_samples_per_second": 28.926,
+      "eval_steps_per_second": 3.76,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.0666666666666666e-05,
+      "loss": 1.7041,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.5069144368171692,
+      "eval_runtime": 3.4616,
+      "eval_samples_per_second": 28.889,
+      "eval_steps_per_second": 3.756,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.6618,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.3247,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.33890244364738464,
+      "eval_runtime": 3.4571,
+      "eval_samples_per_second": 28.926,
+      "eval_steps_per_second": 3.76,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.271,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.3074319362640381,
+      "eval_runtime": 3.4533,
+      "eval_samples_per_second": 28.958,
+      "eval_steps_per_second": 3.765,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.4e-05,
+      "loss": 0.2088,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.29454201459884644,
+      "eval_runtime": 3.448,
+      "eval_samples_per_second": 29.002,
+      "eval_steps_per_second": 3.77,
+      "step": 75
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 2.7333333333333335e-05,
+      "loss": 0.1924,
+      "step": 80
+    },
+    {
+      "epoch": 6.96,
+      "eval_loss": 0.27617308497428894,
+      "eval_runtime": 3.4613,
+      "eval_samples_per_second": 28.89,
+      "eval_steps_per_second": 3.756,
+      "step": 87
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.002400891600896e+16,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/emotion/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2807111bcd404841c3e700ab3cab78a978a93e97c447ffe05c31e323ab3de999
+size 4091

low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:452926c1e61adf1dc9d07ddcd94668ffb5984646601a7f43cfccf35f8ed8f15d
+size 104973389

low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c2ceb0a22aaf3ac5943e52d26f725ad35605b686d466e24cb6b88e9b56bab9e
+size 209984517

low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb2d1c591c012870eb39230986af7413438032c45508997b22b8b2e04069c233
+size 14575

low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86f741a77aed590e2df1e55bdd0d9033c12228c5cb1e1789672b7ce71994aa05
+size 627

low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+  "best_metric": 0.22931724786758423,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/social_i_qa/checkpoint-75",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 4.8517,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.9629485607147217,
+      "eval_runtime": 4.1824,
+      "eval_samples_per_second": 23.91,
+      "eval_steps_per_second": 3.108,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 1.2888,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.41052401065826416,
+      "eval_runtime": 4.1752,
+      "eval_samples_per_second": 23.951,
+      "eval_steps_per_second": 3.114,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.000000000000001e-05,
+      "loss": 0.4255,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.32185935974121094,
+      "eval_runtime": 4.1821,
+      "eval_samples_per_second": 23.911,
+      "eval_steps_per_second": 3.108,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.2955,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.2552,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.26777762174606323,
+      "eval_runtime": 4.1799,
+      "eval_samples_per_second": 23.924,
+      "eval_steps_per_second": 3.11,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4e-05,
+      "loss": 0.2144,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.24417449533939362,
+      "eval_runtime": 4.1595,
+      "eval_samples_per_second": 24.042,
+      "eval_steps_per_second": 3.125,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.1887,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.22931724786758423,
+      "eval_runtime": 4.1585,
+      "eval_samples_per_second": 24.047,
+      "eval_steps_per_second": 3.126,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.244223306989568e+16,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc625edfba8d629ae9a11f5c619aeadcf62fa8f504d60898b62237fc19448f60
+size 4091

low-shot-task-specific-500-ex/sst/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f0bf1649f6d9b8dc8d6a74e917b2986eb9e0c9c257614ade4af288256d9a4f4
+size 104973389

low-shot-task-specific-500-ex/sst/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:522572fa606bbb71751d11ef99ae52f5681a609d3d119335844ab4f53ba0d826
+size 209984517

low-shot-task-specific-500-ex/sst/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3580967c07f4e6cea186553a49db7882eeeb990b25cfad881cf2a6edb9233e4a
+size 14575

low-shot-task-specific-500-ex/sst/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc34eb4d15f40db25f296376c3b3cbb8431c5236c4b6fd8813dabe4ca7b3ea2
+size 627

low-shot-task-specific-500-ex/sst/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+  "best_metric": 0.042198196053504944,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sst/checkpoint-87",
+  "epoch": 6.96,
+  "eval_steps": 500,
+  "global_step": 87,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 7.0533,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 4.983966827392578,
+      "eval_runtime": 3.24,
+      "eval_samples_per_second": 30.865,
+      "eval_steps_per_second": 4.012,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 4.1938,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.4440341889858246,
+      "eval_runtime": 3.2423,
+      "eval_samples_per_second": 30.843,
+      "eval_steps_per_second": 4.01,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.2e-05,
+      "loss": 0.6862,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.1788669228553772,
+      "eval_runtime": 3.2349,
+      "eval_samples_per_second": 30.913,
+      "eval_steps_per_second": 4.019,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.5333333333333334e-05,
+      "loss": 0.2043,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.8666666666666666e-05,
+      "loss": 0.1107,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.06379850953817368,
+      "eval_runtime": 3.2374,
+      "eval_samples_per_second": 30.889,
+      "eval_steps_per_second": 4.016,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4.2000000000000004e-05,
+      "loss": 0.0491,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.0445735827088356,
+      "eval_runtime": 3.2374,
+      "eval_samples_per_second": 30.889,
+      "eval_steps_per_second": 4.016,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.5333333333333336e-05,
+      "loss": 0.0273,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.04596562311053276,
+      "eval_runtime": 3.2388,
+      "eval_samples_per_second": 30.876,
+      "eval_steps_per_second": 4.014,
+      "step": 75
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 2.8666666666666668e-05,
+      "loss": 0.0222,
+      "step": 80
+    },
+    {
+      "epoch": 6.96,
+      "eval_loss": 0.042198196053504944,
+      "eval_runtime": 3.2385,
+      "eval_samples_per_second": 30.879,
+      "eval_steps_per_second": 4.014,
+      "step": 87
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 7054390093086720.0,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/sst/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c42a2e587a4c59713f0200d20e8dcc233dcef141930bfbf631c04969b44050c3
+size 4091

low-shot-task-specific-500-ex/sum/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a729e9feda81f6fe2ca5709b1c5420648c705df61f8d0f7729878f5fb4de6b9
+size 104973389

low-shot-task-specific-500-ex/sum/best_model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0940dfeb998ac49fc9ea6ee82de9e1a31f888566cadbbf3e12a6b31771c9257b
+size 209984517

low-shot-task-specific-500-ex/sum/best_model/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db42f39e4e1e49a8785b28d59ee0d6a43f5f529564318dd434906402c044f9e5
+size 14575

low-shot-task-specific-500-ex/sum/best_model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c348388a8e293c1a759c71c596beff737512abb60e161371fe0d9e9edf9afe53
+size 627

low-shot-task-specific-500-ex/sum/best_model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "best_metric": 0.031680114567279816,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sum/checkpoint-100",
+  "epoch": 8.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.8,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 2.9796,
+      "step": 10
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.8474284410476685,
+      "eval_runtime": 2.9025,
+      "eval_samples_per_second": 34.453,
+      "eval_steps_per_second": 4.479,
+      "step": 12
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 1.6178,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.31484881043434143,
+      "eval_runtime": 2.9228,
+      "eval_samples_per_second": 34.214,
+      "eval_steps_per_second": 4.448,
+      "step": 25
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 6.000000000000001e-05,
+      "loss": 0.4051,
+      "step": 30
+    },
+    {
+      "epoch": 2.96,
+      "eval_loss": 0.13756035268306732,
+      "eval_runtime": 2.9586,
+      "eval_samples_per_second": 33.799,
+      "eval_steps_per_second": 4.394,
+      "step": 37
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.1943,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.0721,
+      "step": 50
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.06226326525211334,
+      "eval_runtime": 2.9426,
+      "eval_samples_per_second": 33.984,
+      "eval_steps_per_second": 4.418,
+      "step": 50
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 4e-05,
+      "loss": 0.043,
+      "step": 60
+    },
+    {
+      "epoch": 4.96,
+      "eval_loss": 0.03685503825545311,
+      "eval_runtime": 2.9565,
+      "eval_samples_per_second": 33.823,
+      "eval_steps_per_second": 4.397,
+      "step": 62
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.0342,
+      "step": 70
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.045043423771858215,
+      "eval_runtime": 2.9492,
+      "eval_samples_per_second": 33.907,
+      "eval_steps_per_second": 4.408,
+      "step": 75
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.0254,
+      "step": 80
+    },
+    {
+      "epoch": 6.96,
+      "eval_loss": 0.04237747564911842,
+      "eval_runtime": 2.9496,
+      "eval_samples_per_second": 33.903,
+      "eval_steps_per_second": 4.407,
+      "step": 87
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2e-05,
+      "loss": 0.0293,
+      "step": 90
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 0.0163,
+      "step": 100
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.031680114567279816,
+      "eval_runtime": 2.9477,
+      "eval_samples_per_second": 33.925,
+      "eval_steps_per_second": 4.41,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 120,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 5934292402176000.0,
+  "trial_name": null,
+  "trial_params": null
+}

low-shot-task-specific-500-ex/sum/best_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ed1b612847b0105ad99860aaefb41fa0340e2e0280e3c4076ca491ac381da18
+size 4091

low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}