diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json b/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c7acf64429db5804344bd87921db27bcc21f788 --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206bdaf3cf034deec6de2394ba6a7b29d0b637ab2d1925332e8f1abb76025dd6 +size 104973389 diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt b/low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f0fdef2118b40d40ba5cb2b326497cf9ad42e73 --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53c7e4694c2cfcf4dd0eb9bfe3b05a7cd7809c6a8e6ff871c4c99d6ddfefaf4 +size 209984517 diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth b/low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..34660095e1c21a91e581d45ece0014e4c9ea196f --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91 +size 14575 diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt b/low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..97af97ef41be68d37872e3e436854f56a12660f7 --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666 +size 627 diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json b/low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..00f5dd1c284bb90f5183a7f82de266a13658acce --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json @@ -0,0 +1,135 @@ +{ + "best_metric": 0.17182409763336182, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-60", + "epoch": 9.795918367346939, + "eval_steps": 500, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.98, + "eval_loss": 3.2136309146881104, + "eval_runtime": 1.7971, + "eval_samples_per_second": 27.266, + "eval_steps_per_second": 3.895, + "step": 6 + }, + { + "epoch": 1.63, + "learning_rate": 6.666666666666667e-05, + "loss": 3.5659, + "step": 10 + }, + { + "epoch": 1.96, + "eval_loss": 1.1381325721740723, + "eval_runtime": 1.8028, + "eval_samples_per_second": 27.18, + "eval_steps_per_second": 3.883, + "step": 12 + }, + { + "epoch": 2.94, + "eval_loss": 0.39599937200546265, + "eval_runtime": 1.7938, + "eval_samples_per_second": 27.316, + "eval_steps_per_second": 3.902, + "step": 18 + }, + { + "epoch": 3.27, + "learning_rate": 5.333333333333333e-05, + "loss": 0.8239, + "step": 20 + }, + { + "epoch": 3.92, + "eval_loss": 0.23788291215896606, + "eval_runtime": 1.8071, + "eval_samples_per_second": 27.115, + "eval_steps_per_second": 3.874, + "step": 24 + }, + { + "epoch": 4.9, + "learning_rate": 4e-05, + "loss": 0.2375, + "step": 30 + }, + { + "epoch": 4.9, + "eval_loss": 0.1869448572397232, + "eval_runtime": 1.8046, + "eval_samples_per_second": 27.154, + "eval_steps_per_second": 3.879, + "step": 30 + }, + { + "epoch": 5.88, + "eval_loss": 0.1762770116329193, + "eval_runtime": 1.7955, + "eval_samples_per_second": 27.291, + "eval_steps_per_second": 3.899, + "step": 36 + }, + { + "epoch": 6.53, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.1756, + "step": 40 + }, + { + "epoch": 6.86, + "eval_loss": 0.17334015667438507, + "eval_runtime": 1.7994, + "eval_samples_per_second": 27.231, + "eval_steps_per_second": 3.89, + "step": 42 + }, + { + "epoch": 8.0, + "eval_loss": 0.17443998157978058, + "eval_runtime": 1.7969, + "eval_samples_per_second": 27.269, + "eval_steps_per_second": 3.896, + "step": 49 + }, + { + "epoch": 8.16, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.1626, + "step": 50 + }, + { + "epoch": 8.98, + "eval_loss": 0.17577075958251953, + "eval_runtime": 1.7999, + "eval_samples_per_second": 27.223, + "eval_steps_per_second": 3.889, + "step": 55 + }, + { + "epoch": 9.8, + "learning_rate": 0.0, + "loss": 0.1535, + "step": 60 + }, + { + "epoch": 9.8, + "eval_loss": 0.17182409763336182, + "eval_runtime": 1.7992, + "eval_samples_per_second": 27.235, + "eval_steps_per_second": 3.891, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 60, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7168130697461760.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin b/low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..07dfb76050f2a4c267f868ab929afeba975637e1 --- /dev/null +++ b/low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5173d21d2a38d1cd1cd4daa45bed5a9f6f0d64b0897c6366683a240cd58f864 +size 4091 diff --git a/low-shot-task-specific-500-ex/cola/best_model/adapter_config.json b/low-shot-task-specific-500-ex/cola/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..348692eea6298ddaaccbf218bf8d21214d79c79e --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88db25f61b79433f848f0788c44a69c4f0e655ee09f0508b3af035fc7e02179e +size 104973389 diff --git a/low-shot-task-specific-500-ex/cola/best_model/optimizer.pt b/low-shot-task-specific-500-ex/cola/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d4a4a540f0c051e770940c766027e303d20098d --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43508585f5b8ebfc9532b38cb5a03b32bd704e2b5ebaf34c0b503292c13d7c3f +size 209984517 diff --git a/low-shot-task-specific-500-ex/cola/best_model/rng_state.pth b/low-shot-task-specific-500-ex/cola/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..da67a1fbfdc6166e07ca8783f9caa975c03c021e --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df43d0030d9a94c82d1f09bcf5abbca157094e974c01f4c6b0214cfabe62d21a +size 14575 diff --git a/low-shot-task-specific-500-ex/cola/best_model/scheduler.pt b/low-shot-task-specific-500-ex/cola/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a88fb7eea52925a5828b6c51e234684b99b96a7 --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5a1245e45cfb0cd08e1aaad686b01aa603042a02b323bff0c30b6b0eaca154 +size 627 diff --git a/low-shot-task-specific-500-ex/cola/best_model/trainer_state.json b/low-shot-task-specific-500-ex/cola/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d134398b3b2d622a63f57e9f6d246c7af13d914d --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/trainer_state.json @@ -0,0 +1,171 @@ +{ + "best_metric": 0.16061067581176758, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/cola/checkpoint-120", + "epoch": 9.6, + "eval_steps": 500, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 6.6687, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 4.879603385925293, + "eval_runtime": 2.9872, + "eval_samples_per_second": 33.476, + "eval_steps_per_second": 4.352, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.733333333333333e-05, + "loss": 4.1857, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.6735175251960754, + "eval_runtime": 2.9786, + "eval_samples_per_second": 33.573, + "eval_steps_per_second": 4.364, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.0666666666666666e-05, + "loss": 1.1578, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.22044576704502106, + "eval_runtime": 2.9802, + "eval_samples_per_second": 33.554, + "eval_steps_per_second": 4.362, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.4000000000000005e-05, + "loss": 0.2691, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.2011, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.18446393311023712, + "eval_runtime": 2.9788, + "eval_samples_per_second": 33.571, + "eval_steps_per_second": 4.364, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4.066666666666667e-05, + "loss": 0.1782, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.17961610853672028, + "eval_runtime": 2.9749, + "eval_samples_per_second": 33.615, + "eval_steps_per_second": 4.37, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.4e-05, + "loss": 0.1609, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.1864309310913086, + "eval_runtime": 2.9806, + "eval_samples_per_second": 33.55, + "eval_steps_per_second": 4.362, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.7333333333333335e-05, + "loss": 0.1644, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.16424360871315002, + "eval_runtime": 2.9883, + "eval_samples_per_second": 33.464, + "eval_steps_per_second": 4.35, + "step": 87 + }, + { + "epoch": 7.2, + "learning_rate": 2.066666666666667e-05, + "loss": 0.1389, + "step": 90 + }, + { + "epoch": 8.0, + "learning_rate": 1.4e-05, + "loss": 0.1294, + "step": 100 + }, + { + "epoch": 8.0, + "eval_loss": 0.16847126185894012, + "eval_runtime": 2.9824, + "eval_samples_per_second": 33.53, + "eval_steps_per_second": 4.359, + "step": 100 + }, + { + "epoch": 8.8, + "learning_rate": 7.333333333333333e-06, + "loss": 0.1189, + "step": 110 + }, + { + "epoch": 8.96, + "eval_loss": 0.16718144714832306, + "eval_runtime": 2.9865, + "eval_samples_per_second": 33.485, + "eval_steps_per_second": 4.353, + "step": 112 + }, + { + "epoch": 9.6, + "learning_rate": 6.666666666666667e-07, + "loss": 0.1159, + "step": 120 + }, + { + "epoch": 9.6, + "eval_loss": 0.16061067581176758, + "eval_runtime": 3.0082, + "eval_samples_per_second": 33.243, + "eval_steps_per_second": 4.322, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7598366896619520.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/cola/best_model/training_args.bin b/low-shot-task-specific-500-ex/cola/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a63489e8652f6893b8c0e22102cc72bcaad6b4b --- /dev/null +++ b/low-shot-task-specific-500-ex/cola/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fd855aa267cbdcebda3428b287cf8b570b4df8a9e36df6feb7196098250a51 +size 4091 diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json b/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e0319534b540a07e0b33435adf66ba278bfe9e4 --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5152d962ae2befcfe0aedba6ab58b8db4d23257a65d5616e0d250964461d934 +size 104973389 diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt b/low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3456955fbb5b100cbefb3ab972f89591f3c986a3 --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d6a49094ccff938ab86e6209f355a618f8701ca301da56250d93f602c172c6 +size 209984517 diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth b/low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..20caa7511255681d0f97a536f44f36d92f8223d1 --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30c2a2ce0908cdf3fafe95df8bab394435e84155013c948e02ec0288e93b6fe +size 14575 diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt b/low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e4a7916329eb8a16fb9eb9d5f820ab39a9f9ed --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b328efc508129bc7f57f4b7996c1bcd196558a43b1355a8510ec55800cd250a4 +size 627 diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json b/low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9ba71f741df2dbda489bcffc335ff387a672f89a --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json @@ -0,0 +1,171 @@ +{ + "best_metric": 0.315158873796463, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/commonsense_qa/checkpoint-120", + "epoch": 9.6, + "eval_steps": 500, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 4.3959, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 2.007786750793457, + "eval_runtime": 3.9225, + "eval_samples_per_second": 25.494, + "eval_steps_per_second": 3.314, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 1.4138, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.5842701196670532, + "eval_runtime": 3.8952, + "eval_samples_per_second": 25.673, + "eval_steps_per_second": 3.337, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.000000000000001e-05, + "loss": 0.5802, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.48449742794036865, + "eval_runtime": 3.9041, + "eval_samples_per_second": 25.614, + "eval_steps_per_second": 3.33, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.4476, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.666666666666667e-05, + "loss": 0.3758, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.3487338721752167, + "eval_runtime": 3.9136, + "eval_samples_per_second": 25.552, + "eval_steps_per_second": 3.322, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4e-05, + "loss": 0.3099, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.3447181284427643, + "eval_runtime": 3.9145, + "eval_samples_per_second": 25.546, + "eval_steps_per_second": 3.321, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2785, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.3341815173625946, + "eval_runtime": 3.9058, + "eval_samples_per_second": 25.603, + "eval_steps_per_second": 3.328, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2473, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.32787469029426575, + "eval_runtime": 3.9132, + "eval_samples_per_second": 25.555, + "eval_steps_per_second": 3.322, + "step": 87 + }, + { + "epoch": 7.2, + "learning_rate": 2e-05, + "loss": 0.2096, + "step": 90 + }, + { + "epoch": 8.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.2026, + "step": 100 + }, + { + "epoch": 8.0, + "eval_loss": 0.36058053374290466, + "eval_runtime": 3.9049, + "eval_samples_per_second": 25.609, + "eval_steps_per_second": 3.329, + "step": 100 + }, + { + "epoch": 8.8, + "learning_rate": 6.666666666666667e-06, + "loss": 0.1703, + "step": 110 + }, + { + "epoch": 8.96, + "eval_loss": 0.32292404770851135, + "eval_runtime": 3.9053, + "eval_samples_per_second": 25.606, + "eval_steps_per_second": 3.329, + "step": 112 + }, + { + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.174, + "step": 120 + }, + { + "epoch": 9.6, + "eval_loss": 0.315158873796463, + "eval_runtime": 3.9182, + "eval_samples_per_second": 25.522, + "eval_steps_per_second": 3.318, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.884385099874304e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin b/low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6a03285b96097ffe2261ebde262000d5ab6ae3d --- /dev/null +++ b/low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bff2219beb51a82849d4d07eba12e3cd594f77292977d4c4572844ac5cbf0b +size 4091 diff --git a/low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json b/low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9928d39d7c492aa73d2a965af6ca48cb4e9de310 --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd259302fbc3589e465552374ac7ef975db55d5443842d0886e31d2e84eafd63 +size 104973389 diff --git a/low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt b/low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8990ca88592b55e25ee34daf83422e6134ebb934 --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc44b0176a9bcc2ff66801d67ece43987fa19edcaffeda2d47329715c2eca0b +size 209984517 diff --git a/low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth b/low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ace494b1e61a8f8f352e59b292c4e4001d9cb0a1 --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052af8166da591bdc27e359bc7d7771179713f7891b6826f85f597392b9ae762 +size 14575 diff --git a/low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt b/low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..00abcd51c8b894a60d842924c655a5fe90eb8d31 --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5e65f6f4846aebdaab8b704e9eeffb8f1787e8b333c20c764dad3451c8daf1 +size 627 diff --git a/low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json b/low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0d5399886b424b2649562c78b05e189c7ee0b17 --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json @@ -0,0 +1,123 @@ +{ + "best_metric": 0.27617308497428894, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/emotion/checkpoint-87", + "epoch": 6.96, + "eval_steps": 500, + "global_step": 87, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 5.8573, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 4.265514373779297, + "eval_runtime": 3.4719, + "eval_samples_per_second": 28.803, + "eval_steps_per_second": 3.744, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 3.8105, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 1.5850574970245361, + "eval_runtime": 3.4571, + "eval_samples_per_second": 28.926, + "eval_steps_per_second": 3.76, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.0666666666666666e-05, + "loss": 1.7041, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.5069144368171692, + "eval_runtime": 3.4616, + "eval_samples_per_second": 28.889, + "eval_steps_per_second": 3.756, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.4000000000000005e-05, + "loss": 0.6618, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.3247, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.33890244364738464, + "eval_runtime": 3.4571, + "eval_samples_per_second": 28.926, + "eval_steps_per_second": 3.76, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4.066666666666667e-05, + "loss": 0.271, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.3074319362640381, + "eval_runtime": 3.4533, + "eval_samples_per_second": 28.958, + "eval_steps_per_second": 3.765, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.4e-05, + "loss": 0.2088, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.29454201459884644, + "eval_runtime": 3.448, + "eval_samples_per_second": 29.002, + "eval_steps_per_second": 3.77, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.7333333333333335e-05, + "loss": 0.1924, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.27617308497428894, + "eval_runtime": 3.4613, + "eval_samples_per_second": 28.89, + "eval_steps_per_second": 3.756, + "step": 87 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.002400891600896e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/emotion/best_model/training_args.bin b/low-shot-task-specific-500-ex/emotion/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41437dd4f1052dd70a36f4731673abb95d28a1ef --- /dev/null +++ b/low-shot-task-specific-500-ex/emotion/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2807111bcd404841c3e700ab3cab78a978a93e97c447ffe05c31e323ab3de999 +size 4091 diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json b/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0f611d41d5161f026868c52d9ad49decd423d93 --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452926c1e61adf1dc9d07ddcd94668ffb5984646601a7f43cfccf35f8ed8f15d +size 104973389 diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt b/low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0a5409d9fd61468b4c90a8e45d20de6169ef9d5 --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2ceb0a22aaf3ac5943e52d26f725ad35605b686d466e24cb6b88e9b56bab9e +size 209984517 diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth b/low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..98601a0ae65c3e99460b9885102fb2b537bded73 --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2d1c591c012870eb39230986af7413438032c45508997b22b8b2e04069c233 +size 14575 diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt b/low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41bf25472560db3fa03436f59637398fae515760 --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f741a77aed590e2df1e55bdd0d9033c12228c5cb1e1789672b7ce71994aa05 +size 627 diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json b/low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bce5ef5a753faa4a1ed4dec5044b8591583789bb --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json @@ -0,0 +1,109 @@ +{ + "best_metric": 0.22931724786758423, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/social_i_qa/checkpoint-75", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 4.8517, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 1.9629485607147217, + "eval_runtime": 4.1824, + "eval_samples_per_second": 23.91, + "eval_steps_per_second": 3.108, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 1.2888, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.41052401065826416, + "eval_runtime": 4.1752, + "eval_samples_per_second": 23.951, + "eval_steps_per_second": 3.114, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.000000000000001e-05, + "loss": 0.4255, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.32185935974121094, + "eval_runtime": 4.1821, + "eval_samples_per_second": 23.911, + "eval_steps_per_second": 3.108, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.2955, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.666666666666667e-05, + "loss": 0.2552, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.26777762174606323, + "eval_runtime": 4.1799, + "eval_samples_per_second": 23.924, + "eval_steps_per_second": 3.11, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4e-05, + "loss": 0.2144, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.24417449533939362, + "eval_runtime": 4.1595, + "eval_samples_per_second": 24.042, + "eval_steps_per_second": 3.125, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.1887, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.22931724786758423, + "eval_runtime": 4.1585, + "eval_samples_per_second": 24.047, + "eval_steps_per_second": 3.126, + "step": 75 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.244223306989568e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin b/low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f70eede713c86b7fc8559900bee2eac82c85a06 --- /dev/null +++ b/low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc625edfba8d629ae9a11f5c619aeadcf62fa8f504d60898b62237fc19448f60 +size 4091 diff --git a/low-shot-task-specific-500-ex/sst/best_model/adapter_config.json b/low-shot-task-specific-500-ex/sst/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3647e51a9ee1167651f33e6bef210cc05dc404cb --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0bf1649f6d9b8dc8d6a74e917b2986eb9e0c9c257614ade4af288256d9a4f4 +size 104973389 diff --git a/low-shot-task-specific-500-ex/sst/best_model/optimizer.pt b/low-shot-task-specific-500-ex/sst/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..06ef146b194ecad6a8a3adb9a1e0a344b6bf8264 --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522572fa606bbb71751d11ef99ae52f5681a609d3d119335844ab4f53ba0d826 +size 209984517 diff --git a/low-shot-task-specific-500-ex/sst/best_model/rng_state.pth b/low-shot-task-specific-500-ex/sst/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6475bea2ae82208f4ba00666c68ba7f8f3ac5282 --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3580967c07f4e6cea186553a49db7882eeeb990b25cfad881cf2a6edb9233e4a +size 14575 diff --git a/low-shot-task-specific-500-ex/sst/best_model/scheduler.pt b/low-shot-task-specific-500-ex/sst/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdf61a694428152ae37af6b9f1df4856df582363 --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc34eb4d15f40db25f296376c3b3cbb8431c5236c4b6fd8813dabe4ca7b3ea2 +size 627 diff --git a/low-shot-task-specific-500-ex/sst/best_model/trainer_state.json b/low-shot-task-specific-500-ex/sst/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..da20f3dcb9d7c8107a27c67ff9c9358b290b7a69 --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/trainer_state.json @@ -0,0 +1,123 @@ +{ + "best_metric": 0.042198196053504944, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sst/checkpoint-87", + "epoch": 6.96, + "eval_steps": 500, + "global_step": 87, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.466666666666667e-05, + "loss": 7.0533, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 4.983966827392578, + "eval_runtime": 3.24, + "eval_samples_per_second": 30.865, + "eval_steps_per_second": 4.012, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.866666666666666e-05, + "loss": 4.1938, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.4440341889858246, + "eval_runtime": 3.2423, + "eval_samples_per_second": 30.843, + "eval_steps_per_second": 4.01, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.2e-05, + "loss": 0.6862, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.1788669228553772, + "eval_runtime": 3.2349, + "eval_samples_per_second": 30.913, + "eval_steps_per_second": 4.019, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.5333333333333334e-05, + "loss": 0.2043, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.8666666666666666e-05, + "loss": 0.1107, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.06379850953817368, + "eval_runtime": 3.2374, + "eval_samples_per_second": 30.889, + "eval_steps_per_second": 4.016, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4.2000000000000004e-05, + "loss": 0.0491, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.0445735827088356, + "eval_runtime": 3.2374, + "eval_samples_per_second": 30.889, + "eval_steps_per_second": 4.016, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.5333333333333336e-05, + "loss": 0.0273, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.04596562311053276, + "eval_runtime": 3.2388, + "eval_samples_per_second": 30.876, + "eval_steps_per_second": 4.014, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.8666666666666668e-05, + "loss": 0.0222, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.042198196053504944, + "eval_runtime": 3.2385, + "eval_samples_per_second": 30.879, + "eval_steps_per_second": 4.014, + "step": 87 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7054390093086720.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/sst/best_model/training_args.bin b/low-shot-task-specific-500-ex/sst/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..58ebb3e9f8ed62e68b0f395dd5f382cca625ece5 --- /dev/null +++ b/low-shot-task-specific-500-ex/sst/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42a2e587a4c59713f0200d20e8dcc233dcef141930bfbf631c04969b44050c3 +size 4091 diff --git a/low-shot-task-specific-500-ex/sum/best_model/adapter_config.json b/low-shot-task-specific-500-ex/sum/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7e7ec312fee71cfe1abab51cbc57c54bc7c2022 --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a729e9feda81f6fe2ca5709b1c5420648c705df61f8d0f7729878f5fb4de6b9 +size 104973389 diff --git a/low-shot-task-specific-500-ex/sum/best_model/optimizer.pt b/low-shot-task-specific-500-ex/sum/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..681173ec061bc04ebb1cc311149d5eee22e053db --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0940dfeb998ac49fc9ea6ee82de9e1a31f888566cadbbf3e12a6b31771c9257b +size 209984517 diff --git a/low-shot-task-specific-500-ex/sum/best_model/rng_state.pth b/low-shot-task-specific-500-ex/sum/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c573f5eb02101d97358ee0eb4edb5a3ea6f897ad --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db42f39e4e1e49a8785b28d59ee0d6a43f5f529564318dd434906402c044f9e5 +size 14575 diff --git a/low-shot-task-specific-500-ex/sum/best_model/scheduler.pt b/low-shot-task-specific-500-ex/sum/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a567ae23a9d83ba552445c28c532424e1c199f --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c348388a8e293c1a759c71c596beff737512abb60e161371fe0d9e9edf9afe53 +size 627 diff --git a/low-shot-task-specific-500-ex/sum/best_model/trainer_state.json b/low-shot-task-specific-500-ex/sum/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..48f014fb1ca642bab7964e4329e19bf685f74fcf --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/trainer_state.json @@ -0,0 +1,143 @@ +{ + "best_metric": 0.031680114567279816, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sum/checkpoint-100", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 2.9796, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 1.8474284410476685, + "eval_runtime": 2.9025, + "eval_samples_per_second": 34.453, + "eval_steps_per_second": 4.479, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 1.6178, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.31484881043434143, + "eval_runtime": 2.9228, + "eval_samples_per_second": 34.214, + "eval_steps_per_second": 4.448, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.000000000000001e-05, + "loss": 0.4051, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.13756035268306732, + "eval_runtime": 2.9586, + "eval_samples_per_second": 33.799, + "eval_steps_per_second": 4.394, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.1943, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.666666666666667e-05, + "loss": 0.0721, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.06226326525211334, + "eval_runtime": 2.9426, + "eval_samples_per_second": 33.984, + "eval_steps_per_second": 4.418, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4e-05, + "loss": 0.043, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.03685503825545311, + "eval_runtime": 2.9565, + "eval_samples_per_second": 33.823, + "eval_steps_per_second": 4.397, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.0342, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.045043423771858215, + "eval_runtime": 2.9492, + "eval_samples_per_second": 33.907, + "eval_steps_per_second": 4.408, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.0254, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.04237747564911842, + "eval_runtime": 2.9496, + "eval_samples_per_second": 33.903, + "eval_steps_per_second": 4.407, + "step": 87 + }, + { + "epoch": 7.2, + "learning_rate": 2e-05, + "loss": 0.0293, + "step": 90 + }, + { + "epoch": 8.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0163, + "step": 100 + }, + { + "epoch": 8.0, + "eval_loss": 0.031680114567279816, + "eval_runtime": 2.9477, + "eval_samples_per_second": 33.925, + "eval_steps_per_second": 4.41, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5934292402176000.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/sum/best_model/training_args.bin b/low-shot-task-specific-500-ex/sum/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4576dbbf588edeef9735c713b7cfe798db716e8 --- /dev/null +++ b/low-shot-task-specific-500-ex/sum/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed1b612847b0105ad99860aaefb41fa0340e2e0280e3c4076ca491ac381da18 +size 4091 diff --git a/low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json b/low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/svamp/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/svamp/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e1b5a64a89e0e430b97ff10617ac855cb56f374 --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda2a8837ed6afec0340d46201a78096816ae4d2d152dcc7558946ed73b446ad +size 104973389 diff --git a/low-shot-task-specific-500-ex/svamp/best_model/optimizer.pt b/low-shot-task-specific-500-ex/svamp/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..776737dfa3e06d9804eee4de718def9ee52f3ac6 --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a28fcc50d05164c50b93bf214b215f0c5b6185bea27dc47ad7c2f2439ef2510 +size 209984517 diff --git a/low-shot-task-specific-500-ex/svamp/best_model/rng_state.pth b/low-shot-task-specific-500-ex/svamp/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c839e55d332340db3729fc9e6a3c8e1b6f5c7ca --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6c0a36d7eda3986225a8dbaba2de43f0c2c5bfd73d0b29b23656add5cb142f +size 14575 diff --git a/low-shot-task-specific-500-ex/svamp/best_model/scheduler.pt b/low-shot-task-specific-500-ex/svamp/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..392626723afe7fe47552c25dc53914362c7b022a --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d3362569b4621dc89473c25409c83a8453a01ed11ab25e3ecbf873b407ca87 +size 627 diff --git a/low-shot-task-specific-500-ex/svamp/best_model/trainer_state.json b/low-shot-task-specific-500-ex/svamp/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c159b093da6c29e30ed2781f009e96a29d35487c --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/trainer_state.json @@ -0,0 +1,95 @@ +{ + "best_metric": 0.4858732521533966, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/svamp/checkpoint-62", + "epoch": 4.96, + "eval_steps": 500, + "global_step": 62, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.400000000000001e-05, + "loss": 4.1509, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 2.4244415760040283, + "eval_runtime": 4.2583, + "eval_samples_per_second": 23.484, + "eval_steps_per_second": 3.053, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.733333333333333e-05, + "loss": 2.0378, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 1.0243476629257202, + "eval_runtime": 4.2676, + "eval_samples_per_second": 23.432, + "eval_steps_per_second": 3.046, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.0666666666666666e-05, + "loss": 1.1215, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.7239224314689636, + "eval_runtime": 4.2565, + "eval_samples_per_second": 23.494, + "eval_steps_per_second": 3.054, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.4000000000000005e-05, + "loss": 0.7816, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.5969, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.5359882116317749, + "eval_runtime": 4.2654, + "eval_samples_per_second": 23.444, + "eval_steps_per_second": 3.048, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4.066666666666667e-05, + "loss": 0.5028, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.4858732521533966, + "eval_runtime": 4.2509, + "eval_samples_per_second": 23.524, + "eval_steps_per_second": 3.058, + "step": 62 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.050122493001728e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/svamp/best_model/training_args.bin b/low-shot-task-specific-500-ex/svamp/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c10933a2908433685fb0e7ea3846b164a6645e85 --- /dev/null +++ b/low-shot-task-specific-500-ex/svamp/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32eba2b1ac32490eba0b5f3e6bdeac3b8357d5af7839c7a56cf77b3204380b8 +size 4091 diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_config.json b/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_model.bin b/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c1022015031f1534142867debce79916df78e0 --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbac4520e32702cf711c398e1625ff0ee9c1eceb51f5a0197a7b03d450756a41 +size 104973389 diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/optimizer.pt b/low-shot-task-specific-500-ex/word-sorting/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdf2e933887d5478173787e08563935520e2b2ea --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df227037bf7704c6cf61a7489fff1af396bacbde65f33f578e64b6aae8ae8152 +size 209984517 diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/rng_state.pth b/low-shot-task-specific-500-ex/word-sorting/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e87919edd0a80c073728a5d31f8e826cbf62c95c --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a345ade014dcb380162a9c6aeeca87d56f3c422d4e7998597c7d5fff0e134c81 +size 14575 diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/scheduler.pt b/low-shot-task-specific-500-ex/word-sorting/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e4a7916329eb8a16fb9eb9d5f820ab39a9f9ed --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b328efc508129bc7f57f4b7996c1bcd196558a43b1355a8510ec55800cd250a4 +size 627 diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/trainer_state.json b/low-shot-task-specific-500-ex/word-sorting/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..44bd7111bd3bf54ce24ce8b29169429f46229b23 --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/trainer_state.json @@ -0,0 +1,171 @@ +{ + "best_metric": 0.030880222097039223, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/word-sorting/checkpoint-112", + "epoch": 9.6, + "eval_steps": 500, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.8, + "learning_rate": 7.333333333333333e-05, + "loss": 0.2601, + "step": 10 + }, + { + "epoch": 0.96, + "eval_loss": 0.053824424743652344, + "eval_runtime": 8.4016, + "eval_samples_per_second": 11.902, + "eval_steps_per_second": 1.547, + "step": 12 + }, + { + "epoch": 1.6, + "learning_rate": 6.666666666666667e-05, + "loss": 0.0526, + "step": 20 + }, + { + "epoch": 2.0, + "eval_loss": 0.03642543405294418, + "eval_runtime": 8.3694, + "eval_samples_per_second": 11.948, + "eval_steps_per_second": 1.553, + "step": 25 + }, + { + "epoch": 2.4, + "learning_rate": 6.000000000000001e-05, + "loss": 0.0367, + "step": 30 + }, + { + "epoch": 2.96, + "eval_loss": 0.03345979005098343, + "eval_runtime": 8.4155, + "eval_samples_per_second": 11.883, + "eval_steps_per_second": 1.545, + "step": 37 + }, + { + "epoch": 3.2, + "learning_rate": 5.333333333333333e-05, + "loss": 0.0331, + "step": 40 + }, + { + "epoch": 4.0, + "learning_rate": 4.666666666666667e-05, + "loss": 0.0296, + "step": 50 + }, + { + "epoch": 4.0, + "eval_loss": 0.031256407499313354, + "eval_runtime": 8.4766, + "eval_samples_per_second": 11.797, + "eval_steps_per_second": 1.534, + "step": 50 + }, + { + "epoch": 4.8, + "learning_rate": 4e-05, + "loss": 0.0262, + "step": 60 + }, + { + "epoch": 4.96, + "eval_loss": 0.031165990978479385, + "eval_runtime": 8.4029, + "eval_samples_per_second": 11.901, + "eval_steps_per_second": 1.547, + "step": 62 + }, + { + "epoch": 5.6, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.0239, + "step": 70 + }, + { + "epoch": 6.0, + "eval_loss": 0.03090561181306839, + "eval_runtime": 8.3105, + "eval_samples_per_second": 12.033, + "eval_steps_per_second": 1.564, + "step": 75 + }, + { + "epoch": 6.4, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.0209, + "step": 80 + }, + { + "epoch": 6.96, + "eval_loss": 0.03128715232014656, + "eval_runtime": 8.4557, + "eval_samples_per_second": 11.826, + "eval_steps_per_second": 1.537, + "step": 87 + }, + { + "epoch": 7.2, + "learning_rate": 2e-05, + "loss": 0.0187, + "step": 90 + }, + { + "epoch": 8.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0171, + "step": 100 + }, + { + "epoch": 8.0, + "eval_loss": 0.030982598662376404, + "eval_runtime": 8.4433, + "eval_samples_per_second": 11.844, + "eval_steps_per_second": 1.54, + "step": 100 + }, + { + "epoch": 8.8, + "learning_rate": 6.666666666666667e-06, + "loss": 0.0171, + "step": 110 + }, + { + "epoch": 8.96, + "eval_loss": 0.030880222097039223, + "eval_runtime": 8.4894, + "eval_samples_per_second": 11.779, + "eval_steps_per_second": 1.531, + "step": 112 + }, + { + "epoch": 9.6, + "learning_rate": 0.0, + "loss": 0.0155, + "step": 120 + }, + { + "epoch": 9.6, + "eval_loss": 0.031018836423754692, + "eval_runtime": 8.4634, + "eval_samples_per_second": 11.816, + "eval_steps_per_second": 1.536, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 120, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4.874279421837312e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific-500-ex/word-sorting/best_model/training_args.bin b/low-shot-task-specific-500-ex/word-sorting/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eeb2b9dd8c0a86a21897cd7379ff4d6c892e49ef --- /dev/null +++ b/low-shot-task-specific-500-ex/word-sorting/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beeac0cc3d21a8c70fe5601161eb2b3843b7929b06edb02f540d65f6c972690a +size 4091