diff --git a/low-shot-task-specific/coin_flip/best_model/adapter_config.json b/low-shot-task-specific/coin_flip/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/coin_flip/best_model/adapter_model.bin b/low-shot-task-specific/coin_flip/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf2bb676f08f12864a1c0704699b4a3a132b5716 --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b +size 104973389 diff --git a/low-shot-task-specific/coin_flip/best_model/optimizer.pt b/low-shot-task-specific/coin_flip/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5afd46bb4791a4fef047f645785e59bca373d7be --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92f8e6b0b40ecda9624a1676867dbcea32bdbef5e0eecbcac5106784ec8465b +size 209984517 diff --git a/low-shot-task-specific/coin_flip/best_model/rng_state.pth b/low-shot-task-specific/coin_flip/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..95e361dd25ae9e89fce971e9808cc8343d2f9765 --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c980b24b62e2109d15363aa73d40fa6fafc88b732c285e1b6fab92db69ce36b +size 14575 diff --git a/low-shot-task-specific/coin_flip/best_model/scheduler.pt b/low-shot-task-specific/coin_flip/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcdc5466646758ebda41ef77c8ae2628621f3bf7 --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7da15a993b502c23d3f1c3380001efcd3dd910c920a088c178a788bdf015b29 +size 627 diff --git a/low-shot-task-specific/coin_flip/best_model/trainer_state.json b/low-shot-task-specific/coin_flip/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e8ed145c9c4830840cd9ae0dd7e6203b39cef6b4 --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/trainer_state.json @@ -0,0 +1,121 @@ +{ + "best_metric": 0.14907684922218323, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/coin_flip/checkpoint-55", + "epoch": 8.979591836734693, + "eval_steps": 500, + "global_step": 55, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.98, + "eval_loss": 4.5179572105407715, + "eval_runtime": 1.8312, + "eval_samples_per_second": 26.758, + "eval_steps_per_second": 3.823, + "step": 6 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 4.5562, + "step": 10 + }, + { + "epoch": 1.96, + "eval_loss": 3.3285250663757324, + "eval_runtime": 1.8387, + "eval_samples_per_second": 26.649, + "eval_steps_per_second": 3.807, + "step": 12 + }, + { + "epoch": 2.94, + "eval_loss": 1.0093011856079102, + "eval_runtime": 1.8346, + "eval_samples_per_second": 26.709, + "eval_steps_per_second": 3.816, + "step": 18 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 2.3848, + "step": 20 + }, + { + "epoch": 3.92, + "eval_loss": 0.2200772613286972, + "eval_runtime": 1.8356, + "eval_samples_per_second": 26.694, + "eval_steps_per_second": 3.813, + "step": 24 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3144, + "step": 30 + }, + { + "epoch": 4.9, + "eval_loss": 0.21126192808151245, + "eval_runtime": 1.8403, + "eval_samples_per_second": 26.626, + "eval_steps_per_second": 3.804, + "step": 30 + }, + { + "epoch": 5.88, + "eval_loss": 0.18616808950901031, + "eval_runtime": 1.8423, + "eval_samples_per_second": 26.598, + "eval_steps_per_second": 3.8, + "step": 36 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.2066, + "step": 40 + }, + { + "epoch": 6.86, + "eval_loss": 0.1662234216928482, + "eval_runtime": 1.8364, + "eval_samples_per_second": 26.683, + "eval_steps_per_second": 3.812, + "step": 42 + }, + { + "epoch": 8.0, + "eval_loss": 0.2262299805879593, + "eval_runtime": 1.8315, + "eval_samples_per_second": 26.754, + "eval_steps_per_second": 3.822, + "step": 49 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.1856, + "step": 50 + }, + { + "epoch": 8.98, + "eval_loss": 0.14907684922218323, + "eval_runtime": 1.8356, + "eval_samples_per_second": 26.694, + "eval_steps_per_second": 3.813, + "step": 55 + } + ], + "logging_steps": 10, + "max_steps": 60, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6584591944581120.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/coin_flip/best_model/training_args.bin b/low-shot-task-specific/coin_flip/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a8ee660b925abc5ceb3ad500bbb3581ae9dafc3 --- /dev/null +++ b/low-shot-task-specific/coin_flip/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f450808ed0897cbe91d86e09bf730b8688854884991e7216373c40ee768a0c9b +size 4091 diff --git a/low-shot-task-specific/cola/best_model/adapter_config.json b/low-shot-task-specific/cola/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/cola/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/cola/best_model/adapter_model.bin b/low-shot-task-specific/cola/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c1af09c51a86a40b6d77a6a858ac24090fb12ab --- /dev/null +++ b/low-shot-task-specific/cola/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411c8f5252840aa1fc66fe6e846a855cc0c9826eb0e8a5e7e8ba168ffdeded3d +size 104973389 diff --git a/low-shot-task-specific/cola/best_model/optimizer.pt b/low-shot-task-specific/cola/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9abf46b94e8e90fe5bae90dbd87bfd96ef7a9ea --- /dev/null +++ b/low-shot-task-specific/cola/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ebebf0b8ce1d3bed95e63794192866c76d3ed6e03bc0d928dc945817500540 +size 209984517 diff --git a/low-shot-task-specific/cola/best_model/rng_state.pth b/low-shot-task-specific/cola/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc454de1a178b61617be4861d11d34583fe9dd5a --- /dev/null +++ b/low-shot-task-specific/cola/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef953e6438f145b783f6ca5f8d6d997cb169a9ddb6824cf4f2f9e126b56b09b7 +size 14575 diff --git a/low-shot-task-specific/cola/best_model/scheduler.pt b/low-shot-task-specific/cola/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e7de6b66545d4b398510675daa1a1e2d707c89e --- /dev/null +++ b/low-shot-task-specific/cola/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbabbf26b9b37d257cc72f404a441c985e21acf5c3e6fb7626e5104e04ff3282 +size 627 diff --git a/low-shot-task-specific/cola/best_model/trainer_state.json b/low-shot-task-specific/cola/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e2b48474a1fcf111088c29632b2117a605a6aa77 --- /dev/null +++ b/low-shot-task-specific/cola/best_model/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_metric": 0.1422310322523117, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/cola/checkpoint-75", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.399999999999999e-05, + "loss": 7.2579, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011399999999999999, + "loss": 6.0871, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 1.0100170373916626, + "eval_runtime": 5.8549, + "eval_samples_per_second": 34.159, + "eval_steps_per_second": 4.27, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017399999999999997, + "loss": 1.808, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.000234, + "loss": 0.2533, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.000294, + "loss": 0.2083, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.19681957364082336, + "eval_runtime": 5.8474, + "eval_samples_per_second": 34.203, + "eval_steps_per_second": 4.275, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.00028649999999999997, + "loss": 0.1663, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.0002715, + "loss": 0.1771, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.1422310322523117, + "eval_runtime": 5.848, + "eval_samples_per_second": 34.2, + "eval_steps_per_second": 4.275, + "step": 75 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4495226494648320.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/cola/best_model/training_args.bin b/low-shot-task-specific/cola/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d946c70cb68396558b1e052fa10b38999e90f8d --- /dev/null +++ b/low-shot-task-specific/cola/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432e4f00d89268eb8a2e942ba35b41ff6bef5e5193df86888baa8dbedf03e4e1 +size 4091 diff --git a/low-shot-task-specific/commonsense_qa/best_model/adapter_config.json b/low-shot-task-specific/commonsense_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin b/low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8c4820b9c20512015841dbcebc92f5d52e00844 --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5911565f25237e1e4a88d364af20dbdd3c53aa306935a116cdb82f52cba3baa8 +size 104973389 diff --git a/low-shot-task-specific/commonsense_qa/best_model/optimizer.pt b/low-shot-task-specific/commonsense_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..069e00a19254cb5f0cab5576d91a94586bc66143 --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488383f8f47eb284f498b258cf82a60fcc881827248f5358d43805942165bc4d +size 209984517 diff --git a/low-shot-task-specific/commonsense_qa/best_model/rng_state.pth b/low-shot-task-specific/commonsense_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27bc73cbdd7d5a0590031f21d53bd9912c62e7e4 --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329449cd1278f022b5239a2bd97e216a89c73ae3d215b6a1bfc73b69c537d4a3 +size 14575 diff --git a/low-shot-task-specific/commonsense_qa/best_model/scheduler.pt b/low-shot-task-specific/commonsense_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76b782916c86c0ae45b52838a41971fcc6f0a7fb --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e +size 627 diff --git a/low-shot-task-specific/commonsense_qa/best_model/trainer_state.json b/low-shot-task-specific/commonsense_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b71f3fa274a2463fd76978322ebf88473781f094 --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/trainer_state.json @@ -0,0 +1,111 @@ +{ + "best_metric": 0.28430670499801636, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/commonsense_qa/checkpoint-100", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.9999999999999995e-05, + "loss": 5.5323, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011999999999999999, + "loss": 3.1134, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 0.6229318976402283, + "eval_runtime": 7.9588, + "eval_samples_per_second": 25.129, + "eval_steps_per_second": 3.141, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017999999999999998, + "loss": 0.6745, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.3959, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003, + "loss": 0.3388, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.32543906569480896, + "eval_runtime": 7.9767, + "eval_samples_per_second": 25.073, + "eval_steps_per_second": 3.134, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.000285, + "loss": 0.2496, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027, + "loss": 0.1963, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.28735020756721497, + "eval_runtime": 7.9596, + "eval_samples_per_second": 25.127, + "eval_steps_per_second": 3.141, + "step": 75 + }, + { + "epoch": 3.2, + "learning_rate": 0.00025499999999999996, + "loss": 0.1475, + "step": 80 + }, + { + "epoch": 3.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.087, + "step": 90 + }, + { + "epoch": 4.0, + "learning_rate": 0.000225, + "loss": 0.0841, + "step": 100 + }, + { + "epoch": 4.0, + "eval_loss": 0.28430670499801636, + "eval_runtime": 7.9315, + "eval_samples_per_second": 25.216, + "eval_steps_per_second": 3.152, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.532283750678528e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/commonsense_qa/best_model/training_args.bin b/low-shot-task-specific/commonsense_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..63f2dff3d02348c58182cbd53640042a21fd8a0c --- /dev/null +++ b/low-shot-task-specific/commonsense_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac62dd2138b26a82acced238425ed68cca2c7eb6c44552fa9843fba2e1d0cf34 +size 4091 diff --git a/low-shot-task-specific/emotion/best_model/adapter_config.json b/low-shot-task-specific/emotion/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/emotion/best_model/adapter_model.bin b/low-shot-task-specific/emotion/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdbb3ece5e6ab05c20ab6aa8ed11b0bc31da1cfb --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55526193b5417dd0e6ec1a050c8c68add7ac57e4f9adc8f5523fb7b6109cb1d5 +size 104973389 diff --git a/low-shot-task-specific/emotion/best_model/optimizer.pt b/low-shot-task-specific/emotion/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f30a0d33aa14b650f5c3d3b6d6a6a5248a3c3498 --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c619e22fe8786bb47d576383db1964d04ec5895cf49c7967a06ea21ba69e24 +size 209984517 diff --git a/low-shot-task-specific/emotion/best_model/rng_state.pth b/low-shot-task-specific/emotion/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6e6d3b44b92533abd83e0000486e29986c09de3 --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33d07fcc46ed21e10fb24f0266332833c17c1787ecf4b21b90883591a74c17a +size 14575 diff --git a/low-shot-task-specific/emotion/best_model/scheduler.pt b/low-shot-task-specific/emotion/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..facbfa49cd5f41f1048c78a0bc4875a35799ad25 --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a1a40a49a3444d450b508a337be12226511f236bc6c3d4905032050bc15d21 +size 627 diff --git a/low-shot-task-specific/emotion/best_model/trainer_state.json b/low-shot-task-specific/emotion/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d1d5e81c86b0f2442fd49a877f9b1510a10a48c3 --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/trainer_state.json @@ -0,0 +1,157 @@ +{ + "best_metric": 0.13983282446861267, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/emotion/checkpoint-150", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.9999999999999995e-05, + "loss": 6.4494, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011999999999999999, + "loss": 5.0703, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 1.654482126235962, + "eval_runtime": 6.8374, + "eval_samples_per_second": 29.251, + "eval_steps_per_second": 3.656, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017999999999999998, + "loss": 1.9694, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.4062, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003, + "loss": 0.248, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.22630518674850464, + "eval_runtime": 6.8055, + "eval_samples_per_second": 29.388, + "eval_steps_per_second": 3.674, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.000285, + "loss": 0.1644, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027, + "loss": 0.1532, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.17524582147598267, + "eval_runtime": 6.7943, + "eval_samples_per_second": 29.437, + "eval_steps_per_second": 3.68, + "step": 75 + }, + { + "epoch": 3.2, + "learning_rate": 0.00025499999999999996, + "loss": 0.1291, + "step": 80 + }, + { + "epoch": 3.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.082, + "step": 90 + }, + { + "epoch": 4.0, + "learning_rate": 0.000225, + "loss": 0.0672, + "step": 100 + }, + { + "epoch": 4.0, + "eval_loss": 0.14034521579742432, + "eval_runtime": 6.8294, + "eval_samples_per_second": 29.285, + "eval_steps_per_second": 3.661, + "step": 100 + }, + { + "epoch": 4.4, + "learning_rate": 0.00020999999999999998, + "loss": 0.0443, + "step": 110 + }, + { + "epoch": 4.8, + "learning_rate": 0.000195, + "loss": 0.0505, + "step": 120 + }, + { + "epoch": 5.0, + "eval_loss": 0.187747061252594, + "eval_runtime": 6.8658, + "eval_samples_per_second": 29.13, + "eval_steps_per_second": 3.641, + "step": 125 + }, + { + "epoch": 5.2, + "learning_rate": 0.00017999999999999998, + "loss": 0.042, + "step": 130 + }, + { + "epoch": 5.6, + "learning_rate": 0.000165, + "loss": 0.0225, + "step": 140 + }, + { + "epoch": 6.0, + "learning_rate": 0.00015, + "loss": 0.0131, + "step": 150 + }, + { + "epoch": 6.0, + "eval_loss": 0.13983282446861267, + "eval_runtime": 6.8662, + "eval_samples_per_second": 29.128, + "eval_steps_per_second": 3.641, + "step": 150 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.692015121170432e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/emotion/best_model/training_args.bin b/low-shot-task-specific/emotion/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35dcc5d5937121ef866e1b52bc4bee965a12293a --- /dev/null +++ b/low-shot-task-specific/emotion/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e1dfa83a1cabba6524b8aef4b9cf9f06ad12b54c1b84b046d4557a3bea2b51 +size 4091 diff --git a/low-shot-task-specific/social_i_qa/best_model/adapter_config.json b/low-shot-task-specific/social_i_qa/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/social_i_qa/best_model/adapter_model.bin b/low-shot-task-specific/social_i_qa/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..57b6874b10d731ca53dfa7debcd4ecd154a75334 --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21047febd6ad48a886cf9d5fcaed091ec146720fa2ace5db287e7337cbf7a46a +size 104973389 diff --git a/low-shot-task-specific/social_i_qa/best_model/optimizer.pt b/low-shot-task-specific/social_i_qa/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d3804b36e78182396f03cc58b565206d4ff7564 --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51615aa1f8c3316a2d9f1d9b34cd7bd25fa7d1fd75182407da214a4549fcc3fe +size 209984517 diff --git a/low-shot-task-specific/social_i_qa/best_model/rng_state.pth b/low-shot-task-specific/social_i_qa/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6ff27f70aa43cb8d675ff96b7f52fcec3d9f833 --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d184eb9d6f950ca5fa7558982747687291171db4b5d64ca0e406118be389e9f5 +size 14575 diff --git a/low-shot-task-specific/social_i_qa/best_model/scheduler.pt b/low-shot-task-specific/social_i_qa/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76b782916c86c0ae45b52838a41971fcc6f0a7fb --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e +size 627 diff --git a/low-shot-task-specific/social_i_qa/best_model/trainer_state.json b/low-shot-task-specific/social_i_qa/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1fb2808a282dbbb67998a473c093e42242f6fd8 --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/trainer_state.json @@ -0,0 +1,111 @@ +{ + "best_metric": 0.21922020614147186, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/social_i_qa/checkpoint-100", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.9999999999999995e-05, + "loss": 6.145, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011999999999999999, + "loss": 3.2951, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 0.47254127264022827, + "eval_runtime": 8.3699, + "eval_samples_per_second": 23.895, + "eval_steps_per_second": 2.987, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017999999999999998, + "loss": 0.5553, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.3415, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003, + "loss": 0.3055, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.29911044239997864, + "eval_runtime": 8.3444, + "eval_samples_per_second": 23.968, + "eval_steps_per_second": 2.996, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.000285, + "loss": 0.2157, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027, + "loss": 0.1871, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.2219252735376358, + "eval_runtime": 8.3281, + "eval_samples_per_second": 24.015, + "eval_steps_per_second": 3.002, + "step": 75 + }, + { + "epoch": 3.2, + "learning_rate": 0.00025499999999999996, + "loss": 0.1417, + "step": 80 + }, + { + "epoch": 3.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.12, + "step": 90 + }, + { + "epoch": 4.0, + "learning_rate": 0.000225, + "loss": 0.1079, + "step": 100 + }, + { + "epoch": 4.0, + "eval_loss": 0.21922020614147186, + "eval_runtime": 8.385, + "eval_samples_per_second": 23.852, + "eval_steps_per_second": 2.981, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.656903891124224e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/social_i_qa/best_model/training_args.bin b/low-shot-task-specific/social_i_qa/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d811f6eea87d95367413fe17e336ef725c0f0d0e --- /dev/null +++ b/low-shot-task-specific/social_i_qa/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df6d236b24d8ccd4f73c811e7a410d0eabcb7077106cde9555c3305ab36be9c +size 4091 diff --git a/low-shot-task-specific/sst/best_model/adapter_config.json b/low-shot-task-specific/sst/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/sst/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/sst/best_model/adapter_model.bin b/low-shot-task-specific/sst/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e88a1b71237fd16414ab7e9fd5637d6bdc01517 --- /dev/null +++ b/low-shot-task-specific/sst/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f159b31b434f4cbc2859fe1a9d310fe6ff28774a227edc490206f028896a6c4b +size 104973389 diff --git a/low-shot-task-specific/sst/best_model/optimizer.pt b/low-shot-task-specific/sst/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a926adca1f30bb385e8f41adc25f9fa63070e272 --- /dev/null +++ b/low-shot-task-specific/sst/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3f8fa515dd79da7e74337497fcd24225c380d188278a17aecea3bbdbdab20b +size 209984517 diff --git a/low-shot-task-specific/sst/best_model/rng_state.pth b/low-shot-task-specific/sst/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6765c5c88bcfa5065e0ef57c6759fa7e31c24f62 --- /dev/null +++ b/low-shot-task-specific/sst/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb7ddc07ac1c6b830dbc025657118a71cc05bef3beda9880d700dfe72a190a1 +size 14575 diff --git a/low-shot-task-specific/sst/best_model/scheduler.pt b/low-shot-task-specific/sst/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63a4950c4e809053f49673c57e69f29069abd09c --- /dev/null +++ b/low-shot-task-specific/sst/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472963d9d147c2cd377a0a377de820bc06cc3f0119cb01d2dc8c5a02c4d14738 +size 627 diff --git a/low-shot-task-specific/sst/best_model/trainer_state.json b/low-shot-task-specific/sst/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c6f42340a33da1cf7637b626246c679be9bdba46 --- /dev/null +++ b/low-shot-task-specific/sst/best_model/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_metric": 0.0313660129904747, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sst/checkpoint-75", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 4.2e-05, + "loss": 7.604, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.000102, + "loss": 6.408, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 0.78858482837677, + "eval_runtime": 6.2379, + "eval_samples_per_second": 32.062, + "eval_steps_per_second": 4.008, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.000162, + "loss": 1.6353, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00022199999999999998, + "loss": 0.1518, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.00028199999999999997, + "loss": 0.0807, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.06099913269281387, + "eval_runtime": 6.2244, + "eval_samples_per_second": 32.132, + "eval_steps_per_second": 4.016, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.0002895, + "loss": 0.0667, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.0002745, + "loss": 0.0418, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.0313660129904747, + "eval_runtime": 6.2691, + "eval_samples_per_second": 31.902, + "eval_steps_per_second": 3.988, + "step": 75 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6295295189975040.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/sst/best_model/training_args.bin b/low-shot-task-specific/sst/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b29e7792eb8f6b75beb85fd52f1622045cd62cac --- /dev/null +++ b/low-shot-task-specific/sst/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c336d21dc0b7346d796426a4797d56084a81a08ea200e8c91411fa3449b6e06 +size 4091 diff --git a/low-shot-task-specific/sum/best_model/adapter_config.json b/low-shot-task-specific/sum/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/sum/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/sum/best_model/adapter_model.bin b/low-shot-task-specific/sum/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d43774581ad0f8fa370c2aaf711960be10046cf --- /dev/null +++ b/low-shot-task-specific/sum/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0a0cb9b7eade6b441f997e152af520540514264d51b1c267e30e7c500669bc +size 104973389 diff --git a/low-shot-task-specific/sum/best_model/optimizer.pt b/low-shot-task-specific/sum/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46ccbb81d9d8144e5d78dbf49388c6b9609dc11f --- /dev/null +++ b/low-shot-task-specific/sum/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a812e3aab6e78e990e7ca36e1b9e85917fdaca9d531c7bcdc41b82d6c982d1 +size 209984517 diff --git a/low-shot-task-specific/sum/best_model/rng_state.pth b/low-shot-task-specific/sum/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76eb3e5f5d27034961a38cf155f197f99aa025e2 --- /dev/null +++ b/low-shot-task-specific/sum/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc7dd2e3f40ac0c046da2f233e18f2314fea538368fd7bd263fa95f95f7fbef +size 14575 diff --git a/low-shot-task-specific/sum/best_model/scheduler.pt b/low-shot-task-specific/sum/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd6e778656bf0ae8d49c55004421f0ff5ed95d0d --- /dev/null +++ b/low-shot-task-specific/sum/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71df4de1094152c023456a0f4f7f28571d7f0bd29b962a097a17dff09a83bd7 +size 627 diff --git a/low-shot-task-specific/sum/best_model/trainer_state.json b/low-shot-task-specific/sum/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f0fa9411cdfa9e850a1c2b51d5fea8c37482faa --- /dev/null +++ b/low-shot-task-specific/sum/best_model/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_metric": 0.024566762149333954, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sum/checkpoint-75", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.9999999999999995e-05, + "loss": 3.5065, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011999999999999999, + "loss": 2.4397, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 0.4209679365158081, + "eval_runtime": 5.755, + "eval_samples_per_second": 34.753, + "eval_steps_per_second": 4.344, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017999999999999998, + "loss": 0.8483, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.1766, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003, + "loss": 0.0503, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.045773524791002274, + "eval_runtime": 5.7905, + "eval_samples_per_second": 34.539, + "eval_steps_per_second": 4.317, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.000285, + "loss": 0.0382, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027, + "loss": 0.0355, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.024566762149333954, + "eval_runtime": 5.7985, + "eval_samples_per_second": 34.492, + "eval_steps_per_second": 4.311, + "step": 75 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4450719301632000.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/sum/best_model/training_args.bin b/low-shot-task-specific/sum/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d2377516e0fcef4e866db241de96222343e74f9 --- /dev/null +++ b/low-shot-task-specific/sum/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2d99ceccff158e6a2e2efb1d0072c3a4d6419ea8c9ba1122915df6fece215d +size 4091 diff --git a/low-shot-task-specific/svamp/best_model/adapter_config.json b/low-shot-task-specific/svamp/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/svamp/best_model/adapter_model.bin b/low-shot-task-specific/svamp/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9afdd9933861629c6e7a6eec6978feb9ff39abf --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8804d14b72ecad8836ce6ece87d8b154d4efba71a2f4e81f154f964f7add4ac4 +size 104973389 diff --git a/low-shot-task-specific/svamp/best_model/optimizer.pt b/low-shot-task-specific/svamp/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a7f2c880651e9056fa386927b7c4c8d6b46598f --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:583d54f067e746da9c7b1171472477e18f6e4025181a7474276f3a698c9c32c3 +size 209984517 diff --git a/low-shot-task-specific/svamp/best_model/rng_state.pth b/low-shot-task-specific/svamp/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0305c5c44949a9782ddd1a356ded5740fdf791e3 --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da48047c59128ed57a37317aee46a2a734d1ab2d6c0827bc83e4e160db56bf25 +size 14575 diff --git a/low-shot-task-specific/svamp/best_model/scheduler.pt b/low-shot-task-specific/svamp/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d4d12e4762b5323ecf126c300501046d82cb521 --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b6019b4aa6167982722ac06ec70b66bde1590efd1e829b3736ce746697f12c +size 627 diff --git a/low-shot-task-specific/svamp/best_model/trainer_state.json b/low-shot-task-specific/svamp/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dd1361bccd8f36160ffb1ab9b7b68cc268e295dd --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/trainer_state.json @@ -0,0 +1,73 @@ +{ + "best_metric": 0.5390210151672363, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/svamp/checkpoint-52", + "epoch": 2.9714285714285715, + "eval_steps": 500, + "global_step": 52, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.57, + "learning_rate": 5.9999999999999995e-05, + "loss": 4.9058, + "step": 10 + }, + { + "epoch": 0.97, + "eval_loss": 2.449695348739624, + "eval_runtime": 6.0415, + "eval_samples_per_second": 23.173, + "eval_steps_per_second": 2.979, + "step": 17 + }, + { + "epoch": 1.14, + "learning_rate": 0.00011999999999999999, + "loss": 3.0825, + "step": 20 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017999999999999998, + "loss": 1.2018, + "step": 30 + }, + { + "epoch": 2.0, + "eval_loss": 0.6993179321289062, + "eval_runtime": 6.0439, + "eval_samples_per_second": 23.164, + "eval_steps_per_second": 2.978, + "step": 35 + }, + { + "epoch": 2.29, + "learning_rate": 0.00023999999999999998, + "loss": 0.6819, + "step": 40 + }, + { + "epoch": 2.86, + "learning_rate": 0.0003, + "loss": 0.5012, + "step": 50 + }, + { + "epoch": 2.97, + "eval_loss": 0.5390210151672363, + "eval_runtime": 6.0418, + "eval_samples_per_second": 23.172, + "eval_steps_per_second": 2.979, + "step": 52 + } + ], + "logging_steps": 10, + "max_steps": 170, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8725882453032960.0, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/svamp/best_model/training_args.bin b/low-shot-task-specific/svamp/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c842db9df87c166182e53224b242d47057a5295e --- /dev/null +++ b/low-shot-task-specific/svamp/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08225a59523f52456fc51d48a0875bdc9df4d6a48f36beee17fa36bdb0876cc2 +size 4091 diff --git a/low-shot-task-specific/word-sorting/best_model/adapter_config.json b/low-shot-task-specific/word-sorting/best_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8089cffca57122c6474c8736277a1ee41f75c20e --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "enable_lora": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "lora_alpha": 16, + "lora_dropout": 0.05, + "merge_weights": false, + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/low-shot-task-specific/word-sorting/best_model/adapter_model.bin b/low-shot-task-specific/word-sorting/best_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c380f92177170ec316d4acfca7275b7eea3de103 --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657d7ad4eff0b1ad0605dbd7158e895976b0c6e108fe7f54c8700b368ac51b72 +size 104973389 diff --git a/low-shot-task-specific/word-sorting/best_model/optimizer.pt b/low-shot-task-specific/word-sorting/best_model/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a0ca5a5919a91467db1ebc0794cc61001c398a6 --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40aec5f06aca5df3a94e52a67ddd826bb6e26540bf02cf3b4d50c0cefd006726 +size 209984517 diff --git a/low-shot-task-specific/word-sorting/best_model/rng_state.pth b/low-shot-task-specific/word-sorting/best_model/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4673440dada762f8f1fdd4f73a216e7c9948ddab --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4aa34f81b98b21683c74ef9a5ea3f6882fc55fc826bd2d807ddec0c4fe4b50 +size 14575 diff --git a/low-shot-task-specific/word-sorting/best_model/scheduler.pt b/low-shot-task-specific/word-sorting/best_model/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76b782916c86c0ae45b52838a41971fcc6f0a7fb --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e +size 627 diff --git a/low-shot-task-specific/word-sorting/best_model/trainer_state.json b/low-shot-task-specific/word-sorting/best_model/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..04d797f807d240fa226ac0b1a756543518844698 --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/trainer_state.json @@ -0,0 +1,111 @@ +{ + "best_metric": 0.028508992865681648, + "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/word-sorting/checkpoint-100", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.4, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.3822, + "step": 10 + }, + { + "epoch": 0.8, + "learning_rate": 0.00011999999999999999, + "loss": 0.1419, + "step": 20 + }, + { + "epoch": 1.0, + "eval_loss": 0.04200100898742676, + "eval_runtime": 16.0265, + "eval_samples_per_second": 12.479, + "eval_steps_per_second": 1.56, + "step": 25 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017999999999999998, + "loss": 0.0359, + "step": 30 + }, + { + "epoch": 1.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.0356, + "step": 40 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003, + "loss": 0.0303, + "step": 50 + }, + { + "epoch": 2.0, + "eval_loss": 0.031496673822402954, + "eval_runtime": 15.9936, + "eval_samples_per_second": 12.505, + "eval_steps_per_second": 1.563, + "step": 50 + }, + { + "epoch": 2.4, + "learning_rate": 0.000285, + "loss": 0.0203, + "step": 60 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027, + "loss": 0.0191, + "step": 70 + }, + { + "epoch": 3.0, + "eval_loss": 0.029132427647709846, + "eval_runtime": 15.9682, + "eval_samples_per_second": 12.525, + "eval_steps_per_second": 1.566, + "step": 75 + }, + { + "epoch": 3.2, + "learning_rate": 0.00025499999999999996, + "loss": 0.0161, + "step": 80 + }, + { + "epoch": 3.6, + "learning_rate": 0.00023999999999999998, + "loss": 0.0116, + "step": 90 + }, + { + "epoch": 4.0, + "learning_rate": 0.000225, + "loss": 0.0116, + "step": 100 + }, + { + "epoch": 4.0, + "eval_loss": 0.028508992865681648, + "eval_runtime": 15.9929, + "eval_samples_per_second": 12.506, + "eval_steps_per_second": 1.563, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 250, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4.016032383172608e+16, + "trial_name": null, + "trial_params": null +} diff --git a/low-shot-task-specific/word-sorting/best_model/training_args.bin b/low-shot-task-specific/word-sorting/best_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..50bc798a9fd4ff0c740f37c03ef32c66fd4f9700 --- /dev/null +++ b/low-shot-task-specific/word-sorting/best_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da263d11c41dea8873ebcc2473887cf2142a934341bf773c5e2b33595de9402e +size 4091