Muhammad Khalifa committed
Commit bacaabd • Parent(s): ed42aba

add low-shot models

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- low-shot-task-specific/coin_flip/best_model/adapter_config.json +21 -0
- low-shot-task-specific/coin_flip/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/coin_flip/best_model/optimizer.pt +3 -0
- low-shot-task-specific/coin_flip/best_model/rng_state.pth +3 -0
- low-shot-task-specific/coin_flip/best_model/scheduler.pt +3 -0
- low-shot-task-specific/coin_flip/best_model/trainer_state.json +121 -0
- low-shot-task-specific/coin_flip/best_model/training_args.bin +3 -0
- low-shot-task-specific/cola/best_model/adapter_config.json +21 -0
- low-shot-task-specific/cola/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/cola/best_model/optimizer.pt +3 -0
- low-shot-task-specific/cola/best_model/rng_state.pth +3 -0
- low-shot-task-specific/cola/best_model/scheduler.pt +3 -0
- low-shot-task-specific/cola/best_model/trainer_state.json +85 -0
- low-shot-task-specific/cola/best_model/training_args.bin +3 -0
- low-shot-task-specific/commonsense_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/commonsense_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific/commonsense_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific/commonsense_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific/commonsense_qa/best_model/trainer_state.json +111 -0
- low-shot-task-specific/commonsense_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific/emotion/best_model/adapter_config.json +21 -0
- low-shot-task-specific/emotion/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/emotion/best_model/optimizer.pt +3 -0
- low-shot-task-specific/emotion/best_model/rng_state.pth +3 -0
- low-shot-task-specific/emotion/best_model/scheduler.pt +3 -0
- low-shot-task-specific/emotion/best_model/trainer_state.json +157 -0
- low-shot-task-specific/emotion/best_model/training_args.bin +3 -0
- low-shot-task-specific/social_i_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific/social_i_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/social_i_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific/social_i_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific/social_i_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific/social_i_qa/best_model/trainer_state.json +111 -0
- low-shot-task-specific/social_i_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific/sst/best_model/adapter_config.json +21 -0
- low-shot-task-specific/sst/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/sst/best_model/optimizer.pt +3 -0
- low-shot-task-specific/sst/best_model/rng_state.pth +3 -0
- low-shot-task-specific/sst/best_model/scheduler.pt +3 -0
- low-shot-task-specific/sst/best_model/trainer_state.json +85 -0
- low-shot-task-specific/sst/best_model/training_args.bin +3 -0
- low-shot-task-specific/sum/best_model/adapter_config.json +21 -0
- low-shot-task-specific/sum/best_model/adapter_model.bin +3 -0
- low-shot-task-specific/sum/best_model/optimizer.pt +3 -0
- low-shot-task-specific/sum/best_model/rng_state.pth +3 -0
- low-shot-task-specific/sum/best_model/scheduler.pt +3 -0
- low-shot-task-specific/sum/best_model/trainer_state.json +85 -0
- low-shot-task-specific/sum/best_model/training_args.bin +3 -0
- low-shot-task-specific/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific/coin_flip/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
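Each best_model folder added in this commit is a PEFT LoRA adapter (rank 16, alpha 16, dropout 0.05 on the q/k/v/o projection matrices) on top of meta-llama/Llama-2-13b-hf, as the adapter_config.json above shows. A minimal sketch of how such an adapter could be loaded, assuming the peft and transformers libraries and access to the gated base model; the local adapter path is illustrative, taken from this repository's layout:

# Sketch (not part of the commit): load one of these LoRA adapters with PEFT.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-13b-hf"                          # base model named in adapter_config.json
adapter_dir = "low-shot-task-specific/coin_flip/best_model"    # folder added by this commit

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16, device_map="auto")

# Wrap the base model with the LoRA weights (r=16, alpha=16, q/k/v/o projections).
model = PeftModel.from_pretrained(base, adapter_dir)
model.eval()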
low-shot-task-specific/coin_flip/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b
+size 104973389
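The .bin, .pt, and .pth entries in this commit are Git LFS pointer files rather than the binaries themselves: each records the LFS spec version, a sha256 object id, and the byte size of the real file, which Git LFS (or the Hub client) resolves at download time. A purely illustrative sketch of parsing such a pointer, using only the standard library:

# Sketch: parse the three key/value lines of a Git LFS pointer file (spec v1).
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("low-shot-task-specific/coin_flip/best_model/adapter_model.bin")
print(pointer["oid"])        # sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b
print(int(pointer["size"]))  # 104973389 bytes (~100 MB of adapter weights)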
low-shot-task-specific/coin_flip/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d92f8e6b0b40ecda9624a1676867dbcea32bdbef5e0eecbcac5106784ec8465b
+size 209984517
low-shot-task-specific/coin_flip/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c980b24b62e2109d15363aa73d40fa6fafc88b732c285e1b6fab92db69ce36b
+size 14575
low-shot-task-specific/coin_flip/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7da15a993b502c23d3f1c3380001efcd3dd910c920a088c178a788bdf015b29
+size 627
low-shot-task-specific/coin_flip/best_model/trainer_state.json
ADDED
@@ -0,0 +1,121 @@
+{
+  "best_metric": 0.14907684922218323,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/coin_flip/checkpoint-55",
+  "epoch": 8.979591836734693,
+  "eval_steps": 500,
+  "global_step": 55,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.98,
+      "eval_loss": 4.5179572105407715,
+      "eval_runtime": 1.8312,
+      "eval_samples_per_second": 26.758,
+      "eval_steps_per_second": 3.823,
+      "step": 6
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 4.5562,
+      "step": 10
+    },
+    {
+      "epoch": 1.96,
+      "eval_loss": 3.3285250663757324,
+      "eval_runtime": 1.8387,
+      "eval_samples_per_second": 26.649,
+      "eval_steps_per_second": 3.807,
+      "step": 12
+    },
+    {
+      "epoch": 2.94,
+      "eval_loss": 1.0093011856079102,
+      "eval_runtime": 1.8346,
+      "eval_samples_per_second": 26.709,
+      "eval_steps_per_second": 3.816,
+      "step": 18
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 2.3848,
+      "step": 20
+    },
+    {
+      "epoch": 3.92,
+      "eval_loss": 0.2200772613286972,
+      "eval_runtime": 1.8356,
+      "eval_samples_per_second": 26.694,
+      "eval_steps_per_second": 3.813,
+      "step": 24
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.3144,
+      "step": 30
+    },
+    {
+      "epoch": 4.9,
+      "eval_loss": 0.21126192808151245,
+      "eval_runtime": 1.8403,
+      "eval_samples_per_second": 26.626,
+      "eval_steps_per_second": 3.804,
+      "step": 30
+    },
+    {
+      "epoch": 5.88,
+      "eval_loss": 0.18616808950901031,
+      "eval_runtime": 1.8423,
+      "eval_samples_per_second": 26.598,
+      "eval_steps_per_second": 3.8,
+      "step": 36
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.2066,
+      "step": 40
+    },
+    {
+      "epoch": 6.86,
+      "eval_loss": 0.1662234216928482,
+      "eval_runtime": 1.8364,
+      "eval_samples_per_second": 26.683,
+      "eval_steps_per_second": 3.812,
+      "step": 42
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.2262299805879593,
+      "eval_runtime": 1.8315,
+      "eval_samples_per_second": 26.754,
+      "eval_steps_per_second": 3.822,
+      "step": 49
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 0.0003,
+      "loss": 0.1856,
+      "step": 50
+    },
+    {
+      "epoch": 8.98,
+      "eval_loss": 0.14907684922218323,
+      "eval_runtime": 1.8356,
+      "eval_samples_per_second": 26.694,
+      "eval_steps_per_second": 3.813,
+      "step": 55
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 60,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 6584591944581120.0,
+  "trial_name": null,
+  "trial_params": null
+}
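trainer_state.json is the standard Hugging Face Trainer state: it records the best eval metric, which checkpoint produced it, and the per-step log history (here, eval_loss drops from about 4.52 to 0.149 over roughly 9 epochs of the 60-step coin_flip run). A small sketch, using only the standard library, of pulling the best-checkpoint information out of such a file; the path refers to the file added above:

# Sketch: read best-checkpoint info from a Trainer trainer_state.json.
import json

with open("low-shot-task-specific/coin_flip/best_model/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.14907684922218323 (best eval loss)
print(state["best_model_checkpoint"])  # .../low-shot-task-specific/coin_flip/checkpoint-55

# Entries in log_history that carry an "eval_loss" are evaluation steps.
evals = [e for e in state["log_history"] if "eval_loss" in e]
for entry in evals[-3:]:
    print(entry["step"], entry["eval_loss"])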
low-shot-task-specific/coin_flip/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f450808ed0897cbe91d86e09bf730b8688854884991e7216373c40ee768a0c9b
+size 4091
low-shot-task-specific/cola/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/cola/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:411c8f5252840aa1fc66fe6e846a855cc0c9826eb0e8a5e7e8ba168ffdeded3d
+size 104973389
low-shot-task-specific/cola/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3ebebf0b8ce1d3bed95e63794192866c76d3ed6e03bc0d928dc945817500540
+size 209984517
low-shot-task-specific/cola/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef953e6438f145b783f6ca5f8d6d997cb169a9ddb6824cf4f2f9e126b56b09b7
+size 14575
low-shot-task-specific/cola/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbabbf26b9b37d257cc72f404a441c985e21acf5c3e6fb7626e5104e04ff3282
+size 627
low-shot-task-specific/cola/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.1422310322523117,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/cola/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.399999999999999e-05,
+      "loss": 7.2579,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011399999999999999,
+      "loss": 6.0871,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.0100170373916626,
+      "eval_runtime": 5.8549,
+      "eval_samples_per_second": 34.159,
+      "eval_steps_per_second": 4.27,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017399999999999997,
+      "loss": 1.808,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.000234,
+      "loss": 0.2533,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.000294,
+      "loss": 0.2083,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.19681957364082336,
+      "eval_runtime": 5.8474,
+      "eval_samples_per_second": 34.203,
+      "eval_steps_per_second": 4.275,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.00028649999999999997,
+      "loss": 0.1663,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.0002715,
+      "loss": 0.1771,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.1422310322523117,
+      "eval_runtime": 5.848,
+      "eval_samples_per_second": 34.2,
+      "eval_steps_per_second": 4.275,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 4495226494648320.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/cola/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:432e4f00d89268eb8a2e942ba35b41ff6bef5e5193df86888baa8dbedf03e4e1
+size 4091
low-shot-task-specific/commonsense_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5911565f25237e1e4a88d364af20dbdd3c53aa306935a116cdb82f52cba3baa8
+size 104973389
low-shot-task-specific/commonsense_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:488383f8f47eb284f498b258cf82a60fcc881827248f5358d43805942165bc4d
+size 209984517
low-shot-task-specific/commonsense_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:329449cd1278f022b5239a2bd97e216a89c73ae3d215b6a1bfc73b69c537d4a3
+size 14575
low-shot-task-specific/commonsense_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
+size 627
low-shot-task-specific/commonsense_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,111 @@
+{
+  "best_metric": 0.28430670499801636,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/commonsense_qa/checkpoint-100",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 5.5323,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 3.1134,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6229318976402283,
+      "eval_runtime": 7.9588,
+      "eval_samples_per_second": 25.129,
+      "eval_steps_per_second": 3.141,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.6745,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.3959,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.3388,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.32543906569480896,
+      "eval_runtime": 7.9767,
+      "eval_samples_per_second": 25.073,
+      "eval_steps_per_second": 3.134,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.2496,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1963,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.28735020756721497,
+      "eval_runtime": 7.9596,
+      "eval_samples_per_second": 25.127,
+      "eval_steps_per_second": 3.141,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1475,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.087,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.0841,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.28430670499801636,
+      "eval_runtime": 7.9315,
+      "eval_samples_per_second": 25.216,
+      "eval_steps_per_second": 3.152,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.532283750678528e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/commonsense_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac62dd2138b26a82acced238425ed68cca2c7eb6c44552fa9843fba2e1d0cf34
+size 4091
low-shot-task-specific/emotion/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/emotion/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55526193b5417dd0e6ec1a050c8c68add7ac57e4f9adc8f5523fb7b6109cb1d5
+size 104973389
low-shot-task-specific/emotion/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c619e22fe8786bb47d576383db1964d04ec5895cf49c7967a06ea21ba69e24
+size 209984517
low-shot-task-specific/emotion/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a33d07fcc46ed21e10fb24f0266332833c17c1787ecf4b21b90883591a74c17a
+size 14575
low-shot-task-specific/emotion/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24a1a40a49a3444d450b508a337be12226511f236bc6c3d4905032050bc15d21
+size 627
low-shot-task-specific/emotion/best_model/trainer_state.json
ADDED
@@ -0,0 +1,157 @@
+{
+  "best_metric": 0.13983282446861267,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/emotion/checkpoint-150",
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 6.4494,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 5.0703,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.654482126235962,
+      "eval_runtime": 6.8374,
+      "eval_samples_per_second": 29.251,
+      "eval_steps_per_second": 3.656,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 1.9694,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.4062,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.248,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.22630518674850464,
+      "eval_runtime": 6.8055,
+      "eval_samples_per_second": 29.388,
+      "eval_steps_per_second": 3.674,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.1644,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1532,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.17524582147598267,
+      "eval_runtime": 6.7943,
+      "eval_samples_per_second": 29.437,
+      "eval_steps_per_second": 3.68,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1291,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.082,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.0672,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.14034521579742432,
+      "eval_runtime": 6.8294,
+      "eval_samples_per_second": 29.285,
+      "eval_steps_per_second": 3.661,
+      "step": 100
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 0.00020999999999999998,
+      "loss": 0.0443,
+      "step": 110
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 0.000195,
+      "loss": 0.0505,
+      "step": 120
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.187747061252594,
+      "eval_runtime": 6.8658,
+      "eval_samples_per_second": 29.13,
+      "eval_steps_per_second": 3.641,
+      "step": 125
+    },
+    {
+      "epoch": 5.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.042,
+      "step": 130
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 0.000165,
+      "loss": 0.0225,
+      "step": 140
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 0.00015,
+      "loss": 0.0131,
+      "step": 150
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.13983282446861267,
+      "eval_runtime": 6.8662,
+      "eval_samples_per_second": 29.128,
+      "eval_steps_per_second": 3.641,
+      "step": 150
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.692015121170432e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/emotion/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43e1dfa83a1cabba6524b8aef4b9cf9f06ad12b54c1b84b046d4557a3bea2b51
+size 4091
low-shot-task-specific/social_i_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/social_i_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21047febd6ad48a886cf9d5fcaed091ec146720fa2ace5db287e7337cbf7a46a
+size 104973389
low-shot-task-specific/social_i_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51615aa1f8c3316a2d9f1d9b34cd7bd25fa7d1fd75182407da214a4549fcc3fe
+size 209984517
low-shot-task-specific/social_i_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d184eb9d6f950ca5fa7558982747687291171db4b5d64ca0e406118be389e9f5
+size 14575
low-shot-task-specific/social_i_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
+size 627
low-shot-task-specific/social_i_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,111 @@
+{
+  "best_metric": 0.21922020614147186,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/social_i_qa/checkpoint-100",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 6.145,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 3.2951,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.47254127264022827,
+      "eval_runtime": 8.3699,
+      "eval_samples_per_second": 23.895,
+      "eval_steps_per_second": 2.987,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.5553,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.3415,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.3055,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.29911044239997864,
+      "eval_runtime": 8.3444,
+      "eval_samples_per_second": 23.968,
+      "eval_steps_per_second": 2.996,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.2157,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.1871,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.2219252735376358,
+      "eval_runtime": 8.3281,
+      "eval_samples_per_second": 24.015,
+      "eval_steps_per_second": 3.002,
+      "step": 75
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 0.00025499999999999996,
+      "loss": 0.1417,
+      "step": 80
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.12,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.000225,
+      "loss": 0.1079,
+      "step": 100
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.21922020614147186,
+      "eval_runtime": 8.385,
+      "eval_samples_per_second": 23.852,
+      "eval_steps_per_second": 2.981,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.656903891124224e+16,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/social_i_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1df6d236b24d8ccd4f73c811e7a410d0eabcb7077106cde9555c3305ab36be9c
+size 4091
low-shot-task-specific/sst/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/sst/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f159b31b434f4cbc2859fe1a9d310fe6ff28774a227edc490206f028896a6c4b
+size 104973389
low-shot-task-specific/sst/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a3f8fa515dd79da7e74337497fcd24225c380d188278a17aecea3bbdbdab20b
+size 209984517
low-shot-task-specific/sst/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb7ddc07ac1c6b830dbc025657118a71cc05bef3beda9880d700dfe72a190a1
+size 14575
low-shot-task-specific/sst/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:472963d9d147c2cd377a0a377de820bc06cc3f0119cb01d2dc8c5a02c4d14738
+size 627
low-shot-task-specific/sst/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.0313660129904747,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sst/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 4.2e-05,
+      "loss": 7.604,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.000102,
+      "loss": 6.408,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.78858482837677,
+      "eval_runtime": 6.2379,
+      "eval_samples_per_second": 32.062,
+      "eval_steps_per_second": 4.008,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.000162,
+      "loss": 1.6353,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00022199999999999998,
+      "loss": 0.1518,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.00028199999999999997,
+      "loss": 0.0807,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.06099913269281387,
+      "eval_runtime": 6.2244,
+      "eval_samples_per_second": 32.132,
+      "eval_steps_per_second": 4.016,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.0002895,
+      "loss": 0.0667,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.0002745,
+      "loss": 0.0418,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.0313660129904747,
+      "eval_runtime": 6.2691,
+      "eval_samples_per_second": 31.902,
+      "eval_steps_per_second": 3.988,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 6295295189975040.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/sst/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c336d21dc0b7346d796426a4797d56084a81a08ea200e8c91411fa3449b6e06
+size 4091
low-shot-task-specific/sum/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
low-shot-task-specific/sum/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0a0cb9b7eade6b441f997e152af520540514264d51b1c267e30e7c500669bc
+size 104973389
low-shot-task-specific/sum/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a812e3aab6e78e990e7ca36e1b9e85917fdaca9d531c7bcdc41b82d6c982d1
+size 209984517
low-shot-task-specific/sum/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc7dd2e3f40ac0c046da2f233e18f2314fea538368fd7bd263fa95f95f7fbef
+size 14575
low-shot-task-specific/sum/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71df4de1094152c023456a0f4f7f28571d7f0bd29b962a097a17dff09a83bd7
+size 627
low-shot-task-specific/sum/best_model/trainer_state.json
ADDED
@@ -0,0 +1,85 @@
+{
+  "best_metric": 0.024566762149333954,
+  "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sum/checkpoint-75",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 3.5065,
+      "step": 10
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 2.4397,
+      "step": 20
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4209679365158081,
+      "eval_runtime": 5.755,
+      "eval_samples_per_second": 34.753,
+      "eval_steps_per_second": 4.344,
+      "step": 25
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.8483,
+      "step": 30
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.1766,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.0003,
+      "loss": 0.0503,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.045773524791002274,
+      "eval_runtime": 5.7905,
+      "eval_samples_per_second": 34.539,
+      "eval_steps_per_second": 4.317,
+      "step": 50
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 0.000285,
+      "loss": 0.0382,
+      "step": 60
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 0.00027,
+      "loss": 0.0355,
+      "step": 70
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.024566762149333954,
+      "eval_runtime": 5.7985,
+      "eval_samples_per_second": 34.492,
+      "eval_steps_per_second": 4.311,
+      "step": 75
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 250,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 4450719301632000.0,
+  "trial_name": null,
+  "trial_params": null
+}
low-shot-task-specific/sum/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2d99ceccff158e6a2e2efb1d0072c3a4d6419ea8c9ba1122915df6fece215d
+size 4091
low-shot-task-specific/svamp/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}