Muhammad Khalifa
committed on
Commit
•
e5d11d8
1
Parent(s):
70d9848
add 500-shot models
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json +135 -0
- low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/cola/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/cola/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/cola/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/cola/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/cola/best_model/trainer_state.json +171 -0
- low-shot-task-specific-500-ex/cola/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json +171 -0
- low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json +123 -0
- low-shot-task-specific-500-ex/emotion/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json +109 -0
- low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/sst/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/sst/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/sst/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/sst/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/sst/best_model/trainer_state.json +123 -0
- low-shot-task-specific-500-ex/sst/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/sum/best_model/adapter_config.json +21 -0
- low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin +3 -0
- low-shot-task-specific-500-ex/sum/best_model/optimizer.pt +3 -0
- low-shot-task-specific-500-ex/sum/best_model/rng_state.pth +3 -0
- low-shot-task-specific-500-ex/sum/best_model/scheduler.pt +3 -0
- low-shot-task-specific-500-ex/sum/best_model/trainer_state.json +143 -0
- low-shot-task-specific-500-ex/sum/best_model/training_args.bin +3 -0
- low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:206bdaf3cf034deec6de2394ba6a7b29d0b637ab2d1925332e8f1abb76025dd6
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d53c7e4694c2cfcf4dd0eb9bfe3b05a7cd7809c6a8e6ff871c4c99d6ddfefaf4
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.17182409763336182,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-60",
|
4 |
+
"epoch": 9.795918367346939,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 60,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.98,
|
13 |
+
"eval_loss": 3.2136309146881104,
|
14 |
+
"eval_runtime": 1.7971,
|
15 |
+
"eval_samples_per_second": 27.266,
|
16 |
+
"eval_steps_per_second": 3.895,
|
17 |
+
"step": 6
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 1.63,
|
21 |
+
"learning_rate": 6.666666666666667e-05,
|
22 |
+
"loss": 3.5659,
|
23 |
+
"step": 10
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.96,
|
27 |
+
"eval_loss": 1.1381325721740723,
|
28 |
+
"eval_runtime": 1.8028,
|
29 |
+
"eval_samples_per_second": 27.18,
|
30 |
+
"eval_steps_per_second": 3.883,
|
31 |
+
"step": 12
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 2.94,
|
35 |
+
"eval_loss": 0.39599937200546265,
|
36 |
+
"eval_runtime": 1.7938,
|
37 |
+
"eval_samples_per_second": 27.316,
|
38 |
+
"eval_steps_per_second": 3.902,
|
39 |
+
"step": 18
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 3.27,
|
43 |
+
"learning_rate": 5.333333333333333e-05,
|
44 |
+
"loss": 0.8239,
|
45 |
+
"step": 20
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 3.92,
|
49 |
+
"eval_loss": 0.23788291215896606,
|
50 |
+
"eval_runtime": 1.8071,
|
51 |
+
"eval_samples_per_second": 27.115,
|
52 |
+
"eval_steps_per_second": 3.874,
|
53 |
+
"step": 24
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 4.9,
|
57 |
+
"learning_rate": 4e-05,
|
58 |
+
"loss": 0.2375,
|
59 |
+
"step": 30
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 4.9,
|
63 |
+
"eval_loss": 0.1869448572397232,
|
64 |
+
"eval_runtime": 1.8046,
|
65 |
+
"eval_samples_per_second": 27.154,
|
66 |
+
"eval_steps_per_second": 3.879,
|
67 |
+
"step": 30
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"epoch": 5.88,
|
71 |
+
"eval_loss": 0.1762770116329193,
|
72 |
+
"eval_runtime": 1.7955,
|
73 |
+
"eval_samples_per_second": 27.291,
|
74 |
+
"eval_steps_per_second": 3.899,
|
75 |
+
"step": 36
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"epoch": 6.53,
|
79 |
+
"learning_rate": 2.6666666666666667e-05,
|
80 |
+
"loss": 0.1756,
|
81 |
+
"step": 40
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.86,
|
85 |
+
"eval_loss": 0.17334015667438507,
|
86 |
+
"eval_runtime": 1.7994,
|
87 |
+
"eval_samples_per_second": 27.231,
|
88 |
+
"eval_steps_per_second": 3.89,
|
89 |
+
"step": 42
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 8.0,
|
93 |
+
"eval_loss": 0.17443998157978058,
|
94 |
+
"eval_runtime": 1.7969,
|
95 |
+
"eval_samples_per_second": 27.269,
|
96 |
+
"eval_steps_per_second": 3.896,
|
97 |
+
"step": 49
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"epoch": 8.16,
|
101 |
+
"learning_rate": 1.3333333333333333e-05,
|
102 |
+
"loss": 0.1626,
|
103 |
+
"step": 50
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"epoch": 8.98,
|
107 |
+
"eval_loss": 0.17577075958251953,
|
108 |
+
"eval_runtime": 1.7999,
|
109 |
+
"eval_samples_per_second": 27.223,
|
110 |
+
"eval_steps_per_second": 3.889,
|
111 |
+
"step": 55
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"epoch": 9.8,
|
115 |
+
"learning_rate": 0.0,
|
116 |
+
"loss": 0.1535,
|
117 |
+
"step": 60
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 9.8,
|
121 |
+
"eval_loss": 0.17182409763336182,
|
122 |
+
"eval_runtime": 1.7992,
|
123 |
+
"eval_samples_per_second": 27.235,
|
124 |
+
"eval_steps_per_second": 3.891,
|
125 |
+
"step": 60
|
126 |
+
}
|
127 |
+
],
|
128 |
+
"logging_steps": 10,
|
129 |
+
"max_steps": 60,
|
130 |
+
"num_train_epochs": 10,
|
131 |
+
"save_steps": 500,
|
132 |
+
"total_flos": 7168130697461760.0,
|
133 |
+
"trial_name": null,
|
134 |
+
"trial_params": null
|
135 |
+
}
|
low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5173d21d2a38d1cd1cd4daa45bed5a9f6f0d64b0897c6366683a240cd58f864
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/cola/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88db25f61b79433f848f0788c44a69c4f0e655ee09f0508b3af035fc7e02179e
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/cola/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43508585f5b8ebfc9532b38cb5a03b32bd704e2b5ebaf34c0b503292c13d7c3f
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/cola/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df43d0030d9a94c82d1f09bcf5abbca157094e974c01f4c6b0214cfabe62d21a
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/cola/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd5a1245e45cfb0cd08e1aaad686b01aa603042a02b323bff0c30b6b0eaca154
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/cola/best_model/trainer_state.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.16061067581176758,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/cola/checkpoint-120",
|
4 |
+
"epoch": 9.6,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 120,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.333333333333333e-05,
|
14 |
+
"loss": 6.6687,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 4.879603385925293,
|
20 |
+
"eval_runtime": 2.9872,
|
21 |
+
"eval_samples_per_second": 33.476,
|
22 |
+
"eval_steps_per_second": 4.352,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.733333333333333e-05,
|
28 |
+
"loss": 4.1857,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 0.6735175251960754,
|
34 |
+
"eval_runtime": 2.9786,
|
35 |
+
"eval_samples_per_second": 33.573,
|
36 |
+
"eval_steps_per_second": 4.364,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.0666666666666666e-05,
|
42 |
+
"loss": 1.1578,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.22044576704502106,
|
48 |
+
"eval_runtime": 2.9802,
|
49 |
+
"eval_samples_per_second": 33.554,
|
50 |
+
"eval_steps_per_second": 4.362,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.4000000000000005e-05,
|
56 |
+
"loss": 0.2691,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.7333333333333336e-05,
|
62 |
+
"loss": 0.2011,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.18446393311023712,
|
68 |
+
"eval_runtime": 2.9788,
|
69 |
+
"eval_samples_per_second": 33.571,
|
70 |
+
"eval_steps_per_second": 4.364,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4.066666666666667e-05,
|
76 |
+
"loss": 0.1782,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.17961610853672028,
|
82 |
+
"eval_runtime": 2.9749,
|
83 |
+
"eval_samples_per_second": 33.615,
|
84 |
+
"eval_steps_per_second": 4.37,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.4e-05,
|
90 |
+
"loss": 0.1609,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.1864309310913086,
|
96 |
+
"eval_runtime": 2.9806,
|
97 |
+
"eval_samples_per_second": 33.55,
|
98 |
+
"eval_steps_per_second": 4.362,
|
99 |
+
"step": 75
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 6.4,
|
103 |
+
"learning_rate": 2.7333333333333335e-05,
|
104 |
+
"loss": 0.1644,
|
105 |
+
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 6.96,
|
109 |
+
"eval_loss": 0.16424360871315002,
|
110 |
+
"eval_runtime": 2.9883,
|
111 |
+
"eval_samples_per_second": 33.464,
|
112 |
+
"eval_steps_per_second": 4.35,
|
113 |
+
"step": 87
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 7.2,
|
117 |
+
"learning_rate": 2.066666666666667e-05,
|
118 |
+
"loss": 0.1389,
|
119 |
+
"step": 90
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 8.0,
|
123 |
+
"learning_rate": 1.4e-05,
|
124 |
+
"loss": 0.1294,
|
125 |
+
"step": 100
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"epoch": 8.0,
|
129 |
+
"eval_loss": 0.16847126185894012,
|
130 |
+
"eval_runtime": 2.9824,
|
131 |
+
"eval_samples_per_second": 33.53,
|
132 |
+
"eval_steps_per_second": 4.359,
|
133 |
+
"step": 100
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 8.8,
|
137 |
+
"learning_rate": 7.333333333333333e-06,
|
138 |
+
"loss": 0.1189,
|
139 |
+
"step": 110
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"epoch": 8.96,
|
143 |
+
"eval_loss": 0.16718144714832306,
|
144 |
+
"eval_runtime": 2.9865,
|
145 |
+
"eval_samples_per_second": 33.485,
|
146 |
+
"eval_steps_per_second": 4.353,
|
147 |
+
"step": 112
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 9.6,
|
151 |
+
"learning_rate": 6.666666666666667e-07,
|
152 |
+
"loss": 0.1159,
|
153 |
+
"step": 120
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"epoch": 9.6,
|
157 |
+
"eval_loss": 0.16061067581176758,
|
158 |
+
"eval_runtime": 3.0082,
|
159 |
+
"eval_samples_per_second": 33.243,
|
160 |
+
"eval_steps_per_second": 4.322,
|
161 |
+
"step": 120
|
162 |
+
}
|
163 |
+
],
|
164 |
+
"logging_steps": 10,
|
165 |
+
"max_steps": 120,
|
166 |
+
"num_train_epochs": 10,
|
167 |
+
"save_steps": 500,
|
168 |
+
"total_flos": 7598366896619520.0,
|
169 |
+
"trial_name": null,
|
170 |
+
"trial_params": null
|
171 |
+
}
|
low-shot-task-specific-500-ex/cola/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8fd855aa267cbdcebda3428b287cf8b570b4df8a9e36df6feb7196098250a51
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5152d962ae2befcfe0aedba6ab58b8db4d23257a65d5616e0d250964461d934
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1d6a49094ccff938ab86e6209f355a618f8701ca301da56250d93f602c172c6
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c30c2a2ce0908cdf3fafe95df8bab394435e84155013c948e02ec0288e93b6fe
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b328efc508129bc7f57f4b7996c1bcd196558a43b1355a8510ec55800cd250a4
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.315158873796463,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/commonsense_qa/checkpoint-120",
|
4 |
+
"epoch": 9.6,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 120,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.333333333333333e-05,
|
14 |
+
"loss": 4.3959,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 2.007786750793457,
|
20 |
+
"eval_runtime": 3.9225,
|
21 |
+
"eval_samples_per_second": 25.494,
|
22 |
+
"eval_steps_per_second": 3.314,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.666666666666667e-05,
|
28 |
+
"loss": 1.4138,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 0.5842701196670532,
|
34 |
+
"eval_runtime": 3.8952,
|
35 |
+
"eval_samples_per_second": 25.673,
|
36 |
+
"eval_steps_per_second": 3.337,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.000000000000001e-05,
|
42 |
+
"loss": 0.5802,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.48449742794036865,
|
48 |
+
"eval_runtime": 3.9041,
|
49 |
+
"eval_samples_per_second": 25.614,
|
50 |
+
"eval_steps_per_second": 3.33,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.333333333333333e-05,
|
56 |
+
"loss": 0.4476,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.666666666666667e-05,
|
62 |
+
"loss": 0.3758,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.3487338721752167,
|
68 |
+
"eval_runtime": 3.9136,
|
69 |
+
"eval_samples_per_second": 25.552,
|
70 |
+
"eval_steps_per_second": 3.322,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4e-05,
|
76 |
+
"loss": 0.3099,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.3447181284427643,
|
82 |
+
"eval_runtime": 3.9145,
|
83 |
+
"eval_samples_per_second": 25.546,
|
84 |
+
"eval_steps_per_second": 3.321,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.3333333333333335e-05,
|
90 |
+
"loss": 0.2785,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.3341815173625946,
|
96 |
+
"eval_runtime": 3.9058,
|
97 |
+
"eval_samples_per_second": 25.603,
|
98 |
+
"eval_steps_per_second": 3.328,
|
99 |
+
"step": 75
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 6.4,
|
103 |
+
"learning_rate": 2.6666666666666667e-05,
|
104 |
+
"loss": 0.2473,
|
105 |
+
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 6.96,
|
109 |
+
"eval_loss": 0.32787469029426575,
|
110 |
+
"eval_runtime": 3.9132,
|
111 |
+
"eval_samples_per_second": 25.555,
|
112 |
+
"eval_steps_per_second": 3.322,
|
113 |
+
"step": 87
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 7.2,
|
117 |
+
"learning_rate": 2e-05,
|
118 |
+
"loss": 0.2096,
|
119 |
+
"step": 90
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 8.0,
|
123 |
+
"learning_rate": 1.3333333333333333e-05,
|
124 |
+
"loss": 0.2026,
|
125 |
+
"step": 100
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"epoch": 8.0,
|
129 |
+
"eval_loss": 0.36058053374290466,
|
130 |
+
"eval_runtime": 3.9049,
|
131 |
+
"eval_samples_per_second": 25.609,
|
132 |
+
"eval_steps_per_second": 3.329,
|
133 |
+
"step": 100
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 8.8,
|
137 |
+
"learning_rate": 6.666666666666667e-06,
|
138 |
+
"loss": 0.1703,
|
139 |
+
"step": 110
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"epoch": 8.96,
|
143 |
+
"eval_loss": 0.32292404770851135,
|
144 |
+
"eval_runtime": 3.9053,
|
145 |
+
"eval_samples_per_second": 25.606,
|
146 |
+
"eval_steps_per_second": 3.329,
|
147 |
+
"step": 112
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 9.6,
|
151 |
+
"learning_rate": 0.0,
|
152 |
+
"loss": 0.174,
|
153 |
+
"step": 120
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"epoch": 9.6,
|
157 |
+
"eval_loss": 0.315158873796463,
|
158 |
+
"eval_runtime": 3.9182,
|
159 |
+
"eval_samples_per_second": 25.522,
|
160 |
+
"eval_steps_per_second": 3.318,
|
161 |
+
"step": 120
|
162 |
+
}
|
163 |
+
],
|
164 |
+
"logging_steps": 10,
|
165 |
+
"max_steps": 120,
|
166 |
+
"num_train_epochs": 10,
|
167 |
+
"save_steps": 500,
|
168 |
+
"total_flos": 1.884385099874304e+16,
|
169 |
+
"trial_name": null,
|
170 |
+
"trial_params": null
|
171 |
+
}
|
low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45bff2219beb51a82849d4d07eba12e3cd594f77292977d4c4572844ac5cbf0b
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd259302fbc3589e465552374ac7ef975db55d5443842d0886e31d2e84eafd63
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bc44b0176a9bcc2ff66801d67ece43987fa19edcaffeda2d47329715c2eca0b
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:052af8166da591bdc27e359bc7d7771179713f7891b6826f85f597392b9ae762
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc5e65f6f4846aebdaab8b704e9eeffb8f1787e8b333c20c764dad3451c8daf1
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.27617308497428894,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/emotion/checkpoint-87",
|
4 |
+
"epoch": 6.96,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 87,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.333333333333333e-05,
|
14 |
+
"loss": 5.8573,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 4.265514373779297,
|
20 |
+
"eval_runtime": 3.4719,
|
21 |
+
"eval_samples_per_second": 28.803,
|
22 |
+
"eval_steps_per_second": 3.744,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.666666666666667e-05,
|
28 |
+
"loss": 3.8105,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 1.5850574970245361,
|
34 |
+
"eval_runtime": 3.4571,
|
35 |
+
"eval_samples_per_second": 28.926,
|
36 |
+
"eval_steps_per_second": 3.76,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.0666666666666666e-05,
|
42 |
+
"loss": 1.7041,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.5069144368171692,
|
48 |
+
"eval_runtime": 3.4616,
|
49 |
+
"eval_samples_per_second": 28.889,
|
50 |
+
"eval_steps_per_second": 3.756,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.4000000000000005e-05,
|
56 |
+
"loss": 0.6618,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.7333333333333336e-05,
|
62 |
+
"loss": 0.3247,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.33890244364738464,
|
68 |
+
"eval_runtime": 3.4571,
|
69 |
+
"eval_samples_per_second": 28.926,
|
70 |
+
"eval_steps_per_second": 3.76,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4.066666666666667e-05,
|
76 |
+
"loss": 0.271,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.3074319362640381,
|
82 |
+
"eval_runtime": 3.4533,
|
83 |
+
"eval_samples_per_second": 28.958,
|
84 |
+
"eval_steps_per_second": 3.765,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.4e-05,
|
90 |
+
"loss": 0.2088,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.29454201459884644,
|
96 |
+
"eval_runtime": 3.448,
|
97 |
+
"eval_samples_per_second": 29.002,
|
98 |
+
"eval_steps_per_second": 3.77,
|
99 |
+
"step": 75
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 6.4,
|
103 |
+
"learning_rate": 2.7333333333333335e-05,
|
104 |
+
"loss": 0.1924,
|
105 |
+
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 6.96,
|
109 |
+
"eval_loss": 0.27617308497428894,
|
110 |
+
"eval_runtime": 3.4613,
|
111 |
+
"eval_samples_per_second": 28.89,
|
112 |
+
"eval_steps_per_second": 3.756,
|
113 |
+
"step": 87
|
114 |
+
}
|
115 |
+
],
|
116 |
+
"logging_steps": 10,
|
117 |
+
"max_steps": 120,
|
118 |
+
"num_train_epochs": 10,
|
119 |
+
"save_steps": 500,
|
120 |
+
"total_flos": 1.002400891600896e+16,
|
121 |
+
"trial_name": null,
|
122 |
+
"trial_params": null
|
123 |
+
}
|
low-shot-task-specific-500-ex/emotion/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2807111bcd404841c3e700ab3cab78a978a93e97c447ffe05c31e323ab3de999
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:452926c1e61adf1dc9d07ddcd94668ffb5984646601a7f43cfccf35f8ed8f15d
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c2ceb0a22aaf3ac5943e52d26f725ad35605b686d466e24cb6b88e9b56bab9e
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb2d1c591c012870eb39230986af7413438032c45508997b22b8b2e04069c233
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86f741a77aed590e2df1e55bdd0d9033c12228c5cb1e1789672b7ce71994aa05
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.22931724786758423,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/social_i_qa/checkpoint-75",
|
4 |
+
"epoch": 6.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 75,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.333333333333333e-05,
|
14 |
+
"loss": 4.8517,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 1.9629485607147217,
|
20 |
+
"eval_runtime": 4.1824,
|
21 |
+
"eval_samples_per_second": 23.91,
|
22 |
+
"eval_steps_per_second": 3.108,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.666666666666667e-05,
|
28 |
+
"loss": 1.2888,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 0.41052401065826416,
|
34 |
+
"eval_runtime": 4.1752,
|
35 |
+
"eval_samples_per_second": 23.951,
|
36 |
+
"eval_steps_per_second": 3.114,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.000000000000001e-05,
|
42 |
+
"loss": 0.4255,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.32185935974121094,
|
48 |
+
"eval_runtime": 4.1821,
|
49 |
+
"eval_samples_per_second": 23.911,
|
50 |
+
"eval_steps_per_second": 3.108,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.333333333333333e-05,
|
56 |
+
"loss": 0.2955,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.666666666666667e-05,
|
62 |
+
"loss": 0.2552,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.26777762174606323,
|
68 |
+
"eval_runtime": 4.1799,
|
69 |
+
"eval_samples_per_second": 23.924,
|
70 |
+
"eval_steps_per_second": 3.11,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4e-05,
|
76 |
+
"loss": 0.2144,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.24417449533939362,
|
82 |
+
"eval_runtime": 4.1595,
|
83 |
+
"eval_samples_per_second": 24.042,
|
84 |
+
"eval_steps_per_second": 3.125,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.3333333333333335e-05,
|
90 |
+
"loss": 0.1887,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.22931724786758423,
|
96 |
+
"eval_runtime": 4.1585,
|
97 |
+
"eval_samples_per_second": 24.047,
|
98 |
+
"eval_steps_per_second": 3.126,
|
99 |
+
"step": 75
|
100 |
+
}
|
101 |
+
],
|
102 |
+
"logging_steps": 10,
|
103 |
+
"max_steps": 120,
|
104 |
+
"num_train_epochs": 10,
|
105 |
+
"save_steps": 500,
|
106 |
+
"total_flos": 1.244223306989568e+16,
|
107 |
+
"trial_name": null,
|
108 |
+
"trial_params": null
|
109 |
+
}
|
low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc625edfba8d629ae9a11f5c619aeadcf62fa8f504d60898b62237fc19448f60
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/sst/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f0bf1649f6d9b8dc8d6a74e917b2986eb9e0c9c257614ade4af288256d9a4f4
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/sst/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:522572fa606bbb71751d11ef99ae52f5681a609d3d119335844ab4f53ba0d826
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/sst/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3580967c07f4e6cea186553a49db7882eeeb990b25cfad881cf2a6edb9233e4a
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/sst/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dc34eb4d15f40db25f296376c3b3cbb8431c5236c4b6fd8813dabe4ca7b3ea2
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/sst/best_model/trainer_state.json
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.042198196053504944,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sst/checkpoint-87",
|
4 |
+
"epoch": 6.96,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 87,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.466666666666667e-05,
|
14 |
+
"loss": 7.0533,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 4.983966827392578,
|
20 |
+
"eval_runtime": 3.24,
|
21 |
+
"eval_samples_per_second": 30.865,
|
22 |
+
"eval_steps_per_second": 4.012,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.866666666666666e-05,
|
28 |
+
"loss": 4.1938,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 0.4440341889858246,
|
34 |
+
"eval_runtime": 3.2423,
|
35 |
+
"eval_samples_per_second": 30.843,
|
36 |
+
"eval_steps_per_second": 4.01,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.2e-05,
|
42 |
+
"loss": 0.6862,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.1788669228553772,
|
48 |
+
"eval_runtime": 3.2349,
|
49 |
+
"eval_samples_per_second": 30.913,
|
50 |
+
"eval_steps_per_second": 4.019,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.5333333333333334e-05,
|
56 |
+
"loss": 0.2043,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.8666666666666666e-05,
|
62 |
+
"loss": 0.1107,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.06379850953817368,
|
68 |
+
"eval_runtime": 3.2374,
|
69 |
+
"eval_samples_per_second": 30.889,
|
70 |
+
"eval_steps_per_second": 4.016,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4.2000000000000004e-05,
|
76 |
+
"loss": 0.0491,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.0445735827088356,
|
82 |
+
"eval_runtime": 3.2374,
|
83 |
+
"eval_samples_per_second": 30.889,
|
84 |
+
"eval_steps_per_second": 4.016,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.5333333333333336e-05,
|
90 |
+
"loss": 0.0273,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.04596562311053276,
|
96 |
+
"eval_runtime": 3.2388,
|
97 |
+
"eval_samples_per_second": 30.876,
|
98 |
+
"eval_steps_per_second": 4.014,
|
99 |
+
"step": 75
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 6.4,
|
103 |
+
"learning_rate": 2.8666666666666668e-05,
|
104 |
+
"loss": 0.0222,
|
105 |
+
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 6.96,
|
109 |
+
"eval_loss": 0.042198196053504944,
|
110 |
+
"eval_runtime": 3.2385,
|
111 |
+
"eval_samples_per_second": 30.879,
|
112 |
+
"eval_steps_per_second": 4.014,
|
113 |
+
"step": 87
|
114 |
+
}
|
115 |
+
],
|
116 |
+
"logging_steps": 10,
|
117 |
+
"max_steps": 120,
|
118 |
+
"num_train_epochs": 10,
|
119 |
+
"save_steps": 500,
|
120 |
+
"total_flos": 7054390093086720.0,
|
121 |
+
"trial_name": null,
|
122 |
+
"trial_params": null
|
123 |
+
}
|
low-shot-task-specific-500-ex/sst/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c42a2e587a4c59713f0200d20e8dcc233dcef141930bfbf631c04969b44050c3
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/sum/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|
low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a729e9feda81f6fe2ca5709b1c5420648c705df61f8d0f7729878f5fb4de6b9
|
3 |
+
size 104973389
|
low-shot-task-specific-500-ex/sum/best_model/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0940dfeb998ac49fc9ea6ee82de9e1a31f888566cadbbf3e12a6b31771c9257b
|
3 |
+
size 209984517
|
low-shot-task-specific-500-ex/sum/best_model/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db42f39e4e1e49a8785b28d59ee0d6a43f5f529564318dd434906402c044f9e5
|
3 |
+
size 14575
|
low-shot-task-specific-500-ex/sum/best_model/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c348388a8e293c1a759c71c596beff737512abb60e161371fe0d9e9edf9afe53
|
3 |
+
size 627
|
low-shot-task-specific-500-ex/sum/best_model/trainer_state.json
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.031680114567279816,
|
3 |
+
"best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sum/checkpoint-100",
|
4 |
+
"epoch": 8.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.8,
|
13 |
+
"learning_rate": 7.333333333333333e-05,
|
14 |
+
"loss": 2.9796,
|
15 |
+
"step": 10
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.96,
|
19 |
+
"eval_loss": 1.8474284410476685,
|
20 |
+
"eval_runtime": 2.9025,
|
21 |
+
"eval_samples_per_second": 34.453,
|
22 |
+
"eval_steps_per_second": 4.479,
|
23 |
+
"step": 12
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.6,
|
27 |
+
"learning_rate": 6.666666666666667e-05,
|
28 |
+
"loss": 1.6178,
|
29 |
+
"step": 20
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_loss": 0.31484881043434143,
|
34 |
+
"eval_runtime": 2.9228,
|
35 |
+
"eval_samples_per_second": 34.214,
|
36 |
+
"eval_steps_per_second": 4.448,
|
37 |
+
"step": 25
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 2.4,
|
41 |
+
"learning_rate": 6.000000000000001e-05,
|
42 |
+
"loss": 0.4051,
|
43 |
+
"step": 30
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 2.96,
|
47 |
+
"eval_loss": 0.13756035268306732,
|
48 |
+
"eval_runtime": 2.9586,
|
49 |
+
"eval_samples_per_second": 33.799,
|
50 |
+
"eval_steps_per_second": 4.394,
|
51 |
+
"step": 37
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 3.2,
|
55 |
+
"learning_rate": 5.333333333333333e-05,
|
56 |
+
"loss": 0.1943,
|
57 |
+
"step": 40
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.0,
|
61 |
+
"learning_rate": 4.666666666666667e-05,
|
62 |
+
"loss": 0.0721,
|
63 |
+
"step": 50
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 4.0,
|
67 |
+
"eval_loss": 0.06226326525211334,
|
68 |
+
"eval_runtime": 2.9426,
|
69 |
+
"eval_samples_per_second": 33.984,
|
70 |
+
"eval_steps_per_second": 4.418,
|
71 |
+
"step": 50
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.8,
|
75 |
+
"learning_rate": 4e-05,
|
76 |
+
"loss": 0.043,
|
77 |
+
"step": 60
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 4.96,
|
81 |
+
"eval_loss": 0.03685503825545311,
|
82 |
+
"eval_runtime": 2.9565,
|
83 |
+
"eval_samples_per_second": 33.823,
|
84 |
+
"eval_steps_per_second": 4.397,
|
85 |
+
"step": 62
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 5.6,
|
89 |
+
"learning_rate": 3.3333333333333335e-05,
|
90 |
+
"loss": 0.0342,
|
91 |
+
"step": 70
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"epoch": 6.0,
|
95 |
+
"eval_loss": 0.045043423771858215,
|
96 |
+
"eval_runtime": 2.9492,
|
97 |
+
"eval_samples_per_second": 33.907,
|
98 |
+
"eval_steps_per_second": 4.408,
|
99 |
+
"step": 75
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 6.4,
|
103 |
+
"learning_rate": 2.6666666666666667e-05,
|
104 |
+
"loss": 0.0254,
|
105 |
+
"step": 80
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 6.96,
|
109 |
+
"eval_loss": 0.04237747564911842,
|
110 |
+
"eval_runtime": 2.9496,
|
111 |
+
"eval_samples_per_second": 33.903,
|
112 |
+
"eval_steps_per_second": 4.407,
|
113 |
+
"step": 87
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 7.2,
|
117 |
+
"learning_rate": 2e-05,
|
118 |
+
"loss": 0.0293,
|
119 |
+
"step": 90
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 8.0,
|
123 |
+
"learning_rate": 1.3333333333333333e-05,
|
124 |
+
"loss": 0.0163,
|
125 |
+
"step": 100
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"epoch": 8.0,
|
129 |
+
"eval_loss": 0.031680114567279816,
|
130 |
+
"eval_runtime": 2.9477,
|
131 |
+
"eval_samples_per_second": 33.925,
|
132 |
+
"eval_steps_per_second": 4.41,
|
133 |
+
"step": 100
|
134 |
+
}
|
135 |
+
],
|
136 |
+
"logging_steps": 10,
|
137 |
+
"max_steps": 120,
|
138 |
+
"num_train_epochs": 10,
|
139 |
+
"save_steps": 500,
|
140 |
+
"total_flos": 5934292402176000.0,
|
141 |
+
"trial_name": null,
|
142 |
+
"trial_params": null
|
143 |
+
}
|
low-shot-task-specific-500-ex/sum/best_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ed1b612847b0105ad99860aaefb41fa0340e2e0280e3c4076ca491ac381da18
|
3 |
+
size 4091
|
low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"lora_alpha": 16,
|
9 |
+
"lora_dropout": 0.05,
|
10 |
+
"merge_weights": false,
|
11 |
+
"modules_to_save": null,
|
12 |
+
"peft_type": "LORA",
|
13 |
+
"r": 16,
|
14 |
+
"target_modules": [
|
15 |
+
"q_proj",
|
16 |
+
"k_proj",
|
17 |
+
"v_proj",
|
18 |
+
"o_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|