Muhammad Khalifa committed on
Commit
e5d11d8
1 Parent(s): 70d9848

add 500-shot models

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json +21 -0
  2. low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin +3 -0
  3. low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt +3 -0
  4. low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth +3 -0
  5. low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt +3 -0
  6. low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json +135 -0
  7. low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin +3 -0
  8. low-shot-task-specific-500-ex/cola/best_model/adapter_config.json +21 -0
  9. low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin +3 -0
  10. low-shot-task-specific-500-ex/cola/best_model/optimizer.pt +3 -0
  11. low-shot-task-specific-500-ex/cola/best_model/rng_state.pth +3 -0
  12. low-shot-task-specific-500-ex/cola/best_model/scheduler.pt +3 -0
  13. low-shot-task-specific-500-ex/cola/best_model/trainer_state.json +171 -0
  14. low-shot-task-specific-500-ex/cola/best_model/training_args.bin +3 -0
  15. low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json +21 -0
  16. low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin +3 -0
  17. low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt +3 -0
  18. low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth +3 -0
  19. low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt +3 -0
  20. low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json +171 -0
  21. low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin +3 -0
  22. low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json +21 -0
  23. low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin +3 -0
  24. low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt +3 -0
  25. low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth +3 -0
  26. low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt +3 -0
  27. low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json +123 -0
  28. low-shot-task-specific-500-ex/emotion/best_model/training_args.bin +3 -0
  29. low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json +21 -0
  30. low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin +3 -0
  31. low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt +3 -0
  32. low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth +3 -0
  33. low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt +3 -0
  34. low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json +109 -0
  35. low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin +3 -0
  36. low-shot-task-specific-500-ex/sst/best_model/adapter_config.json +21 -0
  37. low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin +3 -0
  38. low-shot-task-specific-500-ex/sst/best_model/optimizer.pt +3 -0
  39. low-shot-task-specific-500-ex/sst/best_model/rng_state.pth +3 -0
  40. low-shot-task-specific-500-ex/sst/best_model/scheduler.pt +3 -0
  41. low-shot-task-specific-500-ex/sst/best_model/trainer_state.json +123 -0
  42. low-shot-task-specific-500-ex/sst/best_model/training_args.bin +3 -0
  43. low-shot-task-specific-500-ex/sum/best_model/adapter_config.json +21 -0
  44. low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin +3 -0
  45. low-shot-task-specific-500-ex/sum/best_model/optimizer.pt +3 -0
  46. low-shot-task-specific-500-ex/sum/best_model/rng_state.pth +3 -0
  47. low-shot-task-specific-500-ex/sum/best_model/scheduler.pt +3 -0
  48. low-shot-task-specific-500-ex/sum/best_model/trainer_state.json +143 -0
  49. low-shot-task-specific-500-ex/sum/best_model/training_args.bin +3 -0
  50. low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/coin_flip/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206bdaf3cf034deec6de2394ba6a7b29d0b637ab2d1925332e8f1abb76025dd6
3
+ size 104973389
low-shot-task-specific-500-ex/coin_flip/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d53c7e4694c2cfcf4dd0eb9bfe3b05a7cd7809c6a8e6ff871c4c99d6ddfefaf4
3
+ size 209984517
low-shot-task-specific-500-ex/coin_flip/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3055502c9e3004eb987550db217f6677d695763c959badae25d773f1d985ab91
3
+ size 14575
low-shot-task-specific-500-ex/coin_flip/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aea4ff6d6c72e86d24e872bf7765995d2e2e0abda70fdf4dff06ed25a492666
3
+ size 627
low-shot-task-specific-500-ex/coin_flip/best_model/trainer_state.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.17182409763336182,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/coin_flip/checkpoint-60",
4
+ "epoch": 9.795918367346939,
5
+ "eval_steps": 500,
6
+ "global_step": 60,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.98,
13
+ "eval_loss": 3.2136309146881104,
14
+ "eval_runtime": 1.7971,
15
+ "eval_samples_per_second": 27.266,
16
+ "eval_steps_per_second": 3.895,
17
+ "step": 6
18
+ },
19
+ {
20
+ "epoch": 1.63,
21
+ "learning_rate": 6.666666666666667e-05,
22
+ "loss": 3.5659,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 1.96,
27
+ "eval_loss": 1.1381325721740723,
28
+ "eval_runtime": 1.8028,
29
+ "eval_samples_per_second": 27.18,
30
+ "eval_steps_per_second": 3.883,
31
+ "step": 12
32
+ },
33
+ {
34
+ "epoch": 2.94,
35
+ "eval_loss": 0.39599937200546265,
36
+ "eval_runtime": 1.7938,
37
+ "eval_samples_per_second": 27.316,
38
+ "eval_steps_per_second": 3.902,
39
+ "step": 18
40
+ },
41
+ {
42
+ "epoch": 3.27,
43
+ "learning_rate": 5.333333333333333e-05,
44
+ "loss": 0.8239,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 3.92,
49
+ "eval_loss": 0.23788291215896606,
50
+ "eval_runtime": 1.8071,
51
+ "eval_samples_per_second": 27.115,
52
+ "eval_steps_per_second": 3.874,
53
+ "step": 24
54
+ },
55
+ {
56
+ "epoch": 4.9,
57
+ "learning_rate": 4e-05,
58
+ "loss": 0.2375,
59
+ "step": 30
60
+ },
61
+ {
62
+ "epoch": 4.9,
63
+ "eval_loss": 0.1869448572397232,
64
+ "eval_runtime": 1.8046,
65
+ "eval_samples_per_second": 27.154,
66
+ "eval_steps_per_second": 3.879,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 5.88,
71
+ "eval_loss": 0.1762770116329193,
72
+ "eval_runtime": 1.7955,
73
+ "eval_samples_per_second": 27.291,
74
+ "eval_steps_per_second": 3.899,
75
+ "step": 36
76
+ },
77
+ {
78
+ "epoch": 6.53,
79
+ "learning_rate": 2.6666666666666667e-05,
80
+ "loss": 0.1756,
81
+ "step": 40
82
+ },
83
+ {
84
+ "epoch": 6.86,
85
+ "eval_loss": 0.17334015667438507,
86
+ "eval_runtime": 1.7994,
87
+ "eval_samples_per_second": 27.231,
88
+ "eval_steps_per_second": 3.89,
89
+ "step": 42
90
+ },
91
+ {
92
+ "epoch": 8.0,
93
+ "eval_loss": 0.17443998157978058,
94
+ "eval_runtime": 1.7969,
95
+ "eval_samples_per_second": 27.269,
96
+ "eval_steps_per_second": 3.896,
97
+ "step": 49
98
+ },
99
+ {
100
+ "epoch": 8.16,
101
+ "learning_rate": 1.3333333333333333e-05,
102
+ "loss": 0.1626,
103
+ "step": 50
104
+ },
105
+ {
106
+ "epoch": 8.98,
107
+ "eval_loss": 0.17577075958251953,
108
+ "eval_runtime": 1.7999,
109
+ "eval_samples_per_second": 27.223,
110
+ "eval_steps_per_second": 3.889,
111
+ "step": 55
112
+ },
113
+ {
114
+ "epoch": 9.8,
115
+ "learning_rate": 0.0,
116
+ "loss": 0.1535,
117
+ "step": 60
118
+ },
119
+ {
120
+ "epoch": 9.8,
121
+ "eval_loss": 0.17182409763336182,
122
+ "eval_runtime": 1.7992,
123
+ "eval_samples_per_second": 27.235,
124
+ "eval_steps_per_second": 3.891,
125
+ "step": 60
126
+ }
127
+ ],
128
+ "logging_steps": 10,
129
+ "max_steps": 60,
130
+ "num_train_epochs": 10,
131
+ "save_steps": 500,
132
+ "total_flos": 7168130697461760.0,
133
+ "trial_name": null,
134
+ "trial_params": null
135
+ }
low-shot-task-specific-500-ex/coin_flip/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5173d21d2a38d1cd1cd4daa45bed5a9f6f0d64b0897c6366683a240cd58f864
3
+ size 4091
low-shot-task-specific-500-ex/cola/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/cola/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88db25f61b79433f848f0788c44a69c4f0e655ee09f0508b3af035fc7e02179e
3
+ size 104973389
low-shot-task-specific-500-ex/cola/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43508585f5b8ebfc9532b38cb5a03b32bd704e2b5ebaf34c0b503292c13d7c3f
3
+ size 209984517
low-shot-task-specific-500-ex/cola/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df43d0030d9a94c82d1f09bcf5abbca157094e974c01f4c6b0214cfabe62d21a
3
+ size 14575
low-shot-task-specific-500-ex/cola/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5a1245e45cfb0cd08e1aaad686b01aa603042a02b323bff0c30b6b0eaca154
3
+ size 627
low-shot-task-specific-500-ex/cola/best_model/trainer_state.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.16061067581176758,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/cola/checkpoint-120",
4
+ "epoch": 9.6,
5
+ "eval_steps": 500,
6
+ "global_step": 120,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.333333333333333e-05,
14
+ "loss": 6.6687,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 4.879603385925293,
20
+ "eval_runtime": 2.9872,
21
+ "eval_samples_per_second": 33.476,
22
+ "eval_steps_per_second": 4.352,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.733333333333333e-05,
28
+ "loss": 4.1857,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.6735175251960754,
34
+ "eval_runtime": 2.9786,
35
+ "eval_samples_per_second": 33.573,
36
+ "eval_steps_per_second": 4.364,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.0666666666666666e-05,
42
+ "loss": 1.1578,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.22044576704502106,
48
+ "eval_runtime": 2.9802,
49
+ "eval_samples_per_second": 33.554,
50
+ "eval_steps_per_second": 4.362,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.4000000000000005e-05,
56
+ "loss": 0.2691,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.7333333333333336e-05,
62
+ "loss": 0.2011,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.18446393311023712,
68
+ "eval_runtime": 2.9788,
69
+ "eval_samples_per_second": 33.571,
70
+ "eval_steps_per_second": 4.364,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4.066666666666667e-05,
76
+ "loss": 0.1782,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.17961610853672028,
82
+ "eval_runtime": 2.9749,
83
+ "eval_samples_per_second": 33.615,
84
+ "eval_steps_per_second": 4.37,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.4e-05,
90
+ "loss": 0.1609,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.1864309310913086,
96
+ "eval_runtime": 2.9806,
97
+ "eval_samples_per_second": 33.55,
98
+ "eval_steps_per_second": 4.362,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 6.4,
103
+ "learning_rate": 2.7333333333333335e-05,
104
+ "loss": 0.1644,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 6.96,
109
+ "eval_loss": 0.16424360871315002,
110
+ "eval_runtime": 2.9883,
111
+ "eval_samples_per_second": 33.464,
112
+ "eval_steps_per_second": 4.35,
113
+ "step": 87
114
+ },
115
+ {
116
+ "epoch": 7.2,
117
+ "learning_rate": 2.066666666666667e-05,
118
+ "loss": 0.1389,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 8.0,
123
+ "learning_rate": 1.4e-05,
124
+ "loss": 0.1294,
125
+ "step": 100
126
+ },
127
+ {
128
+ "epoch": 8.0,
129
+ "eval_loss": 0.16847126185894012,
130
+ "eval_runtime": 2.9824,
131
+ "eval_samples_per_second": 33.53,
132
+ "eval_steps_per_second": 4.359,
133
+ "step": 100
134
+ },
135
+ {
136
+ "epoch": 8.8,
137
+ "learning_rate": 7.333333333333333e-06,
138
+ "loss": 0.1189,
139
+ "step": 110
140
+ },
141
+ {
142
+ "epoch": 8.96,
143
+ "eval_loss": 0.16718144714832306,
144
+ "eval_runtime": 2.9865,
145
+ "eval_samples_per_second": 33.485,
146
+ "eval_steps_per_second": 4.353,
147
+ "step": 112
148
+ },
149
+ {
150
+ "epoch": 9.6,
151
+ "learning_rate": 6.666666666666667e-07,
152
+ "loss": 0.1159,
153
+ "step": 120
154
+ },
155
+ {
156
+ "epoch": 9.6,
157
+ "eval_loss": 0.16061067581176758,
158
+ "eval_runtime": 3.0082,
159
+ "eval_samples_per_second": 33.243,
160
+ "eval_steps_per_second": 4.322,
161
+ "step": 120
162
+ }
163
+ ],
164
+ "logging_steps": 10,
165
+ "max_steps": 120,
166
+ "num_train_epochs": 10,
167
+ "save_steps": 500,
168
+ "total_flos": 7598366896619520.0,
169
+ "trial_name": null,
170
+ "trial_params": null
171
+ }
low-shot-task-specific-500-ex/cola/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8fd855aa267cbdcebda3428b287cf8b570b4df8a9e36df6feb7196098250a51
3
+ size 4091
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/commonsense_qa/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5152d962ae2befcfe0aedba6ab58b8db4d23257a65d5616e0d250964461d934
3
+ size 104973389
low-shot-task-specific-500-ex/commonsense_qa/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d6a49094ccff938ab86e6209f355a618f8701ca301da56250d93f602c172c6
3
+ size 209984517
low-shot-task-specific-500-ex/commonsense_qa/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30c2a2ce0908cdf3fafe95df8bab394435e84155013c948e02ec0288e93b6fe
3
+ size 14575
low-shot-task-specific-500-ex/commonsense_qa/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b328efc508129bc7f57f4b7996c1bcd196558a43b1355a8510ec55800cd250a4
3
+ size 627
low-shot-task-specific-500-ex/commonsense_qa/best_model/trainer_state.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.315158873796463,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/commonsense_qa/checkpoint-120",
4
+ "epoch": 9.6,
5
+ "eval_steps": 500,
6
+ "global_step": 120,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.333333333333333e-05,
14
+ "loss": 4.3959,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 2.007786750793457,
20
+ "eval_runtime": 3.9225,
21
+ "eval_samples_per_second": 25.494,
22
+ "eval_steps_per_second": 3.314,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.666666666666667e-05,
28
+ "loss": 1.4138,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.5842701196670532,
34
+ "eval_runtime": 3.8952,
35
+ "eval_samples_per_second": 25.673,
36
+ "eval_steps_per_second": 3.337,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.000000000000001e-05,
42
+ "loss": 0.5802,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.48449742794036865,
48
+ "eval_runtime": 3.9041,
49
+ "eval_samples_per_second": 25.614,
50
+ "eval_steps_per_second": 3.33,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.333333333333333e-05,
56
+ "loss": 0.4476,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.666666666666667e-05,
62
+ "loss": 0.3758,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.3487338721752167,
68
+ "eval_runtime": 3.9136,
69
+ "eval_samples_per_second": 25.552,
70
+ "eval_steps_per_second": 3.322,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4e-05,
76
+ "loss": 0.3099,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.3447181284427643,
82
+ "eval_runtime": 3.9145,
83
+ "eval_samples_per_second": 25.546,
84
+ "eval_steps_per_second": 3.321,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.3333333333333335e-05,
90
+ "loss": 0.2785,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.3341815173625946,
96
+ "eval_runtime": 3.9058,
97
+ "eval_samples_per_second": 25.603,
98
+ "eval_steps_per_second": 3.328,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 6.4,
103
+ "learning_rate": 2.6666666666666667e-05,
104
+ "loss": 0.2473,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 6.96,
109
+ "eval_loss": 0.32787469029426575,
110
+ "eval_runtime": 3.9132,
111
+ "eval_samples_per_second": 25.555,
112
+ "eval_steps_per_second": 3.322,
113
+ "step": 87
114
+ },
115
+ {
116
+ "epoch": 7.2,
117
+ "learning_rate": 2e-05,
118
+ "loss": 0.2096,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 8.0,
123
+ "learning_rate": 1.3333333333333333e-05,
124
+ "loss": 0.2026,
125
+ "step": 100
126
+ },
127
+ {
128
+ "epoch": 8.0,
129
+ "eval_loss": 0.36058053374290466,
130
+ "eval_runtime": 3.9049,
131
+ "eval_samples_per_second": 25.609,
132
+ "eval_steps_per_second": 3.329,
133
+ "step": 100
134
+ },
135
+ {
136
+ "epoch": 8.8,
137
+ "learning_rate": 6.666666666666667e-06,
138
+ "loss": 0.1703,
139
+ "step": 110
140
+ },
141
+ {
142
+ "epoch": 8.96,
143
+ "eval_loss": 0.32292404770851135,
144
+ "eval_runtime": 3.9053,
145
+ "eval_samples_per_second": 25.606,
146
+ "eval_steps_per_second": 3.329,
147
+ "step": 112
148
+ },
149
+ {
150
+ "epoch": 9.6,
151
+ "learning_rate": 0.0,
152
+ "loss": 0.174,
153
+ "step": 120
154
+ },
155
+ {
156
+ "epoch": 9.6,
157
+ "eval_loss": 0.315158873796463,
158
+ "eval_runtime": 3.9182,
159
+ "eval_samples_per_second": 25.522,
160
+ "eval_steps_per_second": 3.318,
161
+ "step": 120
162
+ }
163
+ ],
164
+ "logging_steps": 10,
165
+ "max_steps": 120,
166
+ "num_train_epochs": 10,
167
+ "save_steps": 500,
168
+ "total_flos": 1.884385099874304e+16,
169
+ "trial_name": null,
170
+ "trial_params": null
171
+ }
low-shot-task-specific-500-ex/commonsense_qa/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bff2219beb51a82849d4d07eba12e3cd594f77292977d4c4572844ac5cbf0b
3
+ size 4091
low-shot-task-specific-500-ex/emotion/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/emotion/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd259302fbc3589e465552374ac7ef975db55d5443842d0886e31d2e84eafd63
3
+ size 104973389
low-shot-task-specific-500-ex/emotion/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc44b0176a9bcc2ff66801d67ece43987fa19edcaffeda2d47329715c2eca0b
3
+ size 209984517
low-shot-task-specific-500-ex/emotion/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052af8166da591bdc27e359bc7d7771179713f7891b6826f85f597392b9ae762
3
+ size 14575
low-shot-task-specific-500-ex/emotion/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5e65f6f4846aebdaab8b704e9eeffb8f1787e8b333c20c764dad3451c8daf1
3
+ size 627
low-shot-task-specific-500-ex/emotion/best_model/trainer_state.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.27617308497428894,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/emotion/checkpoint-87",
4
+ "epoch": 6.96,
5
+ "eval_steps": 500,
6
+ "global_step": 87,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.333333333333333e-05,
14
+ "loss": 5.8573,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 4.265514373779297,
20
+ "eval_runtime": 3.4719,
21
+ "eval_samples_per_second": 28.803,
22
+ "eval_steps_per_second": 3.744,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.666666666666667e-05,
28
+ "loss": 3.8105,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 1.5850574970245361,
34
+ "eval_runtime": 3.4571,
35
+ "eval_samples_per_second": 28.926,
36
+ "eval_steps_per_second": 3.76,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.0666666666666666e-05,
42
+ "loss": 1.7041,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.5069144368171692,
48
+ "eval_runtime": 3.4616,
49
+ "eval_samples_per_second": 28.889,
50
+ "eval_steps_per_second": 3.756,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.4000000000000005e-05,
56
+ "loss": 0.6618,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.7333333333333336e-05,
62
+ "loss": 0.3247,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.33890244364738464,
68
+ "eval_runtime": 3.4571,
69
+ "eval_samples_per_second": 28.926,
70
+ "eval_steps_per_second": 3.76,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4.066666666666667e-05,
76
+ "loss": 0.271,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.3074319362640381,
82
+ "eval_runtime": 3.4533,
83
+ "eval_samples_per_second": 28.958,
84
+ "eval_steps_per_second": 3.765,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.4e-05,
90
+ "loss": 0.2088,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.29454201459884644,
96
+ "eval_runtime": 3.448,
97
+ "eval_samples_per_second": 29.002,
98
+ "eval_steps_per_second": 3.77,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 6.4,
103
+ "learning_rate": 2.7333333333333335e-05,
104
+ "loss": 0.1924,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 6.96,
109
+ "eval_loss": 0.27617308497428894,
110
+ "eval_runtime": 3.4613,
111
+ "eval_samples_per_second": 28.89,
112
+ "eval_steps_per_second": 3.756,
113
+ "step": 87
114
+ }
115
+ ],
116
+ "logging_steps": 10,
117
+ "max_steps": 120,
118
+ "num_train_epochs": 10,
119
+ "save_steps": 500,
120
+ "total_flos": 1.002400891600896e+16,
121
+ "trial_name": null,
122
+ "trial_params": null
123
+ }
low-shot-task-specific-500-ex/emotion/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2807111bcd404841c3e700ab3cab78a978a93e97c447ffe05c31e323ab3de999
3
+ size 4091
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/social_i_qa/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:452926c1e61adf1dc9d07ddcd94668ffb5984646601a7f43cfccf35f8ed8f15d
3
+ size 104973389
low-shot-task-specific-500-ex/social_i_qa/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2ceb0a22aaf3ac5943e52d26f725ad35605b686d466e24cb6b88e9b56bab9e
3
+ size 209984517
low-shot-task-specific-500-ex/social_i_qa/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb2d1c591c012870eb39230986af7413438032c45508997b22b8b2e04069c233
3
+ size 14575
low-shot-task-specific-500-ex/social_i_qa/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86f741a77aed590e2df1e55bdd0d9033c12228c5cb1e1789672b7ce71994aa05
3
+ size 627
low-shot-task-specific-500-ex/social_i_qa/best_model/trainer_state.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.22931724786758423,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/social_i_qa/checkpoint-75",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.333333333333333e-05,
14
+ "loss": 4.8517,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 1.9629485607147217,
20
+ "eval_runtime": 4.1824,
21
+ "eval_samples_per_second": 23.91,
22
+ "eval_steps_per_second": 3.108,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.666666666666667e-05,
28
+ "loss": 1.2888,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.41052401065826416,
34
+ "eval_runtime": 4.1752,
35
+ "eval_samples_per_second": 23.951,
36
+ "eval_steps_per_second": 3.114,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.000000000000001e-05,
42
+ "loss": 0.4255,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.32185935974121094,
48
+ "eval_runtime": 4.1821,
49
+ "eval_samples_per_second": 23.911,
50
+ "eval_steps_per_second": 3.108,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.333333333333333e-05,
56
+ "loss": 0.2955,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.666666666666667e-05,
62
+ "loss": 0.2552,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.26777762174606323,
68
+ "eval_runtime": 4.1799,
69
+ "eval_samples_per_second": 23.924,
70
+ "eval_steps_per_second": 3.11,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4e-05,
76
+ "loss": 0.2144,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.24417449533939362,
82
+ "eval_runtime": 4.1595,
83
+ "eval_samples_per_second": 24.042,
84
+ "eval_steps_per_second": 3.125,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.3333333333333335e-05,
90
+ "loss": 0.1887,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.22931724786758423,
96
+ "eval_runtime": 4.1585,
97
+ "eval_samples_per_second": 24.047,
98
+ "eval_steps_per_second": 3.126,
99
+ "step": 75
100
+ }
101
+ ],
102
+ "logging_steps": 10,
103
+ "max_steps": 120,
104
+ "num_train_epochs": 10,
105
+ "save_steps": 500,
106
+ "total_flos": 1.244223306989568e+16,
107
+ "trial_name": null,
108
+ "trial_params": null
109
+ }
low-shot-task-specific-500-ex/social_i_qa/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc625edfba8d629ae9a11f5c619aeadcf62fa8f504d60898b62237fc19448f60
3
+ size 4091
low-shot-task-specific-500-ex/sst/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/sst/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0bf1649f6d9b8dc8d6a74e917b2986eb9e0c9c257614ade4af288256d9a4f4
3
+ size 104973389
low-shot-task-specific-500-ex/sst/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:522572fa606bbb71751d11ef99ae52f5681a609d3d119335844ab4f53ba0d826
3
+ size 209984517
low-shot-task-specific-500-ex/sst/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3580967c07f4e6cea186553a49db7882eeeb990b25cfad881cf2a6edb9233e4a
3
+ size 14575
low-shot-task-specific-500-ex/sst/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc34eb4d15f40db25f296376c3b3cbb8431c5236c4b6fd8813dabe4ca7b3ea2
3
+ size 627
low-shot-task-specific-500-ex/sst/best_model/trainer_state.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.042198196053504944,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sst/checkpoint-87",
4
+ "epoch": 6.96,
5
+ "eval_steps": 500,
6
+ "global_step": 87,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.466666666666667e-05,
14
+ "loss": 7.0533,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 4.983966827392578,
20
+ "eval_runtime": 3.24,
21
+ "eval_samples_per_second": 30.865,
22
+ "eval_steps_per_second": 4.012,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.866666666666666e-05,
28
+ "loss": 4.1938,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.4440341889858246,
34
+ "eval_runtime": 3.2423,
35
+ "eval_samples_per_second": 30.843,
36
+ "eval_steps_per_second": 4.01,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.2e-05,
42
+ "loss": 0.6862,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.1788669228553772,
48
+ "eval_runtime": 3.2349,
49
+ "eval_samples_per_second": 30.913,
50
+ "eval_steps_per_second": 4.019,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.5333333333333334e-05,
56
+ "loss": 0.2043,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.8666666666666666e-05,
62
+ "loss": 0.1107,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.06379850953817368,
68
+ "eval_runtime": 3.2374,
69
+ "eval_samples_per_second": 30.889,
70
+ "eval_steps_per_second": 4.016,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4.2000000000000004e-05,
76
+ "loss": 0.0491,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.0445735827088356,
82
+ "eval_runtime": 3.2374,
83
+ "eval_samples_per_second": 30.889,
84
+ "eval_steps_per_second": 4.016,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.5333333333333336e-05,
90
+ "loss": 0.0273,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.04596562311053276,
96
+ "eval_runtime": 3.2388,
97
+ "eval_samples_per_second": 30.876,
98
+ "eval_steps_per_second": 4.014,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 6.4,
103
+ "learning_rate": 2.8666666666666668e-05,
104
+ "loss": 0.0222,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 6.96,
109
+ "eval_loss": 0.042198196053504944,
110
+ "eval_runtime": 3.2385,
111
+ "eval_samples_per_second": 30.879,
112
+ "eval_steps_per_second": 4.014,
113
+ "step": 87
114
+ }
115
+ ],
116
+ "logging_steps": 10,
117
+ "max_steps": 120,
118
+ "num_train_epochs": 10,
119
+ "save_steps": 500,
120
+ "total_flos": 7054390093086720.0,
121
+ "trial_name": null,
122
+ "trial_params": null
123
+ }
low-shot-task-specific-500-ex/sst/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42a2e587a4c59713f0200d20e8dcc233dcef141930bfbf631c04969b44050c3
3
+ size 4091
low-shot-task-specific-500-ex/sum/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific-500-ex/sum/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a729e9feda81f6fe2ca5709b1c5420648c705df61f8d0f7729878f5fb4de6b9
3
+ size 104973389
low-shot-task-specific-500-ex/sum/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0940dfeb998ac49fc9ea6ee82de9e1a31f888566cadbbf3e12a6b31771c9257b
3
+ size 209984517
low-shot-task-specific-500-ex/sum/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db42f39e4e1e49a8785b28d59ee0d6a43f5f529564318dd434906402c044f9e5
3
+ size 14575
low-shot-task-specific-500-ex/sum/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c348388a8e293c1a759c71c596beff737512abb60e161371fe0d9e9edf9afe53
3
+ size 627
low-shot-task-specific-500-ex/sum/best_model/trainer_state.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.031680114567279816,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific-500-ex/sum/checkpoint-100",
4
+ "epoch": 8.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "learning_rate": 7.333333333333333e-05,
14
+ "loss": 2.9796,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.96,
19
+ "eval_loss": 1.8474284410476685,
20
+ "eval_runtime": 2.9025,
21
+ "eval_samples_per_second": 34.453,
22
+ "eval_steps_per_second": 4.479,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 1.6,
27
+ "learning_rate": 6.666666666666667e-05,
28
+ "loss": 1.6178,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_loss": 0.31484881043434143,
34
+ "eval_runtime": 2.9228,
35
+ "eval_samples_per_second": 34.214,
36
+ "eval_steps_per_second": 4.448,
37
+ "step": 25
38
+ },
39
+ {
40
+ "epoch": 2.4,
41
+ "learning_rate": 6.000000000000001e-05,
42
+ "loss": 0.4051,
43
+ "step": 30
44
+ },
45
+ {
46
+ "epoch": 2.96,
47
+ "eval_loss": 0.13756035268306732,
48
+ "eval_runtime": 2.9586,
49
+ "eval_samples_per_second": 33.799,
50
+ "eval_steps_per_second": 4.394,
51
+ "step": 37
52
+ },
53
+ {
54
+ "epoch": 3.2,
55
+ "learning_rate": 5.333333333333333e-05,
56
+ "loss": 0.1943,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 4.666666666666667e-05,
62
+ "loss": 0.0721,
63
+ "step": 50
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_loss": 0.06226326525211334,
68
+ "eval_runtime": 2.9426,
69
+ "eval_samples_per_second": 33.984,
70
+ "eval_steps_per_second": 4.418,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.8,
75
+ "learning_rate": 4e-05,
76
+ "loss": 0.043,
77
+ "step": 60
78
+ },
79
+ {
80
+ "epoch": 4.96,
81
+ "eval_loss": 0.03685503825545311,
82
+ "eval_runtime": 2.9565,
83
+ "eval_samples_per_second": 33.823,
84
+ "eval_steps_per_second": 4.397,
85
+ "step": 62
86
+ },
87
+ {
88
+ "epoch": 5.6,
89
+ "learning_rate": 3.3333333333333335e-05,
90
+ "loss": 0.0342,
91
+ "step": 70
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.045043423771858215,
96
+ "eval_runtime": 2.9492,
97
+ "eval_samples_per_second": 33.907,
98
+ "eval_steps_per_second": 4.408,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 6.4,
103
+ "learning_rate": 2.6666666666666667e-05,
104
+ "loss": 0.0254,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 6.96,
109
+ "eval_loss": 0.04237747564911842,
110
+ "eval_runtime": 2.9496,
111
+ "eval_samples_per_second": 33.903,
112
+ "eval_steps_per_second": 4.407,
113
+ "step": 87
114
+ },
115
+ {
116
+ "epoch": 7.2,
117
+ "learning_rate": 2e-05,
118
+ "loss": 0.0293,
119
+ "step": 90
120
+ },
121
+ {
122
+ "epoch": 8.0,
123
+ "learning_rate": 1.3333333333333333e-05,
124
+ "loss": 0.0163,
125
+ "step": 100
126
+ },
127
+ {
128
+ "epoch": 8.0,
129
+ "eval_loss": 0.031680114567279816,
130
+ "eval_runtime": 2.9477,
131
+ "eval_samples_per_second": 33.925,
132
+ "eval_steps_per_second": 4.41,
133
+ "step": 100
134
+ }
135
+ ],
136
+ "logging_steps": 10,
137
+ "max_steps": 120,
138
+ "num_train_epochs": 10,
139
+ "save_steps": 500,
140
+ "total_flos": 5934292402176000.0,
141
+ "trial_name": null,
142
+ "trial_params": null
143
+ }
low-shot-task-specific-500-ex/sum/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed1b612847b0105ad99860aaefb41fa0340e2e0280e3c4076ca491ac381da18
3
+ size 4091
low-shot-task-specific-500-ex/svamp/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }