Muhammad Khalifa committed on
Commit
bacaabd
1 Parent(s): ed42aba

add low-shot models

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. low-shot-task-specific/coin_flip/best_model/adapter_config.json +21 -0
  2. low-shot-task-specific/coin_flip/best_model/adapter_model.bin +3 -0
  3. low-shot-task-specific/coin_flip/best_model/optimizer.pt +3 -0
  4. low-shot-task-specific/coin_flip/best_model/rng_state.pth +3 -0
  5. low-shot-task-specific/coin_flip/best_model/scheduler.pt +3 -0
  6. low-shot-task-specific/coin_flip/best_model/trainer_state.json +121 -0
  7. low-shot-task-specific/coin_flip/best_model/training_args.bin +3 -0
  8. low-shot-task-specific/cola/best_model/adapter_config.json +21 -0
  9. low-shot-task-specific/cola/best_model/adapter_model.bin +3 -0
  10. low-shot-task-specific/cola/best_model/optimizer.pt +3 -0
  11. low-shot-task-specific/cola/best_model/rng_state.pth +3 -0
  12. low-shot-task-specific/cola/best_model/scheduler.pt +3 -0
  13. low-shot-task-specific/cola/best_model/trainer_state.json +85 -0
  14. low-shot-task-specific/cola/best_model/training_args.bin +3 -0
  15. low-shot-task-specific/commonsense_qa/best_model/adapter_config.json +21 -0
  16. low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin +3 -0
  17. low-shot-task-specific/commonsense_qa/best_model/optimizer.pt +3 -0
  18. low-shot-task-specific/commonsense_qa/best_model/rng_state.pth +3 -0
  19. low-shot-task-specific/commonsense_qa/best_model/scheduler.pt +3 -0
  20. low-shot-task-specific/commonsense_qa/best_model/trainer_state.json +111 -0
  21. low-shot-task-specific/commonsense_qa/best_model/training_args.bin +3 -0
  22. low-shot-task-specific/emotion/best_model/adapter_config.json +21 -0
  23. low-shot-task-specific/emotion/best_model/adapter_model.bin +3 -0
  24. low-shot-task-specific/emotion/best_model/optimizer.pt +3 -0
  25. low-shot-task-specific/emotion/best_model/rng_state.pth +3 -0
  26. low-shot-task-specific/emotion/best_model/scheduler.pt +3 -0
  27. low-shot-task-specific/emotion/best_model/trainer_state.json +157 -0
  28. low-shot-task-specific/emotion/best_model/training_args.bin +3 -0
  29. low-shot-task-specific/social_i_qa/best_model/adapter_config.json +21 -0
  30. low-shot-task-specific/social_i_qa/best_model/adapter_model.bin +3 -0
  31. low-shot-task-specific/social_i_qa/best_model/optimizer.pt +3 -0
  32. low-shot-task-specific/social_i_qa/best_model/rng_state.pth +3 -0
  33. low-shot-task-specific/social_i_qa/best_model/scheduler.pt +3 -0
  34. low-shot-task-specific/social_i_qa/best_model/trainer_state.json +111 -0
  35. low-shot-task-specific/social_i_qa/best_model/training_args.bin +3 -0
  36. low-shot-task-specific/sst/best_model/adapter_config.json +21 -0
  37. low-shot-task-specific/sst/best_model/adapter_model.bin +3 -0
  38. low-shot-task-specific/sst/best_model/optimizer.pt +3 -0
  39. low-shot-task-specific/sst/best_model/rng_state.pth +3 -0
  40. low-shot-task-specific/sst/best_model/scheduler.pt +3 -0
  41. low-shot-task-specific/sst/best_model/trainer_state.json +85 -0
  42. low-shot-task-specific/sst/best_model/training_args.bin +3 -0
  43. low-shot-task-specific/sum/best_model/adapter_config.json +21 -0
  44. low-shot-task-specific/sum/best_model/adapter_model.bin +3 -0
  45. low-shot-task-specific/sum/best_model/optimizer.pt +3 -0
  46. low-shot-task-specific/sum/best_model/rng_state.pth +3 -0
  47. low-shot-task-specific/sum/best_model/scheduler.pt +3 -0
  48. low-shot-task-specific/sum/best_model/trainer_state.json +85 -0
  49. low-shot-task-specific/sum/best_model/training_args.bin +3 -0
  50. low-shot-task-specific/svamp/best_model/adapter_config.json +21 -0
low-shot-task-specific/coin_flip/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/coin_flip/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b931c5915506612ec64883aa04ea154cc8aaf9f230aacb89dd47db7713e55f5b
3
+ size 104973389
low-shot-task-specific/coin_flip/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92f8e6b0b40ecda9624a1676867dbcea32bdbef5e0eecbcac5106784ec8465b
3
+ size 209984517
low-shot-task-specific/coin_flip/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c980b24b62e2109d15363aa73d40fa6fafc88b732c285e1b6fab92db69ce36b
3
+ size 14575
low-shot-task-specific/coin_flip/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7da15a993b502c23d3f1c3380001efcd3dd910c920a088c178a788bdf015b29
3
+ size 627
low-shot-task-specific/coin_flip/best_model/trainer_state.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.14907684922218323,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/coin_flip/checkpoint-55",
4
+ "epoch": 8.979591836734693,
5
+ "eval_steps": 500,
6
+ "global_step": 55,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.98,
13
+ "eval_loss": 4.5179572105407715,
14
+ "eval_runtime": 1.8312,
15
+ "eval_samples_per_second": 26.758,
16
+ "eval_steps_per_second": 3.823,
17
+ "step": 6
18
+ },
19
+ {
20
+ "epoch": 1.63,
21
+ "learning_rate": 5.9999999999999995e-05,
22
+ "loss": 4.5562,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 1.96,
27
+ "eval_loss": 3.3285250663757324,
28
+ "eval_runtime": 1.8387,
29
+ "eval_samples_per_second": 26.649,
30
+ "eval_steps_per_second": 3.807,
31
+ "step": 12
32
+ },
33
+ {
34
+ "epoch": 2.94,
35
+ "eval_loss": 1.0093011856079102,
36
+ "eval_runtime": 1.8346,
37
+ "eval_samples_per_second": 26.709,
38
+ "eval_steps_per_second": 3.816,
39
+ "step": 18
40
+ },
41
+ {
42
+ "epoch": 3.27,
43
+ "learning_rate": 0.00011999999999999999,
44
+ "loss": 2.3848,
45
+ "step": 20
46
+ },
47
+ {
48
+ "epoch": 3.92,
49
+ "eval_loss": 0.2200772613286972,
50
+ "eval_runtime": 1.8356,
51
+ "eval_samples_per_second": 26.694,
52
+ "eval_steps_per_second": 3.813,
53
+ "step": 24
54
+ },
55
+ {
56
+ "epoch": 4.9,
57
+ "learning_rate": 0.00017999999999999998,
58
+ "loss": 0.3144,
59
+ "step": 30
60
+ },
61
+ {
62
+ "epoch": 4.9,
63
+ "eval_loss": 0.21126192808151245,
64
+ "eval_runtime": 1.8403,
65
+ "eval_samples_per_second": 26.626,
66
+ "eval_steps_per_second": 3.804,
67
+ "step": 30
68
+ },
69
+ {
70
+ "epoch": 5.88,
71
+ "eval_loss": 0.18616808950901031,
72
+ "eval_runtime": 1.8423,
73
+ "eval_samples_per_second": 26.598,
74
+ "eval_steps_per_second": 3.8,
75
+ "step": 36
76
+ },
77
+ {
78
+ "epoch": 6.53,
79
+ "learning_rate": 0.00023999999999999998,
80
+ "loss": 0.2066,
81
+ "step": 40
82
+ },
83
+ {
84
+ "epoch": 6.86,
85
+ "eval_loss": 0.1662234216928482,
86
+ "eval_runtime": 1.8364,
87
+ "eval_samples_per_second": 26.683,
88
+ "eval_steps_per_second": 3.812,
89
+ "step": 42
90
+ },
91
+ {
92
+ "epoch": 8.0,
93
+ "eval_loss": 0.2262299805879593,
94
+ "eval_runtime": 1.8315,
95
+ "eval_samples_per_second": 26.754,
96
+ "eval_steps_per_second": 3.822,
97
+ "step": 49
98
+ },
99
+ {
100
+ "epoch": 8.16,
101
+ "learning_rate": 0.0003,
102
+ "loss": 0.1856,
103
+ "step": 50
104
+ },
105
+ {
106
+ "epoch": 8.98,
107
+ "eval_loss": 0.14907684922218323,
108
+ "eval_runtime": 1.8356,
109
+ "eval_samples_per_second": 26.694,
110
+ "eval_steps_per_second": 3.813,
111
+ "step": 55
112
+ }
113
+ ],
114
+ "logging_steps": 10,
115
+ "max_steps": 60,
116
+ "num_train_epochs": 10,
117
+ "save_steps": 500,
118
+ "total_flos": 6584591944581120.0,
119
+ "trial_name": null,
120
+ "trial_params": null
121
+ }
low-shot-task-specific/coin_flip/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f450808ed0897cbe91d86e09bf730b8688854884991e7216373c40ee768a0c9b
3
+ size 4091
low-shot-task-specific/cola/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/cola/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:411c8f5252840aa1fc66fe6e846a855cc0c9826eb0e8a5e7e8ba168ffdeded3d
3
+ size 104973389
low-shot-task-specific/cola/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ebebf0b8ce1d3bed95e63794192866c76d3ed6e03bc0d928dc945817500540
3
+ size 209984517
low-shot-task-specific/cola/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef953e6438f145b783f6ca5f8d6d997cb169a9ddb6824cf4f2f9e126b56b09b7
3
+ size 14575
low-shot-task-specific/cola/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbabbf26b9b37d257cc72f404a441c985e21acf5c3e6fb7626e5104e04ff3282
3
+ size 627
low-shot-task-specific/cola/best_model/trainer_state.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1422310322523117,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/cola/checkpoint-75",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 5.399999999999999e-05,
14
+ "loss": 7.2579,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.00011399999999999999,
20
+ "loss": 6.0871,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 1.0100170373916626,
26
+ "eval_runtime": 5.8549,
27
+ "eval_samples_per_second": 34.159,
28
+ "eval_steps_per_second": 4.27,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.00017399999999999997,
34
+ "loss": 1.808,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.000234,
40
+ "loss": 0.2533,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.000294,
46
+ "loss": 0.2083,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.19681957364082336,
52
+ "eval_runtime": 5.8474,
53
+ "eval_samples_per_second": 34.203,
54
+ "eval_steps_per_second": 4.275,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.00028649999999999997,
60
+ "loss": 0.1663,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.0002715,
66
+ "loss": 0.1771,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.1422310322523117,
72
+ "eval_runtime": 5.848,
73
+ "eval_samples_per_second": 34.2,
74
+ "eval_steps_per_second": 4.275,
75
+ "step": 75
76
+ }
77
+ ],
78
+ "logging_steps": 10,
79
+ "max_steps": 250,
80
+ "num_train_epochs": 10,
81
+ "save_steps": 500,
82
+ "total_flos": 4495226494648320.0,
83
+ "trial_name": null,
84
+ "trial_params": null
85
+ }
low-shot-task-specific/cola/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432e4f00d89268eb8a2e942ba35b41ff6bef5e5193df86888baa8dbedf03e4e1
3
+ size 4091
low-shot-task-specific/commonsense_qa/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/commonsense_qa/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5911565f25237e1e4a88d364af20dbdd3c53aa306935a116cdb82f52cba3baa8
3
+ size 104973389
low-shot-task-specific/commonsense_qa/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488383f8f47eb284f498b258cf82a60fcc881827248f5358d43805942165bc4d
3
+ size 209984517
low-shot-task-specific/commonsense_qa/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329449cd1278f022b5239a2bd97e216a89c73ae3d215b6a1bfc73b69c537d4a3
3
+ size 14575
low-shot-task-specific/commonsense_qa/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
3
+ size 627
low-shot-task-specific/commonsense_qa/best_model/trainer_state.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.28430670499801636,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/commonsense_qa/checkpoint-100",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 5.9999999999999995e-05,
14
+ "loss": 5.5323,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.00011999999999999999,
20
+ "loss": 3.1134,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 0.6229318976402283,
26
+ "eval_runtime": 7.9588,
27
+ "eval_samples_per_second": 25.129,
28
+ "eval_steps_per_second": 3.141,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.00017999999999999998,
34
+ "loss": 0.6745,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.00023999999999999998,
40
+ "loss": 0.3959,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.0003,
46
+ "loss": 0.3388,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.32543906569480896,
52
+ "eval_runtime": 7.9767,
53
+ "eval_samples_per_second": 25.073,
54
+ "eval_steps_per_second": 3.134,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.000285,
60
+ "loss": 0.2496,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.00027,
66
+ "loss": 0.1963,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.28735020756721497,
72
+ "eval_runtime": 7.9596,
73
+ "eval_samples_per_second": 25.127,
74
+ "eval_steps_per_second": 3.141,
75
+ "step": 75
76
+ },
77
+ {
78
+ "epoch": 3.2,
79
+ "learning_rate": 0.00025499999999999996,
80
+ "loss": 0.1475,
81
+ "step": 80
82
+ },
83
+ {
84
+ "epoch": 3.6,
85
+ "learning_rate": 0.00023999999999999998,
86
+ "loss": 0.087,
87
+ "step": 90
88
+ },
89
+ {
90
+ "epoch": 4.0,
91
+ "learning_rate": 0.000225,
92
+ "loss": 0.0841,
93
+ "step": 100
94
+ },
95
+ {
96
+ "epoch": 4.0,
97
+ "eval_loss": 0.28430670499801636,
98
+ "eval_runtime": 7.9315,
99
+ "eval_samples_per_second": 25.216,
100
+ "eval_steps_per_second": 3.152,
101
+ "step": 100
102
+ }
103
+ ],
104
+ "logging_steps": 10,
105
+ "max_steps": 250,
106
+ "num_train_epochs": 10,
107
+ "save_steps": 500,
108
+ "total_flos": 1.532283750678528e+16,
109
+ "trial_name": null,
110
+ "trial_params": null
111
+ }
low-shot-task-specific/commonsense_qa/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac62dd2138b26a82acced238425ed68cca2c7eb6c44552fa9843fba2e1d0cf34
3
+ size 4091
low-shot-task-specific/emotion/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/emotion/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55526193b5417dd0e6ec1a050c8c68add7ac57e4f9adc8f5523fb7b6109cb1d5
3
+ size 104973389
low-shot-task-specific/emotion/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c619e22fe8786bb47d576383db1964d04ec5895cf49c7967a06ea21ba69e24
3
+ size 209984517
low-shot-task-specific/emotion/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a33d07fcc46ed21e10fb24f0266332833c17c1787ecf4b21b90883591a74c17a
3
+ size 14575
low-shot-task-specific/emotion/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24a1a40a49a3444d450b508a337be12226511f236bc6c3d4905032050bc15d21
3
+ size 627
low-shot-task-specific/emotion/best_model/trainer_state.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.13983282446861267,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/emotion/checkpoint-150",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 150,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 5.9999999999999995e-05,
14
+ "loss": 6.4494,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.00011999999999999999,
20
+ "loss": 5.0703,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 1.654482126235962,
26
+ "eval_runtime": 6.8374,
27
+ "eval_samples_per_second": 29.251,
28
+ "eval_steps_per_second": 3.656,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.00017999999999999998,
34
+ "loss": 1.9694,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.00023999999999999998,
40
+ "loss": 0.4062,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.0003,
46
+ "loss": 0.248,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.22630518674850464,
52
+ "eval_runtime": 6.8055,
53
+ "eval_samples_per_second": 29.388,
54
+ "eval_steps_per_second": 3.674,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.000285,
60
+ "loss": 0.1644,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.00027,
66
+ "loss": 0.1532,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.17524582147598267,
72
+ "eval_runtime": 6.7943,
73
+ "eval_samples_per_second": 29.437,
74
+ "eval_steps_per_second": 3.68,
75
+ "step": 75
76
+ },
77
+ {
78
+ "epoch": 3.2,
79
+ "learning_rate": 0.00025499999999999996,
80
+ "loss": 0.1291,
81
+ "step": 80
82
+ },
83
+ {
84
+ "epoch": 3.6,
85
+ "learning_rate": 0.00023999999999999998,
86
+ "loss": 0.082,
87
+ "step": 90
88
+ },
89
+ {
90
+ "epoch": 4.0,
91
+ "learning_rate": 0.000225,
92
+ "loss": 0.0672,
93
+ "step": 100
94
+ },
95
+ {
96
+ "epoch": 4.0,
97
+ "eval_loss": 0.14034521579742432,
98
+ "eval_runtime": 6.8294,
99
+ "eval_samples_per_second": 29.285,
100
+ "eval_steps_per_second": 3.661,
101
+ "step": 100
102
+ },
103
+ {
104
+ "epoch": 4.4,
105
+ "learning_rate": 0.00020999999999999998,
106
+ "loss": 0.0443,
107
+ "step": 110
108
+ },
109
+ {
110
+ "epoch": 4.8,
111
+ "learning_rate": 0.000195,
112
+ "loss": 0.0505,
113
+ "step": 120
114
+ },
115
+ {
116
+ "epoch": 5.0,
117
+ "eval_loss": 0.187747061252594,
118
+ "eval_runtime": 6.8658,
119
+ "eval_samples_per_second": 29.13,
120
+ "eval_steps_per_second": 3.641,
121
+ "step": 125
122
+ },
123
+ {
124
+ "epoch": 5.2,
125
+ "learning_rate": 0.00017999999999999998,
126
+ "loss": 0.042,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 5.6,
131
+ "learning_rate": 0.000165,
132
+ "loss": 0.0225,
133
+ "step": 140
134
+ },
135
+ {
136
+ "epoch": 6.0,
137
+ "learning_rate": 0.00015,
138
+ "loss": 0.0131,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 6.0,
143
+ "eval_loss": 0.13983282446861267,
144
+ "eval_runtime": 6.8662,
145
+ "eval_samples_per_second": 29.128,
146
+ "eval_steps_per_second": 3.641,
147
+ "step": 150
148
+ }
149
+ ],
150
+ "logging_steps": 10,
151
+ "max_steps": 250,
152
+ "num_train_epochs": 10,
153
+ "save_steps": 500,
154
+ "total_flos": 1.692015121170432e+16,
155
+ "trial_name": null,
156
+ "trial_params": null
157
+ }
low-shot-task-specific/emotion/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e1dfa83a1cabba6524b8aef4b9cf9f06ad12b54c1b84b046d4557a3bea2b51
3
+ size 4091
low-shot-task-specific/social_i_qa/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/social_i_qa/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21047febd6ad48a886cf9d5fcaed091ec146720fa2ace5db287e7337cbf7a46a
3
+ size 104973389
low-shot-task-specific/social_i_qa/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51615aa1f8c3316a2d9f1d9b34cd7bd25fa7d1fd75182407da214a4549fcc3fe
3
+ size 209984517
low-shot-task-specific/social_i_qa/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d184eb9d6f950ca5fa7558982747687291171db4b5d64ca0e406118be389e9f5
3
+ size 14575
low-shot-task-specific/social_i_qa/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c2a7bf34e16f34a4d2a75bc3cafeb5ac1fa3e67c4188166556b2ec6e67f07e
3
+ size 627
low-shot-task-specific/social_i_qa/best_model/trainer_state.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.21922020614147186,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/social_i_qa/checkpoint-100",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 5.9999999999999995e-05,
14
+ "loss": 6.145,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.00011999999999999999,
20
+ "loss": 3.2951,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 0.47254127264022827,
26
+ "eval_runtime": 8.3699,
27
+ "eval_samples_per_second": 23.895,
28
+ "eval_steps_per_second": 2.987,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.00017999999999999998,
34
+ "loss": 0.5553,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.00023999999999999998,
40
+ "loss": 0.3415,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.0003,
46
+ "loss": 0.3055,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.29911044239997864,
52
+ "eval_runtime": 8.3444,
53
+ "eval_samples_per_second": 23.968,
54
+ "eval_steps_per_second": 2.996,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.000285,
60
+ "loss": 0.2157,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.00027,
66
+ "loss": 0.1871,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.2219252735376358,
72
+ "eval_runtime": 8.3281,
73
+ "eval_samples_per_second": 24.015,
74
+ "eval_steps_per_second": 3.002,
75
+ "step": 75
76
+ },
77
+ {
78
+ "epoch": 3.2,
79
+ "learning_rate": 0.00025499999999999996,
80
+ "loss": 0.1417,
81
+ "step": 80
82
+ },
83
+ {
84
+ "epoch": 3.6,
85
+ "learning_rate": 0.00023999999999999998,
86
+ "loss": 0.12,
87
+ "step": 90
88
+ },
89
+ {
90
+ "epoch": 4.0,
91
+ "learning_rate": 0.000225,
92
+ "loss": 0.1079,
93
+ "step": 100
94
+ },
95
+ {
96
+ "epoch": 4.0,
97
+ "eval_loss": 0.21922020614147186,
98
+ "eval_runtime": 8.385,
99
+ "eval_samples_per_second": 23.852,
100
+ "eval_steps_per_second": 2.981,
101
+ "step": 100
102
+ }
103
+ ],
104
+ "logging_steps": 10,
105
+ "max_steps": 250,
106
+ "num_train_epochs": 10,
107
+ "save_steps": 500,
108
+ "total_flos": 1.656903891124224e+16,
109
+ "trial_name": null,
110
+ "trial_params": null
111
+ }
low-shot-task-specific/social_i_qa/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1df6d236b24d8ccd4f73c811e7a410d0eabcb7077106cde9555c3305ab36be9c
3
+ size 4091
low-shot-task-specific/sst/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/sst/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f159b31b434f4cbc2859fe1a9d310fe6ff28774a227edc490206f028896a6c4b
3
+ size 104973389
low-shot-task-specific/sst/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a3f8fa515dd79da7e74337497fcd24225c380d188278a17aecea3bbdbdab20b
3
+ size 209984517
low-shot-task-specific/sst/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fb7ddc07ac1c6b830dbc025657118a71cc05bef3beda9880d700dfe72a190a1
3
+ size 14575
low-shot-task-specific/sst/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472963d9d147c2cd377a0a377de820bc06cc3f0119cb01d2dc8c5a02c4d14738
3
+ size 627
low-shot-task-specific/sst/best_model/trainer_state.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0313660129904747,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sst/checkpoint-75",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 4.2e-05,
14
+ "loss": 7.604,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.000102,
20
+ "loss": 6.408,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 0.78858482837677,
26
+ "eval_runtime": 6.2379,
27
+ "eval_samples_per_second": 32.062,
28
+ "eval_steps_per_second": 4.008,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.000162,
34
+ "loss": 1.6353,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.00022199999999999998,
40
+ "loss": 0.1518,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.00028199999999999997,
46
+ "loss": 0.0807,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.06099913269281387,
52
+ "eval_runtime": 6.2244,
53
+ "eval_samples_per_second": 32.132,
54
+ "eval_steps_per_second": 4.016,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.0002895,
60
+ "loss": 0.0667,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.0002745,
66
+ "loss": 0.0418,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.0313660129904747,
72
+ "eval_runtime": 6.2691,
73
+ "eval_samples_per_second": 31.902,
74
+ "eval_steps_per_second": 3.988,
75
+ "step": 75
76
+ }
77
+ ],
78
+ "logging_steps": 10,
79
+ "max_steps": 250,
80
+ "num_train_epochs": 10,
81
+ "save_steps": 500,
82
+ "total_flos": 6295295189975040.0,
83
+ "trial_name": null,
84
+ "trial_params": null
85
+ }
low-shot-task-specific/sst/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c336d21dc0b7346d796426a4797d56084a81a08ea200e8c91411fa3449b6e06
3
+ size 4091
low-shot-task-specific/sum/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
low-shot-task-specific/sum/best_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0a0cb9b7eade6b441f997e152af520540514264d51b1c267e30e7c500669bc
3
+ size 104973389
low-shot-task-specific/sum/best_model/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a812e3aab6e78e990e7ca36e1b9e85917fdaca9d531c7bcdc41b82d6c982d1
3
+ size 209984517
low-shot-task-specific/sum/best_model/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edc7dd2e3f40ac0c046da2f233e18f2314fea538368fd7bd263fa95f95f7fbef
3
+ size 14575
low-shot-task-specific/sum/best_model/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71df4de1094152c023456a0f4f7f28571d7f0bd29b962a097a17dff09a83bd7
3
+ size 627
low-shot-task-specific/sum/best_model/trainer_state.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.024566762149333954,
3
+ "best_model_checkpoint": "checkpoints/instrucode/low-shot-task-specific/sum/checkpoint-75",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 75,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.4,
13
+ "learning_rate": 5.9999999999999995e-05,
14
+ "loss": 3.5065,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.8,
19
+ "learning_rate": 0.00011999999999999999,
20
+ "loss": 2.4397,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 1.0,
25
+ "eval_loss": 0.4209679365158081,
26
+ "eval_runtime": 5.755,
27
+ "eval_samples_per_second": 34.753,
28
+ "eval_steps_per_second": 4.344,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 1.2,
33
+ "learning_rate": 0.00017999999999999998,
34
+ "loss": 0.8483,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.6,
39
+ "learning_rate": 0.00023999999999999998,
40
+ "loss": 0.1766,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "learning_rate": 0.0003,
46
+ "loss": 0.0503,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_loss": 0.045773524791002274,
52
+ "eval_runtime": 5.7905,
53
+ "eval_samples_per_second": 34.539,
54
+ "eval_steps_per_second": 4.317,
55
+ "step": 50
56
+ },
57
+ {
58
+ "epoch": 2.4,
59
+ "learning_rate": 0.000285,
60
+ "loss": 0.0382,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 2.8,
65
+ "learning_rate": 0.00027,
66
+ "loss": 0.0355,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 3.0,
71
+ "eval_loss": 0.024566762149333954,
72
+ "eval_runtime": 5.7985,
73
+ "eval_samples_per_second": 34.492,
74
+ "eval_steps_per_second": 4.311,
75
+ "step": 75
76
+ }
77
+ ],
78
+ "logging_steps": 10,
79
+ "max_steps": 250,
80
+ "num_train_epochs": 10,
81
+ "save_steps": 500,
82
+ "total_flos": 4450719301632000.0,
83
+ "trial_name": null,
84
+ "trial_params": null
85
+ }
low-shot-task-specific/sum/best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2d99ceccff158e6a2e2efb1d0072c3a4d6419ea8c9ba1122915df6fece215d
3
+ size 4091
low-shot-task-specific/svamp/best_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 16,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "k_proj",
17
+ "v_proj",
18
+ "o_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }