nathan0 committed on
Commit bfd39e8
1 Parent(s): 4b6e875

first commit


llama-2-delta-tune-model-div-sal

README.md CHANGED
@@ -1,3 +1,9 @@
  ---
- license: apache-2.0
+ library_name: peft
  ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
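
The card above only records the PEFT version. As a hedged sketch of how an adapter in this format is typically loaded (the Hub ids below are assumptions, not taken from this repo; only the PEFT 0.4.0 dependency is stated above):

```python
# Minimal sketch, assuming transformers + peft==0.4.0 are installed and that the
# base model and this adapter are available at the ids below (both assumptions).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

# Attach the LoRA weights from adapter_model.bin / adapter_config.json in this repo.
model = PeftModel.from_pretrained(base, "nathan0/llama-2-delta-tune-model-div-sal")
model.eval()
```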
adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/vmagent/app/dataset/Llama-2-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 7,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 7,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
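
For reference, a hedged sketch of the `peft.LoraConfig` that adapter_config.json corresponds to at training time; every value is read from the JSON above, and the import names assume PEFT 0.4.0:

```python
from peft import LoraConfig, TaskType

# Mirrors adapter_config.json above (inference_mode flipped off for training).
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=7,                                   # LoRA rank
    lora_alpha=7,                          # scaling factor
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],   # attention query/value projections
    inference_mode=False,
)
```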
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8528895ce3b148285e487611cb9e3fcff148a1b33af0ae7b371c038b8e47af7f
+ size 11303289
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.9328290820121765,
+ "eval_runtime": 4637.6182,
+ "eval_samples": 15601,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 0.421,
+ "eval_tokens": 1974952
+ }
best_model_structure.txt ADDED
@@ -0,0 +1 @@
+ {"num_hidden_layers": [1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1], "r": [11, 3, 12, 1, 8, 11, 7, 12, 12, 6, 12, 1, 6, 11, 8, 3, 2, 12, 12, 5, 2, 3, 9, 2, 4, 12, 10, 1, 12, 2, 9, 7], "alpha": [6, 6, 6, 2, 3, 6, 5, 5, 1, 4, 2, 3, 6, 6, 3, 5, 1, 2, 6, 2, 2, 2, 3, 1, 4, 2, 6, 2, 2, 4, 6, 1]}
checkpoint-1130/README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - PEFT 0.4.0
checkpoint-1130/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "/home/vmagent/app/dataset/Llama-2-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 7,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 7,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
checkpoint-1130/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8528895ce3b148285e487611cb9e3fcff148a1b33af0ae7b371c038b8e47af7f
+ size 11303289
checkpoint-1130/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:618f443ea86970abd066a87c6e263550cd27aaf80beae923ba65198a7286877d
+ size 22615621
checkpoint-1130/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2baed73f84e733959faa04603c46e4711d40efc68cce45fe24a86cd4ef848a9
+ size 13553
checkpoint-1130/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdb9c410cb1fee55c64962fef791dbcf270d380ace861d4a88a849eae5737e19
+ size 627
checkpoint-1130/special_tokens_map.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
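
special_tokens_map.json sets the pad token to `</s>`, the common Llama-2 workaround for a tokenizer that ships without one. A hedged sketch of the equivalent setup (the local path is the one recorded in adapter_config.json, and its availability is an assumption):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("/home/vmagent/app/dataset/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token  # "</s>", as recorded in special_tokens_map.json
tokenizer.padding_side = "right"           # matches tokenizer_config.json below
```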
checkpoint-1130/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1130/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
checkpoint-1130/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": false,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-1130/trainer_state.json ADDED
@@ -0,0 +1,82 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0,
+ "global_step": 1130,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.09,
+ "learning_rate": 9.115044247787611e-05,
+ "loss": 1.0612,
+ "step": 100
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 8.230088495575221e-05,
+ "loss": 0.93,
+ "step": 200
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 7.345132743362832e-05,
+ "loss": 0.9098,
+ "step": 300
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.460176991150442e-05,
+ "loss": 0.9033,
+ "step": 400
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 5.575221238938053e-05,
+ "loss": 0.8943,
+ "step": 500
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 4.690265486725664e-05,
+ "loss": 0.8932,
+ "step": 600
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 3.8053097345132744e-05,
+ "loss": 0.9019,
+ "step": 700
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 2.9203539823008852e-05,
+ "loss": 0.8881,
+ "step": 800
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 2.0353982300884957e-05,
+ "loss": 0.8929,
+ "step": 900
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 1.1504424778761062e-05,
+ "loss": 0.8916,
+ "step": 1000
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 2.6548672566371683e-06,
+ "loss": 0.8882,
+ "step": 1100
+ }
+ ],
+ "max_steps": 1130,
+ "num_train_epochs": 1,
+ "total_flos": 1.8342786143512166e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-1130/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6eedaba58b8a9174ee307f742d1df91a3623ab4c51b3f334d1569a644cc1cffa
+ size 4091
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.9328290820121765,
+ "eval_runtime": 4637.6182,
+ "eval_samples": 15601,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 0.421,
+ "eval_tokens": 1974952
+ }
llama2-7b-delta-arc_challenge ADDED
@@ -0,0 +1,27 @@
+ {
+ "results": {
+ "arc_challenge": {
+ "acc": 0.4974402730375427,
+ "acc_stderr": 0.014611199329843788,
+ "acc_norm": 0.5366894197952219,
+ "acc_norm_stderr": 0.01457200052775699
+ }
+ },
+ "versions": {
+ "arc_challenge": 0
+ },
+ "config": {
+ "model": "hf-causal-experimental",
+ "model_args": "pretrained=/home/vmagent/app/data/Llama-2-7b-hf,peft=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal,use_accelerate=True,delta=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal/best_model_structure.txt",
+ "num_fewshot": 25,
+ "batch_size": "auto",
+ "batch_sizes": [
+ 16
+ ],
+ "device": null,
+ "no_cache": false,
+ "limit": null,
+ "bootstrap_iters": 100000,
+ "description_dict": {}
+ }
+ }
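
The config block above follows the older lm-evaluation-harness interface. A hedged sketch of the equivalent call; the `delta=...` model argument appears to come from a custom fork of the harness, so support for it (and the local paths) is an assumption:

```python
from lm_eval import evaluator

# Sketch of reproducing the 25-shot ARC-Challenge run recorded above.
results = evaluator.simple_evaluate(
    model="hf-causal-experimental",
    model_args=(
        "pretrained=/home/vmagent/app/data/Llama-2-7b-hf,"
        "peft=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal,"
        "use_accelerate=True,"
        "delta=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal/best_model_structure.txt"
    ),
    tasks=["arc_challenge"],
    num_fewshot=25,
    batch_size="auto",
)
print(results["results"]["arc_challenge"])
```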
llama2-7b-delta-hellaswag ADDED
@@ -0,0 +1,27 @@
+ {
+ "results": {
+ "hellaswag": {
+ "acc": 0.5876319458275244,
+ "acc_stderr": 0.004912547040132876,
+ "acc_norm": 0.7783310097590121,
+ "acc_norm_stderr": 0.004145206350032315
+ }
+ },
+ "versions": {
+ "hellaswag": 0
+ },
+ "config": {
+ "model": "hf-causal-experimental",
+ "model_args": "pretrained=/home/vmagent/app/data/Llama-2-7b-hf,peft=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal,use_accelerate=True,delta=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal/best_model_structure.txt",
+ "num_fewshot": 10,
+ "batch_size": "auto",
+ "batch_sizes": [
+ 16
+ ],
+ "device": null,
+ "no_cache": false,
+ "limit": null,
+ "bootstrap_iters": 100000,
+ "description_dict": {}
+ }
+ }
llama2-7b-delta-mmlu ADDED
@@ -0,0 +1,419 @@
+ {
+ "results": {
+ "hendrycksTest-abstract_algebra": {
+ "acc": 0.34,
+ "acc_stderr": 0.04760952285695236,
+ "acc_norm": 0.34,
+ "acc_norm_stderr": 0.04760952285695236
+ },
+ "hendrycksTest-anatomy": {
+ "acc": 0.45925925925925926,
+ "acc_stderr": 0.04304979692464242,
+ "acc_norm": 0.45925925925925926,
+ "acc_norm_stderr": 0.04304979692464242
+ },
+ "hendrycksTest-astronomy": {
+ "acc": 0.40131578947368424,
+ "acc_stderr": 0.039889037033362836,
+ "acc_norm": 0.40131578947368424,
+ "acc_norm_stderr": 0.039889037033362836
+ },
+ "hendrycksTest-business_ethics": {
+ "acc": 0.49,
+ "acc_stderr": 0.05024183937956912,
+ "acc_norm": 0.49,
+ "acc_norm_stderr": 0.05024183937956912
+ },
+ "hendrycksTest-clinical_knowledge": {
+ "acc": 0.44150943396226416,
+ "acc_stderr": 0.030561590426731844,
+ "acc_norm": 0.44150943396226416,
+ "acc_norm_stderr": 0.030561590426731844
+ },
+ "hendrycksTest-college_biology": {
+ "acc": 0.4513888888888889,
+ "acc_stderr": 0.04161402398403279,
+ "acc_norm": 0.4513888888888889,
+ "acc_norm_stderr": 0.04161402398403279
+ },
+ "hendrycksTest-college_chemistry": {
+ "acc": 0.35,
+ "acc_stderr": 0.0479372485441102,
+ "acc_norm": 0.35,
+ "acc_norm_stderr": 0.0479372485441102
+ },
+ "hendrycksTest-college_computer_science": {
+ "acc": 0.36,
+ "acc_stderr": 0.04824181513244218,
+ "acc_norm": 0.36,
+ "acc_norm_stderr": 0.04824181513244218
+ },
+ "hendrycksTest-college_mathematics": {
+ "acc": 0.35,
+ "acc_stderr": 0.047937248544110196,
+ "acc_norm": 0.35,
+ "acc_norm_stderr": 0.047937248544110196
+ },
+ "hendrycksTest-college_medicine": {
+ "acc": 0.42196531791907516,
+ "acc_stderr": 0.0376574669386515,
+ "acc_norm": 0.42196531791907516,
+ "acc_norm_stderr": 0.0376574669386515
+ },
+ "hendrycksTest-college_physics": {
+ "acc": 0.23529411764705882,
+ "acc_stderr": 0.04220773659171453,
+ "acc_norm": 0.23529411764705882,
+ "acc_norm_stderr": 0.04220773659171453
+ },
+ "hendrycksTest-computer_security": {
+ "acc": 0.59,
+ "acc_stderr": 0.049431107042371025,
+ "acc_norm": 0.59,
+ "acc_norm_stderr": 0.049431107042371025
+ },
+ "hendrycksTest-conceptual_physics": {
+ "acc": 0.4340425531914894,
+ "acc_stderr": 0.03240038086792747,
+ "acc_norm": 0.4340425531914894,
+ "acc_norm_stderr": 0.03240038086792747
+ },
+ "hendrycksTest-econometrics": {
+ "acc": 0.32456140350877194,
+ "acc_stderr": 0.04404556157374768,
+ "acc_norm": 0.32456140350877194,
+ "acc_norm_stderr": 0.04404556157374768
+ },
+ "hendrycksTest-electrical_engineering": {
+ "acc": 0.3931034482758621,
+ "acc_stderr": 0.0407032901370707,
+ "acc_norm": 0.3931034482758621,
+ "acc_norm_stderr": 0.0407032901370707
+ },
+ "hendrycksTest-elementary_mathematics": {
+ "acc": 0.28835978835978837,
+ "acc_stderr": 0.0233306540545359,
+ "acc_norm": 0.28835978835978837,
+ "acc_norm_stderr": 0.0233306540545359
+ },
+ "hendrycksTest-formal_logic": {
+ "acc": 0.30158730158730157,
+ "acc_stderr": 0.04104947269903394,
+ "acc_norm": 0.30158730158730157,
+ "acc_norm_stderr": 0.04104947269903394
+ },
+ "hendrycksTest-global_facts": {
+ "acc": 0.3,
+ "acc_stderr": 0.046056618647183814,
+ "acc_norm": 0.3,
+ "acc_norm_stderr": 0.046056618647183814
+ },
+ "hendrycksTest-high_school_biology": {
+ "acc": 0.47419354838709676,
+ "acc_stderr": 0.02840609505765332,
+ "acc_norm": 0.47419354838709676,
+ "acc_norm_stderr": 0.02840609505765332
+ },
+ "hendrycksTest-high_school_chemistry": {
+ "acc": 0.3448275862068966,
+ "acc_stderr": 0.03344283744280458,
+ "acc_norm": 0.3448275862068966,
+ "acc_norm_stderr": 0.03344283744280458
+ },
+ "hendrycksTest-high_school_computer_science": {
+ "acc": 0.39,
+ "acc_stderr": 0.04902071300001975,
+ "acc_norm": 0.39,
+ "acc_norm_stderr": 0.04902071300001975
+ },
+ "hendrycksTest-high_school_european_history": {
+ "acc": 0.6060606060606061,
+ "acc_stderr": 0.03815494308688931,
+ "acc_norm": 0.6060606060606061,
+ "acc_norm_stderr": 0.03815494308688931
+ },
+ "hendrycksTest-high_school_geography": {
+ "acc": 0.5505050505050505,
+ "acc_stderr": 0.035441324919479704,
+ "acc_norm": 0.5505050505050505,
+ "acc_norm_stderr": 0.035441324919479704
+ },
+ "hendrycksTest-high_school_government_and_politics": {
+ "acc": 0.689119170984456,
+ "acc_stderr": 0.03340361906276585,
+ "acc_norm": 0.689119170984456,
+ "acc_norm_stderr": 0.03340361906276585
+ },
+ "hendrycksTest-high_school_macroeconomics": {
+ "acc": 0.4282051282051282,
+ "acc_stderr": 0.025088301454694834,
+ "acc_norm": 0.4282051282051282,
+ "acc_norm_stderr": 0.025088301454694834
+ },
+ "hendrycksTest-high_school_mathematics": {
+ "acc": 0.2851851851851852,
+ "acc_stderr": 0.027528599210340496,
+ "acc_norm": 0.2851851851851852,
+ "acc_norm_stderr": 0.027528599210340496
+ },
+ "hendrycksTest-high_school_microeconomics": {
+ "acc": 0.38235294117647056,
+ "acc_stderr": 0.03156663099215416,
+ "acc_norm": 0.38235294117647056,
+ "acc_norm_stderr": 0.03156663099215416
+ },
+ "hendrycksTest-high_school_physics": {
+ "acc": 0.3509933774834437,
+ "acc_stderr": 0.03896981964257375,
+ "acc_norm": 0.3509933774834437,
+ "acc_norm_stderr": 0.03896981964257375
+ },
+ "hendrycksTest-high_school_psychology": {
+ "acc": 0.6146788990825688,
+ "acc_stderr": 0.02086585085279412,
+ "acc_norm": 0.6146788990825688,
+ "acc_norm_stderr": 0.02086585085279412
+ },
+ "hendrycksTest-high_school_statistics": {
+ "acc": 0.25462962962962965,
+ "acc_stderr": 0.02971127586000535,
+ "acc_norm": 0.25462962962962965,
+ "acc_norm_stderr": 0.02971127586000535
+ },
+ "hendrycksTest-high_school_us_history": {
+ "acc": 0.5490196078431373,
+ "acc_stderr": 0.03492406104163613,
+ "acc_norm": 0.5490196078431373,
+ "acc_norm_stderr": 0.03492406104163613
+ },
+ "hendrycksTest-high_school_world_history": {
+ "acc": 0.6244725738396625,
+ "acc_stderr": 0.03152256243091156,
+ "acc_norm": 0.6244725738396625,
+ "acc_norm_stderr": 0.03152256243091156
+ },
+ "hendrycksTest-human_aging": {
+ "acc": 0.5381165919282511,
+ "acc_stderr": 0.033460150119732274,
+ "acc_norm": 0.5381165919282511,
+ "acc_norm_stderr": 0.033460150119732274
+ },
+ "hendrycksTest-human_sexuality": {
+ "acc": 0.5343511450381679,
+ "acc_stderr": 0.043749285605997376,
+ "acc_norm": 0.5343511450381679,
+ "acc_norm_stderr": 0.043749285605997376
+ },
+ "hendrycksTest-international_law": {
+ "acc": 0.6033057851239669,
+ "acc_stderr": 0.044658697805310094,
+ "acc_norm": 0.6033057851239669,
+ "acc_norm_stderr": 0.044658697805310094
+ },
+ "hendrycksTest-jurisprudence": {
+ "acc": 0.49074074074074076,
+ "acc_stderr": 0.04832853553437055,
+ "acc_norm": 0.49074074074074076,
+ "acc_norm_stderr": 0.04832853553437055
+ },
+ "hendrycksTest-logical_fallacies": {
+ "acc": 0.48466257668711654,
+ "acc_stderr": 0.039265223787088424,
+ "acc_norm": 0.48466257668711654,
+ "acc_norm_stderr": 0.039265223787088424
+ },
+ "hendrycksTest-machine_learning": {
+ "acc": 0.375,
+ "acc_stderr": 0.04595091388086298,
+ "acc_norm": 0.375,
+ "acc_norm_stderr": 0.04595091388086298
+ },
+ "hendrycksTest-management": {
+ "acc": 0.5145631067961165,
+ "acc_stderr": 0.049486373240266356,
+ "acc_norm": 0.5145631067961165,
+ "acc_norm_stderr": 0.049486373240266356
+ },
+ "hendrycksTest-marketing": {
+ "acc": 0.6837606837606838,
+ "acc_stderr": 0.03046365674734027,
+ "acc_norm": 0.6837606837606838,
+ "acc_norm_stderr": 0.03046365674734027
+ },
+ "hendrycksTest-medical_genetics": {
+ "acc": 0.53,
+ "acc_stderr": 0.05016135580465919,
+ "acc_norm": 0.53,
+ "acc_norm_stderr": 0.05016135580465919
+ },
+ "hendrycksTest-miscellaneous": {
+ "acc": 0.6232439335887612,
+ "acc_stderr": 0.01732829290730305,
+ "acc_norm": 0.6232439335887612,
+ "acc_norm_stderr": 0.01732829290730305
+ },
+ "hendrycksTest-moral_disputes": {
+ "acc": 0.49421965317919075,
+ "acc_stderr": 0.026917296179149116,
+ "acc_norm": 0.49421965317919075,
+ "acc_norm_stderr": 0.026917296179149116
+ },
+ "hendrycksTest-moral_scenarios": {
+ "acc": 0.2446927374301676,
+ "acc_stderr": 0.014378169884098435,
+ "acc_norm": 0.2446927374301676,
+ "acc_norm_stderr": 0.014378169884098435
+ },
+ "hendrycksTest-nutrition": {
+ "acc": 0.4673202614379085,
+ "acc_stderr": 0.02856869975222588,
+ "acc_norm": 0.4673202614379085,
+ "acc_norm_stderr": 0.02856869975222588
+ },
+ "hendrycksTest-philosophy": {
+ "acc": 0.5884244372990354,
+ "acc_stderr": 0.027950481494401262,
+ "acc_norm": 0.5884244372990354,
+ "acc_norm_stderr": 0.027950481494401262
+ },
+ "hendrycksTest-prehistory": {
+ "acc": 0.5,
+ "acc_stderr": 0.02782074420373286,
+ "acc_norm": 0.5,
+ "acc_norm_stderr": 0.02782074420373286
+ },
+ "hendrycksTest-professional_accounting": {
+ "acc": 0.3475177304964539,
+ "acc_stderr": 0.028406627809590954,
+ "acc_norm": 0.3475177304964539,
+ "acc_norm_stderr": 0.028406627809590954
+ },
+ "hendrycksTest-professional_law": {
+ "acc": 0.3683181225554107,
+ "acc_stderr": 0.012319403369564639,
+ "acc_norm": 0.3683181225554107,
+ "acc_norm_stderr": 0.012319403369564639
+ },
+ "hendrycksTest-professional_medicine": {
+ "acc": 0.5330882352941176,
+ "acc_stderr": 0.03030625772246832,
+ "acc_norm": 0.5330882352941176,
+ "acc_norm_stderr": 0.03030625772246832
+ },
+ "hendrycksTest-professional_psychology": {
+ "acc": 0.42320261437908496,
+ "acc_stderr": 0.019987809769482064,
+ "acc_norm": 0.42320261437908496,
+ "acc_norm_stderr": 0.019987809769482064
+ },
+ "hendrycksTest-public_relations": {
+ "acc": 0.5363636363636364,
+ "acc_stderr": 0.04776449162396197,
+ "acc_norm": 0.5363636363636364,
+ "acc_norm_stderr": 0.04776449162396197
+ },
+ "hendrycksTest-security_studies": {
+ "acc": 0.42448979591836733,
+ "acc_stderr": 0.031642094879429414,
+ "acc_norm": 0.42448979591836733,
+ "acc_norm_stderr": 0.031642094879429414
+ },
+ "hendrycksTest-sociology": {
+ "acc": 0.5621890547263682,
+ "acc_stderr": 0.035080801121998406,
+ "acc_norm": 0.5621890547263682,
+ "acc_norm_stderr": 0.035080801121998406
+ },
+ "hendrycksTest-us_foreign_policy": {
+ "acc": 0.69,
+ "acc_stderr": 0.04648231987117316,
+ "acc_norm": 0.69,
+ "acc_norm_stderr": 0.04648231987117316
+ },
+ "hendrycksTest-virology": {
+ "acc": 0.3855421686746988,
+ "acc_stderr": 0.037891344246115496,
+ "acc_norm": 0.3855421686746988,
+ "acc_norm_stderr": 0.037891344246115496
+ },
+ "hendrycksTest-world_religions": {
+ "acc": 0.6666666666666666,
+ "acc_stderr": 0.036155076303109365,
+ "acc_norm": 0.6666666666666666,
+ "acc_norm_stderr": 0.036155076303109365
+ }
+ },
+ "versions": {
+ "hendrycksTest-abstract_algebra": 1,
+ "hendrycksTest-anatomy": 1,
+ "hendrycksTest-astronomy": 1,
+ "hendrycksTest-business_ethics": 1,
+ "hendrycksTest-clinical_knowledge": 1,
+ "hendrycksTest-college_biology": 1,
+ "hendrycksTest-college_chemistry": 1,
+ "hendrycksTest-college_computer_science": 1,
+ "hendrycksTest-college_mathematics": 1,
+ "hendrycksTest-college_medicine": 1,
+ "hendrycksTest-college_physics": 1,
+ "hendrycksTest-computer_security": 1,
+ "hendrycksTest-conceptual_physics": 1,
+ "hendrycksTest-econometrics": 1,
+ "hendrycksTest-electrical_engineering": 1,
+ "hendrycksTest-elementary_mathematics": 1,
+ "hendrycksTest-formal_logic": 1,
+ "hendrycksTest-global_facts": 1,
+ "hendrycksTest-high_school_biology": 1,
+ "hendrycksTest-high_school_chemistry": 1,
+ "hendrycksTest-high_school_computer_science": 1,
+ "hendrycksTest-high_school_european_history": 1,
+ "hendrycksTest-high_school_geography": 1,
+ "hendrycksTest-high_school_government_and_politics": 1,
+ "hendrycksTest-high_school_macroeconomics": 1,
+ "hendrycksTest-high_school_mathematics": 1,
+ "hendrycksTest-high_school_microeconomics": 1,
+ "hendrycksTest-high_school_physics": 1,
+ "hendrycksTest-high_school_psychology": 1,
+ "hendrycksTest-high_school_statistics": 1,
+ "hendrycksTest-high_school_us_history": 1,
+ "hendrycksTest-high_school_world_history": 1,
+ "hendrycksTest-human_aging": 1,
+ "hendrycksTest-human_sexuality": 1,
+ "hendrycksTest-international_law": 1,
+ "hendrycksTest-jurisprudence": 1,
+ "hendrycksTest-logical_fallacies": 1,
+ "hendrycksTest-machine_learning": 1,
+ "hendrycksTest-management": 1,
+ "hendrycksTest-marketing": 1,
+ "hendrycksTest-medical_genetics": 1,
+ "hendrycksTest-miscellaneous": 1,
+ "hendrycksTest-moral_disputes": 1,
+ "hendrycksTest-moral_scenarios": 1,
+ "hendrycksTest-nutrition": 1,
+ "hendrycksTest-philosophy": 1,
+ "hendrycksTest-prehistory": 1,
+ "hendrycksTest-professional_accounting": 1,
+ "hendrycksTest-professional_law": 1,
+ "hendrycksTest-professional_medicine": 1,
+ "hendrycksTest-professional_psychology": 1,
+ "hendrycksTest-public_relations": 1,
+ "hendrycksTest-security_studies": 1,
+ "hendrycksTest-sociology": 1,
+ "hendrycksTest-us_foreign_policy": 1,
+ "hendrycksTest-virology": 1,
+ "hendrycksTest-world_religions": 1
+ },
+ "config": {
+ "model": "hf-causal-experimental",
+ "model_args": "pretrained=/home/vmagent/app/data/Llama-2-7b-hf,peft=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal,use_accelerate=True,delta=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal/best_model_structure.txt",
+ "num_fewshot": 5,
+ "batch_size": "auto",
+ "batch_sizes": [
+ 8
+ ],
+ "device": null,
+ "no_cache": false,
+ "limit": null,
+ "bootstrap_iters": 100000,
+ "description_dict": {}
+ }
+ }
llama2-7b-delta-truthqa ADDED
@@ -0,0 +1,27 @@
+ {
+ "results": {
+ "truthfulqa_mc": {
+ "mc1": 0.3084455324357405,
+ "mc1_stderr": 0.01616803938315687,
+ "mc2": 0.45625480379943195,
+ "mc2_stderr": 0.014763037659441128
+ }
+ },
+ "versions": {
+ "truthfulqa_mc": 1
+ },
+ "config": {
+ "model": "hf-causal-experimental",
+ "model_args": "pretrained=/home/vmagent/app/data/Llama-2-7b-hf,peft=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal,use_accelerate=True,delta=/home/vmagent/app/data/llama-2-delta-tune-model-div-sal/best_model_structure.txt",
+ "num_fewshot": 0,
+ "batch_size": "auto",
+ "batch_sizes": [
+ 32
+ ],
+ "device": null,
+ "no_cache": false,
+ "limit": null,
+ "bootstrap_iters": 100000,
+ "description_dict": {}
+ }
+ }