mychen76 committed on
Commit 4c82848
1 Parent(s): 7c948d5

Update README.md

Files changed (1)
  1. README.md +33 -387
README.md CHANGED
@@ -2,8 +2,6 @@
  license: apache-2.0
  tags:
  - merge
- - OpenPipe/mistral-ft-optimized-1218
- - mlabonne/NeuralHermes-2.5-Mistral-7B
  ---

  # mistral-7b-merged-slerp
@@ -34,391 +32,39 @@ dtype: bfloat16

  ```

- ## Evaluation
- https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-slerp

- Latest Result:
- https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-slerp/blob/main/results_2024-03-10T11-04-57.263703.json

  ```
- {
-     "all": {
-         "acc": 0.6444688446653744,
-         "acc_stderr": 0.03217564834975917,
-         "acc_norm": 0.6448609553287138,
-         "acc_norm_stderr": 0.032833467276313325,
-         "mc1": 0.4283965728274174,
-         "mc1_stderr": 0.017323088597314754,
-         "mc2": 0.5985018412437423,
-         "mc2_stderr": 0.01514980059720055
-     },
-     "harness|arc:challenge|25": {
-         "acc": 0.6476109215017065,
-         "acc_stderr": 0.013960142600598675,
-         "acc_norm": 0.6774744027303754,
-         "acc_norm_stderr": 0.013659980894277364
-     },
-     "harness|hellaswag|10": {
-         "acc": 0.6700856403106951,
-         "acc_stderr": 0.004692208279690595,
-         "acc_norm": 0.8616809400517825,
-         "acc_norm_stderr": 0.0034452899250117337
-     },
-     "harness|hendrycksTest-abstract_algebra|5": {
-         "acc": 0.3,
-         "acc_stderr": 0.046056618647183814,
-         "acc_norm": 0.3,
-         "acc_norm_stderr": 0.046056618647183814
-     },
-     "harness|hendrycksTest-anatomy|5": {
-         "acc": 0.6074074074074074,
-         "acc_stderr": 0.0421850621536888,
-         "acc_norm": 0.6074074074074074,
-         "acc_norm_stderr": 0.0421850621536888
-     },
-     "harness|hendrycksTest-astronomy|5": {
-         "acc": 0.7105263157894737,
-         "acc_stderr": 0.03690677986137283,
-         "acc_norm": 0.7105263157894737,
-         "acc_norm_stderr": 0.03690677986137283
-     },
-     "harness|hendrycksTest-business_ethics|5": {
-         "acc": 0.61,
-         "acc_stderr": 0.04902071300001975,
-         "acc_norm": 0.61,
-         "acc_norm_stderr": 0.04902071300001975
-     },
-     "harness|hendrycksTest-clinical_knowledge|5": {
-         "acc": 0.6830188679245283,
-         "acc_stderr": 0.02863723563980089,
-         "acc_norm": 0.6830188679245283,
-         "acc_norm_stderr": 0.02863723563980089
-     },
-     "harness|hendrycksTest-college_biology|5": {
-         "acc": 0.7777777777777778,
-         "acc_stderr": 0.03476590104304134,
-         "acc_norm": 0.7777777777777778,
-         "acc_norm_stderr": 0.03476590104304134
-     },
-     "harness|hendrycksTest-college_chemistry|5": {
-         "acc": 0.45,
-         "acc_stderr": 0.05,
-         "acc_norm": 0.45,
-         "acc_norm_stderr": 0.05
-     },
-     "harness|hendrycksTest-college_computer_science|5": {
-         "acc": 0.49,
-         "acc_stderr": 0.05024183937956912,
-         "acc_norm": 0.49,
-         "acc_norm_stderr": 0.05024183937956912
-     },
-     "harness|hendrycksTest-college_mathematics|5": {
-         "acc": 0.29,
-         "acc_stderr": 0.045604802157206845,
-         "acc_norm": 0.29,
-         "acc_norm_stderr": 0.045604802157206845
-     },
-     "harness|hendrycksTest-college_medicine|5": {
-         "acc": 0.6473988439306358,
-         "acc_stderr": 0.036430371689585475,
-         "acc_norm": 0.6473988439306358,
-         "acc_norm_stderr": 0.036430371689585475
-     },
-     "harness|hendrycksTest-college_physics|5": {
-         "acc": 0.39215686274509803,
-         "acc_stderr": 0.04858083574266345,
-         "acc_norm": 0.39215686274509803,
-         "acc_norm_stderr": 0.04858083574266345
-     },
-     "harness|hendrycksTest-computer_security|5": {
-         "acc": 0.74,
-         "acc_stderr": 0.04408440022768078,
-         "acc_norm": 0.74,
-         "acc_norm_stderr": 0.04408440022768078
-     },
-     "harness|hendrycksTest-conceptual_physics|5": {
-         "acc": 0.5829787234042553,
-         "acc_stderr": 0.03223276266711712,
-         "acc_norm": 0.5829787234042553,
-         "acc_norm_stderr": 0.03223276266711712
-     },
-     "harness|hendrycksTest-econometrics|5": {
-         "acc": 0.5,
-         "acc_stderr": 0.047036043419179864,
-         "acc_norm": 0.5,
-         "acc_norm_stderr": 0.047036043419179864
-     },
-     "harness|hendrycksTest-electrical_engineering|5": {
-         "acc": 0.5448275862068965,
-         "acc_stderr": 0.04149886942192117,
-         "acc_norm": 0.5448275862068965,
-         "acc_norm_stderr": 0.04149886942192117
-     },
-     "harness|hendrycksTest-elementary_mathematics|5": {
-         "acc": 0.41534391534391535,
-         "acc_stderr": 0.025379524910778405,
-         "acc_norm": 0.41534391534391535,
-         "acc_norm_stderr": 0.025379524910778405
-     },
-     "harness|hendrycksTest-formal_logic|5": {
-         "acc": 0.4603174603174603,
-         "acc_stderr": 0.04458029125470973,
-         "acc_norm": 0.4603174603174603,
-         "acc_norm_stderr": 0.04458029125470973
-     },
-     "harness|hendrycksTest-global_facts|5": {
-         "acc": 0.38,
-         "acc_stderr": 0.048783173121456316,
-         "acc_norm": 0.38,
-         "acc_norm_stderr": 0.048783173121456316
-     },
-     "harness|hendrycksTest-high_school_biology|5": {
-         "acc": 0.7774193548387097,
-         "acc_stderr": 0.023664216671642518,
-         "acc_norm": 0.7774193548387097,
-         "acc_norm_stderr": 0.023664216671642518
-     },
-     "harness|hendrycksTest-high_school_chemistry|5": {
-         "acc": 0.5073891625615764,
-         "acc_stderr": 0.035176035403610105,
-         "acc_norm": 0.5073891625615764,
-         "acc_norm_stderr": 0.035176035403610105
-     },
-     "harness|hendrycksTest-high_school_computer_science|5": {
-         "acc": 0.69,
-         "acc_stderr": 0.04648231987117316,
-         "acc_norm": 0.69,
-         "acc_norm_stderr": 0.04648231987117316
-     },
-     "harness|hendrycksTest-high_school_european_history|5": {
-         "acc": 0.7696969696969697,
-         "acc_stderr": 0.0328766675860349,
-         "acc_norm": 0.7696969696969697,
-         "acc_norm_stderr": 0.0328766675860349
-     },
-     "harness|hendrycksTest-high_school_geography|5": {
-         "acc": 0.7878787878787878,
-         "acc_stderr": 0.029126522834586818,
-         "acc_norm": 0.7878787878787878,
-         "acc_norm_stderr": 0.029126522834586818
-     },
-     "harness|hendrycksTest-high_school_government_and_politics|5": {
-         "acc": 0.9015544041450777,
-         "acc_stderr": 0.02150024957603346,
-         "acc_norm": 0.9015544041450777,
-         "acc_norm_stderr": 0.02150024957603346
-     },
-     "harness|hendrycksTest-high_school_macroeconomics|5": {
-         "acc": 0.6538461538461539,
-         "acc_stderr": 0.02412112541694119,
-         "acc_norm": 0.6538461538461539,
-         "acc_norm_stderr": 0.02412112541694119
-     },
-     "harness|hendrycksTest-high_school_mathematics|5": {
-         "acc": 0.32222222222222224,
-         "acc_stderr": 0.028493465091028593,
-         "acc_norm": 0.32222222222222224,
-         "acc_norm_stderr": 0.028493465091028593
-     },
-     "harness|hendrycksTest-high_school_microeconomics|5": {
-         "acc": 0.6890756302521008,
-         "acc_stderr": 0.03006676158297793,
-         "acc_norm": 0.6890756302521008,
-         "acc_norm_stderr": 0.03006676158297793
-     },
-     "harness|hendrycksTest-high_school_physics|5": {
-         "acc": 0.32450331125827814,
-         "acc_stderr": 0.03822746937658752,
-         "acc_norm": 0.32450331125827814,
-         "acc_norm_stderr": 0.03822746937658752
-     },
-     "harness|hendrycksTest-high_school_psychology|5": {
-         "acc": 0.8550458715596331,
-         "acc_stderr": 0.01509421569970048,
-         "acc_norm": 0.8550458715596331,
-         "acc_norm_stderr": 0.01509421569970048
-     },
-     "harness|hendrycksTest-high_school_statistics|5": {
-         "acc": 0.5185185185185185,
-         "acc_stderr": 0.034076320938540516,
-         "acc_norm": 0.5185185185185185,
-         "acc_norm_stderr": 0.034076320938540516
-     },
-     "harness|hendrycksTest-high_school_us_history|5": {
-         "acc": 0.8186274509803921,
-         "acc_stderr": 0.027044621719474082,
-         "acc_norm": 0.8186274509803921,
-         "acc_norm_stderr": 0.027044621719474082
-     },
-     "harness|hendrycksTest-high_school_world_history|5": {
-         "acc": 0.8059071729957806,
-         "acc_stderr": 0.0257449025322909,
-         "acc_norm": 0.8059071729957806,
-         "acc_norm_stderr": 0.0257449025322909
-     },
-     "harness|hendrycksTest-human_aging|5": {
-         "acc": 0.6905829596412556,
-         "acc_stderr": 0.03102441174057221,
-         "acc_norm": 0.6905829596412556,
-         "acc_norm_stderr": 0.03102441174057221
-     },
-     "harness|hendrycksTest-human_sexuality|5": {
-         "acc": 0.7786259541984732,
-         "acc_stderr": 0.03641297081313729,
-         "acc_norm": 0.7786259541984732,
-         "acc_norm_stderr": 0.03641297081313729
-     },
-     "harness|hendrycksTest-international_law|5": {
-         "acc": 0.8099173553719008,
-         "acc_stderr": 0.03581796951709282,
-         "acc_norm": 0.8099173553719008,
-         "acc_norm_stderr": 0.03581796951709282
-     },
-     "harness|hendrycksTest-jurisprudence|5": {
-         "acc": 0.7685185185185185,
-         "acc_stderr": 0.04077494709252626,
-         "acc_norm": 0.7685185185185185,
-         "acc_norm_stderr": 0.04077494709252626
-     },
-     "harness|hendrycksTest-logical_fallacies|5": {
-         "acc": 0.7730061349693251,
-         "acc_stderr": 0.03291099578615769,
-         "acc_norm": 0.7730061349693251,
-         "acc_norm_stderr": 0.03291099578615769
-     },
-     "harness|hendrycksTest-machine_learning|5": {
-         "acc": 0.4642857142857143,
-         "acc_stderr": 0.04733667890053756,
-         "acc_norm": 0.4642857142857143,
-         "acc_norm_stderr": 0.04733667890053756
-     },
-     "harness|hendrycksTest-management|5": {
-         "acc": 0.7572815533980582,
-         "acc_stderr": 0.04245022486384495,
-         "acc_norm": 0.7572815533980582,
-         "acc_norm_stderr": 0.04245022486384495
-     },
-     "harness|hendrycksTest-marketing|5": {
-         "acc": 0.8547008547008547,
-         "acc_stderr": 0.023086635086841407,
-         "acc_norm": 0.8547008547008547,
-         "acc_norm_stderr": 0.023086635086841407
-     },
-     "harness|hendrycksTest-medical_genetics|5": {
-         "acc": 0.7,
-         "acc_stderr": 0.046056618647183814,
-         "acc_norm": 0.7,
-         "acc_norm_stderr": 0.046056618647183814
-     },
-     "harness|hendrycksTest-miscellaneous|5": {
-         "acc": 0.8352490421455939,
-         "acc_stderr": 0.013265346261323793,
-         "acc_norm": 0.8352490421455939,
-         "acc_norm_stderr": 0.013265346261323793
-     },
-     "harness|hendrycksTest-moral_disputes|5": {
-         "acc": 0.7283236994219653,
-         "acc_stderr": 0.023948512905468365,
-         "acc_norm": 0.7283236994219653,
-         "acc_norm_stderr": 0.023948512905468365
-     },
-     "harness|hendrycksTest-moral_scenarios|5": {
-         "acc": 0.36312849162011174,
-         "acc_stderr": 0.016083749986853697,
-         "acc_norm": 0.36312849162011174,
-         "acc_norm_stderr": 0.016083749986853697
-     },
-     "harness|hendrycksTest-nutrition|5": {
-         "acc": 0.7450980392156863,
-         "acc_stderr": 0.02495418432487991,
-         "acc_norm": 0.7450980392156863,
-         "acc_norm_stderr": 0.02495418432487991
-     },
-     "harness|hendrycksTest-philosophy|5": {
-         "acc": 0.7106109324758842,
-         "acc_stderr": 0.025755865922632945,
-         "acc_norm": 0.7106109324758842,
-         "acc_norm_stderr": 0.025755865922632945
-     },
-     "harness|hendrycksTest-prehistory|5": {
-         "acc": 0.7469135802469136,
-         "acc_stderr": 0.024191808600712995,
-         "acc_norm": 0.7469135802469136,
-         "acc_norm_stderr": 0.024191808600712995
-     },
-     "harness|hendrycksTest-professional_accounting|5": {
-         "acc": 0.4787234042553192,
-         "acc_stderr": 0.029800481645628693,
-         "acc_norm": 0.4787234042553192,
-         "acc_norm_stderr": 0.029800481645628693
-     },
-     "harness|hendrycksTest-professional_law|5": {
-         "acc": 0.4726205997392438,
-         "acc_stderr": 0.012751075788015058,
-         "acc_norm": 0.4726205997392438,
-         "acc_norm_stderr": 0.012751075788015058
-     },
-     "harness|hendrycksTest-professional_medicine|5": {
-         "acc": 0.6948529411764706,
-         "acc_stderr": 0.027971541370170598,
-         "acc_norm": 0.6948529411764706,
-         "acc_norm_stderr": 0.027971541370170598
-     },
-     "harness|hendrycksTest-professional_psychology|5": {
-         "acc": 0.6748366013071896,
-         "acc_stderr": 0.01895088677080631,
-         "acc_norm": 0.6748366013071896,
-         "acc_norm_stderr": 0.01895088677080631
-     },
-     "harness|hendrycksTest-public_relations|5": {
-         "acc": 0.6636363636363637,
-         "acc_stderr": 0.04525393596302506,
-         "acc_norm": 0.6636363636363637,
-         "acc_norm_stderr": 0.04525393596302506
-     },
-     "harness|hendrycksTest-security_studies|5": {
-         "acc": 0.746938775510204,
-         "acc_stderr": 0.027833023871399673,
-         "acc_norm": 0.746938775510204,
-         "acc_norm_stderr": 0.027833023871399673
-     },
-     "harness|hendrycksTest-sociology|5": {
-         "acc": 0.835820895522388,
-         "acc_stderr": 0.026193923544454115,
-         "acc_norm": 0.835820895522388,
-         "acc_norm_stderr": 0.026193923544454115
-     },
-     "harness|hendrycksTest-us_foreign_policy|5": {
-         "acc": 0.85,
-         "acc_stderr": 0.0358870281282637,
-         "acc_norm": 0.85,
-         "acc_norm_stderr": 0.0358870281282637
-     },
-     "harness|hendrycksTest-virology|5": {
-         "acc": 0.5301204819277109,
-         "acc_stderr": 0.03885425420866767,
-         "acc_norm": 0.5301204819277109,
-         "acc_norm_stderr": 0.03885425420866767
-     },
-     "harness|hendrycksTest-world_religions|5": {
-         "acc": 0.8304093567251462,
-         "acc_stderr": 0.02878210810540171,
-         "acc_norm": 0.8304093567251462,
-         "acc_norm_stderr": 0.02878210810540171
-     },
-     "harness|truthfulqa:mc|0": {
-         "mc1": 0.4283965728274174,
-         "mc1_stderr": 0.017323088597314754,
-         "mc2": 0.5985018412437423,
-         "mc2_stderr": 0.01514980059720055
-     },
-     "harness|winogrande|5": {
-         "acc": 0.8018942383583267,
-         "acc_stderr": 0.01120186274448705
-     },
-     "harness|gsm8k|5": {
-         "acc": 0.6853677028051555,
-         "acc_stderr": 0.01279103722733604
-     }
- }
- ```
+ ## 💻 Usage
+ ```python
+ !pip install -qU transformers bitsandbytes accelerate

+ from transformers import AutoTokenizer
+ import transformers
+ import torch

+ model = "mychen76/mistral-7b-merged-slerp"
+
+ tokenizer = AutoTokenizer.from_pretrained(model)
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=model,
+     model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
+ )
+
+ messages = [{"role": "user", "content": "why the sky is blue."}]
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+ print(outputs[0]["generated_text"])
  ```
+
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_mychen76__mistral-7b-merged-slerp)
+
+
+ | Metric                          |Value|
+ |---------------------------------|----:|
+ |Avg.                             |71.09|
+ |AI2 Reasoning Challenge (25-Shot)|67.75|
+ |HellaSwag (10-Shot)              |86.17|
+ |MMLU (5-Shot)                    |64.05|
+ |TruthfulQA (0-shot)              |59.85|
+ |Winogrande (5-shot)              |80.19|
+ |GSM8k (5-shot)                   |68.54|
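
For reference, the 4-bit setting passed through `model_kwargs` in the added usage block can also be configured explicitly. The sketch below is illustrative rather than taken from the card; it assumes recent `transformers`, `bitsandbytes`, and `accelerate` releases and a CUDA-capable GPU, and mirrors the card's prompt and sampling settings.

```python
# Illustrative alternative to the pipeline call above: explicit 4-bit loading
# with BitsAndBytesConfig instead of model_kwargs={"load_in_4bit": True}.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mychen76/mistral-7b-merged-slerp"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # same effect as load_in_4bit=True above
    bnb_4bit_compute_dtype=torch.float16,  # matches the torch_dtype used in the pipeline example
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Build a chat-formatted prompt and generate with the same sampling parameters as the card.
messages = [{"role": "user", "content": "Why is the sky blue?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(
    input_ids, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

The explicit config makes the quantization choices visible and easier to adjust, for example by switching the compute dtype.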