Syed Hasan
committed on
Commit
•
960eec2
1
Parent(s):
ab919fa
Update README.md
Browse files
README.md
CHANGED
@@ -58,8 +58,8 @@ Average: 75.9% without mmlu
|
|
58 |
### TruthfulQA
|
59 |
| Task |Version|Metric|Value| |Stderr|
|
60 |
|-------------|------:|------|----:|---|-----:|
|
61 |
-
|truthfulqa_mc| 1|mc1 |
|
62 |
-
| | |mc2 |
|
63 |
|
64 |
### BigBench Reasoning Test
|
65 |
|
@@ -85,8 +85,24 @@ Average: 75.9% without mmlu
|
|
85 |
| bigbench_tracking_shuffled_objects_five_objects| 0| multiple_choice_grade | 23.28 | _ | 1.20 |
|
86 |
| bigbench_tracking_shuffled_objects_seven_objects| 0| multiple_choice_grade | 19.37 | _ | 0.94 |
|
87 |
| bigbench_tracking_shuffled_objects_three_objects| 0| multiple_choice_grade | 59.33 | _ | 2.84 |
|
|
|
88 |
Average: 49.08%
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
### Training hyperparameters
|
92 |
|
|
|
58 |
### TruthfulQA
|
59 |
| Task |Version|Metric|Value| |Stderr|
|
60 |
|-------------|------:|------|----:|---|-----:|
|
61 |
+
|truthfulqa_mc| 1|mc1 |63.03|± | 1.68|
|
62 |
+
| | |mc2 |78.39|± | 1.37|
|
63 |
|
64 |
### BigBench Reasoning Test
|
65 |
|
|
|
85 |
| bigbench_tracking_shuffled_objects_five_objects| 0| multiple_choice_grade | 23.28 | _ | 1.20 |
|
86 |
| bigbench_tracking_shuffled_objects_seven_objects| 0| multiple_choice_grade | 19.37 | _ | 0.94 |
|
87 |
| bigbench_tracking_shuffled_objects_three_objects| 0| multiple_choice_grade | 59.33 | _ | 2.84 |
|
88 |
+
|
89 |
Average: 49.08%
|
90 |
|
91 |
+
### GPT4ALL
|
92 |
+
|
93 |
+
| Task          | Version | Metric   |  Value |   | Stderr |
|---------------|--------:|----------|-------:|---|-------:|
|
94 |
+
| arc_challenge |       0 | acc      | 0.6630 | _ | 0.0138 |
|
95 |
+
|               |         | acc_norm | 0.6826 | _ | 0.0136 |
|
96 |
+
| arc_easy      |       0 | acc      | 0.8657 | _ | 0.0070 |
|
97 |
+
|               |         | acc_norm | 0.8081 | _ | 0.0081 |
|
98 |
+
| boolq         |       1 | acc      | 0.8716 | _ | 0.0059 |
|
99 |
+
| hellaswag     |       0 | acc      | 0.6960 | _ | 0.0046 |
|
100 |
+
|               |         | acc_norm | 0.8745 | _ | 0.0033 |
|
101 |
+
| openbookqa    |       0 | acc      | 0.3920 | _ | 0.0219 |
|
102 |
+
|               |         | acc_norm | 0.4960 | _ | 0.0224 |
|
103 |
+
| piqa          |       0 | acc      | 0.8303 | _ | 0.0088 |
|
104 |
+
|               |         | acc_norm | 0.8487 | _ | 0.0084 |
|
105 |
+
| winogrande    |       0 | acc      | 0.8106 | _ | 0.0110 |
|
106 |
|
107 |
### Training hyperparameters
|
108 |
|