Update README.md
Browse files
README.md
CHANGED
@@ -103,26 +103,26 @@ pip3 install lm-eval==0.4.7
|
|
103 |
We found lm-eval is very unstable for this model. Please set `add_bos_token=True` to align with the original model.
|
104 |
|
105 |
```bash
|
106 |
-
lm-eval --model hf --model_args pretrained=OPEA/Llama-3.3-70B-Instruct-int3-sym-inc,add_bos_token=True --tasks mmlu --batch_size
|
107 |
|
108 |
```
|
109 |
|
110 |
-
| Metric | BF16 | INT3
|
111 |
-
| --------------------------- | ------------------------ |
|
112 |
-
| avg | 0.7023 |
|
113 |
-
| leaderboard_mmlu_pro 5shot | 0.5484 |
|
114 |
-
| leaderboard_ifeval
|
115 |
-
| mmlu | 0.8195 | 0.8052
|
116 |
-
| lambada_openai | 0.7528 |
|
117 |
-
| hellaswag | 0.6575 |
|
118 |
-
| winogrande | 0.7869 |
|
119 |
-
| piqa | 0.8303 |
|
120 |
-
| truthfulqa_mc1 | 0.4284 |
|
121 |
-
| openbookqa | 0.3720 |
|
122 |
-
| boolq | 0.8865 |
|
123 |
-
| arc_easy | 0.8624 | 0.8523
|
124 |
-
| arc_challenge | 0.6109 |
|
125 |
-
| gsm8k(5shot) strict match | 0.9083 |
|
126 |
|
127 |
## Generate the model
|
128 |
|
|
|
103 |
We found lm-eval is very unstable for this model. Please set `add_bos_token=True` to align with the original model.
|
104 |
|
105 |
```bash
|
106 |
+
lm-eval --model hf --model_args pretrained=OPEA/Llama-3.3-70B-Instruct-int3-sym-inc,add_bos_token=True --tasks leaderboard_mmlu_pro,leaderboard_ifeval,lambada_openai,hellaswag,piqa,winogrande,truthfulqa_mc1,openbookqa,boolq,arc_easy,arc_challenge,mmlu,gsm8k --batch_size 16
|
107 |
|
108 |
```
|
109 |
|
110 |
+
| Metric                      | BF16                     | INT3                      | INT3 w/o bos |
|
111 |
+
| --------------------------- | ------------------------ | ------------------------- | ----------- |
|
112 |
+
| avg | 0.7023 | 0.69675 | |
|
113 |
+
| leaderboard_mmlu_pro 5shot | 0.5484 | 0.5155 | |
|
114 |
+
| leaderboard_ifeval+strict | 0.6661=(0.7110+0.6211)/2 | 0.63885=(0.6954+0.5823)/2 | |
|
115 |
+
| mmlu | 0.8195 | 0.8052 | |
|
116 |
+
| lambada_openai | 0.7528 | 0.7617 | |
|
117 |
+
| hellaswag | 0.6575 | 0.6491 | |
|
118 |
+
| winogrande | 0.7869 | 0.8161 | |
|
119 |
+
| piqa | 0.8303 | 0.8303 | |
|
120 |
+
| truthfulqa_mc1 | 0.4284 | 0.4333 | |
|
121 |
+
| openbookqa | 0.3720 | 0.3620 | |
|
122 |
+
| boolq | 0.8865 | 0.8911 | |
|
123 |
+
| arc_easy | 0.8624 | 0.8523 | 0.2643 |
|
124 |
+
| arc_challenge | 0.6109 | 0.5956 | |
|
125 |
+
| gsm8k(5shot) strict match | 0.9083 | 0.9067 | |
|
126 |
|
127 |
## Generate the model
|
128 |
|