Update README.md
Browse files
README.md
CHANGED
@@ -100,8 +100,8 @@ quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dt
|
|
100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
101 |
|
102 |
# Push to hub
|
103 |
-
|
104 |
-
save_to = f"{USER_ID}/
|
105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
106 |
tokenizer.push_to_hub(save_to)
|
107 |
|
@@ -133,7 +133,7 @@ print("Response:", output_text[0][len(prompt):])
|
|
133 |
|
134 |
# Save to disk
|
135 |
state_dict = quantized_model.state_dict()
|
136 |
-
torch.save(state_dict, "phi4-mini-
|
137 |
|
138 |
```
|
139 |
|
@@ -154,7 +154,7 @@ Need to install lm-eval from source: https://github.com/EleutherAI/lm-evaluation
|
|
154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
155 |
```
|
156 |
|
157 |
-
##
|
158 |
```
|
159 |
import lm_eval
|
160 |
from lm_eval import evaluator
|
@@ -171,7 +171,7 @@ print(make_table(results))
|
|
171 |
|
172 |
| Benchmark | | |
|
173 |
|----------------------------------|-------------|-------------------|
|
174 |
-
| | Phi-4 mini-Ins | phi4-mini-
|
175 |
| **Popular aggregated benchmark** | | |
|
176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
@@ -208,13 +208,13 @@ Once the checkpoint is converted, we can export to ExecuTorch's PTE format with
|
|
208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
209 |
python -m executorch.examples.models.llama.export_llama \
|
210 |
--model "phi_4_mini" \
|
211 |
-
--checkpoint "phi4-mini-
|
212 |
--params "$PARAMS" \
|
213 |
-kv \
|
214 |
--use_sdpa_with_kv_cache \
|
215 |
-X \
|
216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
217 |
-
--output_name="phi4-mini-
|
218 |
```
|
219 |
|
220 |
## Running in a mobile app
|
|
|
100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
101 |
|
102 |
# Push to hub
|
103 |
+
MODEL_NAME = model_id.split("/")[-1]
|
104 |
+
save_to = f"{USER_ID}/{MODEL_NAME}-untied-8da4w"
|
105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
106 |
tokenizer.push_to_hub(save_to)
|
107 |
|
|
|
133 |
|
134 |
# Save to disk
|
135 |
state_dict = quantized_model.state_dict()
|
136 |
+
torch.save(state_dict, "phi4-mini-8da4w.bin")
|
137 |
|
138 |
```
|
139 |
|
|
|
154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
155 |
```
|
156 |
|
157 |
+
## int8 dynamic activation and int4 weight quantization (8da4w)
|
158 |
```
|
159 |
import lm_eval
|
160 |
from lm_eval import evaluator
|
|
|
171 |
|
172 |
| Benchmark | | |
|
173 |
|----------------------------------|-------------|-------------------|
|
174 |
+
| | Phi-4 mini-Ins | phi4-mini-8da4w |
|
175 |
| **Popular aggregated benchmark** | | |
|
176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
|
|
208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
209 |
python -m executorch.examples.models.llama.export_llama \
|
210 |
--model "phi_4_mini" \
|
211 |
+
--checkpoint "phi4-mini-8da4w-converted.bin" \
|
212 |
--params "$PARAMS" \
|
213 |
-kv \
|
214 |
--use_sdpa_with_kv_cache \
|
215 |
-X \
|
216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
217 |
+
--output_name="phi4-mini-8da4w.pte"
|
218 |
```
|
219 |
|
220 |
## Running in a mobile app
|