jerryzh168 committed on
Commit
a682fd2
·
verified ·
1 Parent(s): c35ad17

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -7
README.md CHANGED
@@ -100,8 +100,8 @@ quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dt
100
  tokenizer = AutoTokenizer.from_pretrained(model_id)
101
 
102
  # Push to hub
103
- USER_ID = "YOUR_USER_ID"
104
- save_to = f"{USER_ID}/phi4-mini-8dq4w"
105
  quantized_model.push_to_hub(save_to, safe_serialization=False)
106
  tokenizer.push_to_hub(save_to)
107
 
@@ -133,7 +133,7 @@ print("Response:", output_text[0][len(prompt):])
133
 
134
  # Save to disk
135
  state_dict = quantized_model.state_dict()
136
- torch.save(state_dict, "phi4-mini-8dq4w.bin")
137
 
138
  ```
139
 
@@ -154,7 +154,7 @@ Need to install lm-eval from source: https://github.com/EleutherAI/lm-evaluation
154
  lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
155
  ```
156
 
157
- ## 8dq4w
158
  ```
159
  import lm_eval
160
  from lm_eval import evaluator
@@ -171,7 +171,7 @@ print(make_table(results))
171
 
172
  | Benchmark | | |
173
  |----------------------------------|-------------|-------------------|
174
- | | Phi-4 mini-Ins | phi4-mini-8dq4w|
175
  | **Popular aggregated benchmark** | | |
176
  | mmlu (0 shot) | 66.73 | 63.11 |
177
  | mmlu_pro (5-shot) | 44.71 | 35.31 |
@@ -208,13 +208,13 @@ Once the checkpoint is converted, we can export to ExecuTorch's PTE format with
208
  PARAMS="executorch/examples/models/phi_4_mini/config.json"
209
  python -m executorch.examples.models.llama.export_llama \
210
  --model "phi_4_mini" \
211
- --checkpoint "phi4-mini-8dq4w-converted.bin" \
212
  --params "$PARAMS" \
213
  -kv \
214
  --use_sdpa_with_kv_cache \
215
  -X \
216
  --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
217
- --output_name="phi4-mini-8dq4w.pte"
218
  ```
219
 
220
  ## Running in a mobile app
 
100
  tokenizer = AutoTokenizer.from_pretrained(model_id)
101
 
102
  # Push to hub
103
+ USER_ID = "YOUR_USER_ID"
+ MODEL_NAME = model_id.split("/")[-1]
104
+ save_to = f"{USER_ID}/{MODEL_NAME}-untied-8da4w"
105
  quantized_model.push_to_hub(save_to, safe_serialization=False)
106
  tokenizer.push_to_hub(save_to)
107
 
 
133
 
134
  # Save to disk
135
  state_dict = quantized_model.state_dict()
136
+ torch.save(state_dict, "phi4-mini-8da4w.bin")
137
 
138
  ```
139
 
 
154
  lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
155
  ```
156
 
157
+ ## int8 dynamic activation and int4 weight quantization (8da4w)
158
  ```
159
  import lm_eval
160
  from lm_eval import evaluator
 
171
 
172
  | Benchmark | | |
173
  |----------------------------------|-------------|-------------------|
174
+ | | Phi-4 mini-Ins | phi4-mini-8da4w|
175
  | **Popular aggregated benchmark** | | |
176
  | mmlu (0 shot) | 66.73 | 63.11 |
177
  | mmlu_pro (5-shot) | 44.71 | 35.31 |
 
208
  PARAMS="executorch/examples/models/phi_4_mini/config.json"
209
  python -m executorch.examples.models.llama.export_llama \
210
  --model "phi_4_mini" \
211
+ --checkpoint "phi4-mini-8da4w-converted.bin" \
212
  --params "$PARAMS" \
213
  -kv \
214
  --use_sdpa_with_kv_cache \
215
  -X \
216
  --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
217
+ --output_name="phi4-mini-8da4w.pte"
218
  ```
219
 
220
  ## Running in a mobile app