muhtasham commited on
Commit
e90195b
·
verified ·
1 Parent(s): 2b8ea72

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -0
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
```bash
git clone https://github.com/neuralmagic/AutoFP8.git
pip install -e AutoFP8
```

```python
from transformers import AutoTokenizer
from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig
from datasets import load_dataset

pretrained_model_dir = "Unbabel/TowerInstruct-7B-v0.1"
quantized_model_dir = "TowerInstruct-7B-v0.1-FP8"

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(512))
examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")

quantize_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")


model = AutoFP8ForCausalLM.from_pretrained(
    pretrained_model_dir, quantize_config=quantize_config
)
model.quantize(examples)
model.save_quantized(quantized_model_dir)
```