mobicham committed · Commit 66f3e32 · verified · 1 parent: eaea27b

Update README.md

Files changed (1):
  1. README.md (+5 -5)

README.md CHANGED
````diff
@@ -47,9 +47,9 @@ You can reproduce the results above via `pip install lm-eval==0.4.3`
 First, install the dependencies:
 ```
 pip install git+https://github.com/mobiusml/hqq.git #master branch fix
-pip install bitblas
+pip install bitblas #if you use the bitblas backend
 ```
-Also, make sure you use at least torch `2.4.0` or the nightly build.
+Also, make sure you use at least torch `2.4.0` or the nightly build with at least CUDA 12.1.
 
 Then you can use the sample code below:
 ``` Python
@@ -65,7 +65,7 @@ from hqq.utils.generation_hf import HFGenerator
 #model_id = 'mobiuslabsgmbh/Llama-3.1-8b-instruct_4bitgs64_hqq' #no calib version
 model_id = 'mobiuslabsgmbh/Llama-3.1-8b-instruct_4bitgs64_hqq_calib' #calibrated version
 
-compute_dtype = torch.float16 #bfloat16 for torchao, float16 for bitblas
+compute_dtype = torch.bfloat16 #bfloat16 for torchao_int4, float16 for bitblas
 cache_dir = '.'
 model = AutoHQQHFModel.from_quantized(model_id, cache_dir=cache_dir, compute_dtype=compute_dtype)
 tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
@@ -77,8 +77,8 @@ patch_linearlayers(model, patch_add_quant_config, quant_config)
 ###################################################
 HQQLinear.set_backend(HQQBackend.PYTORCH)
 #prepare_for_inference(model) #default backend
-#prepare_for_inference(model, backend="torchao_int4")
-prepare_for_inference(model, backend="bitblas") #takes a while to init...
+prepare_for_inference(model, backend="torchao_int4")
+#prepare_for_inference(model, backend="bitblas") #takes a while to init...
 
 #Generate
 ###################################################
````
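For context, the post-change snippet pieces together roughly as below. This is a minimal sketch assembled from the fragments visible in the hunks; the exact import paths, the `quant_config` values, and the final `HFGenerator` call are assumptions, since the surrounding README lines are not part of this diff.

``` Python
# Minimal sketch reconstructed from the post-change hunks in this commit.
# Imports, quant_config values, and the generation call are assumptions
# (the diff only shows the changed lines and their immediate context).
import torch
from transformers import AutoTokenizer
from hqq.models.hf.base import AutoHQQHFModel
from hqq.core.quantize import HQQLinear, HQQBackend, BaseQuantizeConfig
from hqq.utils.patching import patch_linearlayers, patch_add_quant_config, prepare_for_inference
from hqq.utils.generation_hf import HFGenerator

# Load the calibrated 4-bit model, as shown in the diff
model_id = 'mobiuslabsgmbh/Llama-3.1-8b-instruct_4bitgs64_hqq_calib'
compute_dtype = torch.bfloat16  # bfloat16 for torchao_int4, float16 for bitblas
cache_dir = '.'
model = AutoHQQHFModel.from_quantized(model_id, cache_dir=cache_dir, compute_dtype=compute_dtype)
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)

# Assumed quant config matching the model name (4 bits, group size 64);
# the actual values live in README lines not shown in this diff.
quant_config = BaseQuantizeConfig(nbits=4, group_size=64, axis=1)
patch_linearlayers(model, patch_add_quant_config, quant_config)

# Backend selection: this commit makes torchao_int4 the active choice
HQQLinear.set_backend(HQQBackend.PYTORCH)
prepare_for_inference(model, backend="torchao_int4")
#prepare_for_inference(model, backend="bitblas")  # takes a while to init; pair with float16

# Generate (assumed HFGenerator usage; only its import appears in the hunks)
gen = HFGenerator(model, tokenizer, max_new_tokens=512, do_sample=True, compile="partial")
gen.generate("Write a short poem about quantization.", print_tokens=True)
```

The net effect of the commit: `torchao_int4` (with `torch.bfloat16`) becomes the default backend in the sample, while `bitblas` (which requires `torch.float16`, CUDA >= 12.1, and is slow to initialize) is demoted to a commented-out, opt-in alternative with its own install step.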