Pereki committed on
Commit
8e84e9d
1 Parent(s): 6df4b5b

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +6 -1
handler.py CHANGED
@@ -1,13 +1,18 @@
1
  from typing import Dict, List, Any
2
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
 
4
 
5
  class EndpointHandler():
6
  def __init__(self, path=""):
 
 
 
 
7
  # load the optimized model
8
  tokenizer = AutoTokenizer.from_pretrained(path)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  path,
 
11
  device_map="auto",
12
  torch_dtype='auto'
13
  ).eval()
 
1
  from typing import Dict, List, Any
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
 
4
 
5
  class EndpointHandler():
6
  def __init__(self, path=""):
7
+ quantization_config = BitsAndBytesConfig(
8
+ load_in_4bit=True,
9
+ bnb_4bit_compute_dtype=torch.float16
10
+ )
11
  # load the optimized model
12
  tokenizer = AutoTokenizer.from_pretrained(path)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  path,
15
+ quantization_config=quantization_config
16
  device_map="auto",
17
  torch_dtype='auto'
18
  ).eval()