Caslow committed on
Commit
f6dfa4e
·
1 Parent(s): 8a3e945
Files changed (1) hide show
  1. inference.py +8 -9
inference.py CHANGED
@@ -22,20 +22,19 @@ def load_model(
22
  Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
23
  """
24
 
25
- try:
26
- from transformers import BitsAndBytesConfig
27
- bnb_config = BitsAndBytesConfig(load_in_4bit=False)
28
- except ImportError:
29
- bnb_config = None
 
 
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_name)
32
 
33
  model = AutoModelForCausalLM.from_pretrained(
34
  pretrained_model_name_or_path=model_name,
35
- device_map="cpu",
36
- torch_dtype=torch.float32, # Use float32 for CPU
37
- low_cpu_mem_usage=True, # Helps with memory efficiency
38
- quantization_config=bnb_config
39
  )
40
 
41
  model.eval() # Set model to evaluation mode
 
22
  Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
23
  """
24
 
25
+ kwargs = {
26
+ "device_map": "cpu",
27
+ "torch_dtype": torch.float32,
28
+ "low_cpu_mem_usage": True,
29
+ "_from_auto": False, # Prevent automatic quantization detection
30
+ "quantization_config": None # Explicitly set no quantization
31
+ }
32
 
33
  tokenizer = AutoTokenizer.from_pretrained(model_name)
34
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
  pretrained_model_name_or_path=model_name,
37
+ **kwargs
 
 
 
38
  )
39
 
40
  model.eval() # Set model to evaluation mode