inference.py CHANGED (+8 -9)
@@ -22,20 +22,19 @@ def load_model(
         Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
     """
 
-
-
-
-
-
+    kwargs = {
+        "device_map": "cpu",
+        "torch_dtype": torch.float32,
+        "low_cpu_mem_usage": True,
+        "_from_auto": False,  # Prevent automatic quantization detection
+        "quantization_config": None  # Explicitly set no quantization
+    }
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     model = AutoModelForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_name,
-
-        torch_dtype=torch.float32,  # Use float32 for CPU
-        low_cpu_mem_usage=True,  # Helps with memory efficiency
-        quantization_config=bnb_config
+        **kwargs
     )
 
     model.eval()  # Set model to evaluation mode
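
For reference, a minimal runnable sketch of load_model as it stands after this change, assuming the standard transformers API. The imports, the function signature, and the return statement are not visible in the diff and are inferred from the hunk header and the docstring; note that _from_auto is a private transformers argument, so keeping it relies on internal library behavior.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_name: str):
    """Load a causal LM and its tokenizer for CPU-only, unquantized inference.

    Returns:
        Tuple containing the model and tokenizer
    """
    kwargs = {
        "device_map": "cpu",           # requires `accelerate` to be installed
        "torch_dtype": torch.float32,  # full precision; CPUs lack fast fp16 paths
        "low_cpu_mem_usage": True,     # stream weights in instead of double-allocating
        "_from_auto": False,           # private kwarg: prevent automatic quantization detection
        "quantization_config": None,   # explicitly set no quantization
    }

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name,
        **kwargs,
    )

    model.eval()  # Set model to evaluation mode
    return model, tokenizer

The net effect of the commit: the bitsandbytes path (bnb_config) is dropped and the loading options are gathered into a single kwargs dict that explicitly disables quantization, which fits the cpu hardware this Space runs on, since bitsandbytes quantization generally requires a GPU.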