Spaces:
Runtime error
Runtime error
mounseflit
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import torch
|
2 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
from peft import PeftModel
|
4 |
import gradio as gr
|
5 |
|
@@ -10,10 +10,17 @@ fine_tuned_model = "mounseflit/falcon-7b-marrakech"
|
|
10 |
# Load tokenizer
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
12 |
|
13 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
base_model = AutoModelForCausalLM.from_pretrained(
|
15 |
model_name,
|
16 |
-
|
17 |
device_map="auto", # Auto device map for offloading
|
18 |
offload_folder="offload", # Offload large parts of the model to disk
|
19 |
offload_state_dict=True # Enable state dict offloading to reduce memory usage
|
|
|
1 |
import torch
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
3 |
from peft import PeftModel
|
4 |
import gradio as gr
|
5 |
|
|
|
10 |
# Load tokenizer
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
12 |
|
13 |
+
# Set up 8-bit quantization config
|
14 |
+
quantization_config = BitsAndBytesConfig(
|
15 |
+
load_in_8bit=True, # Use 8-bit quantization
|
16 |
+
llm_int8_threshold=6.0, # Adjust for better precision
|
17 |
+
llm_int8_has_fp16_weight=False # More compatible for CPU usage
|
18 |
+
)
|
19 |
+
|
20 |
+
# Load base model with 8-bit quantization and CPU offloading
|
21 |
base_model = AutoModelForCausalLM.from_pretrained(
|
22 |
model_name,
|
23 |
+
quantization_config=quantization_config, # Pass quantization config
|
24 |
device_map="auto", # Auto device map for offloading
|
25 |
offload_folder="offload", # Offload large parts of the model to disk
|
26 |
offload_state_dict=True # Enable state dict offloading to reduce memory usage
|