Aakash Vardhan committed
Commit 12289b8 · 1 Parent(s): 21aa301
Files changed (1):
  1. app.py +32 -24
app.py CHANGED
@@ -5,38 +5,46 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from config import load_config
 
 config = load_config("config.yaml")
-
 model_config = config["model_config"]
-
 model_name = model_config.pop("model_name")
-
-# Convert torch_dtype from string to torch.dtype
-if "torch_dtype" in model_config:
-    if model_config["torch_dtype"] == "float32":
-        model_config["torch_dtype"] = torch.float32
-    elif model_config["torch_dtype"] == "float16":
-        model_config["torch_dtype"] = torch.float16
-    elif model_config["torch_dtype"] == "bfloat16":
-        model_config["torch_dtype"] = torch.bfloat16
-
-# Load the model without quantization config
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    low_cpu_mem_usage=True,
-    **model_config
-)
-
 checkpoint_model = "checkpoint_dir/checkpoint-650"
 
-model.load_adapter(checkpoint_model)
+# Global variables for model and tokenizer
+model = None
+tokenizer = None
+pipe = None
+
+def load_model_and_tokenizer():
+    global model, tokenizer, pipe
+    if model is None:
+        print("Loading model and tokenizer...")
+        # Convert torch_dtype from string to torch.dtype
+        if "torch_dtype" in model_config:
+            if model_config["torch_dtype"] == "float32":
+                model_config["torch_dtype"] = torch.float32
+            elif model_config["torch_dtype"] == "float16":
+                model_config["torch_dtype"] = torch.float16
+            elif model_config["torch_dtype"] == "bfloat16":
+                model_config["torch_dtype"] = torch.bfloat16
+
+        # Load the model without quantization config
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            low_cpu_mem_usage=True,
+            **model_config
+        )
+
+        model.load_adapter(checkpoint_model)
 
-tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = "right"
+        tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "right"
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+        print("Model and tokenizer loaded successfully.")
 
 def respond(message, history):
+    load_model_and_tokenizer()
     system_message = """You are General Knowledge Assistant.
     Answer the questions based on the provided information.
     Be succinct and use first-principles thinking to answer the questions."""
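
Note on configuration: the code above expects load_config("config.yaml") to return a dict whose "model_config" entry contains a "model_name" key plus optional keyword arguments such as a "torch_dtype" string ("float32", "float16", or "bfloat16"). The config file itself is not part of this commit, so the sketch below is only an inferred shape, and the model name is a placeholder.

# Hypothetical return value of load_config("config.yaml"), inferred from how
# app.py consumes it (pop("model_name"), optional "torch_dtype" string that
# load_model_and_tokenizer() maps to a torch.dtype). Not part of this commit.
config = {
    "model_config": {
        "model_name": "your-base-model-id",  # placeholder base model identifier
        "torch_dtype": "bfloat16",           # one of "float32", "float16", "bfloat16"
    }
}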