Aakash Vardhan committed
Commit 629311e · 1 Parent(s): 2910f9f

Files changed (1):
  1. app.py (+2 -10)
app.py CHANGED

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 from config import load_config
 
@@ -19,13 +19,9 @@ if "torch_dtype" in model_config:
     elif model_config["torch_dtype"] == "bfloat16":
         model_config["torch_dtype"] = torch.bfloat16
 
-# Create quantization config
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-
-# Load the model with quantization config
+# Load the model without quantization config
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=quantization_config,
     low_cpu_mem_usage=True,
     **model_config
 )
@@ -34,14 +30,12 @@ checkpoint_model = "checkpoint_dir/checkpoint-650"
 
 model.load_adapter(checkpoint_model)
 
-
 tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = "right"
 
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-
 def respond(message, history):
     system_message = """You are General Knowledge Assistant.
     Answer the questions based on the provided information.
@@ -73,7 +67,6 @@ def respond(message, history):
     new_text = outputs[0]["generated_text"][len(prompt) :]
     return new_text.strip()
 
-
 examples = [
     ["Suggest some breeds that get along with each other"],
     ["Explain LLM in AI"],
@@ -90,6 +83,5 @@ demo = gr.ChatInterface(
     description="Ask me anything about general knowledge. I'll try to answer succinctly using first principles.",
 )
 
-
 if __name__ == "__main__":
     demo.launch(debug=True)
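
For context, the lines this commit removes configured 8-bit loading via bitsandbytes, which generally requires a CUDA GPU; on CPU-only hardware (such as a free Hugging Face Space) the quantized load fails, which is presumably why it was dropped. Below is a minimal sketch of the removed path, should it need restoring on GPU hardware. The model_name placeholder is hypothetical; in app.py it is resolved from load_config.

# Sketch of the 8-bit load path removed by this commit (assumes a CUDA GPU
# and the bitsandbytes package; not part of the committed code).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "<base-model-id>"  # hypothetical; app.py resolves this from config

# Quantize linear-layer weights to int8 at load time, cutting memory
# roughly in half versus fp16 (about 4x versus fp32).
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
    device_map="auto",  # place the quantized layers on the available GPU(s)
)

The tradeoff: 8-bit loading saves memory but is effectively CUDA-only, so removing it likely lets the app run on CPU at whatever dtype model_config specifies.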