Aakash Vardhan committed
Commit 629311e · 1 Parent(s): 2910f9f

Files changed (1):
  1. app.py (+2 -10)
app.py CHANGED

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 from config import load_config
 
@@ -19,13 +19,9 @@ if "torch_dtype" in model_config:
     elif model_config["torch_dtype"] == "bfloat16":
         model_config["torch_dtype"] = torch.bfloat16
 
-# Create quantization config
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-
-# Load the model with quantization config
+# Load the model without quantization config
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=quantization_config,
     low_cpu_mem_usage=True,
     **model_config
 )
@@ -34,14 +30,12 @@ checkpoint_model = "checkpoint_dir/checkpoint-650"
 
 model.load_adapter(checkpoint_model)
 
-
 tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = "right"
 
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-
 def respond(message, history):
     system_message = """You are General Knowledge Assistant.
     Answer the questions based on the provided information.
@@ -73,7 +67,6 @@ def respond(message, history):
     new_text = outputs[0]["generated_text"][len(prompt) :]
     return new_text.strip()
 
-
 examples = [
     ["Suggest some breeds that get along with each other"],
     ["Explain LLM in AI"],
@@ -90,6 +83,5 @@ demo = gr.ChatInterface(
     description="Ask me anything about general knowledge. I'll try to answer succinctly using first principles.",
 )
 
-
 if __name__ == "__main__":
     demo.launch(debug=True)
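
For context, the lines this commit removes configured 8-bit loading via bitsandbytes, which generally requires a CUDA GPU; on CPU-only hardware (such as a free Hugging Face Space) the quantized load fails, which is presumably why it was dropped. Below is a minimal sketch of the removed path, should it need restoring on GPU hardware. The model_name placeholder is hypothetical; in app.py it is resolved from load_config.

# Sketch of the 8-bit load path removed by this commit (assumes a CUDA GPU
# and the bitsandbytes package; not part of the committed code).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "<base-model-id>"  # hypothetical; app.py resolves this from config

# Quantize linear-layer weights to int8 at load time, cutting memory
# roughly in half versus fp16 (about 4x versus fp32).
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
    device_map="auto",  # place the quantized layers on the available GPU(s)
)

The tradeoff: 8-bit loading saves memory but is effectively CUDA-only, so removing it likely lets the app run on CPU at whatever dtype model_config specifies.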