mattritchey committed on
Commit
c5b6761
1 Parent(s): e82cb6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -24,6 +24,9 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
24
  from transformers import TextIteratorStreamer
25
  from threading import Thread
26
 
 
 
 
27
  # Prompt template
28
  template = """Instruction:
29
  You are an AI assistant for answering questions about the provided context.
@@ -44,7 +47,12 @@ QA_PROMPT = PromptTemplate(
44
  model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
45
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
47
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto", trust_remote_code=True)
 
 
 
 
 
48
 
49
  # sentence transformers to be used in vector store
50
  embeddings = HuggingFaceEmbeddings(
 
24
  from transformers import TextIteratorStreamer
25
  from threading import Thread
26
 
27
+ #MR Added
28
+ from transformers import GPTQConfig
29
+
30
  # Prompt template
31
  template = """Instruction:
32
  You are an AI assistant for answering questions about the provided context.
 
47
  model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
48
 
49
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
50
+
51
+ quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True) #MR Added
52
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto",
53
+ trust_remote_code=True,
54
+ quantization_config=quantization_config_loading #MR Added
55
+ )
56
 
57
  # sentence transformers to be used in vector store
58
  embeddings = HuggingFaceEmbeddings(