Spaces: Build error
mattritchey committed
Commit • c5b6761
Parent(s): e82cb6d
Update app.py
app.py
CHANGED
@@ -24,6 +24,9 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import TextIteratorStreamer
 from threading import Thread
 
+#MR Added
+from transformers import GPTQConfig
+
 # Prompt template
 template = """Instruction:
 You are an AI assistant for answering questions about the provided context.
@@ -44,7 +47,12 @@ QA_PROMPT = PromptTemplate(
 model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
 
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
+
+quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True) #MR Added
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto",
+                                             trust_remote_code=True,
+                                             quantization_config=quantization_config_loading #MR Added
+                                             )
 
 # sentence transformers to be used in vector store
 embeddings = HuggingFaceEmbeddings(