mattritchey committed on
Commit
c5b6761
1 Parent(s): e82cb6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -24,6 +24,9 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
24
  from transformers import TextIteratorStreamer
25
  from threading import Thread
26
 
 
 
 
27
  # Prompt template
28
  template = """Instruction:
29
  You are an AI assistant for answering questions about the provided context.
@@ -44,7 +47,12 @@ QA_PROMPT = PromptTemplate(
44
  model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
45
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
47
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto", trust_remote_code=True)
 
 
 
 
 
48
 
49
  # sentence transformers to be used in vector store
50
  embeddings = HuggingFaceEmbeddings(
 
24
  from transformers import TextIteratorStreamer
25
  from threading import Thread
26
 
27
+ #MR Added
28
+ from transformers import GPTQConfig
29
+
30
  # Prompt template
31
  template = """Instruction:
32
  You are an AI assistant for answering questions about the provided context.
 
47
  model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
48
 
49
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
50
+
51
+ quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True) #MR Added
52
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto",
53
+ trust_remote_code=True,
54
+ quantization_config=quantization_config_loading #MR Added
55
+ )
56
 
57
  # sentence transformers to be used in vector store
58
  embeddings = HuggingFaceEmbeddings(