clementsan committed · Commit 067316d
Parent(s): 146ca67
Add trust_remote_code condition for phi2 model

app.py CHANGED
@@ -71,7 +71,7 @@ def load_db():
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
     # HuggingFacePipeline uses local model
-    #
+    # Note: it will download model locally...
     # tokenizer=AutoTokenizer.from_pretrained(llm_model)
     # progress(0.5, desc="Initializing HF pipeline...")
     # pipeline=transformers.pipeline(
@@ -92,11 +92,20 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 
     # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
-
-
-
-
-
+    # Use of trust_remote_code as model_kwargs
+    # Warning: langchain issue
+    # URL: https://github.com/langchain-ai/langchain/issues/6080
+    if llm_model == "microsoft/phi-2":
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+        )
+    else:
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+        )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
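
The two branches added here differ only in the extra model_kwargs entries for phi-2. As a minimal sketch (not part of the commit), the same condition could be expressed once by building the kwargs dict incrementally; the helper name build_llm and its parameters are illustrative assumptions, and the sketch presumes langchain's HuggingFaceHub class with HUGGINGFACEHUB_API_TOKEN set in the environment.

# Sketch only: build_llm is a hypothetical helper, not code from this commit.
# Assumes langchain's HuggingFaceHub and HUGGINGFACEHUB_API_TOKEN in the env.
from langchain.llms import HuggingFaceHub

def build_llm(llm_model, temperature, max_tokens, top_k):
    # Sampling kwargs common to every model served via the HF Hub endpoint.
    model_kwargs = {"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
    if llm_model == "microsoft/phi-2":
        # phi-2 shipped custom modeling code at the time, hence trust_remote_code;
        # passed inside model_kwargs to work around
        # https://github.com/langchain-ai/langchain/issues/6080
        model_kwargs.update({"trust_remote_code": True, "torch_dtype": "auto"})
    return HuggingFaceHub(repo_id=llm_model, model_kwargs=model_kwargs)

With this shape the commented-out kwargs line in the else branch disappears, and supporting another model that needs trust_remote_code becomes a one-line change to the condition.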