Clean and try different embedding
README.md CHANGED
@@ -9,7 +9,6 @@ app_file: app.py
 pinned: false
 license: mit
 short_description: Chatbot assistant for the CAMELS simulations documentation
-python_version: 3.8
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -14,7 +14,7 @@ document_path.write_bytes(r.content)
 worker.process_document(document_path)
 
 def handle_prompt(message, history):
-    bot_response = worker.process_prompt(message)
+    bot_response = worker.process_prompt(message, history)
     return bot_response
 
 greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
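With this change the Gradio callback forwards the whole running conversation to the worker instead of only the latest message. The diff does not show how handle_prompt is registered, so the wiring below is only a sketch of a typical gr.ChatInterface setup; the launch call and its arguments are assumptions, not code from app.py.

# Sketch only: how a handler with this signature is commonly registered with
# gr.ChatInterface; the actual launch code in app.py is not part of this diff.
import gradio as gr
import worker

def handle_prompt(message, history):
    # history is the conversation Gradio keeps for the session; it is now
    # forwarded so the worker can pass it along as chat_history.
    return worker.process_prompt(message, history)

greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"

gr.ChatInterface(handle_prompt, description=greetingsmessage).launch()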
worker.py CHANGED
@@ -15,7 +15,7 @@ def install(package):
     pip._internal.main(['install', package])
 
 # Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
-install("sentence-transformers==2.2.2")
+# install("sentence-transformers==2.2.2")
 
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -37,9 +37,9 @@ def init_llm():
 
     # repo name for the model
    # model_id = "tiiuae/falcon-7b-instruct"
-
+    model_id = "microsoft/Phi-3.5-mini-instruct"
     # model_id = "meta-llama/Llama-3.2-1B-Instruct"
-    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
     # load the model into the HuggingFaceHub
     llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
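The endpoint now targets microsoft/Phi-3.5-mini-instruct instead of Mixtral-8x7B-Instruct-v0.1. A standalone smoke test of the new repo id could look like the sketch below; it assumes HuggingFaceEndpoint is imported from langchain_huggingface (as the compatibility comment above suggests) and that a Hugging Face API token is available in the environment.

# Minimal check that the new repo id responds, mirroring the commented-out
# `llm_hub.invoke('foo bar')` probe left in the source; requires a Hugging Face
# API token in the environment.
from langchain_huggingface import HuggingFaceEndpoint

llm_hub = HuggingFaceEndpoint(
    repo_id="microsoft/Phi-3.5-mini-instruct",
    temperature=0.1,
    max_new_tokens=600,
)
print(llm_hub.invoke("What is the CAMELS project?"))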
@@ -47,8 +47,8 @@ def init_llm():
     # llm_hub.invoke('foo bar')
 
     #Initialize embeddings using a pre-trained model to represent the text data.
-    embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
-
+    # embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
+    embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
     embeddings = HuggingFaceInstructEmbeddings(
         model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
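The embedding model switches from multi-qa-distilbert-cos-v1 to all-MiniLM-L6-v2, which produces 384-dimensional sentence embeddings. A quick way to sanity-check the swap, mirroring the worker.py construction, is sketched below; the import path is an assumption, since the file's imports are outside this diff.

# Embed a single query with the swapped-in model; embed_query is part of the
# LangChain Embeddings interface that HuggingFaceInstructEmbeddings implements.
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

embeddings = HuggingFaceInstructEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)
vector = embeddings.embed_query("What is the box size of the CAMELS simulations?")
print(len(vector))  # all-MiniLM-L6-v2 returns 384-dimensional vectors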
@@ -75,10 +75,11 @@ def process_document(document_path):
     # By default, the vectorstore retriever uses similarity search.
     # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
     # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
+    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
     conversation_retrieval_chain = RetrievalQA.from_chain_type(
         llm=llm_hub,
         chain_type="stuff",
-        retriever=
+        retriever=retriever,
         return_source_documents=False,
         input_key = "question"
         # chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
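The retriever is now built explicitly with maximal marginal relevance: k=6 chunks are returned, and lambda_mult=0.25 sets the trade-off between similarity and diversity (1 is plain similarity ranking, 0 is maximum diversity). The equivalent direct call on the vector store is sketched below; the store itself is built elsewhere in process_document and is not part of this diff, so treating db as a LangChain vector store such as Chroma is an assumption.

# Same MMR retrieval expressed as a direct vector-store call; `db` is assumed to be
# a LangChain vector store (e.g. Chroma) that supports MMR search.
docs = db.max_marginal_relevance_search(
    "How can I read a CAMELS snapshot?",  # illustrative question
    k=6,                # number of chunks ultimately handed to the LLM
    lambda_mult=0.25,   # closer to 0 favours diverse chunks over near-duplicates
)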
@@ -86,9 +87,9 @@ def process_document(document_path):
 
 
 # Function to process a user prompt
-def process_prompt(prompt):
+def process_prompt(prompt, chat_history):
     global conversation_retrieval_chain
-    global chat_history
+    # global chat_history
 
     # Query the model
     output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
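process_prompt now receives the chat history from the caller instead of keeping it in a module-level global, so the Gradio handler and the worker share one history object. A direct call would look roughly like the sketch below; the list-of-pairs history format is the one older Gradio ChatInterface versions pass and is assumed here for illustration.

# Hypothetical direct use of the new signature; the exact history shape depends on
# the Gradio version, so [user_message, bot_message] pairs are only an assumption.
import worker

history = [["What is CAMELS?", "CAMELS is a suite of cosmological simulations."]]
print(worker.process_prompt("How many simulations does it contain?", history))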