Update app.py
app.py CHANGED
@@ -6,7 +6,7 @@ from cryptography.fernet import Fernet
 # --- LangChain / RAG Imports ---
 from langchain_community.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
+from langchain.memory import ConversationSummaryMemory #ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 
 def load_decrypted_preprompt(file_path="pre_prompt.enc"):
@@ -71,9 +71,10 @@ def initialize_qa_chain(temperature, max_tokens, top_k, vector_db):
         task="text-generation"
     )
 
-    memory = ConversationBufferMemory(
+    memory = ConversationSummaryMemory(
+        llm=llm,
+        max_token_limit=500,  # Adjust this to control the summary size
         memory_key="chat_history",
-        output_key="answer",
         return_messages=True
     )
 
@@ -118,19 +119,21 @@ def update_chat(message, history):
 def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
     """
     Generate the assistant's response using the QA chain (if available) or fallback to plain chat.
-
+    The pre-prompt is always included by concatenating it to the user's new question.
     """
     qa_chain = qa_chain_state_dict.get("qa_chain")
 
     if qa_chain is not None:
         # Format history to the plain-text format expected by the QA chain
         formatted_history = format_chat_history(history)
-        response = qa_chain.invoke({"question": message, "chat_history": formatted_history})
+        # Prepend the pre-prompt to the current question
+        combined_question = PRE_PROMPT + "\n" + message
+        response = qa_chain.invoke({"question": combined_question, "chat_history": formatted_history})
         answer = response.get("answer", "")
         history.append({"role": "assistant", "content": answer})
         return history, {"qa_chain": qa_chain}
 
-    # Fallback: Plain Chat Mode using the InferenceClient
+    # Fallback: Plain Chat Mode using the InferenceClient (pre-prompt already included here)
     messages = [{"role": "system", "content": PRE_PROMPT}] + history
     response = ""
     result = client.chat_completion(
@@ -147,6 +150,7 @@ def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_
     history.append({"role": "assistant", "content": response})
     return history, {"qa_chain": qa_chain}
 
+
 # Global InferenceClient for plain chat (fallback)
 client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
 
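
For orientation, a minimal sketch of how the changed pieces would sit inside initialize_qa_chain, assuming a standard ConversationalRetrievalChain setup. The retriever wiring and the HuggingFaceEndpoint repo_id are not visible in these hunks, so they are assumptions here, not part of the commit.

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from langchain_huggingface import HuggingFaceEndpoint


def initialize_qa_chain(temperature, max_tokens, top_k, vector_db):
    # Assumed repo_id: the model used for the endpoint is not shown in this diff;
    # the same id as the plain-chat InferenceClient is used for illustration.
    llm = HuggingFaceEndpoint(
        repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        temperature=temperature,
        max_new_tokens=max_tokens,
        task="text-generation",
    )

    # New in this commit: a summarizing memory instead of a full buffer, so long
    # chats are condensed by the LLM rather than replayed verbatim.
    # (The commit also passes max_token_limit=500 here to cap the summary size.)
    memory = ConversationSummaryMemory(
        llm=llm,
        memory_key="chat_history",
        return_messages=True,
    )

    # Assumed wiring: the retriever and chain construction are unchanged elsewhere in app.py.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(search_kwargs={"k": top_k}),
        memory=memory,
    )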