ChristopherMarais committed
Commit ab16bf8 · verified · 1 Parent(s): 25de2c4

Update app.py

Files changed (1)
  1. app.py +10 -6
app.py CHANGED
@@ -6,7 +6,7 @@ from cryptography.fernet import Fernet
 # --- LangChain / RAG Imports ---
 from langchain_community.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
+from langchain.memory import ConversationSummaryMemory #ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 
 def load_decrypted_preprompt(file_path="pre_prompt.enc"):
@@ -71,9 +71,10 @@ def initialize_qa_chain(temperature, max_tokens, top_k, vector_db):
         task="text-generation"
     )
 
-    memory = ConversationBufferMemory(
+    memory = ConversationSummaryMemory(
+        llm=llm,
+        max_token_limit=500, # Adjust this to control the summary size
         memory_key="chat_history",
-        output_key="answer",
         return_messages=True
     )
 
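A rough sketch of how the new ConversationSummaryMemory typically attaches to the ConversationalRetrievalChain built in initialize_qa_chain. The rest of that function is not shown in this diff, so the retriever settings and the use of top_k below are assumptions, not the app's actual code.

# Sketch only: wiring ConversationSummaryMemory into a ConversationalRetrievalChain.
# `llm` and `vector_db` stand in for the objects created earlier in initialize_qa_chain.
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

def build_chain_sketch(llm, vector_db, top_k):
    # Summarise earlier turns with the LLM instead of replaying them verbatim
    # (the behaviour ConversationBufferMemory had before this commit).
    memory = ConversationSummaryMemory(
        llm=llm,
        memory_key="chat_history",   # must match the chain's history input key
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(search_kwargs={"k": top_k}),  # assumed retriever setup
        memory=memory,
    )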
 
@@ -118,19 +119,21 @@ def update_chat(message, history):
 def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
     """
     Generate the assistant's response using the QA chain (if available) or fallback to plain chat.
-    Assumes that the user's message is already appended to the history.
+    The pre-prompt is always included by concatenating it to the user's new question.
     """
     qa_chain = qa_chain_state_dict.get("qa_chain")
 
     if qa_chain is not None:
         # Format history to the plain-text format expected by the QA chain
         formatted_history = format_chat_history(history)
-        response = qa_chain.invoke({"question": message, "chat_history": formatted_history})
+        # Prepend the pre-prompt to the current question
+        combined_question = PRE_PROMPT + "\n" + message
+        response = qa_chain.invoke({"question": combined_question, "chat_history": formatted_history})
         answer = response.get("answer", "")
         history.append({"role": "assistant", "content": answer})
         return history, {"qa_chain": qa_chain}
 
-    # Fallback: Plain Chat Mode using the InferenceClient
+    # Fallback: Plain Chat Mode using the InferenceClient (pre-prompt already included here)
     messages = [{"role": "system", "content": PRE_PROMPT}] + history
     response = ""
     result = client.chat_completion(
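For context on the history formatting in this hunk: format_chat_history is referenced but not defined in the diff. The helper below is purely hypothetical and only illustrates the kind of conversion meant by "the plain-text format expected by the QA chain", assuming Gradio-style {"role", "content"} messages and the (human, ai) tuple format that ConversationalRetrievalChain accepts for chat_history.

# Hypothetical illustration, not the app's actual format_chat_history implementation.
def format_chat_history(history):
    """Pair up user/assistant messages as (human, ai) tuples for the QA chain."""
    pairs = []
    pending_user = None
    for msg in history:
        if msg["role"] == "user":
            pending_user = msg["content"]
        elif msg["role"] == "assistant" and pending_user is not None:
            pairs.append((pending_user, msg["content"]))
            pending_user = None
    return pairs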
@@ -147,6 +150,7 @@ def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_
     history.append({"role": "assistant", "content": response})
     return history, {"qa_chain": qa_chain}
 
+
 # Global InferenceClient for plain chat (fallback)
 client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
 