ADKU committed
Commit cf9155f · verified · 1 Parent(s): 7ef58b7

Update app.py

Files changed (1):
  1. app.py +29 -29

app.py CHANGED
@@ -67,7 +67,7 @@ except Exception as e:
     raise
 
 # Generate SciBERT embeddings (optimized with larger batch size)
-def generate_embeddings_sci_bert(texts, batch_size=64):  # Increased batch size for efficiency
+def generate_embeddings_sci_bert(texts, batch_size=64):
     try:
         all_embeddings = []
         for i in range(0, len(texts), batch_size):
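Note: the hunk shows only the head of the batching loop. For readers following along, a minimal sketch of a batched SciBERT pass, assuming the usual allenai/scibert_scivocab_uncased checkpoint and mean pooling (neither is visible in this diff):

    # Hypothetical sketch -- checkpoint name and pooling are assumptions, not part of this commit
    import torch
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_uncased")
    model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased")

    def embed_batch(batch_texts):
        # Truncate so 1000-character chunks fit SciBERT's 512-token limit
        inputs = tokenizer(batch_texts, padding=True, truncation=True,
                           max_length=512, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        # Mean-pool token embeddings into one vector per text
        return outputs.last_hidden_state.mean(dim=1).numpy()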
@@ -122,7 +122,7 @@ def process_uploaded_pdf(file):
         for page in pdf_reader.pages:
             text += page.extract_text() or ""
         cleaned_text = clean_text(text)
-        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]  # Chunk for efficiency
+        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]
         embeddings = generate_embeddings_sci_bert(chunks)
         faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
         faiss_index.add(embeddings.astype(np.float32))
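Note: IndexFlatL2 performs exact, brute-force L2 search, a reasonable default for one PDF's worth of 1000-character chunks. A sketch of the build-and-query pattern the surrounding code relies on, reusing the names from this file with the query string and k=3 assumed for illustration:

    import faiss
    import numpy as np

    dim = embeddings.shape[1]                   # 768 for SciBERT
    index = faiss.IndexFlatL2(dim)              # exact (brute-force) L2 search
    index.add(embeddings.astype(np.float32))    # embeddings: (n_chunks, dim)

    query_vec = generate_embeddings_sci_bert(["What methods are used?"])
    _, ids = index.search(query_vec.astype(np.float32), 3)
    top_chunks = [chunks[i] for i in ids[0]]    # the 3 nearest 1000-char chunks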
@@ -148,17 +148,17 @@ def get_relevant_chunks(query, uploaded_doc):
         logger.error(f"RAG retrieval failed: {e}")
         return [], "Retrieval failed."
 
-# Unified QA function
+# Unified QA function (updated for messages format)
 def answer_question(mode, selected_index, question, history, uploaded_doc=None):
     if not question.strip():
-        return [(question, "Please ask a question!")], history
+        return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please ask a question!"}], history
     if question.lower() in ["exit", "done"]:
-        return [("Conversation ended.", "Start a new conversation!")], []
+        return history + [{"role": "user", "content": "Conversation ended."}, {"role": "assistant", "content": "Start a new conversation!"}], []
 
     try:
         if mode == "research":
             if selected_index is None:
-                return [(question, "Please select a paper first!")], history
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please select a paper first!"}], history
             paper_data = df.iloc[selected_index]
             title = paper_data["title"]
             abstract = paper_data["abstract"]
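Note: this commit migrates answer_question from Gradio's legacy tuple history to the messages format expected by gr.Chatbot(type="messages") (changed further down in this diff). One exchange looks like this in each format:

    # Tuple format (old): one entry per exchange
    history_tuples = [("What is SciBERT?", "A BERT variant pretrained on scientific text.")]

    # Messages format (new): two dicts per exchange
    history_messages = [
        {"role": "user", "content": "What is SciBERT?"},
        {"role": "assistant", "content": "A BERT variant pretrained on scientific text."},
    ]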
@@ -182,8 +182,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for user_q, bot_a in history[-2:]:
-                    prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
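Note: one behavioral side effect of the migration: in the tuple format, history[-2:] covered the last two exchanges, while in the messages format it covers the last two messages, i.e. a single exchange. If the old two-exchange context window was intended, a four-message slice would preserve it:

    # history[-2:] now spans one exchange (one user dict plus one assistant dict);
    # slicing four messages keeps the previous two-exchange window:
    for msg in history[-4:]:
        role = "User" if msg["role"] == "user" else "Assistant"
        prompt += f"{role}: {msg['content']}\n"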
@@ -199,7 +199,7 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
 
         elif mode == "rag":
             if uploaded_doc is None:
-                return [(question, "Please upload a document first!")], history
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please upload a document first!"}], history
             relevant_chunks, _ = get_relevant_chunks(question, uploaded_doc)
             context = "\n".join(relevant_chunks)
             prompt = (
@@ -209,8 +209,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for user_q, bot_a in history[-2:]:
-                    prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
@@ -222,18 +222,20 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for user_q, bot_a in history[-2:]:
-                    prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
             answer = response.text.strip()
 
-        history.append((question, answer))
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": answer})
         return history, history
     except Exception as e:
         logger.error(f"QA failed: {e}")
-        history.append((question, "Sorry, I couldn’t process that. Try again!"))
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": "Sorry, I couldn’t process that. Try again!"})
         return history, history
 
 # Gradio UI
@@ -256,7 +258,7 @@ with gr.Blocks(
         with gr.Column(scale=1, min_width=350, elem_classes="sidebar"):
             mode_tabs = gr.Tabs()
             with mode_tabs:
-                # Research Mode (unchanged backend)
+                # Research Mode
                 with gr.TabItem("Research Mode"):
                     gr.Markdown("### Search Papers")
                     query_input = gr.Textbox(label="Enter your search query", placeholder="e.g., machine learning in healthcare")
@@ -296,33 +298,28 @@ with gr.Blocks(
         with gr.Column(scale=3, elem_classes="tab-content"):
             gr.Markdown("### Chat Area")
             selected_display = gr.Markdown(label="Selected Context", value="Select a mode to begin!")
-            chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot")
+            chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot", type="messages")  # Updated to messages format
             question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
             chat_btn = gr.Button("Send")
 
             history_state = gr.State([])
             selected_index_state = gr.State(None)
 
-            def update_display(mode, choice, indices, uploaded_doc):
-                if mode == "research" and choice:
+            def update_display(selected_tab, choice, indices, uploaded_doc):
+                if selected_tab == "Research Mode" and choice:
                     index = int(choice.split(".")[0]) - 1
                     selected_idx = indices[index]
                     paper = df.iloc[selected_idx]
                     return f"**{paper['title']}**<br>DOI: [{paper['doi']}](https://doi.org/{paper['doi']})", selected_idx
-                elif mode == "rag" and uploaded_doc:
+                elif selected_tab == "RAG Mode" and uploaded_doc:
                     return "Uploaded Document Ready", None
-                elif mode == "general":
+                elif selected_tab == "General Chat":
                     return "General Chat Mode", None
                 return "Select a mode to begin!", None
 
             mode_tabs.select(
-                fn=lambda tab: ("research" if tab == "Research Mode" else "rag" if tab == "RAG Mode" else "general"),
-                inputs=None,
-                outputs=None,
-                _js="tab => tab"
-            ).then(
-                fn=update_display,
-                inputs=[mode_tabs, paper_dropdown, paper_indices_state, uploaded_doc_state],
+                fn=lambda selected_tab: update_display(selected_tab, paper_dropdown.value, paper_indices_state.value, uploaded_doc_state.value),
+                inputs=[mode_tabs],
                 outputs=[selected_display, selected_index_state]
             ).then(
                 fn=lambda: [],
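Note: the rewritten handler passes the Tabs component itself as an input and reads .value on the other components inside the lambda; in Gradio, a component's .value attribute holds its initial value rather than the live per-session state, so this may not track what the user has actually selected. A hypothetical alternative (not what this commit does) is to read the selected tab from the event data and pass the components as regular inputs:

    # Sketch using Gradio's SelectData event payload; evt.value is the selected tab's label
    def on_tab_select(evt: gr.SelectData, choice, indices, uploaded_doc):
        return update_display(evt.value, choice, indices, uploaded_doc)

    mode_tabs.select(
        fn=on_tab_select,
        inputs=[paper_dropdown, paper_indices_state, uploaded_doc_state],
        outputs=[selected_display, selected_index_state],
    )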
@@ -337,7 +334,10 @@ with gr.Blocks(
             )
 
             chat_btn.click(
-                fn=answer_question,
+                fn=lambda mode, idx, q, hist, doc: answer_question(
+                    "research" if mode == "Research Mode" else "rag" if mode == "RAG Mode" else "general",
+                    idx, q, hist, doc
+                ),
                 inputs=[mode_tabs, selected_index_state, question_input, history_state, uploaded_doc_state],
                 outputs=[chatbot, history_state]
             ).then(
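Note: the inline lambda maps the tab label onto the mode string answer_question expects. An equivalent, easier-to-test named helper, assuming (as the lambda does) that the Tabs input resolves to the selected tab's label:

    # Hypothetical refactor of the mapping above
    TAB_TO_MODE = {"Research Mode": "research", "RAG Mode": "rag"}

    def route_question(tab_label, idx, q, hist, doc):
        return answer_question(TAB_TO_MODE.get(tab_label, "general"), idx, q, hist, doc)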