Update app.py
app.py (CHANGED)
```diff
@@ -67,7 +67,7 @@ except Exception as e:
         raise
 
 # Generate SciBERT embeddings (optimized with larger batch size)
-def generate_embeddings_sci_bert(texts, batch_size=64):
+def generate_embeddings_sci_bert(texts, batch_size=64):
     try:
         all_embeddings = []
         for i in range(0, len(texts), batch_size):
```
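For context, a helper with this signature typically tokenizes each batch and pools the transformer outputs into one vector per text. A minimal sketch under that assumption; the checkpoint name and mean pooling are guesses, not the Space's confirmed implementation:

```python
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel

# Assumed checkpoint; the Space may load a different SciBERT variant
tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_uncased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased")

def generate_embeddings_sci_bert(texts, batch_size=64):
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True,
                           max_length=512, return_tensors="pt")
        with torch.no_grad():
            out = model(**inputs)
        # Mean-pool token states into one embedding per input text
        all_embeddings.append(out.last_hidden_state.mean(dim=1).cpu().numpy())
    return np.vstack(all_embeddings)
```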
```diff
@@ -122,7 +122,7 @@ def process_uploaded_pdf(file):
         for page in pdf_reader.pages:
             text += page.extract_text() or ""
         cleaned_text = clean_text(text)
-        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]
+        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]
         embeddings = generate_embeddings_sci_bert(chunks)
         faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
         faiss_index.add(embeddings.astype(np.float32))
```
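The 1000-character chunks and the IndexFlatL2 index built here are presumably what get_relevant_chunks searches later. A hedged sketch of that lookup, assuming the chunk list and index are kept together; the k=3 default and the -1 guard are illustrative choices, not the app's actual code:

```python
import numpy as np

def search_chunks(query, chunks, faiss_index, k=3):
    # Embed the query with the same SciBERT helper used for the chunks,
    # then take the k nearest neighbours by L2 distance
    query_emb = generate_embeddings_sci_bert([query]).astype(np.float32)
    _, idx = faiss_index.search(query_emb, k)
    return [chunks[i] for i in idx[0] if i != -1]
```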
```diff
@@ -148,17 +148,17 @@ def get_relevant_chunks(query, uploaded_doc):
         logger.error(f"RAG retrieval failed: {e}")
         return [], "Retrieval failed."
 
-# Unified QA function
+# Unified QA function (updated for messages format)
 def answer_question(mode, selected_index, question, history, uploaded_doc=None):
     if not question.strip():
-        return [
+        return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please ask a question!"}], history
     if question.lower() in ["exit", "done"]:
-        return [
+        return history + [{"role": "user", "content": "Conversation ended."}, {"role": "assistant", "content": "Start a new conversation!"}], []
 
     try:
         if mode == "research":
             if selected_index is None:
-                return [
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please select a paper first!"}], history
             paper_data = df.iloc[selected_index]
             title = paper_data["title"]
             abstract = paper_data["abstract"]
```
```diff
@@ -182,8 +182,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
```
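The replacement loop folds only the most recent messages into the prompt. Run standalone on a sample history (contents illustrative), it produces:

```python
history = [
    {"role": "user", "content": "What dataset is used?"},
    {"role": "assistant", "content": "The MIMIC-III clinical database."},
]
prompt = "Previous conversation (use for context):\n"
for msg in history[-2:]:
    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
prompt += "Now, answer this question: How was it preprocessed?"
print(prompt)
# Previous conversation (use for context):
# User: What dataset is used?
# Assistant: The MIMIC-III clinical database.
# Now, answer this question: How was it preprocessed?
```

Note that history[-2:] keeps only the last user/assistant pair, so earlier turns are dropped from the context.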
```diff
@@ -199,7 +199,7 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
 
         elif mode == "rag":
             if uploaded_doc is None:
-                return [
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please upload a document first!"}], history
             relevant_chunks, _ = get_relevant_chunks(question, uploaded_doc)
             context = "\n".join(relevant_chunks)
             prompt = (
```
```diff
@@ -209,8 +209,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
```
```diff
@@ -222,18 +222,20 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
             answer = response.text.strip()
 
-        history.append(
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": answer})
         return history, history
     except Exception as e:
         logger.error(f"QA failed: {e}")
-        history.append(
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": "Sorry, I couldn’t process that. Try again!"})
         return history, history
 
 # Gradio UI
```
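After a successful turn, both returned values point at the same messages-format list, which is what the updated Chatbot below consumes (contents illustrative):

```python
history = [
    {"role": "user", "content": "What methods are used?"},
    {"role": "assistant", "content": "..."},  # the model's reply text
]
```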
```diff
@@ -256,7 +258,7 @@ with gr.Blocks(
     with gr.Column(scale=1, min_width=350, elem_classes="sidebar"):
         mode_tabs = gr.Tabs()
         with mode_tabs:
-            # Research Mode
+            # Research Mode
             with gr.TabItem("Research Mode"):
                 gr.Markdown("### Search Papers")
                 query_input = gr.Textbox(label="Enter your search query", placeholder="e.g., machine learning in healthcare")
```
```diff
@@ -296,33 +298,28 @@ with gr.Blocks(
     with gr.Column(scale=3, elem_classes="tab-content"):
         gr.Markdown("### Chat Area")
         selected_display = gr.Markdown(label="Selected Context", value="Select a mode to begin!")
-        chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot")
+        chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot", type="messages")  # Updated to messages format
         question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
         chat_btn = gr.Button("Send")
 
     history_state = gr.State([])
     selected_index_state = gr.State(None)
 
-    def update_display(
-        if
+    def update_display(selected_tab, choice, indices, uploaded_doc):
+        if selected_tab == "Research Mode" and choice:
             index = int(choice.split(".")[0]) - 1
             selected_idx = indices[index]
             paper = df.iloc[selected_idx]
             return f"**{paper['title']}**<br>DOI: [{paper['doi']}](https://doi.org/{paper['doi']})", selected_idx
-        elif
+        elif selected_tab == "RAG Mode" and uploaded_doc:
             return "Uploaded Document Ready", None
-        elif
+        elif selected_tab == "General Chat":
             return "General Chat Mode", None
         return "Select a mode to begin!", None
 
     mode_tabs.select(
-        fn=lambda
-        inputs=
-        outputs=None,
-        _js="tab => tab"
-    ).then(
-        fn=update_display,
-        inputs=[mode_tabs, paper_dropdown, paper_indices_state, uploaded_doc_state],
+        fn=lambda selected_tab: update_display(selected_tab, paper_dropdown.value, paper_indices_state.value, uploaded_doc_state.value),
+        inputs=[mode_tabs],
         outputs=[selected_display, selected_index_state]
     ).then(
         fn=lambda: [],
```
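The type="messages" switch is what ties the UI to the new return format; tuple-style histories would no longer render. A standalone sketch of the relevant wiring:

```python
import gradio as gr

with gr.Blocks() as demo:
    # Expects [{"role": ..., "content": ...}] items, matching answer_question's output
    chatbot = gr.Chatbot(label="Conversation", type="messages")
    history_state = gr.State([])
```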
```diff
@@ -337,7 +334,10 @@ with gr.Blocks(
     )
 
     chat_btn.click(
-        fn=answer_question
+        fn=lambda mode, idx, q, hist, doc: answer_question(
+            "research" if mode == "Research Mode" else "rag" if mode == "RAG Mode" else "general",
+            idx, q, hist, doc
+        ),
         inputs=[mode_tabs, selected_index_state, question_input, history_state, uploaded_doc_state],
         outputs=[chatbot, history_state]
     ).then(
```
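The new click handler maps the visible tab label to the internal mode string before delegating. The mapping in isolation (to_mode is a hypothetical name; the diff inlines it as a lambda):

```python
def to_mode(tab_label):
    return ("research" if tab_label == "Research Mode"
            else "rag" if tab_label == "RAG Mode"
            else "general")

assert to_mode("Research Mode") == "research"
assert to_mode("RAG Mode") == "rag"
assert to_mode("General Chat") == "general"
```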