Update app.py
app.py (CHANGED)
```diff
@@ -67,7 +67,7 @@ except Exception as e:
         raise
 
 # Generate SciBERT embeddings (optimized with larger batch size)
-def generate_embeddings_sci_bert(texts, batch_size=64):
+def generate_embeddings_sci_bert(texts, batch_size=64):
     try:
         all_embeddings = []
         for i in range(0, len(texts), batch_size):
```
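For context, a helper with this signature typically tokenizes each batch and pools the transformer outputs into one vector per text. A minimal sketch under that assumption; the checkpoint name and mean pooling are guesses, not the Space's confirmed implementation:

```python
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel

# Assumed checkpoint; the Space may load a different SciBERT variant
tokenizer = AutoTokenizer.from_pretrained("allenai/scibert_scivocab_uncased")
model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased")

def generate_embeddings_sci_bert(texts, batch_size=64):
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True,
                           max_length=512, return_tensors="pt")
        with torch.no_grad():
            out = model(**inputs)
        # Mean-pool token states into one embedding per input text
        all_embeddings.append(out.last_hidden_state.mean(dim=1).cpu().numpy())
    return np.vstack(all_embeddings)
```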
```diff
@@ -122,7 +122,7 @@ def process_uploaded_pdf(file):
         for page in pdf_reader.pages:
             text += page.extract_text() or ""
         cleaned_text = clean_text(text)
-        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]
+        chunks = [cleaned_text[i:i+1000] for i in range(0, len(cleaned_text), 1000)]
         embeddings = generate_embeddings_sci_bert(chunks)
         faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
         faiss_index.add(embeddings.astype(np.float32))
```
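The 1000-character chunks and the IndexFlatL2 index built here are presumably what get_relevant_chunks searches later. A hedged sketch of that lookup, assuming the chunk list and index are kept together; the k=3 default and the -1 guard are illustrative choices, not the app's actual code:

```python
import numpy as np

def search_chunks(query, chunks, faiss_index, k=3):
    # Embed the query with the same SciBERT helper used for the chunks,
    # then take the k nearest neighbours by L2 distance
    query_emb = generate_embeddings_sci_bert([query]).astype(np.float32)
    _, idx = faiss_index.search(query_emb, k)
    return [chunks[i] for i in idx[0] if i != -1]
```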
```diff
@@ -148,17 +148,17 @@ def get_relevant_chunks(query, uploaded_doc):
         logger.error(f"RAG retrieval failed: {e}")
         return [], "Retrieval failed."
 
-# Unified QA function
+# Unified QA function (updated for messages format)
 def answer_question(mode, selected_index, question, history, uploaded_doc=None):
     if not question.strip():
-        return [
+        return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please ask a question!"}], history
     if question.lower() in ["exit", "done"]:
-        return [
+        return history + [{"role": "user", "content": "Conversation ended."}, {"role": "assistant", "content": "Start a new conversation!"}], []
 
     try:
         if mode == "research":
             if selected_index is None:
-                return [
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please select a paper first!"}], history
             paper_data = df.iloc[selected_index]
             title = paper_data["title"]
             abstract = paper_data["abstract"]
```
```diff
@@ -182,8 +182,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
```
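The replacement loop folds only the most recent messages into the prompt. Run standalone on a sample history (contents illustrative), it produces:

```python
history = [
    {"role": "user", "content": "What dataset is used?"},
    {"role": "assistant", "content": "The MIMIC-III clinical database."},
]
prompt = "Previous conversation (use for context):\n"
for msg in history[-2:]:
    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
prompt += "Now, answer this question: How was it preprocessed?"
print(prompt)
# Previous conversation (use for context):
# User: What dataset is used?
# Assistant: The MIMIC-III clinical database.
# Now, answer this question: How was it preprocessed?
```

Note that history[-2:] keeps only the last user/assistant pair, so earlier turns are dropped from the context.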
```diff
@@ -199,7 +199,7 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
 
         elif mode == "rag":
             if uploaded_doc is None:
-                return [
+                return history + [{"role": "user", "content": question}, {"role": "assistant", "content": "Please upload a document first!"}], history
             relevant_chunks, _ = get_relevant_chunks(question, uploaded_doc)
             context = "\n".join(relevant_chunks)
             prompt = (
```
```diff
@@ -209,8 +209,8 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Now, answer this question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
```
```diff
@@ -222,18 +222,20 @@ def answer_question(mode, selected_index, question, history, uploaded_doc=None):
             )
             if history:
                 prompt += "Previous conversation (use for context):\n"
-                for
-                prompt += f"User: {
+                for msg in history[-2:]:
+                    prompt += f"User: {msg['content']}\n" if msg["role"] == "user" else f"Assistant: {msg['content']}\n"
             prompt += f"Question: {question}"
             model = genai.GenerativeModel("gemini-1.5-flash")
             response = model.generate_content(prompt)
             answer = response.text.strip()
 
-        history.append(
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": answer})
         return history, history
     except Exception as e:
         logger.error(f"QA failed: {e}")
-        history.append(
+        history.append({"role": "user", "content": question})
+        history.append({"role": "assistant", "content": "Sorry, I couldn’t process that. Try again!"})
         return history, history
 
 # Gradio UI
```
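After a successful turn, both returned values point at the same messages-format list, which is what the updated Chatbot below consumes (contents illustrative):

```python
history = [
    {"role": "user", "content": "What methods are used?"},
    {"role": "assistant", "content": "..."},  # the model's reply text
]
```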
```diff
@@ -256,7 +258,7 @@ with gr.Blocks(
     with gr.Column(scale=1, min_width=350, elem_classes="sidebar"):
         mode_tabs = gr.Tabs()
         with mode_tabs:
-            # Research Mode
+            # Research Mode
             with gr.TabItem("Research Mode"):
                 gr.Markdown("### Search Papers")
                 query_input = gr.Textbox(label="Enter your search query", placeholder="e.g., machine learning in healthcare")
```
```diff
@@ -296,33 +298,28 @@ with gr.Blocks(
     with gr.Column(scale=3, elem_classes="tab-content"):
         gr.Markdown("### Chat Area")
         selected_display = gr.Markdown(label="Selected Context", value="Select a mode to begin!")
-        chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot")
+        chatbot = gr.Chatbot(label="Conversation", elem_classes="chatbot", type="messages")  # Updated to messages format
         question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
         chat_btn = gr.Button("Send")
 
     history_state = gr.State([])
     selected_index_state = gr.State(None)
 
-    def update_display(
-        if
+    def update_display(selected_tab, choice, indices, uploaded_doc):
+        if selected_tab == "Research Mode" and choice:
             index = int(choice.split(".")[0]) - 1
             selected_idx = indices[index]
             paper = df.iloc[selected_idx]
             return f"**{paper['title']}**<br>DOI: [{paper['doi']}](https://doi.org/{paper['doi']})", selected_idx
-        elif
+        elif selected_tab == "RAG Mode" and uploaded_doc:
             return "Uploaded Document Ready", None
-        elif
+        elif selected_tab == "General Chat":
             return "General Chat Mode", None
         return "Select a mode to begin!", None
 
     mode_tabs.select(
-        fn=lambda
-        inputs=
-        outputs=None,
-        _js="tab => tab"
-    ).then(
-        fn=update_display,
-        inputs=[mode_tabs, paper_dropdown, paper_indices_state, uploaded_doc_state],
+        fn=lambda selected_tab: update_display(selected_tab, paper_dropdown.value, paper_indices_state.value, uploaded_doc_state.value),
+        inputs=[mode_tabs],
         outputs=[selected_display, selected_index_state]
     ).then(
         fn=lambda: [],
```
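The type="messages" switch is what ties the UI to the new return format; tuple-style histories would no longer render. A standalone sketch of the relevant wiring:

```python
import gradio as gr

with gr.Blocks() as demo:
    # Expects [{"role": ..., "content": ...}] items, matching answer_question's output
    chatbot = gr.Chatbot(label="Conversation", type="messages")
    history_state = gr.State([])
```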
```diff
@@ -337,7 +334,10 @@ with gr.Blocks(
     )
 
     chat_btn.click(
-        fn=answer_question
+        fn=lambda mode, idx, q, hist, doc: answer_question(
+            "research" if mode == "Research Mode" else "rag" if mode == "RAG Mode" else "general",
+            idx, q, hist, doc
+        ),
         inputs=[mode_tabs, selected_index_state, question_input, history_state, uploaded_doc_state],
         outputs=[chatbot, history_state]
     ).then(
```
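The new click handler maps the visible tab label to the internal mode string before delegating. The mapping in isolation (to_mode is a hypothetical name; the diff inlines it as a lambda):

```python
def to_mode(tab_label):
    return ("research" if tab_label == "Research Mode"
            else "rag" if tab_label == "RAG Mode"
            else "general")

assert to_mode("Research Mode") == "research"
assert to_mode("RAG Mode") == "rag"
assert to_mode("General Chat") == "general"
```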