ADKU committed on
Commit
093fd7d
·
verified ·
1 Parent(s): d08a770

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -26
app.py CHANGED
@@ -94,7 +94,7 @@ except Exception as e:
94
  logger.error(f"FAISS index creation failed: {e}")
95
  raise
96
 
97
- # Hybrid search function
98
  def get_relevant_papers(query):
99
  if not query.strip():
100
  return [], "Please enter a search query."
@@ -106,18 +106,16 @@ def get_relevant_papers(query):
106
  bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
107
  combined_indices = list(set(indices[0]) | set(bm25_top_indices))
108
  ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
109
- papers = []
110
- for i, index in enumerate(ranked_results[:5]):
111
- paper = df.iloc[index]
112
- papers.append(f"{i+1}. {paper['title']} - Abstract: {paper['cleaned_abstract'][:200]}...")
113
- return papers, "Search completed."
114
  except Exception as e:
115
  logger.error(f"Search failed: {e}")
116
- return [], "Search failed. Please try again."
117
 
118
- # Gemini API QA function with optimized prompt
119
- def answer_question(paper, question, history):
120
- if not paper:
121
  return [(question, "Please select a paper first!")], history
122
  if not question.strip():
123
  return [(question, "Please ask a question!")], history
@@ -125,14 +123,17 @@ def answer_question(paper, question, history):
125
  return [("Conversation ended.", "Select a new paper or search again!")], []
126
 
127
  try:
128
- # Extract title and abstract
129
- title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
130
- abstract = paper.split(" - Abstract: ")[1].rstrip("...")
 
 
 
131
 
132
- # Build prompt for Gemini API (plain text, no special tokens needed)
133
  prompt = (
134
  "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
135
- "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
136
  "When asked about tech stacks or methods, follow these guidelines:\n"
137
  "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
138
  "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
@@ -140,9 +141,11 @@ def answer_question(paper, question, history):
140
  "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
141
  "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
142
  "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
143
- f"Here’s the paper:\n"
144
  f"Title: {title}\n"
145
- f"Abstract: {abstract}\n\n"
 
 
146
  )
147
 
148
  # Add history if present
@@ -155,7 +158,7 @@ def answer_question(paper, question, history):
155
 
156
  logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
157
 
158
- # Call Gemini API (using Gemini 1.5 Flash by default)
159
  model = genai.GenerativeModel("gemini-1.5-flash")
160
  response = model.generate_content(prompt)
161
  answer = response.text.strip()
@@ -196,13 +199,14 @@ with gr.Blocks(
196
  paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
197
  search_status = gr.Textbox(label="Search Status", interactive=False)
198
 
199
- # State to store paper choices
200
  paper_choices_state = gr.State([])
 
201
 
202
  search_btn.click(
203
  fn=get_relevant_papers,
204
  inputs=query_input,
205
- outputs=[paper_choices_state, search_status]
206
  ).then(
207
  fn=lambda choices: gr.update(choices=choices, value=None),
208
  inputs=paper_choices_state,
@@ -217,14 +221,22 @@ with gr.Blocks(
217
  question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
218
  chat_btn = gr.Button("Send")
219
 
220
- # State to store conversation history
221
  history_state = gr.State([])
 
 
 
 
 
 
 
 
 
222
 
223
- # Update selected paper and reset history
224
  paper_dropdown.change(
225
- fn=lambda x: (x, []),
226
- inputs=paper_dropdown,
227
- outputs=[selected_paper, history_state]
228
  ).then(
229
  fn=lambda: [],
230
  inputs=None,
@@ -234,7 +246,7 @@ with gr.Blocks(
234
  # Handle chat
235
  chat_btn.click(
236
  fn=answer_question,
237
- inputs=[selected_paper, question_input, history_state],
238
  outputs=[chatbot, history_state]
239
  ).then(
240
  fn=lambda: "",
 
94
  logger.error(f"FAISS index creation failed: {e}")
95
  raise
96
 
97
+ # Hybrid search function (return indices instead of truncated strings)
98
  def get_relevant_papers(query):
99
  if not query.strip():
100
  return [], "Please enter a search query."
 
106
  bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
107
  combined_indices = list(set(indices[0]) | set(bm25_top_indices))
108
  ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
109
+ # Return formatted strings for dropdown and indices for full data
110
+ papers = [f"{i+1}. {df.iloc[idx]['title']} - Abstract: {df.iloc[idx]['abstract'][:200]}..." for i, idx in enumerate(ranked_results[:5])]
111
+ return papers, ranked_results[:5], "Search completed."
 
 
112
  except Exception as e:
113
  logger.error(f"Search failed: {e}")
114
+ return [], [], "Search failed. Please try again."
115
 
116
+ # Gemini API QA function with full context
117
+ def answer_question(selected_index, question, history):
118
+ if selected_index is None:
119
  return [(question, "Please select a paper first!")], history
120
  if not question.strip():
121
  return [(question, "Please ask a question!")], history
 
123
  return [("Conversation ended.", "Select a new paper or search again!")], []
124
 
125
  try:
126
+ # Get full paper data from DataFrame using index
127
+ paper_data = df.iloc[selected_index]
128
+ title = paper_data["title"]
129
+ abstract = paper_data["abstract"] # Full abstract, not truncated
130
+ authors = ", ".join(paper_data["authors"])
131
+ doi = paper_data["doi"]
132
 
133
+ # Build prompt with all fields
134
  prompt = (
135
  "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
136
+ "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's details. "
137
  "When asked about tech stacks or methods, follow these guidelines:\n"
138
  "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
139
  "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
 
141
  "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
142
  "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
143
  "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
144
+ "Here’s the paper:\n"
145
  f"Title: {title}\n"
146
+ f"Authors: {authors}\n"
147
+ f"Abstract: {abstract}\n"
148
+ f"DOI: {doi}\n\n"
149
  )
150
 
151
  # Add history if present
 
158
 
159
  logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
160
 
161
+ # Call Gemini API (Gemini 1.5 Flash)
162
  model = genai.GenerativeModel("gemini-1.5-flash")
163
  response = model.generate_content(prompt)
164
  answer = response.text.strip()
 
199
  paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
200
  search_status = gr.Textbox(label="Search Status", interactive=False)
201
 
202
+ # States to store paper choices and indices
203
  paper_choices_state = gr.State([])
204
+ paper_indices_state = gr.State([])
205
 
206
  search_btn.click(
207
  fn=get_relevant_papers,
208
  inputs=query_input,
209
+ outputs=[paper_choices_state, paper_indices_state, search_status]
210
  ).then(
211
  fn=lambda choices: gr.update(choices=choices, value=None),
212
  inputs=paper_choices_state,
 
221
  question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
222
  chat_btn = gr.Button("Send")
223
 
224
+ # State to store conversation history and selected index
225
  history_state = gr.State([])
226
+ selected_index_state = gr.State(None)
227
+
228
+ # Update selected paper and index
229
+ def update_selected_paper(choice, indices):
230
+ if choice is None:
231
+ return "", None
232
+ index = int(choice.split(".")[0]) - 1 # Extract rank (e.g., "1." -> 0)
233
+ selected_idx = indices[index]
234
+ return choice, selected_idx
235
 
 
236
  paper_dropdown.change(
237
+ fn=update_selected_paper,
238
+ inputs=[paper_dropdown, paper_indices_state],
239
+ outputs=[selected_paper, selected_index_state]
240
  ).then(
241
  fn=lambda: [],
242
  inputs=None,
 
246
  # Handle chat
247
  chat_btn.click(
248
  fn=answer_question,
249
+ inputs=[selected_index_state, question_input, history_state],
250
  outputs=[chatbot, history_state]
251
  ).then(
252
  fn=lambda: "",