Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -94,7 +94,7 @@ except Exception as e:
|
|
| 94 |
logger.error(f"FAISS index creation failed: {e}")
|
| 95 |
raise
|
| 96 |
|
| 97 |
-
# Hybrid search function
|
| 98 |
def get_relevant_papers(query):
|
| 99 |
if not query.strip():
|
| 100 |
return [], "Please enter a search query."
|
|
@@ -106,18 +106,16 @@ def get_relevant_papers(query):
|
|
| 106 |
bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
|
| 107 |
combined_indices = list(set(indices[0]) | set(bm25_top_indices))
|
| 108 |
ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
|
| 109 |
-
|
| 110 |
-
for i,
|
| 111 |
-
|
| 112 |
-
papers.append(f"{i+1}. {paper['title']} - Abstract: {paper['cleaned_abstract'][:200]}...")
|
| 113 |
-
return papers, "Search completed."
|
| 114 |
except Exception as e:
|
| 115 |
logger.error(f"Search failed: {e}")
|
| 116 |
-
return [], "Search failed. Please try again."
|
| 117 |
|
| 118 |
-
# Gemini API QA function with
|
| 119 |
-
def answer_question(
|
| 120 |
-
if
|
| 121 |
return [(question, "Please select a paper first!")], history
|
| 122 |
if not question.strip():
|
| 123 |
return [(question, "Please ask a question!")], history
|
|
@@ -125,14 +123,17 @@ def answer_question(paper, question, history):
|
|
| 125 |
return [("Conversation ended.", "Select a new paper or search again!")], []
|
| 126 |
|
| 127 |
try:
|
| 128 |
-
#
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
# Build prompt
|
| 133 |
prompt = (
|
| 134 |
"You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
|
| 135 |
-
"Your goal is to provide concise, accurate, and well-structured answers based on the given paper's
|
| 136 |
"When asked about tech stacks or methods, follow these guidelines:\n"
|
| 137 |
"1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
|
| 138 |
"2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
|
|
@@ -140,9 +141,11 @@ def answer_question(paper, question, history):
|
|
| 140 |
"4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
|
| 141 |
"5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
|
| 142 |
"6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
|
| 143 |
-
|
| 144 |
f"Title: {title}\n"
|
| 145 |
-
f"
|
|
|
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# Add history if present
|
|
@@ -155,7 +158,7 @@ def answer_question(paper, question, history):
|
|
| 155 |
|
| 156 |
logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
|
| 157 |
|
| 158 |
-
# Call Gemini API (
|
| 159 |
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 160 |
response = model.generate_content(prompt)
|
| 161 |
answer = response.text.strip()
|
|
@@ -196,13 +199,14 @@ with gr.Blocks(
|
|
| 196 |
paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
|
| 197 |
search_status = gr.Textbox(label="Search Status", interactive=False)
|
| 198 |
|
| 199 |
-
#
|
| 200 |
paper_choices_state = gr.State([])
|
|
|
|
| 201 |
|
| 202 |
search_btn.click(
|
| 203 |
fn=get_relevant_papers,
|
| 204 |
inputs=query_input,
|
| 205 |
-
outputs=[paper_choices_state, search_status]
|
| 206 |
).then(
|
| 207 |
fn=lambda choices: gr.update(choices=choices, value=None),
|
| 208 |
inputs=paper_choices_state,
|
|
@@ -217,14 +221,22 @@ with gr.Blocks(
|
|
| 217 |
question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
|
| 218 |
chat_btn = gr.Button("Send")
|
| 219 |
|
| 220 |
-
# State to store conversation history
|
| 221 |
history_state = gr.State([])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
# Update selected paper and reset history
|
| 224 |
paper_dropdown.change(
|
| 225 |
-
fn=
|
| 226 |
-
inputs=paper_dropdown,
|
| 227 |
-
outputs=[selected_paper,
|
| 228 |
).then(
|
| 229 |
fn=lambda: [],
|
| 230 |
inputs=None,
|
|
@@ -234,7 +246,7 @@ with gr.Blocks(
|
|
| 234 |
# Handle chat
|
| 235 |
chat_btn.click(
|
| 236 |
fn=answer_question,
|
| 237 |
-
inputs=[
|
| 238 |
outputs=[chatbot, history_state]
|
| 239 |
).then(
|
| 240 |
fn=lambda: "",
|
|
|
|
| 94 |
logger.error(f"FAISS index creation failed: {e}")
|
| 95 |
raise
|
| 96 |
|
| 97 |
+
# Hybrid search function (return indices instead of truncated strings)
|
| 98 |
def get_relevant_papers(query):
|
| 99 |
if not query.strip():
|
| 100 |
return [], "Please enter a search query."
|
|
|
|
| 106 |
bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
|
| 107 |
combined_indices = list(set(indices[0]) | set(bm25_top_indices))
|
| 108 |
ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
|
| 109 |
+
# Return formatted strings for dropdown and indices for full data
|
| 110 |
+
papers = [f"{i+1}. {df.iloc[idx]['title']} - Abstract: {df.iloc[idx]['abstract'][:200]}..." for i, idx in enumerate(ranked_results[:5])]
|
| 111 |
+
return papers, ranked_results[:5], "Search completed."
|
|
|
|
|
|
|
| 112 |
except Exception as e:
|
| 113 |
logger.error(f"Search failed: {e}")
|
| 114 |
+
return [], [], "Search failed. Please try again."
|
| 115 |
|
| 116 |
+
# Gemini API QA function with full context
|
| 117 |
+
def answer_question(selected_index, question, history):
|
| 118 |
+
if selected_index is None:
|
| 119 |
return [(question, "Please select a paper first!")], history
|
| 120 |
if not question.strip():
|
| 121 |
return [(question, "Please ask a question!")], history
|
|
|
|
| 123 |
return [("Conversation ended.", "Select a new paper or search again!")], []
|
| 124 |
|
| 125 |
try:
|
| 126 |
+
# Get full paper data from DataFrame using index
|
| 127 |
+
paper_data = df.iloc[selected_index]
|
| 128 |
+
title = paper_data["title"]
|
| 129 |
+
abstract = paper_data["abstract"] # Full abstract, not truncated
|
| 130 |
+
authors = ", ".join(paper_data["authors"])
|
| 131 |
+
doi = paper_data["doi"]
|
| 132 |
|
| 133 |
+
# Build prompt with all fields
|
| 134 |
prompt = (
|
| 135 |
"You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
|
| 136 |
+
"Your goal is to provide concise, accurate, and well-structured answers based on the given paper's details. "
|
| 137 |
"When asked about tech stacks or methods, follow these guidelines:\n"
|
| 138 |
"1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
|
| 139 |
"2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
|
|
|
|
| 141 |
"4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
|
| 142 |
"5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
|
| 143 |
"6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
|
| 144 |
+
"Here’s the paper:\n"
|
| 145 |
f"Title: {title}\n"
|
| 146 |
+
f"Authors: {authors}\n"
|
| 147 |
+
f"Abstract: {abstract}\n"
|
| 148 |
+
f"DOI: {doi}\n\n"
|
| 149 |
)
|
| 150 |
|
| 151 |
# Add history if present
|
|
|
|
| 158 |
|
| 159 |
logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
|
| 160 |
|
| 161 |
+
# Call Gemini API (Gemini 1.5 Flash)
|
| 162 |
model = genai.GenerativeModel("gemini-1.5-flash")
|
| 163 |
response = model.generate_content(prompt)
|
| 164 |
answer = response.text.strip()
|
|
|
|
| 199 |
paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
|
| 200 |
search_status = gr.Textbox(label="Search Status", interactive=False)
|
| 201 |
|
| 202 |
+
# States to store paper choices and indices
|
| 203 |
paper_choices_state = gr.State([])
|
| 204 |
+
paper_indices_state = gr.State([])
|
| 205 |
|
| 206 |
search_btn.click(
|
| 207 |
fn=get_relevant_papers,
|
| 208 |
inputs=query_input,
|
| 209 |
+
outputs=[paper_choices_state, paper_indices_state, search_status]
|
| 210 |
).then(
|
| 211 |
fn=lambda choices: gr.update(choices=choices, value=None),
|
| 212 |
inputs=paper_choices_state,
|
|
|
|
| 221 |
question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
|
| 222 |
chat_btn = gr.Button("Send")
|
| 223 |
|
| 224 |
+
# State to store conversation history and selected index
|
| 225 |
history_state = gr.State([])
|
| 226 |
+
selected_index_state = gr.State(None)
|
| 227 |
+
|
| 228 |
+
# Update selected paper and index
|
| 229 |
+
def update_selected_paper(choice, indices):
    """Map the dropdown selection back to the index stored for that rank.

    Dropdown labels are built as "<rank>. <title> - Abstract: ..." with a
    1-based rank, so the leading number identifies which entry of
    ``indices`` belongs to the chosen paper.

    Args:
        choice: The selected dropdown label, or None/"" when nothing is
            selected.
        indices: Sequence of stored indices, in the same order as the
            dropdown labels.

    Returns:
        A ``(choice, selected_index)`` tuple, or ``("", None)`` when nothing
        is selected or the label cannot be resolved to a stored index.
    """
    # Treat None and "" alike: both mean "no selection".
    if not choice:
        return "", None

    # Only the text before the first "." carries the rank.
    rank_text = choice.split(".", 1)[0]
    try:
        position = int(rank_text) - 1  # "1." -> 0
    except ValueError:
        # Label does not start with a number; nothing to resolve.
        return "", None

    # Reject out-of-range ranks explicitly. Without the lower bound a rank of
    # 0 would become -1 and silently pick the last entry.
    if indices is None or not 0 <= position < len(indices):
        return "", None

    return choice, indices[position]
|
| 235 |
|
|
|
|
| 236 |
paper_dropdown.change(
|
| 237 |
+
fn=update_selected_paper,
|
| 238 |
+
inputs=[paper_dropdown, paper_indices_state],
|
| 239 |
+
outputs=[selected_paper, selected_index_state]
|
| 240 |
).then(
|
| 241 |
fn=lambda: [],
|
| 242 |
inputs=None,
|
|
|
|
| 246 |
# Handle chat
|
| 247 |
chat_btn.click(
|
| 248 |
fn=answer_question,
|
| 249 |
+
inputs=[selected_index_state, question_input, history_state],
|
| 250 |
outputs=[chatbot, history_state]
|
| 251 |
).then(
|
| 252 |
fn=lambda: "",
|