ADKU committed on
Commit
093fd7d
·
verified ·
1 Parent(s): d08a770

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -26
app.py CHANGED
@@ -94,7 +94,7 @@ except Exception as e:
94
  logger.error(f"FAISS index creation failed: {e}")
95
  raise
96
 
97
- # Hybrid search function
98
  def get_relevant_papers(query):
99
  if not query.strip():
100
  return [], "Please enter a search query."
@@ -106,18 +106,16 @@ def get_relevant_papers(query):
106
  bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
107
  combined_indices = list(set(indices[0]) | set(bm25_top_indices))
108
  ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
109
- papers = []
110
- for i, index in enumerate(ranked_results[:5]):
111
- paper = df.iloc[index]
112
- papers.append(f"{i+1}. {paper['title']} - Abstract: {paper['cleaned_abstract'][:200]}...")
113
- return papers, "Search completed."
114
  except Exception as e:
115
  logger.error(f"Search failed: {e}")
116
- return [], "Search failed. Please try again."
117
 
118
- # Gemini API QA function with optimized prompt
119
- def answer_question(paper, question, history):
120
- if not paper:
121
  return [(question, "Please select a paper first!")], history
122
  if not question.strip():
123
  return [(question, "Please ask a question!")], history
@@ -125,14 +123,17 @@ def answer_question(paper, question, history):
125
  return [("Conversation ended.", "Select a new paper or search again!")], []
126
 
127
  try:
128
- # Extract title and abstract
129
- title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
130
- abstract = paper.split(" - Abstract: ")[1].rstrip("...")
 
 
 
131
 
132
- # Build prompt for Gemini API (plain text, no special tokens needed)
133
  prompt = (
134
  "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
135
- "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
136
  "When asked about tech stacks or methods, follow these guidelines:\n"
137
  "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
138
  "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
@@ -140,9 +141,11 @@ def answer_question(paper, question, history):
140
  "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
141
  "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
142
  "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
143
- f"Here’s the paper:\n"
144
  f"Title: {title}\n"
145
- f"Abstract: {abstract}\n\n"
 
 
146
  )
147
 
148
  # Add history if present
@@ -155,7 +158,7 @@ def answer_question(paper, question, history):
155
 
156
  logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
157
 
158
- # Call Gemini API (using Gemini 1.5 Flash by default)
159
  model = genai.GenerativeModel("gemini-1.5-flash")
160
  response = model.generate_content(prompt)
161
  answer = response.text.strip()
@@ -196,13 +199,14 @@ with gr.Blocks(
196
  paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
197
  search_status = gr.Textbox(label="Search Status", interactive=False)
198
 
199
- # State to store paper choices
200
  paper_choices_state = gr.State([])
 
201
 
202
  search_btn.click(
203
  fn=get_relevant_papers,
204
  inputs=query_input,
205
- outputs=[paper_choices_state, search_status]
206
  ).then(
207
  fn=lambda choices: gr.update(choices=choices, value=None),
208
  inputs=paper_choices_state,
@@ -217,14 +221,22 @@ with gr.Blocks(
217
  question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
218
  chat_btn = gr.Button("Send")
219
 
220
- # State to store conversation history
221
  history_state = gr.State([])
 
 
 
 
 
 
 
 
 
222
 
223
- # Update selected paper and reset history
224
  paper_dropdown.change(
225
- fn=lambda x: (x, []),
226
- inputs=paper_dropdown,
227
- outputs=[selected_paper, history_state]
228
  ).then(
229
  fn=lambda: [],
230
  inputs=None,
@@ -234,7 +246,7 @@ with gr.Blocks(
234
  # Handle chat
235
  chat_btn.click(
236
  fn=answer_question,
237
- inputs=[selected_paper, question_input, history_state],
238
  outputs=[chatbot, history_state]
239
  ).then(
240
  fn=lambda: "",
 
94
  logger.error(f"FAISS index creation failed: {e}")
95
  raise
96
 
97
+ # Hybrid search function (return indices instead of truncated strings)
98
  def get_relevant_papers(query):
99
  if not query.strip():
100
  return [], "Please enter a search query."
 
106
  bm25_top_indices = np.argsort(bm25_scores)[::-1][:5]
107
  combined_indices = list(set(indices[0]) | set(bm25_top_indices))
108
  ranked_results = sorted(combined_indices, key=lambda idx: -bm25_scores[idx])
109
+ # Return formatted strings for dropdown and indices for full data
110
+ papers = [f"{i+1}. {df.iloc[idx]['title']} - Abstract: {df.iloc[idx]['abstract'][:200]}..." for i, idx in enumerate(ranked_results[:5])]
111
+ return papers, ranked_results[:5], "Search completed."
 
 
112
  except Exception as e:
113
  logger.error(f"Search failed: {e}")
114
+ return [], [], "Search failed. Please try again."
115
 
116
+ # Gemini API QA function with full context
117
+ def answer_question(selected_index, question, history):
118
+ if selected_index is None:
119
  return [(question, "Please select a paper first!")], history
120
  if not question.strip():
121
  return [(question, "Please ask a question!")], history
 
123
  return [("Conversation ended.", "Select a new paper or search again!")], []
124
 
125
  try:
126
+ # Get full paper data from DataFrame using index
127
+ paper_data = df.iloc[selected_index]
128
+ title = paper_data["title"]
129
+ abstract = paper_data["abstract"] # Full abstract, not truncated
130
+ authors = ", ".join(paper_data["authors"])
131
+ doi = paper_data["doi"]
132
 
133
+ # Build prompt with all fields
134
  prompt = (
135
  "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
136
+ "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's details. "
137
  "When asked about tech stacks or methods, follow these guidelines:\n"
138
  "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
139
  "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
 
141
  "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
142
  "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
143
  "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
144
+ "Here’s the paper:\n"
145
  f"Title: {title}\n"
146
+ f"Authors: {authors}\n"
147
+ f"Abstract: {abstract}\n"
148
+ f"DOI: {doi}\n\n"
149
  )
150
 
151
  # Add history if present
 
158
 
159
  logger.info(f"Prompt sent to Gemini API: {prompt[:200]}...")
160
 
161
+ # Call Gemini API (Gemini 1.5 Flash)
162
  model = genai.GenerativeModel("gemini-1.5-flash")
163
  response = model.generate_content(prompt)
164
  answer = response.text.strip()
 
199
  paper_dropdown = gr.Dropdown(label="Select a Paper", choices=[], interactive=True)
200
  search_status = gr.Textbox(label="Search Status", interactive=False)
201
 
202
+ # States to store paper choices and indices
203
  paper_choices_state = gr.State([])
204
+ paper_indices_state = gr.State([])
205
 
206
  search_btn.click(
207
  fn=get_relevant_papers,
208
  inputs=query_input,
209
+ outputs=[paper_choices_state, paper_indices_state, search_status]
210
  ).then(
211
  fn=lambda choices: gr.update(choices=choices, value=None),
212
  inputs=paper_choices_state,
 
221
  question_input = gr.Textbox(label="Ask a question", placeholder="e.g., What methods are used?")
222
  chat_btn = gr.Button("Send")
223
 
224
+ # State to store conversation history and selected index
225
  history_state = gr.State([])
226
+ selected_index_state = gr.State(None)
227
+
228
+ # Update selected paper and index
229
+ def update_selected_paper(choice, indices):
230
+ if choice is None:
231
+ return "", None
232
+ index = int(choice.split(".")[0]) - 1 # Extract rank (e.g., "1." -> 0)
233
+ selected_idx = indices[index]
234
+ return choice, selected_idx
235
 
 
236
  paper_dropdown.change(
237
+ fn=update_selected_paper,
238
+ inputs=[paper_dropdown, paper_indices_state],
239
+ outputs=[selected_paper, selected_index_state]
240
  ).then(
241
  fn=lambda: [],
242
  inputs=None,
 
246
  # Handle chat
247
  chat_btn.click(
248
  fn=answer_question,
249
+ inputs=[selected_index_state, question_input, history_state],
250
  outputs=[chatbot, history_state]
251
  ).then(
252
  fn=lambda: "",