ADKU commited on
Commit
ad54e4d
·
verified ·
1 Parent(s): 43c1491

made an in depth prompt to handle any type of query

Browse files
Files changed (1) hide show
  1. app.py +33 -15
app.py CHANGED
@@ -114,7 +114,7 @@ def get_relevant_papers(query):
114
  logger.error(f"Search failed: {e}")
115
  return [], "Search failed. Please try again."
116
 
117
- # GPT-2 QA function with direct prompting
118
  def answer_question(paper, question, history):
119
  if not paper:
120
  return [(question, "Please select a paper first!")], history
@@ -128,42 +128,60 @@ def answer_question(paper, question, history):
128
  title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
129
  abstract = paper.split(" - Abstract: ")[1].rstrip("...")
130
 
131
- # Build a simple prompt
132
  prompt = (
133
- f"You are an expert assistant. Based on the following paper details:\n"
 
 
 
 
 
 
 
 
 
 
 
134
  f"Title: {title}\n"
135
  f"Abstract: {abstract}\n\n"
136
- f"Answer this question: {question}"
137
  )
138
 
139
- # Include recent history if available
140
  if history:
141
- prompt += "\n\nPrevious conversation:\n"
142
- for user_q, bot_a in history[-2:]: # Last 2 turns for context
143
  prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
144
 
 
 
145
  logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
146
 
147
- # Generate response directly
148
  inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
149
  inputs = {key: val.to(device) for key, val in inputs.items()}
150
  with torch.no_grad():
151
  outputs = gpt2_model.generate(
152
  inputs["input_ids"],
153
- max_new_tokens=150, # Longer responses for clarity
154
  do_sample=True,
155
- temperature=0.8,
156
  top_p=0.9,
157
  pad_token_id=gpt2_tokenizer.eos_token_id
158
  )
159
 
160
- # Decode full output and extract response
161
  response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
162
- response = response[len(prompt):].strip() # Remove prompt from output
163
 
164
- # Fallback for bad responses
165
- if not response or len(response) < 10:
166
- response = "I couldn’t generate a clear answer. Could you rephrase your question?"
 
 
 
 
 
 
167
 
168
  history.append((question, response))
169
  return history, history
 
114
  logger.error(f"Search failed: {e}")
115
  return [], "Search failed. Please try again."
116
 
117
+ # GPT-2 QA function with the best prompt
118
  def answer_question(paper, question, history):
119
  if not paper:
120
  return [(question, "Please select a paper first!")], history
 
128
  title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
129
  abstract = paper.split(" - Abstract: ")[1].rstrip("...")
130
 
131
+ # Build the ultimate prompt
132
  prompt = (
133
+ "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning and any abstract or title you are given as input. "
134
+ "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
135
+ "Donot repeat the same sentence again and again no matter what, use your own intelligence to anser some vague question or question whos data is not with you."
136
+ "Be the best RESEARCH ASSISTANT ever existed"
137
+ "When asked about tech stacks or methods, use the following guidelines:\n"
138
+ "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
139
+ "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
140
+ "3. Always respond in a clear, concise format—use bullet points for lists (e.g., tech stacks) and short paragraphs for explanations.\n"
141
+ "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
142
+ "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
143
+ "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
144
+ f"Here’s the paper:\n"
145
  f"Title: {title}\n"
146
  f"Abstract: {abstract}\n\n"
 
147
  )
148
 
149
+ # Add history if present
150
  if history:
151
+ prompt += "Previous conversation (if any, use for context):\n"
152
+ for user_q, bot_a in history[-2:]:
153
  prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
154
 
155
+ prompt += f"Now, answer this question: {question}"
156
+
157
  logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
158
 
159
+ # Generate response
160
  inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
161
  inputs = {key: val.to(device) for key, val in inputs.items()}
162
  with torch.no_grad():
163
  outputs = gpt2_model.generate(
164
  inputs["input_ids"],
165
+ max_new_tokens=150,
166
  do_sample=True,
167
+ temperature=0.7,
168
  top_p=0.9,
169
  pad_token_id=gpt2_tokenizer.eos_token_id
170
  )
171
 
172
+ # Decode and clean response
173
  response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
174
+ response = response[len(prompt):].strip()
175
 
176
+ # Fallback for poor responses
177
+ if not response or len(response) < 15:
178
+ response = (
179
+ "The abstract doesn’t provide specific technologies, but based on crop prediction with machine learning and deep learning, likely tech stacks include:\n"
180
+ "- Python: Core language for ML/DL.\n"
181
+ "- TensorFlow or PyTorch: Frameworks for deep learning models.\n"
182
+ "- Scikit-learn: For traditional ML algorithms.\n"
183
+ "- Pandas/NumPy: For data handling and preprocessing."
184
+ )
185
 
186
  history.append((question, response))
187
  return history, history