Made an in-depth prompt to handle any type of query
app.py CHANGED
@@ -114,7 +114,7 @@ def get_relevant_papers(query):
         logger.error(f"Search failed: {e}")
         return [], "Search failed. Please try again."
 
-# GPT-2 QA function with
+# GPT-2 QA function with the best prompt
 def answer_question(paper, question, history):
     if not paper:
         return [(question, "Please select a paper first!")], history
@@ -128,42 +128,60 @@ def answer_question(paper, question, history):
     title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
     abstract = paper.split(" - Abstract: ")[1].rstrip("...")
 
-    # Build
+    # Build the ultimate prompt
     prompt = (
-
+        "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and any abstract or title you are given as input. "
+        "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
+        "Do not repeat the same sentence again and again no matter what; use your own intelligence to answer vague questions or questions whose data is not with you. "
+        "Be the best RESEARCH ASSISTANT that ever existed. "
+        "When asked about tech stacks or methods, use the following guidelines:\n"
+        "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
+        "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
+        "3. Always respond in a clear, concise format—use bullet points for lists (e.g., tech stacks) and short paragraphs for explanations.\n"
+        "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
+        "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
+        "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
+        f"Here’s the paper:\n"
         f"Title: {title}\n"
         f"Abstract: {abstract}\n\n"
-        f"Answer this question: {question}"
     )
 
-    #
+    # Add history if present
     if history:
-        prompt += "
+        prompt += "Previous conversation (if any, use for context):\n"
-        for user_q, bot_a in history[-2:]:
+        for user_q, bot_a in history[-2:]:
            prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
 
+    prompt += f"Now, answer this question: {question}"
+
     logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
 
-    # Generate response
+    # Generate response
     inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
     inputs = {key: val.to(device) for key, val in inputs.items()}
     with torch.no_grad():
         outputs = gpt2_model.generate(
             inputs["input_ids"],
-            max_new_tokens=150,
+            max_new_tokens=150,
             do_sample=True,
-            temperature=0.
+            temperature=0.7,
             top_p=0.9,
             pad_token_id=gpt2_tokenizer.eos_token_id
         )
 
-    # Decode
+    # Decode and clean response
     response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response[len(prompt):].strip()
+    response = response[len(prompt):].strip()
 
-    # Fallback for
+    # Fallback for poor responses
-    if not response or len(response) <
+    if not response or len(response) < 15:
-        response =
+        response = (
+            "The abstract doesn’t provide specific technologies, but based on crop prediction with machine learning and deep learning, likely tech stacks include:\n"
+            "- Python: Core language for ML/DL.\n"
+            "- TensorFlow or PyTorch: Frameworks for deep learning models.\n"
+            "- Scikit-learn: For traditional ML algorithms.\n"
+            "- Pandas/NumPy: For data handling and preprocessing."
+        )
 
     history.append((question, response))
     return history, history
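For context, here is a minimal, self-contained sketch that reproduces the generation settings introduced by this commit. The diff does not show how the Space loads its model, so the stock "gpt2" checkpoint, the device handling, and the stand-in prompt below are assumptions for illustration, not the Space's actual code.

# Minimal sketch (assumption: the Space uses the stock "gpt2" checkpoint from
# transformers and a `device` set up elsewhere in app.py).
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

# Stand-in prompt; the real one is built from the selected paper as shown above.
prompt = (
    "Title: Crop Yield Prediction with Deep Learning\n"
    "Abstract: We apply machine learning techniques to crop yield prediction.\n\n"
    "Now, answer this question: What tech stack is likely used?"
)

inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
inputs = {key: val.to(device) for key, val in inputs.items()}
with torch.no_grad():
    outputs = gpt2_model.generate(
        inputs["input_ids"],
        max_new_tokens=150,   # same generation budget as the commit
        do_sample=True,
        temperature=0.7,      # value introduced by this commit
        top_p=0.9,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )

# Strip the echoed prompt from the decoded text, as answer_question() does.
response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)[len(prompt):].strip()
print(response)

One design note: with truncation=True and max_length=400, the tokenizer keeps the first 400 tokens and drops the rest, so a long system preamble plus history can push the final "Now, answer this question: ..." line out of the model input entirely; the sketch keeps the prompt short to stay within that window.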