Spaces:

ADKU
/

ResearchGPT_space

Sleeping

App Files Files Community

ADKU commited on Feb 28

Commit

ad54e4d

verified ·

1 Parent(s): 43c1491

made an in depth prompt to handle any type of query

Browse files

Files changed (1) hide show

app.py +33 -15

app.py CHANGED Viewed

@@ -114,7 +114,7 @@ def get_relevant_papers(query):
         logger.error(f"Search failed: {e}")
         return [], "Search failed. Please try again."
-# GPT-2 QA function with direct prompting
 def answer_question(paper, question, history):
     if not paper:
         return [(question, "Please select a paper first!")], history
@@ -128,42 +128,60 @@ def answer_question(paper, question, history):
         title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
         abstract = paper.split(" - Abstract: ")[1].rstrip("...")
-        # Build a simple prompt
         prompt = (
-            f"You are an expert assistant. Based on the following paper details:\n"
             f"Title: {title}\n"
             f"Abstract: {abstract}\n\n"
-            f"Answer this question: {question}"
         )
-        # Include recent history if available
         if history:
-            prompt += "\n\nPrevious conversation:\n"
-            for user_q, bot_a in history[-2:]:  # Last 2 turns for context
                 prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
         logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
-        # Generate response directly
         inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
         inputs = {key: val.to(device) for key, val in inputs.items()}
         with torch.no_grad():
             outputs = gpt2_model.generate(
                 inputs["input_ids"],
-                max_new_tokens=150,  # Longer responses for clarity
                 do_sample=True,
-                temperature=0.8,
                 top_p=0.9,
                 pad_token_id=gpt2_tokenizer.eos_token_id
             )
-        # Decode full output and extract response
         response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response[len(prompt):].strip()  # Remove prompt from output
-        # Fallback for bad responses
-        if not response or len(response) < 10:
-            response = "I couldn’t generate a clear answer. Could you rephrase your question?"
         history.append((question, response))
         return history, history

         logger.error(f"Search failed: {e}")
         return [], "Search failed. Please try again."
+# GPT-2 QA function with the best prompt
 def answer_question(paper, question, history):
     if not paper:
         return [(question, "Please select a paper first!")], history
         title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
         abstract = paper.split(" - Abstract: ")[1].rstrip("...")
+        # Build the ultimate prompt
         prompt = (
+            "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning and any abstract or title you are given as input. "
+            "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
+            "Donot repeat the same sentence again and again no matter what, use your own intelligence to anser some vague question or question whos data is not with you."
+            "Be the best RESEARCH ASSISTANT ever existed"
+            "When asked about tech stacks or methods, use the following guidelines:\n"
+            "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
+            "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
+            "3. Always respond in a clear, concise format—use bullet points for lists (e.g., tech stacks) and short paragraphs for explanations.\n"
+            "4. If the question requires prior conversation context, refer to it naturally to maintain coherence.\n"
+            "5. If the abstract lacks enough detail, supplement with plausible, domain-specific suggestions and note they are inferred.\n"
+            "6. Avoid speculation or fluff—stick to facts or educated guesses grounded in the field.\n\n"
+            f"Here’s the paper:\n"
             f"Title: {title}\n"
             f"Abstract: {abstract}\n\n"
         )
+        # Add history if present
         if history:
+            prompt += "Previous conversation (if any, use for context):\n"
+            for user_q, bot_a in history[-2:]:
                 prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
+        prompt += f"Now, answer this question: {question}"
         logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
+        # Generate response
         inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
         inputs = {key: val.to(device) for key, val in inputs.items()}
         with torch.no_grad():
             outputs = gpt2_model.generate(
                 inputs["input_ids"],
+                max_new_tokens=150,
                 do_sample=True,
+                temperature=0.7,
                 top_p=0.9,
                 pad_token_id=gpt2_tokenizer.eos_token_id
             )
+        # Decode and clean response
         response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = response[len(prompt):].strip()
+        # Fallback for poor responses
+        if not response or len(response) < 15:
+            response = (
+                "The abstract doesn’t provide specific technologies, but based on crop prediction with machine learning and deep learning, likely tech stacks include:\n"
+                "- Python: Core language for ML/DL.\n"
+                "- TensorFlow or PyTorch: Frameworks for deep learning models.\n"
+                "- Scikit-learn: For traditional ML algorithms.\n"
+                "- Pandas/NumPy: For data handling and preprocessing."
+            )
         history.append((question, response))
         return history, history