ADKU committed on
Commit 1130652 · verified · 1 Parent(s): ad54e4d

Updated model from GPT-2 to Mistral to improve response quality

Files changed (1)
  1. app.py +20 -22
app.py CHANGED
@@ -5,7 +5,7 @@ from rank_bm25 import BM25Okapi
 import torch
 import pandas as pd
 import gradio as gr
-from transformers import AutoTokenizer, AutoModel, GPT2LMHeadModel, GPT2Tokenizer
+from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
 import logging
 
 # Set up logging
@@ -53,12 +53,12 @@ try:
     sci_bert_model.eval()
     logger.info("SciBERT loaded")
 
-    # DistilGPT-2 for QA
-    gpt2_tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2", cache_dir="/tmp/huggingface")
-    gpt2_model = GPT2LMHeadModel.from_pretrained("distilgpt2", cache_dir="/tmp/huggingface")
-    gpt2_model.to(device)
-    gpt2_model.eval()
-    logger.info("DistilGPT-2 loaded")
+    # Mistral-7B-Instruct for QA
+    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", cache_dir="/tmp/huggingface")
+    mistral_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", cache_dir="/tmp/huggingface")
+    mistral_model.to(device)
+    mistral_model.eval()
+    logger.info("Mistral-7B-Instruct loaded")
 except Exception as e:
     logger.error(f"Model loading failed: {e}")
     raise
@@ -114,7 +114,7 @@ def get_relevant_papers(query):
         logger.error(f"Search failed: {e}")
         return [], "Search failed. Please try again."
 
-# GPT-2 QA function with the best prompt
+# Mistral QA function with optimized prompt
 def answer_question(paper, question, history):
     if not paper:
         return [(question, "Please select a paper first!")], history
@@ -128,13 +128,11 @@ def answer_question(paper, question, history):
     title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
     abstract = paper.split(" - Abstract: ")[1].rstrip("...")
 
-    # Build the ultimate prompt
+    # Build the ultimate prompt with Mistral's instruction format
     prompt = (
-        "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning and any abstract or title you are given as input. "
+        "<s>[INST] You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
         "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
-        "Donot repeat the same sentence again and again no matter what, use your own intelligence to anser some vague question or question whos data is not with you."
-        "Be the best RESEARCH ASSISTANT ever existed"
-        "When asked about tech stacks or methods, use the following guidelines:\n"
+        "When asked about tech stacks or methods, follow these guidelines:\n"
         "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
         "2. If the abstract is vague (e.g., 'machine learning techniques'), infer the most likely tech stacks based on the context of crop prediction and modern research practices, and explain your reasoning.\n"
         "3. Always respond in a clear, concise format—use bullet points for lists (e.g., tech stacks) and short paragraphs for explanations.\n"
@@ -148,30 +146,30 @@ def answer_question(paper, question, history):
 
     # Add history if present
     if history:
-        prompt += "Previous conversation (if any, use for context):\n"
+        prompt += "Previous conversation (use for context):\n"
         for user_q, bot_a in history[-2:]:
             prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
 
-    prompt += f"Now, answer this question: {question}"
+    prompt += f"Now, answer this question: {question} [/INST]</s>"
 
-    logger.info(f"Prompt sent to GPT-2: {prompt[:200]}...")
+    logger.info(f"Prompt sent to Mistral: {prompt[:200]}...")
 
     # Generate response
-    inputs = gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
+    inputs = mistral_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
     inputs = {key: val.to(device) for key, val in inputs.items()}
     with torch.no_grad():
-        outputs = gpt2_model.generate(
+        outputs = mistral_model.generate(
             inputs["input_ids"],
-            max_new_tokens=150,
+            max_new_tokens=200,  # More tokens for detailed answers
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=gpt2_tokenizer.eos_token_id
+            pad_token_id=mistral_tokenizer.eos_token_id
         )
 
     # Decode and clean response
-    response = gpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response[len(prompt):].strip()
+    response = mistral_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = response[len(prompt):].strip()  # Remove prompt, including [INST] tags
 
     # Fallback for poor responses
     if not response or len(response) < 15:
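A note on the loading hunk above: Mistral-7B-Instruct has roughly 7B parameters, so a plain from_pretrained(...) followed by .to(device) materializes about 28 GB of fp32 weights before moving them, far beyond what DistilGPT-2 needed and more than a typical free Space provides. If memory is the constraint, a lighter load might look like the sketch below. This is a hedged example, not part of the commit, and it assumes accelerate (for device_map) and optionally bitsandbytes are listed in the Space's requirements.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-Instruct-v0.1"
mistral_tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="/tmp/huggingface")

# Optional 4-bit weights (assumption: bitsandbytes and a CUDA GPU are available on the Space)
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

mistral_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir="/tmp/huggingface",
    torch_dtype=torch.float16,   # halves memory versus the default fp32 load
    device_map="auto",           # accelerate places the layers on available devices
    # quantization_config=quant_config,  # uncomment to shrink the weights to roughly 4 GB
)
mistral_model.eval()

With device_map="auto", the explicit mistral_model.to(device) call in the diff would be dropped, since placement is handled at load time.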
 
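The prompt formatting and decoding in the last hunk are the most fragile parts of the change: the tokenizer already prepends <s> itself, truncation at max_length=400 can silently cut off the closing [/INST], and because skip_special_tokens=True alters the decoded text, response[len(prompt):] no longer lines up with the generated string and can swallow the start of the answer. A sturdier pattern, sketched under the assumption of a transformers version with chat-template support (the messages structure and the prompt_body name below are illustrative, not from the commit), lets the tokenizer build the instruction format and slices at the token level:

# prompt_body: the assembled instructions, paper context, history, and question (hypothetical name)
messages = [{"role": "user", "content": prompt_body}]
input_ids = mistral_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(device)

with torch.no_grad():
    outputs = mistral_model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=mistral_tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens instead of string-slicing the prompt away
new_tokens = outputs[0][input_ids.shape[-1]:]
response = mistral_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

Slicing outputs[0] past input_ids.shape[-1] removes exactly the prompt tokens, so no string arithmetic against the original prompt is needed and the [INST] markers never leak into the reply.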