ADKU committed
Commit d6c0c81 · verified · 1 Parent(s): d5d95b0

Update app.py

Files changed (1):
  1. app.py  +18 -22
app.py CHANGED
@@ -15,11 +15,6 @@ logger = logging.getLogger(__name__)
 # Set cache directory for Hugging Face models
 os.environ["HF_HOME"] = "/tmp/huggingface"
 
-# Get Hugging Face token from environment variable (set in Spaces secrets)
-HF_TOKEN = os.getenv("HF_TOKEN")
-if not HF_TOKEN:
-    logger.warning("HF_TOKEN not set. Mistral model access may fail. Set it in Hugging Face Spaces secrets.")
-
 # Load dataset with error handling
 DATASET_PATH = os.path.join(os.getcwd(), "springer_papers_DL.json")
 try:
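Note on the removed token handling: Qwen1.5-1.8B-Chat is an ungated checkpoint, so no Hugging Face token is needed to download it. If a gated model were ever swapped back in, the current transformers convention is to pass token= rather than the deprecated use_auth_token=. A minimal sketch, assuming the token would again be exposed as a Spaces secret named HF_TOKEN and using a placeholder model id:

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hypothetical re-introduction of token handling for a gated checkpoint.
# HF_TOKEN is assumed to be set as a Spaces secret, as in the removed code.
HF_TOKEN = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(
    "some-org/some-gated-model",   # placeholder model id
    cache_dir="/tmp/huggingface",
    token=HF_TOKEN,
)
model = AutoModelForCausalLM.from_pretrained(
    "some-org/some-gated-model",   # placeholder model id
    cache_dir="/tmp/huggingface",
    token=HF_TOKEN,
)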
@@ -58,12 +53,12 @@ try:
     sci_bert_model.eval()
     logger.info("SciBERT loaded")
 
-    # Mistral-7B-Instruct for QA with token
-    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", cache_dir="/tmp/huggingface", use_auth_token=HF_TOKEN)
-    mistral_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", cache_dir="/tmp/huggingface", use_auth_token=HF_TOKEN)
-    mistral_model.to(device)
-    mistral_model.eval()
-    logger.info("Mistral-7B-Instruct loaded")
+    # Qwen1.5-1.8B-Chat for QA (ungated)
+    qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-1.8B-Chat", cache_dir="/tmp/huggingface")
+    qwen_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen1.5-1.8B-Chat", cache_dir="/tmp/huggingface")
+    qwen_model.to(device)
+    qwen_model.eval()
+    logger.info("Qwen1.5-1.8B-Chat loaded")
 except Exception as e:
     logger.error(f"Model loading failed: {e}")
     raise
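The Qwen model here loads in full float32 by default, which needs roughly 7 GB of RAM for 1.8B parameters. If memory on the Space is tight, a half-precision load is one option; this is only a sketch, and it assumes device is defined earlier in app.py as in the original code:

import torch
from transformers import AutoModelForCausalLM

# Alternative loading sketch: half precision to roughly halve memory use.
qwen_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-1.8B-Chat",
    cache_dir="/tmp/huggingface",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
qwen_model.to(device)  # assumes `device` from the surrounding app
qwen_model.eval()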
@@ -119,7 +114,7 @@ def get_relevant_papers(query):
         logger.error(f"Search failed: {e}")
         return [], "Search failed. Please try again."
 
-# Mistral QA function with optimized prompt
+# Qwen QA function with optimized prompt
 def answer_question(paper, question, history):
     if not paper:
         return [(question, "Please select a paper first!")], history
@@ -133,9 +128,10 @@ def answer_question(paper, question, history):
     title = paper.split(" - Abstract: ")[0].split(". ", 1)[1]
     abstract = paper.split(" - Abstract: ")[1].rstrip("...")
 
-    # Build the ultimate prompt with Mistral's instruction format
+    # Build prompt with Qwen's chat format
     prompt = (
-        "<s>[INST] You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
+        "<|im_start|>user\n"
+        "You are Dr. Sage, the world's most brilliant and reliable research assistant, specializing in machine learning, deep learning, and agriculture. "
         "Your goal is to provide concise, accurate, and well-structured answers based on the given paper's title and abstract. "
         "When asked about tech stacks or methods, follow these guidelines:\n"
        "1. If the abstract explicitly mentions technologies (e.g., Python, TensorFlow), list them precisely with brief explanations.\n"
@@ -155,26 +151,26 @@ def answer_question(paper, question, history):
     for user_q, bot_a in history[-2:]:
         prompt += f"User: {user_q}\nAssistant: {bot_a}\n"
 
-    prompt += f"Now, answer this question: {question} [/INST]</s>"
+    prompt += f"Now, answer this question: {question}<|im_end|>\n<|im_start|>assistant"
 
-    logger.info(f"Prompt sent to Mistral: {prompt[:200]}...")
+    logger.info(f"Prompt sent to Qwen: {prompt[:200]}...")
 
     # Generate response
-    inputs = mistral_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
+    inputs = qwen_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=400)
     inputs = {key: val.to(device) for key, val in inputs.items()}
     with torch.no_grad():
-        outputs = mistral_model.generate(
+        outputs = qwen_model.generate(
             inputs["input_ids"],
             max_new_tokens=200,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=mistral_tokenizer.eos_token_id
+            pad_token_id=qwen_tokenizer.eos_token_id
         )
 
-    # Decode and clean response (preserve token structure)
-    response = mistral_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response[len(prompt):].strip()  # Remove prompt, including [INST] tags
+    # Decode and clean response
+    response = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = response[len(prompt):].strip()  # Remove prompt, including <|im_start|> tags
 
     # Fallback for poor responses
     if not response or len(response) < 15:
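One caveat with the response[len(prompt):] slicing: decoding with skip_special_tokens=True drops the <|im_start|>/<|im_end|> markers, so the decoded string is shorter than the original prompt and the slice can cut into the answer. A sketch of a more robust variant that removes the prompt at the token level instead, using the same variable names as the code above:

# Slice off the prompt tokens before decoding, so only newly generated
# tokens are converted back to text.
prompt_length = inputs["input_ids"].shape[1]
generated_ids = outputs[0][prompt_length:]
response = qwen_tokenizer.decode(generated_ids, skip_special_tokens=True).strip()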
 