tuandunghcmut committed (verified)
Commit: 522d800 · Parent: d044800

Update app.py

Files changed (1): app.py (+8 −19)
app.py CHANGED
@@ -1,18 +1,12 @@
-import json
-import os
-import re
-import sys
-from typing import List, Optional, Union, Dict, Any, Tuple
 import streamlit as st
 import torch
 import yaml
-import threading
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Set page config first
 st.set_page_config(page_title="Coding Multiple Choice Q&A", layout="wide")
 
-# Model path - using direct adapter path for CPU compatibility
+# Use the specified model
 MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"
 
 # Example data
@@ -49,9 +43,6 @@ for category, examples in CODING_EXAMPLES_BY_CATEGORY.items():
         CODING_EXAMPLES.append(example)
 
 class PromptCreator:
-    BASIC = "basic"
-    YAML_REASONING = "yaml"
-
     def __init__(self, prompt_type="yaml"):
         self.prompt_type = prompt_type
 
@@ -94,31 +85,29 @@ The answer field MUST contain ONLY a single character letter."""
 
 class QwenModelHandler:
     def __init__(self, model_path):
-        self.model_path = model_path
-
-        with st.spinner("Loading model (this may take several minutes on CPU)..."):
+        with st.spinner("Loading model..."):
             try:
-                # Load tokenizer
+                # Explicitly disable quantization options
                 self.tokenizer = AutoTokenizer.from_pretrained(
                     model_path,
                     trust_remote_code=True
                 )
 
-                # Load model directly for CPU without quantization
+                # Load with standard precision on CPU
                 self.model = AutoModelForCausalLM.from_pretrained(
                     model_path,
+                    torch_dtype=torch.float32,
                     device_map="cpu",
                     trust_remote_code=True,
-                    use_cache=True,
-                    # Explicitly disable bitsandbytes
+                    # Explicitly disable quantization
                     load_in_8bit=False,
                     load_in_4bit=False
                 )
 
-                if self.tokenizer.pad_token is None:
+                if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
                     self.tokenizer.pad_token = self.tokenizer.eos_token
             except Exception as e:
-                st.error(f"Error loading model: {str(e)}")
+                st.error(f"Error: {str(e)}")
                 raise
 
     def generate_response(self, prompt, max_tokens=512, temperature=0.7,
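
For reference, a minimal sketch of the loading pattern this commit settles on, pulled out of the Streamlit handler so it can run standalone. This is an illustration under stated assumptions (a CPU-only environment with torch and transformers installed), not part of the commit itself; every argument mirrors the diff above.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"

# Tokenizer first; trust_remote_code matches the app's call.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Standard float32 precision on CPU with quantization explicitly off,
# exactly as in the new __init__.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,
    device_map="cpu",
    trust_remote_code=True,
    load_in_8bit=False,
    load_in_4bit=False,
)

# Guard both sides of the pad-token fallback, as the new code does:
# only borrow eos_token when it actually exists.
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token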