Update app.py
app.py CHANGED
```diff
@@ -1,18 +1,12 @@
-import json
-import os
-import re
-import sys
-from typing import List, Optional, Union, Dict, Any, Tuple
 import streamlit as st
 import torch
 import yaml
-import threading
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Set page config first
 st.set_page_config(page_title="Coding Multiple Choice Q&A", layout="wide")
 
-#
+# Use the specified model
 MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"
 
 # Example data
```
```diff
@@ -49,9 +43,6 @@ for category, examples in CODING_EXAMPLES_BY_CATEGORY.items():
     CODING_EXAMPLES.append(example)
 
 class PromptCreator:
-    BASIC = "basic"
-    YAML_REASONING = "yaml"
-
     def __init__(self, prompt_type="yaml"):
         self.prompt_type = prompt_type
 
```
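With the `BASIC` and `YAML_REASONING` constants removed, `"yaml"` is now a bare string default, so any code elsewhere that still references `PromptCreator.BASIC` or `PromptCreator.YAML_REASONING` would raise `AttributeError`. For context, here is a minimal sketch of how a two-mode prompt builder like this typically branches on `prompt_type`. The `create_prompt` method and its wording are illustrative assumptions, not code from this commit; only the "single character letter" instruction is attested, in the hunk context below.

```python
# Sketch only: create_prompt is a hypothetical method, not shown in this diff.
class PromptCreator:
    def __init__(self, prompt_type="yaml"):
        self.prompt_type = prompt_type

    def create_prompt(self, question, choices):
        # Label the choices A, B, C, ... one per line.
        options = "\n".join(f"{chr(65 + i)}. {c}" for i, c in enumerate(choices))
        if self.prompt_type == "yaml":
            # YAML mode asks for structured reasoning plus a bare letter answer.
            return (
                f"{question}\n{options}\n\n"
                "Respond in YAML with 'reasoning' and 'answer' fields. "
                "The answer field MUST contain ONLY a single character letter."
            )
        # Basic mode: just ask for the letter.
        return f"{question}\n{options}\n\nAnswer with a single letter."
```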
```diff
@@ -94,31 +85,29 @@ The answer field MUST contain ONLY a single character letter."""
 
 class QwenModelHandler:
     def __init__(self, model_path):
-
-
-        with st.spinner("Loading model (this may take several minutes on CPU)..."):
+        with st.spinner("Loading model..."):
             try:
-                #
+                # Explicitly disable quantization options
                 self.tokenizer = AutoTokenizer.from_pretrained(
                     model_path,
                     trust_remote_code=True
                 )
 
-                # Load
+                # Load with standard precision on CPU
                 self.model = AutoModelForCausalLM.from_pretrained(
                     model_path,
+                    torch_dtype=torch.float32,
                     device_map="cpu",
                     trust_remote_code=True,
-
-                    # Explicitly disable bitsandbytes
+                    # Explicitly disable quantization
                     load_in_8bit=False,
                     load_in_4bit=False
                 )
 
-                if self.tokenizer.pad_token is None:
+                if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
                     self.tokenizer.pad_token = self.tokenizer.eos_token
             except Exception as e:
-                st.error(f"Error
+                st.error(f"Error: {str(e)}")
                 raise
 
     def generate_response(self, prompt, max_tokens=512, temperature=0.7,
```
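Two changes in this hunk do the real work. `torch_dtype=torch.float32` together with the explicit `load_in_8bit=False` / `load_in_4bit=False` flags forces a plain full-precision CPU load, which keeps `bitsandbytes` (a GPU-oriented quantization library and a common crash source on CPU-only hosts) out of the picture. The pad-token guard is also tightened to check that `eos_token` exists before aliasing it, since `generate()` needs a valid pad token for batching and attention masks. The same loading pattern as a standalone sketch outside Streamlit:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,  # full precision; no quantized kernels needed
    device_map="cpu",           # pin everything to CPU
    trust_remote_code=True,
    load_in_8bit=False,         # keep bitsandbytes out of the load path
    load_in_4bit=False,
)

# Models that ship without a dedicated pad token reuse EOS so that
# batched generate() calls can build valid attention masks.
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
```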
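The diff is cut off mid-signature, so the body of `generate_response` is not shown and its parameter list continues past `temperature`. As a rough sketch only, a CPU generation call with these parameters usually looks something like the following; this is an assumption about the implementation, not the actual method:

```python
import torch

def generate_response(model, tokenizer, prompt, max_tokens=512, temperature=0.7):
    # Sketch only: the real method's signature continues past this point in the diff.
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=temperature > 0,  # greedy decoding when temperature is 0
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
```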