tuandunghcmut committed (verified)
Commit: 522d800 · Parent: d044800

Update app.py

Files changed (1): app.py (+8 −19)
app.py CHANGED
@@ -1,18 +1,12 @@
-import json
-import os
-import re
-import sys
-from typing import List, Optional, Union, Dict, Any, Tuple
 import streamlit as st
 import torch
 import yaml
-import threading
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Set page config first
 st.set_page_config(page_title="Coding Multiple Choice Q&A", layout="wide")
 
-# Model path - using direct adapter path for CPU compatibility
+# Use the specified model
 MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"
 
 # Example data
@@ -49,9 +43,6 @@ for category, examples in CODING_EXAMPLES_BY_CATEGORY.items():
         CODING_EXAMPLES.append(example)
 
 class PromptCreator:
-    BASIC = "basic"
-    YAML_REASONING = "yaml"
-
     def __init__(self, prompt_type="yaml"):
         self.prompt_type = prompt_type
 
@@ -94,31 +85,29 @@ The answer field MUST contain ONLY a single character letter."""
 
 class QwenModelHandler:
     def __init__(self, model_path):
-        self.model_path = model_path
-
-        with st.spinner("Loading model (this may take several minutes on CPU)..."):
+        with st.spinner("Loading model..."):
             try:
-                # Load tokenizer
+                # Explicitly disable quantization options
                 self.tokenizer = AutoTokenizer.from_pretrained(
                     model_path,
                     trust_remote_code=True
                 )
 
-                # Load model directly for CPU without quantization
+                # Load with standard precision on CPU
                 self.model = AutoModelForCausalLM.from_pretrained(
                     model_path,
+                    torch_dtype=torch.float32,
                     device_map="cpu",
                     trust_remote_code=True,
-                    use_cache=True,
-                    # Explicitly disable bitsandbytes
+                    # Explicitly disable quantization
                     load_in_8bit=False,
                     load_in_4bit=False
                 )
 
-                if self.tokenizer.pad_token is None:
+                if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
                     self.tokenizer.pad_token = self.tokenizer.eos_token
             except Exception as e:
-                st.error(f"Error loading model: {str(e)}")
+                st.error(f"Error: {str(e)}")
                 raise
 
     def generate_response(self, prompt, max_tokens=512, temperature=0.7,
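
For reference, a minimal sketch of the loading pattern this commit settles on, pulled out of the Streamlit handler so it can run standalone. This is an illustration under stated assumptions (a CPU-only environment with torch and transformers installed), not part of the commit itself; every argument mirrors the diff above.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"

# Tokenizer first; trust_remote_code matches the app's call.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Standard float32 precision on CPU with quantization explicitly off,
# exactly as in the new __init__.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,
    device_map="cpu",
    trust_remote_code=True,
    load_in_8bit=False,
    load_in_4bit=False,
)

# Guard both sides of the pad-token fallback, as the new code does:
# only borrow eos_token when it actually exists.
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token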