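"""Streamlit demo: coding multiple-choice Q&A with YAML-structured reasoning.

Loads the tuandunghcmut/Qwen25_Coder_MultipleChoice_v4 PEFT adapter on top of
Qwen/Qwen2.5-Coder-1.5B-Instruct and asks the model to answer multiple-choice
coding questions with a step-by-step YAML explanation.
"""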
import streamlit as st
import torch
import yaml
from transformers import AutoTokenizer, AutoModelForCausalLM
# Set page config first
st.set_page_config(page_title="Coding Multiple Choice Q&A", layout="wide")
# Use the specified model
MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"
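# This repository holds PEFT adapter weights; the full model is reconstructed
# below by applying them to the Qwen/Qwen2.5-Coder-1.5B-Instruct base model.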
from coding_examples import CODING_EXAMPLES_BY_CATEGORY
# Flatten examples
CODING_EXAMPLES = []
for category, examples in CODING_EXAMPLES_BY_CATEGORY.items():
    for example in examples:
        example["category"] = category
        CODING_EXAMPLES.append(example)
class PromptCreator:
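    """Builds YAML-reasoning prompts for multiple-choice coding questions."""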
    def __init__(self, prompt_type="yaml"):
        self.prompt_type = prompt_type

    def format_choices(self, choices):
        if not choices:
            return ""
        if isinstance(choices, str):
            return choices
        return "\n".join(f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices))

    def get_max_letter(self, choices):
        if not choices:
            return "A"
        if isinstance(choices, str):
            num_choices = len([line for line in choices.split("\n") if line.strip()])
            return "A" if num_choices == 0 else chr(64 + num_choices)
        return chr(64 + len(choices))

    def create_inference_prompt(self, question, choices):
        if not question:
            return ""
        formatted_choices = self.format_choices(choices)
        max_letter = self.get_max_letter(choices)
        return f"""Question: {question}
Choices:
{formatted_choices}
Analyze this question step-by-step and provide a detailed explanation.
Your response MUST be in YAML format as follows:
understanding: |
<your understanding of what the question is asking>
analysis: |
<your analysis of each option>
reasoning: |
<your step-by-step reasoning process>
conclusion: |
<your final conclusion>
answer: <single letter A through {max_letter}>
The answer field MUST contain ONLY a single character letter."""
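
# Illustrative example: PromptCreator("yaml").create_inference_prompt(
#     "What does len([1, 2, 3]) return?", ["2", "3", "4"]
# ) produces the question, the choices labelled A-C, and the YAML answer-format instructions.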
class QwenModelHandler:
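    """Wraps the tokenizer and PEFT-adapted Qwen2.5-Coder model for CPU text generation."""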
    def __init__(self, model_path):
        with st.spinner("Loading model..."):
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                )
                # Load the base model in standard precision, then apply the PEFT adapter.
                from peft import PeftModel

                base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-1.5B-Instruct")
                self.model = PeftModel.from_pretrained(base_model, model_path)
                # Alternative: load a merged full model directly, explicitly disabling quantization.
                # self.model = AutoModelForCausalLM.from_pretrained(
                #     model_path,
                #     torch_dtype=torch.float32,
                #     device_map="cpu",
                #     trust_remote_code=True,
                #     load_in_8bit=False,
                #     load_in_4bit=False,
                # )
                if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token
            except Exception as e:
                st.error(f"Error: {str(e)}")
                raise
    def generate_response(self, prompt, max_tokens=512, temperature=0.7,
                          top_p=0.9, top_k=50, repetition_penalty=1.0,
                          do_sample=True):
        """Generate a completion for the prompt and return only the newly generated text."""
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt")
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    repetition_penalty=repetition_penalty,
                    do_sample=do_sample,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # The decoded text echoes the prompt; strip it so only the answer remains.
            if prompt in response:
                response = response[len(prompt):].strip()
            return response
        except Exception as e:
            return f"Error during generation: {str(e)}"
# Create prompt without requiring model
def create_prompt(question, choices):
    creator = PromptCreator(prompt_type="yaml")
    return creator.create_inference_prompt(question, choices)
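# Example (illustrative): create_prompt("Which keyword defines a function in Python?",
# ["def", "func", "lambda"]) returns the full YAML-reasoning prompt with the
# choices labelled A-C.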
def main():
    # Initialize session state
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
    if 'model_output' not in st.session_state:
        st.session_state.model_output = ""
    st.title("Coding Multiple Choice Q&A with YAML Reasoning")
    st.warning("⚠️ Running on CPU - model loading and inference will be slow")
    # Two-column layout
    col1, col2 = st.columns([4, 6])
    with col1:
        st.subheader("Examples")
        # Category selector
        category_options = ["All Categories"] + list(CODING_EXAMPLES_BY_CATEGORY.keys())
        selected_category = st.selectbox("Select a category", category_options)
        # Example selector
        if selected_category == "All Categories":
            example_options = [f"Example {i+1}: {ex['question']}" for i, ex in enumerate(CODING_EXAMPLES)]
        else:
            example_options = []
            start_idx = 0
            for cat, examples in CODING_EXAMPLES_BY_CATEGORY.items():
                if cat == selected_category:
                    example_options = [f"Example {start_idx+i+1}: {ex['question']}" for i, ex in enumerate(examples)]
                    break
                start_idx += len(examples)
        selected_example = st.selectbox("Select an example question", [""] + example_options)
        # Process the selected example, falling back to empty fields on any parsing error
        if selected_example:
            try:
                example_idx = int(selected_example.split(":")[0].split()[-1]) - 1
                example = CODING_EXAMPLES[example_idx]
                question = example["question"]
                choices = "\n".join(f"{chr(65+i)}. {choice}" for i, choice in enumerate(example["choices"]))
            except Exception:
                question = ""
                choices = ""
        else:
            question = ""
            choices = ""
st.subheader("Your Question")
question_input = st.text_area("Question", value=question, height=100,
placeholder="Enter your coding question here...")
choices_input = st.text_area("Choices", value=choices, height=150,
placeholder="Enter each choice on a new line...")
# Model Parameters
temperature = st.slider("Temperature", 0.0, 1.0, 0.7, 0.1)
with st.expander("Advanced Parameters"):
max_tokens = st.slider("Max Tokens", 128, 1024, 512, 128)
top_p = st.slider("Top-p", 0.1, 1.0, 0.9, 0.1)
top_k = st.slider("Top-k", 1, 100, 50, 10)
repetition_penalty = st.slider("Repetition Penalty", 1.0, 2.0, 1.1, 0.1)
do_sample = st.checkbox("Enable Sampling", True)
# Load model button
if not st.session_state.model_loaded:
if st.button("Load Model", type="primary"):
try:
st.session_state.model_handler = QwenModelHandler(MODEL_PATH)
st.session_state.prompt_creator = PromptCreator("yaml")
st.session_state.model_loaded = True
# st.experimental_rerun()
st.rerun()
except Exception as e:
st.error(f"Failed to load model: {str(e)}")
# Generate button
if st.session_state.model_loaded:
generate_button = st.button("Generate Response", type="primary")
else:
st.info("Please load the model first")
generate_button = False
    with col2:
        # Show the prompt that will be sent to the model
        st.subheader("Model Input")
        if question_input and choices_input:
            prompt = create_prompt(question_input, choices_input)
            st.text_area("Prompt", value=prompt, height=200, disabled=True)
        else:
            st.text_area("Prompt", value="", height=200, disabled=True)
        # Results Area
        st.subheader("Model Response")
        st.text_area("Response", value=st.session_state.model_output, height=300)
        # YAML parsing
        if st.session_state.model_output:
            try:
                with st.expander("Raw Output"):
                    st.code(st.session_state.model_output, language="yaml")
                try:
                    yaml_data = yaml.safe_load(st.session_state.model_output)
                    with st.expander("Parsed Output", expanded=True):
                        st.json(yaml_data)
                except yaml.YAMLError:
                    st.warning("Could not parse output as YAML")
            except Exception:
                pass
    # Handle generation
    if generate_button and st.session_state.model_loaded:
        if not question_input or not choices_input:
            st.error("Please provide both a question and choices.")
        else:
            try:
                prompt = st.session_state.prompt_creator.create_inference_prompt(question_input, choices_input)
                with st.spinner("Generating response..."):
                    response = st.session_state.model_handler.generate_response(
                        prompt=prompt,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        top_k=top_k,
                        repetition_penalty=repetition_penalty,
                        do_sample=do_sample,
                    )
                st.session_state.model_output = response
                st.rerun()  # refresh so the response appears in the output area
            except Exception as e:
                st.error(f"Error generating response: {e}")
if __name__ == "__main__":
main()
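
# To run locally (assumption: this file is saved as app.py):
#   pip install streamlit torch transformers peft pyyaml
#   streamlit run app.py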