Spaces:
Sleeping
Sleeping
| import json | |
| import re | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Module-level cache: both names stay None until load_model() first succeeds,
# so importing this module does not load the model.
tokenizer = None
model = None


def load_model():
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use.

    The pair is stored in the module-level ``tokenizer`` and ``model``
    globals. If either is still ``None``, both are (re)loaded from the
    ``deepseek-ai/deepseek-coder-1.3b-instruct`` checkpoint via
    ``AutoTokenizer.from_pretrained`` / ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    global tokenizer, model

    # Fast path: everything is already cached.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    # Use the DeepSeek instruct model for code evaluation.
    checkpoint = "deepseek-ai/deepseek-coder-1.3b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model
def extract_json(response_text):
    """Return the last JSON object in *response_text* holding a rating.

    A rating object is a JSON object that contains both a ``"stars"`` and a
    ``"feedback"`` key. The text is scanned from every ``{`` with a real JSON
    parser, so objects whose strings contain braces (``"use {x} here"``) or
    that contain nested objects are recognised; a plain ``\\{.*?\\}`` regex
    would cut those off at the first ``}``.

    Args:
        response_text: Raw text that may contain JSON objects surrounded by
            arbitrary other text.

    Returns:
        The last matching object as a ``dict``, or ``None`` when the input is
        not a string or holds no such object.
    """
    if not isinstance(response_text, str):
        return None

    decoder = json.JSONDecoder()
    found = None
    pos = response_text.find("{")
    while pos != -1:
        # By default, retry from the very next character so that an object
        # nested inside unparseable or non-rating text is still discovered.
        resume_at = pos + 1
        try:
            candidate, end = decoder.raw_decode(response_text, pos)
        except (ValueError, RecursionError):
            # Not valid JSON from this brace (JSONDecodeError is a ValueError).
            pass
        else:
            if (
                isinstance(candidate, dict)
                and "stars" in candidate
                and "feedback" in candidate
            ):
                # Later objects override earlier ones: the last one wins.
                found = candidate
                # Skip past the object so its own nested braces are not
                # re-scanned as separate candidates.
                resume_at = end
        pos = response_text.find("{", resume_at)
    return found
def evaluate_code(question, code):
    """Ask the language model to rate *code* as a solution to *question*.

    Builds a grading prompt, samples a completion from the model returned by
    ``load_model()``, and parses the rating out of that completion with
    ``extract_json()``.

    Args:
        question: Problem statement, interpolated verbatim into the prompt.
        code: Candidate solution, interpolated verbatim into the prompt.

    Returns:
        A dict with ``"stars"`` and ``"feedback"`` keys. When no such JSON
        object can be extracted from the completion, a fallback dict with
        ``"stars": 0`` and an explanatory ``"feedback"`` message is returned.
    """
    # Local import keeps this function self-contained; sys is only needed
    # for the diagnostic output below.
    import sys

    # NOTE(review): the prompt states that the problem is "the square of a
    # number" regardless of the `question` argument, so other problems are
    # graded against the wrong specification. Left unchanged because the
    # wording appears to be deliberately tuned -- confirm intent.
    prompt = f"""You are an expert code evaluator.
Evaluate the following solution for the given problem.
The problem asks for a function that returns the square of a number.
A correct solution must multiply the number by itself (using x*x or x**2).
If the solution uses any other operation (such as addition), it is completely incorrect.
Rate the solution as follows:
- 5 stars: Perfect solution; correct, efficient, and follows best practices.
- 4 stars: Correct solution with minor issues.
- 3 stars: Partially correct solution with noticeable issues.
- 2 stars: Incorrect solution with some correct elements.
- 1 star: Mostly incorrect solution.
- 0 stars: Completely incorrect solution.
Respond with exactly one JSON object (with no extra text) that has exactly two keys:
"stars": an integer between 0 and 5,
"feedback": a concise string message explaining your rating.
The JSON must start with '{{' and end with '}}'.
Do not output any additional text.
Question: "{question}"
Solution: "{code}"
Your response:"""
    tokenizer, model = load_model()
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=120,
        temperature=0.2,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new ones. Decode only the completion: otherwise JSON-looking text that
    # the caller placed in `question` or `code` could be mistaken for the
    # model's verdict whenever the model itself emits no valid JSON.
    prompt_length = inputs["input_ids"].shape[-1]
    response_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # Diagnostics go to stderr so stdout carries only the JSON result when
    # this module is run as a script.
    print("Raw model response:", response_text, file=sys.stderr)  # Debug output

    result = extract_json(response_text)
    if result is None:
        result = {"stars": 0, "feedback": "Evaluation failed. Unable to extract valid JSON from AI response."}
    return result
# Command-line entry point for quick manual testing:
#     python <this file> "<question>" "<code>"
# Prints the evaluation as one JSON document. With fewer than two arguments
# it prints a JSON error object instead and exits with status 1.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        verdict = evaluate_code(cli_args[0], cli_args[1])
        print(json.dumps(verdict))
    else:
        print(json.dumps({"error": "Please provide a question and code as arguments"}))
        sys.exit(1)