# evaluator.py
"""Score code generated by a seq2seq model against a reference using BLEU and ROUGE."""

import torch
from torchmetrics.text.bleu import BLEUScore
from torchmetrics.text.rouge import ROUGEScore
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


class CodeEvaluator:
    """Generate code from a natural-language prompt and score it against a reference.

    The tokenizer and model are loaded with ``from_pretrained`` and the model
    and both metrics are moved to CUDA when available, otherwise to the CPU.
    """

    def __init__(self, model_name="S-Dreamer/PyCodeT5"):
        """Load the tokenizer, model and metrics.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the seq2seq model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        # Keep the metric state on the same device as the model.
        self.bleu = BLEUScore(n_gram=4).to(self.device)
        self.rouge = ROUGEScore().to(self.device)

    def evaluate(self, nl_input: str, target_code: str):
        """Generate code for ``nl_input`` and score it against ``target_code``.

        Args:
            nl_input: Natural-language description fed to the model.
            target_code: Reference implementation to compare against.

        Returns:
            A ``(bleu_score, rouge_score)`` tuple holding the values returned
            by the ``BLEUScore`` and ``ROUGEScore`` metric calls for this
            single prediction/reference pair.
        """
        self.model.eval()  # disable dropout etc. for deterministic inference
        with torch.no_grad():  # no gradients are needed for generation
            inputs = self.tokenizer(nl_input, return_tensors="pt").to(self.device)
            outputs = self.model.generate(
                **inputs,
                max_length=512,
                num_beams=5,
                early_stopping=True,
            )

        generated_code = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # BLEUScore takes a sequence of predictions and, for each prediction,
        # a sequence of reference strings. Passing bare strings would make it
        # treat them as sequences of characters instead of one sentence each.
        bleu_score = self.bleu([generated_code], [[target_code]])
        # ROUGEScore accepts plain strings for a single prediction/reference.
        rouge_score = self.rouge(generated_code, target_code)

        return bleu_score, rouge_score


if __name__ == "__main__":
    evaluator = CodeEvaluator()

    nl_input = "Write a Python function to reverse a string."
    target_code = """def reverse_string(s):
    return s[::-1]
"""

    bleu_score, rouge_score = evaluator.evaluate(nl_input, target_code)
    print(f"BLEU score: {bleu_score}")
    print(f"ROUGE score: {rouge_score}")