# Streamlit app that runs heuristic AST checks on Python source, applies a
# small automatic fix (placeholder definitions for undefined variables), and
# shows the raw output of a CodeBERT sequence-classification model.

import ast
import builtins
import traceback

import astor
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Initialize Hugging Face model and tokenizer
MODEL_NAME = "microsoft/codebert-base"

# Load the pre-trained CodeBERT model for understanding code.
# NOTE(review): this runs at module level, i.e. on every execution of the
# script; if Streamlit re-runs the script per interaction, consider wrapping
# the loading in Streamlit's resource cache (when the installed version
# provides one) so the model is only built once.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Names that resolve without any binding in the analysed source.
_IMPLICIT_NAMES = frozenset(dir(builtins)) | {"__file__"}
_NUMERIC_TYPES = (int, float, complex)  # bool is a subclass of int
_TEXT_TYPES = (str, bytes)


# Helper function to analyze code
def analyze_code(code):
    """Run the classification model over *code*, one prediction per chunk.

    The source is split into chunks of at most 512 lines; every chunk is
    tokenized (truncated to 512 tokens) and passed through ``model``.

    Args:
        code: Python source text.

    Returns:
        A list holding the arg-max class index of each chunk, in order.
    """
    # The same limit is used for "lines per chunk" and "tokens per chunk".
    # NOTE(review): a chunk of several hundred lines will normally exceed 512
    # tokens, so truncation likely drops the tail of each chunk -- confirm
    # whether token-based chunking is wanted before relying on the results.
    max_length = 512
    lines = code.split("\n")
    results = []
    for start in range(0, len(lines), max_length):
        chunk = "\n".join(lines[start:start + max_length])
        tokenized_code = tokenizer(
            chunk, return_tensors="pt", truncation=True, max_length=max_length
        )
        logits = model(**tokenized_code).logits
        results.append(logits.argmax(dim=1).item())
    return results


def _bound_names(nodes):
    """Return ``(names, has_star_import)`` for every binding found in *nodes*.

    Scope is ignored on purpose: a name bound anywhere in the module counts
    as defined, which keeps the undefined-name check free of false alarms.
    """
    names = set()
    has_star_import = False
    for node in nodes:
        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                names.add(node.id)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names.add(node.name)
        elif isinstance(node, ast.arg):
            names.add(node.arg)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*":
                    has_star_import = True
                else:
                    # ``import a.b`` binds ``a``; ``import a.b as c`` binds ``c``.
                    names.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ExceptHandler) and node.name:
            names.add(node.name)
    return names, has_star_import


def _definition_insert_index(body):
    """Return the index in *body* where placeholder assignments may go.

    A module docstring and ``from __future__`` imports must stay first.
    """
    index = 0
    if (
        body
        and isinstance(body[0], ast.Expr)
        and isinstance(body[0].value, ast.Constant)
        and isinstance(body[0].value.value, str)
    ):
        index = 1
    while (
        index < len(body)
        and isinstance(body[index], ast.ImportFrom)
        and body[index].module == "__future__"
    ):
        index += 1
    return index


def _fix_undefined_names(tree, nodes):
    """Report unresolved names and add ``name = None`` for plain variables.

    Unresolved names are classified by how they are used:
    called directly -> undefined function; used as ``name.attr`` -> probably a
    missing import; anything else -> undefined variable, which is the only
    case that is auto-fixed (``None`` is neither callable nor a module).

    Returns:
        ``(suggestions, modified)`` where *modified* tells whether *tree*
        was changed.
    """
    suggestions = []
    bound, has_star_import = _bound_names(nodes)
    if has_star_import:
        # ``from x import *`` may bind anything; nothing can be concluded.
        return suggestions, False

    known = bound | _IMPLICIT_NAMES
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unresolved = list(
        dict.fromkeys(
            node.id
            for node in nodes
            if isinstance(node, ast.Name)
            and isinstance(node.ctx, ast.Load)
            and node.id not in known
        )
    )
    called = {
        node.func.id
        for node in nodes
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name)
    }
    attribute_bases = {
        node.value.id
        for node in nodes
        if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name)
    }

    placeholders = []
    for name in unresolved:
        if name in called:
            suggestions.append(f"Function '{name}' is called but not defined.")
        elif name in attribute_bases:
            suggestions.append(f"Missing import for '{name}'.")
        else:
            suggestions.append(f"Variable '{name}' is used but not defined.")
            placeholders.append(name)

    if placeholders:
        index = _definition_insert_index(tree.body)
        tree.body[index:index] = [
            ast.Assign(
                targets=[ast.Name(id=name, ctx=ast.Store())],
                value=ast.Constant(value=None),
            )
            for name in placeholders
        ]
        suggestions.extend(
            f"Added a definition for variable '{name}'." for name in placeholders
        )
    return suggestions, bool(placeholders)


def _constant_operands_compatible(op, left, right):
    """Return True when two literal operands are a plausible pair for *op*."""
    if type(left) is type(right):
        return True
    if isinstance(left, _NUMERIC_TYPES) and isinstance(right, _NUMERIC_TYPES):
        return True  # mixed int/float/complex arithmetic is fine
    if isinstance(op, ast.Mult):
        # Sequence repetition: "ab" * 3 or 3 * "ab".
        return (isinstance(left, _TEXT_TYPES) and isinstance(right, int)) or (
            isinstance(left, int) and isinstance(right, _TEXT_TYPES)
        )
    if isinstance(op, ast.Mod):
        return isinstance(left, _TEXT_TYPES)  # printf-style formatting
    return False


# Function to detect and fix bugs, including logical errors
def detect_and_fix_bugs(code):
    """Run heuristic AST checks on *code* and apply the automatic fixes.

    Checks performed: unresolved names (undefined variables, undefined
    functions, probable missing imports), unused variables, operations on
    literals of incompatible types, ``if`` statements with a constant
    condition, and duplicate literal keys in dict displays.  All checks are
    scope-insensitive heuristics.

    Args:
        code: Python source text.

    Returns:
        ``(suggestions, fixed_code)``: a list of human-readable messages and
        the corrected source.  *fixed_code* is *code* itself unless a fix was
        applied, in which case it is regenerated from the AST (comments and
        original formatting are not preserved by the regeneration).
        Analysis failures are reported as a suggestion, never raised.
    """
    suggestions = []
    fixed_code = code
    try:
        tree = ast.parse(code)
        # Snapshot once; every check below works on the original nodes.
        nodes = list(ast.walk(tree))

        # Detect undefined names (and add placeholder definitions)
        name_suggestions, modified = _fix_undefined_names(tree, nodes)
        suggestions.extend(name_suggestions)

        # Detect unused variables
        assigned_vars = {
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Store)
        }
        used_vars = {
            n.id for n in nodes
            if isinstance(n, ast.Name) and isinstance(n.ctx, ast.Load)
        }
        # Sorted so the report is stable between runs.
        for var in sorted(assigned_vars - used_vars):
            is_dunder = var.startswith("__") and var.endswith("__")
            if var == "_" or is_dunder:
                continue  # conventional throwaway / module metadata names
            suggestions.append(f"Variable '{var}' is defined but never used.")

        for node in nodes:
            # Detect type mismatches (example: adding string to integer)
            if isinstance(node, ast.BinOp):
                left, right = node.left, node.right
                if (
                    isinstance(left, ast.Constant)
                    and isinstance(right, ast.Constant)
                    and not _constant_operands_compatible(
                        node.op, left.value, right.value
                    )
                ):
                    suggestions.append(
                        f"Type mismatch in operation: '{left.value}' "
                        f"({type(left.value).__name__}) and '{right.value}' "
                        f"({type(right.value).__name__})."
                    )

            # Detect logical errors (example: unreachable code)
            elif isinstance(node, ast.If) and isinstance(node.test, ast.Constant):
                if node.test.value is False:
                    suggestions.append(
                        f"Unreachable code detected at line {node.lineno}."
                    )
                elif node.test.value is True:
                    suggestions.append(
                        f"Redundant condition always True at line {node.lineno}."
                    )

            # Detect duplicate keys in dictionaries
            elif isinstance(node, ast.Dict):
                # ``**spread`` entries have a key of None and are skipped.
                keys = [k.value for k in node.keys if isinstance(k, ast.Constant)]
                if len(keys) != len(set(keys)):
                    suggestions.append("Duplicate keys detected in dictionary.")

        # Convert the modified AST back to code only when something changed,
        # so untouched input keeps its comments and formatting.
        if modified:
            fixed_code = astor.to_source(tree)
    except SyntaxError as exc:
        suggestions.append(
            f"Error analyzing code: syntax error at line {exc.lineno}: {exc.msg}"
        )
    except Exception:
        # UI boundary: report the failure instead of crashing the app.
        suggestions.append(f"Error analyzing code: {traceback.format_exc()}")
    return suggestions, fixed_code


# Streamlit app UI
st.title("Code Quality, Bug Detection, and Auto-Correction Tool")
st.markdown(
    "Analyze your code for syntax issues, quality, bugs, logical errors, "
    "and get suggested corrections."
)

# File uploader
uploaded_file = st.file_uploader("Upload a Python code file", type=["py"])

# Code snippet input
code_snippet = st.text_area("Or paste your code snippet below:")

if st.button("Analyze and Fix Code"):
    if uploaded_file is not None:
        try:
            # getvalue() does not depend on the current stream position;
            # "utf-8-sig" also strips a leading BOM, if any.
            code = uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text.")
            st.stop()
    elif code_snippet.strip():
        code = code_snippet
    else:
        st.error("Please upload a file or paste code to analyze.")
        st.stop()

    # Perform code analysis and bug fixing
    st.subheader("Analysis Results")
    st.write("**Code Quality and Bug Suggestions:**")
    suggestions, fixed_code = detect_and_fix_bugs(code)
    if suggestions:
        for i, suggestion in enumerate(suggestions, 1):
            st.write(f"{i}. {suggestion}")
    else:
        st.write("No major issues detected. Your code looks good!")

    # Display corrected code
    st.subheader("Corrected Code:")
    st.code(fixed_code, language="python")

    # Simulated CodeBERT analysis (placeholder)
    st.write("**Model Analysis:**")
    model_results = analyze_code(code)
    for idx, result in enumerate(model_results, 1):
        st.write(f"Chunk {idx} classification result: {result}")

st.markdown("---")
st.markdown("*Powered by Hugging Face and Streamlit*")