"""Streamlit app that grades answers found in an uploaded student PDF.

The app extracts text from the PDF (falling back to OCR when the PDF has no
usable embedded text), pulls out the student name and the questions with
regular expressions, and scores each question with a question-answering
model, using the extracted document text as context.
"""

import io
import os
import re
import tempfile

import PyPDF2
import pytesseract
import streamlit as st
from fpdf import FPDF
from pdf2image import convert_from_path
from transformers import pipeline

# Load pre-trained model for question-answering.
# NOTE(review): this runs at module level, so the model is presumably
# re-loaded every time Streamlit re-executes the script; consider wrapping the
# load in Streamlit's resource cache if the installed version provides one.
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

# Substrings (lower-case) that mark a candidate "question" as header metadata.
_METADATA_KEYWORDS = ('name', 'roll no', 'school')


# Extract text from PDF (text-based and image-based)
def extract_text_from_pdf(pdf_path):
    """Return the embedded text of every page of the PDF, concatenated.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined in page order; an empty string when no page
        yields any text.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` guards against a page for which extraction yields nothing.
        return ''.join(page.extract_text() or '' for page in reader.pages)


def extract_text_from_image_pdf(pdf_path):
    """Return OCR text for the PDF by rendering its pages to images.

    Args:
        pdf_path: Path of the PDF file to render and OCR.

    Returns:
        The OCR output of each rendered image, concatenated in order.
    """
    images = convert_from_path(pdf_path)
    return ''.join(pytesseract.image_to_string(image) for image in images)


# Process the extracted text
def preprocess_text(text):
    """Collapse every run of whitespace (newlines included) to one space.

    Args:
        text: Raw extracted text.

    Returns:
        The text with whitespace runs collapsed and both ends stripped.
    """
    # `\s+` already covers newlines, so no separate newline pass is needed.
    return re.sub(r'\s+', ' ', text).strip()


# Grading function using the question-answering model
def grade_answer(question, student_answer, threshold=0.5):
    """Score *student_answer* against *question* with the QA pipeline.

    Args:
        question: The question text.
        student_answer: Text passed to the pipeline as the context.
        threshold: Scores strictly above this value are labelled "Correct".

    Returns:
        A ``(score, feedback)`` tuple where ``score`` is the pipeline's
        ``score`` field and ``feedback`` is "Correct" or "Incorrect".
        ``(0.0, "Incorrect")`` is returned without calling the model when
        either input is blank.
    """
    # The pipeline is expected to reject empty input; there is also nothing
    # to grade in that case, so short-circuit instead of raising.
    if not question.strip() or not student_answer.strip():
        return 0.0, "Incorrect"

    result = qa_pipeline(question=question, context=student_answer)
    answer_score = result['score']
    feedback = "Correct" if answer_score > threshold else "Incorrect"
    return answer_score, feedback


# Function to extract student name from text
def extract_student_name(text):
    """Return the name following the first "Name" label in *text*.

    The label may be followed by an optional ``:``, ``|`` or ``-``. The
    captured name is limited to word characters, spaces and tabs so that it
    stops at the end of its line instead of swallowing the following lines.

    Args:
        text: Raw extracted text (line breaks preserved).

    Returns:
        The stripped name, or "Unknown Student" when no label with a name
        after it is found.
    """
    match = re.search(r"Name\s*[:|-]?\s*(\w[\w \t]*)", text)
    if match:
        return match.group(1).strip()
    return "Unknown Student"


# Function to extract questions from the text
def extract_questions_from_text(text):
    """Return the distinct question-like fragments found in *text*.

    Two kinds of fragments are collected: runs starting with "Question"
    (optionally followed by ``:``, ``|`` or ``-``), and any period-free span
    that ends with ``?``. Fragments are stripped, de-duplicated in order of
    first appearance, and dropped when they contain a metadata keyword.

    Note that the keyword filter is a plain substring test, so a genuine
    question containing e.g. "name" is dropped as well.

    Args:
        text: Extracted document text.

    Returns:
        A list of question strings in a stable, document-derived order.
    """
    # Questions introduced by a "Question:" style label.
    candidates = re.findall(r'(Question\s*[:|-]?\s*[\w\s\?]+)', text)
    # Any sentence-like span ending with "?".
    candidates += re.findall(r'([^.]*\?)', text)

    # dict.fromkeys keeps first-seen order, unlike set(), so the questions
    # are displayed in the same order on every run.
    unique = dict.fromkeys(candidate.strip() for candidate in candidates)

    return [
        question
        for question in unique
        if question
        and not any(keyword in question.lower() for keyword in _METADATA_KEYWORDS)
    ]


# Streamlit Interface
st.title('Student Answer Grading System')
st.write('Upload a PDF containing student details and their answers.')

# Upload file
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Save the upload to a unique temporary file: a fixed file name would be
    # shared (and overwritten) by concurrent sessions and never cleaned up.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getbuffer())
        pdf_path = tmp_file.name

    try:
        # Extract text from the uploaded PDF
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():  # Nothing usable extracted, try OCR
            text = extract_text_from_image_pdf(pdf_path)
    finally:
        os.remove(pdf_path)

    # Show the extracted text so it can be checked manually (optional)
    st.subheader("Extracted Text:")
    st.text(text)

    # Preprocess text
    preprocessed_text = preprocess_text(text)

    # Extract student name and questions from the raw text (the name pattern
    # relies on line breaks that preprocessing removes).
    student_name = extract_student_name(text)
    questions = extract_questions_from_text(text)

    # Display student name
    st.subheader(f"Student Name: {student_name}")

    # Results: each question is graded against the whole preprocessed text.
    results = {}
    for question in questions:
        score, feedback = grade_answer(question, preprocessed_text)
        results[question] = {"score": score, "feedback": feedback}

    # Display results
    for question, result in results.items():
        st.write(f"**Question**: {question}")
        st.write(f"**Score**: {result['score']:.2f}")
        st.write(f"**Feedback**: {result['feedback']}")
        st.write("---")