import logging import re from openpyxl import load_workbook # Debug Mode (Set to False for production) DEBUG_MODE = True # Logging Configuration logging.basicConfig( level=logging.DEBUG if DEBUG_MODE else logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[ logging.FileHandler("debug.log"), logging.StreamHandler() ] ) def process_xlsx(file_path): logging.info(f"Processing XLSX file: {file_path}") try: # Load XLSX content wb = load_workbook(filename=file_path) sheet = wb.active xlsx_content = [[cell.value for cell in row] for row in sheet.rows] # Initialization column_names = [] department_program_courses = {} current_department = None current_program = None # Determine column indices header_index = 0 while header_index < len(xlsx_content): if "Course Code" in [x for x in xlsx_content[header_index] if x]: break header_index += 1 column_names = xlsx_content[header_index] cr_index = [i for i, x in enumerate(column_names) if re.match(r"Cr", str(x))] if not cr_index: logging.error("Could not find 'Cr' column index.") return None cr_index = cr_index[0] # Process rows for index, row in enumerate(xlsx_content): if index <= header_index: continue # Department Row Detection (Loose pattern for "Cr") if row[0] and row[cr_index] and re.match(r"cr", str(row[cr_index]), re.IGNORECASE): current_department = row[0] department_program_courses.setdefault(current_department, {}) current_program = None logging.debug(f"Detected Department: {current_department}") # Program Row Detection (Empty "Cr" column) elif row[0] and not row[cr_index]: current_program = row[0] department_program_courses[current_department].setdefault(current_program, []) logging.debug(f"Detected Program under {current_department}: {current_program}") # Course Row Detection (Numeric "Cr" value) elif row[0] and isinstance(row[cr_index], (int, float)): course_codes = [row[0]] # Default to single course code # Handle special case (e.g., "STA421/521") if "/" in row[0]: start, end = row[0].split("/") course_codes = [start, start[:3] + end] logging.info(f"Splitting course for row: {repr(row)}") logging.info(f"course_codes: {course_codes}") for code in course_codes: new_row = row[:] # Copy original row new_row[0] = code # Update course code for each split course # Assign courses to program if exists, otherwise directly to department if current_program: department_program_courses[current_department][current_program].append(new_row) else: department_program_courses[current_department].setdefault(current_department, []).append(new_row) logging.debug(f"Added Course(s) {course_codes} under {current_program or 'directly in department'} in {current_department}") elif row[0]: logging.info(f"Skipping row: {repr(row)}") return (column_names, department_program_courses) except Exception as e: logging.error(f"An error occurred: {str(e)}") return None if __name__ == "__main__": file_path = "data/FTCM_Course_List_Spring2025.xlsx" result = process_xlsx(file_path) if result: column_names, department_program_courses = result print("Column Names:") # Modify column names to have no spaces and no line breaks modified_column_names = [name.replace(' ', '').replace('\n', '') if name else name for name in column_names] print(modified_column_names) print("\nDepartment, Program, Courses:") for department, programs in department_program_courses.items(): print(f"**Department: {department}**") for program, courses in programs.items(): print(f" Program: *{program}") for course in courses: print(f" - Course: {course}") else: print("Failed to process XLSX file")