Spaces:
Sleeping
Sleeping
File size: 4,654 Bytes
6340cf4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import logging
import re
from openpyxl import load_workbook
# Debug Mode (Set to False for production)
DEBUG_MODE = True
# Logging Configuration
logging.basicConfig(
level=logging.DEBUG if DEBUG_MODE else logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("debug.log"),
logging.StreamHandler()
]
)
def process_xlsx(file_path):
logging.info(f"Processing XLSX file: {file_path}")
try:
# Load XLSX content
wb = load_workbook(filename=file_path)
sheet = wb.active
xlsx_content = [[cell.value for cell in row] for row in sheet.rows]
# Initialization
column_names = []
department_program_courses = {}
current_department = None
current_program = None
# Determine column indices
header_index = 0
while header_index < len(xlsx_content):
if "Course Code" in [x for x in xlsx_content[header_index] if x]:
break
header_index += 1
column_names = xlsx_content[header_index]
cr_index = [i for i, x in enumerate(column_names) if re.match(r"Cr", str(x))]
if not cr_index:
logging.error("Could not find 'Cr' column index.")
return None
cr_index = cr_index[0]
# Process rows
for index, row in enumerate(xlsx_content):
if index <= header_index:
continue
# Department Row Detection (Loose pattern for "Cr")
if row[0] and row[cr_index] and re.match(r"cr", str(row[cr_index]), re.IGNORECASE):
current_department = row[0]
department_program_courses.setdefault(current_department, {})
current_program = None
logging.debug(f"Detected Department: {current_department}")
# Program Row Detection (Empty "Cr" column)
elif row[0] and not row[cr_index]:
current_program = row[0]
department_program_courses[current_department].setdefault(current_program, [])
logging.debug(f"Detected Program under {current_department}: {current_program}")
# Course Row Detection (Numeric "Cr" value)
elif row[0] and isinstance(row[cr_index], (int, float)):
course_codes = [row[0]] # Default to single course code
# Handle special case (e.g., "STA421/521")
if "/" in row[0]:
start, end = row[0].split("/")
course_codes = [start, start[:3] + end]
logging.info(f"Splitting course for row: {repr(row)}")
logging.info(f"course_codes: {course_codes}")
for code in course_codes:
new_row = row[:] # Copy original row
new_row[0] = code # Update course code for each split course
# Assign courses to program if exists, otherwise directly to department
if current_program:
department_program_courses[current_department][current_program].append(new_row)
else:
department_program_courses[current_department].setdefault(current_department, []).append(new_row)
logging.debug(f"Added Course(s) {course_codes} under {current_program or 'directly in department'} in {current_department}")
elif row[0]:
logging.info(f"Skipping row: {repr(row)}")
return (column_names, department_program_courses)
except Exception as e:
logging.error(f"An error occurred: {str(e)}")
return None
if __name__ == "__main__":
file_path = "data/FTCM_Course_List_Spring2025.xlsx"
result = process_xlsx(file_path)
if result:
column_names, department_program_courses = result
print("Column Names:")
# Modify column names to have no spaces and no line breaks
modified_column_names = [name.replace(' ', '').replace('\n', '') if name else name for name in column_names]
print(modified_column_names)
print("\nDepartment, Program, Courses:")
for department, programs in department_program_courses.items():
print(f"**Department: {department}**")
for program, courses in programs.items():
print(f" Program: *{program}")
for course in courses:
print(f" - Course: {course}")
else:
print("Failed to process XLSX file") |