import gradio as gr import pandas as pd from fuzzywuzzy import process, fuzz import tempfile import os from openpyxl import load_workbook from openpyxl.styles import Alignment def auto_correct_names(series, threshold=90): """Auto-correct typos in chatter names using fuzzy matching.""" unique_names = series.dropna().unique() name_mapping = {} for name in unique_names: matches = process.extractBests( name, unique_names, scorer=fuzz.token_sort_ratio, score_cutoff=threshold ) if matches: best_match = max(matches, key=lambda x: (x[1], list(series).count(x[0]))) name_mapping[name] = best_match[0] return series.replace(name_mapping) def adjust_excel_formatting(file_path): """Adjust column widths and enable text wrapping in Excel file.""" wb = load_workbook(file_path) ws = wb.active for col in ws.columns: max_length = 0 col_letter = col[0].column_letter for cell in col: if cell.value: max_length = max(max_length, len(str(cell.value))) cell.alignment = Alignment(wrap_text=True) ws.column_dimensions[col_letter].width = max_length + 2 wb.save(file_path) def process_file(input_file): """Process uploaded Excel file and return output""" try: # Read input file input_df = pd.read_excel(input_file, header=1) # Store original date order date_columns = input_df.columns[1:].tolist() # Melt to long format df_long = input_df.melt( id_vars=[input_df.columns[0]], var_name='DATE', value_name='CHATTER' ) # Force date order df_long['DATE'] = pd.Categorical( df_long['DATE'], categories=date_columns, ordered=True ) # Clean names df_long['CHATTER'] = auto_correct_names(df_long['CHATTER']) # Group and pivot grouped = df_long.groupby(['CHATTER', 'DATE'], observed=True)[input_df.columns[0]] \ .apply(lambda x: ', '.join(sorted(x))).reset_index() pivoted = grouped.pivot(index='CHATTER', columns='DATE', values=input_df.columns[0]) chatter_order = grouped['CHATTER'].value_counts().index.tolist() final_df = pivoted.reindex(chatter_order)[date_columns].fillna("OFF") # Reset index to show chatter names in preview final_df = final_df.reset_index() # Create temp file with original filename + "_processed" original_filename = os.path.basename(input_file) name_part, ext_part = os.path.splitext(original_filename) processed_filename = f"{name_part}_processed{ext_part}" temp_file_path = os.path.join(tempfile.gettempdir(), processed_filename) final_df.to_excel(temp_file_path, index=False, sheet_name='Schedule') # Adjust formatting to ensure everything fits within cells adjust_excel_formatting(temp_file_path) return final_df, temp_file_path except Exception as e: error_df = pd.DataFrame({"Error": [f"⚠️ {str(e)}"]}) return error_df, None # Add the missing download_file function def download_file(out_path): """Return the processed file path for download""" return out_path # Gradio interface with gr.Blocks(title="Schedule Processor") as demo: gr.Markdown("# 📅 Schedule Processor") gr.Markdown("Upload your schedule Excel file and download the formatted version") with gr.Row(): input_file = gr.File(label="Upload Schedule File", type="filepath") with gr.Row(): process_btn = gr.Button("Process File", variant="primary") reset_btn = gr.Button("Upload New File") output_table = gr.Dataframe(label="Preview", wrap=True) download_button = gr.Button("Download Processed File", visible=False) temp_file_path = gr.State(value=None) def reset_components(): """Reset all components to initial state""" return [ None, # Clear file input pd.DataFrame(), # Clear output table None, # Reset temp file path gr.update(visible=False) # Hide download button ] def process_and_show(file): df, out_path = process_file(file) if out_path: return df, out_path, gr.update(visible=True) return df, None, gr.update(visible=False) process_btn.click( process_and_show, inputs=input_file, outputs=[output_table, temp_file_path, download_button] ) reset_btn.click( reset_components, outputs=[input_file, output_table, temp_file_path, download_button] ) download_button.click( download_file, inputs=temp_file_path, outputs=gr.File(label="Processed Schedule") ) if __name__ == "__main__": demo.launch()