"""Gradio front end for the data cleaner: upload a CSV, clean it, view the report."""

import os
from datetime import datetime

import gradio as gr
import pandas as pd

from clean import clean_data, get_numeric_columns
from report import create_full_report, REPORT_DIR


def clean_and_visualize(file, primary_key_column, progress=gr.Progress()):
    """Clean an uploaded CSV, generate the report and save the cleaned data.

    Args:
        file: Uploaded file object; only its ``name`` attribute (the path of
            the CSV on disk) is read here.
        primary_key_column: Column name forwarded unchanged to ``clean_data``
            and ``create_full_report``.
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress; it is also handed to ``clean_data``.

    Returns:
        A ``(cleaned_csv_path, image_files)`` tuple: the path of the CSV
        written by this call, and the sorted paths of every ``.png`` file
        found in ``REPORT_DIR`` afterwards.
    """
    progress(0.05, desc="Reading CSV file")
    df = pd.read_csv(file.name)

    progress(0.1, desc="Starting data cleaning")
    cleaned_df, nonconforming_cells_before, process_times = clean_data(
        df, primary_key_column, progress
    )
    progress(0.8, desc="Data cleaning completed")

    # How much the cleaning step dropped, passed on to the report.
    removed_columns = len(df.columns) - len(cleaned_df.columns)
    removed_rows = len(df) - len(cleaned_df)

    progress(0.9, desc="Generating report")
    create_full_report(
        df,
        cleaned_df,
        nonconforming_cells_before,
        process_times,
        removed_columns,
        removed_rows,
        primary_key_column,
    )

    progress(0.95, desc="Saving cleaned data")
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # Relative path, so the file lands in the current working directory.
    cleaned_csv_path = f"cleaned_data_{current_time}.csv"
    cleaned_df.to_csv(cleaned_csv_path, index=False)

    # Every PNG currently in REPORT_DIR is returned; os.listdir() has no
    # guaranteed order, so sort to keep the gallery order stable.
    # NOTE(review): if create_full_report does not clear REPORT_DIR, images
    # from earlier runs are picked up as well - confirm.
    image_files = sorted(
        os.path.join(REPORT_DIR, f)
        for f in os.listdir(REPORT_DIR)
        if f.endswith('.png')
    )

    progress(1.0, desc="Process completed")
    return cleaned_csv_path, image_files


def launch_app():
    """Build the Gradio Blocks interface and launch it."""
    with gr.Blocks() as app:
        gr.Markdown("# Data Cleaner")

        with gr.Row():
            file_input = gr.File(
                label="Upload CSV File",
                file_count="single",
                file_types=[".csv"],
            )

        with gr.Row():
            primary_key_dropdown = gr.Dropdown(
                label="Select Primary Key Column",
                choices=[],
                interactive=True,
            )

        with gr.Row():
            clean_button = gr.Button("Start Cleaning")

        # No row is reserved for a progress bar: gr.Progress is not a layout
        # component. Gradio displays progress for handlers that declare a
        # `progress=gr.Progress()` parameter (see process_and_show_results).

        with gr.Row():
            cleaned_file_output = gr.File(label="Cleaned CSV", visible=True)

        with gr.Row():
            # Hidden until a cleaning run has produced images to show.
            output_gallery = gr.Gallery(
                label="Visualization Results",
                show_label=True,
                elem_id="gallery",
                columns=[3],
                rows=[3],
                object_fit="contain",
                height="auto",
                visible=False,
            )

        def update_primary_key_options(file):
            """Refresh the primary-key dropdown from the uploaded CSV.

            The choices are whatever ``get_numeric_columns`` returns for the
            file; with no file the dropdown is emptied.
            """
            if file is None:
                return gr.Dropdown(choices=[])
            df = pd.read_csv(file.name)
            numeric_columns = get_numeric_columns(df)
            return gr.Dropdown(choices=numeric_columns)

        def process_and_show_results(
            file, primary_key_column, progress=gr.Progress()
        ):
            """Run the cleaning pipeline and reveal its outputs.

            ``progress`` must be declared here with a ``gr.Progress()``
            default: Gradio only activates the tracker it finds on the
            event handler's own signature.

            Raises:
                gr.Error: If no file has been uploaded.
            """
            if file is None:
                # Show a readable message instead of an AttributeError
                # from `file.name`.
                raise gr.Error("Please upload a CSV file before cleaning.")

            cleaned_csv_path, image_files = clean_and_visualize(
                file, primary_key_column, progress=progress
            )
            return (
                cleaned_csv_path,
                gr.Gallery(visible=True, value=image_files),
            )

        file_input.change(
            fn=update_primary_key_options,
            inputs=file_input,
            outputs=primary_key_dropdown,
        )

        clean_button.click(
            fn=process_and_show_results,
            inputs=[file_input, primary_key_dropdown],
            outputs=[cleaned_file_output, output_gallery],
        )

    app.launch()


if __name__ == "__main__":
    launch_app()