import pandas as pd import ydata_profiling import gradio as gr from pydantic_settings import BaseSettings from tempfile import NamedTemporaryFile import sweetviz as sv #from datatile.summary.df import DataFrameSummary from autoviz.AutoViz_Class import AutoViz_Class from traceml.summary.df import DataFrameSummary def variable_table(df): """ Analyzes a DataFrame and categorizes variables with colorful HTML formatting. Args: df (pandas.DataFrame): The DataFrame to analyze. Returns: str: HTML code representing the analysis results with colorful highlights. """ # Analyze variable types numerical_vars=[] text_vars=[] categorical_vars=[] # Identify categorical, numerical, and text columns # Iterate over columns for col in df.columns: print(col ) unique_values = df[col].nunique() total_values = df[col].count() dtype_kind = df[col].dtype.kind print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind) # Check criteria for numerical, text, and categorical variables if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10: numerical_vars.append(col) print('Numerical') elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10: text_vars.append(col) print('Text') else: categorical_vars.append(col) print('categorical') print(categorical_vars,numerical_vars,text_vars) # Build HTML table with styles table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;' header_style = 'background-color: #f2f2f2; padding: 5px 10px;' data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;' category_color = '#90ee90' # Light green for categorical numerical_color = '#add8e6' # Light blue for numerical text_color = '#ffd9b3' # Light yellow for text html = f"" html += f"" # Add rows for each variable type with coloring if categorical_vars: html += f"" if numerical_vars: html += f"" if text_vars: html += f"" # Handle cases where no variables are found if not (categorical_vars or numerical_vars or text_vars): html += "" html += "
Variable TypeColumns
Categorical{', '.join(categorical_vars)}
Numerical{', '.join(numerical_vars)}
Text{', '.join(text_vars)}
No variables found!
" return html def generate_report(file, type): df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file) pandas_html_report =ydata_profiling.ProfileReport(df).to_html() temp_file1 = NamedTemporaryFile(delete=False, suffix=".html") temp_file1.write(pandas_html_report.encode('utf-8')) temp_file1.close() print('Pandas completed') # dataprep_report = create_report(df) # temp_file2 = NamedTemporaryFile(delete=False, suffix=".html") # temp_file2.write(dataprep_report.encode('utf-8')) # temp_file2.close() def convert_mixed_to_str(df): mixed_cols = df.select_dtypes(include=['object']).columns # Detect object dtype (usually mixed) for col in mixed_cols: df[col] = df[col].astype(str) # Convert mixed columns to strings return df df = convert_mixed_to_str(df.copy()) df.columns = df.columns.str.upper() sweetviz_report = sv.analyze(df) #sweetviz_report=sweetviz_report.show_html() #print(type(sweetviz_report)) report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html', open_browser=False, layout='widescreen', scale=None) HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") # Reading the file and storing in a variable contents = HTMLFileToBeOpened.read() print('Pandas completed') temp_file3 = NamedTemporaryFile(delete=False, suffix=".html") temp_file3.write(contents.encode('utf-8')) temp_file3.close() try: df=AV.AutoViz('/content/insurance.csv') dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1, lowess=False,chart_format='html',max_rows_analyzed=150000, max_cols_analyzed=30,save_plot_dir="plots_html") dft = AV.AutoViz(filename='/content/insurance.csv', sep=',', header=0, verbose=2, lowess=False,chart_format='png',max_rows_analyzed=150000, max_cols_analyzed=30,save_plot_dir="plots") except Exception as e: dfviz=df print(e) print('Exception during Autoviz') reports = [] output_dir_html='plots_html/AutoViz' for filename in os.listdir(output_dir_html): if filename.endswith(".html"): title = os.path.splitext(filename)[0] # Extract title from filename (without extension) reports.append(title) reports.append(output_dir_html+'/'+filename) imgs = [] output_dir_img='plots/Autoviz' for filename in os.listdir(output_dir_img): if filename.endswith(".png"): title = os.path.splitext(filename)[0] # Extract title from filename (without extension) imgs.append(title) imgs.append(output_dir_img+'/'+filename) while len(reports)<10: # Comparing the lengths and appending t and m to output if needed t = gr.Markdown(visible=False) m = gr.File(visible=False) reports.append(t) reports.append(m) while len(imgs)<10: # Comparing the lengths and appending t and m to output if needed t = gr.Markdown(visible=False) m = gr.Image(visible=False) imgs.append(t) imgs.append(m) dfs = DataFrameSummary(df) sd=dfs.summary() sd.index.name = 'PARAMETERS' sd1= sd.reset_index(drop=False) def highlight_cols(x): df = x.copy() df.loc[:, :] = 'color: purple' df[['PARAMETERS']] = 'color: red' return df # Applying the style function sd1 = sd1.style.apply(highlight_cols, axis = None) sd1.columns = sd1.columns.str.upper() return temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1,reports+imgs def handle_click(file, sheet_name): results = generate_report(file, sheet_name) return results def update_sheets(file): sheet_names = pd.ExcelFile(file).sheet_names sheet_dropdown = gr.Dropdown( label="Select sheet", choices=[sheet_names], visible=True # Initially disabled ) return sheet_dropdown with gr.Blocks() as cluster: with gr.Column(): gr.HTML(value="""

EXCEL ANALYSIS AND INSIGHTS

""") with gr.Row(): file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file") btn=gr.Button(value="Download Report") with gr.Row(): gr.HTML(value="""

PANDAS REPORT

""") out1=gr.File(label="Download CSV") gr.HTML(value="""

SWEETVIZ REPORT

""") out2=gr.File(label="Download CSV") sheet_dropdown = gr.Dropdown( label="Select sheet", choices=[], visible=False # Initially disabled ) with gr.Column(): gr.HTML(value="""

""") #gr.Markdown("***Uploaded File***") var=gr.HTML() with gr.Column(): gr.HTML(value="""

""") #gr.Markdown("#Uploaded File") dataframe1=gr.Dataframe() with gr.Column(): gr.HTML(value="""

""") #gr.Markdown("#Columns Analysis") dataframe2=gr.Dataframe() #Autoviz insights graphs with gr.Column(): gr.HTML(value="""

EXCEL ANALYSIS AND INSIGHTS

""") with gr.Row(): file = gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file") btn = gr.Button("Download Report") # AutoViz report display section #autoviz_reports = gr.Column() gr.HTML(value="""

Insight Visualisations

""") with gr.Column(): plots2 = [] for i in range(5): t = gr.Markdown() with gr.Column(scale=1): m=gr.Image() plots2.append(t) plots2.append(m) print(type(plots2[0])) gr.HTML(value="""

Insight graphs

""") with gr.Row(): plots1 = [] for i in range(5): with gr.Row(): t = gr.Markdown() m=gr.File() plots1.append(t) plots1.append(m) print(type(plots1[0])) btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var, dataframe1, dataframe2,plots1+plots2]) file.change(update_sheets, inputs=file, outputs=sheet_dropdown) #btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2]) cluster.launch()