|
import pandas as pd |
|
import ydata_profiling |
|
import gradio as gr |
|
from pydantic_settings import BaseSettings |
|
from tempfile import NamedTemporaryFile |
|
import sweetviz as sv |
|
import sketch |
|
|
|
from autoviz.AutoViz_Class import AutoViz_Class |
|
from traceml.summary.df import DataFrameSummary |
|
import os |
|
df1=gr.State() |
|
|
|
def variable_table(df): |
|
""" |
|
Analyzes a DataFrame and categorizes variables with colorful HTML formatting. |
|
|
|
Args: |
|
df (pandas.DataFrame): The DataFrame to analyze. |
|
|
|
Returns: |
|
str: HTML code representing the analysis results with colorful highlights. |
|
""" |
|
|
|
numerical_vars=[] |
|
text_vars=[] |
|
categorical_vars=[] |
|
|
|
|
|
for col in df.columns: |
|
print(col ) |
|
unique_values = df[col].nunique() |
|
total_values = df[col].count() |
|
dtype_kind = df[col].dtype.kind |
|
print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind) |
|
|
|
if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10: |
|
numerical_vars.append(col) |
|
print('Numerical') |
|
elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10: |
|
text_vars.append(col) |
|
print('Text') |
|
else: |
|
categorical_vars.append(col) |
|
print('categorical') |
|
print(categorical_vars,numerical_vars,text_vars) |
|
|
|
|
|
table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;' |
|
header_style = 'background-color: #f2f2f2; padding: 5px 10px;' |
|
data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;' |
|
category_color = '#90ee90' |
|
numerical_color = '#add8e6' |
|
text_color = '#ffd9b3' |
|
|
|
html = f"<table style='{table_style}'>" |
|
html += f"<tr><th style='{header_style}'>Variable Type</th><th style='{header_style}'>Columns</th></tr>" |
|
|
|
|
|
if categorical_vars: |
|
html += f"<tr style='background-color: {category_color};'><td>Categorical</td><td style='{data_style}'>{', '.join(categorical_vars)}</td></tr>" |
|
if numerical_vars: |
|
html += f"<tr style='background-color: {numerical_color};'><td>Numerical</td><td style='{data_style}'>{', '.join(numerical_vars)}</td></tr>" |
|
if text_vars: |
|
html += f"<tr style='background-color: {text_color};'><td>Text</td><td style='{data_style}'>{', '.join(text_vars)}</td></tr>" |
|
|
|
|
|
if not (categorical_vars or numerical_vars or text_vars): |
|
html += "<tr><td>No variables found!</td></tr>" |
|
|
|
html += "</table>" |
|
return html |
|
|
|
def generate_report(file, type): |
|
df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file) |
|
|
|
|
|
pandas_html_report =ydata_profiling.ProfileReport(df).to_html() |
|
temp_file1 = NamedTemporaryFile(delete=False, suffix=".html") |
|
temp_file1.write(pandas_html_report.encode('utf-8')) |
|
temp_file1.close() |
|
print('Pandas completed') |
|
|
|
|
|
|
|
|
|
|
|
|
|
def convert_mixed_to_str(df): |
|
|
|
mixed_cols = df.select_dtypes(include=['object']).columns |
|
for col in mixed_cols: |
|
df[col] = df[col].astype(str) |
|
return df |
|
|
|
df = convert_mixed_to_str(df.copy()) |
|
df.columns = df.columns.str.upper() |
|
df1.value=df |
|
sweetviz_report = sv.analyze(df) |
|
|
|
|
|
report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html', |
|
open_browser=False, |
|
layout='widescreen', |
|
scale=None) |
|
HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r") |
|
|
|
|
|
contents = HTMLFileToBeOpened.read() |
|
print('Pandas completed') |
|
|
|
|
|
temp_file3 = NamedTemporaryFile(delete=False, suffix=".html") |
|
temp_file3.write(contents.encode('utf-8')) |
|
temp_file3.close() |
|
dfviz=df |
|
try: |
|
AV = AutoViz_Class() |
|
dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1, |
|
lowess=False,chart_format='html',max_rows_analyzed=150000, |
|
max_cols_analyzed=30,save_plot_dir=None) |
|
reports = [] |
|
print(os.listdir("./")) |
|
output_dir_html="./AutoViz_Plots/AutoViz" |
|
for filename in os.listdir(output_dir_html): |
|
if filename.endswith(".html"): |
|
title = os.path.splitext(filename)[0] |
|
reports.append(title) |
|
reports.append(output_dir_html+'/'+filename) |
|
print(reports) |
|
|
|
dft = AV.AutoViz(filename='', sep=',', header=0, verbose=2,dfte=df, |
|
lowess=False,chart_format='png',max_rows_analyzed=150000, |
|
max_cols_analyzed=30,save_plot_dir=None) |
|
|
|
imgs = [] |
|
output_dir_img="./AutoViz_Plots/AutoViz" |
|
for filename in os.listdir(output_dir_img): |
|
if filename.endswith(".png"): |
|
title = os.path.splitext(filename)[0] |
|
imgs.append(title) |
|
imgs.append(output_dir_img+'/'+filename) |
|
print(imgs) |
|
|
|
except Exception as e: |
|
dfviz=df |
|
print(e) |
|
print('Exception during Autoviz') |
|
|
|
|
|
|
|
while len(reports)<10: |
|
|
|
|
|
t = gr.Markdown(visible=False) |
|
m = gr.File(visible=False) |
|
reports.append(t) |
|
reports.append(m) |
|
|
|
while len(imgs)<10: |
|
|
|
|
|
t = gr.Markdown(visible=False) |
|
m = gr.Image(visible=False) |
|
imgs.append(t) |
|
imgs.append(m) |
|
|
|
dfs = DataFrameSummary(df) |
|
sd=dfs.summary() |
|
sd.index.name = 'PARAMETERS' |
|
sd1= sd.reset_index(drop=False) |
|
def highlight_cols(x): |
|
df = x.copy() |
|
df.loc[:, :] = 'color: purple' |
|
df[['PARAMETERS']] = 'color: red' |
|
return df |
|
|
|
|
|
sd1 = sd1.style.apply(highlight_cols, axis = None) |
|
sd1.columns = sd1.columns.str.upper() |
|
|
|
|
|
|
|
return [temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs |
|
|
|
def query_chat(query): |
|
df=df1.value |
|
return df.sketch.ask(query) |
|
|
|
def handle_click(file, sheet_name): |
|
print('Handle click') |
|
results = generate_report(file, sheet_name) |
|
return results |
|
|
|
def update_sheets(file): |
|
sheet_names = pd.ExcelFile(file).sheet_names |
|
|
|
print('Sheet updated') |
|
sheet_dropdown = gr.Dropdown( |
|
label="Select sheet", choices=[sheet_names], visible=True |
|
) |
|
return sheet_dropdown |
|
|
|
|
|
with gr.Blocks() as cluster: |
|
with gr.Column(): |
|
gr.HTML(value="""<h1 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">EXCEL ANALYSIS AND INSIGHTS</h1>""") |
|
with gr.Row(): |
|
file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file") |
|
btn=gr.Button(value="Download Report") |
|
|
|
with gr.Row(): |
|
|
|
|
|
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">PANDAS REPORT</h2>""") |
|
out1=gr.File(label="Download CSV") |
|
gr.HTML(value="""<h2 style="color: #3399FF; text-shadow: 1px 1px 2px #ddd;">SWEETVIZ REPORT</h2>""") |
|
out2=gr.File(label="Download CSV") |
|
sheet_dropdown = gr.Dropdown( |
|
label="Select sheet", choices=[], visible=False |
|
) |
|
|
|
|
|
with gr.Column(): |
|
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS IN FILE</h2>""") |
|
|
|
var=gr.HTML() |
|
with gr.Column(): |
|
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"UPLOADED FILE</h2>""") |
|
|
|
dataframe1=gr.Dataframe() |
|
with gr.Column(): |
|
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"COLUMNS STATS AND ANALYSIS</h2>""") |
|
|
|
dataframe2=gr.Dataframe() |
|
|
|
with gr.Column(): |
|
gr.HTML(value="""<h2 style="color: #BA4A00; text-shadow: 1px 1px 2px #ddd;"ASK QUESTIONS ON THE COLUMNS </h2>""") |
|
with gr.Row(): |
|
query=gr.Textbox(label="Enter any question on data columns", |
|
info="Natural language query", |
|
lines=1, |
|
value="What is maximum value of first column") |
|
outlabel=gr.HighlightedText(label='Query Result ') |
|
query_btn=gr.Button(value='Submit Query') |
|
query_btn.click(query_chat,inputs=query,outputs=outlabel) |
|
|
|
|
|
|
|
|
|
|
|
gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight Visualisations</h3>""") |
|
with gr.Column(): |
|
plots2 = [] |
|
for i in range(5): |
|
t = gr.Markdown() |
|
with gr.Column(scale=1): |
|
m=gr.Image() |
|
plots2.append(t) |
|
plots2.append(m) |
|
print(type(plots2[0])) |
|
gr.HTML(value="""<h3 style="color: #1999FF; text-shadow: 1px 1px 2px #ddd;">Insight graphs</h3>""") |
|
with gr.Row(): |
|
plots1 = [] |
|
for i in range(5): |
|
with gr.Row(): |
|
t = gr.Markdown() |
|
m=gr.File() |
|
plots1.append(t) |
|
plots1.append(m) |
|
print(type(plots1[0])) |
|
|
|
btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2) |
|
file.change(update_sheets, inputs=file, outputs=sheet_dropdown) |
|
|
|
cluster.launch(debug=True) |
|
|