# Spaces: Sleeping
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from typing import List, Dict, Any | |
import streamlit as st | |
class DataProcessor:
    """Hold a CSV-backed DataFrame and derive statistics, metrics and charts.

    ``data`` is ``None`` and the three column lists are empty until
    ``load_data`` succeeds.
    """

    def __init__(self):
        self.data = None  # DataFrame produced by the last pd.read_csv call
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Load and validate CSV data.

        Args:
            file: Whatever ``pd.read_csv`` accepts (path, buffer, upload).

        Returns:
            True when the CSV was read and its columns classified; False
            after the failure has been reported through ``st.error``.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
            return True
        except Exception as e:
            # UI boundary: surface any load problem to the user instead of
            # crashing the page.
            st.error(f"Error loading data: {str(e)}")
            return False

    def _classify_columns(self):
        """Classify columns into numeric, categorical, and date types.

        The lists are rebuilt from scratch on every call, so loading a second
        file into the same instance does not leave stale or duplicated
        column names behind.
        """
        numeric, categorical, dates = [], [], []
        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                numeric.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                dates.append(col)
            else:
                # Best effort: a column whose values all parse as datetimes
                # counts as a date column, anything else is categorical.
                try:
                    pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    categorical.append(col)
                else:
                    dates.append(col)
        self.numeric_columns = numeric
        self.categorical_columns = categorical
        self.date_columns = dates

    def get_basic_stats(self) -> Dict[str, Any]:
        """Calculate basic statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded; otherwise a dict with
            ``summary`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``missing_values`` (null count
            per column), ``row_count`` and ``column_count``.
        """
        if self.data is None:
            return {}
        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame without columns.
            summary = pd.DataFrame()
        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    # The return annotation is quoted so it is not evaluated when the class
    # body executes.
    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Create different types of visualizations based on user selection.

        Args:
            chart_type: "Line Plot", "Bar Plot", "Scatter Plot" or "Box Plot";
                any other value produces a histogram.
            x_col: Column plotted on the x axis.
            y_col: Column plotted on the y axis (ignored for histograms).
            color_col: Optional column used to colour the marks.

        Returns:
            The figure built by the matching ``plotly.express`` function.
        """
        if chart_type == "Line Plot":
            return px.line(self.data, x=x_col, y=y_col, color=color_col)
        if chart_type == "Bar Plot":
            return px.bar(self.data, x=x_col, y=y_col, color=color_col)
        if chart_type == "Scatter Plot":
            return px.scatter(self.data, x=x_col, y=y_col, color=color_col)
        if chart_type == "Box Plot":
            return px.box(self.data, x=x_col, y=y_col, color=color_col)
        # Fallback for "Histogram" and any unrecognised chart type.
        return px.histogram(self.data, x=x_col, color=color_col)

    def calculate_metrics(self, column: str) -> Dict[str, float]:
        """Calculate key metrics for a selected column.

        Args:
            column: Name of the column to summarise.

        Returns:
            A dict with ``mean``, ``median``, ``std``, ``min``, ``max`` and
            ``skew``; an empty dict when *column* is not one of the numeric
            columns.
        """
        if column not in self.numeric_columns:
            return {}
        series = self.data[column]
        return {
            'mean': series.mean(),
            'median': series.median(),
            'std': series.std(),
            'min': series.min(),
            'max': series.max(),
            'skew': series.skew(),
        }
def render_analytics_page():
    """Render the analytics dashboard: upload, preview, stats, chart, metrics.

    Only the title and the file uploader are drawn until a CSV has been
    uploaded and loaded successfully.
    """
    st.title("Data Analytics Dashboard")

    # Fresh processor for this render
    processor = DataProcessor()

    # File upload
    uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
    if uploaded_file is None:
        return
    if not processor.load_data(uploaded_file):
        return

    st.success("Data loaded successfully!")

    # Data Preview
    st.subheader("Data Preview")
    st.dataframe(processor.data.head())

    # Basic Stats
    st.subheader("Basic Statistics")
    basic_stats = processor.get_basic_stats()
    st.write(basic_stats['summary'])

    # Visualization Section
    st.subheader("Create Visualization")
    type_slot, x_slot, y_slot = st.columns(3)
    with type_slot:
        chart_type = st.selectbox(
            "Select Chart Type",
            ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
        )
    with x_slot:
        x_col = st.selectbox("Select X-axis", processor.data.columns)
    with y_slot:
        # A histogram has no y axis, so no selector is shown for it.
        if chart_type != "Histogram":
            y_col = st.selectbox("Select Y-axis", processor.numeric_columns)
        else:
            y_col = None

    color_choice = st.selectbox("Select Color Variable (optional)",
                                ['None'] + processor.categorical_columns)
    if color_choice == 'None':
        color_col = None
    else:
        color_col = color_choice

    # Generate and display visualization; fall back to x when no y is chosen
    y_for_plot = y_col or x_col
    fig = processor.create_visualization(chart_type, x_col, y_for_plot, color_col)
    st.plotly_chart(fig, use_container_width=True)

    # Metrics Calculator
    st.subheader("Metric Calculator")
    metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
    metrics = processor.calculate_metrics(metric_col)

    # Display metrics across three columns, wrapping round-robin
    slots = st.columns(3)
    for position, (name, amount) in enumerate(metrics.items()):
        slot = slots[position % 3]
        with slot:
            st.metric(label=name.capitalize(), value=f"{amount:.2f}")