import pandas as pd import plotly.express as px import plotly.graph_objects as go from typing import List, Dict, Any import streamlit as st class DataProcessor: def __init__(self): self.data = None self.numeric_columns = [] self.categorical_columns = [] self.date_columns = [] def load_data(self, file) -> bool: """Load and validate CSV data""" try: self.data = pd.read_csv(file) self._classify_columns() return True except Exception as e: st.error(f"Error loading data: {str(e)}") return False def _classify_columns(self): """Classify columns into numeric, categorical, and date types""" for col in self.data.columns: if pd.api.types.is_numeric_dtype(self.data[col]): self.numeric_columns.append(col) elif pd.api.types.is_datetime64_any_dtype(self.data[col]): self.date_columns.append(col) else: try: pd.to_datetime(self.data[col]) self.date_columns.append(col) except: self.categorical_columns.append(col) def get_basic_stats(self) -> Dict[str, Any]: """Calculate basic statistics for numeric columns""" if self.data is None: return {} stats = { 'summary': self.data[self.numeric_columns].describe(), 'missing_values': self.data.isnull().sum(), 'row_count': len(self.data), 'column_count': len(self.data.columns) } return stats def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> go.Figure: """Create different types of visualizations based on user selection""" if chart_type == "Line Plot": fig = px.line(self.data, x=x_col, y=y_col, color=color_col) elif chart_type == "Bar Plot": fig = px.bar(self.data, x=x_col, y=y_col, color=color_col) elif chart_type == "Scatter Plot": fig = px.scatter(self.data, x=x_col, y=y_col, color=color_col) elif chart_type == "Box Plot": fig = px.box(self.data, x=x_col, y=y_col, color=color_col) else: fig = px.histogram(self.data, x=x_col, color=color_col) return fig def calculate_metrics(self, column: str) -> Dict[str, float]: """Calculate key metrics for a selected column""" if column not in self.numeric_columns: return {} metrics = { 'mean': self.data[column].mean(), 'median': self.data[column].median(), 'std': self.data[column].std(), 'min': self.data[column].min(), 'max': self.data[column].max(), 'skew': self.data[column].skew() } return metrics def render_analytics_page(): st.title("Data Analytics Dashboard") # Initialize data processor processor = DataProcessor() # File upload uploaded_file = st.file_uploader("Upload your CSV data", type=['csv']) if uploaded_file is not None: if processor.load_data(uploaded_file): st.success("Data loaded successfully!") # Data Preview st.subheader("Data Preview") st.dataframe(processor.data.head()) # Basic Stats st.subheader("Basic Statistics") stats = processor.get_basic_stats() st.write(stats['summary']) # Visualization Section st.subheader("Create Visualization") col1, col2, col3 = st.columns(3) with col1: chart_type = st.selectbox( "Select Chart Type", ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"] ) with col2: x_col = st.selectbox("Select X-axis", processor.data.columns) with col3: y_col = st.selectbox("Select Y-axis", processor.numeric_columns) if chart_type != "Histogram" else None color_col = st.selectbox("Select Color Variable (optional)", ['None'] + processor.categorical_columns) color_col = None if color_col == 'None' else color_col # Generate and display visualization fig = processor.create_visualization( chart_type, x_col, y_col if y_col else x_col, color_col ) st.plotly_chart(fig, use_container_width=True) # Metrics Calculator st.subheader("Metric Calculator") metric_col = st.selectbox("Select column for metrics", processor.numeric_columns) metrics = processor.calculate_metrics(metric_col) # Display metrics in columns cols = st.columns(3) for i, (metric, value) in enumerate(metrics.items()): with cols[i % 3]: st.metric(label=metric.capitalize(), value=f"{value:.2f}")