File size: 5,309 Bytes
89cee86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Dict, Any
import streamlit as st

class DataProcessor:
    """Hold a CSV loaded into a DataFrame and derive stats and charts from it."""

    def __init__(self):
        # Loaded DataFrame; stays None until load_data() succeeds.
        self.data = None
        # Column names grouped by detected kind; rebuilt on every load.
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Load CSV data from *file* and classify its columns.

        Args:
            file: Any input accepted by ``pandas.read_csv``.

        Returns:
            True when the data was read and classified. On any failure the
            error is reported through ``st.error`` and False is returned.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
            return True
        except Exception as e:
            # UI boundary: report the problem instead of crashing the page.
            st.error(f"Error loading data: {str(e)}")
            return False

    def _classify_columns(self):
        """Classify columns into numeric, categorical, and date types.

        The three column lists are rebuilt from scratch so that loading a
        second file never leaves duplicated or stale column names behind.
        """
        numeric, categorical, dates = [], [], []
        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                numeric.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                dates.append(col)
            else:
                # Columns whose values all parse as datetimes count as dates
                # (the stored data itself is not converted).
                try:
                    pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    categorical.append(col)
                else:
                    dates.append(col)

        self.numeric_columns = numeric
        self.categorical_columns = categorical
        self.date_columns = dates

    def get_basic_stats(self) -> Dict[str, Any]:
        """Calculate basic statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded. Otherwise a dict with
            'summary' (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), 'missing_values' (null count per
            column), 'row_count' and 'column_count'.
        """
        if self.data is None:
            return {}

        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame without columns.
            summary = pd.DataFrame()

        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Create a Plotly Express figure for the selected chart type.

        Args:
            chart_type: "Line Plot", "Bar Plot", "Scatter Plot" or "Box Plot";
                any other value produces a histogram.
            x_col: Column used for the x axis.
            y_col: Column used for the y axis (ignored for histograms).
            color_col: Optional column used to colour the marks.

        Returns:
            The figure built from ``self.data``.
        """
        xy_charts = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        make_chart = xy_charts.get(chart_type)
        if make_chart is None:
            # Histogram (and any unrecognised type) only plots x.
            return px.histogram(self.data, x=x_col, color=color_col)
        return make_chart(self.data, x=x_col, y=y_col, color=color_col)

    def calculate_metrics(self, column: str) -> Dict[str, float]:
        """Calculate key metrics for a selected numeric column.

        Args:
            column: Name of the column to summarise.

        Returns:
            A dict with 'mean', 'median', 'std', 'min', 'max' and 'skew', or
            an empty dict when *column* is not one of the numeric columns.
        """
        if column not in self.numeric_columns:
            return {}

        series = self.data[column]
        # Each metric name is also the name of the Series method computing it.
        metric_names = ('mean', 'median', 'std', 'min', 'max', 'skew')
        return {name: getattr(series, name)() for name in metric_names}

def render_analytics_page():
    """Render the analytics dashboard page.

    Shows a CSV uploader; once a file loads successfully, renders a data
    preview, summary statistics, a configurable chart and per-column metrics.
    Sections that need numeric columns show an info message instead of
    failing when the uploaded file has none.
    """
    st.title("Data Analytics Dashboard")

    # Initialize data processor
    processor = DataProcessor()

    # File upload
    uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
    if uploaded_file is None:
        return
    if not processor.load_data(uploaded_file):
        # load_data has already reported the error to the user.
        return
    st.success("Data loaded successfully!")

    has_numeric = bool(processor.numeric_columns)

    # Data Preview
    st.subheader("Data Preview")
    st.dataframe(processor.data.head())

    # Basic Stats
    st.subheader("Basic Statistics")
    if has_numeric:
        stats = processor.get_basic_stats()
        st.write(stats['summary'])
    else:
        # describe() cannot summarise a frame without numeric columns.
        st.info("No numeric columns found; summary statistics are unavailable.")

    # Visualization Section
    st.subheader("Create Visualization")
    col1, col2, col3 = st.columns(3)

    with col1:
        chart_type = st.selectbox(
            "Select Chart Type",
            ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
        )

    with col2:
        x_col = st.selectbox("Select X-axis", processor.data.columns)

    with col3:
        # Histograms only need an x axis, so no Y selector is shown for them.
        y_col = st.selectbox("Select Y-axis", processor.numeric_columns) if chart_type != "Histogram" else None

    color_col = st.selectbox("Select Color Variable (optional)",
                             ['None'] + processor.categorical_columns)
    color_col = None if color_col == 'None' else color_col

    # Generate and display visualization; fall back to the x column when no
    # y column was chosen.
    fig = processor.create_visualization(
        chart_type,
        x_col,
        y_col if y_col else x_col,
        color_col
    )
    st.plotly_chart(fig, use_container_width=True)

    # Metrics Calculator
    st.subheader("Metric Calculator")
    if not has_numeric:
        st.info("No numeric columns found; metrics are unavailable.")
        return

    metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
    metrics = processor.calculate_metrics(metric_col)

    # Display metrics in columns, three per row
    cols = st.columns(3)
    for i, (metric, value) in enumerate(metrics.items()):
        with cols[i % 3]:
            st.metric(label=metric.capitalize(), value=f"{value:.2f}")