# Page-extraction residue from the web file viewer (not part of the program):
# Spaces: Sleeping
# File size: 5,309 Bytes
# 89cee86
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Dict, Any
import streamlit as st
class DataProcessor:
    """Load a CSV into a DataFrame and derive statistics, metrics and charts.

    ``data`` is ``None`` and the column lists are empty until ``load_data``
    succeeds.
    """

    def __init__(self):
        # DataFrame produced by the most recent load_data() call, or None.
        self.data = None
        # Column names grouped by the kind detected in _classify_columns().
        self.numeric_columns: List[str] = []
        self.categorical_columns: List[str] = []
        self.date_columns: List[str] = []

    def load_data(self, file) -> bool:
        """Load and validate CSV data.

        Args:
            file: Anything ``pd.read_csv`` accepts (path or file-like object).

        Returns:
            True when the CSV was read and its columns classified; False when
            any step raised, in which case the error is shown via ``st.error``.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
            return True
        except Exception as e:
            st.error(f"Error loading data: {str(e)}")
            return False

    def _classify_columns(self):
        """Classify columns into numeric, categorical, and date types.

        The three column lists are rebuilt from scratch on every call, so
        loading a second file does not accumulate stale or duplicate names.
        """
        numeric: List[str] = []
        categorical: List[str] = []
        dates: List[str] = []

        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                numeric.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                dates.append(col)
            else:
                # Best-effort probe: a column whose values all parse as
                # datetimes counts as a date column, anything else is
                # categorical. ``Exception`` (not a bare ``except``) keeps
                # KeyboardInterrupt/SystemExit propagating.
                try:
                    pd.to_datetime(series)
                except Exception:
                    categorical.append(col)
                else:
                    dates.append(col)

        self.numeric_columns = numeric
        self.categorical_columns = categorical
        self.date_columns = dates

    def get_basic_stats(self) -> Dict[str, Any]:
        """Calculate basic statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded. Otherwise a dict with:
            ``summary`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``missing_values`` (null count
            per column), ``row_count`` and ``column_count``.
        """
        if self.data is None:
            return {}

        # describe() raises ValueError on a frame without columns, so fall
        # back to an empty frame when the CSV has no numeric columns.
        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            summary = pd.DataFrame()

        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Create different types of visualizations based on user selection.

        Args:
            chart_type: "Line Plot", "Bar Plot", "Scatter Plot" or "Box Plot";
                any other value produces a histogram of ``x_col``.
            x_col: Column plotted on the x-axis.
            y_col: Column plotted on the y-axis (ignored for histograms).
            color_col: Optional column used to colour the traces.

        Returns:
            The plotly figure built by the matching ``plotly.express`` call.

        Raises:
            ValueError: If no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded; call load_data() first.")

        # Chart types that take both an x and a y column.
        xy_plotters = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        plotter = xy_plotters.get(chart_type)
        if plotter is None:
            # Histogram is the fallback for every other chart type.
            return px.histogram(self.data, x=x_col, color=color_col)
        return plotter(self.data, x=x_col, y=y_col, color=color_col)

    def calculate_metrics(self, column: str) -> Dict[str, float]:
        """Calculate key metrics for a selected column.

        Args:
            column: Name of the column to summarise.

        Returns:
            A dict with ``mean``, ``median``, ``std``, ``min``, ``max`` and
            ``skew``; an empty dict when ``column`` is not one of the numeric
            columns.
        """
        if column not in self.numeric_columns:
            return {}

        series = self.data[column]
        return {
            'mean': series.mean(),
            'median': series.median(),
            'std': series.std(),
            'min': series.min(),
            'max': series.max(),
            'skew': series.skew(),
        }
def render_analytics_page():
    """Render the Streamlit analytics dashboard page.

    Shows a CSV uploader and, once a file loads successfully, a data preview,
    summary statistics, a configurable chart and a per-column metric
    calculator. Nothing below the uploader is rendered until a file has been
    uploaded and loaded.
    """
    st.title("Data Analytics Dashboard")

    # Initialize data processor
    processor = DataProcessor()

    # File upload
    uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
    if uploaded_file is None:
        return
    if not processor.load_data(uploaded_file):
        # load_data has already reported the error to the user.
        return

    st.success("Data loaded successfully!")

    # Data Preview
    st.subheader("Data Preview")
    st.dataframe(processor.data.head())

    # Basic Stats
    st.subheader("Basic Statistics")
    if processor.numeric_columns:
        stats = processor.get_basic_stats()
        st.write(stats['summary'])
    else:
        # Summarising an empty numeric selection would fail, so say so instead.
        st.info("No numeric columns found in the uploaded data.")

    # Visualization Section
    st.subheader("Create Visualization")
    col1, col2, col3 = st.columns(3)
    with col1:
        chart_type = st.selectbox(
            "Select Chart Type",
            ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
        )
    with col2:
        x_col = st.selectbox("Select X-axis", processor.data.columns)
    with col3:
        # Histograms only need an x column, so no y selector is shown.
        if chart_type != "Histogram":
            y_col = st.selectbox("Select Y-axis", processor.numeric_columns)
        else:
            y_col = None

    # NOTE(review): the original indentation was lost; the colour selector is
    # placed below the column row here - confirm the intended layout.
    color_col = st.selectbox("Select Color Variable (optional)",
                             ['None'] + processor.categorical_columns)
    color_col = None if color_col == 'None' else color_col

    # Generate and display visualization; without a y column the x column is
    # passed for y as well.
    fig = processor.create_visualization(
        chart_type,
        x_col,
        y_col if y_col else x_col,
        color_col
    )
    st.plotly_chart(fig, use_container_width=True)

    # Metrics Calculator
    st.subheader("Metric Calculator")
    if not processor.numeric_columns:
        st.info("No numeric columns available for metrics.")
        return

    metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
    metrics = processor.calculate_metrics(metric_col)

    # Display metrics in columns, three per row.
    cols = st.columns(3)
    for i, (metric, value) in enumerate(metrics.items()):
        with cols[i % 3]:
            st.metric(label=metric.capitalize(), value=f"{value:.2f}")