Prospea / data_processor.py
Pranav0111's picture
Create data_processor.py
89cee86 verified
raw
history blame
5.31 kB
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Dict, Any
import streamlit as st
class DataProcessor:
    """Hold a CSV-backed DataFrame plus helpers for stats, charts and metrics.

    ``data`` is ``None`` until ``load_data`` succeeds. The three column lists
    are rebuilt from scratch every time columns are classified, so one
    instance can safely load several files in a row.
    """

    def __init__(self):
        self.data = None  # pandas DataFrame once a CSV has been loaded
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Load and validate CSV data.

        Args:
            file: Anything ``pandas.read_csv`` accepts (path or file-like).

        Returns:
            True when the CSV was read and its columns classified; False
            when any step raised, in which case the error text is shown
            through ``st.error``.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
            return True
        except Exception as e:
            # UI boundary: report the failure to the user instead of raising.
            st.error(f"Error loading data: {str(e)}")
            return False

    def _classify_columns(self):
        """Classify columns into numeric, categorical, and date types.

        Numeric and datetime dtypes are recognised directly. Any other
        column is test-parsed with ``pd.to_datetime``: if parsing succeeds
        it is listed as a date column, otherwise as categorical. The column
        data itself is never converted.
        """
        # Start from empty lists so a reload does not accumulate duplicates
        # or keep column names from a previously loaded file.
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                self.numeric_columns.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                self.date_columns.append(col)
            else:
                try:
                    pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    # Not parseable as dates -> treat as categorical.
                    self.categorical_columns.append(col)
                else:
                    self.date_columns.append(col)

    def get_basic_stats(self) -> Dict[str, Any]:
        """Calculate basic statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded. Otherwise a dict with:
            ``summary`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``missing_values`` (null count
            per column), ``row_count`` and ``column_count``.
        """
        if self.data is None:
            return {}

        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame without columns.
            summary = pd.DataFrame()

        stats = {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }
        return stats

    # The return annotation is quoted so the plotly type is not evaluated
    # when the class body is executed.
    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Create different types of visualizations based on user selection.

        Args:
            chart_type: "Line Plot", "Bar Plot", "Scatter Plot" or
                "Box Plot"; any other value produces a histogram.
            x_col: Column plotted on the x axis.
            y_col: Column plotted on the y axis (ignored for histograms).
            color_col: Optional column used to colour the traces.

        Returns:
            The plotly figure built from ``self.data``.
        """
        plotters = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        plotter = plotters.get(chart_type)
        if plotter is None:
            # Fallback for "Histogram" and any unrecognised chart type.
            return px.histogram(self.data, x=x_col, color=color_col)
        return plotter(self.data, x=x_col, y=y_col, color=color_col)

    def calculate_metrics(self, column: str) -> Dict[str, float]:
        """Calculate key metrics for a selected column.

        Args:
            column: Name of the column to summarise.

        Returns:
            A dict with ``mean``, ``median``, ``std``, ``min``, ``max`` and
            ``skew``; an empty dict when ``column`` is not one of the
            numeric columns.
        """
        if column not in self.numeric_columns:
            return {}

        series = self.data[column]
        metrics = {
            'mean': series.mean(),
            'median': series.median(),
            'std': series.std(),
            'min': series.min(),
            'max': series.max(),
            'skew': series.skew(),
        }
        return metrics
def render_analytics_page():
    """Render the Streamlit analytics page.

    Flow: title, CSV uploader, and (once a file loads successfully) a data
    preview, the numeric summary table, an interactive chart builder and a
    per-column metric read-out. Nothing beyond the title and uploader is
    drawn until a file has been uploaded and parsed.
    """
    st.title("Data Analytics Dashboard")

    # A fresh processor is built on every call.
    engine = DataProcessor()

    # File upload
    csv_file = st.file_uploader("Upload your CSV data", type=["csv"])
    if csv_file is None:
        return
    if not engine.load_data(csv_file):
        return

    st.success("Data loaded successfully!")

    # Data preview
    st.subheader("Data Preview")
    preview = engine.data.head()
    st.dataframe(preview)

    # Basic statistics
    st.subheader("Basic Statistics")
    overview = engine.get_basic_stats()
    st.write(overview['summary'])

    # Visualization section: three side-by-side selectors
    st.subheader("Create Visualization")
    chart_slot, x_slot, y_slot = st.columns(3)

    chart_choices = ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
    with chart_slot:
        chart_type = st.selectbox("Select Chart Type", chart_choices)

    with x_slot:
        x_col = st.selectbox("Select X-axis", engine.data.columns)

    with y_slot:
        # Histograms need no y axis, so the selector is skipped for them.
        if chart_type == "Histogram":
            y_col = None
        else:
            y_col = st.selectbox("Select Y-axis", engine.numeric_columns)

    # 'None' is the sentinel entry meaning "no colour grouping".
    colour_choices = ['None'] + engine.categorical_columns
    picked_colour = st.selectbox("Select Color Variable (optional)", colour_choices)
    color_col = picked_colour if picked_colour != 'None' else None

    # Generate and display the chart; fall back to x when no y was chosen.
    y_axis = y_col or x_col
    figure = engine.create_visualization(chart_type, x_col, y_axis, color_col)
    st.plotly_chart(figure, use_container_width=True)

    # Metric calculator
    st.subheader("Metric Calculator")
    metric_col = st.selectbox("Select column for metrics", engine.numeric_columns)
    computed = engine.calculate_metrics(metric_col)

    # Lay the metrics out round-robin across three columns.
    metric_slots = st.columns(3)
    for position, (name, number) in enumerate(computed.items()):
        with metric_slots[position % 3]:
            st.metric(label=name.capitalize(), value=f"{number:.2f}")