Pranav0111 committed on
Commit
89cee86
·
verified ·
1 Parent(s): 0d7d0ea

Create data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +140 -0
data_processor.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ from typing import List, Dict, Any
5
+ import streamlit as st
6
+
7
class DataProcessor:
    """Load CSV data and expose summary statistics, metrics, and charts.

    ``data`` holds the DataFrame from the most recent successful
    ``load_data`` call (``None`` before that). The three ``*_columns``
    lists hold the column names of ``data`` grouped by detected kind.
    """

    def __init__(self):
        self.data = None
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Read CSV data from *file* and classify its columns.

        Args:
            file: Any input accepted by ``pandas.read_csv``.

        Returns:
            True when the data was read and classified. On any failure the
            error is reported via ``st.error``, the previously loaded data
            (if any) is kept, and False is returned.
        """
        previous = self.data
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
        except Exception as e:
            # Roll back so ``data`` never disagrees with the column lists,
            # which are only replaced once classification fully succeeds.
            self.data = previous
            st.error(f"Error loading data: {e}")
            return False
        return True

    def _classify_columns(self):
        """Sort the columns of ``self.data`` into numeric, date, and categorical.

        A column that is neither numeric nor already datetime-typed counts as
        a date column when ``pd.to_datetime`` can parse it, and as categorical
        otherwise. The lists are rebuilt from scratch on every call so that
        repeated loads do not accumulate duplicate names.
        """
        numeric, categorical, dates = [], [], []
        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                numeric.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                dates.append(col)
            else:
                try:
                    pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    # Not parseable as dates: treat as plain categories.
                    categorical.append(col)
                else:
                    dates.append(col)

        # Commit all three lists together, only after every column succeeded.
        self.numeric_columns = numeric
        self.categorical_columns = categorical
        self.date_columns = dates

    def get_basic_stats(self) -> Dict[str, Any]:
        """Return summary statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded; otherwise a dict with
            ``'summary'`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``'missing_values'`` (null count
            per column), ``'row_count'`` and ``'column_count'``.
        """
        if self.data is None:
            return {}

        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame with no columns.
            summary = pd.DataFrame()

        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Build a Plotly Express figure for the selected chart type.

        Args:
            chart_type: "Line Plot", "Bar Plot", "Scatter Plot" or "Box Plot";
                any other value produces a histogram.
            x_col: Column plotted on the x-axis.
            y_col: Column plotted on the y-axis (ignored for histograms).
            color_col: Optional column used to colour the marks.

        Returns:
            The figure created by the matching ``plotly.express`` function.

        Raises:
            ValueError: If no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded; call load_data() first.")

        xy_builders = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        builder = xy_builders.get(chart_type)
        if builder is None:
            # Histograms take only an x column.
            return px.histogram(self.data, x=x_col, color=color_col)
        return builder(self.data, x=x_col, y=y_col, color=color_col)

    def calculate_metrics(self, column: str) -> Dict[str, float]:
        """Return descriptive metrics for one numeric column.

        Args:
            column: Name of the column to summarise.

        Returns:
            A dict with ``mean``, ``median``, ``std``, ``min``, ``max`` and
            ``skew``; an empty dict when no data is loaded or *column* is not
            one of the numeric columns.
        """
        if self.data is None or column not in self.numeric_columns:
            return {}

        series = self.data[column]
        return {
            'mean': series.mean(),
            'median': series.median(),
            'std': series.std(),
            'min': series.min(),
            'max': series.max(),
            'skew': series.skew(),
        }
80
+
81
def render_analytics_page():
    """Render the Streamlit data analytics dashboard.

    Shows a CSV uploader. Once a file has loaded successfully the page
    displays a data preview, summary statistics, a chart builder and a
    metric calculator. The statistics and metric sections need numeric
    columns; when the upload has none they show an informational note
    instead of failing.
    """
    st.title("Data Analytics Dashboard")

    # Initialize data processor
    processor = DataProcessor()

    # File upload
    uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
    if uploaded_file is None:
        return
    if not processor.load_data(uploaded_file):
        # load_data has already reported the error to the user.
        return

    st.success("Data loaded successfully!")
    has_numeric = bool(processor.numeric_columns)

    # Data Preview
    st.subheader("Data Preview")
    st.dataframe(processor.data.head())

    # Basic Stats
    st.subheader("Basic Statistics")
    if has_numeric:
        stats = processor.get_basic_stats()
        st.write(stats['summary'])
    else:
        # Describing a frame with no numeric columns would raise.
        st.info("No numeric columns found; summary statistics are unavailable.")

    # Visualization Section
    st.subheader("Create Visualization")
    col1, col2, col3 = st.columns(3)

    with col1:
        chart_type = st.selectbox(
            "Select Chart Type",
            ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
        )

    with col2:
        x_col = st.selectbox("Select X-axis", processor.data.columns)

    with col3:
        # Histograms have no y-axis, so the selector is not shown for them.
        if chart_type != "Histogram":
            y_col = st.selectbox("Select Y-axis", processor.numeric_columns)
        else:
            y_col = None

    color_col = st.selectbox("Select Color Variable (optional)",
                             ['None'] + processor.categorical_columns)
    color_col = None if color_col == 'None' else color_col

    # Generate and display visualization; fall back to the x column when no
    # y column was chosen.
    fig = processor.create_visualization(
        chart_type,
        x_col,
        y_col if y_col else x_col,
        color_col
    )
    st.plotly_chart(fig, use_container_width=True)

    # Metrics Calculator
    st.subheader("Metric Calculator")
    if not has_numeric:
        st.info("No numeric columns found; metrics are unavailable.")
        return

    metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
    metrics = processor.calculate_metrics(metric_col)

    # Display metrics in columns, wrapping every three entries.
    cols = st.columns(3)
    for i, (metric, value) in enumerate(metrics.items()):
        with cols[i % 3]:
            st.metric(label=metric.capitalize(), value=f"{value:.2f}")