Spaces:

demoOrganisation34
/

Prospea

Running

App Files Files Community

Pranav0111 committed on Jan 21

Commit

89cee86

verified ·

1 Parent(s): 0d7d0ea

Create data_processor.py

Browse files

Files changed (1) hide show

data_processor.py +140 -0

data_processor.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from typing import List, Dict, Any
+import streamlit as st
+class DataProcessor:
+    def __init__(self):
+        self.data = None
+        self.numeric_columns = []
+        self.categorical_columns = []
+        self.date_columns = []
+    def load_data(self, file) -> bool:
+        """Load and validate CSV data"""
+        try:
+            self.data = pd.read_csv(file)
+            self._classify_columns()
+            return True
+        except Exception as e:
+            st.error(f"Error loading data: {str(e)}")
+            return False
+    def _classify_columns(self):
+        """Classify columns into numeric, categorical, and date types"""
+        for col in self.data.columns:
+            if pd.api.types.is_numeric_dtype(self.data[col]):
+                self.numeric_columns.append(col)
+            elif pd.api.types.is_datetime64_any_dtype(self.data[col]):
+                self.date_columns.append(col)
+            else:
+                try:
+                    pd.to_datetime(self.data[col])
+                    self.date_columns.append(col)
+                except:
+                    self.categorical_columns.append(col)
+    def get_basic_stats(self) -> Dict[str, Any]:
+        """Calculate basic statistics for numeric columns"""
+        if self.data is None:
+            return {}
+        stats = {
+            'summary': self.data[self.numeric_columns].describe(),
+            'missing_values': self.data.isnull().sum(),
+            'row_count': len(self.data),
+            'column_count': len(self.data.columns)
+        }
+        return stats
+    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> go.Figure:
+        """Create different types of visualizations based on user selection"""
+        if chart_type == "Line Plot":
+            fig = px.line(self.data, x=x_col, y=y_col, color=color_col)
+        elif chart_type == "Bar Plot":
+            fig = px.bar(self.data, x=x_col, y=y_col, color=color_col)
+        elif chart_type == "Scatter Plot":
+            fig = px.scatter(self.data, x=x_col, y=y_col, color=color_col)
+        elif chart_type == "Box Plot":
+            fig = px.box(self.data, x=x_col, y=y_col, color=color_col)
+        else:
+            fig = px.histogram(self.data, x=x_col, color=color_col)
+        return fig
+    def calculate_metrics(self, column: str) -> Dict[str, float]:
+        """Calculate key metrics for a selected column"""
+        if column not in self.numeric_columns:
+            return {}
+        metrics = {
+            'mean': self.data[column].mean(),
+            'median': self.data[column].median(),
+            'std': self.data[column].std(),
+            'min': self.data[column].min(),
+            'max': self.data[column].max(),
+            'skew': self.data[column].skew()
+        }
+        return metrics
+def render_analytics_page():
+    st.title("Data Analytics Dashboard")
+    # Initialize data processor
+    processor = DataProcessor()
+    # File upload
+    uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
+    if uploaded_file is not None:
+        if processor.load_data(uploaded_file):
+            st.success("Data loaded successfully!")
+            # Data Preview
+            st.subheader("Data Preview")
+            st.dataframe(processor.data.head())
+            # Basic Stats
+            st.subheader("Basic Statistics")
+            stats = processor.get_basic_stats()
+            st.write(stats['summary'])
+            # Visualization Section
+            st.subheader("Create Visualization")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                chart_type = st.selectbox(
+                    "Select Chart Type",
+                    ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
+                )
+            with col2:
+                x_col = st.selectbox("Select X-axis", processor.data.columns)
+            with col3:
+                y_col = st.selectbox("Select Y-axis", processor.numeric_columns) if chart_type != "Histogram" else None
+            color_col = st.selectbox("Select Color Variable (optional)",
+                                   ['None'] + processor.categorical_columns)
+            color_col = None if color_col == 'None' else color_col
+            # Generate and display visualization
+            fig = processor.create_visualization(
+                chart_type,
+                x_col,
+                y_col if y_col else x_col,
+                color_col
+            )
+            st.plotly_chart(fig, use_container_width=True)
+            # Metrics Calculator
+            st.subheader("Metric Calculator")
+            metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
+            metrics = processor.calculate_metrics(metric_col)
+            # Display metrics in columns
+            cols = st.columns(3)
+            for i, (metric, value) in enumerate(metrics.items()):
+                with cols[i % 3]:
+                    st.metric(label=metric.capitalize(), value=f"{value:.2f}")