Spaces:

Ranveer-Hack
/

ranveer_hack

Sleeping

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from astrapy import DataAPIClient
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import numpy as np
+from openai import OpenAI
+from typing import Dict, List
+from dotenv import load_dotenv
+import os
+# Load environment variables
+load_dotenv()
+def initialize_client():
+    try:
+        token = os.getenv("ASTRA_DB_TOKEN")
+        endpoint = os.getenv("ASTRA_DB_ENDPOINT")
+        if not token or not endpoint:
+            raise ValueError("AstraDB token or endpoint not found in environment variables.")
+        client = DataAPIClient(token)
+        db = client.get_database_by_api_endpoint(endpoint)
+        return db
+    except Exception as e:
+        st.error(f"Error initializing AstraDB client: {e}")
+        return None
+def fetch_collection_data(db, collection_name):
+    try:
+        collection = db[collection_name]
+        documents = collection.find({})
+        return list(documents)
+    except Exception as e:
+        st.error(f"Error fetching data from collection {collection_name}: {e}")
+        return None
+@st.cache_data
+def process_dataframe(data):
+    """Cache the dataframe processing to prevent unnecessary recomputation"""
+    df = pd.DataFrame(data)
+    df = df.apply(pd.to_numeric, errors="ignore")
+    return df
+def create_basic_visualization(df, viz_type, x_col, y_col, color_col=None):
+    """Handle basic visualization types"""
+    if viz_type == "Line Chart":
+        fig = px.line(df, x=x_col, y=y_col, color=color_col, markers=True)
+    elif viz_type == "Bar Chart":
+        fig = px.bar(df, x=x_col, y=y_col, color=color_col, text=y_col)
+    elif viz_type == "Scatter Plot":
+        fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=y_col, hover_data=[color_col])
+    elif viz_type == "Box Plot":
+        fig = px.box(df, x=x_col, y=y_col, color=color_col, points="all")
+    return fig
+def create_advanced_visualization(df, viz_type, x_col, y_col, color_col=None):
+    if viz_type in ["Line Chart", "Bar Chart", "Scatter Plot", "Box Plot"]:
+        fig = create_basic_visualization(df, viz_type, x_col, y_col, color_col)
+    elif viz_type == "Engagement Sunburst":
+        total_engagement = df['likes'] + df['shares'] + df['comments']
+        engagement_labels = pd.qcut(total_engagement, q=4, labels=['Low', 'Medium', 'High', 'Viral'])
+        temp_df = pd.DataFrame({
+            'engagement_level': engagement_labels,
+            'post_type': df['post_type'],
+            'likes': df['likes'],
+            'sentiment': df['avg_sentiment_score']
+        })
+        fig = px.sunburst(
+            temp_df,
+            path=['engagement_level', 'post_type'],
+            values='likes',
+            color='sentiment',
+            color_continuous_scale='RdYlBu',
+            title="Engagement Distribution by Post Type and Sentiment"
+        )
+    elif viz_type == "Sentiment Heat Calendar":
+        # Create dummy datetime for visualization
+        hour_data = []
+        days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+        for day in days:
+            for hour in range(24):
+                avg_sentiment = df['avg_sentiment_score'].mean() + np.random.normal(0, 0.1)
+                hour_data.append({
+                    'day': day,
+                    'hour': hour,
+                    'sentiment': avg_sentiment
+                })
+        temp_df = pd.DataFrame(hour_data)
+        fig = px.density_heatmap(
+            temp_df,
+            x='day',
+            y='hour',
+            z='sentiment',
+            title="Sentiment Distribution by Day and Hour",
+            labels={'sentiment': 'Average Sentiment'},
+            color_continuous_scale="RdYlBu"
+        )
+    elif viz_type == "Engagement Spider":
+        metrics = ['likes', 'shares', 'comments']
+        df_norm = df[metrics].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
+        fig = go.Figure()
+        for ptype in df['post_type'].unique():
+            values = df_norm[df['post_type'] == ptype].mean()
+            fig.add_trace(go.Scatterpolar(
+                r=values.tolist() + [values.iloc[0]],
+                theta=metrics + [metrics[0]],
+                name=ptype,
+                fill='toself'
+            ))
+        fig.update_layout(
+            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
+            showlegend=True,
+            title="Engagement Pattern by Post Type"
+        )
+    elif viz_type == "Sentiment Flow":
+        # Group by post type and calculate rolling average
+        fig = go.Figure()
+        for ptype in df['post_type'].unique():
+            mask = df['post_type'] == ptype
+            sentiment_series = df[mask]['avg_sentiment_score']
+            rolling_avg = sentiment_series.rolling(window=min(7, len(sentiment_series))).mean()
+            fig.add_trace(go.Scatter(
+                x=list(range(len(rolling_avg))),  # Use index instead of dates
+                y=rolling_avg,
+                name=ptype,
+                mode='lines',
+                fill='tonexty'
+            ))
+        fig.update_layout(
+            title="Sentiment Flow by Post Type",
+            xaxis_title="Post Sequence",
+            yaxis_title="Average Sentiment"
+        )
+    elif viz_type == "Engagement Matrix":
+        corr_matrix = df[['likes', 'shares', 'comments', 'avg_sentiment_score']].corr()
+        fig = px.imshow(
+            corr_matrix,
+            color_continuous_scale='RdBu',
+            aspect='auto',
+            title="Engagement Metrics Correlation Matrix"
+        )
+    # Apply theme
+    fig.update_layout(
+        template="plotly_dark" if st.session_state.dark_mode else "plotly_white",
+        title_x=0.5,
+        font=dict(size=14),
+        margin=dict(l=20, r=20, t=50, b=20),
+        paper_bgcolor="#1e1e1e" if st.session_state.dark_mode else "#f9f9f9",
+        plot_bgcolor="#1e1e1e" if st.session_state.dark_mode else "#f9f9f9",
+    )
+    return fig
+def initialize_openai():
+    """Initialize OpenAI client"""
+    try:
+        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        return client
+    except Exception as e:
+        st.error(f"Error initializing OpenAI: {e}")
+        return None
+def generate_prompt(metrics: Dict) -> str:
+    """Generate a prompt for GPT based on the metrics"""
+    return f"""Analyze the following social media metrics and provide 3-5 clear, specific insights about post performance:
+Post Type Metrics:
+{metrics}
+Please focus on:
+1. Comparative performance between post types
+2. Engagement patterns
+3. Notable trends or anomalies
+4. Actionable recommendations
+Format your response in clear bullet points with percentage comparisons where relevant.
+Keep each insight concise but specific, including numerical comparisons.
+"""
+def calculate_metrics(df: pd.DataFrame) -> Dict:
+    """Calculate comprehensive metrics for GPT analysis"""
+    metrics = {}
+    # Calculate per post type metrics
+    for post_type in df['post_type'].unique():
+        post_data = df[df['post_type'] == post_type]
+        metrics[post_type] = {
+            'avg_likes': post_data['likes'].mean(),
+            'avg_shares': post_data['shares'].mean(),
+            'avg_comments': post_data['comments'].mean(),
+            'avg_sentiment': post_data['avg_sentiment_score'].mean(),
+            'engagement_rate': (post_data['likes'] + post_data['shares'] + post_data['comments']).mean(),
+            'post_count': len(post_data)
+        }
+    # Calculate comparative metrics
+    total_posts = len(df)
+    total_engagement = df['likes'].sum() + df['shares'].sum() + df['comments'].sum()
+    metrics['overall'] = {
+        'total_posts': total_posts,
+        'total_engagement': total_engagement,
+        'avg_sentiment_overall': df['avg_sentiment_score'].mean()
+    }
+    return metrics
+def get_gpt_insights(client: OpenAI, metrics: Dict, user_query: str) -> str:
+    """Get insights from GPT based on the metrics and user query"""
+    try:
+        prompt = generate_prompt(metrics) + f"\n\nUser Query: {user_query}"
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a social media analytics expert. Provide clear, specific insights based on the data."},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.7,
+            max_tokens=500
+        )
+        # Extract and clean insights
+        insights_text = response.choices[0].message.content
+        return insights_text.strip()
+    except Exception as e:
+        return f"Error generating insights: {e}"
+def main():
+    st.set_page_config(
+        page_title="Advanced Social Media Analytics Dashboard",
+        page_icon="📊",
+        layout="wide",
+    )
+    openai_client = initialize_openai()
+    # Sidebar Settings
+    with st.sidebar:
+        st.title("Dashboard Settings")
+        if "dark_mode" not in st.session_state:
+            st.session_state.dark_mode = False
+        st.checkbox("Dark Mode", value=st.session_state.dark_mode, key="dark_mode")
+        st.write("### Data Source")
+        st.info("Initializing connection to AstraDB...")
+        db = initialize_client()
+        if not db:
+            return
+        collections = db.list_collection_names()
+        st.success("Connected to AstraDB")
+        selected_collection = st.selectbox("Select Collection", collections)
+    if selected_collection:
+        data = fetch_collection_data(db, selected_collection)
+        if data:
+            # Use cached data processing
+            df = process_dataframe(data)
+            # Create tabs for different analysis views
+            tab1, tab2, tab3 = st.tabs(["📊 Visualizations", "📈 Metrics", "🤖 AI Insights"])
+            with tab1:
+                col1, col2 = st.columns([1, 3])
+                with col1:
+                    st.write("### Visualization Options")
+                    viz_type = st.selectbox(
+                        "Select Analysis Type",
+                        [
+                            "Engagement Sunburst",
+                            "Sentiment Heat Calendar",
+                            "Engagement Spider",
+                            "Sentiment Flow",
+                            "Engagement Matrix",
+                            "Line Chart",
+                            "Bar Chart",
+                            "Scatter Plot",
+                            "Box Plot"
+                        ]
+                    )
+                    if viz_type in ["Line Chart", "Bar Chart", "Scatter Plot", "Box Plot"]:
+                        x_col = st.selectbox("Select X-axis", df.columns)
+                        y_col = st.selectbox("Select Y-axis", df.select_dtypes(include=["number"]).columns)
+                        color_col = st.selectbox("Select Color Column (Optional)", [None] + list(df.columns), index=0)
+                    else:
+                        x_col = y_col = color_col = None
+                with col2:
+                    try:
+                        fig = create_advanced_visualization(df, viz_type, x_col, y_col, color_col)
+                        st.plotly_chart(fig, use_container_width=True)
+                    except Exception as e:
+                        st.error(f"Error creating visualization: {e}")
+            with tab2:
+                # Display key metrics and insights
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.metric("Average Engagement Rate",
+                             f"{((df['likes'] + df['shares'] + df['comments']).mean() / len(df)):.2f}")
+                    st.metric("Likes Mean", f"{df['likes'].mean():.2f}")
+                    st.metric("Shares Mean", f"{df['shares'].mean():.2f}")
+                    st.metric("Comments Mean", f"{df['comments'].mean():.2f}")
+                    st.metric("Max Likes", f"{df['likes'].max():.2f}")
+                    st.metric("Min Likes", f"{df['likes'].min():.2f}")
+                with col2:
+                    st.metric("Sentiment Trend",
+                             f"{df['avg_sentiment_score'].mean():.2f}",
+                             f"{df['avg_sentiment_score'].std():.2f}")
+                    st.metric("Max Shares", f"{df['shares'].max():.2f}")
+                    st.metric("Min Shares", f"{df['shares'].min():.2f}")
+                    st.metric("Max Comments", f"{df['comments'].max():.2f}")
+                    st.metric("Min Comments", f"{df['comments'].min():.2f}")
+                    st.metric("Median Sentiment", f"{df['avg_sentiment_score'].median():.2f}")
+                with col3:
+                    top_type = df.groupby('post_type')['likes'].sum().idxmax()
+                    st.metric("Most Engaging Post Type", top_type)
+                with st.expander("Detailed Post Overview"):
+                    st.markdown("**Detailed metrics for each post (ID, likes, shares, comments, sentiment):**")
+                    if 'post_id' in df.columns:
+                        st.dataframe(df[['post_id','likes','shares','comments','avg_sentiment_score']])
+                    else:
+                        st.warning("No 'post_id' column found in the data.")
+            with tab3:
+                st.write("## AI Chatbot Insights")
+                if not openai_client:
+                    st.error("OpenAI API not configured. Please add your API key to access AI insights.")
+                else:
+                    if 'chat_history' not in st.session_state:
+                        st.session_state.chat_history = []
+                    user_input = st.text_input("Ask about data or insights:")
+                    if st.button("Send"):
+                        st.session_state.chat_history.append({"role": "user", "content": user_input})
+                        # Use the modified get_gpt_insights function to generate response
+                        metrics = calculate_metrics(df)
+                        reply = get_gpt_insights(openai_client, metrics, user_input)
+                        st.session_state.chat_history.append({"role": "assistant", "content": reply})
+                    for msg in st.session_state.chat_history:
+                        if msg["role"] == "user":
+                            st.markdown(f"**You:** {msg['content']}")
+                        else:
+                            st.markdown(f"**Assistant:** {msg['content']}")
+        else:
+            st.error("Failed to fetch data from the selected collection.")
+    else:
+        st.error("Please select a valid collection.")
+if __name__ == "__main__":
+    main()