# Spaces: pranit144 / detect2
# Running
# File size: 20,091 Bytes
# 236b759

import streamlit as st
import pandas as pd
import joblib
import numpy as np
from sklearn.ensemble import IsolationForest
import plotly.express as px
import plotly.graph_objects as go
import time
from datetime import datetime

# Try to import streamlit_lottie, but have a fallback if it's not available
try:
    from streamlit_lottie import st_lottie
    import requests


    def load_lottieurl(url, timeout=5):
        """Fetch a Lottie animation as parsed JSON, or return None on failure.

        Args:
            url: Address of the Lottie JSON document.
            timeout: Seconds to wait for the server before giving up, so a
                stalled host cannot block the whole script run.

        Returns:
            The decoded JSON payload, or None when the request fails, the
            response status is not 200, or the body is not valid JSON.
        """
        try:
            r = requests.get(url, timeout=timeout)
            if r.status_code != 200:
                return None
            return r.json()
        except (requests.RequestException, ValueError):
            # RequestException: connection, timeout and invalid-URL errors.
            # ValueError: the response body could not be decoded as JSON.
            return None


    LOTTIE_AVAILABLE = True
except ImportError:
    # Animations are optional; callers check this flag and show a placeholder.
    LOTTIE_AVAILABLE = False

# Page configuration: browser-tab title and icon, wide layout, sidebar open on load
page_settings = {
    "page_title": "Anomaly Hunter",
    "page_icon": "🔎",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Custom CSS
# Stylesheet for the classes used by the HTML snippets in this script
# (main-header, sub-header, info-card, metrics-container, metric-card,
# animation-placeholder, ...). Kept in a named constant so the injection call
# below stays a one-liner.
_PAGE_CSS = """

<style>

    .main-header {

        font-family: 'Trebuchet MS', sans-serif;

        background: linear-gradient(90deg, #4b6cb7 0%, #182848 100%);

        color: white !important;

        padding: 20px !important;

        border-radius: 10px;

        text-align: center;

    }

    .sub-header {

        color: #4b6cb7;

        border-left: 4px solid #4b6cb7;

        padding-left: 10px;

    }

    .normal-tag {

        background-color: #2ecc71;

        color: white;

        padding: 3px 10px;

        border-radius: 15px;

        font-weight: bold;

    }

    .anomaly-tag {

        background-color: #e74c3c;

        color: white;

        padding: 3px 10px;

        border-radius: 15px;

        font-weight: bold;

    }

    .stProgress > div > div > div > div {

        background-color: #4b6cb7;

    }

    .info-card {

        background-color: #f8f9fa;

        border-radius: 10px;

        padding: 20px;

        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);

        margin-bottom: 20px;

    }

    .metrics-container {

        display: flex;

        flex-direction: row;

        justify-content: space-between;

    }

    .metric-card {

        background-color: white;

        border-radius: 10px;

        padding: 15px;

        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);

        width: 31%;

        text-align: center;

    }

    .sidebar .sidebar-content {

        background-image: linear-gradient(180deg, #4b6cb7 0%, #182848 100%);

    }

    .animation-placeholder {

        text-align: center;

        padding: 20px;

        background: #f0f2f6;

        border-radius: 10px;

        margin-bottom: 15px;

    }

</style>

"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Header section: title and tagline rendered in the wider middle column
_left_pad, header_col, _right_pad = st.columns([1, 2, 1])
with header_col:
    for header_html in (
        "<h1 class='main-header'>🔎 ANOMALY HUNTER</h1>",
        "<p style='text-align: center;'>Advanced detection system for API resource anomalies</p>",
    ):
        st.markdown(header_html, unsafe_allow_html=True)


# Load model
@st.cache_resource
def load_model():
    """Return the anomaly detector, falling back to a synthetic model.

    Tries to load ``anomaly_detector.pkl`` from the working directory. If
    loading fails for any reason, the failure is logged and an
    ``IsolationForest`` is fitted on 100 seeded pseudo-random rows so the app
    can still run.

    Returns:
        The loaded estimator, or the fallback ``IsolationForest``.
    """
    import logging

    try:
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; only deploy this app next to a trusted model file.
        return joblib.load("anomaly_detector.pkl")
    except Exception:
        # Deliberate best-effort: any load problem falls through to the
        # synthetic model, but the reason is recorded instead of being
        # silently discarded. KeyboardInterrupt/SystemExit still propagate.
        logging.getLogger(__name__).warning(
            "Could not load anomaly_detector.pkl; using a fallback model "
            "trained on synthetic data.",
            exc_info=True,
        )

    # A local RandomState(42) yields the same values as seeding the global
    # generator, without changing global random state for the rest of the app.
    rng = np.random.RandomState(42)
    # Fit on a DataFrame carrying the column names used at prediction time so
    # scikit-learn does not warn about missing feature names.
    X_train = pd.DataFrame(
        rng.rand(100, 3) * np.array([100, 50, 100]),
        columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb'],
    )
    model = IsolationForest(contamination=0.05, random_state=42)
    model.fit(X_train)
    return model


model = load_model()

# Sidebar with glossy effect
def _sidebar_float_slider(label, upper_bound, initial, help_text):
    """Add a 0.0..upper_bound sidebar slider with 0.1 steps and return its value."""
    return st.sidebar.slider(
        label,
        min_value=0.0,
        max_value=upper_bound,
        value=initial,
        step=0.1,
        help=help_text,
    )


with st.sidebar:
    st.markdown("<h2 style='color: #4b6cb7;'>⚙️ Control Panel</h2>", unsafe_allow_html=True)
    st.markdown("<hr style='margin: 0; border-color: #4b6cb7;'>", unsafe_allow_html=True)

    # Add time display in sidebar (formatted once per script run)
    current_time = datetime.now().strftime("%H:%M:%S")
    st.markdown(f"<p style='text-align: center;'>Current Time: {current_time}</p>", unsafe_allow_html=True)

    # Show the animation when the optional dependency and the download both
    # succeed; otherwise show the static placeholder.
    scanning_animation = (
        load_lottieurl("https://assets4.lottiefiles.com/packages/lf20_rYN8cRF3b4.json")
        if LOTTIE_AVAILABLE
        else None
    )
    if scanning_animation:
        st_lottie(scanning_animation, height=150, key="scanning")
    else:
        st.markdown("<div class='animation-placeholder'>🔍<br>Scanning Animation</div>", unsafe_allow_html=True)

    st.markdown("<h3 style='color: #4b6cb7;'>Input Parameters</h3>", unsafe_allow_html=True)

# Custom themed sliders: the three values fed to the model in the live tab
latency = _sidebar_float_slider("API Latency (ms)", 100.0, 10.0, "Response time of the API endpoint")
cpu_cost = _sidebar_float_slider("CPU Utilization", 50.0, 10.0, "Simulated cost of CPU resources")
memory_mb = _sidebar_float_slider("Memory Usage (MB)", 100.0, 20.0, "Simulated memory consumption")

# Main content area
tab1, tab2 = st.tabs(["🔍 Live Analysis", "📊 Batch Processing"])

with tab1:
    # Metrics cards in a row.
    # The flex wrapper and all three cards are emitted in ONE st.markdown call:
    # each call is rendered as its own element, so a <div> opened in one call
    # cannot wrap the output of later calls. The HTML is also kept free of
    # blank lines and indentation so Markdown does not treat inner tags as
    # code blocks.
    metric_cards = (
        ("Latency", f"{latency} ms", latency > 50),
        ("CPU Cost", f"{cpu_cost}", cpu_cost > 25),
        ("Memory", f"{memory_mb} MB", memory_mb > 50),
    )
    cards_html = "".join(
        f"<div class='metric-card'><h4>{title}</h4><h2>{value}</h2>"
        f"<p>{'⚠️ High' if is_high else '✅ Normal'}</p></div>"
        for title, value, is_high in metric_cards
    )
    st.markdown(f"<div class='metrics-container'>{cards_html}</div>", unsafe_allow_html=True)

    # Prediction with animated progress
    st.markdown("<h3 class='sub-header'>Anomaly Analysis Result</h3>", unsafe_allow_html=True)

    # Cosmetic progress bar: 100 steps of 10 ms (about one second per rerun).
    # The actual prediction happens after the loop.
    progress_bar = st.progress(0)
    status_text = st.empty()

    for i in range(100):
        progress_bar.progress(i + 1)
        if i < 30:
            status_text.text("Collecting inputs...")
        elif i < 60:
            status_text.text("Analyzing patterns...")
        elif i < 90:
            status_text.text("Applying machine learning model...")
        else:
            status_text.text("Finalizing results...")
        time.sleep(0.01)

    # Prediction: the model labels a row -1 for anomaly, anything else is normal
    input_data = pd.DataFrame([[latency, cpu_cost, memory_mb]],
                              columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb'])
    prediction = model.predict(input_data)
    result = "anomaly" if prediction[0] == -1 else "normal"
    # Report the model's own decision score rather than a hard-coded
    # "confidence" percentage that does not depend on the inputs.
    decision_score = float(model.decision_function(input_data)[0])
    score_html = (
        f"<p>Model decision score: {decision_score:.3f} "
        "(lower scores indicate more anomalous behavior)</p>"
    )

    # Create a card with the result
    if result == "normal":
        st.markdown(
            '<div class="info-card" style="border-left: 5px solid #2ecc71;">'
            '<h2>✅ NORMAL OPERATION</h2>'
            '<p>All metrics are within expected parameters. No anomalies detected.</p>'
            f'{score_html}'
            '</div>',
            unsafe_allow_html=True,
        )
    else:
        st.markdown(
            '<div class="info-card" style="border-left: 5px solid #e74c3c;">'
            '<h2>🚨 ANOMALY DETECTED</h2>'
            '<p>The system has detected unusual behavior in the provided metrics.</p>'
            '<p>Recommended action: Investigate the API endpoint for potential issues.</p>'
            f'{score_html}'
            '</div>',
            unsafe_allow_html=True,
        )

    # 3D visualization
    st.markdown("<h3 class='sub-header'>3D Resource Visualization</h3>", unsafe_allow_html=True)

    # Synthetic context points for the chart (illustrative only, not model
    # output). A local RandomState(42) gives the same points on every rerun
    # without reseeding the global generator.
    rng = np.random.RandomState(42)
    n_samples = 100
    n_anomalies = int(n_samples * 0.1)
    normal_data = rng.rand(n_samples, 3) * np.array([60, 25, 60])
    anomaly_data = rng.rand(n_anomalies, 3) * np.array([100, 50, 100])

    # Create DataFrame with sample data
    viz_data = pd.DataFrame(
        np.vstack([normal_data, anomaly_data]),
        columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
    )
    viz_data['anomaly'] = ['Normal'] * n_samples + ['Anomaly'] * n_anomalies

    # Add current point
    current_point = pd.DataFrame({
        'latency_ms': [latency],
        'simulated_cpu_cost': [cpu_cost],
        'simulated_memory_mb': [memory_mb],
        'anomaly': ['Current Reading']
    })
    viz_data = pd.concat([viz_data, current_point], ignore_index=True)

    # Create 3D scatter plot
    fig = px.scatter_3d(
        viz_data,
        x='latency_ms',
        y='simulated_cpu_cost',
        z='simulated_memory_mb',
        color='anomaly',
        color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c', 'Current Reading': '#3498db'},
        opacity=0.7,
        height=600
    )

    # Highlight the current reading. Plotly Express draws one trace per
    # category, so a single per-row size list would be re-applied from its
    # start to every trace; set a base size, then enlarge only the trace
    # named 'Current Reading'.
    fig.update_traces(marker=dict(size=5), selector=dict(mode='markers'))
    fig.update_traces(marker=dict(size=10), selector=dict(name='Current Reading'))

    fig.update_layout(
        scene=dict(
            xaxis_title='Latency (ms)',
            yaxis_title='CPU Cost',
            zaxis_title='Memory (MB)',
            aspectmode='cube'
        ),
        margin=dict(l=0, r=0, b=0, t=0)
    )

    st.plotly_chart(fig, use_container_width=True)

# Batch processing tab
required_cols = ['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']


def _read_uploaded_csv(uploaded):
    """Parse the upload as CSV; on failure show an error and return None."""
    try:
        return pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the uploaded file as CSV: {exc}")
        return None


def _highlight_anomalies(val):
    """Return the cell CSS for a status value: red for Anomaly, green for Normal."""
    if val == 'Anomaly':
        return 'background-color: #ffcccc'
    if val == 'Normal':
        return 'background-color: #ccffcc'
    return ''


def _show_missing_columns_help(df):
    """Explain which required columns are absent and offer a sample template."""
    st.error(
        "❌ Your CSV must contain the following columns: latency_ms, simulated_cpu_cost, simulated_memory_mb")
    st.write("Found columns:", list(df.columns))

    # Show missing columns
    missing_cols = [col for col in required_cols if col not in df.columns]
    st.write("Missing columns:", missing_cols)

    # Offer a sample CSV download
    st.markdown("### Download a Sample CSV Template")

    # Create sample data
    sample_data = pd.DataFrame({
        'latency_ms': [10.2, 15.6, 8.9, 45.3, 12.1],
        'simulated_cpu_cost': [5.1, 7.8, 4.5, 22.7, 6.0],
        'simulated_memory_mb': [20.5, 31.2, 17.8, 90.6, 24.2]
    })

    st.download_button(
        label="📥 Download Sample Template",
        data=sample_data.to_csv(index=False),
        file_name="sample_template.csv",
        mime="text/csv",
    )


def _show_batch_results(df):
    """Score every usable row of *df* and render summary, table, download and charts.

    Rows whose required feature values are missing, non-numeric or infinite
    are skipped (with a warning) instead of aborting the run with an exception.
    """
    features = (
        df[required_cols]
        .apply(pd.to_numeric, errors='coerce')
        .replace([np.inf, -np.inf], np.nan)
    )
    usable = features.notna().all(axis=1)
    df_pred = df.loc[usable].copy()
    df_pred[required_cols] = features.loc[usable]

    skipped = int((~usable).sum())
    if skipped:
        st.warning(f"⚠️ Skipped {skipped} row(s) with missing or non-numeric feature values.")
    if df_pred.empty:
        # Nothing to score; also avoids dividing by zero in the percentage below.
        st.warning("⚠️ The uploaded file contains no rows with valid numeric feature values.")
        return

    # Show data summary
    st.markdown("<h4>Data Overview</h4>", unsafe_allow_html=True)

    # Display summary metrics
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Records", len(df))
    with col2:
        st.metric("Avg Latency", f"{df_pred['latency_ms'].mean():.2f} ms")
    with col3:
        st.metric("Avg Memory", f"{df_pred['simulated_memory_mb'].mean():.2f} MB")

    # Make predictions (the model labels rows 1 = normal, -1 = anomaly)
    df_pred['anomaly_score'] = model.decision_function(df_pred[required_cols])
    df_pred['anomaly'] = model.predict(df_pred[required_cols])
    df_pred['status'] = df_pred['anomaly'].map({1: 'Normal', -1: 'Anomaly'})

    # Count anomalies
    anomaly_count = int((df_pred['anomaly'] == -1).sum())
    normal_count = int((df_pred['anomaly'] == 1).sum())

    # Display results summary
    st.markdown("<h4>Detection Results</h4>", unsafe_allow_html=True)

    col1, col2 = st.columns(2)
    with col1:
        # Create a pie chart for anomaly distribution
        fig = go.Figure(data=[go.Pie(
            labels=['Normal', 'Anomaly'],
            values=[normal_count, anomaly_count],
            hole=.4,
            marker_colors=['#2ecc71', '#e74c3c']
        )])
        fig.update_layout(title_text="Anomaly Distribution")
        st.plotly_chart(fig)

    with col2:
        # Create a gauge chart for anomaly percentage
        anomaly_percent = (anomaly_count / len(df_pred)) * 100

        fig = go.Figure(go.Indicator(
            mode="gauge+number",
            value=anomaly_percent,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Anomaly Percentage"},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "#e74c3c"},
                'steps': [
                    {'range': [0, 5], 'color': "#2ecc71"},
                    {'range': [5, 15], 'color': "#f39c12"},
                    {'range': [15, 100], 'color': "#e74c3c"}
                ]
            }
        ))
        st.plotly_chart(fig)

    # Show the dataframe with custom formatting
    st.markdown("<h4>Detailed Results</h4>", unsafe_allow_html=True)

    # Styler.applymap is deprecated in favour of Styler.map in newer pandas;
    # use the new name when it exists and fall back otherwise.
    styler = df_pred.style
    style_cells = getattr(styler, 'map', None) or styler.applymap
    st.dataframe(style_cells(_highlight_anomalies, subset=['status']))

    # Create a download button for the results
    st.download_button(
        label="📥 Download Results as CSV",
        data=df_pred.to_csv(index=False),
        file_name="anomaly_detection_results.csv",
        mime="text/csv",
    )

    # Visualizations
    st.markdown("<h3 class='sub-header'>Advanced Visualizations</h3>", unsafe_allow_html=True)

    status_colors = {'Normal': '#2ecc71', 'Anomaly': '#e74c3c'}
    viz_type = st.radio(
        "Select Visualization Type:",
        ["2D Scatter Plot", "Feature Distributions", "Anomaly Scores"],
        horizontal=True
    )

    if viz_type == "2D Scatter Plot":
        # Create scatter plot with custom styling
        fig = px.scatter(
            df_pred,
            x='latency_ms',
            y='simulated_cpu_cost',
            color='status',
            size='simulated_memory_mb',
            hover_data=['anomaly_score'],
            color_discrete_map=status_colors,
            title="Latency vs CPU Cost (size represents Memory Usage)"
        )

        fig.update_layout(
            xaxis_title="Latency (ms)",
            yaxis_title="CPU Cost",
            legend_title="Status",
            template="plotly_white"
        )

        st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Feature Distributions":
        # Create histogram with distributions by anomaly status
        features_to_plot = st.multiselect(
            "Select features to visualize:",
            required_cols,
            default=[required_cols[0]]
        )

        for feature in features_to_plot:
            fig = px.histogram(
                df_pred,
                x=feature,
                color='status',
                barmode='overlay',
                marginal="box",
                color_discrete_map=status_colors,
                title=f"Distribution of {feature} by Status"
            )

            fig.update_layout(template="plotly_white")
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Anomaly Scores":
        # Create anomaly score visualization
        fig = px.scatter(
            df_pred.sort_values('anomaly_score'),
            y='anomaly_score',
            color='status',
            color_discrete_map=status_colors,
            title="Anomaly Scores (lower scores indicate more anomalous behavior)"
        )

        fig.add_hline(
            y=0,
            line_dash="dash",
            line_color="red",
            annotation_text="Decision Boundary"
        )

        fig.update_layout(
            xaxis_title="Data Point Index",
            yaxis_title="Anomaly Score",
            template="plotly_white"
        )

        st.plotly_chart(fig, use_container_width=True)

        # Add explanation
        st.info(
            "The anomaly score represents how 'normal' a data point is. Points with lower scores are more likely to be anomalies. The decision boundary (dashed line) separates normal from anomalous points.")


with tab2:
    st.markdown("<h3 class='sub-header'>Batch Anomaly Detection</h3>", unsafe_allow_html=True)

    with st.expander("ℹ️ How to prepare your data"):
        st.write("""

        Your CSV file should contain the following columns:

        - `latency_ms`: API response time in milliseconds

        - `simulated_cpu_cost`: CPU utilization metric

        - `simulated_memory_mb`: Memory usage in megabytes



        You may include additional columns, but these three are required for analysis.

        """)

    col1, col2 = st.columns([2, 1])

    with col1:
        uploaded_file = st.file_uploader("Upload your CSV file", type=['csv'])

    with col2:
        # Show the animation when the optional dependency and the download both
        # succeed; otherwise show the static placeholder.
        lottie_analysis = (
            load_lottieurl("https://assets5.lottiefiles.com/packages/lf20_xyadoh9f.json")
            if LOTTIE_AVAILABLE
            else None
        )
        if lottie_analysis:
            st_lottie(lottie_analysis, height=120, key="analysis")
        else:
            st.markdown("<div class='animation-placeholder'>📊<br>Analysis Animation</div>", unsafe_allow_html=True)

    if uploaded_file is not None:
        # Add a spinner during processing
        with st.spinner('Processing data...'):
            df = _read_uploaded_csv(uploaded_file)
            if df is None:
                pass  # the parse error has already been reported to the user
            elif all(col in df.columns for col in required_cols):
                _show_batch_results(df)
            else:
                _show_missing_columns_help(df)

# Add footer: horizontal rule followed by a centered gray version line
footer_html = "<p style='text-align: center; color: gray;'>Anomaly Hunter v2.0 | Advanced API Resource Monitoring</p>"
st.markdown("---")
st.markdown(footer_html, unsafe_allow_html=True)