import streamlit as st
import pandas as pd
import joblib
import numpy as np
from sklearn.ensemble import IsolationForest
import plotly.express as px
import plotly.graph_objects as go
import time
from datetime import datetime
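# Streamlit dashboard that flags anomalous API resource readings with an IsolationForest
# model: tab 1 scores a single live reading, tab 2 scores an uploaded CSV in batch.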
# Try to import streamlit_lottie, but have a fallback if it's not available
try:
    from streamlit_lottie import st_lottie
    import requests

    def load_lottieurl(url):
        # Fetch a Lottie animation as JSON; return None on any failure
        try:
            r = requests.get(url, timeout=10)  # timeout so a slow CDN can't hang the app
            if r.status_code != 200:
                return None
            return r.json()
        except Exception:
            return None

    LOTTIE_AVAILABLE = True
except ImportError:
    LOTTIE_AVAILABLE = False
# Page configuration
st.set_page_config(
    page_title="Anomaly Hunter",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS
st.markdown("""
""", unsafe_allow_html=True)
# Header section
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.markdown("# 🔍 ANOMALY HUNTER", unsafe_allow_html=True)
    st.markdown("Advanced detection system for API resource anomalies", unsafe_allow_html=True)
# Load model
@st.cache_resource
def load_model():
    try:
        return joblib.load("anomaly_detector.pkl")
    except Exception:
        # If the model file doesn't exist (or can't be loaded), create a simple model
        model = IsolationForest(contamination=0.05, random_state=42)
        # Generate some sample data for training; use a DataFrame so the fitted model
        # carries the same feature names used for prediction later
        np.random.seed(42)
        X_train = pd.DataFrame(
            np.random.rand(100, 3) * np.array([100, 50, 100]),
            columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
        )
        model.fit(X_train)
        return model
model = load_model()
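# IsolationForest conventions used below: predict() returns -1 for anomalies and 1 for
# normal points; decision_function() scores are lower for more anomalous points.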
# Sidebar with glossy effect
st.sidebar.markdown("âī¸ Control Panel
", unsafe_allow_html=True)
st.sidebar.markdown("
", unsafe_allow_html=True)
# Add time display in sidebar
current_time = datetime.now().strftime("%H:%M:%S")
st.sidebar.markdown(f"Current Time: {current_time}
", unsafe_allow_html=True)
# Add animation placeholder or animation if available
with st.sidebar:
    if LOTTIE_AVAILABLE:
        lottie_scanning = load_lottieurl("https://assets4.lottiefiles.com/packages/lf20_rYN8cRF3b4.json")
        if lottie_scanning:
            st_lottie(lottie_scanning, height=150, key="scanning")
        else:
            st.markdown("🔍 Scanning Animation", unsafe_allow_html=True)
    else:
        st.markdown("🔍 Scanning Animation", unsafe_allow_html=True)
st.sidebar.markdown("Input Parameters
", unsafe_allow_html=True)
# Custom themed sliders
latency = st.sidebar.slider(
    "API Latency (ms)",
    min_value=0.0,
    max_value=100.0,
    value=10.0,
    step=0.1,
    help="Response time of the API endpoint"
)
cpu_cost = st.sidebar.slider(
    "CPU Utilization",
    min_value=0.0,
    max_value=50.0,
    value=10.0,
    step=0.1,
    help="Simulated cost of CPU resources"
)
memory_mb = st.sidebar.slider(
    "Memory Usage (MB)",
    min_value=0.0,
    max_value=100.0,
    value=20.0,
    step=0.1,
    help="Simulated memory consumption"
)
# Main content area
tab1, tab2 = st.tabs(["🔍 Live Analysis", "📊 Batch Processing"])
with tab1:
    # Metrics cards in a row
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown(f"""
**Latency**

{latency} ms

{'⚠️ High' if latency > 50 else '✅ Normal'}
""", unsafe_allow_html=True)
    with col2:
        st.markdown(f"""
**CPU Cost**

{cpu_cost}

{'⚠️ High' if cpu_cost > 25 else '✅ Normal'}
""", unsafe_allow_html=True)
    with col3:
        st.markdown(f"""
**Memory**

{memory_mb} MB

{'⚠️ High' if memory_mb > 50 else '✅ Normal'}
""", unsafe_allow_html=True)
    # Prediction with animated progress
    # Create a progress bar for analysis
    progress_bar = st.progress(0)
    status_text = st.empty()
    for i in range(100):
        progress_bar.progress(i + 1)
        if i < 30:
            status_text.text("Collecting inputs...")
        elif i < 60:
            status_text.text("Analyzing patterns...")
        elif i < 90:
            status_text.text("Applying machine learning model...")
        else:
            status_text.text("Finalizing results...")
        time.sleep(0.01)
    # Prediction
    input_data = pd.DataFrame(
        [[latency, cpu_cost, memory_mb]],
        columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
    )
    prediction = model.predict(input_data)
    result = "anomaly" if prediction[0] == -1 else "normal"
    # Create a card with the result
    if result == "normal":
        st.markdown("""
✅ **NORMAL OPERATION**

All metrics are within expected parameters. No anomalies detected.

Confidence Score: 92%
""", unsafe_allow_html=True)
    else:
        st.markdown("""
🚨 **ANOMALY DETECTED**

The system has detected unusual behavior in the provided metrics.

Recommended action: Investigate the API endpoint for potential issues.

Confidence Score: 87%
""", unsafe_allow_html=True)
    # 3D visualization
    # Generate some sample data for visualization context
    np.random.seed(42)
    n_samples = 100
    normal_data = np.random.rand(n_samples, 3) * np.array([60, 25, 60])
    anomaly_data = np.random.rand(int(n_samples * 0.1), 3) * np.array([100, 50, 100])
    # Create DataFrame with sample data
    viz_data = pd.DataFrame(
        np.vstack([normal_data, anomaly_data]),
        columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
    )
    viz_data['anomaly'] = ['Normal'] * n_samples + ['Anomaly'] * int(n_samples * 0.1)
    # Add current point
    current_point = pd.DataFrame({
        'latency_ms': [latency],
        'simulated_cpu_cost': [cpu_cost],
        'simulated_memory_mb': [memory_mb],
        'anomaly': ['Current Reading']
    })
    viz_data = pd.concat([viz_data, current_point], ignore_index=True)
    # Create 3D scatter plot
    fig = px.scatter_3d(
        viz_data,
        x='latency_ms',
        y='simulated_cpu_cost',
        z='simulated_memory_mb',
        color='anomaly',
        color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c', 'Current Reading': '#3498db'},
        opacity=0.7,
        height=600
    )
    # Update marker size to highlight the current reading (traces are split by color group)
    fig.update_traces(marker=dict(size=5))
    fig.update_traces(marker=dict(size=10), selector=dict(name='Current Reading'))
    fig.update_layout(
        scene=dict(
            xaxis_title='Latency (ms)',
            yaxis_title='CPU Cost',
            zaxis_title='Memory (MB)',
            aspectmode='cube'
        ),
        margin=dict(l=0, r=0, b=0, t=0)
    )
    st.plotly_chart(fig, use_container_width=True)
# Batch processing tab
with tab2:
    with st.expander("ℹ️ How to prepare your data"):
        st.write("""
Your CSV file should contain the following columns:
- `latency_ms`: API response time in milliseconds
- `simulated_cpu_cost`: CPU utilization metric
- `simulated_memory_mb`: Memory usage in megabytes

You may include additional columns, but these three are required for analysis.
""")
    col1, col2 = st.columns([2, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload your CSV file", type=['csv'])
    with col2:
        # Add animation placeholder or animation if available
        if LOTTIE_AVAILABLE:
            lottie_analysis = load_lottieurl("https://assets5.lottiefiles.com/packages/lf20_xyadoh9f.json")
            if lottie_analysis:
                st_lottie(lottie_analysis, height=120, key="analysis")
            else:
                st.markdown("📊 Analysis Animation", unsafe_allow_html=True)
        else:
            st.markdown("📊 Analysis Animation", unsafe_allow_html=True)
    if uploaded_file is not None:
        # Add a spinner during processing
        with st.spinner('Processing data...'):
            df = pd.read_csv(uploaded_file)
        required_cols = ['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
        if all(col in df.columns for col in required_cols):
            # Show data summary
            st.markdown("### Data Overview", unsafe_allow_html=True)
            # Display summary metrics
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Records", len(df))
            with col2:
                st.metric("Avg Latency", f"{df['latency_ms'].mean():.2f} ms")
            with col3:
                st.metric("Avg Memory", f"{df['simulated_memory_mb'].mean():.2f} MB")
            # Make predictions
            df_pred = df.copy()
            df_pred['anomaly_score'] = model.decision_function(df_pred[required_cols])
            df_pred['anomaly'] = model.predict(df_pred[required_cols])
            df_pred['status'] = df_pred['anomaly'].map({1: 'Normal', -1: 'Anomaly'})
            # Count anomalies
            anomaly_count = (df_pred['anomaly'] == -1).sum()
            normal_count = (df_pred['anomaly'] == 1).sum()
            # Display results summary
            st.markdown("### Detection Results", unsafe_allow_html=True)
            col1, col2 = st.columns(2)
            with col1:
                # Create a pie chart for anomaly distribution
                fig = go.Figure(data=[go.Pie(
                    labels=['Normal', 'Anomaly'],
                    values=[normal_count, anomaly_count],
                    hole=.4,
                    marker_colors=['#2ecc71', '#e74c3c']
                )])
                fig.update_layout(title_text="Anomaly Distribution")
                st.plotly_chart(fig)
            with col2:
                # Create a gauge chart for anomaly percentage
                anomaly_percent = (anomaly_count / len(df_pred)) * 100
                fig = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=anomaly_percent,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': "Anomaly Percentage"},
                    gauge={
                        'axis': {'range': [None, 100]},
                        'bar': {'color': "#e74c3c"},
                        'steps': [
                            {'range': [0, 5], 'color': "#2ecc71"},
                            {'range': [5, 15], 'color': "#f39c12"},
                            {'range': [15, 100], 'color': "#e74c3c"}
                        ]
                    }
                ))
                st.plotly_chart(fig)
            # Show the dataframe with custom formatting
            st.markdown("### Detailed Results", unsafe_allow_html=True)

            # Format the dataframe with a styler: red for anomalous rows, green for normal rows
            def highlight_anomalies(val):
                if val == 'Anomaly':
                    return 'background-color: #ffcccc'
                elif val == 'Normal':
                    return 'background-color: #ccffcc'
                else:
                    return ''

            # Display the styled dataframe
            st.dataframe(df_pred.style.applymap(highlight_anomalies, subset=['status']))
            # Create a download button for the results
            csv = df_pred.to_csv(index=False)
            st.download_button(
                label="📥 Download Results as CSV",
                data=csv,
                file_name="anomaly_detection_results.csv",
                mime="text/csv",
            )
            # Visualizations
            viz_type = st.radio(
                "Select Visualization Type:",
                ["2D Scatter Plot", "Feature Distributions", "Anomaly Scores"],
                horizontal=True
            )
if viz_type == "2D Scatter Plot":
# Create scatter plot with custom styling
fig = px.scatter(
df_pred,
x='latency_ms',
y='simulated_cpu_cost',
color='status',
size='simulated_memory_mb',
hover_data=['anomaly_score'],
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title="Latency vs CPU Cost (size represents Memory Usage)"
)
fig.update_layout(
xaxis_title="Latency (ms)",
yaxis_title="CPU Cost",
legend_title="Status",
template="plotly_white"
)
st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Feature Distributions":
# Create histogram with distributions by anomaly status
features = st.multiselect(
"Select features to visualize:",
required_cols,
default=required_cols[0]
)
if features:
for feature in features:
fig = px.histogram(
df_pred,
x=feature,
color='status',
barmode='overlay',
marginal="box",
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title=f"Distribution of {feature} by Status"
)
fig.update_layout(template="plotly_white")
st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Anomaly Scores":
# Create anomaly score visualization
fig = px.scatter(
df_pred.sort_values('anomaly_score'),
y='anomaly_score',
color='status',
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title="Anomaly Scores (lower scores indicate more anomalous behavior)"
)
fig.add_hline(
y=0,
line_dash="dash",
line_color="red",
annotation_text="Decision Boundary"
)
fig.update_layout(
xaxis_title="Data Point Index",
yaxis_title="Anomaly Score",
template="plotly_white"
)
st.plotly_chart(fig, use_container_width=True)
# Add explanation
st.info(
"The anomaly score represents how 'normal' a data point is. Points with lower scores are more likely to be anomalies. The decision boundary (dashed line) separates normal from anomalous points.")
        else:
            st.error(
                "❌ Your CSV must contain the following columns: "
                "latency_ms, simulated_cpu_cost, simulated_memory_mb"
            )
            st.write("Found columns:", list(df.columns))
            # Show missing columns
            missing_cols = [col for col in required_cols if col not in df.columns]
            st.write("Missing columns:", missing_cols)
            # Offer a sample CSV download
            st.markdown("### Download a Sample CSV Template")
            # Create sample data
            sample_data = pd.DataFrame({
                'latency_ms': [10.2, 15.6, 8.9, 45.3, 12.1],
                'simulated_cpu_cost': [5.1, 7.8, 4.5, 22.7, 6.0],
                'simulated_memory_mb': [20.5, 31.2, 17.8, 90.6, 24.2]
            })
            csv = sample_data.to_csv(index=False)
            st.download_button(
                label="📥 Download Sample Template",
                data=csv,
                file_name="sample_template.csv",
                mime="text/csv",
            )
# Add footer
st.markdown("---")
st.markdown("Anomaly Hunter v2.0 | Advanced API Resource Monitoring
",
unsafe_allow_html=True)