# detect2/app.py
import streamlit as st
import pandas as pd
import joblib
import numpy as np
from sklearn.ensemble import IsolationForest
import plotly.express as px
import plotly.graph_objects as go
import time
from datetime import datetime
# Try to import streamlit_lottie, but have a fallback if it's not available
try:
from streamlit_lottie import st_lottie
import requests
def load_lottieurl(url):
try:
r = requests.get(url, timeout=10)  # avoid hanging the app on a slow response
if r.status_code != 200:
return None
return r.json()
except Exception:  # network error or non-JSON response
return None
LOTTIE_AVAILABLE = True
except ImportError:
LOTTIE_AVAILABLE = False
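# LOTTIE_AVAILABLE gates every animation below, so the app degrades to a
# static emoji placeholder when streamlit_lottie (or requests) is missing.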
# Page configuration
st.set_page_config(
page_title="Anomaly Hunter",
page_icon="πŸ”Ž",
layout="wide",
initial_sidebar_state="expanded"
)
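# Note: Streamlit expects st.set_page_config to be the first Streamlit command
# executed in the script (older versions raise an exception otherwise), which
# is why it appears before any other st.* call.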
# Custom CSS
st.markdown("""
<style>
.main-header {
font-family: 'Trebuchet MS', sans-serif;
background: linear-gradient(90deg, #4b6cb7 0%, #182848 100%);
color: white !important;
padding: 20px !important;
border-radius: 10px;
text-align: center;
}
.sub-header {
color: #4b6cb7;
border-left: 4px solid #4b6cb7;
padding-left: 10px;
}
.normal-tag {
background-color: #2ecc71;
color: white;
padding: 3px 10px;
border-radius: 15px;
font-weight: bold;
}
.anomaly-tag {
background-color: #e74c3c;
color: white;
padding: 3px 10px;
border-radius: 15px;
font-weight: bold;
}
.stProgress > div > div > div > div {
background-color: #4b6cb7;
}
.info-card {
background-color: #f8f9fa;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin-bottom: 20px;
}
.metrics-container {
display: flex;
flex-direction: row;
justify-content: space-between;
}
.metric-card {
background-color: white;
border-radius: 10px;
padding: 15px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
width: 31%;
text-align: center;
}
.sidebar .sidebar-content {
background-image: linear-gradient(180deg, #4b6cb7 0%, #182848 100%);
}
.animation-placeholder {
text-align: center;
padding: 20px;
background: #f0f2f6;
border-radius: 10px;
margin-bottom: 15px;
}
</style>
""", unsafe_allow_html=True)
# Header section
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.markdown("<h1 class='main-header'>πŸ”Ž ANOMALY HUNTER</h1>", unsafe_allow_html=True)
st.markdown("<p style='text-align: center;'>Advanced detection system for API resource anomalies</p>",
unsafe_allow_html=True)
# Load model
@st.cache_resource
def load_model():
try:
return joblib.load("anomaly_detector.pkl")
except Exception:
# Model file missing or unreadable: train a simple fallback model instead
model = IsolationForest(contamination=0.05, random_state=42)
# Generate some sample data for training
np.random.seed(42)
# Train on a DataFrame with the prediction-time column names so sklearn
# does not warn about missing feature names at predict time
X_train = pd.DataFrame(np.random.rand(100, 3) * np.array([100, 50, 100]),
columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb'])
model.fit(X_train)
return model
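# The fallback model flags roughly 5% of points as anomalies
# (contamination=0.05) and is fitted on synthetic uniform data scaled to the
# same 0-100 / 0-50 / 0-100 ranges as the sliders below, so its predictions
# are illustrative rather than tuned to real API traffic.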
model = load_model()
# Sidebar with glossy effect
st.sidebar.markdown("<h2 style='color: #4b6cb7;'>βš™οΈ Control Panel</h2>", unsafe_allow_html=True)
st.sidebar.markdown("<hr style='margin: 0; border-color: #4b6cb7;'>", unsafe_allow_html=True)
# Add time display in sidebar
current_time = datetime.now().strftime("%H:%M:%S")
st.sidebar.markdown(f"<p style='text-align: center;'>Current Time: {current_time}</p>", unsafe_allow_html=True)
# Add animation placeholder or animation if available
with st.sidebar:
if LOTTIE_AVAILABLE:
lottie_scanning = load_lottieurl("https://assets4.lottiefiles.com/packages/lf20_rYN8cRF3b4.json")
if lottie_scanning:
st_lottie(lottie_scanning, height=150, key="scanning")
else:
st.markdown("<div class='animation-placeholder'>πŸ”<br>Scanning Animation</div>", unsafe_allow_html=True)
else:
st.markdown("<div class='animation-placeholder'>πŸ”<br>Scanning Animation</div>", unsafe_allow_html=True)
st.sidebar.markdown("<h3 style='color: #4b6cb7;'>Input Parameters</h3>", unsafe_allow_html=True)
# Custom themed sliders
latency = st.sidebar.slider(
"API Latency (ms)",
min_value=0.0,
max_value=100.0,
value=10.0,
step=0.1,
help="Response time of the API endpoint"
)
cpu_cost = st.sidebar.slider(
"CPU Utilization",
min_value=0.0,
max_value=50.0,
value=10.0,
step=0.1,
help="Simulated cost of CPU resources"
)
memory_mb = st.sidebar.slider(
"Memory Usage (MB)",
min_value=0.0,
max_value=100.0,
value=20.0,
step=0.1,
help="Simulated memory consumption"
)
# Main content area
tab1, tab2 = st.tabs(["πŸ” Live Analysis", "πŸ“Š Batch Processing"])
with tab1:
# Metrics cards in a row
st.markdown("<div class='metrics-container'>", unsafe_allow_html=True)
st.markdown(f"""
<div class='metric-card'>
<h4>Latency</h4>
<h2>{latency} ms</h2>
<p>{'⚠️ High' if latency > 50 else 'βœ… Normal'}</p>
</div>
""", unsafe_allow_html=True)
st.markdown(f"""
<div class='metric-card'>
<h4>CPU Cost</h4>
<h2>{cpu_cost}</h2>
<p>{'⚠️ High' if cpu_cost > 25 else 'βœ… Normal'}</p>
</div>
""", unsafe_allow_html=True)
st.markdown(f"""
<div class='metric-card'>
<h4>Memory</h4>
<h2>{memory_mb} MB</h2>
<p>{'⚠️ High' if memory_mb > 50 else 'βœ… Normal'}</p>
</div>
""", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)
# Prediction with animated progress
st.markdown("<h3 class='sub-header'>Anomaly Analysis Result</h3>", unsafe_allow_html=True)
# Create a progress bar for analysis
progress_bar = st.progress(0)
status_text = st.empty()
for i in range(100):
progress_bar.progress(i + 1)
if i < 30:
status_text.text("Collecting inputs...")
elif i < 60:
status_text.text("Analyzing patterns...")
elif i < 90:
status_text.text("Applying machine learning model...")
else:
status_text.text("Finalizing results...")
time.sleep(0.01)
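# The progress loop above is purely cosmetic: 100 steps x 0.01 s sleep gives a
# roughly one-second "analysis" animation before the actual prediction below.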
# Prediction
input_data = pd.DataFrame([[latency, cpu_cost, memory_mb]],
columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb'])
prediction = model.predict(input_data)
result = "anomaly" if prediction[0] == -1 else "normal"
# Create a card with the result
if result == "normal":
st.markdown("""
<div class="info-card" style="border-left: 5px solid #2ecc71;">
<h2>βœ… NORMAL OPERATION</h2>
<p>All metrics are within expected parameters. No anomalies detected.</p>
<p>Confidence Score: 92%</p>
</div>
""", unsafe_allow_html=True)
else:
st.markdown("""
<div class="info-card" style="border-left: 5px solid #e74c3c;">
<h2>🚨 ANOMALY DETECTED</h2>
<p>The system has detected unusual behavior in the provided metrics.</p>
<p>Recommended action: Investigate the API endpoint for potential issues.</p>
<p>Confidence Score: 87%</p>
</div>
""", unsafe_allow_html=True)
# 3D visualization
st.markdown("<h3 class='sub-header'>3D Resource Visualization</h3>", unsafe_allow_html=True)
# Generate some sample data for visualization context
np.random.seed(42)
n_samples = 100
normal_data = np.random.rand(n_samples, 3) * np.array([60, 25, 60])
anomaly_data = np.random.rand(int(n_samples * 0.1), 3) * np.array([100, 50, 100])
# Create DataFrame with sample data
viz_data = pd.DataFrame(
np.vstack([normal_data, anomaly_data]),
columns=['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
)
viz_data['anomaly'] = ['Normal'] * n_samples + ['Anomaly'] * int(n_samples * 0.1)
# Add current point
current_point = pd.DataFrame({
'latency_ms': [latency],
'simulated_cpu_cost': [cpu_cost],
'simulated_memory_mb': [memory_mb],
'anomaly': ['Current Reading']
})
viz_data = pd.concat([viz_data, current_point])
# Create 3D scatter plot
fig = px.scatter_3d(
viz_data,
x='latency_ms',
y='simulated_cpu_cost',
z='simulated_memory_mb',
color='anomaly',
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c', 'Current Reading': '#3498db'},
opacity=0.7,
height=600
)
# Update marker size to highlight the current reading. px.scatter_3d creates
# one trace per colour category, so a per-point size list spanning the whole
# DataFrame does not line up with any single trace; size the single-point
# 'Current Reading' trace separately instead.
fig.update_traces(marker=dict(size=5))
fig.update_traces(marker=dict(size=10), selector=dict(name='Current Reading'))
fig.update_layout(
scene=dict(
xaxis_title='Latency (ms)',
yaxis_title='CPU Cost',
zaxis_title='Memory (MB)',
aspectmode='cube'
),
margin=dict(l=0, r=0, b=0, t=0)
)
st.plotly_chart(fig, use_container_width=True)
# Batch processing tab
with tab2:
st.markdown("<h3 class='sub-header'>Batch Anomaly Detection</h3>", unsafe_allow_html=True)
with st.expander("ℹ️ How to prepare your data"):
st.write("""
Your CSV file should contain the following columns:
- `latency_ms`: API response time in milliseconds
- `simulated_cpu_cost`: CPU utilization metric
- `simulated_memory_mb`: Memory usage in megabytes
You may include additional columns, but these three are required for analysis.
""")
col1, col2 = st.columns([2, 1])
with col1:
uploaded_file = st.file_uploader("Upload your CSV file", type=['csv'])
with col2:
# Add animation placeholder or animation if available
if LOTTIE_AVAILABLE:
lottie_analysis = load_lottieurl("https://assets5.lottiefiles.com/packages/lf20_xyadoh9f.json")
if lottie_analysis:
st_lottie(lottie_analysis, height=120, key="analysis")
else:
st.markdown("<div class='animation-placeholder'>πŸ“Š<br>Analysis Animation</div>", unsafe_allow_html=True)
else:
st.markdown("<div class='animation-placeholder'>πŸ“Š<br>Analysis Animation</div>", unsafe_allow_html=True)
if uploaded_file is not None:
# Add a spinner during processing
with st.spinner('Processing data...'):
df = pd.read_csv(uploaded_file)
required_cols = ['latency_ms', 'simulated_cpu_cost', 'simulated_memory_mb']
if all(col in df.columns for col in required_cols):
# Show data summary
st.markdown("<h4>Data Overview</h4>", unsafe_allow_html=True)
# Display summary metrics
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Total Records", len(df))
with col2:
st.metric("Avg Latency", f"{df['latency_ms'].mean():.2f} ms")
with col3:
st.metric("Avg Memory", f"{df['simulated_memory_mb'].mean():.2f} MB")
# Make predictions
df_pred = df.copy()
df_pred['anomaly_score'] = model.decision_function(df_pred[required_cols])
df_pred['anomaly'] = model.predict(df_pred[required_cols])
df_pred['status'] = df_pred['anomaly'].map({1: 'Normal', -1: 'Anomaly'})
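# decision_function is signed around the model's threshold: negative scores
# correspond to predict() == -1 (anomalies), positive scores to normal points,
# which is why the score plot further down draws its boundary at y = 0.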
# Count anomalies
anomaly_count = (df_pred['anomaly'] == -1).sum()
normal_count = (df_pred['anomaly'] == 1).sum()
# Display results summary
st.markdown("<h4>Detection Results</h4>", unsafe_allow_html=True)
col1, col2 = st.columns(2)
with col1:
# Create a pie chart for anomaly distribution
fig = go.Figure(data=[go.Pie(
labels=['Normal', 'Anomaly'],
values=[normal_count, anomaly_count],
hole=.4,
marker_colors=['#2ecc71', '#e74c3c']
)])
fig.update_layout(title_text="Anomaly Distribution")
st.plotly_chart(fig)
with col2:
# Create a gauge chart for anomaly percentage
anomaly_percent = (anomaly_count / len(df_pred)) * 100
fig = go.Figure(go.Indicator(
mode="gauge+number",
value=anomaly_percent,
domain={'x': [0, 1], 'y': [0, 1]},
title={'text': "Anomaly Percentage"},
gauge={
'axis': {'range': [None, 100]},
'bar': {'color': "#e74c3c"},
'steps': [
{'range': [0, 5], 'color': "#2ecc71"},
{'range': [5, 15], 'color': "#f39c12"},
{'range': [15, 100], 'color': "#e74c3c"}
]
}
))
st.plotly_chart(fig)
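# The gauge bands (green below 5%, amber 5-15%, red above) roughly match the
# ~5% anomaly rate implied by contamination=0.05 in the fallback model; a model
# loaded from anomaly_detector.pkl may assume a different rate.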
# Show the dataframe with custom formatting
st.markdown("<h4>Detailed Results</h4>", unsafe_allow_html=True)
# Format the dataframe with styler
def highlight_anomalies(val):
if val == 'Anomaly':
return 'background-color: #ffcccc'
elif val == 'Normal':
return 'background-color: #ccffcc'
else:
return ''
# Display the styled dataframe
st.dataframe(df_pred.style.applymap(highlight_anomalies, subset=['status']))
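# NOTE: Styler.applymap was deprecated in pandas 2.1 in favour of Styler.map;
# on newer pandas the equivalent call would be:
#     st.dataframe(df_pred.style.map(highlight_anomalies, subset=['status']))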
# Create a download button for the results
csv = df_pred.to_csv(index=False)
st.download_button(
label="πŸ“₯ Download Results as CSV",
data=csv,
file_name="anomaly_detection_results.csv",
mime="text/csv",
)
# Visualizations
st.markdown("<h3 class='sub-header'>Advanced Visualizations</h3>", unsafe_allow_html=True)
viz_type = st.radio(
"Select Visualization Type:",
["2D Scatter Plot", "Feature Distributions", "Anomaly Scores"],
horizontal=True
)
if viz_type == "2D Scatter Plot":
# Create scatter plot with custom styling
fig = px.scatter(
df_pred,
x='latency_ms',
y='simulated_cpu_cost',
color='status',
size='simulated_memory_mb',
hover_data=['anomaly_score'],
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title="Latency vs CPU Cost (size represents Memory Usage)"
)
fig.update_layout(
xaxis_title="Latency (ms)",
yaxis_title="CPU Cost",
legend_title="Status",
template="plotly_white"
)
st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Feature Distributions":
# Create histogram with distributions by anomaly status
features = st.multiselect(
"Select features to visualize:",
required_cols,
default=required_cols[0]
)
if features:
for feature in features:
fig = px.histogram(
df_pred,
x=feature,
color='status',
barmode='overlay',
marginal="box",
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title=f"Distribution of {feature} by Status"
)
fig.update_layout(template="plotly_white")
st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Anomaly Scores":
# Create anomaly score visualization
fig = px.scatter(
df_pred.sort_values('anomaly_score').reset_index(drop=True),  # x-axis = sorted rank
y='anomaly_score',
color='status',
color_discrete_map={'Normal': '#2ecc71', 'Anomaly': '#e74c3c'},
title="Anomaly Scores (lower scores indicate more anomalous behavior)"
)
fig.add_hline(
y=0,
line_dash="dash",
line_color="red",
annotation_text="Decision Boundary"
)
fig.update_layout(
xaxis_title="Data Point Index",
yaxis_title="Anomaly Score",
template="plotly_white"
)
st.plotly_chart(fig, use_container_width=True)
# Add explanation
st.info(
"The anomaly score represents how 'normal' a data point is. Points with lower scores are more likely to be anomalies. The decision boundary (dashed line) separates normal from anomalous points.")
else:
st.error(
"❌ Your CSV must contain the following columns: latency_ms, simulated_cpu_cost, simulated_memory_mb")
st.write("Found columns:", list(df.columns))
# Show missing columns
missing_cols = [col for col in required_cols if col not in df.columns]
st.write("Missing columns:", missing_cols)
# Offer a sample CSV download
st.markdown("### Download a Sample CSV Template")
# Create sample data
sample_data = pd.DataFrame({
'latency_ms': [10.2, 15.6, 8.9, 45.3, 12.1],
'simulated_cpu_cost': [5.1, 7.8, 4.5, 22.7, 6.0],
'simulated_memory_mb': [20.5, 31.2, 17.8, 90.6, 24.2]
})
csv = sample_data.to_csv(index=False)
st.download_button(
label="πŸ“₯ Download Sample Template",
data=csv,
file_name="sample_template.csv",
mime="text/csv",
)
# Add footer
st.markdown("---")
st.markdown("<p style='text-align: center; color: gray;'>Anomaly Hunter v2.0 | Advanced API Resource Monitoring</p>",
unsafe_allow_html=True)