{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Timestamp | \n",
" ResponseTime(ms) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2024-07-26 12:00:12 | \n",
" 169.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2024-07-26 12:00:27 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 2024-07-26 12:00:42 | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 2024-07-26 12:00:57 | \n",
" 146.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 2024-07-26 12:01:30 | \n",
" 202.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Timestamp ResponseTime(ms)\n",
"0 2024-07-26 12:00:12 169.0\n",
"1 2024-07-26 12:00:27 NaN\n",
"2 2024-07-26 12:00:42 NaN\n",
"3 2024-07-26 12:00:57 146.0\n",
"4 2024-07-26 12:01:30 202.0"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"\n",
"# Function to generate random timestamps and response times\n",
"def generate_random_data(date, start_time, end_time, count, response_time_range, null_percentage, seed=None):\n",
"    \"\"\"\n",
"    Build a synthetic response-time sample as a DataFrame.\n",
"\n",
"    date: Calendar date (datetime.date) the samples fall on\n",
"    start_time, end_time: Inclusive window bounds (datetime.time) for the timestamps\n",
"    count: Number of rows to generate\n",
"    response_time_range: (low, high) inclusive bounds for the response times\n",
"    null_percentage: Fraction in [0, 1] of response times replaced by nulls\n",
"    seed: Optional seed for a private generator; None keeps drawing from the\n",
"          module-level `random` state, exactly as before this parameter existed\n",
"\n",
"    Returns a DataFrame with columns 'Timestamp' (ascending) and 'ResponseTime(ms)'.\n",
"    Raises ValueError if null_percentage is outside [0, 1].\n",
"    \"\"\"\n",
"    if not 0 <= null_percentage <= 1:\n",
"        raise ValueError(f'null_percentage must be between 0 and 1, got {null_percentage!r}')\n",
"\n",
"    # A dedicated generator makes a run repeatable without touching global state\n",
"    rng = random if seed is None else random.Random(seed)\n",
"\n",
"    # Combine date with start and end times\n",
"    start_datetime = datetime.combine(date, start_time)\n",
"    end_datetime = datetime.combine(date, end_time)\n",
"\n",
"    # The window length does not change per row, so compute it once\n",
"    window_seconds = int((end_datetime - start_datetime).total_seconds())\n",
"\n",
"    # Generate random timestamps in chronological order\n",
"    random_timestamps = sorted(\n",
"        start_datetime + timedelta(seconds=rng.randint(0, window_seconds))\n",
"        for _ in range(count)\n",
"    )\n",
"\n",
"    # Generate random response times\n",
"    random_response_times = [\n",
"        rng.randint(response_time_range[0], response_time_range[1]) for _ in range(count)\n",
"    ]\n",
"\n",
"    # Introduce null values in response times\n",
"    null_count = int(null_percentage * count)\n",
"    for idx in rng.sample(range(count), null_count):\n",
"        random_response_times[idx] = None\n",
"\n",
"    # Create a pandas DataFrame\n",
"    return pd.DataFrame({\n",
"        'Timestamp': random_timestamps,\n",
"        'ResponseTime(ms)': random_response_times\n",
"    })\n",
"\n",
"# Parameters\n",
"date = datetime.strptime('2024-07-26', '%Y-%m-%d').date()\n",
"start_time = datetime.strptime('12:00:00', '%H:%M:%S').time()\n",
"end_time = datetime.strptime('12:30:00', '%H:%M:%S').time()\n",
"count = 60\n",
"response_time_range = (100, 250)\n",
"null_percentage = 0.50\n",
"\n",
"# Seed the module-level generator so a fresh Restart & Run All reproduces the same sample\n",
"random.seed(42)\n",
"\n",
"# Generate random data and get the DataFrame\n",
"df = generate_random_data(date, start_time, end_time, count, response_time_range, null_percentage)\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Timestamp | \n",
" p95_ResponseTime(ms) | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2024-07-26 12:02:00 | \n",
" None | \n",
"
\n",
" \n",
" 1 | \n",
" 2024-07-26 12:03:00 | \n",
" None | \n",
"
\n",
" \n",
" 2 | \n",
" 2024-07-26 12:04:00 | \n",
" 184.8 | \n",
"
\n",
" \n",
" 3 | \n",
" 2024-07-26 12:05:00 | \n",
" None | \n",
"
\n",
" \n",
" 4 | \n",
" 2024-07-26 12:06:00 | \n",
" 181.3 | \n",
"
\n",
" \n",
" 5 | \n",
" 2024-07-26 12:07:00 | \n",
" 223.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 2024-07-26 12:08:00 | \n",
" 196.2 | \n",
"
\n",
" \n",
" 7 | \n",
" 2024-07-26 12:09:00 | \n",
" 151.0 | \n",
"
\n",
" \n",
" 8 | \n",
" 2024-07-26 12:10:00 | \n",
" None | \n",
"
\n",
" \n",
" 9 | \n",
" 2024-07-26 12:11:00 | \n",
" 227.45 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Timestamp p95_ResponseTime(ms)\n",
"0 2024-07-26 12:02:00 None\n",
"1 2024-07-26 12:03:00 None\n",
"2 2024-07-26 12:04:00 184.8\n",
"3 2024-07-26 12:05:00 None\n",
"4 2024-07-26 12:06:00 181.3\n",
"5 2024-07-26 12:07:00 223.0\n",
"6 2024-07-26 12:08:00 196.2\n",
"7 2024-07-26 12:09:00 151.0\n",
"8 2024-07-26 12:10:00 None\n",
"9 2024-07-26 12:11:00 227.45"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Function to calculate the specified percentile of response times over specified frequency\n",
"def calculate_percentile(df, freq, percentile):\n",
"    \"\"\"\n",
"    Compute a per-bucket percentile of the 'ResponseTime(ms)' column.\n",
"\n",
"    df: DataFrame with a datetime 'Timestamp' column and a 'ResponseTime(ms)' column\n",
"    freq: Frequency for grouping the data (e.g., '1min', '5min', '1h')\n",
"    percentile: Percentile value as a fraction (e.g., 0.95, 0.99)\n",
"\n",
"    Returns a new DataFrame with 'Timestamp' (one row per bucket) and\n",
"    'pNN_ResponseTime(ms)', where NN is int(percentile*100). Buckets without\n",
"    any response time hold None instead of NaN. The input frame is not modified.\n",
"    \"\"\"\n",
"    column_name = f'p{int(percentile*100)}_ResponseTime(ms)'\n",
"    percentile_df = (\n",
"        df.groupby(pd.Grouper(key='Timestamp', freq=freq))['ResponseTime(ms)']\n",
"        .quantile(percentile)\n",
"        .reset_index(name=column_name)\n",
"    )\n",
"    # replace(np.nan, None) changes meaning between pandas versions (older releases\n",
"    # read value=None as a request to pad-fill), so convert explicitly. The cast to\n",
"    # object comes first; on a float column None would be coerced straight back to NaN.\n",
"    percentiles = percentile_df[column_name]\n",
"    percentile_df[column_name] = percentiles.astype(object).where(percentiles.notna(), None)\n",
"    return percentile_df\n",
"\n",
"\n",
"\n",
"# 95th percentile of the response times for every one-minute bucket\n",
"percentile_df = calculate_percentile(df, freq='1min', percentile=0.95)\n",
"percentile_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([229.8 , nan, 224. , nan, 234. , nan, 162.5 , nan,\n",
" 136. , nan, 205.35, nan, nan, 183. , 241. , 221.8 ,\n",
" nan, 116.4 , 174.65, 133.35, 176. , 127. , 209.85, 207. ,\n",
" 200. , 241.25, 217. , nan, 188.7 , 188. ])"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raw p95 column as an array\n",
"percentile_df.loc[:, \"p95_ResponseTime(ms)\"].values"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"def chunk_list(input_list, size=3):\n",
"    \"\"\"\n",
"    Yield consecutive slices of `input_list` holding at most `size` items.\n",
"\n",
"    input_list: Any sliceable sequence with a length (list, tuple, ...)\n",
"    size: Maximum chunk length; must be a positive integer\n",
"\n",
"    Every chunk has exactly `size` items except possibly the last one.\n",
"    An empty input yields nothing. The input is never modified.\n",
"    Raises ValueError (when iteration starts) if size < 1.\n",
"    \"\"\"\n",
"    if size < 1:\n",
"        raise ValueError(f'size must be a positive integer, got {size!r}')\n",
"    # Slice by offset instead of repeatedly re-slicing the remainder: that copied\n",
"    # the whole tail on every step and never terminated for size=0\n",
"    for start in range(0, len(input_list), size):\n",
"        yield input_list[start:start + size]\n",
"\n",
"\n",
"# for chunk in chunk_list(list(percentile_df[\"p95_ResponseTime(ms)\"].values)):\n",
"# print(chunk)\n"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample window written in the '-' / '0' / 'X' data-point notation (scratch value)\n",
"s_ = \"0-X-X\"\n",
"# Number of characters in the window that are not '-'; gives 3 for the sample above\n",
"# len(s_) - s_.count(\"-\")"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DataPoints | \n",
" # of data points that must be filled | \n",
" MISSING | \n",
" IGNORE | \n",
" BREACHING | \n",
" NOT BREACHING | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" --X-X | \n",
" 1 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 1 | \n",
" XXX-X | \n",
" 0 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 2 | \n",
" XX0-- | \n",
" 0 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 3 | \n",
" XXXXX | \n",
" 0 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 4 | \n",
" X00-X | \n",
" 0 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" 5 | \n",
" --X-- | \n",
" 2 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DataPoints # of data points that must be filled MISSING IGNORE BREACHING \\\n",
"0 --X-X 1 \n",
"1 XXX-X 0 \n",
"2 XX0-- 0 \n",
"3 XXXXX 0 \n",
"4 X00-X 0 \n",
"5 --X-- 2 \n",
"\n",
" NOT BREACHING \n",
"0 \n",
"1 \n",
"2 \n",
"3 \n",
"4 \n",
"5 "
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def evaluate_alarm_state(percentile_df, percentile_value, threshold, datapoints_to_alarm, evaluation_range=5):\n",
"    \"\"\"\n",
"    Summarise the percentile data points window by window.\n",
"\n",
"    percentile_df: DataFrame holding a 'pNN_ResponseTime(ms)' column\n",
"    percentile_value: Percentile as a fraction (e.g., 0.95); NN is int(percentile_value*100)\n",
"    threshold: Values below it are encoded as '0', values at or above it as 'X'\n",
"    datapoints_to_alarm: Number of present data points a window needs\n",
"    evaluation_range: Number of data points per window\n",
"\n",
"    Returns a DataFrame with one row per window. 'DataPoints' encodes each point\n",
"    as '-' (missing), '0' or 'X', padded with '-' up to evaluation_range.\n",
"    '# of data points that must be filled' is how many more present points the\n",
"    window needs to reach datapoints_to_alarm (0 when it already has enough).\n",
"    MISSING / IGNORE / BREACHING / NOT BREACHING are empty-string placeholders.\n",
"    \"\"\"\n",
"    column_name = f'p{int(percentile_value*100)}_ResponseTime(ms)'\n",
"    data_points = list(percentile_df[column_name].values)\n",
"\n",
"    data_point_reprs = []\n",
"    fill_counts = []\n",
"\n",
"    for chunk in chunk_list(data_points, size=evaluation_range):\n",
"        symbols = []\n",
"        for dp in chunk:\n",
"            # pd.isna covers None as well as float NaN; a bare `is None` test\n",
"            # lets NaN fall through both comparisons and end up as 'X'\n",
"            if pd.isna(dp):\n",
"                symbols.append('-')\n",
"            elif dp < threshold:\n",
"                symbols.append('0')\n",
"            else:\n",
"                symbols.append('X')\n",
"\n",
"        # Fill the remaining data points with '-' if the chunk is less than evaluation_range\n",
"        data_point_repr = ''.join(symbols).ljust(evaluation_range, '-')\n",
"\n",
"        # Only windows with fewer present points than datapoints_to_alarm need filling\n",
"        present = len(data_point_repr) - data_point_repr.count('-')\n",
"        num_dp_that_must_be_filled = max(0, datapoints_to_alarm - present)\n",
"\n",
"        data_point_reprs.append(data_point_repr)\n",
"        fill_counts.append(num_dp_that_must_be_filled)\n",
"\n",
"    window_count = len(data_point_reprs)\n",
"    return pd.DataFrame({\n",
"        'DataPoints': data_point_reprs,\n",
"        '# of data points that must be filled': fill_counts,\n",
"        'MISSING': [''] * window_count,\n",
"        'IGNORE': [''] * window_count,\n",
"        'BREACHING': [''] * window_count,\n",
"        'NOT BREACHING': [''] * window_count,\n",
"    })\n",
"\n",
"\n",
"# Evaluate the p95 series in windows of 5 data points, 3 required, threshold 150\n",
"evaluate_alarm_state(\n",
"    percentile_df,\n",
"    percentile_value=0.95,\n",
"    threshold=150,\n",
"    datapoints_to_alarm=3,\n",
"    evaluation_range=5,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}