{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestampResponseTime(ms)
02024-07-26 12:00:12169.0
12024-07-26 12:00:27NaN
22024-07-26 12:00:42NaN
32024-07-26 12:00:57146.0
42024-07-26 12:01:30202.0
\n", "
import random
from datetime import datetime, timedelta
import pandas as pd
import numpy as np


# Function to generate random timestamps and response times
def generate_random_data(date, start_time, end_time, count, response_time_range,
                         null_percentage, seed=None):
    """Generate a DataFrame of sorted random timestamps with response times.

    Parameters
    ----------
    date : datetime.date
        Calendar day on which every timestamp falls.
    start_time, end_time : datetime.time
        Window (on `date`) from which timestamps are drawn, inclusive.
    count : int
        Number of rows to generate.
    response_time_range : tuple[int, int]
        (low, high) inclusive bounds for random response times in ms.
    null_percentage : float
        Fraction (0.0-1.0) of response times to blank out with None,
        simulating missing metric datapoints.
    seed : int | None, optional
        Seed for a private RNG; pass a value for reproducible output.
        Default None preserves the original non-deterministic behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns 'Timestamp' (sorted ascending) and 'ResponseTime(ms)'
        (float, NaN where a value was nulled out by pandas' None->NaN
        conversion).

    Raises
    ------
    ValueError
        If `null_percentage` is outside [0.0, 1.0].
    """
    if not 0.0 <= null_percentage <= 1.0:
        raise ValueError("null_percentage must be between 0.0 and 1.0")

    # Private RNG instance: avoids mutating the global random state.
    rng = random.Random(seed)

    # Combine date with start and end times
    start_datetime = datetime.combine(date, start_time)
    end_datetime = datetime.combine(date, end_time)
    window_seconds = int((end_datetime - start_datetime).total_seconds())

    # Draw `count` random offsets inside the window, sorted chronologically.
    timestamps = sorted(
        start_datetime + timedelta(seconds=rng.randint(0, window_seconds))
        for _ in range(count)
    )

    # Generate random response times within the inclusive range.
    low, high = response_time_range
    response_times = [rng.randint(low, high) for _ in range(count)]

    # Null out the requested fraction at distinct random positions.
    null_count = int(null_percentage * count)
    for idx in rng.sample(range(count), null_count):
        response_times[idx] = None

    return pd.DataFrame({
        'Timestamp': timestamps,
        'ResponseTime(ms)': response_times,
    })


# Parameters
date = datetime.strptime('2024-07-26', '%Y-%m-%d').date()
start_time = datetime.strptime('12:00:00', '%H:%M:%S').time()
end_time = datetime.strptime('12:30:00', '%H:%M:%S').time()
count = 60
response_time_range = (100, 250)
null_percentage = 0.50

# Generate random data and get the DataFrame
df = generate_random_data(date, start_time, end_time, count, response_time_range, null_percentage)
df.head()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Timestampp95_ResponseTime(ms)
02024-07-26 12:02:00None
12024-07-26 12:03:00None
22024-07-26 12:04:00184.8
32024-07-26 12:05:00None
42024-07-26 12:06:00181.3
52024-07-26 12:07:00223.0
62024-07-26 12:08:00196.2
72024-07-26 12:09:00151.0
82024-07-26 12:10:00None
92024-07-26 12:11:00227.45
\n", "
# Function to calculate the specified percentile of response times over specified frequency
def calculate_percentile(df, freq, percentile):
    """Bucket `df` by a time frequency and compute a per-bucket percentile.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'Timestamp' and a numeric column
        'ResponseTime(ms)'.
    freq : str
        Grouping frequency for pd.Grouper (e.g. '1min', '5min', '1h').
    percentile : float
        Quantile in (0, 1], e.g. 0.95 for p95, 0.99 for p99.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Timestamp' (bucket start) and
        'p{percentile*100}_ResponseTime(ms)'. Buckets with no data hold a
        real None (object dtype) rather than NaN, so downstream
        `is None` checks behave predictably.
    """
    out_col = f"p{int(percentile * 100)}_ResponseTime(ms)"
    percentile_df = (
        df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]
        .quantile(percentile)
        .reset_index(name=out_col)
    )
    # Convert NaN -> None explicitly via a masked object cast.
    # The previous `replace(np.nan, None, inplace=True)` is both an
    # in-place anti-pattern and version-dependent (it can leave NaN in
    # place on some pandas versions, as a later cell's .values output showed).
    percentile_df[out_col] = percentile_df[out_col].astype(object).where(
        percentile_df[out_col].notna(), None
    )
    return percentile_df


percentile_df = calculate_percentile(df, '1min', 0.95)
percentile_df.head(10)
def chunk_list(input_list, size=3):
    """Yield successive slices of `input_list` at most `size` long.

    The final chunk may be shorter when the sequence length is not a
    multiple of `size`; an empty sequence yields nothing. Works for any
    sliceable sequence (list, str, tuple) and never mutates the input.

    Parameters
    ----------
    input_list : sequence
        Sequence to split into chunks.
    size : int, optional
        Maximum chunk length; must be positive. Default 3.

    Yields
    ------
    sequence
        Consecutive slices of the input, in order.

    Raises
    ------
    ValueError
        If `size` is not positive (the original implementation looped
        forever in that case, yielding empty chunks).
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    # Step through by index instead of re-slicing the tail each pass:
    # the original `input_list = input_list[size:]` copied the remainder
    # on every iteration, giving O(n^2 / size) total copying.
    for start in range(0, len(input_list), size):
        yield input_list[start:start + size]
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DataPoints# of data points that must be filledMISSINGIGNOREBREACHINGNOT BREACHING
0--X-X1
1XXX-X0
2XX0--0
3XXXXX0
4X00-X0
5--X--2
\n", "
def evaluate_alarm_state(percentile_df, percentile_value, threshold, datapoints_to_alarm, evaluation_range=5):
    """Build a CloudWatch-style evaluation table for alarm-state decisions.

    Each percentile datapoint is encoded as one character:
      '-' : missing (None or NaN bucket),
      '0' : present and below `threshold` (not breaching),
      'X' : present and at/above `threshold` (breaching).

    Datapoints are evaluated in consecutive windows of `evaluation_range`;
    a short final window is right-padded with '-'. When a window has more
    missing points than `evaluation_range - datapoints_to_alarm`, the
    table records how many datapoints must be back-filled before an alarm
    decision can be made.

    Parameters
    ----------
    percentile_df : pandas.DataFrame
        Output of calculate_percentile; must contain the column
        f"p{int(percentile_value*100)}_ResponseTime(ms)".
    percentile_value : float
        Percentile used to derive the column name (e.g. 0.95).
    threshold : float
        Breach threshold in milliseconds.
    datapoints_to_alarm : int
        Number of breaching datapoints required to trigger an alarm.
    evaluation_range : int, optional
        Window size (number of datapoints) per evaluation. Default 5.

    Returns
    -------
    pandas.DataFrame
        One row per window: the encoded datapoint string, the fill count,
        and empty placeholder columns for the four missing-data
        treatment strategies (MISSING / IGNORE / BREACHING / NOT BREACHING).
    """
    data_points = list(percentile_df[f"p{int(percentile_value * 100)}_ResponseTime(ms)"].values)

    rows = []  # one (repr, fill_count) pair per evaluation window
    for start in range(0, len(data_points), evaluation_range):
        window = data_points[start:start + evaluation_range]

        data_point_repr = ''
        for dp in window:
            # pd.isna covers both None and float('nan'). The original
            # `dp is None` check missed NaN (which .values can yield even
            # when the column stored None) and silently classified missing
            # datapoints as breaching 'X'.
            if pd.isna(dp):
                data_point_repr += '-'
            elif dp < threshold:
                data_point_repr += '0'
            else:
                data_point_repr += 'X'

        # Right-pad a short final window so every row has equal width.
        data_point_repr = data_point_repr.ljust(evaluation_range, '-')

        num_dp_that_must_be_filled = 0
        if data_point_repr.count('-') > (evaluation_range - datapoints_to_alarm):
            present = len(data_point_repr) - data_point_repr.count('-')
            num_dp_that_must_be_filled = datapoints_to_alarm - present

        rows.append((data_point_repr, num_dp_that_must_be_filled))

    # Placeholder strategy columns are built once, after the loop
    # (the original rebuilt all four lists on every iteration).
    n_rows = len(rows)
    return pd.DataFrame({
        "DataPoints": [r[0] for r in rows],
        "# of data points that must be filled": [r[1] for r in rows],
        "MISSING": [""] * n_rows,
        "IGNORE": [""] * n_rows,
        "BREACHING": [""] * n_rows,
        "NOT BREACHING": [""] * n_rows,
    })


evaluate_alarm_state(
    percentile_df=percentile_df,
    threshold=150,
    percentile_value=0.95,
    datapoints_to_alarm=3,
)