# Non-Python residue from a file-viewer page, converted to comments so the
# module parses:
#   Fucius's picture
#   Upload 422 files
#   df6c67d verified
#   raw
#   history blame
#   3.7 kB
import platform
import re
import socket
import time
import uuid
from inference.core.cache import cache
from inference.core.logger import logger
from inference.core.version import __version__
def get_model_metrics(
    inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
) -> dict:
    """
    Gets the metrics for a given model between a specified score (timestamp) range.

    Args:
        inference_server_id (str): The identifier of the inference server.
        model_id (str): The identifier of the model.
        min (float, optional): Lower bound of the score range. Defaults to -1.
        max (float, optional): Upper bound of the score range. Defaults to
            float("inf").

    Returns:
        dict: A dictionary containing the metrics of the model:
            - num_inferences (int): The number of inferences made.
            - avg_inference_time (float): The average inference time
              (0 when no timings were recorded).
            - num_errors (int): The number of errors that occurred.
    """
    inferences_with_times = cache.zrangebyscore(
        f"inference:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_inferences = len(inferences_with_times)

    inference_times = []
    for inference, _score in inferences_with_times:
        response = inference["response"]
        # A cached response may be a single dict or a list of dicts; collect
        # every recorded "time" value either way.
        responses = response if isinstance(response, list) else [response]
        inference_times.extend(r["time"] for r in responses if "time" in r)

    avg_inference_time = (
        sum(inference_times) / len(inference_times) if inference_times else 0
    )

    errors_with_times = cache.zrangebyscore(
        f"error:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_errors = len(errors_with_times)

    return {
        "num_inferences": num_inferences,
        "avg_inference_time": avg_inference_time,
        "num_errors": num_errors,
    }
def get_system_info() -> dict:
    """Collects system information such as platform, architecture, hostname,
    IP address, MAC address, and processor details.

    Best-effort: if any lookup fails (e.g. hostname does not resolve), the
    exception is logged and whatever fields were collected so far are
    returned.

    Returns:
        dict: A dictionary with the keys "platform", "platform_release",
            "platform_version", "architecture", "hostname", "ip_address",
            "mac_address", and "processor" (possibly partial on failure).
    """
    info = {}
    try:
        info["platform"] = platform.system()
        info["platform_release"] = platform.release()
        info["platform_version"] = platform.version()
        info["architecture"] = platform.machine()
        info["hostname"] = socket.gethostname()
        # May raise (e.g. socket.gaierror) when the hostname is unresolvable.
        info["ip_address"] = socket.gethostbyname(socket.gethostname())
        # uuid.getnode() gives the MAC as a 48-bit int; format as aa:bb:cc:dd:ee:ff.
        info["mac_address"] = ":".join(re.findall("..", "%012x" % uuid.getnode()))
        info["processor"] = platform.processor()
    except Exception as e:
        # Best-effort: log and fall through to return the partial dict.
        # (The original returned from `finally`, which also swallowed any
        # exception the `except` clause itself raised — flake8 B012.)
        logger.exception(e)
    return info
def get_inference_results_for_model(
    inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
):
    """
    Fetch the raw inference records for a model within a score (timestamp)
    range, stripping image payloads before returning them.

    Args:
        inference_server_id (str): The identifier of the inference server.
        model_id (str): The identifier of the model.
        min (float, optional): Lower bound of the score range. Defaults to -1.
        max (float, optional): Upper bound of the score range. Defaults to
            float("inf").

    Returns:
        list: One dict per record, with "request_time" (the cache score) and
            "inference" (the record with image fields removed).
    """
    cached_records = cache.zrangebyscore(
        f"inference:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    stripped_results = []
    for record, request_time in cached_records:
        # Don't send large image files
        if record.get("request", {}).get("image"):
            del record["request"]["image"]
        response = record.get("response")
        if response:
            # Normalize single-dict responses to a one-element list so both
            # shapes are handled by the same loop.
            response_items = response if isinstance(response, list) else [response]
            for item in response_items:
                if item.get("image"):
                    del item["image"]
        stripped_results.append({"request_time": request_time, "inference": record})
    return stripped_results