|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import math |
|
import os |
|
|
|
import pynvml |
|
|
|
|
|
class Device:
    """Thin wrapper around an NVML handle for a single NVIDIA GPU.

    The class looks up the NVML device handle for a GPU index and exposes
    the GPU's CPU affinity as a list of CPU core indices.

    Attributes:
        _nvml_affinity_elements (int): Number of 64-bit words requested from
            NVML to hold the CPU affinity bitmask. Derived from
            ``os.cpu_count()``; falls back to a single word when the CPU
            count cannot be determined.
        handle: NVML device handle returned by
            ``pynvml.nvmlDeviceGetHandleByIndex``.
    """

    # os.cpu_count() is documented to return None when the count is
    # undetermined; fall back to 1 CPU so the class can still be defined and
    # at least one mask word is requested.
    _nvml_affinity_elements = math.ceil((os.cpu_count() or 1) / 64)

    def __init__(self, device_idx: int):
        """Initialize a Device instance for a specific GPU.

        Args:
            device_idx (int): Index of the GPU device to manage.

        Raises:
            NVMLError: If the device handle cannot be obtained.
        """
        super().__init__()
        self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx)

    @staticmethod
    def _affinity_words_to_cores(words) -> list[int]:
        """Expand a sequence of 64-bit affinity mask words into core indices.

        Word ``w`` covers cores ``64 * w`` through ``64 * w + 63``; bit ``b``
        (least-significant bit first) of that word maps to core
        ``64 * w + b``.

        Args:
            words: Iterable of non-negative integers, one per 64-bit word,
                lowest-numbered cores first.

        Returns:
            list[int]: Ascending indices of every set bit.
        """
        cores = []
        for word_idx, word in enumerate(words):
            base = word_idx * 64
            for bit in range(64):
                if (word >> bit) & 1:
                    cores.append(base + bit)
        return cores

    def get_cpu_affinity(self) -> list[int]:
        """Get the CPU affinity of this GPU as a list of CPU core indices.

        Queries ``pynvml.nvmlDeviceGetCpuAffinity`` for
        ``_nvml_affinity_elements`` mask words and converts the resulting
        bitmask into the indices of the set bits.

        Returns:
            list[int]: Ascending CPU core indices that have affinity with
            this GPU.

        Raises:
            NVMLError: If the CPU affinity information cannot be retrieved.

        Example:
            >>> device = Device(0)
            >>> device.get_cpu_affinity()
            [0, 1, 2, 3]

            The result above would mean the GPU has affinity with CPU
            cores 0-3.
        """
        mask_words = pynvml.nvmlDeviceGetCpuAffinity(
            self.handle, Device._nvml_affinity_elements
        )
        return Device._affinity_words_to_cores(mask_words)
|
|