Spaces:
Sleeping
Sleeping
| import subprocess | |
| from keras import Model | |
| # from keras.utils import multi_gpu_model | |
| # from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model | |
def get_available_gpus(num_gpus: int = None):
    """Get gpu ids for gpus that are >95% free.

    Tensorflow does not support checking free memory on gpus.
    This is a crude method that relies on `nvidia-smi` to
    determine which gpus are occupied and which are free.

    Args:
        num_gpus: Number of requested gpus. If not specified,
            ids of all available gpu(s) are returned.

    Returns:
        List[int]: List of gpu ids that are free. Length
            will equal `num_gpus`, if specified. ``[-1]`` when
            ``num_gpus == 0`` (cpu-only convention). ``None`` when
            `nvidia-smi` is missing, fails, or produces output that
            cannot be parsed.

    Raises:
        TypeError: If `num_gpus` is neither None nor an int.
        ValueError: If fewer than `num_gpus` gpus are free.
    """
    # Explicit raise instead of `assert`: asserts are stripped under -O.
    if not isinstance(num_gpus, (type(None), int)):
        raise TypeError(
            "num_gpus must be an int or None, got {}".format(type(num_gpus))
        )
    # Built-in tensorflow gpu id -1 means "run on cpu".
    if num_gpus == 0:
        return [-1]
    try:
        # List-args (no shell=True): safer, and lets us catch OSError when
        # nvidia-smi is not installed (shell=True would surface that as a
        # CalledProcessError instead).
        gpu_list = subprocess.check_output(["nvidia-smi", "--list-gpus"]).decode()
        # Output ends with a trailing newline, hence the -1.
        total_gpus = len(gpu_list.split("\n")) - 1
        smi_out = subprocess.check_output(["nvidia-smi"]).decode()
    except (subprocess.CalledProcessError, OSError):
        # nvidia-smi unavailable or failed -> no gpu information.
        return None
    # Tokens like "1234MiB". The first 2 * total_gpus such tokens are the
    # per-gpu memory readings, in order:
    # (occupied-0, total-0, occupied-1, total-1, ...).
    # The old `| grep MiB` pipeline was redundant: the token filter below
    # already selects exactly the "MiB" tokens from the full output.
    mem_tokens = [tok for tok in smi_out.split() if "MiB" in tok]
    if len(mem_tokens) < 2 * total_gpus:
        # Unexpected nvidia-smi output format; previously this would have
        # crashed with IndexError below.
        return None
    # Strip the trailing "MiB" suffix to get the numeric value.
    mems = [float(tok[:-3]) for tok in mem_tokens]
    gpu_percent_occupied_mem = [
        mems[2 * gpu_id] / mems[2 * gpu_id + 1] for gpu_id in range(total_gpus)
    ]
    # ">95% free" == less than 5% of memory currently occupied.
    available_gpus = [
        gpu_id for gpu_id, mem in enumerate(gpu_percent_occupied_mem) if mem < 0.05
    ]
    if num_gpus and num_gpus > len(available_gpus):
        raise ValueError(
            "Requested {} gpus, only {} are free".format(
                num_gpus, len(available_gpus)
            )
        )
    return available_gpus[:num_gpus] if num_gpus else available_gpus
class ModelMGPU(Model):
    """Wrapper for distributing model across multiple gpus.

    Builds a multi-gpu replica of ``ser_model`` and adopts its attributes,
    while routing every ``load``/``save`` attribute access back to the
    serial model (see ``__getattribute__``).
    """

    def __init__(self, ser_model, gpus):
        # NOTE(review): `multi_gpu_model` is undefined in this module -- both
        # imports that would provide it are commented out at the top of the
        # file, so constructing this class raises NameError at runtime.
        # Confirm which keras/tf version is meant to supply it.
        pmodel = multi_gpu_model(ser_model, gpus)  # noqa: F821
        # Adopt the parallel model's state wholesale so this instance
        # behaves as the multi-gpu model for all ordinary attribute access.
        self.__dict__.update(pmodel.__dict__)
        # Keep the serial model around for checkpoint I/O delegation.
        self._smodel = ser_model

    def __getattribute__(self, attrname):
        """Override load and save methods to be used from the serial-model. The
        serial-model holds references to the weights in the multi-gpu model.
        """
        # return Model.__getattribute__(self, attrname)
        # Any attribute whose name contains "load" or "save" (e.g. save,
        # save_weights, load_weights) is fetched from the serial model so
        # checkpoints are written/read against the single-gpu graph.
        if "load" in attrname or "save" in attrname:
            return getattr(self._smodel, attrname)
        # Everything else (including the `_smodel` lookup above, which
        # re-enters this method) falls through to normal resolution.
        return super(ModelMGPU, self).__getattribute__(attrname)