Update code/inference.py
Browse files — code/inference.py (+1 −1)
code/inference.py
CHANGED
```diff
@@ -72,7 +72,7 @@ def model_fn(model_dir, context=None):
         model_dir,
         device_map="auto",  # Automatically map layers across GPUs
         offload_folder=offload_dir,  # Offload parts to disk if needed
-        max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())}  # Example for reducing usage per GPU
+        max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())},  # Example for reducing usage per GPU
         no_split_module_classes=["QwenForCausalLM"]  # Ensure model is split across the GPUs
     )
```

The change adds the trailing comma that was missing after the `max_memory` keyword argument; without it the call was a syntax error, since `no_split_module_classes=...` followed on the next line.