Files changed (1)
  1. inference.py +9 -9
inference.py CHANGED
@@ -9,15 +9,15 @@ from typing import List, Dict
9
  try:
10
  import vllm
11
  except ImportError:
12
- # Check CUDA version and install the correct vllm version
13
- cuda_version = torch.version.cuda
14
- if cuda_version == "11.8":
15
- vllm_version = "v0.6.1.post1"
16
- pip_cmd = f"pip install https://github.com/vllm-project/vllm/releases/download/{vllm_version}/vllm-{vllm_version}+cu118-cp310-cp310-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118"
17
- else:
18
- raise RuntimeError(f"Unsupported CUDA version: {cuda_version}")
19
-
20
- subprocess.check_call([sys.executable, "-m", "pip", "install", pip_cmd])
21
  # Import the necessary modules after installation
22
  from vllm import LLM, SamplingParams
23
  from vllm.utils import random_uuid
 
9
try:
    import vllm
except ImportError:
    # vllm is not installed: fetch the prebuilt CUDA 11.8 wheel straight from
    # the GitHub release, then fall through to the imports below.
    #
    # BUG FIX: the GitHub release *tag* has a leading "v" (v0.6.1.post1), but
    # the wheel *filename* uses the bare PEP 427 version (vllm-0.6.1.post1+...).
    # Reusing the tag in both places built a URL that 404s, so the fallback
    # install could never succeed. Keep the two forms separate.
    vllm_tag = "v0.6.1.post1"                   # GitHub release tag (with "v")
    vllm_version = vllm_tag.removeprefix("v")   # bare version used in the wheel filename
    # NOTE(review): this wheel is pinned to CPython 3.10 (cp310) and CUDA 11.8
    # (cu118) — confirm the runtime matches before relying on this path.
    pip_cmd = [
        sys.executable,              # install into the same interpreter that is running
        "-m", "pip", "install",
        f"https://github.com/vllm-project/vllm/releases/download/{vllm_tag}/"
        f"vllm-{vllm_version}+cu118-cp310-cp310-manylinux1_x86_64.whl",
        "--extra-index-url", "https://download.pytorch.org/whl/cu118",
    ]
    # argv list (shell=False) — each option is its own element, nothing is
    # shell-interpreted; raises CalledProcessError if the install fails.
    subprocess.check_call(pip_cmd)

# Import the necessary modules after installation
from vllm import LLM, SamplingParams
from vllm.utils import random_uuid