Delete code/inference.py
code/inference.py  DELETED  +0 -22
@@ -1,22 +0,0 @@
-import os
-import subprocess
-
-def run_vllm_inference():
-    # Set the necessary environment variables
-    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-
-    # vLLM serve command
-    command = [
-        "vllm", "serve", "Imran1/Qwen2.5-72B-Instruct-FP8",
-        "--tensor-parallel-size", "4",
-        "--dtype", "auto",
-        "--api-key", "token-abc123",
-        "--max-model-len", "2000",
-        "--kv-cache-dtype", "auto"
-    ]
-
-    # Run the command as a subprocess
-    subprocess.run(command)
-
-if __name__ == "__main__":
-    run_vllm_inference()
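For reference, the deleted script launched an OpenAI-compatible vLLM server. A minimal sketch of how a client could have queried it, assuming vLLM's default host and port (http://localhost:8000) and the openai Python package, with the model name and API key taken from the serve command above:

# Sketch of a client for the server started by the deleted script.
# Assumes the default vLLM endpoint http://localhost:8000/v1 (not specified in the script itself).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="token-abc123")

response = client.chat.completions.create(
    model="Imran1/Qwen2.5-72B-Instruct-FP8",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,  # prompt plus output must fit within --max-model-len 2000
)
print(response.choices[0].message.content)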