Imran1 committed on
Commit
272e727
·
verified ·
1 Parent(s): 4359934

Delete code/inference.py

Browse files
Files changed (1) hide show
  1. code/inference.py +0 -22
code/inference.py DELETED
@@ -1,22 +0,0 @@
1
- import os
2
- import subprocess
3
-
4
- def run_vllm_inference():
5
- # Set the necessary environment variables
6
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
7
-
8
- # vLLM serve command
9
- command = [
10
- "vllm", "serve", "Imran1/Qwen2.5-72B-Instruct-FP8",
11
- "--tensor-parallel-size", "4",
12
- "--dtype", "auto",
13
- "--api-key", "token-abc123",
14
- "--max-model-len", "2000",
15
- "--kv-cache-dtype", "auto"
16
- ]
17
-
18
- # Run the command as a subprocess
19
- subprocess.run(command)
20
-
21
- if __name__ == "__main__":
22
- run_vllm_inference()