Spaces:

damienbenveniste
/

deploy_vLLM

Sleeping

Damien Benveniste committed on Aug 12, 2024

Commit

1d83e4f

1 Parent(s): e30f1f6

modified

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from vllm import AsyncLLMEngine, SamplingParams
 import asyncio
 import json
@@ -9,7 +10,11 @@ app = FastAPI()
 # Initialize the AsyncLLMEngine
 # Replace 'your-model-path' with the actual path or name of your model
-engine = AsyncLLMEngine.from_pretrained('microsoft/Phi-3-mini-4k-instruct')
 class GenerationRequest(BaseModel):
     prompt: str

 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from vllm import AsyncLLMEngine, SamplingParams
+from vllm.engine.arg_utils import AsyncEngineArgs
 import asyncio
 import json
 # Initialize the AsyncLLMEngine
 # Replace 'your-model-path' with the actual path or name of your model
+engine = AsyncLLMEngine.from_engine_args(
+    AsyncEngineArgs(
+        model='microsoft/Phi-3-mini-4k-instruct',
+    )
+)
 class GenerationRequest(BaseModel):
     prompt: str