Damien Benveniste committed on
Commit
1d83e4f
·
1 Parent(s): e30f1f6
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, Request
2
  from fastapi.responses import StreamingResponse
3
  from pydantic import BaseModel
4
  from vllm import AsyncLLMEngine, SamplingParams
 
5
  import asyncio
6
  import json
7
 
@@ -9,7 +10,11 @@ app = FastAPI()
9
 
10
  # Initialize the AsyncLLMEngine
11
  # Replace 'your-model-path' with the actual path or name of your model
12
- engine = AsyncLLMEngine.from_pretrained('microsoft/Phi-3-mini-4k-instruct')
 
 
 
 
13
 
14
  class GenerationRequest(BaseModel):
15
  prompt: str
 
2
  from fastapi.responses import StreamingResponse
3
  from pydantic import BaseModel
4
  from vllm import AsyncLLMEngine, SamplingParams
5
+ from vllm.engine.arg_utils import AsyncEngineArgs
6
  import asyncio
7
  import json
8
 
 
10
 
11
  # Initialize the AsyncLLMEngine
12
  # Replace 'your-model-path' with the actual path or name of your model
13
+ engine = AsyncLLMEngine.from_engine_args(
14
+ AsyncEngineArgs(
15
+ model='microsoft/Phi-3-mini-4k-instruct',
16
+ )
17
+ )
18
 
19
  class GenerationRequest(BaseModel):
20
  prompt: str