Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
1d83e4f
1
Parent(s):
e30f1f6
modified
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI, Request
|
|
2 |
from fastapi.responses import StreamingResponse
|
3 |
from pydantic import BaseModel
|
4 |
from vllm import AsyncLLMEngine, SamplingParams
|
|
|
5 |
import asyncio
|
6 |
import json
|
7 |
|
@@ -9,7 +10,11 @@ app = FastAPI()
|
|
9 |
|
10 |
# Initialize the AsyncLLMEngine
|
11 |
# Replace 'your-model-path' with the actual path or name of your model
|
12 |
-
engine = AsyncLLMEngine.
|
|
|
|
|
|
|
|
|
13 |
|
14 |
class GenerationRequest(BaseModel):
|
15 |
prompt: str
|
|
|
2 |
from fastapi.responses import StreamingResponse
|
3 |
from pydantic import BaseModel
|
4 |
from vllm import AsyncLLMEngine, SamplingParams
|
5 |
+
from vllm.engine.arg_utils import AsyncEngineArgs
|
6 |
import asyncio
|
7 |
import json
|
8 |
|
|
|
10 |
|
11 |
# Initialize the AsyncLLMEngine
|
12 |
# Replace 'your-model-path' with the actual path or name of your model
|
13 |
+
engine = AsyncLLMEngine.from_engine_args(
|
14 |
+
AsyncEngineArgs(
|
15 |
+
model='microsoft/Phi-3-mini-4k-instruct',
|
16 |
+
)
|
17 |
+
)
|
18 |
|
19 |
class GenerationRequest(BaseModel):
|
20 |
prompt: str
|