from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Load the quantized Phi-4 model (Q4_K_M GGUF) from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
)
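
# Note: Llama.from_pretrained fetches the GGUF file from the Hub via
# huggingface_hub and caches it locally, so the first start is slow and
# later starts reuse the cached file. Loading above keeps the library's
# defaults; a larger context window could be requested by passing e.g.
# n_ctx=4096 (that value is illustrative, not from the original file).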
# Define request model
class ChatRequest(BaseModel):
    system_prompt: str
    query: str
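
# Example request body matching this schema (illustrative values only):
# {"system_prompt": "You are a helpful assistant.", "query": "What is FastAPI?"}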
@app.post("/chat-p4q4")
async def chat(request: ChatRequest):
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": request.system_prompt},
            {"role": "user", "content": request.query},
        ]
    )
    # Return the full llama.cpp completion dict (choices, usage, etc.),
    # not just the generated message text
    return {"response": response}
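
# Usage sketch (assumes this file is saved as app.py and uvicorn is installed;
# the host/port values are illustrative):
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/chat-p4q4 \
#        -H "Content-Type: application/json" \
#        -d '{"system_prompt": "You are concise.", "query": "Hello"}'
#
# The generated text, if present, sits under
# response["choices"][0]["message"]["content"] in the returned dict.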