# cv_job / app.py
# FastAPI service exposing the SmolLM2-360M-Instruct model (GGUF, via llama-cpp-python).
# (Header reconstructed from Hugging Face page chrome that was pasted into the file
# and would otherwise be a syntax error.)
from fastapi import FastAPI, Form
from llama_cpp import Llama
from typing import List
import json
# Initialize FastAPI app
app = FastAPI()
# Load the Llama model once at import time; from_pretrained downloads the GGUF
# weights from the Hugging Face Hub on first run, so startup can be slow.
llm = Llama.from_pretrained(
repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
filename="smollm2-360m-instruct-q8_0.gguf", # Replace with the actual path to your GGUF file
)
# Endpoint to generate response from model based on user input
@app.post("/ask/")
async def ask_question(prompt: str = Form(...)):
    """Run one chat turn: forward the form-posted prompt to the model.

    Returns a JSON body of the form ``{"response": <model reply text>}``.
    """
    chat = [{"role": "user", "content": prompt}]
    completion = llm.create_chat_completion(messages=chat)
    # llama-cpp returns an OpenAI-style payload; the reply text lives here.
    answer = completion["choices"][0]["message"]["content"]
    return {"response": answer}
# Endpoint to test a simple query (optional)
@app.get("/test/")
async def test():
    """Smoke-test endpoint: ask the model a fixed question to verify it loads and responds."""
    completion = llm.create_chat_completion(
        messages=[{"role": "user", "content": "What is the capital of France?"}]
    )
    reply = completion["choices"][0]["message"]["content"]
    return {"test_response": reply}
import uvicorn  # NOTE: conventionally placed at the top of the file; kept here to preserve layout.

if __name__ == "__main__":
    # BUG FIX: the import string must match this module's filename.
    # This file is app.py, so the target is "app:app"; the original "main:app"
    # made uvicorn fail with "Could not import module 'main'".
    uvicorn.run("app:app", host="0.0.0.0", port=8000)