import uvicorn
from fastapi import FastAPI, Form
from llama_cpp import Llama

# Initialize the FastAPI app
app = FastAPI()
# Download and load the Llama model from the Hugging Face Hub
# (the GGUF file is fetched and cached locally on first use)
llm = Llama.from_pretrained(
    repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
    filename="smollm2-360m-instruct-q8_0.gguf",  # GGUF file name within the repo
)
# Endpoint to generate a response from the model based on user input.
# The original listing omitted the route decorator; "/ask" is an assumed path.
# Note: Form fields require the python-multipart package to be installed.
@app.post("/ask")
async def ask_question(prompt: str = Form(...)):
    # Format the prompt as a single-turn chat message
    messages = [
        {"role": "user", "content": prompt}
    ]
    # Generate a response using Llama (returns an OpenAI-style completion dict)
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    return {"response": response_content}
# Optional endpoint to sanity-check the model with a fixed question.
# The "/test" route path is likewise an assumption; the decorator was missing.
@app.get("/test")
async def test():
    messages = [{"role": "user", "content": "What is the capital of France?"}]
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    return {"test_response": response_content}
if __name__ == "__main__":
    # Serve the API with uvicorn on all interfaces, port 8000
    # ("main:app" assumes this file is saved as main.py)
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
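# A minimal client sketch for the "/ask" endpoint, kept as a comment so the
# server script stays self-contained. It assumes the server is running locally
# on port 8000 and uses the "/ask" route path assumed above; run it from a
# separate process:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       data={"prompt": "What is the capital of France?"},
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])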