from fastapi import FastAPI, Form
from llama_cpp import Llama

# Initialize FastAPI app
app = FastAPI()

# Load the Llama model
llm = Llama.from_pretrained(
    repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
    filename="smollm2-360m-instruct-q8_0.gguf",  # Name of the GGUF file within the Hub repo
)
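# Note: Llama.from_pretrained fetches the GGUF file from the Hugging Face Hub and
# caches it locally, so the first startup needs network access and may take a while.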

# Endpoint to generate response from model based on user input
@app.post("/ask/")
async def ask_question(prompt: str = Form(...)):
    # Format the prompt as a chat message
    messages = [
        {"role": "user", "content": prompt}
    ]
    
    # Generate a response using Llama
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    
    return {"response": response_content}
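# Example request (a sketch, assuming the server is running locally on port 8000;
# the prompt is sent as form data because the endpoint uses Form(...)):
#   curl -X POST http://localhost:8000/ask/ -F "prompt=What is FastAPI?"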

# Endpoint to test a simple query (optional)
@app.get("/test/")
async def test():
    # Test the model with a simple question
    messages = [{"role": "user", "content": "What is the capital of France?"}]
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    
    return {"test_response": response_content}

 
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
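
# To start the server, run this file directly (assuming it is saved as main.py,
# which the "main:app" import string above implies):
#   python main.py
# or launch it with the uvicorn CLI:
#   uvicorn main:app --host 0.0.0.0 --port 8000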