from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


# Request body for the POST /ask endpoint.
class Question(BaseModel):
    text: str


app = FastAPI()

# Download the GGUF weights from the Hugging Face Hub (cached locally after the first call).
model_name_or_path = "souzat19/Llama3.1_fn14133.29122024"
model_basename = "unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
print(f"Model path: {model_path}")

# Load the model through the llama-cpp-python bindings.
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,       # CPU threads used for generation
    n_batch=512,       # prompt-processing batch size
    n_gpu_layers=-1,   # offload all layers to the GPU when one is available
    n_ctx=4096,        # context window in tokens
)

# Alpaca-style prompt template: instruction, input, and an empty response slot
# for the model to complete.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def get_response(text: str) -> str:
    # System instruction (Portuguese): "You are an assistant specialized in public
    # procurement planning under Law 14.133/2021 and its infra-legal regulations.
    # Answer clearly, in detail and didactically, and use practical examples to
    # explain the concepts."
    formatted_prompt = alpaca_prompt.format(
        "Você é um assistente especializado em planejamento de compras públicas de acordo com a Lei 14.133/2021 e regulamentos infralegais. Responda de forma clara, detalhada e didática e utilize exemplos práticos para explicar os conceitos.",
        text,
        "",
    )
    response = lcpp_llm(
        prompt=formatted_prompt,
        max_tokens=2096,
        temperature=0.5,
        top_p=0.95,
        top_k=50,
        stop=["### Response:"],  # stop if the model starts emitting a new response header
        echo=True,               # the returned text includes the prompt itself
    )
    response_text = response["choices"][0]["text"]

    # Because echo=True returns the prompt as well, keep only the text after
    # the "### Response:" marker.
    if "### Response:" in response_text:
        answer = response_text.split("### Response:")[1].strip()
    else:
        answer = response_text.strip()

    print(f"Final Answer: {answer}")
    return answer


@app.post("/ask")
def ask_question(question: Question):
    response = get_response(question.text)
    return {"response": response}


@app.get("/")
async def root():
    return {"status": "online", "message": "API is running. Use POST /ask to make queries."}
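

# Minimal sketch of a local entry point, assuming uvicorn is installed (the standard
# ASGI server for FastAPI, though it is not imported elsewhere in this file); in
# production the app would typically be launched externally, e.g. `uvicorn main:app`.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)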