# GUI/config/model.py
import time
from typing import List, Optional

import requests
from langchain.llms.base import LLM

from util.token_access import load_token

token = load_token()
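# `load_token` is defined in util/token_access.py (not shown here); presumably
# it returns a Hugging Face access token. A minimal sketch of such a helper
# might read it from an environment variable, e.g.:
#
#     import os
#
#     def load_token() -> str:
#         return os.environ["HUGGINGFACE_TOKEN"]  # hypothetical variable name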
# Renamed from `model` to avoid shadowing the class field of the same name.
DEFAULT_MODEL = "meta-llama/Llama-3.2-3B-Instruct"


class GuiChat(LLM):
    """GUI LLM wrapper using token-based login."""

    chatbot: Optional[object] = None
    auth_token: Optional[str] = None
    conversation: Optional[str] = None
    model: Optional[str] = DEFAULT_MODEL
    # Sampling parameters forwarded to the Inference API.
    temperature: Optional[float] = 0.9
    top_p: Optional[float] = 0.5
    repetition_penalty: Optional[float] = 1.2
    top_k: Optional[int] = 20
    truncate: Optional[int] = 512
    max_new_tokens: Optional[int] = 512
    stream_resp: Optional[bool] = True
    log: Optional[bool] = True
    # Running statistics backing get_avg_response_time().
    avg_response_time: float = 0.0
    num_calls: int = 0
    @property
    def _llm_type(self) -> str:
        """Define the LLM type for HuggingChat."""
        return "huggingface"
    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Call the Hugging Face model and return the response.

        `stop` is accepted for LangChain compatibility but is not
        forwarded to the Inference API.
        """
        headers = {
            "Authorization": f"Bearer {self.auth_token}",
            "Content-Type": "application/json",
        }
        endpoint = f"https://api-inference.huggingface.co/models/{self.model}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": self.temperature,
                "max_new_tokens": self.max_new_tokens,
                "top_p": self.top_p,
                "top_k": self.top_k,
                "repetition_penalty": self.repetition_penalty,
                "truncate": self.truncate,
            },
        }
        start = time.perf_counter()
        response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
        elapsed = time.perf_counter() - start

        # Maintain a running average so get_avg_response_time() reports real data.
        self.avg_response_time = (
            self.avg_response_time * self.num_calls + elapsed
        ) / (self.num_calls + 1)
        self.num_calls += 1

        if response.status_code == 200:
            return response.json()[0]["generated_text"]
        return f"Error: {response.status_code}, {response.text}"
    def get_avg_response_time(self) -> float:
        """Return the average response time in seconds."""
        return self.avg_response_time


chatbot = GuiChat(auth_token=token)

# TEST-BOT
"""
while True:
    ask = input("Type here: ")
    resposta = chatbot._call(ask)
    print(f">>> {resposta}")
"""