import time
from typing import List, Optional

import requests
from langchain.llms.base import LLM

from util.token_access import load_token

token = load_token()
model = "meta-llama/Llama-3.2-3B-Instruct"

class GuiChat(LLM):
    """GUI LLM wrapper using token-based login."""

    chatbot: Optional[object] = None
    auth_token: Optional[str] = None
    conversation: Optional[str] = None
    model: Optional[str] = model
    temperature: Optional[float] = 0.9
    top_p: Optional[float] = 0.5
    repetition_penalty: Optional[float] = 1.2
    top_k: Optional[int] = 20
    truncate: Optional[int] = 512
    max_new_tokens: Optional[int] = 512
    stream_resp: Optional[bool] = True
    log: Optional[bool] = True
    avg_response_time: float = 0.0
    n_calls: int = 0  # call counter for the running latency average below
    @property
    def _llm_type(self) -> str:
        """Identifies the LLM type for LangChain."""
        return "huggingface"
    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        """Calls the Hugging Face Inference API and returns the generated text."""
        # `stop` is accepted for LangChain compatibility but is not forwarded
        # to the Inference API payload below.
        headers = {
            "Authorization": f"Bearer {self.auth_token}",
            "Content-Type": "application/json",
        }
        endpoint = f"https://api-inference.huggingface.co/models/{self.model}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": self.temperature,
                "max_new_tokens": self.max_new_tokens,
                "top_p": self.top_p,
                "top_k": self.top_k,
                "repetition_penalty": self.repetition_penalty,
                "truncate": self.truncate,
            },
        }
        start = time.time()
        # Generous timeout, since hosted models may cold-start.
        response = requests.post(endpoint, headers=headers, json=payload, timeout=120)
        # Maintain a simple cumulative mean of request latency so
        # get_avg_response_time() reports real data.
        self.n_calls += 1
        self.avg_response_time += (time.time() - start - self.avg_response_time) / self.n_calls
        if response.status_code == 200:
            return response.json()[0]["generated_text"]
        return f"Error: {response.status_code}, {response.text}"
    def get_avg_response_time(self) -> float:
        """Returns the average response time across calls."""
        return self.avg_response_time

chatbot = GuiChat(auth_token=token)
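# Hedged sketch (not in the original file): a quick smoke test through
# LangChain's public __call__, which routes into _call above and also feeds
# the running latency average. The prompt text is illustrative.
# print(chatbot("Hello, who are you?"))
# print(f"avg response time: {chatbot.get_avg_response_time():.2f}s")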
# TEST-BOT
"""
while True:
    ask = input("Type here: ")
    resposta = chatbot(ask)  # public LangChain call; dispatches to _call
    print(f">>> {resposta}")
"""