File size: 2,851 Bytes
c551206 020a962 1df13e1 c551206 8cd9af7 c551206 1df13e1 c551206 9faed3d 14126e6 8cd9af7 c551206 a9af4d7 c551206 85dbf4a c551206 0e16686 c551206 0e16686 cb4c132 0e16686 8c77830 cb4c132 9faed3d a414401 9faed3d 8c77830 14126e6 9faed3d 9809955 85dbf4a 9faed3d c551206 9faed3d 32fc9d9 9faed3d cb4c132 9faed3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import json
import os
import time

import gradio as gr
import requests
API_KEY = os.getenv('API_KEY')
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
headers = {
"Authorization": f"Bearer {API_KEY}",
"Accept": "application/json",
"Content-Type": "application/json",
}
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
def call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p):
    """Send a chat-completion request to the NVIDIA NVCF endpoint.

    Args:
        message: The current user message.
        history_api: Prior turns as [user_text, assistant_text] pairs.
        system_message: Optional system prompt; skipped when falsy.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text, or a fallback error string (in
        Portuguese, matching the UI's language) when no choices come back.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    # Build the OpenAI-style message list: system prompt first, then the
    # prior turns, then the new user message.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_text, assistant_text in history_api:
        messages.append({"role": "user", "content": user_text})
        if assistant_text:  # skip turns with no assistant reply yet
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False,
    }

    session = requests.Session()
    # Fix: explicit timeouts — requests has no default, so a stalled
    # connection would otherwise hang the UI forever.
    response = session.post(INVOKE_URL, headers=headers, json=payload, timeout=60)
    # HTTP 202 means the job is still running; poll the status endpoint.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        if not request_id:
            # Fix: the original crashed with a TypeError (str + None) when
            # this header was absent; bail out and fall through to the
            # normal response handling below.
            break
        # Fix: brief back-off between polls instead of busy-looping against
        # the status endpoint.
        time.sleep(0.5)
        response = session.get(FETCH_URL_FORMAT + request_id, headers=headers, timeout=60)
    response.raise_for_status()

    response_body = response.json()
    choices = response_body.get("choices")
    if choices:
        return choices[0]["message"]["content"]
    return "Desculpe, ocorreu um erro ao gerar a resposta."
def chatbot_function(message, history_api, system_message, max_tokens, temperature, top_p):
    """Gradio callback: query the model and record the exchange.

    Forwards all inputs to ``call_nvidia_api``, appends the new
    [message, reply] pair to ``history_api`` in place, and returns the
    reply together with the updated history list.
    """
    reply = call_nvidia_api(
        message, history_api, system_message, max_tokens, temperature, top_p
    )
    # += extends the caller's list in place, preserving the original's
    # mutation of the shared history.
    history_api += [[message, reply]]
    return reply, history_api
# Control widgets wired in below as ChatInterface additional inputs.
system_msg = gr.Textbox(
    value=BASE_SYSTEM_MESSAGE,
    label="System Message",
    placeholder="System prompt.",
    lines=5,
)
max_tokens = gr.Slider(minimum=20, maximum=1024, step=20, value=1024, label="Max Tokens")
temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.2, label="Temperature")
top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label="Top P")
with gr.Blocks() as demo:
    # Session-level history store.
    # NOTE(review): ChatInterface manages its own history and passes it to
    # the callback directly — this State object is never read back; confirm
    # it is actually needed.
    chat_history_state = gr.State([])
    chat_interface = gr.ChatInterface(
        # NOTE(review): ChatInterface generally expects fn to return just the
        # reply string; chatbot_function returns (reply, history) — verify
        # the installed Gradio version tolerates the tuple.
        fn=chatbot_function,
        # NOTE(review): passing a gr.State as a Chatbot's initial value looks
        # wrong — Chatbot.value expects a plain list of message pairs; verify.
        chatbot=gr.Chatbot(value=chat_history_state),
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 70B Free Demo",
    )
# Start the Gradio server (blocking call).
demo.launch()
|