File size: 4,556 Bytes
c551206
 
020a962
1df13e1
c551206
1df13e1
 
8b4d47d
c551206
1df13e1
c551206
 
 
 
 
 
 
 
a9af4d7
0e16686
 
a9af4d7
c551206
a9af4d7
c551206
 
 
 
 
85dbf4a
c551206
 
0e16686
c551206
 
 
0e16686
cb4c132
0e16686
8c77830
cb4c132
0e16686
 
a414401
0e16686
8c77830
85dbf4a
0e16686
a9af4d7
2813167
a9af4d7
0e16686
 
 
 
020a962
0e16686
 
 
bf2801d
0e16686
9809955
85dbf4a
 
 
 
1df13e1
c551206
0e16686
 
 
cb4c132
cdf0c60
0e16686
 
 
1df13e1
 
 
 
 
 
 
 
 
 
 
 
 
cb4c132
 
 
 
1df13e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import requests
import os
import json

# API key is read from the environment (avoids hard-coding the secret);
# `os.getenv` returns None when the variable is unset.
API_KEY = os.getenv('API_KEY') 
# NVIDIA Cloud Functions (NVCF) invocation endpoint for the hosted model.
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/df2bee43-fb69-42b9-9ee5-f4eabbeaf3a8"
# Status-polling base URL; a request id is appended while an invocation
# is still pending (HTTP 202) — see call_nvidia_api below.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

# Headers shared by every request; bearer auth uses the key loaded above.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Default system prompt, shown (and editable) in the UI textbox.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
    """Call the NVIDIA NVCF chat-completion endpoint and return the reply.

    Args:
        history: list of ``[user_message, assistant_message]`` pairs; the
            assistant entry may be falsy (None/"") for a not-yet-answered turn.
        system_message: optional system prompt prepended to the conversation.
        max_tokens: sampling cap forwarded to the API unchanged.
        temperature: sampling temperature forwarded unchanged.
        top_p: nucleus-sampling parameter forwarded unchanged.

    Returns:
        ``(assistant_message, choice)`` where ``choice`` is the raw first
        entry of the response's "choices" list, or an error string and
        ``None`` when the response carries no choices.

    Raises:
        requests.HTTPError: if the final response has an error status.
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []
    # Flatten the [user, assistant] pairs into the OpenAI-style message list.
    # (The original one-liner `[{...}, {...} for msg in history]` was a
    # SyntaxError — a list comprehension cannot yield two elements per item.)
    # The user turn is always sent; the assistant turn only when present, so
    # a trailing unanswered user message still reaches the model.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False,
    }

    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload)
    # NVCF answers 202 while the invocation is still queued; poll the status
    # endpoint with the request id until a terminal response arrives.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        fetch_url = FETCH_URL_FORMAT + request_id
        response = session.get(fetch_url, headers=headers)
    response.raise_for_status()
    response_body = response.json()

    if response_body.get("choices"):
        assistant_message = response_body["choices"][0]["message"]["content"]
        # Return both the plain text for the UI and the raw choice structure.
        return assistant_message, response_body["choices"][0]
    return "Desculpe, ocorreu um erro ao gerar a resposta.", None


def chatbot_submit(message, chat_history_ui, chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val):
    """Handle one chat turn: query the NVIDIA API and update both histories.

    Args:
        message: the new user message for this turn.
        chat_history_ui: list of ``[user, assistant]`` pairs shown in the UI;
            mutated in place.
        chat_history_api: list of ``[user, assistant]`` pairs replayed to the
            API on every call; mutated in place.
        system_message: system prompt forwarded to the API.
        max_tokens_val: max-tokens sampling parameter.
        temperature_val: temperature sampling parameter.
        top_p_val: top-p sampling parameter.

    Returns:
        ``(assistant_message, chat_history_ui, chat_history_api)``.
    """
    print("Updating chatbot...")

    # Include the pending user message in the replayed history so the model
    # actually sees it (the original call omitted the newest turn entirely).
    # The assistant slot stays None until the API answers.
    assistant_message, choice = call_nvidia_api(
        chat_history_api + [[message, None]],
        system_message, max_tokens_val, temperature_val, top_p_val,
    )

    # Update the history shown in the UI.
    chat_history_ui.append([message, assistant_message])

    # Store a plain [user, assistant] pair — NOT the raw "choice" dict — so
    # the next call_nvidia_api invocation can rebuild the conversation.
    # (Appending the choice dict corrupted the pair-based iteration.)
    if choice:
        chat_history_api.append([message, assistant_message])

    return assistant_message, chat_history_ui, chat_history_api

# Components rendered inside the ChatInterface "Additional Inputs" accordion.
system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)


def _chat_fn(message, history, chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val):
    """Adapter for gr.ChatInterface, which calls fn(message, history,
    *additional_inputs) and expects only the reply string back.

    chatbot_submit returns (reply, ui_history, api_history); the extra values
    are dropped here because ChatInterface manages the visible history itself
    and chat_history_api (a gr.State value) is mutated in place.
    """
    assistant_message, _ui, _api = chatbot_submit(
        message, history, chat_history_api, system_message,
        max_tokens_val, temperature_val, top_p_val,
    )
    return assistant_message


# Gradio interface setup
with gr.Blocks() as demo:
    # Per-session API-side conversation history.
    chat_history_state_api = gr.State([])
    chatbot = gr.ChatInterface(
        fn=_chat_fn,
        # gr.ChatInterface has no `inputs`/`outputs` parameters (the original
        # kwargs would raise TypeError); extra values are wired through
        # `additional_inputs` and arrive after (message, history).
        additional_inputs=[chat_history_state_api, system_msg, max_tokens, temperature, top_p],
        title="Chatbot Interface",  # NOTE: the comma here was missing (SyntaxError)
        description="""<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
            <strong>Explore the Capabilities of LLAMA 2 70B</strong>
        </div>
        <p>Llama 2 is a large language AI model capable of generating text and code in response to prompts.</p>
        <p><strong>How to Use:</strong></p>
        <ol>
            <li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
            <li>Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.</li>
            <li>Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.</li>
        </ol>
        <p><strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
        <p><strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
        <p><strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>""",
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
    )

demo.launch()