File size: 4,925 Bytes
c551206
 
 
 
 
cb4c132
0c5bb4b
c551206
 
 
 
 
 
 
 
cb4c132
c551206
 
cb4c132
 
 
 
 
 
9809955
cb4c132
c551206
 
 
 
 
8c77830
cb4c132
8c77830
9809955
8c77830
c551206
8c77830
c551206
 
 
 
 
0c5bb4b
c551206
 
 
 
 
 
0c5bb4b
cb4c132
 
8c77830
cb4c132
9809955
cb4c132
8c77830
4519318
e3d2c03
c15a43b
2813167
8c77830
9809955
2813167
e3d2c03
8c77830
be13696
bf2801d
9809955
 
bf2801d
 
 
6c43314
9809955
6c43314
 
 
 
 
c551206
8e19fdb
cb4c132
cdf0c60
4519318
f8d23e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb4c132
 
 
 
c051fb4
 
 
 
 
c551206
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import requests
import json
import os

# API and environment variables
# Bearer token for the NVIDIA endpoint, read from the API_KEY environment
# variable. os.getenv returns None when the variable is unset, in which case
# the Authorization header built below is literally "Bearer None".
API_KEY = os.getenv('API_KEY')
# URL that call_nvidia_api POSTs the chat payload to.
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
# Prefix used when polling a pending request: the request id is appended to
# it by plain concatenation (despite the name, it is not a format string).
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
# Headers sent with both the initial POST and the polling GET requests.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Base system message
# Initial content of the "System Message" textbox in the UI.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

def clear_chat() -> None:
    """Reset the stored chat history and empty the chat textbox.

    Assigns to the ``value`` attribute of the module-level
    ``chat_history_state`` and ``chatbot.textbox`` objects. Both names are
    bound further down in this file, so this can only be called after the UI
    has been built.
    """
    print("Clearing chat...")
    # NOTE(review): these assignments only change the components' `value`
    # attributes; whether that reaches an already-open browser session is not
    # visible from this file -- confirm against the Gradio documentation.
    chat_history_state.value = []
    chatbot.textbox.value = ""

def user(message, history):
    """Record the user's turn in the conversation.

    A falsy ``history`` (``None`` or an empty list) is replaced by a fresh
    list; otherwise the list passed in is extended in place. The list that
    now ends with the new ``{"role": "user", ...}`` entry is returned.
    """
    print(f"User message: {message}")
    conversation = history if history else []
    conversation.append(dict(role="user", content=message))
    return conversation

def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA API and append the model's reply.

    Args:
        history: Conversation so far. Entries are normally message dicts with
            "role" and "content" keys (as produced by ``user``); two-item
            ``(role, content)`` sequences are also accepted. ``None`` is
            treated as an empty conversation.
        system_message: Text sent as the leading "system" message.
        max_tokens: Forwarded as the payload's "max_tokens".
        temperature: Forwarded as the payload's "temperature".
        top_p: Forwarded as the payload's "top_p".

    Returns:
        The history list, with an ``{"role": "assistant", ...}`` entry
        appended when the response body contains a non-empty "choices" list.
        When it does not, the history is returned without a new entry.

    Raises:
        requests.HTTPError: If a polling request returns an error status.
        RuntimeError: If the API answers 202 without an NVCF-REQID header,
            so there is nothing to poll.
    """
    if history is None:
        history = []

    messages = [{"role": "system", "content": system_message}]
    for entry in history:
        if isinstance(entry, dict):
            # Read the fields by key: unpacking a dict yields its *keys*,
            # which would send the literal strings "role" and "content".
            role, content = entry["role"], entry["content"]
        else:
            role, content = entry
        messages.append({"role": role, "content": content})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False,
    }
    print(f"Payload enviado: {payload}")

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        response = session.post(INVOKE_URL, headers=headers, json=payload)
        # 202 means the request is still pending: poll the status URL until
        # the API returns something else.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if request_id is None:
                raise RuntimeError(
                    "NVIDIA API returned 202 without an NVCF-REQID header"
                )
            response = session.get(FETCH_URL_FORMAT + request_id, headers=headers)
            response.raise_for_status()
        response_body = response.json()

    print(f"Payload recebido: {response_body}")
    if response_body.get("choices"):
        assistant_message = response_body["choices"][0]["message"]["content"]
        history.append({"role": "assistant", "content": assistant_message})
    return history

def chatbot_submit(message, chat_history, system_message, max_tokens_val, temperature_val, top_p_val):
    """Handle one chat turn: record the user message and fetch the reply.

    Returns a ``(reply, chat_history)`` tuple. ``reply`` is the content of
    the last history entry when that entry is an assistant message, and a
    fixed apology string otherwise.
    """
    print("Updating chatbot...")

    # Add the user's message to the history, then call the NVIDIA API to
    # generate a response.
    chat_history = call_nvidia_api(
        user(message, chat_history),
        system_message,
        max_tokens_val,
        temperature_val,
        top_p_val,
    )

    # Start from the fallback text and replace it only when the newest entry
    # is an assistant turn.
    reply = "Desculpe, ocorreu um erro ao gerar a resposta."
    if chat_history and chat_history[-1]["role"] == "assistant":
        reply = chat_history[-1]["content"]

    return reply, chat_history

# UI components created ahead of the Blocks context; the four input widgets
# are handed to the ChatInterface below through `additional_inputs`.
# NOTE(review): this State is rebound by the assignment inside the Blocks
# context below, so this first instance appears to be unused -- confirm.
chat_history_state = gr.State([])
system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
with gr.Blocks() as demo:
    # Rebinds the module-level name; this is the instance clear_chat() sees.
    chat_history_state = gr.State([])
    # The order of `additional_inputs` matches the trailing parameters of
    # chatbot_submit (system message, max tokens, temperature, top p).
    # NOTE(review): chatbot_submit returns a (reply, history) tuple; confirm
    # that this is the return shape ChatInterface expects from `fn`.
    chatbot = gr.ChatInterface(
        fn=chatbot_submit,
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 70B Free Demo",
        description="""
            <div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
                <strong>Explore the Capabilities of LLAMA 2 70B</strong>
            </div>
            <p>Llama 2 is a large language AI model capable of generating text and code in response to prompts.</p>
            <p><strong>How to Use:</strong></p>
            <ol>
                <li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
                <li>Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.</li>
                <li>Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.</li>
            </ol>
            <p><strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
            <p><strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
            <p><strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>
        """,
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
    )

    # NOTE(review): this rebinds the module-level `clear_chat` defined earlier
    # (same assignments, without the log line) and is not connected to any
    # event in this file -- confirm whether it is still needed.
    def clear_chat():
        """Reset the stored chat history and empty the chat textbox."""
        chat_history_state.value = []
        chatbot.textbox.value = ""

    # NOTE(review): called with no arguments and its result is discarded;
    # confirm what `clear()` does on a ChatInterface and whether the intent
    # was to wire `clear_chat` to the clear button instead.
    chatbot.clear()
# Launch the app; this runs at import time because there is no __main__ guard.
demo.launch()