File size: 4,505 Bytes
c551206
 
 
 
 
cb4c132
0c5bb4b
c551206
 
 
 
 
 
 
 
cb4c132
c551206
 
cb4c132
 
 
 
 
 
 
 
c551206
cb4c132
c551206
 
0c5bb4b
c551206
 
 
 
cb4c132
c551206
cb4c132
c551206
 
 
 
 
 
 
0c5bb4b
c551206
 
 
 
 
 
0c5bb4b
cb4c132
 
 
 
 
 
3be9b12
e5b736f
c15a43b
cb4c132
c15a43b
cb4c132
 
 
 
0c5bb4b
c15a43b
be13696
cb4c132
 
 
 
 
 
 
c551206
 
 
 
0c5bb4b
cb4c132
 
 
 
 
 
 
 
 
 
 
 
 
 
97d4bb3
0c5bb4b
cb4c132
 
 
 
 
 
 
 
 
 
 
c551206
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import requests
import json
import os

# API and environment variables
# The bearer token comes from the API_KEY environment variable (None if unset).
API_KEY = os.environ.get("API_KEY")
# Endpoint that starts a generation request for the hosted model function.
INVOKE_URL = (
    "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/"
    "0e349b44-440a-44e1-93e9-abe8dcb27158"
)
# Prefix for the polling endpoint; the request id is appended to it.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
# Headers sent with every call to the API.
headers = {
    "Authorization": "Bearer {}".format(API_KEY),
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Base system message
# Default text shown in the "System Message" textbox.
BASE_SYSTEM_MESSAGE = (
    "I carefully provide accurate, factual, thoughtful, nuanced answers "
    "and am brilliant at reasoning."
)


def clear_chat():
    """Reset the stored chat history and empty the chat input box.

    Assigns to the module-level ``chat_history_state`` and
    ``chatbot.textbox`` objects, so both must exist before this is called.
    """
    # NOTE(review): this assigns the components' ``value`` attribute directly;
    # whether that reaches an already-open browser session should be confirmed
    # against the Gradio documentation.
    print("Clearing chat...")
    empty_history, empty_text = [], ""
    chat_history_state.value = empty_history
    chatbot.textbox.value = empty_text


def user(message, history, system_message=None):
    """Append the user's turn (and optionally a system turn) to the history.

    Args:
        message: Text of the user's message.
        history: Existing list of ``{"role", "content"}`` dicts. A falsy
            value (``None`` or an empty list) is replaced by a new list;
            a non-empty list is extended in place.
        system_message: Optional system prompt. When truthy it is added
            immediately before the user message.

    Returns:
        The history list including the newly added entries.
    """
    print(f"User message: {message}")
    if not history:
        history = []
    new_turns = []
    if system_message:
        new_turns.append({"role": "system", "content": system_message})
    new_turns.append({"role": "user", "content": message})
    for turn in new_turns:
        history.append(turn)
    return history


def call_nvidia_api(history, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA API and append the model's reply.

    Args:
        history: List of ``{"role", "content"}`` dicts sent as the
            ``messages`` field of the request. It is mutated in place when
            a reply is received.
        max_tokens: Forwarded as the ``max_tokens`` field of the payload.
        temperature: Forwarded as the ``temperature`` field of the payload.
        top_p: Forwarded as the ``top_p`` field of the payload.

    Returns:
        The same ``history`` list. An assistant entry is appended when the
        response contains a non-empty ``choices`` list; otherwise the list
        is returned unchanged.

    Raises:
        requests.HTTPError: If the final response (initial POST or a
            polling GET) carries an HTTP error status.
        RuntimeError: If a 202 response does not include the
            ``NVCF-REQID`` header needed to poll for the result.
    """
    payload = {
        "messages": history,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }
    print(f"Payload enviado: {payload}")
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        response = session.post(INVOKE_URL, headers=headers, json=payload)
        # 202 means the request is still pending: poll the status endpoint
        # with the request id until a different status comes back.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if not request_id:
                raise RuntimeError(
                    "Received HTTP 202 without an NVCF-REQID header; cannot poll for the result."
                )
            response = session.get(FETCH_URL_FORMAT + request_id, headers=headers)
        # Checked after the loop so a failed initial POST is reported too,
        # not only failed polling requests.
        response.raise_for_status()
        response_body = response.json()
    print(f"Payload recebido: {response_body}")
    choices = response_body.get("choices")
    if choices:
        assistant_message = choices[0]["message"]["content"]
        history.append({"role": "assistant", "content": assistant_message})
    return history


def update_chatbot(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Add the user's message to the conversation and fetch the model's reply.

    The system message is placed once, at the start of the conversation,
    and only when the history does not already contain a system entry.
    Previously it was appended again after every assistant turn, which
    duplicated it in the middle of the conversation.

    Args:
        message: Text of the new user message.
        chat_history: List of ``{"role", "content"}`` dicts, or a falsy
            value to start a new conversation. A non-empty list is
            modified in place.
        system_message: System prompt; ignored when falsy.
        max_tokens: Passed through to ``call_nvidia_api``.
        temperature: Passed through to ``call_nvidia_api``.
        top_p: Passed through to ``call_nvidia_api``.

    Returns:
        The conversation list returned by ``call_nvidia_api``.
    """
    print("Updating chatbot...")
    chat_history = chat_history or []
    has_system_turn = any(turn.get("role") == "system" for turn in chat_history)
    if system_message and not has_system_turn:
        # Keep the system prompt as the first entry of the conversation.
        chat_history.insert(0, {"role": "system", "content": system_message})
    chat_history = user(message, chat_history)
    return call_nvidia_api(chat_history, max_tokens, temperature, top_p)


# Gradio interface components
# These are created up front and handed to the ChatInterface below as its
# additional inputs.
system_msg = gr.Textbox(
    value=BASE_SYSTEM_MESSAGE,
    label="System Message",
    placeholder="System prompt.",
    lines=5,
)
max_tokens = gr.Slider(minimum=20, maximum=1024, value=1024, step=20, label="Max Tokens")
temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Temperature")
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Top P")

def _history_to_messages(history):
    """Convert Gradio chat history into a list of ``{"role", "content"}`` dicts.

    Accepts both history shapes Gradio may pass: dict entries carrying
    ``role``/``content`` keys, and ``[user_text, assistant_text]`` pairs.
    Empty halves of a pair are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue
        user_text, assistant_text = turn
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def _respond(message, history, system_message, max_tokens, temperature, top_p):
    """Chat callback for ``gr.ChatInterface``: return the assistant's reply text.

    The four trailing parameters receive the live values of the
    ``additional_inputs`` components, in the order they are listed there.
    """
    messages = update_chatbot(
        message,
        _history_to_messages(history),
        system_message,
        max_tokens,
        temperature,
        top_p,
    )
    if messages and messages[-1].get("role") == "assistant":
        return messages[-1]["content"]
    # No assistant entry was appended, i.e. the API returned no choices.
    return "The model did not return a response."


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("LLAMA 2 70B Free Demo")
            description = """
            
                Explore the Capabilities of LLAMA 2 70B
            
            Llama 2 is a large language AI model capable of generating text and code in response to prompts. 
             How to Use:
            
                Enter your message in the textbox to start a conversation or ask a question.
                Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.
                Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.
            
             Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.
             HF Created by: @artificialguybr (Twitter)
             Discover more: artificialguy.com
            """
            gr.Markdown(description)

    # Kept because clear_chat() refers to this name.
    chat_history_state = gr.State([])
    # The callback takes the additional inputs as parameters, so it sees the
    # values currently set in the UI rather than the components' defaults.
    chatbot = gr.ChatInterface(
        fn=_respond,
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 2 70B Chatbot",
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
    )

    # No extra wiring for clearing: the ChatInterface's own clear button
    # resets the conversation. Blocks.clear() is not an event listener and
    # does not accept an ``outputs`` argument.

demo.launch()