Spaces:

artificialguybr
/

LLAMA-2-70B-FREE-DEMO

Running

File size: 4,192 Bytes

c551206
 
020a962
1df13e1
c551206
1df13e1
 
c551206
 
1df13e1
c551206
 
 
 
 
 
 
 
a9af4d7
1df13e1
 
 
 
 
 
a9af4d7
c551206
a9af4d7
c551206
 
 
 
 
 
 
 
 
 
 
1df13e1
cb4c132
8c77830
cb4c132
a414401
 
020a962
8c77830
a9af4d7
 
 
2813167
a9af4d7
 
be13696
a9af4d7
 
020a962
a9af4d7
a414401
bf2801d
a9af4d7
9809955
1df13e1
c551206
8e19fdb
1df13e1
 
 
 
cb4c132
cdf0c60
4519318
f8d23e7
1df13e1
 
 
 
 
 
 
 
 
 
 
 
 
cb4c132
 
 
 
1df13e1

import gradio as gr
import requests
import os
import json

# API key for the NVIDIA endpoint, read from the environment (None when unset).
API_KEY = os.environ.get("API_KEY")

# Both endpoints live under the same NVCF "pexec" root: one URL invokes the
# hosted function, the other is a prefix that a request id is appended to.
_NVCF_PEXEC_ROOT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec"
INVOKE_URL = _NVCF_PEXEC_ROOT + "/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
FETCH_URL_FORMAT = _NVCF_PEXEC_ROOT + "/status/"

# Headers sent with every request: bearer authentication plus JSON in/out.
_JSON_MIME = "application/json"
headers = {
    "Authorization": "Bearer {}".format(API_KEY),
    "Accept": _JSON_MIME,
    "Content-Type": _JSON_MIME,
}

# Default text shown in the "System Message" box of the UI.
BASE_SYSTEM_MESSAGE = (
    "I carefully provide accurate, factual, thoughtful, nuanced answers "
    "and am brilliant at reasoning."
)

def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
    """Send a conversation to the NVIDIA API and return the generated reply.

    Args:
        history: Prior conversation turns. Each entry is either a message
            mapping, forwarded unchanged (e.g. ``{"role": ..., "content": ...}``),
            or a ``[user_text, assistant_text]`` pair, which is expanded into
            a "user" and an "assistant" message. Empty or ``None`` halves of a
            pair are skipped, so a trailing ``[message, ""]`` placeholder
            contributes only the user turn.
        system_message: Optional system prompt; when truthy it is sent as the
            first message.
        max_tokens: Forwarded as the ``max_tokens`` field of the payload.
        temperature: Forwarded as the ``temperature`` field of the payload.
        top_p: Forwarded as the ``top_p`` field of the payload.

    Returns:
        The ``content`` of the first entry in the response's ``choices``, or a
        fixed apology string when the response carries no choices.

    Raises:
        requests.HTTPError: If the final response has an error status.
        requests.Timeout: If a single request exceeds the timeout below.
        RuntimeError: If a 202 response carries no ``NVCF-REQID`` header, so
            there is nothing to poll.
    """
    # Build the message list, starting with the system prompt when provided.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for entry in history:
        if isinstance(entry, dict):
            # Already a role/content message: forward untouched.
            messages.append(entry)
            continue
        # Pair-style history: one [user, assistant] row per exchange.
        user_text, assistant_text = entry
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False,
    }

    # (connect, read) timeouts. Deliberately generous: the only goal is that a
    # stalled connection cannot block the worker forever.
    request_timeout = (10, 300)

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        response = session.post(
            INVOKE_URL, headers=headers, json=payload, timeout=request_timeout
        )
        # 202 means the result is not ready yet: keep fetching the status URL
        # for the request id until a different status comes back.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if not request_id:
                raise RuntimeError(
                    "NVIDIA API returned 202 without an NVCF-REQID header."
                )
            response = session.get(
                FETCH_URL_FORMAT + request_id,
                headers=headers,
                timeout=request_timeout,
            )
        response.raise_for_status()
        response_body = response.json()

    choices = response_body.get("choices")
    if choices:
        return choices[0]["message"]["content"]
    return "Desculpe, ocorreu um erro ao gerar a resposta."

def chatbot_submit(message, chat_history, system_message, max_tokens_val, temperature_val, top_p_val):
    """Submit the user message to the model and record the exchange.

    Args:
        message: The new user message.
        chat_history: List of ``[user_text, assistant_text]`` pairs for the
            conversation so far. The new exchange is appended to it in place
            once a reply has been obtained.
        system_message: System prompt passed through to ``call_nvidia_api``.
        max_tokens_val: Max-token setting passed through to the API call.
        temperature_val: Temperature setting passed through to the API call.
        top_p_val: Top-p setting passed through to the API call.

    Returns:
        A ``(assistant_message, chat_history)`` tuple.

    NOTE(review): this function is registered as the ``fn`` of
    ``gr.ChatInterface``, whose documentation describes ``fn`` as returning
    the reply alone. The tuple return is kept to preserve the existing
    interface; confirm whether it should return only ``assistant_message``.
    """
    print("Updating chatbot...")

    # call_nvidia_api places history entries directly in the request's
    # "messages" list (next to a {"role": "system", ...} entry), so the
    # pair-based display history is converted to that role/content shape.
    messages = []
    for user_text, assistant_text in chat_history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # Ask the NVIDIA API for a reply to the conversation so far.
    assistant_message = call_nvidia_api(messages, system_message, max_tokens_val, temperature_val, top_p_val)

    # Record the exchange only after a reply exists, so a failed API call does
    # not leave a half-filled [message, ""] row behind in the history.
    chat_history.append([message, assistant_message])

    return assistant_message, chat_history

# Gradio UI: generation controls plus a chat interface wired to chatbot_submit.

# HTML shown under the chat title.
_CHAT_DESCRIPTION_HTML = """<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
            <strong>Explore the Capabilities of LLAMA 2 70B</strong>
        </div>
        <p>Llama 2 is a large language AI model capable of generating text and code in response to prompts.</p>
        <p><strong>How to Use:</strong></p>
        <ol>
            <li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
            <li>Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.</li>
            <li>Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.</li>
        </ol>
        <p><strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
        <p><strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
        <p><strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>"""

with gr.Blocks() as demo:
    chat_history_state = gr.State(value=[])

    # Controls whose current values are handed to chatbot_submit after the
    # message and history arguments, in this order.
    system_msg = gr.Textbox(
        value=BASE_SYSTEM_MESSAGE,
        label="System Message",
        placeholder="System prompt.",
        lines=5,
    )
    max_tokens = gr.Slider(minimum=20, maximum=1024, value=1024, step=20, label="Max Tokens")
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Top P")
    generation_controls = [system_msg, max_tokens, temperature, top_p]

    chatbot = gr.ChatInterface(
        fn=chatbot_submit,
        additional_inputs=generation_controls,
        title="LLAMA 70B Free Demo",
        description=_CHAT_DESCRIPTION_HTML,
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
    )

# Start the web app.
demo.launch()