File size: 2,851 Bytes
c551206
 
020a962
1df13e1
c551206
8cd9af7
 
c551206
1df13e1
c551206
 
 
 
 
 
 
 
9faed3d
14126e6
 
 
 
 
 
 
 
 
 
 
8cd9af7
c551206
a9af4d7
c551206
 
 
 
 
85dbf4a
c551206
 
0e16686
c551206
 
 
0e16686
cb4c132
0e16686
8c77830
cb4c132
9faed3d
a414401
9faed3d
8c77830
14126e6
9faed3d
 
 
 
9809955
85dbf4a
 
 
 
9faed3d
c551206
9faed3d
 
 
32fc9d9
9faed3d
 
cb4c132
 
9faed3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import os
import time

import gradio as gr
import requests

# API key is read from the environment; requests will fail auth if it is unset.
API_KEY = os.getenv('API_KEY')
# NVCF "pexec" endpoint for one specific hosted model function (the UUID in the path).
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
# Base URL for polling a pending (HTTP 202) request by its NVCF-REQID header value.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

# Shared HTTP headers for every API call.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Default system prompt pre-filled into the UI's "System Message" textbox.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

def call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p):
    """Call the NVIDIA NVCF chat-completion endpoint and return the assistant reply.

    Parameters
    ----------
    message : str
        The current user message.
    history_api : list
        Prior turns as [user, assistant] pairs; empty assistant entries
        are skipped when replaying the history.
    system_message : str
        Optional system prompt; prepended only when non-empty.
    max_tokens, temperature, top_p
        Sampling parameters forwarded verbatim in the request payload.

    Returns
    -------
    str
        The assistant's message content, or a (Portuguese) fallback error
        string when the response body contains no choices.

    Raises
    ------
    requests.HTTPError
        If the API returns a non-success status.
    RuntimeError
        If a 202 response arrives without the NVCF-REQID header needed to poll.
    """
    messages = []
    if system_message:  # include the system prompt only when one is set
        messages.append({"role": "system", "content": system_message})
    # Replay the conversation history as alternating user/assistant turns.
    for user_text, assistant_text in history_api:
        messages.append({"role": "user", "content": user_text})
        if assistant_text:  # guard against adding empty assistant replies
            messages.append({"role": "assistant", "content": assistant_text})

    # The current user message goes last.
    messages.append({"role": "user", "content": message})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }

    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload, timeout=60)
    # HTTP 202 means the result is not ready yet: poll the status endpoint.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        if not request_id:
            # Without a request id we cannot poll; fail loudly instead of
            # crashing on `FETCH_URL_FORMAT + None`.
            raise RuntimeError("NVCF returned 202 without an NVCF-REQID header")
        time.sleep(0.5)  # back off between polls instead of busy-waiting
        response = session.get(FETCH_URL_FORMAT + request_id, headers=headers, timeout=60)
    response.raise_for_status()
    response_body = response.json()

    if response_body.get("choices"):
        return response_body["choices"][0]["message"]["content"]
    # Original user-facing error message preserved verbatim.
    return "Desculpe, ocorreu um erro ao gerar a resposta."


def chatbot_function(message, history_api, system_message, max_tokens, temperature, top_p):
    """Gradio ChatInterface callback: return the assistant reply for *message*.

    gr.ChatInterface supplies *history_api* (the chat history as
    [user, assistant] pairs) and manages that history itself, so this
    callback must return ONLY the reply string. The previous version
    returned a (reply, history) tuple and mutated the history in place,
    which made the UI render the raw tuple instead of the reply text.
    """
    return call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p)

# UI controls wired into gr.ChatInterface as additional inputs (order matters:
# it must match chatbot_function's parameters after message/history).
system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)

with gr.Blocks() as demo:
    # gr.ChatInterface manages its own history internally. The previous code
    # passed a gr.State component as the Chatbot's `value`, but `value` must
    # be the initial message list, not a State — so the chatbot starts empty
    # and the unused State is dropped.
    chat_interface = gr.ChatInterface(
        fn=chatbot_function,
        chatbot=gr.Chatbot(),
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 70B Free Demo",
    )

demo.launch()