File size: 3,337 Bytes
c551206
 
020a962
1df13e1
c551206
8cd9af7
 
c551206
1df13e1
c551206
 
 
 
 
 
 
 
8cd9af7
0e16686
8cd9af7
 
 
 
 
c551206
a9af4d7
c551206
 
 
 
 
85dbf4a
c551206
 
0e16686
c551206
 
 
0e16686
cb4c132
0e16686
8c77830
cb4c132
0e16686
a414401
0e16686
8c77830
0e16686
 
 
 
020a962
0e16686
8cd9af7
bf2801d
0e16686
9809955
85dbf4a
 
 
 
1df13e1
c551206
0e16686
 
8cd9af7
 
 
 
cb4c132
cdf0c60
0e16686
 
 
cb4c132
 
8cd9af7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import os
import time

import gradio as gr
import requests

# API key is read from the environment so the secret never lives in source control.
API_KEY = os.getenv('API_KEY')
# NVCF endpoint for the hosted chat-completion function (ID identifies the model deployment).
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
# Base URL for polling a pending (HTTP 202) request; the NVCF-REQID header value is appended.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

# Shared headers for every request to the NVCF API.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Default system prompt pre-filled in the UI's System Message textbox.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

def call_nvidia_api(history_api, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA NVCF chat endpoint and return the reply.

    Args:
        history_api: list of [user_message, assistant_message] pairs; the
            assistant slot may be None/empty for a message still awaiting a reply.
        system_message: optional system prompt prepended to the conversation.
        max_tokens: sampling limit forwarded as-is to the API.
        temperature: sampling temperature forwarded as-is.
        top_p: nucleus-sampling parameter forwarded as-is.

    Returns:
        Tuple of (assistant_message, raw_choice_dict) on success, or a
        Portuguese error string and None when the response has no choices.

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []
    for user_msg, assistant_msg in history_api:
        # Always send the user turn; the final pair may not have a reply yet.
        # (The original code skipped reply-less pairs entirely, so the pending
        # user message was never sent to the model.)
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }

    # Session as a context manager so the connection pool is always released.
    with requests.Session() as session:
        response = session.post(INVOKE_URL, headers=headers, json=payload, timeout=120)
        # HTTP 202 means the request is still queued; poll the status endpoint
        # by request ID, with a short pause to avoid a busy-wait loop.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            fetch_url = FETCH_URL_FORMAT + request_id
            time.sleep(0.5)
            response = session.get(fetch_url, headers=headers, timeout=120)
        response.raise_for_status()
        response_body = response.json()

    choices = response_body.get("choices")
    if choices:
        return choices[0]["message"]["content"], choices[0]
    return "Desculpe, ocorreu um erro ao gerar a resposta.", None

def chatbot_submit(message, chat_history_ui, chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val):
    """Handle one chat turn: query the API and update both history states.

    Args:
        message: the user's new message for this turn.
        chat_history_ui: [user, assistant] pairs shown in the UI (mutated in place).
        chat_history_api: [user, assistant] pairs sent to the API (mutated in place).
        system_message: system prompt forwarded to the API.
        max_tokens_val: max-tokens slider value.
        temperature_val: temperature slider value.
        top_p_val: top-p slider value.

    Returns:
        Tuple of (assistant_message, chat_history_ui, chat_history_api).
    """
    # Include the pending user message in the request so the model actually
    # sees it; previously the new message was only appended to history AFTER
    # the call, so the API never received the current turn.
    pending_history = chat_history_api + [[message, None]]
    assistant_message, api_response = call_nvidia_api(
        pending_history, system_message, max_tokens_val, temperature_val, top_p_val
    )

    chat_history_ui.append([message, assistant_message])

    # Only record the turn in the API history when a real choice came back,
    # so error placeholders are not replayed as assistant context.
    if api_response:
        chat_history_api.append([message, assistant_message])

    return assistant_message, chat_history_ui, chat_history_api

# NOTE(review): the module-level widget definitions that used to live here
# were removed — Gradio components built outside a Blocks context are never
# rendered, and identical definitions inside the `gr.Blocks()` block below
# immediately shadowed these names anyway.
# Gradio interface setup.
# gr.ChatInterface accepts neither `inputs=` nor `outputs=` keyword arguments
# and expects an fn(message, history) signature returning a string, so the
# original call raised TypeError at startup. Wire the handler explicitly with
# components and a Button.click event, which matches chatbot_submit's signature.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Interface")
    chat_history_state_ui = gr.State([])
    chat_history_state_api = gr.State([])
    system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
    max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
    temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
    top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
    message_box = gr.Textbox(label="Your Message")
    response_box = gr.Text(label="Assistant Response")
    submit_btn = gr.Button("Send")
    submit_btn.click(
        fn=chatbot_submit,
        inputs=[message_box, chat_history_state_ui, chat_history_state_api, system_msg, max_tokens, temperature, top_p],
        outputs=[response_box, chat_history_state_ui, chat_history_state_api],
    )

demo.launch()