import argparse
from threading import Thread

import gradio as gr
import requests
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_LENGTH = 4096
DEFAULT_MAX_NEW_TOKENS = 1024
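# MAX_LENGTH caps the tokenized prompt in tokens (older turns are truncated
# away); DEFAULT_MAX_NEW_TOKENS seeds the "Max Tokens" slider defined below.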


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_model", type=str,
                        default="lliu01/fortios_one_config",
                        help="Hugging Face repo id or local model path")
    parser.add_argument("--n_gpus", type=int, default=1, help="number of GPUs")
    return parser.parse_args()
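
# Example invocation (assuming this file is saved as app.py):
#   python app.py --base_model lliu01/fortios_one_config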

@spaces.GPU()
def predict(message, history, system_prompt, temperature, max_tokens):
    # Rebuild the full conversation (system prompt + prior turns + new message)
    # in the chat-template format the model expects.
    messages = [{'role': 'system', 'content': system_prompt}]
    for human, assistant in history:
        messages.append({'role': 'user', 'content': human})
        messages.append({'role': 'assistant', 'content': assistant})
    messages.append({'role': 'user', 'content': message})
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
    enc = tokenizer([prompt], return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids
    attention_mask = enc.attention_mask

    # Keep only the most recent MAX_LENGTH tokens; the attention mask must be
    # sliced together with the ids or generate() sees mismatched shapes.
    if input_ids.shape[1] > MAX_LENGTH:
        input_ids = input_ids[:, -MAX_LENGTH:]
        attention_mask = attention_mask[:, -MAX_LENGTH:]

    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)
    generate_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        streamer=streamer,
        do_sample=True,
        top_p=0.95,
        temperature=temperature,
        max_new_tokens=max_tokens,  # honor the UI slider instead of a fixed constant
        use_cache=True,
        eos_token_id=tokenizer.eos_token_id,  # <|im_end|>
    )
    # Run generation in a background thread so partial output can be streamed
    # back to Gradio as the streamer yields decoded text chunks.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

def send_chat_history_to_api(history):
    # Post each (question, answer) pair to an internal collection endpoint.
    url = "http://10.59.16.61/api/v1/data-pairs/add"
    headers = {"Content-Type": "application/json"}
    for human, assistant in history:
        data = {"question": human, "answer": assistant}
        response = requests.post(url, headers=headers, json=data, timeout=10)
        if response.status_code != 200:
            print(f"Failed to add data. Status code: {response.status_code}")

# Implicit string concatenation avoids the original backslash continuations,
# which embedded the source indentation as literal spaces inside the prompt.
sys_prompt = (
    "You are a knowledgeable AI assistant in FortiOS CLI. "
    "Your role is to assist users by providing accurate information and "
    "answering questions about the config in FortiOS CLI. "
    "Your answer should be clear and relevant.\n"
)

if __name__ == "__main__":
    args = parse_args()
    tokenizer = AutoTokenizer.from_pretrained(args.base_model)
    if not tokenizer.pad_token:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"tokenizer.pad_token was unset; falling back to {tokenizer.eos_token}")

    model = AutoModelForCausalLM.from_pretrained(
        args.base_model,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
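    # An alternative (assuming the `accelerate` package is installed) is to
    # pass device_map="auto" to from_pretrained instead of moving the model
    # to a single device manually.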

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(label="Chat History")
        textbox = gr.Textbox(placeholder="Ask about FortiOS CLI configuration...", container=False, scale=7)
        system_prompt = gr.Textbox(sys_prompt, label="System Prompt")
        # Minimum of 0.1 avoids temperature=0, which is invalid with do_sample=True.
        temperature = gr.Slider(0.1, 1, 0.5, label="Temperature")
        max_tokens = gr.Slider(100, 2048, DEFAULT_MAX_NEW_TOKENS, label="Max Tokens")
        send_button = gr.Button("Report Chat History")

        send_button.click(fn=send_chat_history_to_api, inputs=chatbot)

        gr.ChatInterface(
            predict,
            title="FortiOS CLI Chat - Demo",
            description="FortiOS CLI Chat",
            theme="soft",
            chatbot=chatbot,
            textbox=textbox,
            retry_btn=None,
            undo_btn="Delete Previous",
            clear_btn="Clear",
            additional_inputs=[system_prompt, temperature, max_tokens],
            examples=[
                ["Allow all traffic from any source IP address and any source interface 'port10' to any destination IP address and any destination interface 'port9'. This policy will be applied at all times (always) and will allow all services. Additionally, this policy will enable UTM features, use proxy-based inspection mode, and use an SSL-SSH profile named 'deep-custom'. Finally, this policy will also enable source NAT."],
                ["Configure a firewall policy to allow users 'dina' and '15947' to access 'DR-Exchange-Servers' and 'HQ-Exchange-Servers' using RDP protocol from the 'SSL-VPN-IT-Pool' address range, incoming from the 'ssl.FG-Traffic' interface and outgoing to the 'FG-PA-Inside' interface. The policy should have Antivirus scanning enabled with profile 'ABE_AV' and log all traffic. The policy should be always active and currently disabled for testing or maintenance purposes."],
                ["Configure a firewall policy named 'ZoomAccess' that allows traffic from the 'IP_10.96.54.149' and 'HighCourt_Zoom' addresses coming in through the 'VLAN51' interface to access the 'Zoom_access' destination through the 'npu0_vlink1' interface, at any time, with all services allowed, using proxy-based inspection and SSL certificate inspection."],
                ["Create a dynamic firewall address object named 'EMS2_ZTNA_Condiciones-Clinic' that is based on a FortiClient EMS tag. This object will be used to represent a group of devices that have the 'Condiciones-Clinic' tag in the EMS system, which is related to zero-trust access control (ZTNA)."],
                ["The user wants to create a dynamic firewall address object named 'Pre-Prod DMN Servers' that retrieves IP addresses from a VMware vCenter SDN (Software-Defined Networking) environment. The object will dynamically include IP addresses that match the filter criteria 'Name=b4dmn*' from the vCenter inventory. Specifically, the object will include the following IP addresses: 172.21.121.44, 172.21.121.45, 172.21.121.46, 172.21.121.47, 172.21.121.48, and 172.21.121.49, each with associated object IDs and network IDs for further identification and grouping."],
                ["The user wants to create a traffic shaper named 'Videoconferencia' that limits the maximum bandwidth to 60 megabits per second, effectively enforcing an upper bandwidth limit for video conferencing traffic."],
                ["Configure an interface named 'Sec60' in the 'root' virtual domain with an IP address of 172.18.60.1/24. Allow management access to this interface for ping, fabric, and speed-test. Enable device identification and set the interface role to LAN. Set the SNMP index to 41 and enable auto-authentication for dedicated Fortinet extension devices. Additionally, enable switch controller features such as IGMP snooping, IGMP snooping proxy, and DHCP snooping. Set the color of the interface icon on the GUI to 7 and associate it with the 'FortiLink' interface and VLAN ID 60."],
            ],
            additional_inputs_accordion_name="Parameters",
        ).queue()

    demo.launch()