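# Minimal Gradio chat app that streams responses from Mixtral-8x7B-Instruct
# through the Hugging Face Inference API.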
from huggingface_hub import InferenceClient
import gradio as gr

# Inference API client for the hosted Mixtral instruct model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Optional system prompt; prepended to the prompt when non-empty.
system_prompt = ""

def format_prompt(message, history):
    """Build a Mixtral instruct-format prompt from the chat history."""
    prompt = "<s>"

    # The history never contains the system prompt, so scanning it for one
    # (as the original code did) always failed; just prepend it when set.
    if system_prompt:
        prompt += f"[INST] {system_prompt} [/INST]"

    # Replay earlier turns; each assistant reply is closed with </s>.
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "

    prompt += f"[INST] {message} [/INST]"
    return prompt
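# Illustrative result for a one-turn history (hypothetical messages):
# <s>[INST] Hello [/INST] Hi there!</s> [INST] How are you? [/INST]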

def generate(
    prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a model response for the ChatInterface."""
    # Clamp temperature away from zero; the API rejects non-positive values.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed: identical prompts produce identical samples
    )

    formatted_prompt = format_prompt(prompt, history)

    # return_full_text=False keeps the prompt out of the generated text.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""

    # Accumulate tokens and yield the running text so the UI updates live.
    for response in stream:
        output += response.token.text
        yield output

chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=False,
        layout="vertical",
        height=700,
    ),
    concurrency_limit=9,  # maximum number of requests handled concurrently
    theme="soft",
    submit_btn="Send",
)

chat_interface.launch(show_api=False)
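# launch() blocks and serves the UI on a local web server (Gradio's default is
# http://127.0.0.1:7860); show_api=False hides the auto-generated API docs.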