from huggingface_hub import InferenceClient
import gradio as gr

# Client for the hosted Mixtral instruct model on the Hugging Face Inference API.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
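# If anonymous calls to the Inference API are rate-limited, a token can be
# passed explicitly (a sketch; it assumes an HF_TOKEN environment variable):
#
#   import os
#   client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=os.environ["HF_TOKEN"])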
# Optional system prompt, prepended once at the start of a conversation.
system_prompt = ""
def format_prompt(message, history):
    """Build a Mixtral-instruct prompt string from the chat history."""
    prompt = "<s>"
    # Prepend the system prompt once, unless it is already present in the history.
    if system_prompt and not any(
        f"[INST] {system_prompt} [/INST]" in user_prompt for user_prompt, _ in history
    ):
        prompt += f"[INST] {system_prompt} [/INST]"
    # Replay previous turns in the [INST] user [/INST] assistant</s> format.
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
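# A quick illustration of the turn format (the history pair below is made up):
#
#   format_prompt("How far is the Moon?", [("Hi", "Hello! How can I help?")])
#   -> "<s>[INST] Hi [/INST] Hello! How can I help?</s> [INST] How far is the Moon? [/INST]"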
def generate(
    prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0,
):
    # Temperature must be strictly positive for the API, so clamp tiny values.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = format_prompt(prompt, history)

    # Stream tokens as they arrive; return_full_text=False keeps the echoed
    # prompt (with its [INST] tags) out of the response shown to the user.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output  # Gradio re-renders the chat bubble on every partial yield.
    return output
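# The generator can also be driven outside Gradio (a sketch; it assumes the
# Inference API is reachable):
#
#   last = ""
#   for partial in generate("Tell me a joke.", history=[]):
#       last = partial
#   print(last)  # the complete response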
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=False,
        layout="vertical",
        height=700,
    ),
    concurrency_limit=9,
    theme="soft",
    submit_btn="Send",
)

chat_interface.launch(show_api=False)