import gradio as gr
from huggingface_hub import InferenceClient
# Note: inference() builds its own InferenceClient per request, so no module-level client is needed.
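# Mistral/Mixtral instruct models expect each turn wrapped in [INST] ... [/INST] tags.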
def format_prompt(message, history):
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response} "
    prompt += f"[INST] {message} [/INST]"
    return prompt
description="""The Rapid TGI (Text Generation Inference) has been developed for learning purposes
Source Code:
"""
title=""" Rapid TGI"
css="""
.gradio-container {
background: rgb(131,58,180);
background: linear-gradient(90deg, rgba(131,58,180,1) 0%, rgba(253,29,29,1) 50%, rgba(252,176,69,1) 100%);
#logo {
content: url('https://i.ibb.co/6vz9WjL/chat-bot.png');
width: 42px;
height: 42px;
margin-right: 10px;
margin-top: 3px;
display:inline-block;
};
#link {
color: #fff;
background-color: transparent;
};
}
"""
def inference(message, history, model="mistralai/Mixtral-8x7B-Instruct-v0.1",
              temperature=0.3, tokens=512, top_p=0.95, repetition_penalty=0.93):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2  # the endpoint rejects a temperature of exactly 0
    top_p = float(top_p)
    kwargs = dict(
        temperature=temperature,
        max_new_tokens=tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    prompt = format_prompt(message, history)
    client = InferenceClient(model=model)  # new client for whichever model was selected
    partial_message = ""
    # Stream tokens as they are generated so the chat window updates incrementally.
    for response in client.text_generation(prompt, stream=True, details=True,
                                           return_full_text=False, **kwargs):
        partial_message += response.token.text
        yield partial_message
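# Chat window with custom user/bot avatars, a copy button, and like/dislike feedback.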
chatbot = gr.Chatbot(
    avatar_images=["https://i.ibb.co/kGd6XrM/user.png", "https://i.ibb.co/6vz9WjL/chat-bot.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=True,
    likeable=True,
)
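# additional_inputs map positionally onto inference()'s parameters after (message, history):
# model, temperature, tokens, top_p, repetition_penalty.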
UI = gr.ChatInterface(
    inference,
    chatbot=chatbot,
    description=description,
    title=title,
    additional_inputs_accordion=gr.Accordion(label="Additional configuration for better responses", open=False),
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    theme="soft",
    submit_btn="Send",
    css=css,
    additional_inputs=[
        gr.Dropdown(
            value="mistralai/Mixtral-8x7B-Instruct-v0.1",
            choices=["mistralai/Mixtral-8x7B-Instruct-v0.1",
                     "HuggingFaceH4/zephyr-7b-beta",
                     "mistralai/Mistral-7B-Instruct-v0.1"],
            label="Available models",
            info="The default model is Mixtral-8x7B-Instruct-v0.1.",
            interactive=True,
        ),
        gr.Slider(value=0.3, minimum=0.0, maximum=1.0, label="Temperature"),
        gr.Slider(value=512, minimum=1, maximum=1020, step=1, label="Max New Tokens"),
        gr.Slider(value=0.95, minimum=0.0, maximum=1.0, label="Top P"),
        gr.Slider(value=0.93, minimum=0.0, maximum=1.0, label="Repetition Penalty"),
    ],
    examples=[["Hello"],
              ["Can you tell me about generative AI?"],
              ["How can I deploy an LLM to a Hugging Face Inference Endpoint?"]],
)
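# Queue requests so multiple users can stream responses concurrently;
# show_api=False hides the auto-generated API page.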
UI.queue().launch(show_api=False, max_threads=50)