# Spaces:
# Sleeping
# Sleeping
import gradio as gr
from huggingface_hub import InferenceClient

# Module-level inference client bound to the Zephyr 7B beta chat model;
# `respond` sends its chat-completion requests through this object.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` as a growing string.

    Assembles a chat transcript (system prompt, earlier turns, then the new
    user message), sends it to the module-level ``client`` with streaming
    enabled, and yields the reply text accumulated so far after every chunk.

    Args:
        message: The latest user message.
        history: Earlier conversation as ``(user_text, assistant_text)``
            pairs; empty/falsy entries in a pair are skipped.
        system_message: Text placed in the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    # The system message always comes first in the transcript.
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns, skipping missing halves of a pair.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # The new user message goes last.
    messages.append({"role": "user", "content": message})

    response = ""
    stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    # `chunk` (not `message`) so the loop does not shadow the parameter.
    for chunk in stream:
        choices = chunk.choices
        # A chunk may carry no choices or a delta without text (e.g. the
        # closing chunk); concatenating None would raise TypeError, so only
        # real text is appended.
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response
# Gradio UI: a titled page with the chat widget wired to `respond`.
def create_demo():
    """Build and return the Gradio Blocks app for the Auntie chatbot.

    The page shows a Markdown title followed by a ``gr.ChatInterface`` driven
    by ``respond``. Four additional inputs are supplied, in the same order as
    ``respond``'s trailing parameters: a hidden system-prompt textbox, then
    sliders for max new tokens, temperature and top-p.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # Persona prompt used as the default value of the hidden system textbox.
    auntie_prompt = (
        "You are a Singaporean Auntie Chatbot. "
        "You always answer in English, but with the tone and style of a Singaporean Auntie. "
        "Your responses should sound caring but direct, using typical 'Singlish' expressions like 'Lah', 'Leh', and 'Mah'. "
        "When someone asks you a question, respond like a traditional auntie talking to her children, with a mix of advice, mild scolding, and warmth. "
        "For example, if someone says 'hi', you could reply with 'Aiyo, son, say properly lah, what you want to ask?' "
        "Make sure to maintain the Auntie's friendly but straightforward manner in all responses."
    )

    with gr.Blocks() as page:
        # Optional logo, currently disabled:
        # gr.Image("auntie.png", label="App Logo")
        gr.Markdown("Talk To Your Auntie")

        # Extra controls, created after the title and before the chat widget.
        extra_controls = [
            gr.Textbox(value=auntie_prompt, label="System message", visible=False),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ]

        # Chat widget placed below the title.
        gr.ChatInterface(respond, additional_inputs=extra_controls)

    return page
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()