import marimo

__generated_with = "0.9.14"
app = marimo.App(width="medium")


@app.cell
def __():
    import marimo as mo
    import os
    from huggingface_hub import InferenceClient
    return InferenceClient, mo, os


@app.cell
def __():
    MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
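    # Presumably any chat-completion model served by the HF Inference API can
    # be dropped in here; availability varies by model and account (untested).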
    return (MODEL_NAME,)


@app.cell(hide_code=True)
def __(MODEL_NAME, mo):
    mo.md(f"""
    # Chat with **{MODEL_NAME}**
    """)
    return


@app.cell
def __(max_tokens, mo, system_message, temperature, top_p):
    mo.hstack(
        [
            system_message,
            mo.vstack([temperature, top_p, max_tokens], align="end"),
        ],
    )
    return


@app.cell
def __(mo, respond):
    chat = mo.ui.chat(
        model=respond,
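        # Prompts may include `{{}}` placeholders (like `{{number}}` below),
        # which the chat UI turns into fill-in fields before sending.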
        prompts=["Tell me a joke.", "What is the square root of {{number}}?"],
    )
    chat
    return (chat,)


@app.cell
def __(InferenceClient, MODEL_NAME, os):
    """
    For more information on `huggingface_hub` Inference API support, please
    check the docs: https://huggingface.co/docs/huggingface_hub/v0.26.2/en/guides/inference
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("HF_TOKEN is not set; access may be limited.")
    client = InferenceClient(
        MODEL_NAME,
        token=hf_token,
    )
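    # Quick sanity check, a minimal sketch (commented out; assumes network
    # access and, ideally, a valid HF_TOKEN):
    # client.chat_completion(
    #     [{"role": "user", "content": "Hello!"}], max_tokens=16
    # ).choices[0].message.content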
    return client, hf_token


@app.cell
def __(client, mo):
    # Create UI controls
    system_message = mo.ui.text_area(
        value="You are a friendly Chatbot.",
        label="System message",
    )
    max_tokens = mo.ui.slider(
        start=1,
        stop=2048,
        value=512,
        step=1,
        label="Max new tokens",
        show_value=True,
    )
    temperature = mo.ui.slider(
        start=0.1,
        stop=4.0,
        value=0.7,
        step=0.1,
        label="Temperature",
        show_value=True,
    )
    top_p = mo.ui.slider(
        start=0.1,
        stop=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
        show_value=True,
    )
    # Add more configuration options if needed.

    # Create chat callback
    def respond(messages: list[mo.ai.ChatMessage], config):
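        # `messages` is the full history from `mo.ui.chat`; `config` carries
        # the chat UI's own config, which this app ignores in favor of the
        # sliders above.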
        chat_messages = [{"role": "system", "content": system_message.value}]
        for message in messages:
            parts = []
            # Add text
            parts.append({"type": "text", "text": message.content})
            # Add attachments
            if message.attachments:
                for attachment in message.attachments:
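                    # Note (assumption): image parts are only useful with
                    # vision-capable models; zephyr-7b-beta is text-only, so
                    # the endpoint will likely reject image attachments.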
                    content_type = attachment.content_type or ""
                    # This example only supports image attachments
                    if content_type.startswith("image"):
                        parts.append(
                            {
                                "type": "image_url",
                                "image_url": {"url": attachment.url},
                            }
                        )
                    else:
                        raise ValueError(
                            f"Unsupported content type {content_type}"
                        )
            chat_messages.append({"role": message.role, "content": parts})
        response = client.chat_completion(
            chat_messages,
            max_tokens=max_tokens.value,
            temperature=temperature.value,
            top_p=top_p.value,
            stream=False,
        )
        # You can return strings, markdown, charts, tables, dataframes, and more.
        return response.choices[0].message.content
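
    # Streaming variant, a minimal sketch (assumes a marimo version that
    # accepts generator-based chat models; `stream=True` is part of the
    # `huggingface_hub` chat_completion API). Untested here:
    #
    # def respond_streaming(messages, config):
    #     ...  # build chat_messages as above
    #     for chunk in client.chat_completion(
    #         chat_messages, max_tokens=max_tokens.value, stream=True
    #     ):
    #         yield chunk.choices[0].delta.content or ""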
    return max_tokens, respond, system_message, temperature, top_p


@app.cell
def __():
    # If you need to do anything _reactively_ with the chat messages,
    # you can access them via the `chat.value` attribute.
    # chat.value
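    # For example (assuming `chat` is added as a parameter of this cell and
    # `mo` is in scope):
    # mo.md(f"The conversation has {len(chat.value)} messages so far.")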
    return


if __name__ == "__main__":
    app.run()
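
# This file is a marimo notebook: serve it as an app with `marimo run <file>.py`
# or open it for editing with `marimo edit <file>.py` (placeholder filename).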