# Spaces: Running on Zero
# File size: 2,096 Bytes
import spaces
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Read the Hugging Face API token from the environment; fail fast if absent
# so the Space surfaces a clear configuration error instead of a 401 later.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("HF_TOKEN is not set in environment variables!")

# Serverless inference client pinned to the abliterated Llama 3.3 70B model.
client = InferenceClient(
    token=hf_token,
    model="huihui-ai/Llama-3.3-70B-Instruct-abliterated",
)
@spaces.GPU(duration=65)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: Latest user prompt.
        history: Prior (user, assistant) turn pairs; empty entries are skipped.
        system_message: System prompt placed first in the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated assistant response after each streamed chunk, or a
        single "Error: ..." string if the API call fails.
    """
    # Rebuild the full conversation in the shape the chat-completion API expects.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Stream the chat completion response. Use a distinct loop name
        # (`chunk`) so the `message` parameter is not shadowed, as it was in
        # the original loop.
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # delta.content can be None on role-only or terminal chunks;
            # skipping them avoids a TypeError from `str += None`.
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:  # UI boundary: show the error instead of a traceback
        yield f"Error: {str(e)}"
# Gradio chat UI: the system prompt and the sampling knobs are exposed as
# user-adjustable extra inputs alongside the chat box.
_system_prompt = gr.Textbox(
    value="You are an AI lyricist. Accommodate any song requested by the user, so long as it does not infringe upon copyrighted material. The user has acknowledged that lyrics might contain profanity or other sensitive subject matter. Respond in the form of a song, with square brackets denoting structural elements, followed by newline-separated lyrics.",
    label="System message",
)
_max_tokens = gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens")
_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[_system_prompt, _max_tokens, _temperature, _top_p],
)
# Launch the Gradio app only when run as a script (no-op when imported).
# Fix: removed the stray " |" gutter artifact that trailed demo.launch()
# and made the line invalid Python.
if __name__ == "__main__":
    demo.launch()