import os
from typing import Iterator

import gradio as gr
import spaces
from openai import OpenAI, APIError
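
# OpenAI-compatible client pointed at a dedicated Hugging Face Inference Endpoint
# (TGI exposes the /v1 chat-completions API). The api_key below is a placeholder
# and must be replaced with a valid Hugging Face access token.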
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key="hf_XXXXX",
)
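
# Generation limits: MAX_MAX_NEW_TOKENS and DEFAULT_MAX_NEW_TOKENS back the optional
# "Max new tokens" slider that is commented out in the ChatInterface below;
# MAX_INPUT_TOKEN_LENGTH is configurable via the environment.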
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series of models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details are available on our model page.
"""
LICENSE = """
This demo is governed by the license available [here](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)."""
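
# Streaming chat handler: rebuilds an OpenAI-style message list from Gradio's
# (user, assistant) history on every turn and streams the endpoint's reply.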
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
    conversation = [{"role": "system", "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company."}]
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})
    try:
        response = client.chat.completions.create(
            model="tgi",
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # Delta content can be None on role/finish chunks; skip those to avoid a TypeError in join.
            delta = chunk.choices[0].delta.content
            if delta:
                outputs.append(delta)
            yield "".join(outputs)
    except APIError as e:
        # Surface API or network failures to the UI instead of crashing the worker.
        print(f"Error: {e}")
        yield "An error occurred. Please try again later."
demo = gr.ChatInterface(
    fn=generate,
    # additional_inputs=[
    #     gr.Textbox(label="System prompt", lines=6),
    #     gr.Slider(
    #         label="Max new tokens",
    #         minimum=1,
    #         maximum=MAX_MAX_NEW_TOKENS,
    #         step=1,
    #         value=DEFAULT_MAX_NEW_TOKENS,
    #     ),
    # ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)
# with gr.Blocks() as demo:
# # gr.Markdown(DESCRIPTION)
# # gr.Markdown(LICENSE)
# # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
# chat_interface.render()
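
# queue(max_size=4) bounds the request queue; share=True also exposes a public *.gradio.live URL.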
if __name__ == "__main__":
    demo.queue(max_size=4).launch(share=True)