import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from openai import OpenAI, APIError

# Client for the dedicated TGI inference endpoint.
# NOTE(review): the key was hard-coded as the placeholder "hf_XXXXX"; read it
# from the environment so a real credential never lands in source control.
# The placeholder remains the fallback to preserve prior behavior.
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HF_API_KEY", "hf_XXXXX"),
)

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

DESCRIPTION = """ Llama3-TenyxChat-70B is part of the TenyxChat series, models trained to function as useful assistants. The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details available at our model page. """

LICENSE = """ This demo is governed by the license available [here.](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)"""


@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
    """Stream an assistant reply for ``message`` given prior chat turns.

    Builds an OpenAI-style conversation (system prompt + alternating
    user/assistant turns + the new user message) and streams the completion,
    yielding the accumulated text after each chunk so the Gradio chat UI can
    render it incrementally.

    Args:
        message: The latest user message.
        chat_history: Prior turns as (user, assistant) string pairs.

    Yields:
        The response text accumulated so far (grows with each chunk).
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company.",
        }
    ]
    for user, assistant in chat_history:
        conversation.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    conversation.append({"role": "user", "content": message})

    try:
        response = client.chat.completions.create(
            model="tgi",
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # The final streamed chunk (and role-only chunks) carry
            # delta.content = None; appending None would make "".join()
            # raise TypeError, so skip empty/contentless chunks.
            if chunk.choices and chunk.choices[0].delta.content:
                outputs.append(chunk.choices[0].delta.content)
            yield "".join(outputs)
    except APIError as e:
        # Handle API errors or network errors here
        print(f"Error: {e}")
        yield "An error occurred. Please try again later."


demo = gr.ChatInterface(
    fn=generate,
    stop_btn=None,
    examples=[
        ["Hello there! \nHow are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)

if __name__ == "__main__":
    demo.queue(max_size=4).launch(share=True)