import os
from typing import Iterator

import gradio as gr
import spaces
from openai import OpenAI, APIError
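
# OpenAI-compatible client pointed at a dedicated Hugging Face Inference Endpoint
# (TGI exposes the /v1 chat-completions API). The api_key below is a placeholder
# and must be replaced with a valid Hugging Face access token.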
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key="hf_XXXXX",
)
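
# Generation limits: MAX_MAX_NEW_TOKENS and DEFAULT_MAX_NEW_TOKENS back the optional
# "Max new tokens" slider that is commented out in the ChatInterface below;
# MAX_INPUT_TOKEN_LENGTH is configurable via the environment.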
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series of models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details are available on our model page.
"""
LICENSE = """
This demo is governed by the license available [here](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)."""
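
# Streaming chat handler: rebuilds an OpenAI-style message list from Gradio's
# (user, assistant) history on every turn and streams the endpoint's reply.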
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
    conversation = [{"role": "system", "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company."}]
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})
    try:
        response = client.chat.completions.create(
            model="tgi",
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # Delta content can be None on role/finish chunks; skip those to avoid a TypeError in join.
            delta = chunk.choices[0].delta.content
            if delta:
                outputs.append(delta)
            yield "".join(outputs)
    except APIError as e:
        # Surface API or network failures to the UI instead of crashing the worker.
        print(f"Error: {e}")
        yield "An error occurred. Please try again later."
demo = gr.ChatInterface(
    fn=generate,
    # additional_inputs=[
    #     gr.Textbox(label="System prompt", lines=6),
    #     gr.Slider(
    #         label="Max new tokens",
    #         minimum=1,
    #         maximum=MAX_MAX_NEW_TOKENS,
    #         step=1,
    #         value=DEFAULT_MAX_NEW_TOKENS,
    #     ),
    # ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)
# with gr.Blocks() as demo:
# # gr.Markdown(DESCRIPTION)
# # gr.Markdown(LICENSE)
# # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
# chat_interface.render()
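
# queue(max_size=4) bounds the request queue; share=True also exposes a public *.gradio.live URL.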
if __name__ == "__main__":
    demo.queue(max_size=4).launch(share=True)