litellmlope

Running

App Files Files Community

litellmlope / litellm /llms /prompt_templates /factory.py

ka1kuk

Upload 235 files

7db0ae4 verified about 1 year ago

raw

history blame

24.3 kB

	from enum import Enum
	import requests, traceback
	import json
	from jinja2 import Template, exceptions, Environment, meta
	from typing import Optional, Any


	def default_pt(messages):
	    """Fallback template: join every message's content with single spaces."""
	    contents = [entry["content"] for entry in messages]
	    return " ".join(contents)


	# alpaca prompt template - for models like mythomax, etc.
	def alpaca_pt(messages):
	    """Render messages in the Alpaca instruction/response layout.

	    System and user turns are both emitted under "### Instruction:" and
	    assistant turns under "### Response:". Assembly is delegated to
	    custom_prompt with "<s>" / "</s>" as the bos / eos tokens.
	    """
	    instruction_wrap = {
	        "pre_message": "### Instruction:\n",
	        "post_message": "\n\n",
	    }
	    response_wrap = {
	        "pre_message": "### Response:\n",
	        "post_message": "\n\n",
	    }
	    alpaca_roles = {
	        "system": dict(instruction_wrap),
	        "user": dict(instruction_wrap),
	        "assistant": response_wrap,
	    }
	    return custom_prompt(
	        role_dict=alpaca_roles,
	        messages=messages,
	        bos_token="<s>",
	        eos_token="</s>",
	    )


	# Llama2 prompt template
	def llama_2_chat_pt(messages):
	    """Render messages in the Llama-2 chat layout ([INST] / <<SYS>> markers).

	    Assembly is delegated to custom_prompt with "<s>" / "</s>" as the
	    bos / eos tokens.
	    """
	    system_wrap = {
	        "pre_message": "[INST] <<SYS>>\n",
	        "post_message": "\n<</SYS>>\n [/INST]\n",
	    }
	    # User framing follows https://github.com/facebookresearch/llama/blob/77062717054710e352a99add63d160274ce670c6/llama/generation.py#L348
	    user_wrap = {
	        "pre_message": "[INST] ",
	        "post_message": " [/INST]\n",
	    }
	    # Assistant turns only get a trailing newline - see https://replicate.com/blog/how-to-prompt-llama
	    assistant_wrap = {"post_message": "\n"}
	    return custom_prompt(
	        role_dict={
	            "system": system_wrap,
	            "user": user_wrap,
	            "assistant": assistant_wrap,
	        },
	        messages=messages,
	        bos_token="<s>",
	        eos_token="</s>",
	    )


	def ollama_pt(model, messages):
	    """Build the prompt payload for a model served through Ollama.

	    Template reference:
	    https://github.com/jmorganca/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template

	    Args:
	        model: Model name. The substring "instruct" selects the
	            "### System / ### User / ### Response" layout; "llava" selects
	            the text-plus-images payload; anything else is plain
	            concatenation.
	        messages: Chat messages. "content" is either a string or a list of
	            parts in the OpenAI vision format
	            (https://docs.litellm.ai/docs/providers/openai#openai-vision-models).

	    Returns:
	        A prompt string, or for llava models a dict
	        {"prompt": <text>, "images": [<image url>, ...]}.
	    """
	    if "instruct" in model:
	        return custom_prompt(
	            role_dict={
	                "system": {"pre_message": "### System:\n", "post_message": "\n"},
	                "user": {
	                    "pre_message": "### User:\n",
	                    "post_message": "\n",
	                },
	                "assistant": {
	                    "pre_message": "### Response:\n",
	                    "post_message": "\n",
	                },
	            },
	            final_prompt_value="### Response:",
	            messages=messages,
	        )

	    if "llava" in model:
	        prompt = ""
	        images = []
	        for message in messages:
	            content = message["content"]
	            if isinstance(content, str):
	                prompt += content
	            elif isinstance(content, list):
	                for element in content:
	                    if not isinstance(element, dict):
	                        continue
	                    if element["type"] == "text":
	                        prompt += element["text"]
	                    elif element["type"] == "image_url":
	                        images.append(element["image_url"]["url"])
	        return {"prompt": prompt, "images": images}

	    # Plain concatenation. The previous check `isinstance(...) is str` was
	    # always False, so list content made of dict parts raised TypeError in
	    # "".join(); text parts are now extracted instead.
	    pieces = []
	    for message in messages:
	        content = message["content"]
	        if isinstance(content, str):
	            pieces.append(content)
	            continue
	        for part in content:
	            if isinstance(part, str):
	                pieces.append(part)
	            elif isinstance(part, dict) and part.get("type") == "text":
	                pieces.append(part["text"])
	    return "".join(pieces)


	def mistral_instruct_pt(messages):
	    """Render messages in the Mistral-instruct layout.

	    Every system, user and assistant message is wrapped in "[INST]" ...
	    "[/INST]"; the whole prompt starts with "<s>" and ends with "</s>".
	    Assembly is delegated to custom_prompt.
	    """
	    inst_wrap = {"pre_message": "[INST]", "post_message": "[/INST]"}
	    mistral_roles = {
	        role_name: dict(inst_wrap) for role_name in ("system", "user", "assistant")
	    }
	    return custom_prompt(
	        initial_prompt_value="<s>",
	        role_dict=mistral_roles,
	        final_prompt_value="</s>",
	        messages=messages,
	    )


	# Falcon prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
	def falcon_instruct_pt(messages):
	    """Render messages in the Falcon-instruct layout.

	    System content is emitted verbatim. Every other message becomes
	    "<role>:<content>" followed by a blank line, with CRLF normalised to LF
	    and double newlines collapsed to single ones inside the content.
	    """
	    # NOTE(review): the blank-line separator is taken to belong to the
	    # non-system branch only - confirm against the upstream file.
	    segments = []
	    for turn in messages:
	        speaker = turn["role"]
	        if speaker == "system":
	            segments.append(turn["content"])
	            continue
	        flattened = turn["content"].replace("\r\n", "\n").replace("\n\n", "\n")
	        segments.append(speaker + ":" + flattened)
	        segments.append("\n\n")
	    return "".join(segments)


	def falcon_chat_pt(messages):
	    """Render messages in the Falcon-chat layout.

	    Each message is prefixed with "System: ", "Falcon: " (assistant) or
	    "User: " and the pieces are concatenated with no separator. Messages
	    with any other role are skipped.
	    """
	    speaker_labels = {
	        "system": "System: ",
	        "assistant": "Falcon: ",
	        "user": "User: ",
	    }
	    return "".join(
	        speaker_labels[turn["role"]] + turn["content"]
	        for turn in messages
	        if turn["role"] in speaker_labels
	    )


	# MPT prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
	def mpt_chat_pt(messages):
	    """Render messages with ChatML-style <|im_start|> / <|im_end|> markers.

	    Each system, assistant or user message becomes
	    "<|im_start|>" + role + content + "<|im_end|>" followed by a newline.
	    Messages with any other role are skipped.
	    """
	    # The markers previously contained backslashes ("<\|im_start\|>"), which
	    # are invalid escape sequences and leaked literal backslashes into the
	    # prompt.
	    chatml_roles = ("system", "assistant", "user")
	    prompt = ""
	    for message in messages:
	        role = message["role"]
	        if role in chatml_roles:
	            prompt += "<|im_start|>" + role + message["content"] + "<|im_end|>" + "\n"
	    return prompt


	# WizardCoder prompt template - https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0#prompt-format
	def wizardcoder_pt(messages):
	    """Render messages in the WizardCoder layout.

	    System content is emitted as-is, user content under "### Instruction:"
	    and assistant content under "### Response:". Every emitted message is
	    followed by a blank line; other roles are skipped.
	    """
	    section_headers = {
	        "system": "",
	        "user": "### Instruction:\n",  # user maps to 'Instruction'
	        "assistant": "### Response:\n",  # assistant maps to 'Response'
	    }
	    return "".join(
	        section_headers[turn["role"]] + turn["content"] + "\n\n"
	        for turn in messages
	        if turn["role"] in section_headers
	    )


	# Phind-CodeLlama prompt template - https://huggingface.co/Phind/Phind-CodeLlama-34B-v2#how-to-prompt-the-model
	def phind_codellama_pt(messages):
	    """Render messages in the Phind-CodeLlama layout.

	    Each message is placed under "### System Prompt", "### User Message" or
	    "### Assistant" and followed by a blank line; other roles are skipped.
	    """
	    section_headers = {
	        "system": "### System Prompt\n",
	        "user": "### User Message\n",
	        "assistant": "### Assistant\n",
	    }
	    rendered = ""
	    for turn in messages:
	        header = section_headers.get(turn["role"])
	        if header is None:
	            continue
	        rendered += header + turn["content"] + "\n\n"
	    return rendered


	def hf_chat_template(model: str, messages: list, chat_template: Optional[Any] = None):
	    """Render messages with a Hugging Face (Jinja2) chat template.

	    When chat_template is None, the template and the bos/eos tokens are read
	    from https://huggingface.co/{model}/raw/main/tokenizer_config.json.
	    When a template is supplied, both tokens are rendered as empty strings.

	    If the template cannot render a lone system message, system messages are
	    re-labelled as user messages. If that render fails because roles must
	    alternate, blank filler messages are inserted between consecutive
	    same-role messages and the render is retried once.

	    Args:
	        model: Hugging Face repo id; only used when chat_template is None.
	        messages: Chat messages (dicts with "role" and "content").
	        chat_template: Optional Jinja2 template source.

	    Returns:
	        The rendered prompt string.

	    Raises:
	        Exception: "No chat template found" when the tokenizer config cannot
	            be fetched or lacks a chat template, or
	            "Error rendering template - ..." when rendering fails.
	    """
	    ## get the tokenizer config from huggingface
	    bos_token = ""
	    eos_token = ""
	    if chat_template is None:

	        def _get_tokenizer_config(hf_model_name):
	            url = (
	                f"https://huggingface.co/{hf_model_name}/raw/main/tokenizer_config.json"
	            )
	            # Bounded wait so an unresponsive hub cannot hang prompt building.
	            response = requests.get(url, timeout=10)
	            if response.status_code == 200:
	                return {"status": "success", "tokenizer": json.loads(response.content)}
	            return {"status": "failure"}

	        tokenizer_config = _get_tokenizer_config(model)
	        if (
	            tokenizer_config["status"] == "failure"
	            or "chat_template" not in tokenizer_config["tokenizer"]
	        ):
	            raise Exception("No chat template found")
	        ## read the bos token, eos token and chat template from the json
	        tokenizer_config = tokenizer_config["tokenizer"]
	        bos_token = tokenizer_config["bos_token"]
	        eos_token = tokenizer_config["eos_token"]
	        chat_template = tokenizer_config["chat_template"]

	    def raise_exception(message):
	        # Exposed to templates, which call it to reject unsupported input.
	        raise Exception(f"Error message - {message}")

	    # Create a template object from the template text
	    env = Environment()
	    env.globals["raise_exception"] = raise_exception
	    template = env.from_string(chat_template)

	    def _is_system_in_template():
	        # Probe: a template without system support raises while rendering.
	        try:
	            template.render(
	                messages=[{"role": "system", "content": "test"}],
	                eos_token="<eos>",
	                bos_token="<bos>",
	            )
	            return True
	        except Exception:
	            return False

	    try:
	        if _is_system_in_template():
	            return template.render(
	                bos_token=bos_token, eos_token=eos_token, messages=messages
	            )

	        # treat a system message as a user message, if system not in template
	        reformatted_messages = [
	            {"role": "user", "content": message["content"]}
	            if message["role"] == "system"
	            else message
	            for message in messages
	        ]
	        try:
	            return template.render(
	                bos_token=bos_token,
	                eos_token=eos_token,
	                messages=reformatted_messages,
	            )
	        except Exception as e:
	            if "Conversation roles must alternate user/assistant" not in str(e):
	                # Previously swallowed, which surfaced as an unrelated
	                # unbound-variable error; report the real failure instead.
	                raise
	            # Insert a blank message of the opposite role between two
	            # consecutive messages that share a role.
	            new_messages = []
	            for current, following in zip(
	                reformatted_messages, reformatted_messages[1:]
	            ):
	                new_messages.append(current)
	                if current["role"] == following["role"]:
	                    filler_role = "assistant" if current["role"] == "user" else "user"
	                    new_messages.append({"role": filler_role, "content": ""})
	            new_messages.extend(reformatted_messages[-1:])
	            return template.render(
	                bos_token=bos_token, eos_token=eos_token, messages=new_messages
	            )
	    except Exception as e:
	        raise Exception(f"Error rendering template - {str(e)}") from e


	# Anthropic template
	def claude_2_1_pt(
	    messages: list,
	):  # format - https://docs.anthropic.com/claude/docs/how-to-use-system-prompts
	    """
	    Build a Claude v2.1 text-completion prompt.

	    System content is emitted bare (no "Human:" prefix). User and assistant
	    content is prefixed with the "Human:" / "Assistant:" turn markers. When
	    an assistant message directly follows a system message, an empty
	    "Human:" turn is inserted between them. Unless the last message is from
	    the assistant, the prompt ends with an open "Assistant:" turn.

	    Ending with an assistant message lets the caller "put words in Claude's
	    mouth". See: https://docs.anthropic.com/claude/docs/put-words-in-claudes-mouth
	    """
	    human_turn = "\n\nHuman: "
	    assistant_turn = "\n\nAssistant: "

	    pieces = []
	    previous_role = None
	    for message in messages:
	        role = message["role"]
	        if role == "user":
	            pieces.append(f"{human_turn}{message['content']}")
	        elif role == "system":
	            pieces.append(f"{message['content']}")
	        elif role == "assistant":
	            if previous_role == "system":
	                pieces.append(human_turn)  # Insert a blank human message
	            pieces.append(f"{assistant_turn}{message['content']}")
	        previous_role = role

	    if messages[-1]["role"] != "assistant":
	        pieces.append(assistant_turn)  # prompt must end with an "\n\nAssistant: " turn
	    return "".join(pieces)


	### TOGETHER AI


	def get_model_info(token, model):
	    """Look up a Together AI model's prompt format and chat template.

	    Queries https://api.together.xyz/models/info with the bearer token and
	    returns the "prompt_format" and "chat_template" entries of the "config"
	    of the first model whose name matches ``model``. Names are compared
	    case-insensitively with surrounding whitespace removed.

	    Args:
	        token: API key sent as a bearer token.
	        model: Model name to look for.

	    Returns:
	        (prompt_format, chat_template). Either may be None, and both are
	        None when the request fails, the model is not listed, or any error
	        occurs (this lookup is best-effort).
	    """
	    try:
	        headers = {"Authorization": f"Bearer {token}"}
	        response = requests.get(
	            "https://api.together.xyz/models/info", headers=headers, timeout=10
	        )
	        if response.status_code != 200:
	            return None, None
	        # Normalise both sides; previously only the API-side name was
	        # lowercased, so a mixed-case ``model`` could never match.
	        wanted_name = model.strip().lower()
	        for m in response.json():
	            if m["name"].lower().strip() == wanted_name:
	                config = m["config"]
	                return config.get("prompt_format", None), config.get(
	                    "chat_template", None
	                )
	        return None, None
	    except Exception:  # safely fail a prompt template request
	        return None, None


	def format_prompt_togetherai(messages, prompt_format, chat_template):
	    """Format messages for a Together AI model.

	    Selection order:
	      1. no prompt_format         -> default_pt (plain concatenation)
	      2. chat_template available  -> hf_chat_template with that template
	      3. otherwise                -> prompt_format split around "{prompt}",
	         with the text before it used as the prefix and the text after it
	         as the suffix of the concatenated message contents.

	    Raises:
	        ValueError: in case 3, if prompt_format does not contain exactly one
	            "{prompt}" placeholder.
	    """
	    if prompt_format is None:
	        return default_pt(messages)

	    if chat_template is not None:
	        return hf_chat_template(
	            model=None, messages=messages, chat_template=chat_template
	        )

	    # Split only on the path that uses the result, so a prompt_format
	    # without exactly one placeholder no longer breaks the chat-template path.
	    human_prompt, assistant_prompt = prompt_format.split("{prompt}")
	    return custom_prompt(
	        role_dict={},
	        messages=messages,
	        initial_prompt_value=human_prompt,
	        final_prompt_value=assistant_prompt,
	    )


	###


	def anthropic_pt(
	    messages: list,
	):  # format - https://docs.anthropic.com/claude/reference/complete_post
	    """
	    Build an Anthropic text-completion prompt.

	    User content goes in a "Human:" turn, system content in a "Human:" turn
	    wrapped in <admin></admin> tags, and every other role in an "Assistant:"
	    turn. If the first message is from the assistant, an empty "Human:" turn
	    is placed in front of it. Unless the last message is from the assistant,
	    the prompt ends with an open "Assistant:" turn.

	    Ending with an assistant message lets the caller "put words in Claude's
	    mouth". See: https://docs.anthropic.com/claude/docs/put-words-in-claudes-mouth
	    """
	    human_turn = "\n\nHuman: "
	    assistant_turn = "\n\nAssistant: "

	    # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
	    pieces = []
	    for position, message in enumerate(messages):
	        role = message["role"]
	        if role == "user":
	            pieces.append(f"{human_turn}{message['content']}")
	        elif role == "system":
	            pieces.append(f"{human_turn}<admin>{message['content']}</admin>")
	        else:
	            pieces.append(f"{assistant_turn}{message['content']}")
	        if position == 0 and role == "assistant":
	            # ensure the prompt always starts with `\n\nHuman: `
	            pieces.insert(0, human_turn)

	    if messages[-1]["role"] != "assistant":
	        pieces.append(assistant_turn)
	    return "".join(pieces)


	def _load_image_from_url(image_url):
	    """Download ``image_url`` and open it as a PIL image.

	    Args:
	        image_url: URL of the image to fetch.

	    Returns:
	        The opened PIL image, or None when the request fails, the response
	        content-type is not an image, or Pillow cannot identify the data.
	        In those cases a message is printed instead of raising.

	    Raises:
	        Exception: if Pillow cannot be imported.
	    """
	    try:
	        # UnidentifiedImageError was referenced below without ever being
	        # imported, which turned any non-request failure into a NameError.
	        from PIL import Image, UnidentifiedImageError
	    except ImportError as err:
	        raise Exception(
	            "gemini image conversion failed please run `pip install Pillow`"
	        ) from err
	    from io import BytesIO

	    try:
	        # Send a GET request to the image URL
	        response = requests.get(image_url, timeout=10)
	        response.raise_for_status()  # Raise an exception for HTTP errors

	        # Check the response's content type to ensure it is an image
	        content_type = response.headers.get("content-type")
	        if not content_type or "image" not in content_type:
	            raise ValueError(
	                f"URL does not point to a valid image (content-type: {content_type})"
	            )

	        # Load the image from the response content
	        return Image.open(BytesIO(response.content))

	    except requests.RequestException as e:
	        print(f"Request failed: {e}")
	    except UnidentifiedImageError:
	        print(
	            "Cannot identify image file (it may not be a supported image format or might be corrupted)."
	        )
	    except ValueError as e:
	        print(e)
	    return None


	def _gemini_vision_convert_messages(messages: list):
	    """
	    Converts given messages for GPT-4 Vision to Gemini format.

	    Reference: https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_python.ipynb

	    Args:
	        messages (list): The messages to convert. Each message is a dictionary with a "content" key holding a string or a list of elements:
	            - string content is concatenated to the prompt;
	            - a dict element with "type" == "text" has its "text" concatenated to the prompt;
	            - a dict element with "type" == "image_url" has its image_url["url"] collected as an image reference.

	    Returns:
	        list: The prompt string followed by one entry per image reference. Remote references are loaded with _load_image_from_url, everything else is opened as a local file path.

	    Raises:
	        Exception: if Pillow cannot be imported.
	    """
	    try:
	        from PIL import Image
	    except ImportError as err:
	        raise Exception(
	            "gemini image conversion failed please run `pip install Pillow`"
	        ) from err

	    prompt = ""
	    image_refs = []
	    for message in messages:
	        content = message["content"]
	        if isinstance(content, str):
	            prompt += content
	        elif isinstance(content, list):
	            # see https://docs.litellm.ai/docs/providers/openai#openai-vision-models
	            for element in content:
	                if not isinstance(element, dict):
	                    continue
	                if element["type"] == "text":
	                    prompt += element["text"]
	                elif element["type"] == "image_url":
	                    image_refs.append(element["image_url"]["url"])

	    # processing images passed to gemini
	    processed_images = []
	    for ref in image_refs:
	        # Keeps the original "https:/" substring check and additionally
	        # accepts plain http:// URLs, which used to be opened as file paths.
	        is_remote = "https:/" in ref or ref.startswith("http://")
	        if is_remote:
	            # Case 1: Image from URL
	            # NOTE(review): the loader can return None on a failed download,
	            # and that None is appended unchanged - confirm callers cope.
	            processed_images.append(_load_image_from_url(ref))
	        else:
	            # Case 2: Image filepath (e.g. temp.jpeg) given
	            processed_images.append(Image.open(ref))
	    return [prompt] + processed_images


	def gemini_text_image_pt(messages: list):
	    """
	    Collect the text and image URLs of OpenAI-vision-style messages for Gemini.

	    Target request shape, for reference:
	    {
	        "contents":[
	            {
	                "parts":[
	                    {"text": "What is this picture?"},
	                    {
	                        "inline_data": {
	                            "mime_type":"image/jpeg",
	                            "data": "'$(base64 -w0 image.jpg)'"
	                        }
	                    }
	                ]
	            }
	        ]
	    }

	    Returns:
	        list: The concatenated prompt text followed by the image URL strings
	        in the order they appear (URLs are not downloaded here).

	    Raises:
	        Exception: if google.generativeai cannot be imported.
	    """
	    try:
	        # Availability check only; the module itself is not used here.
	        import google.generativeai  # noqa: F401
	    except ImportError as err:
	        raise Exception(
	            "Importing google.generativeai failed, please run 'pip install -q google-generativeai'"
	        ) from err

	    prompt = ""
	    images = []
	    for message in messages:
	        content = message["content"]
	        if isinstance(content, str):
	            prompt += content
	        elif isinstance(content, list):
	            # see https://docs.litellm.ai/docs/providers/openai#openai-vision-models
	            for element in content:
	                if not isinstance(element, dict):
	                    continue
	                if element["type"] == "text":
	                    prompt += element["text"]
	                elif element["type"] == "image_url":
	                    images.append(element["image_url"]["url"])

	    return [prompt] + images


	# Function call template
	def function_call_prompt(messages: list, functions: list):
	    """Inject the available function definitions into the conversation.

	    The instruction text is appended to the content of every message whose
	    role contains "system". When there is no such message, a new system
	    message carrying the text is appended. ``messages`` is modified in place
	    and the same list is returned.
	    """
	    header = "Produce JSON OUTPUT ONLY! The following functions are available to you:"
	    function_prompt = header + "".join(f"\n{spec}\n" for spec in functions)

	    injected = False
	    for entry in messages:
	        if "system" not in entry["role"]:
	            continue
	        entry["content"] += function_prompt
	        injected = True

	    if not injected:
	        messages.append({"role": "system", "content": function_prompt})

	    return messages


	# Custom prompt template
	def custom_prompt(
	    role_dict: dict,
	    messages: list,
	    initial_prompt_value: str = "",
	    final_prompt_value: str = "",
	    bos_token: str = "",
	    eos_token: str = "",
	):
	    """Assemble a prompt from per-role pre/post strings.

	    Args:
	        role_dict: Maps a role name to a dict with optional "pre_message"
	            and "post_message" strings; missing roles or keys contribute
	            nothing.
	        messages: Chat messages (dicts with "role" and "content").
	        initial_prompt_value: Text placed right after the first bos token.
	        final_prompt_value: Text appended after the last message.
	        bos_token: Emitted at the very start, and again before the first
	            system/user message that follows an assistant message.
	        eos_token: Emitted after every assistant message.

	    Returns:
	        The assembled prompt string.
	    """
	    # Roles that start a new bos-delimited segment. "user" is the role name
	    # messages actually carry; previously only "system"/"human" were
	    # checked, so later user turns never received a bos token.
	    segment_openers = ("system", "user", "human")

	    pieces = [bos_token + initial_prompt_value]
	    bos_open = True
	    ## a bos token is at the start of a system / human message
	    ## an eos token is at the end of the assistant response to the message
	    for message in messages:
	        role = message["role"]

	        if role in segment_openers and not bos_open:
	            pieces.append(bos_token)
	            bos_open = True

	        role_format = role_dict.get(role, {})
	        pieces.append(
	            role_format.get("pre_message", "")
	            + message["content"]
	            + role_format.get("post_message", "")
	        )

	        if role == "assistant":
	            pieces.append(eos_token)
	            bos_open = False

	    pieces.append(final_prompt_value)
	    return "".join(pieces)


	def prompt_factory(
	    model: str,
	    messages: list,
	    custom_llm_provider: Optional[str] = None,
	    api_key: Optional[str] = None,
	):
	    """Pick and apply the prompt template for a model / provider.

	    Provider-specific formats (ollama, anthropic, together_ai, gemini) are
	    chosen first. Otherwise the lowercased model name is matched against
	    known model families, and any model without a dedicated template is
	    rendered with its Hugging Face chat template. If that step raises,
	    messages are formatted with default_pt.

	    Args:
	        model: Model name; matched case-insensitively.
	        messages: Chat messages (dicts with "role" and "content").
	        custom_llm_provider: Optional provider name.
	        api_key: Used as the token for the Together AI model lookup.

	    Returns:
	        The formatted prompt. The type depends on the selected template:
	        usually a string, but the ollama and gemini paths can return a dict
	        or a list.
	    """
	    original_model_name = model
	    model = model.lower()
	    if custom_llm_provider == "ollama":
	        return ollama_pt(model=model, messages=messages)
	    elif custom_llm_provider == "anthropic":
	        if any(tag in model for tag in ["claude-2.1", "claude-v2:1"]):
	            return claude_2_1_pt(messages=messages)
	        return anthropic_pt(messages=messages)
	    elif custom_llm_provider == "together_ai":
	        prompt_format, chat_template = get_model_info(token=api_key, model=model)
	        return format_prompt_togetherai(
	            messages=messages, prompt_format=prompt_format, chat_template=chat_template
	        )
	    elif custom_llm_provider == "gemini":
	        if model == "gemini-pro-vision":
	            return _gemini_vision_convert_messages(messages=messages)
	        return gemini_text_image_pt(messages=messages)

	    # Conditions are flat so that a model which matches a family but not its
	    # chat/instruct variant falls through to the generic path; the nested
	    # form used before returned None for those models.
	    try:
	        if "meta-llama/llama-2" in model and "chat" in model:
	            return llama_2_chat_pt(messages=messages)
	        # ``model`` is lowercased above, so compare against a lowercase name
	        # (the previous "180B" spelling could never match).
	        elif model == "tiiuae/falcon-180b-chat":
	            return falcon_chat_pt(messages=messages)
	        # Note: for the instruct models, it's best to use a User: .., Assistant:.. approach in your prompt template.
	        elif "tiiuae/falcon" in model and "instruct" in model:
	            return falcon_instruct_pt(messages=messages)
	        elif "mosaicml/mpt" in model and "chat" in model:
	            return mpt_chat_pt(messages=messages)
	        elif (
	            "codellama/codellama" in model or "togethercomputer/codellama" in model
	        ) and "instruct" in model:
	            # https://huggingface.co/blog/codellama#conversational-instructions
	            return llama_2_chat_pt(messages=messages)
	        elif "wizardlm/wizardcoder" in model:
	            return wizardcoder_pt(messages=messages)
	        elif "phind/phind-codellama" in model:
	            return phind_codellama_pt(messages=messages)
	        elif "togethercomputer/llama-2" in model and (
	            "instruct" in model or "chat" in model
	        ):
	            return llama_2_chat_pt(messages=messages)
	        elif model in [
	            "gryphe/mythomax-l2-13b",
	            "gryphe/mythomix-l2-13b",
	            "gryphe/mythologic-l2-13b",
	        ]:
	            return alpaca_pt(messages=messages)
	        else:
	            return hf_chat_template(original_model_name, messages)
	    except Exception:
	        # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
	        return default_pt(messages=messages)