import ast
import asyncio
import typing
from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union, ValuesView

import gradio_client  # type: ignore

from h2ogpt_client import _utils
from h2ogpt_client._h2ogpt_enums import (
    DocumentSubset,
    LangChainAction,
    LangChainMode,
    PromptType,
)


class Client:
    """h2oGPT Client."""

    def __init__(
        self,
        src: str,
        h2ogpt_key: Optional[str] = None,
        huggingface_token: Optional[str] = None,
    ):
        """
        Creates an h2oGPT client.

        :param src: either the full URL of the hosted h2oGPT
            (e.g. "http://0.0.0.0:7860", "https://fc752f297207f01c32.gradio.live")
            or the name of the Hugging Face Space to load (e.g. "h2oai/h2ogpt-chatbot")
        :param h2ogpt_key: access key for connecting to an h2oGPT server
        :param huggingface_token: Hugging Face token used to access private Spaces
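
        Example (illustrative sketch; the URL below is a placeholder and assumes
        a locally running h2oGPT server)::

            client = Client("http://0.0.0.0:7860")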
""" | |
self._client = gradio_client.Client( | |
src=src, hf_token=huggingface_token, serialize=False, verbose=False | |
) | |
self._h2ogpt_key = h2ogpt_key | |
self._text_completion = TextCompletionCreator(self) | |
self._chat_completion = ChatCompletionCreator(self) | |
def text_completion(self) -> "TextCompletionCreator": | |
"""Text completion.""" | |
return self._text_completion | |
def chat_completion(self) -> "ChatCompletionCreator": | |
"""Chat completion.""" | |
return self._chat_completion | |

    def _predict(self, *args, api_name: str) -> Any:
        # Submit the job to the Gradio endpoint and block until the result is ready.
        return self._client.submit(*args, api_name=api_name).result()

    async def _predict_async(self, *args, api_name: str) -> Any:
        # Submit the job and await its future without blocking the event loop.
        return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))


class TextCompletionCreator:
    """Builder that can create text completions."""

    def __init__(self, client: Client):
        self._client = client

    def create(
        self,
        prompt_type: PromptType = PromptType.plain,
        input_context_for_instruction: str = "",
        enable_sampler: bool = False,
        temperature: float = 0.1,
        top_p: float = 1.0,
        top_k: int = 40,
        beams: float = 1.0,
        early_stopping: bool = False,
        min_output_length: int = 0,
        max_output_length: int = 1024,
        max_time: int = 360,
        repetition_penalty: float = 1.07,
        number_returns: int = 1,
        system_pre_context: str = "",
        add_chat_history_to_context: bool = False,
        langchain_mode: LangChainMode = LangChainMode.DISABLED,
        system_prompt: str = "",
        visible_models: Union[str, list] = [],
        add_search_to_context: bool = False,
        chat_conversation: Optional[typing.List[typing.Tuple[str, str]]] = None,
        text_context_list: Optional[typing.List[str]] = None,
        docs_ordering_type: Optional[str] = None,
        min_max_new_tokens: Optional[int] = None,
    ) -> "TextCompletion":
""" | |
Creates a new text completion. | |
:param prompt_type: type of the prompt | |
:param input_context_for_instruction: input context for instruction | |
:param enable_sampler: enable or disable the sampler, required for use of | |
temperature, top_p, top_k | |
:param temperature: What sampling temperature to use, between 0 and 3. | |
Lower values will make it more focused and deterministic, but may lead | |
to repeat. Higher values will make the output more creative, but may | |
lead to hallucinations. | |
:param top_p: cumulative probability of tokens to sample from | |
:param top_k: number of tokens to sample from | |
:param beams: Number of searches for optimal overall probability. | |
Higher values uses more GPU memory and compute. | |
:param early_stopping: whether to stop early or not in beam search | |
:param min_output_length: minimum output length | |
:param max_output_length: maximum output length | |
:param max_time: maximum time to search optimal output | |
:param repetition_penalty: penalty for repetition | |
:param number_returns: | |
:param system_pre_context: directly pre-appended without prompt processing | |
:param langchain_mode: LangChain mode | |
:param add_chat_history_to_context: Whether to add chat history to context | |
:param system_prompt: Universal system prompt to override prompt_type's system | |
prompt | |
If pass 'None' or 'auto' or None, then automatic per-model value used | |
:param visible_models: Single string of base model name, single integer of position of model, to get resopnse from | |
:param add_search_to_context: Whether to add web search of query to context | |
:param chat_conversation: list of tuples of (human, bot) form | |
:param text_context_list: list of strings to use as context (up to allowed max_seq_len of model) | |
:param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval | |
:param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. | |
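
        Example (illustrative sketch; assumes a connected Client instance named
        ``client`` and a reachable h2oGPT server)::

            completion = client.text_completion().create(
                temperature=0.5, max_output_length=256
            )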
""" | |
params = _utils.to_h2ogpt_params(locals().copy()) | |
params["instruction"] = "" # empty when chat_mode is False | |
params["iinput"] = "" # only chat_mode is True | |
params["stream_output"] = False | |
params["prompt_type"] = prompt_type.value # convert to serializable type | |
params["prompt_dict"] = "" # empty as prompt_type cannot be 'custom' | |
params["chat"] = False | |
params["instruction_nochat"] = None # future prompt | |
params["langchain_mode"] = langchain_mode.value # convert to serializable type | |
params["add_chat_history_to_context"] = False # relevant only for the UI | |
params["langchain_action"] = LangChainAction.QUERY.value | |
params["langchain_agents"] = [] | |
params["top_k_docs"] = 4 # langchain: number of document chunks | |
params["chunk"] = True # langchain: whether to chunk documents | |
params["chunk_size"] = 512 # langchain: chunk size for document chunking | |
params["document_subset"] = DocumentSubset.Relevant.name | |
params["document_choice"] = [] | |
params["pre_prompt_query"] = "" | |
params["prompt_query"] = "" | |
params["pre_prompt_summary"] = "" | |
params["prompt_summary"] = "" | |
params["system_prompt"] = "" | |
params["image_loaders"] = [] | |
params["pdf_loaders"] = [] | |
params["url_loaders"] = [] | |
params["jq_schema"] = '.[]' | |
params["visible_models"] = visible_models | |
params["h2ogpt_key"] = self._client._h2ogpt_key | |
params["add_search_to_context"] = add_search_to_context | |
params["chat_conversation"] = chat_conversation | |
params["text_context_list"] = text_context_list | |
params["docs_ordering_type"] = docs_ordering_type | |
params["min_max_new_tokens"] = min_max_new_tokens | |
return TextCompletion(self._client, params) | |


class TextCompletion:
    """Text completion."""

    _API_NAME = "/submit_nochat_api"

    def __init__(self, client: Client, parameters: OrderedDict[str, Any]):
        self._client = client
        self._parameters = parameters

    def _get_parameters(self, prompt: str) -> OrderedDict[str, Any]:
        self._parameters["instruction_nochat"] = prompt
        return self._parameters

    @staticmethod
    def _get_reply(response: str) -> str:
        return ast.literal_eval(response)["response"]

    async def complete(self, prompt: str) -> str:
        """
        Complete this text completion.

        :param prompt: text prompt to generate completion for
        :return: response from the model
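
        Example (illustrative; assumes ``completion`` was obtained from
        ``client.text_completion().create()``)::

            reply = asyncio.run(completion.complete("Why is the sky blue?"))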
""" | |
response = await self._client._predict_async( | |
str(dict(self._get_parameters(prompt))), api_name=self._API_NAME | |
) | |
return self._get_reply(response) | |
def complete_sync(self, prompt: str) -> str: | |
""" | |
Complete this text completion synchronously. | |
:param prompt: text prompt to generate completion for | |
:return: response from the model | |
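
        Example (illustrative; assumes ``completion`` was obtained from
        ``client.text_completion().create()``)::

            reply = completion.complete_sync("Why is the sky blue?")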
""" | |
response = self._client._predict( | |
str(dict(self._get_parameters(prompt))), api_name=self._API_NAME | |
) | |
return self._get_reply(response) | |


class ChatCompletionCreator:
    """Builder that can create chat completions."""

    def __init__(self, client: Client):
        self._client = client

    def create(
        self,
        prompt_type: PromptType = PromptType.plain,
        input_context_for_instruction: str = "",
        enable_sampler: bool = False,
        temperature: float = 0.1,
        top_p: float = 1.0,
        top_k: int = 40,
        beams: float = 1.0,
        early_stopping: bool = False,
        min_output_length: int = 0,
        max_output_length: int = 1024,
        max_time: int = 360,
        repetition_penalty: float = 1.07,
        number_returns: int = 1,
        system_pre_context: str = "",
        langchain_mode: LangChainMode = LangChainMode.DISABLED,
        system_prompt: str = "",
        visible_models: Union[str, list] = [],
        add_search_to_context: bool = False,
        chat_conversation: Optional[typing.List[typing.Tuple[str, str]]] = None,
        text_context_list: Optional[typing.List[str]] = None,
        docs_ordering_type: Optional[str] = None,
        min_max_new_tokens: Optional[int] = None,
    ) -> "ChatCompletion":
""" | |
Creates a new chat completion. | |
:param prompt_type: type of the prompt | |
:param input_context_for_instruction: input context for instruction | |
:param enable_sampler: enable or disable the sampler, required for use of | |
temperature, top_p, top_k | |
:param temperature: What sampling temperature to use, between 0 and 3. | |
Lower values will make it more focused and deterministic, but may lead | |
to repeat. Higher values will make the output more creative, but may | |
lead to hallucinations. | |
:param top_p: cumulative probability of tokens to sample from | |
:param top_k: number of tokens to sample from | |
:param beams: Number of searches for optimal overall probability. | |
Higher values uses more GPU memory and compute. | |
:param early_stopping: whether to stop early or not in beam search | |
:param min_output_length: minimum output length | |
:param max_output_length: maximum output length | |
:param max_time: maximum time to search optimal output | |
:param repetition_penalty: penalty for repetition | |
:param number_returns: | |
:param system_pre_context: directly pre-appended without prompt processing | |
:param langchain_mode: LangChain mode | |
:param system_prompt: Universal system prompt to override prompt_type's system | |
prompt | |
:param visible_models: Single string of base model name, single integer of position of model, to get resopnse from | |
:param add_search_to_context: Whether to add web search of query to context | |
:param chat_conversation: list of tuples of (human, bot) form | |
:param text_context_list: list of strings to use as context (up to allowed max_seq_len of model) | |
:param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval | |
:param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc. | |
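
        Example (illustrative sketch; assumes a connected Client instance named
        ``client`` and a reachable h2oGPT server)::

            chat_completion = client.chat_completion().create(temperature=0.5)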
""" | |
params = _utils.to_h2ogpt_params(locals().copy()) | |
params["instruction"] = None # future prompts | |
params["iinput"] = "" # ?? | |
params["stream_output"] = False | |
params["prompt_type"] = prompt_type.value # convert to serializable type | |
params["prompt_dict"] = "" # empty as prompt_type cannot be 'custom' | |
params["chat"] = True | |
params["instruction_nochat"] = "" # empty when chat_mode is True | |
params["langchain_mode"] = langchain_mode.value # convert to serializable type | |
params["add_chat_history_to_context"] = False # relevant only for the UI | |
params["system_prompt"] = "" | |
params["langchain_action"] = LangChainAction.QUERY.value | |
params["langchain_agents"] = [] | |
params["top_k_docs"] = 4 # langchain: number of document chunks | |
params["chunk"] = True # langchain: whether to chunk documents | |
params["chunk_size"] = 512 # langchain: chunk size for document chunking | |
params["document_subset"] = DocumentSubset.Relevant.name | |
params["document_choice"] = [] | |
params["pre_prompt_query"] = "" | |
params["prompt_query"] = "" | |
params["pre_prompt_summary"] = "" | |
params["prompt_summary"] = "" | |
params["system_prompt"] = "" | |
params["image_loaders"] = [] | |
params["pdf_loaders"] = [] | |
params["url_loaders"] = [] | |
params["jq_schema"] = '.[]' | |
params["visible_models"] = visible_models | |
params["h2ogpt_key"] = self._client._h2ogpt_key | |
params["add_search_to_context"] = add_search_to_context | |
params["chat_conversation"] = chat_conversation | |
params["text_context_list"] = text_context_list | |
params["docs_ordering_type"] = docs_ordering_type | |
params["min_max_new_tokens"] = min_max_new_tokens | |
params["chatbot"] = [] # chat history (FIXME: Only works if 1 model?) | |
return ChatCompletion(self._client, params) | |


class ChatCompletion:
    """Chat completion."""

    _API_NAME = "/instruction_bot"

    def __init__(self, client: Client, parameters: OrderedDict[str, Any]):
        self._client = client
        self._parameters = parameters

    def _get_parameters(self, prompt: str) -> ValuesView:
        self._parameters["instruction"] = prompt
        self._parameters["chatbot"] += [[prompt, None]]
        return self._parameters.values()

    def _get_reply(self, response: Tuple[List[List[str]]]) -> Dict[str, str]:
        self._parameters["chatbot"][-1][1] = response[0][-1][1]
        return {"user": response[0][-1][0], "gpt": response[0][-1][1]}

    async def chat(self, prompt: str) -> Dict[str, str]:
        """
        Complete this chat completion.

        :param prompt: text prompt to generate completions for
        :return: chat reply
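
        Example (illustrative; assumes ``chat_completion`` was obtained from
        ``client.chat_completion().create()``)::

            reply = asyncio.run(chat_completion.chat("Hey!"))
            print(reply["gpt"])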
""" | |
response = await self._client._predict_async( | |
*self._get_parameters(prompt), api_name=self._API_NAME | |
) | |
return self._get_reply(response) | |
def chat_sync(self, prompt: str) -> Dict[str, str]: | |
""" | |
Complete this chat completion. | |
:param prompt: text prompt to generate completions for | |
:returns chat reply | |
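
        Example (illustrative; assumes ``chat_completion`` was obtained from
        ``client.chat_completion().create()``)::

            reply = chat_completion.chat_sync("Hey!")
            print(reply["gpt"])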
""" | |
response = self._client._predict( | |
*self._get_parameters(prompt), api_name=self._API_NAME | |
) | |
return self._get_reply(response) | |
def chat_history(self) -> List[Dict[str, str]]: | |
"""Returns the full chat history.""" | |
return [{"user": i[0], "gpt": i[1]} for i in self._parameters["chatbot"]] | |
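

# Illustrative end-to-end usage sketch, not part of the client API. It assumes an
# h2oGPT server is reachable at the placeholder URL below; adjust the URL and pass
# an h2ogpt_key if your deployment requires one.
if __name__ == "__main__":
    client = Client("http://0.0.0.0:7860")

    # One-shot text completion.
    completion = client.text_completion().create(temperature=0.5)
    print(completion.complete_sync("Why is the sky blue?"))

    # Multi-turn chat; the ChatCompletion object keeps the conversation state.
    chat_completion = client.chat_completion().create()
    print(chat_completion.chat_sync("Hey!")["gpt"])
    print(chat_completion.chat_sync("What did I just say?")["gpt"])
    print(chat_completion.chat_history())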