import ast
import asyncio
import typing
from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union, ValuesView

import gradio_client  # type: ignore

from h2ogpt_client import _utils
from h2ogpt_client._h2ogpt_enums import (
    DocumentSubset,
    LangChainAction,
    LangChainMode,
    PromptType,
)


class Client:
    """h2oGPT Client."""

    def __init__(
        self,
        src: str,
        h2ogpt_key: Optional[str] = None,
        huggingface_token: Optional[str] = None,
    ):
        """
        Creates a GPT client.

        :param src: either the full URL to the hosted h2oGPT
            (e.g. "http://0.0.0.0:7860", "https://fc752f297207f01c32.gradio.live")
            or the name of the Hugging Face Space to load
            (e.g. "h2oai/h2ogpt-chatbot")
        :param h2ogpt_key: access key to connect with a h2oGPT server
        :param huggingface_token: Hugging Face token to use to access private Spaces
        """
        self._client = gradio_client.Client(
            src=src, hf_token=huggingface_token, serialize=False, verbose=False
        )
        self._h2ogpt_key = h2ogpt_key
        self._text_completion = TextCompletionCreator(self)
        self._chat_completion = ChatCompletionCreator(self)

    @property
    def text_completion(self) -> "TextCompletionCreator":
        """Text completion."""
        return self._text_completion

    @property
    def chat_completion(self) -> "ChatCompletionCreator":
        """Chat completion."""
        return self._chat_completion

    def _predict(self, *args, api_name: str) -> Any:
        return self._client.submit(*args, api_name=api_name).result()

    async def _predict_async(self, *args, api_name: str) -> Any:
        return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))
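
# Example (sketch): constructing a client against a self-hosted server. The URL
# and key below are placeholders for a real deployment, not shipped defaults.
#
#   client = Client("http://0.0.0.0:7860", h2ogpt_key="<access key>")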


class TextCompletionCreator:
    """Builder that can create text completions."""

    def __init__(self, client: Client):
        self._client = client

    def create(
        self,
        prompt_type: PromptType = PromptType.plain,
        input_context_for_instruction: str = "",
        enable_sampler: bool = False,
        temperature: float = 0.1,
        top_p: float = 1.0,
        top_k: int = 40,
        beams: float = 1.0,
        early_stopping: bool = False,
        min_output_length: int = 0,
        max_output_length: int = 1024,
        max_time: int = 360,
        repetition_penalty: float = 1.07,
        number_returns: int = 1,
        system_pre_context: str = "",
        add_chat_history_to_context: bool = False,
        langchain_mode: LangChainMode = LangChainMode.DISABLED,
        system_prompt: str = "",
        visible_models: Union[str, list] = [],
        add_search_to_context: bool = False,
        chat_conversation: Optional[typing.List[typing.Tuple[str, str]]] = None,
        text_context_list: Optional[typing.List[str]] = None,
        docs_ordering_type: Optional[str] = None,
        min_max_new_tokens: Optional[int] = None,
    ) -> "TextCompletion":
        """
        Creates a new text completion.

        :param prompt_type: type of the prompt
        :param input_context_for_instruction: input context for instruction
        :param enable_sampler: enable or disable the sampler; required for use of
            temperature, top_p, top_k
        :param temperature: sampling temperature to use, between 0 and 3.
            Lower values make the output more focused and deterministic but may
            lead to repetition; higher values make it more creative but may lead
            to hallucinations.
        :param top_p: cumulative probability of tokens to sample from
        :param top_k: number of tokens to sample from
        :param beams: number of searches for optimal overall probability.
            Higher values use more GPU memory and compute.
        :param early_stopping: whether to stop early in beam search
        :param min_output_length: minimum output length
        :param max_output_length: maximum output length
        :param max_time: maximum time to search for an optimal output
        :param repetition_penalty: penalty for repetition
        :param number_returns: number of completions to return
        :param system_pre_context: directly pre-appended without prompt processing
        :param add_chat_history_to_context: whether to add chat history to context
        :param langchain_mode: LangChain mode
        :param system_prompt: universal system prompt to override prompt_type's
            system prompt. If 'None', 'auto', or None is passed, an automatic
            per-model value is used.
        :param visible_models: single string of the base model name, or single
            integer of the model's position, to get the response from
        :param add_search_to_context: whether to add a web search of the query
            to the context
        :param chat_conversation: list of (human, bot) tuples
        :param text_context_list: list of strings to use as context (up to the
            allowed max_seq_len of the model)
        :param docs_ordering_type: by default uses 'reverse_ucurve_sort' for
            optimal retrieval
        :param min_max_new_tokens: minimum value for max_new_tokens when
            auto-adjusting for content of prompt, docs, etc.
        """
        params = _utils.to_h2ogpt_params(locals().copy())
        params["instruction"] = ""  # empty when chat_mode is False
        params["iinput"] = ""  # used only when chat_mode is True
        params["stream_output"] = False
        params["prompt_type"] = prompt_type.value  # convert to serializable type
        params["prompt_dict"] = ""  # empty as prompt_type cannot be 'custom'
        params["chat"] = False
        params["instruction_nochat"] = None  # future prompt
        params["langchain_mode"] = langchain_mode.value  # convert to serializable type
        params["add_chat_history_to_context"] = False  # relevant only for the UI
        params["langchain_action"] = LangChainAction.QUERY.value
        params["langchain_agents"] = []
        params["top_k_docs"] = 4  # langchain: number of document chunks
        params["chunk"] = True  # langchain: whether to chunk documents
        params["chunk_size"] = 512  # langchain: chunk size for document chunking
        params["document_subset"] = DocumentSubset.Relevant.name
        params["document_choice"] = []
        params["pre_prompt_query"] = ""
        params["prompt_query"] = ""
        params["pre_prompt_summary"] = ""
        params["prompt_summary"] = ""
        params["system_prompt"] = system_prompt
        params["image_loaders"] = []
        params["pdf_loaders"] = []
        params["url_loaders"] = []
        params["jq_schema"] = '.[]'
        params["visible_models"] = visible_models
        params["h2ogpt_key"] = self._client._h2ogpt_key
        params["add_search_to_context"] = add_search_to_context
        params["chat_conversation"] = chat_conversation
        params["text_context_list"] = text_context_list
        params["docs_ordering_type"] = docs_ordering_type
        params["min_max_new_tokens"] = min_max_new_tokens
        return TextCompletion(self._client, params)


class TextCompletion:
    """Text completion."""

    _API_NAME = "/submit_nochat_api"

    def __init__(self, client: Client, parameters: OrderedDict[str, Any]):
        self._client = client
        self._parameters = parameters

    def _get_parameters(self, prompt: str) -> OrderedDict[str, Any]:
        self._parameters["instruction_nochat"] = prompt
        return self._parameters

    @staticmethod
    def _get_reply(response: str) -> str:
        return ast.literal_eval(response)["response"]

    async def complete(self, prompt: str) -> str:
        """
        Complete this text completion.

        :param prompt: text prompt to generate completion for
        :return: response from the model
        """
        response = await self._client._predict_async(
            str(dict(self._get_parameters(prompt))), api_name=self._API_NAME
        )
        return self._get_reply(response)

    def complete_sync(self, prompt: str) -> str:
        """
        Complete this text completion synchronously.

        :param prompt: text prompt to generate completion for
        :return: response from the model
        """
        response = self._client._predict(
            str(dict(self._get_parameters(prompt))), api_name=self._API_NAME
        )
        return self._get_reply(response)
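
# Example (sketch): a one-off completion, first synchronously and then via
# asyncio. The server URL, sampling settings, and prompt are illustrative
# placeholders.
#
#   client = Client("http://0.0.0.0:7860")
#   completion = client.text_completion.create(enable_sampler=True, temperature=0.5)
#   print(completion.complete_sync("Why is the sky blue?"))
#   print(asyncio.run(completion.complete("Why is the sky blue?")))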


class ChatCompletionCreator:
    """Builder that can create chat completions."""

    def __init__(self, client: Client):
        self._client = client

    def create(
        self,
        prompt_type: PromptType = PromptType.plain,
        input_context_for_instruction: str = "",
        enable_sampler: bool = False,
        temperature: float = 0.1,
        top_p: float = 1.0,
        top_k: int = 40,
        beams: float = 1.0,
        early_stopping: bool = False,
        min_output_length: int = 0,
        max_output_length: int = 1024,
        max_time: int = 360,
        repetition_penalty: float = 1.07,
        number_returns: int = 1,
        system_pre_context: str = "",
        langchain_mode: LangChainMode = LangChainMode.DISABLED,
        system_prompt: str = "",
        visible_models: Union[str, list] = [],
        add_search_to_context: bool = False,
        chat_conversation: Optional[typing.List[typing.Tuple[str, str]]] = None,
        text_context_list: Optional[typing.List[str]] = None,
        docs_ordering_type: Optional[str] = None,
        min_max_new_tokens: Optional[int] = None,
    ) -> "ChatCompletion":
        """
        Creates a new chat completion.

        :param prompt_type: type of the prompt
        :param input_context_for_instruction: input context for instruction
        :param enable_sampler: enable or disable the sampler; required for use of
            temperature, top_p, top_k
        :param temperature: sampling temperature to use, between 0 and 3.
            Lower values make the output more focused and deterministic but may
            lead to repetition; higher values make it more creative but may lead
            to hallucinations.
        :param top_p: cumulative probability of tokens to sample from
        :param top_k: number of tokens to sample from
        :param beams: number of searches for optimal overall probability.
            Higher values use more GPU memory and compute.
        :param early_stopping: whether to stop early in beam search
        :param min_output_length: minimum output length
        :param max_output_length: maximum output length
        :param max_time: maximum time to search for an optimal output
        :param repetition_penalty: penalty for repetition
        :param number_returns: number of completions to return
        :param system_pre_context: directly pre-appended without prompt processing
        :param langchain_mode: LangChain mode
        :param system_prompt: universal system prompt to override prompt_type's
            system prompt
        :param visible_models: single string of the base model name, or single
            integer of the model's position, to get the response from
        :param add_search_to_context: whether to add a web search of the query
            to the context
        :param chat_conversation: list of (human, bot) tuples
        :param text_context_list: list of strings to use as context (up to the
            allowed max_seq_len of the model)
        :param docs_ordering_type: by default uses 'reverse_ucurve_sort' for
            optimal retrieval
        :param min_max_new_tokens: minimum value for max_new_tokens when
            auto-adjusting for content of prompt, docs, etc.
        """
        params = _utils.to_h2ogpt_params(locals().copy())
        params["instruction"] = None  # future prompts
        params["iinput"] = ""  # ??
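
# Example (sketch): creating a chat completion seeded with prior turns and
# grounding text through the optional parameters above. The system prompt,
# conversation, and context strings are made up for illustration.
#
#   chat = client.chat_completion.create(
#       system_prompt="You are a concise assistant.",
#       chat_conversation=[("Hi", "Hello! How can I help?")],
#       text_context_list=["h2oGPT is an open-source project by H2O.ai."],
#   )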
        params["stream_output"] = False
        params["prompt_type"] = prompt_type.value  # convert to serializable type
        params["prompt_dict"] = ""  # empty as prompt_type cannot be 'custom'
        params["chat"] = True
        params["instruction_nochat"] = ""  # empty when chat_mode is True
        params["langchain_mode"] = langchain_mode.value  # convert to serializable type
        params["add_chat_history_to_context"] = False  # relevant only for the UI
        params["system_prompt"] = system_prompt
        params["langchain_action"] = LangChainAction.QUERY.value
        params["langchain_agents"] = []
        params["top_k_docs"] = 4  # langchain: number of document chunks
        params["chunk"] = True  # langchain: whether to chunk documents
        params["chunk_size"] = 512  # langchain: chunk size for document chunking
        params["document_subset"] = DocumentSubset.Relevant.name
        params["document_choice"] = []
        params["pre_prompt_query"] = ""
        params["prompt_query"] = ""
        params["pre_prompt_summary"] = ""
        params["prompt_summary"] = ""
        params["image_loaders"] = []
        params["pdf_loaders"] = []
        params["url_loaders"] = []
        params["jq_schema"] = '.[]'
        params["visible_models"] = visible_models
        params["h2ogpt_key"] = self._client._h2ogpt_key
        params["add_search_to_context"] = add_search_to_context
        params["chat_conversation"] = chat_conversation
        params["text_context_list"] = text_context_list
        params["docs_ordering_type"] = docs_ordering_type
        params["min_max_new_tokens"] = min_max_new_tokens
        params["chatbot"] = []  # chat history (FIXME: Only works if 1 model?)
        return ChatCompletion(self._client, params)


class ChatCompletion:
    """Chat completion."""

    _API_NAME = "/instruction_bot"

    def __init__(self, client: Client, parameters: OrderedDict[str, Any]):
        self._client = client
        self._parameters = parameters

    def _get_parameters(self, prompt: str) -> ValuesView:
        self._parameters["instruction"] = prompt
        self._parameters["chatbot"] += [[prompt, None]]
        return self._parameters.values()

    def _get_reply(self, response: Tuple[List[List[str]]]) -> Dict[str, str]:
        self._parameters["chatbot"][-1][1] = response[0][-1][1]
        return {"user": response[0][-1][0], "gpt": response[0][-1][1]}

    async def chat(self, prompt: str) -> Dict[str, str]:
        """
        Complete this chat completion.

        :param prompt: text prompt to generate completions for
        :return: chat reply
        """
        response = await self._client._predict_async(
            *self._get_parameters(prompt), api_name=self._API_NAME
        )
        return self._get_reply(response)

    def chat_sync(self, prompt: str) -> Dict[str, str]:
        """
        Complete this chat completion synchronously.

        :param prompt: text prompt to generate completions for
        :return: chat reply
        """
        response = self._client._predict(
            *self._get_parameters(prompt), api_name=self._API_NAME
        )
        return self._get_reply(response)

    def chat_history(self) -> List[Dict[str, str]]:
        """Returns the full chat history."""
        return [{"user": i[0], "gpt": i[1]} for i in self._parameters["chatbot"]]
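

if __name__ == "__main__":
    # Minimal smoke test, assuming an h2oGPT server is reachable at the
    # placeholder URL below; adjust the address (and pass h2ogpt_key if the
    # server requires one) before running.
    client = Client("http://0.0.0.0:7860")

    chat = client.chat_completion.create()
    print(chat.chat_sync("What is the capital of France?"))
    print(chat.chat_sync("And of Germany?"))

    # chat_history() returns every (user, gpt) turn accumulated so far.
    for turn in chat.chat_history():
        print(f"user: {turn['user']!r} -> gpt: {turn['gpt']!r}")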