from huggingface_hub import InferenceClient


class LlmBot:
    """Character chat bot that streams completions from an ``InferenceClient``.

    The bot builds a persona "system" instruction, wraps the conversation in
    ``[INST] ... [/INST]`` tags and streams the generated text back to the
    caller.
    """

    def __init__(self, model):
        """Create the inference client.

        Args:
            model: Value passed straight to ``InferenceClient`` to select the
                model to query.
        """
        self.client = InferenceClient(model)

    def character_prompt(self, dict, max_new_tokens):
        """Build the persona instruction that is prepended to the user prompt.

        Args:
            dict: Mapping that may contain the keys ``"name"``,
                ``"description"`` and ``"user_name"``. Missing or falsy
                entries are skipped.
            max_new_tokens: Token budget quoted in the instruction text.

        Returns:
            The instruction string, delimited by ``' <'`` and ``'>'``.
        """
        # NOTE: the parameter name shadows the builtin ``dict``; it is kept so
        # existing keyword callers keep working, and aliased locally instead.
        persona = dict

        parts = [' <']

        name = persona.get("name")
        if name:
            parts.append(f'the person whose name :{name}.')

        description = persona.get("description")
        if description:
            # This literal previously lacked the ``f`` prefix, so the raw
            # placeholder text was sent instead of the description.
            parts.append(f'Your description :{description}.')

        user_name = persona.get("user_name")
        if user_name:
            parts.append(f'users name :{user_name}.')

        parts.append('Do not add the greeting, only at the first request.')
        parts.append('Be emotional in your responses.')
        parts.append('Do not include your own name in any responses.')
        parts.append(
            f'ensure responses are shorter than {max_new_tokens} tokens.>'
        )
        return ''.join(parts)

    def format_prompt(self, prompt, history, system_setting):
        """Serialise the conversation into a single ``[INST]``-tagged prompt.

        Args:
            prompt: The new user message.
            history: Iterable of ``(user_prompt, bot_response)`` pairs from
                earlier turns; ``None`` is treated as no history.
            system_setting: Persona instruction placed in front of ``prompt``
                in the final turn.

        Returns:
            The formatted prompt string.
        """
        turns = [
            f"[INST] {user_prompt} [/INST] {bot_response} "
            for user_prompt, bot_response in (history or ())
        ]
        turns.append(f"[INST] {system_setting}, {prompt} [/INST]")
        return "".join(turns)

    def call(self, prompt, history, name, description, user_name,
             max_new_tokens):
        """Stream the bot's reply to ``prompt``.

        This is a generator: after every streamed token it yields the reply
        text accumulated so far (not just the newest token).

        Args:
            prompt: The new user message.
            history: Iterable of ``(user_prompt, bot_response)`` pairs.
            name: Character name for the persona instruction.
            description: Character description for the persona instruction.
            user_name: Name of the user for the persona instruction.
            max_new_tokens: Generation limit, also quoted in the persona
                instruction.

        Yields:
            The cumulative reply text after each received token.

        Returns:
            The complete reply, available as ``StopIteration.value`` or via
            ``yield from``.
        """
        generate_kwargs = {
            "temperature": 0.9,
            "max_new_tokens": max_new_tokens,
            "top_p": 0.95,
            "repetition_penalty": 1.0,
            "do_sample": True,
        }

        system_setting = self.character_prompt(
            {"name": name, "description": description, "user_name": user_name},
            max_new_tokens,
        )
        formatted_prompt = self.format_prompt(prompt, history, system_setting)

        # Must go through ``self.client``; a bare ``client`` is not defined in
        # this scope and raised NameError.
        stream = self.client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )

        output = ""
        for response in stream:
            output += response.token.text
            yield output
        return output