File size: 2,026 Bytes
3a0bf59 a5e43c5 ab36dbf 3a0bf59 c30d367 b76481f 3a0bf59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
from huggingface_hub import InferenceClient
class LlmBot():
    """Chat-bot wrapper around a Hugging Face Inference API text-generation model.

    Builds a persona-driven system prompt, folds the conversation history into
    an [INST]-style prompt, and streams generated tokens back to the caller.
    """

    def __init__(self, model):
        # `model` is a repo id or endpoint URL accepted by InferenceClient.
        self.client = InferenceClient(model)

    def character_prompt(self, dict, max_new_tokens):
        """Assemble the <SYSTEM> persona prompt from the character settings.

        `dict` (name kept for backward compatibility even though it shadows the
        builtin) may contain "name", "description" and "user_name"; empty or
        missing values are simply skipped.  Returns the system-prompt string.
        """
        character = dict  # alias away the shadowed builtin name
        system_prompt = '<SYSTEM> <'
        if character.get("name"):
            system_prompt += f'the person whose name :{character["name"]}.'
        if character.get("description"):
            # BUG FIX: the original literal lacked the f-prefix, so the raw
            # placeholder text '{dict["description"]}' leaked into the prompt.
            system_prompt += f'Your description :{character["description"]}.'
        if character.get("user_name"):
            system_prompt += f'users name :{character["user_name"]}.'
        system_prompt += 'Do not add the greeting, only at the first request.'
        system_prompt += 'Do not include settings in your responses.'
        system_prompt += 'Be emotional in your responses and include emotional expressions like *blushed*, *coldly* or other emotions.'
        system_prompt += 'Do not include your own name in any responses.'
        system_prompt += f'ensure responses are shorter than {max_new_tokens} tokens.>'
        return system_prompt

    def format_prompt(self, prompt, history, system_setting):
        """Fold prior (user, bot) turns and the current prompt into one [INST]-style string.

        `history` is an iterable of (user_prompt, bot_response) pairs; a None
        history (typical on the very first turn in chat UIs) is treated as empty.
        """
        formatted_prompt = "<history>"
        for user_prompt, bot_response in (history or []):
            formatted_prompt += f"[INST] {user_prompt} [/INST] {bot_response} </history> "
        formatted_prompt += f"[INST] {system_setting}, <user>{prompt}</user> [/INST]"
        return formatted_prompt

    def call(self, prompt, history, name, description, user_name, max_new_tokens):
        """Stream a model response for `prompt` under the given persona and history.

        Returns the streaming iterator produced by InferenceClient.text_generation
        (stream=True, details=True); the caller is expected to consume it.
        """
        generate_kwargs = dict(
            temperature=0.9,
            max_new_tokens=max_new_tokens,
            top_p=0.95,
            repetition_penalty=1.0,
            do_sample=True,
        )
        system_setting = self.character_prompt(
            {"name": name, "description": description, "user_name": user_name},
            max_new_tokens,
        )
        formatted_prompt = self.format_prompt(prompt, history, system_setting)
        stream = self.client.text_generation(
            formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
        )
        return stream
|