from huggingface_hub import InferenceClient


class LlmBot:
    """Thin wrapper around a text-generation model served via the Hugging Face Inference API."""

    def __init__(self, model):
        self.client = InferenceClient(model)

    def character_prompt(self, character, max_new_tokens):
        """Build the <SYSTEM> instruction block from the character settings."""
        system_prompt = '<SYSTEM> <'
        if character["name"]:
            system_prompt += f'You are the person whose name is: {character["name"]}. '
        if character["description"]:
            # Note: this string was missing its f-prefix, so the description was never interpolated.
            system_prompt += f'Your description: {character["description"]}. '
        if character["user_name"]:
            system_prompt += f'The user\'s name is: {character["user_name"]}. '
        system_prompt += 'Only greet the user on the first request; do not repeat the greeting afterwards. '
        system_prompt += 'Do not include these settings in your responses. '
        system_prompt += 'Be emotional in your responses and include emotional expressions such as *blushed*, *coldly*, or others. '
        system_prompt += f'Ensure responses are shorter than {max_new_tokens} tokens.>'
        return system_prompt
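
    # Example (hypothetical inputs): character_prompt({"name": "Mira",
    # "description": "a shy librarian", "user_name": "Alex"}, 256) yields roughly:
    # '<SYSTEM> <You are the person whose name is: Mira. Your description: a shy
    # librarian. The user's name is: Alex. ...behaviour rules... Ensure responses
    # are shorter than 256 tokens.>'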

    def format_prompt(self, prompt, history, system_setting):
        """Fold the chat history and the new user message into a single prompt string."""
        formatted_prompt = "<history>"
        for user_prompt, bot_response in history:
            formatted_prompt += f"[INST] {user_prompt} [/INST] {bot_response} "
        # Close the history tag once, after the loop; the original closed it on
        # every turn and never closed it at all when the history was empty.
        formatted_prompt += "</history> "
        formatted_prompt += f"[INST] {system_setting}, <user>{prompt}</user> [/INST]"
        return formatted_prompt
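
    # With history = [("hi", "hello")], the result has this shape (hypothetical values):
    # "<history>[INST] hi [/INST] hello </history> [INST] <system prompt>, <user>how are you?</user> [/INST]"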

    def call(self, prompt, history, name, description, user_name, max_new_tokens):
        """Stream a generated reply for the given prompt, history, and character settings."""
        generate_kwargs = dict(
            temperature=0.9,
            max_new_tokens=max_new_tokens,
            top_p=0.95,
            repetition_penalty=1.0,
            do_sample=True,
        )
        system_setting = self.character_prompt(
            {"name": name, "description": description, "user_name": user_name},
            max_new_tokens,
        )
        formatted_prompt = self.format_prompt(prompt, history, system_setting)
        # Returns an iterator of token events; the caller is responsible for consuming it.
        stream = self.client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )
        return stream
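

# Minimal usage sketch. Assumptions: the model id and the character values below
# are illustrative only, not part of this module; the [INST] markers above suggest
# a Mistral-style instruct model. With stream=True and details=True,
# text_generation yields TextGenerationStreamOutput events whose generated text is
# available as event.token.text.
if __name__ == "__main__":
    bot = LlmBot("mistralai/Mistral-7B-Instruct-v0.2")  # hypothetical model id
    history = []  # list of (user_prompt, bot_response) pairs
    stream = bot.call(
        prompt="Hello there!",
        history=history,
        name="Mira",                    # hypothetical character name
        description="a shy librarian",  # hypothetical description
        user_name="Alex",               # hypothetical user name
        max_new_tokens=256,
    )
    for event in stream:
        # Skip special tokens (e.g. end-of-sequence) and print text as it arrives.
        if not event.token.special:
            print(event.token.text, end="", flush=True)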