|
from huggingface_hub import InferenceClient |
|
|
|
class LlmBot():
    """Chat bot backed by a Hugging Face Inference API text-generation model.

    Builds a character/persona system prompt, formats the conversation in
    ``[INST]`` instruction style, and streams generations from the endpoint.
    """

    def __init__(self, model):
        # `model` is a model id or endpoint URL accepted by InferenceClient.
        self.client = InferenceClient(model)

    def character_prompt(self, persona, max_new_tokens):
        """Assemble the ``<SYSTEM> <...>`` instruction block for the character.

        persona: dict with optional keys "name", "description", "user_name";
            falsy or missing values are simply skipped (``.get`` avoids the
            KeyError the original raised on absent keys).
        max_new_tokens: soft response-length limit quoted in the instructions.
        Returns the assembled system-prompt string.
        """
        system_prompt = '<SYSTEM> <'
        if persona.get("name"):
            system_prompt += f'the person whose name :{persona["name"]}.'
        if persona.get("description"):
            # BUG FIX: this literal was missing its f-prefix, so the raw text
            # '{dict["description"]}' was sent to the model instead of the value.
            system_prompt += f'Your description :{persona["description"]}.'
        if persona.get("user_name"):
            system_prompt += f'users name :{persona["user_name"]}.'
        system_prompt += 'Do not add the greeting, only at the first request.'
        system_prompt += 'Do not include settings in your responses.'
        system_prompt += 'Be emotional in your responses and include emotional expressions like *blushed*, *coldly* or other emotions.'
        system_prompt += 'Do not include your own name in any responses.'
        system_prompt += f'ensure responses are shorter than {max_new_tokens} tokens.>'
        return system_prompt

    def format_prompt(self, prompt, history, system_setting):
        """Render past turns plus the new user prompt in ``[INST]`` format.

        prompt: the new user message.
        history: iterable of (user_prompt, bot_response) pairs, oldest first.
        system_setting: system block from :meth:`character_prompt`.
        Returns the full prompt string passed to ``text_generation``.
        """
        formatted_prompt = "<history>"
        for user_prompt, bot_response in history:
            formatted_prompt += f"[INST] {user_prompt} [/INST] {bot_response} "
        # BUG FIX: the closing </history> tag was emitted once per history
        # turn inside the loop; close the section exactly once instead.
        formatted_prompt += "</history> "
        formatted_prompt += f"[INST] {system_setting}, <user>{prompt}</user> [/INST]"
        return formatted_prompt

    def call(self, prompt, history, name, description, user_name, max_new_tokens):
        """Stream a character-conditioned reply from the model.

        Returns the streaming iterator produced by
        ``InferenceClient.text_generation(..., stream=True, details=True)``.
        """
        generate_kwargs = dict(
            temperature=0.9,
            max_new_tokens=max_new_tokens,
            top_p=0.95,
            repetition_penalty=1.0,
            do_sample=True,
        )
        system_setting = self.character_prompt(
            {"name": name, "description": description, "user_name": user_name},
            max_new_tokens,
        )
        formatted_prompt = self.format_prompt(prompt, history, system_setting)
        stream = self.client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )
        return stream
|
|
|
|