|
from huggingface_hub import InferenceClient |
|
|
|
class LlmBot():
    """Chat bot backed by a Hugging Face Inference API text-generation model.

    Builds a character/persona system prompt, formats the conversation in
    ``[INST]`` instruction style, and streams generations from the endpoint.
    """

    def __init__(self, model):
        # `model` is a model id or endpoint URL accepted by InferenceClient.
        self.client = InferenceClient(model)

    def character_prompt(self, persona, max_new_tokens):
        """Assemble the ``<SYSTEM> <...>`` instruction block for the character.

        persona: dict with optional keys "name", "description", "user_name";
            falsy or missing values are simply skipped (``.get`` avoids the
            KeyError the original raised on absent keys).
        max_new_tokens: soft response-length limit quoted in the instructions.
        Returns the assembled system-prompt string.
        """
        system_prompt = '<SYSTEM> <'
        if persona.get("name"):
            system_prompt += f'the person whose name :{persona["name"]}.'
        if persona.get("description"):
            # BUG FIX: this literal was missing its f-prefix, so the raw text
            # '{dict["description"]}' was sent to the model instead of the value.
            system_prompt += f'Your description :{persona["description"]}.'
        if persona.get("user_name"):
            system_prompt += f'users name :{persona["user_name"]}.'
        system_prompt += 'Do not add the greeting, only at the first request.'
        system_prompt += 'Do not include settings in your responses.'
        system_prompt += 'Be emotional in your responses and include emotional expressions like *blushed*, *coldly* or other emotions.'
        system_prompt += 'Do not include your own name in any responses.'
        system_prompt += f'ensure responses are shorter than {max_new_tokens} tokens.>'
        return system_prompt

    def format_prompt(self, prompt, history, system_setting):
        """Render past turns plus the new user prompt in ``[INST]`` format.

        prompt: the new user message.
        history: iterable of (user_prompt, bot_response) pairs, oldest first.
        system_setting: system block from :meth:`character_prompt`.
        Returns the full prompt string passed to ``text_generation``.
        """
        formatted_prompt = "<history>"
        for user_prompt, bot_response in history:
            formatted_prompt += f"[INST] {user_prompt} [/INST] {bot_response} "
        # BUG FIX: the closing </history> tag was emitted once per history
        # turn inside the loop; close the section exactly once instead.
        formatted_prompt += "</history> "
        formatted_prompt += f"[INST] {system_setting}, <user>{prompt}</user> [/INST]"
        return formatted_prompt

    def call(self, prompt, history, name, description, user_name, max_new_tokens):
        """Stream a character-conditioned reply from the model.

        Returns the streaming iterator produced by
        ``InferenceClient.text_generation(..., stream=True, details=True)``.
        """
        generate_kwargs = dict(
            temperature=0.9,
            max_new_tokens=max_new_tokens,
            top_p=0.95,
            repetition_penalty=1.0,
            do_sample=True,
        )
        system_setting = self.character_prompt(
            {"name": name, "description": description, "user_name": user_name},
            max_new_tokens,
        )
        formatted_prompt = self.format_prompt(prompt, history, system_setting)
        stream = self.client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )
        return stream
|
|
|
|