# src/models.py
"""Configuration-driven access to Llama chat models hosted on Hugging Face,
backing the application's end-user test chat page."""

import json
import os
from typing import List, Optional

import requests

from src.common import config_dir


class HFLlamaChatModel:
    """A chat model hosted on the Hugging Face Inference API, as described by
    an entry in the models.json config file."""

    models = None  # Lazily-loaded list of configured HFLlamaChatModel instances

    @classmethod
    def load_configs(cls):
        config_file = os.path.join(config_dir, "models.json")
        with open(config_file, "r") as f:
            configs = json.load(f)['models']
        cls.models = []
        for cfg in configs:
            # Only register each configured model name once
            if cls.get_model(cfg['name']) is None:
                cls.models.append(HFLlamaChatModel(cfg['name'], cfg['id'], cfg['description']))
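
    # For reference, a sketch of the expected config file shape, inferred from
    # the keys read above ('name', 'id', 'description'); the real
    # config/models.json may differ, and the model id here is only illustrative:
    #
    # {
    #     "models": [
    #         {
    #             "name": "llama-2-7b-chat",
    #             "id": "meta-llama/Llama-2-7b-chat-hf",
    #             "description": "Llama 2 7B chat-tuned model"
    #         }
    #     ]
    # }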

    @classmethod
    def get_model(cls, model: str) -> Optional["HFLlamaChatModel"]:
        # Guard against lookups before the configs have been loaded
        if cls.models is None:
            cls.load_configs()
        for m in cls.models:
            if m.name == model:
                return m

    @classmethod
    def available_models(cls) -> List[str]:
        if cls.models is None:
            cls.load_configs()
        return [m.name for m in cls.models]

    def __init__(self, name: str, id: str, description: str):
        self.name = name
        self.id = id
        self.description = description

    def __call__(self,
                 query: str,
                 auth_token: str,
                 system_prompt: Optional[str] = None,
                 max_new_tokens: int = 256,
                 temperature: float = 1.0):
        headers = {"Authorization": f"Bearer {auth_token}"}
        api_url = f"https://api-inference.huggingface.co/models/{self.id}"
        if system_prompt is None:
            system_prompt = "You are a helpful assistant."
        # Llama 2 chat prompt format; note the system block closes with <</SYS>>
        query_input = f"[INST] <<SYS>> {system_prompt} <</SYS>> {query} [/INST] "
        query_payload = {
            "inputs": query_input,
            "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
        }
        print(query_payload)
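        # For a text-generation model the API is expected to return a JSON
        # list like [{"generated_text": "<prompt followed by completion>"}],
        # which the handling below relies on.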
        response = requests.post(api_url, headers=headers, json=query_payload)
        if response.status_code == 200:
            resp_json = response.json()
            llm_text = resp_json[0]['generated_text']
            # The prompt is echoed back in generated_text, so strip it off
            query_len = len(query_input)
            llm_text = llm_text[query_len:].strip()
            return llm_text
        else:
            error_detail = f"Error from Hugging Face API: {response.status_code}: {response.reason} ({response.content})"
            raise ValueError(error_detail)
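

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module. Assumes at least
    # one model is configured in config/models.json and that a valid Hugging
    # Face API token is available in the (hypothetical) HF_API_TOKEN
    # environment variable.
    token = os.environ["HF_API_TOKEN"]
    names = HFLlamaChatModel.available_models()
    print(f"Available models: {names}")
    model = HFLlamaChatModel.get_model(names[0])
    print(model("What is the capital of France?", auth_token=token))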