import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from config import load_config

config = load_config("config.yaml")
model_config = config["model_config"]
model_name = model_config.pop("model_name")
checkpoint_model = "checkpoint_dir/checkpoint-650"
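# Hypothetical shape of config.yaml (illustrative only; the real keys depend
# on how the model was fine-tuned):
#
# model_config:
#   model_name: "org/base-model"   # placeholder id, not the actual base model
#   torch_dtype: "bfloat16"
#   device_map: "auto"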
# Global variables so the model, tokenizer, and pipeline are loaded only once
model = None
tokenizer = None
pipe = None
def load_model_and_tokenizer():
    global model, tokenizer, pipe
    if model is None:
        print("Loading model and tokenizer...")
        # Convert torch_dtype from string to torch.dtype
        if "torch_dtype" in model_config:
            if model_config["torch_dtype"] == "float32":
                model_config["torch_dtype"] = torch.float32
            elif model_config["torch_dtype"] == "float16":
                model_config["torch_dtype"] = torch.float16
            elif model_config["torch_dtype"] == "bfloat16":
                model_config["torch_dtype"] = torch.bfloat16
        # Load the model without quantization config
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            low_cpu_mem_usage=True,
            **model_config
        )
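        # load_adapter attaches the fine-tuned PEFT/LoRA adapter weights from
        # the checkpoint on top of the base model (requires the peft package)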
        model.load_adapter(checkpoint_model)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
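        # Many causal LMs ship without a pad token, so reuse EOS for padding;
        # right-side padding matches the usual supervised fine-tuning setup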
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        print("Model and tokenizer loaded successfully.")
def respond(message, history):
    load_model_and_tokenizer()

    system_message = """You are General Knowledge Assistant.
Answer the questions based on the provided information.
Be succinct and use first-principles thinking to answer the questions."""

    # Construct the chat list
    chat_list = [{"role": "system", "content": system_message}]
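    # history arrives as (user, assistant) pairs in Gradio's tuple-style chat
    # format; replay every prior turn so the model sees the full conversation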
    for user, assistant in history:
        chat_list.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    chat_list.append({"role": "user", "content": message})

    prompt = pipe.tokenizer.apply_chat_template(
        chat_list, tokenize=False, add_generation_prompt=True
    )
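    # apply_chat_template renders the messages with the model's own template;
    # for a ChatML-style model the prompt would look roughly like this
    # (illustrative only, the exact tokens depend on the base model):
    #   <|im_start|>system ... <|im_end|>
    #   <|im_start|>user ... <|im_end|>
    #   <|im_start|>assistant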
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        num_beams=1,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
        top_k=50,
    )
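    # The pipeline returns the prompt plus the completion, so strip the prompt
    # prefix to keep only the newly generated text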
    new_text = outputs[0]["generated_text"][len(prompt) :]
    return new_text.strip()
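
# Quick sanity check without the UI (hypothetical usage):
#   load_model_and_tokenizer()
#   print(respond("Explain LLM in AI", []))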
examples = [
    ["Suggest some breeds that get along with each other"],
    ["Explain LLM in AI"],
    ["I want to explore Dubai. What are the best places to visit?"],
]
demo = gr.ChatInterface(
    respond,
    textbox=gr.Textbox(
        placeholder="Enter your message here...", container=False, scale=7
    ),
    examples=examples,
    title="General Knowledge Assistant",
    description="Ask me anything about general knowledge. I'll try to answer succinctly using first principles.",
)
if __name__ == "__main__":
    demo.launch(debug=True)