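"""Gradio chat app for a General Knowledge Assistant.

Loads a causal language model defined in config.yaml, attaches a fine-tuned
adapter from checkpoint_dir/checkpoint-650, and serves it through a
gr.ChatInterface backed by a transformers text-generation pipeline.
"""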
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from config import load_config
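
# config.yaml is expected to provide a "model_config" mapping that includes
# "model_name" and, optionally, "torch_dtype" as a string ("float32",
# "float16", or "bfloat16"); all remaining keys are passed straight to
# AutoModelForCausalLM.from_pretrained().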
config = load_config("config.yaml")
model_config = config["model_config"]
model_name = model_config.pop("model_name")
checkpoint_model = "checkpoint_dir/checkpoint-650"
# Global handles for the model, tokenizer, and generation pipeline
model = None
tokenizer = None
pipe = None
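

# Lazily load the base model, attach the fine-tuned adapter, and build the
# text-generation pipeline the first time a request arrives; later calls
# reuse the cached globals.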
def load_model_and_tokenizer():
    global model, tokenizer, pipe
    if model is None:
        print("Loading model and tokenizer...")
        # Convert torch_dtype from string to torch.dtype
        if "torch_dtype" in model_config:
            if model_config["torch_dtype"] == "float32":
                model_config["torch_dtype"] = torch.float32
            elif model_config["torch_dtype"] == "float16":
                model_config["torch_dtype"] = torch.float16
            elif model_config["torch_dtype"] == "bfloat16":
                model_config["torch_dtype"] = torch.bfloat16
        # Load the model without quantization config
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            low_cpu_mem_usage=True,
            **model_config
        )
        model.load_adapter(checkpoint_model)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint_model, trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        print("Model and tokenizer loaded successfully.")
def respond(message, history):
    load_model_and_tokenizer()
    system_message = """You are a General Knowledge Assistant.
    Answer the questions based on the provided information.
    Be succinct and use first-principles thinking to answer the questions."""
    # Construct the chat list
    chat_list = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        chat_list.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    chat_list.append({"role": "user", "content": message})
    prompt = pipe.tokenizer.apply_chat_template(
        chat_list, tokenize=False, add_generation_prompt=True
    )
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        num_beams=1,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
        top_k=50,
    )
    # The pipeline returns prompt + completion; keep only the completion.
    new_text = outputs[0]["generated_text"][len(prompt):]
    return new_text.strip()

examples = [
    ["Suggest some breeds that get along with each other"],
    ["Explain LLM in AI"],
    ["I want to explore Dubai. What are the best places to visit?"],
]
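
# Build the chat UI; the entries in `examples` are shown as clickable
# starter prompts.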
demo = gr.ChatInterface(
    respond,
    textbox=gr.Textbox(
        placeholder="Enter your message here...", container=False, scale=7
    ),
    examples=examples,
    title="General Knowledge Assistant",
    description="Ask me anything about general knowledge. I'll try to answer succinctly using first principles.",
)

if __name__ == "__main__":
    demo.launch(debug=True)