Update handler.py
handler.py  +3 -5
@@ -3,8 +3,6 @@ from langchain.llms import HuggingFacePipeline
 from langchain import PromptTemplate, LLMChain
 import torch
 
-device = 0 if torch.cuda.is_available() else -1
-
 template = """{char_name}'s Persona: {char_persona}
 <START>
 {chat_history}
@@ -13,18 +11,18 @@ template = """{char_name}'s Persona: {char_persona}
 {user_name}: {user_input}
 {char_name}: """
 
+#model_id="MrD05/kaido-6b"
 class EndpointHandler():
 
     def __init__(self, path=""):
-        tokenizer = AutoTokenizer.from_pretrained(
-        model = AutoModelForCausalLM.from_pretrained("", load_in_8bit
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", load_in_8bit=True)
         local_llm = HuggingFacePipeline(
             pipeline = pipeline(
                 "text-generation",
                 model = model,
                 tokenizer = tokenizer,
                 max_length = 2048,
-                device=device,
                 temperature = 0.5,
                 top_p = 0.9,
                 top_k = 0,
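For context, here is how the updated initialization reads as straight Python once the hunk is applied — a minimal sketch, not the full file. It assumes the `transformers` imports (`AutoTokenizer`, `AutoModelForCausalLM`, `pipeline`) sit on the lines above the visible hunk, and that the rest of `__init__` and the handler's `__call__` (not shown in the diff) build an `LLMChain` from the template. `load_in_8bit=True` requires `bitsandbytes`, and `device_map="auto"` requires `accelerate`.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

template = """{char_name}'s Persona: {char_persona}
<START>
{chat_history}
{user_name}: {user_input}
{char_name}: """

#model_id="MrD05/kaido-6b"
class EndpointHandler():

    def __init__(self, path=""):
        # Loading with load_in_8bit=True quantizes the weights via
        # bitsandbytes, and device_map="auto" lets accelerate place the
        # layers on whatever GPU(s) are available. That makes the manual
        # `device = 0 if torch.cuda.is_available() else -1` bookkeeping
        # removed by this commit unnecessary — and passing `device=` to
        # pipeline() for a model already dispatched by accelerate would
        # conflict, which is presumably why `device=device` was dropped.
        tokenizer = AutoTokenizer.from_pretrained(path)
        model = AutoModelForCausalLM.from_pretrained(
            path, device_map="auto", load_in_8bit=True
        )
        local_llm = HuggingFacePipeline(
            pipeline = pipeline(
                "text-generation",
                model = model,
                tokenizer = tokenizer,
                max_length = 2048,
                temperature = 0.5,
                top_p = 0.9,
                top_k = 0,
            )
        )
        # ... remainder of __init__ (e.g. wiring `local_llm` and the
        # template into an LLMChain) lies outside the hunk shown above.

On a Hugging Face Inference Endpoint, the toolkit instantiates `EndpointHandler(path)` with the local path of the deployed model repository, which is why the hard-coded identifier can retire into the `#model_id="MrD05/kaido-6b"` comment while both `from_pretrained` calls load from `path`.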