lordvader31 committed on
Commit
121ec50
1 Parent(s): c60b8cd

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +32 -0
handler.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
+ import torch
4
+ from peft import PeftModel
5
+ import json
6
+ import os
7
+
8
+
9
class EndpointHandler():
    """Callable handler that serves a PEFT-adapted causal LM for text generation.

    On construction it loads the base model named in ``training_params.json``,
    applies the PEFT adapter stored at ``path``, and wraps the result in a
    ``text-generation`` pipeline. Calling the instance runs that pipeline on
    a request payload.
    """

    def __init__(self, path=""):
        """Build the text-generation pipeline from the artifacts under ``path``.

        Args:
            path: Directory containing ``training_params.json``, the
                tokenizer files, and the PEFT adapter weights.

        Raises:
            FileNotFoundError: If ``training_params.json`` is not in ``path``.
            KeyError: If the JSON file has no ``"model"`` entry.
        """
        params_file = os.path.join(path, "training_params.json")
        # Context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(params_file, encoding="utf-8") as fh:
            base_model_path = json.load(fh)["model"]

        model = AutoModelForCausalLM.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
        # The embedding matrix is resized to the tokenizer's length before the
        # adapter is applied, so both agree on the vocabulary size.
        model.resize_token_embeddings(len(tokenizer))
        model = PeftModel.from_pretrained(model, path)
        # Fold the adapter into the base model so the pipeline receives a
        # plain model object rather than the PEFT wrapper.
        model = model.merge_and_unload()
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """Run the pipeline on a request payload.

        Args:
            data: Mapping with an ``"inputs"`` entry and an optional
                ``"parameters"`` mapping of keyword arguments forwarded to
                the pipeline. If ``"inputs"`` is absent, the payload itself
                (without ``"parameters"``) is used as the pipeline input.
                The mapping is not modified.

        Returns:
            Whatever the underlying pipeline returns.
            NOTE(review): the return annotation looks inherited from a
            template; a text-generation pipeline presumably yields dicts
            with string values rather than floats - confirm and adjust.
        """
        parameters = data.get("parameters")
        if "inputs" in data:
            inputs = data["inputs"]
        else:
            # Fallback: treat the remaining payload as the input, built as a
            # new dict so the caller's object is left untouched.
            inputs = {key: value for key, value in data.items() if key != "parameters"}

        if parameters is None:
            return self.pipeline(inputs)
        return self.pipeline(inputs, **parameters)