booksouls committed
Commit 8fb1919 · verified · 1 Parent(s): d56a08c

add handler.py

Files changed (1)
  1. handler.py +46 -0
handler.py ADDED
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from typing import Any

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EndpointHandler:
    def __init__(self, path=""):
        self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        inputs = data.get("inputs")
        parameters = data.get("parameters")

        if inputs is None:
            raise ValueError("'inputs' is missing from the request body")

        if not isinstance(inputs, str):
            raise ValueError(f"Expected 'inputs' to be a str, but found {type(inputs)}")

        if parameters is not None and not isinstance(parameters, dict):
            raise ValueError(f"Expected 'parameters' to be a dict, but found {type(parameters)}")

        # Truncate the tokens to 1024 to prevent errors with BART and long text.
        tokens = self.tokenizer(
            inputs,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
            return_attention_mask=False,
        )

        # Ensure the input_ids and the model are on the same device to prevent errors.
        input_ids = tokens.input_ids.to(device)

        # Gradient calculation is not needed for inference.
        with torch.no_grad():
            if parameters is None:
                output = self.model.generate(input_ids)
            else:
                output = self.model.generate(input_ids, **parameters)

        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
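
The handler can be smoke-tested locally before the endpoint is deployed. The sketch below is illustrative only: the model path ".", the sample payload, and the generation parameters are assumptions, not part of this commit. It relies only on what the code above guarantees, namely that __call__ receives the parsed request body as a dict and forwards 'parameters' to model.generate().

if __name__ == "__main__":
    # Hypothetical local test: assumes the model and tokenizer files sit in
    # the current directory ("." is a placeholder path, not from the commit).
    handler = EndpointHandler(path=".")

    # 'parameters' is splatted into model.generate(), so standard generation
    # kwargs such as num_beams and max_length can be supplied per request.
    payload = {
        "inputs": "Long article text to summarize ...",
        "parameters": {"num_beams": 4, "max_length": 142},
    }
    print(handler(payload)["generated_text"])

Note that the handler decodes only output[0], so the endpoint returns a single generated_text string even if a caller requests multiple sequences (e.g. via num_return_sequences).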