booksouls committed · verified
Commit 3e3c060 · Parent: 6a649e3

remove handler.py

Files changed (1)
  1. handler.py +0 -43
handler.py DELETED
@@ -1,43 +0,0 @@
- import torch
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
- from typing import Any
-
- class EndpointHandler():
-     def __init__(self, path=""):
-         self.model = AutoModelForSeq2SeqLM.from_pretrained(f"{path}/4-bit", device_map="auto")
-         self.tokenizer = AutoTokenizer.from_pretrained(path)
-
-     def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
-         inputs = data.get("inputs")
-         parameters = data.get("parameters")
-
-         if inputs is None:
-             raise ValueError(f"'inputs' is missing from the request body")
-
-         if not isinstance(inputs, str):
-             raise ValueError(f"Expected 'inputs' to be a str, but found {type(inputs)}")
-
-         if parameters is not None and not isinstance(parameters, dict):
-             raise ValueError(f"Expected 'parameters' to be a dict, but found {type(parameters)}")
-
-         # Truncate the tokens to 1024 to prevent errors with BART and long text.
-         tokens = self.tokenizer(
-             inputs,
-             max_length=1024,
-             truncation=True,
-             return_tensors="pt",
-             return_attention_mask=False,
-         )
-
-         # Ensure the input_ids and the model are both on the GPU to prevent errors.
-         input_ids = tokens.input_ids.to("cuda")
-
-         # Gradient calculation is not needed for inference.
-         with torch.no_grad():
-             if parameters is None:
-                 output = self.model.generate(input_ids)
-             else:
-                 output = self.model.generate(input_ids, **parameters)
-
-         generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
-         return {"generated_text": generated_text}