Update code/inference.py
code/inference.py  +27 -27
@@ -1,6 +1,8 @@
-from transformers import AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from sagemaker_inference import content_types, decoder
 import torch
 import re
+import json
 
 template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
 <START>
@@ -21,40 +23,30 @@ Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when
 Alice Gate:"""
 
 def model_fn(model_dir):
-    …
-    …
-    …
-    return model, tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_dir)
+    model = torch.load(f"{model_dir}/torch_model.pt")
+    return model, tokenizer
 
+def input_fn(input_data, content_type):
+    return decoder.decode(input_data, content_type)
 
-def …
-    …
-    …
-    …
-    …
-    try:
-        result = result[:[m.start() for m in re.finditer(r'[.!?]', result)][-1]+1]
-    except Exception: pass
-    return {
-        "message": result
-    }
+def output_fn(decoded_output, accept):
+    response_body = json.dumps({
+        "message": decoded_output
+    })
+    return response_body, accept
 
-def predict_fn(data, model_and_tokenizer):
-    …
-    …
-    …
-    # Tokenize sentences
+def predict_fn(input_data, load_list):
+    model, tokenizer = load_list
+    inputs = input_data.pop("inputs", input_data)
     user_name = inputs["user_name"]
     user_input = "\n".join(inputs["user_input"])
     prompt = template.format(
         user_name = user_name,
         user_input = user_input
     )
-    input_ids = tokenizer(
-        prompt,
-        return_tensors = "pt"
-    ).to("cuda")
-    generator = model.generate(
+    input_ids = tokenizer(prompt, return_tensors = "pt").to("cuda")
+    encoded_output = model.generate(
         input_ids["input_ids"],
         max_new_tokens = 50,
         temperature = 0.5,
@@ -64,4 +56,12 @@ def predict_fn(data, model_and_tokenizer):
         pad_token_id = 50256,
         num_return_sequences = 1
     )
-    …
+    decoded_output = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
+    decoded_output = decoded_output.rsplit("Alice Gate:", 1)[1].split(f"{user_name}:", 1)[0].strip()
+    parsed_result = re.sub(r'\*.*?\*', '', decoded_output).strip()
+    decoded_output = parsed_result if len(parsed_result) != 0 else decoded_output.replace("*", "")
+    decoded_output = " ".join(decoded_output.split())
+    try:
+        decoded_output = decoded_output[:[m.start() for m in re.finditer(r'[.!?]', decoded_output)][-1]+1]
+    except Exception: pass
+    return decoded_output
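
The tail of predict_fn trims the reply at the last sentence-ending punctuation mark, so a generation cut off by max_new_tokens does not end mid-sentence. A minimal sketch of that trim in isolation (the sample string is illustrative):

    import re

    # A generation that ran out of tokens mid-sentence.
    result = "I love debugging. It relaxes me! And then the"

    # Keep everything up to and including the last '.', '!' or '?'.
    # If no such character exists, the IndexError from [-1] is swallowed
    # and the text is left untrimmed.
    try:
        result = result[:[m.start() for m in re.finditer(r'[.!?]', result)][-1] + 1]
    except Exception:
        pass

    print(result)  # -> I love debugging. It relaxes me!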
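
The four handlers follow the SageMaker Inference Toolkit contract, which invokes them as model_fn -> input_fn -> predict_fn -> output_fn. A minimal local smoke test along those lines, assuming the script is importable as inference, the artifacts (tokenizer files plus torch_model.pt) sit in ./model, and a CUDA device is available; the payload field names come from predict_fn above, everything else is illustrative:

    from inference import model_fn, predict_fn, output_fn

    # Load once, as the toolkit would at container start-up.
    model_and_tokenizer = model_fn("./model")

    # predict_fn expects {"inputs": {"user_name": ..., "user_input": [...]}};
    # in production input_fn would decode the raw request body first.
    payload = {
        "inputs": {
            "user_name": "Bob",  # illustrative user name
            "user_input": ["Hi Alice, what are you working on?"]
        }
    }

    reply = predict_fn(payload, model_and_tokenizer)
    body, accept = output_fn(reply, "application/json")
    print(body)  # -> {"message": "..."}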