BlueDice committed on
Commit ec798e1 · 1 Parent(s): 2852eed

Update code/inference.py

Files changed (1)
  1. code/inference.py +27 -28
code/inference.py CHANGED
@@ -1,6 +1,8 @@
- from transformers import AutoTokenizer
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from sagemaker_inference import content_types, decoder
  import torch
  import re
+ import json
 
  template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
  <START>
@@ -21,40 +23,30 @@ Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when
  Alice Gate:"""
 
  def model_fn(model_dir):
-     # Load model from HuggingFace Hub
-     tokenizer = AutoTokenizer.from_pretrained(model_dir)
-     model = torch.load(f"{model_dir}/torch_model.pt")
-     return model, tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(model_dir)
+     model = torch.load(f"{model_dir}/torch_model.pt")
+     return model, tokenizer
 
-
- def create_new_response(result, user_name):
-     result = result.rsplit("Alice Gate:", 1)[1].split(f"{user_name}:",1)[0].strip()
-     parsed_result = re.sub('\*.*?\*', '', result).strip()
-     result = parsed_result if len(parsed_result) != 0 else result.replace("*","")
-     result = " ".join(result.split())
-     try:
-         result = result[:[m.start() for m in re.finditer(r'[.!?]', result)][-1]+1]
-     except Exception: pass
-     return {
-         "message": result
-     }
-
- def predict_fn(data, model_and_tokenizer):
-     # destruct model and tokenizer
-     model, tokenizer = model_and_tokenizer
-
-     # Tokenize sentences
+ def input_fn(input_data, content_type):
+     return decoder.decode(input_data, content_type)
+
+ def output_fn(decoded_output, accept):
+     response_body = json.dumps({
+         "message": decoded_output
+     })
+     return response_body, accept
+
+ def predict_fn(input_data, load_list):
+     model, tokenizer = load_list
+     inputs = input_data.pop("inputs", input_data)
      user_name = inputs["user_name"]
      user_input = "\n".join(inputs["user_input"])
      prompt = template.format(
          user_name = user_name,
          user_input = user_input
      )
-     input_ids = tokenizer(
-         prompt,
-         return_tensors = "pt"
-     ).to("cuda")
-     generator = model.generate(
+     input_ids = tokenizer(prompt, return_tensors = "pt").to("cuda")
+     encoded_output = model.generate(
          input_ids["input_ids"],
          max_new_tokens = 50,
          temperature = 0.5,
@@ -64,4 +56,12 @@ def predict_fn(data, model_and_tokenizer):
          pad_token_id = 50256,
          num_return_sequences = 1
      )
-     return create_new_response(tokenizer.decode(generator[0], skip_special_tokens=True), user_name)
+     decoded_output = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
+     decoded_output = decoded_output.rsplit("Alice Gate:", 1)[1].split(f"{user_name}:",1)[0].strip()
+     parsed_result = re.sub(r'\*.*?\*', '', decoded_output).strip()
+     decoded_output = parsed_result if len(parsed_result) != 0 else decoded_output.replace("*","")
+     decoded_output = " ".join(decoded_output.split())
+     try:
+         decoded_output = decoded_output[:[m.start() for m in re.finditer(r'[.!?]', decoded_output)][-1]+1]
+     except Exception: pass
+     return decoded_output
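
For context, these four handlers follow the SageMaker inference toolkit contract: model_fn loads the artifacts once, after which each request flows through input_fn, predict_fn, and output_fn in that order. Below is a minimal, hypothetical smoke test for the new handlers; the ./model_dir path and the payload values are illustrative placeholders rather than part of the commit, and a CUDA device is assumed because predict_fn moves the tokenized prompt to "cuda".

# Hypothetical local smoke test for the handlers above (illustrative, not part of the commit).
# Assumes torch_model.pt and the tokenizer files sit in ./model_dir and that a
# CUDA device is available, since predict_fn calls .to("cuda") on the prompt.
if __name__ == "__main__":
    payload = {
        "inputs": {
            "user_name": "Anon",
            "user_input": ["Hi Alice, how is the new build going?"]
        }
    }
    load_list = model_fn("./model_dir")                  # load model + tokenizer once
    prediction = predict_fn(payload, load_list)          # build prompt, generate, post-process
    body, _ = output_fn(prediction, "application/json")  # JSON body the endpoint would return
    print(body)  # {"message": "..."}

In a deployed endpoint, input_fn would run first and use decoder.decode to turn the raw request body into the dictionary that predict_fn receives; the payload above simply stands in for its output.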