BlueDice
/

Katakuri-350m-onnx

Text Generation

Transformers

ONNX

opt

Inference Endpoints

Model card Files Files and versions Community

BlueDice committed on May 19, 2023

Commit

8bde110

1 Parent(s): a474153

Update handler.py

Browse files

Files changed (1) hide show

handler.py +26 -27

handler.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from transformers import AutoTokenizer
 from optimum.onnxruntime import ORTModelForCausalLM
 import re
 import time
 import torch
@@ -19,40 +19,25 @@ Alice Gate: Yeah, it's really fun. I'm lucky to be able to do this as a job.
 {user_name}: Definetly.
 <END>
 Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air* Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
-{user_input}
-Alice Gate:"""
-class EndpointHandler():
-    def __init__(self, path = ""):
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = ORTModelForCausalLM.from_pretrained(path)#provider = "CUDAExecutionProvider"
-    def response(self, result, user_name):
-        result = result.rsplit("Alice Gate:", 1)[1].split(f"{user_name}:",1)[0].strip()
-        parsed_result = re.sub('\*.*?\*', '', result).strip()
-        result = parsed_result if len(parsed_result) != 0 else result.replace("*","")
-        result = " ".join(result.split())
-        try:
-            result = result[:[m.start() for m in re.finditer(r'[.!?]', result)][-1]+1]
-        except Exception: pass
-        return {
-            "message": result
-        }
-    def __call__(self, data):
-        inputs = data.pop("inputs", data)
-        user_name = inputs["user_name"]
-        user_input = "\n".join(inputs["user_input"])
         prompt = template.format(
             user_name = user_name,
             user_input = user_input
         )
-        input_ids = self.tokenizer(
-            prompt,
-            return_tensors = "pt"
-        ).to("cuda")
-        generator = self.model.generate(
             input_ids["input_ids"],
             max_new_tokens = 50,
             temperature = 0.5,
@@ -62,4 +47,18 @@ class EndpointHandler():
             pad_token_id = 50256,
             num_return_sequences = 1
         )
-        return self.response(self.tokenizer.decode(generator[0], skip_special_tokens=True), user_name)

 from optimum.onnxruntime import ORTModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import re
 import time
 import torch
 {user_name}: Definetly.
 <END>
 Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air* Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
+{user_input}"""
+class SweetCommander():
+    def __init__(self, path="") -> None:
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = ORTModelForCausalLM.from_pretrained(path)#provider = "CUDAExecutionProvider"
+        self.star_line = "***********************************************************"
+    def __call__(self, user_name, user_input):
+        t1 = time.time()
         prompt = template.format(
             user_name = user_name,
             user_input = user_input
         )
+        print(self.star_line)
+        print(prompt)
+        input_ids = self.tokenizer(prompt + "\nAlice Gate:", return_tensors = "pt")
+        encoded_output = self.model.generate(
             input_ids["input_ids"],
             max_new_tokens = 50,
             temperature = 0.5,
             pad_token_id = 50256,
             num_return_sequences = 1
         )
+        decoded_output = self.tokenizer.decode(encoded_output[0], skip_special_tokens = True).replace(prompt, "")
+        decoded_output = decoded_output.split("Alice Gate:", 1)[1].split(f"{user_name}:",1)[0].strip()
+        parsed_result = re.sub('\*.*?\*', '', decoded_output).strip()
+        if len(parsed_result) != 0: decoded_output = parsed_result
+        decoded_output = decoded_output.replace("*","")
+        decoded_output = " ".join(decoded_output.split())
+        try:
+            parsed_result = decoded_output[:[m.start() for m in re.finditer(r'[.!?]', decoded_output)][-1]+1]
+            if len(parsed_result) != 0: decoded_output = parsed_result
+        except Exception: pass
+        print(self.star_line)
+        print("Response:",decoded_output)
+        print("Eval time:",time.time()-t1)
+        print(self.star_line)
+        return decoded_output