updating handler
Browse files- handler.py +11 -14
- tester.py +2 -2
handler.py
CHANGED
@@ -4,22 +4,19 @@ import torch
|
|
4 |
|
5 |
MAX_TOKENS=8192
|
6 |
|
7 |
-
class EndpointHandler():
|
8 |
-
def __init__(self
|
9 |
-
self.pipeline = transformers.pipeline(
|
10 |
"text-generation",
|
11 |
model="humane-intelligence/gemma2-9b-cpt-sealionv3-instruct-endpoint",
|
12 |
-
model_kwargs={"torch_dtype": torch.bfloat16},
|
13 |
device_map="auto",
|
14 |
)
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
)
|
24 |
-
print(outputs[0]["generated_text"][-1])
|
25 |
-
return outputs
|
|
|
4 |
|
5 |
# Maximum number of new tokens a single generation request may produce.
MAX_TOKENS = 8192


class EndpointHandler:
    """Hugging Face Inference Endpoints custom handler for text generation.

    Loads the SEA-LION v3 instruct model into a ``transformers`` pipeline
    once at startup and serves generation requests through ``__call__``.
    """

    def __init__(self, path: str = "") -> None:
        """Build the text-generation pipeline.

        Args:
            path: Local model directory passed in by the Inference
                Endpoints runtime.  Accepted (with a default) to satisfy
                the standard ``EndpointHandler(path)`` contract; unused
                here because the model id is pinned below.
        """
        # device_map="auto" lets accelerate place the model on available
        # GPU(s); low_cpu_mem_usage avoids materialising a full copy of
        # the weights in host RAM while loading in bfloat16.
        self.pipeline = transformers.pipeline(
            "text-generation",
            model="humane-intelligence/gemma2-9b-cpt-sealionv3-instruct-endpoint",
            model_kwargs={"torch_dtype": torch.bfloat16, "low_cpu_mem_usage": True},
            device_map="auto",
        )

    def __call__(self, text_inputs: Any) -> List[Dict[str, Any]]:
        """Run generation on the request payload.

        Args:
            text_inputs: Prompt string or chat-message list, forwarded
                unchanged to the underlying pipeline.

        Returns:
            The pipeline output: a list of generation results, each a dict
            with a "generated_text" entry.  NOTE(review): the previous
            annotation ``List[List[Dict[str, float]]]`` contradicted the
            ``outputs[0]["generated_text"]`` indexing below and the fact
            that generated text is not a float.
        """
        outputs = self.pipeline(
            text_inputs,
            max_new_tokens=MAX_TOKENS,
        )
        # Log the final generated message so it appears in endpoint logs.
        print(outputs[0]["generated_text"][-1])
        return outputs
|
|
|
|
|
|
tester.py
CHANGED
@@ -2,7 +2,7 @@ from handler import EndpointHandler
|
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
# init handler
|
5 |
-
my_handler = EndpointHandler(
|
6 |
|
7 |
# prepare sample payload
|
8 |
messages = [
|
@@ -10,7 +10,7 @@ if __name__ == "__main__":
|
|
10 |
]
|
11 |
|
12 |
# test the handler
|
13 |
-
pred=my_handler
|
14 |
|
15 |
# show results
|
16 |
print(pred)
|
|
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
# init handler
|
5 |
+
my_handler = EndpointHandler()
|
6 |
|
7 |
# prepare sample payload
|
8 |
messages = [
|
|
|
10 |
]
|
11 |
|
12 |
# test the handler
|
13 |
+
pred=my_handler(messages)
|
14 |
|
15 |
# show results
|
16 |
print(pred)
|