Deepthoughtworks
/

gpt-neo-2.7B__low-cpu

Text Generation

text generation

Inference Endpoints

Model card Files Files and versions Community

fwittel committed on Nov 17, 2022

Commit

040e104

·

1 Parent(s): 4c61288

Added tokenizer to handler.py

Files changed (1) hide show

handler.py +14 -4

handler.py CHANGED Viewed

@@ -1,16 +1,26 @@
 from typing import Dict, List, Any
-from transformers import AutoModel, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
         model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
         # create inference pipeline
         # Do I have to check device?
-        self.pipeline = pipeline("text-generation", model=model)
     # (Might have to adjust typing)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        inputs = data.pop("inputs", data)  # Should I get and pass parameters?
-        prediction = self.pipeline(inputs)
         return prediction

 from typing import Dict, List, Any
+from transformers import AutoModel, AutoTokenizer, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
+        tokenizer = AutoTokenizer.from_pretrained(path)
         model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
         # create inference pipeline
         # Do I have to check device?
+        self.pipeline = pipeline(
+            "text-generation", model=model, tokenizer=tokenizer)
     # (Might have to adjust typing)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
+        else:
+            prediction = self.pipeline(inputs)
+        # postprocess the prediction
         return prediction