mrpintime committed
Commit fc2d4ef · verified · 1 Parent(s): 318bd50

Delete handler.py

Files changed (1): handler.py +0 -48
handler.py DELETED
@@ -1,48 +0,0 @@
- from typing import Dict, List, Any
-
- import torch
- from transformers import GPT2LMHeadModel, AutoTokenizer
-
-
- class EndpointHandler:
-     def __init__(self, path="mrpintime/GPTPoem"):
-         # Pick the best available device: CUDA GPU, Apple Silicon (MPS), or CPU.
-         if torch.cuda.is_available():
-             self.device = "cuda"
-         elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-             self.device = "mps"
-         else:
-             self.device = "cpu"
-         # torch.autocast only understands "cuda" and "cpu" device types.
-         self.device_type = "cuda" if self.device.startswith("cuda") else "cpu"
-
-         # Tokenizers are device-agnostic; only the model is moved to the device.
-         self.tokenizer = AutoTokenizer.from_pretrained('bolbolzaban/gpt2-persian')
-         self.model = GPT2LMHeadModel.from_pretrained(path).to(self.device)
-
-     def __call__(self, data: Dict[str, Any]) -> List[str]:
-         """
-         data args:
-             inputs (:obj:`str`): the prompt text to continue
-             parameters (:obj:`dict`): num_samples, max_new_tokens,
-                 temperature and (optionally) top_k
-         Return:
-             A :obj:`list` of decoded samples; will be serialized and returned
-         """
-         params = data['parameters']
-         start_ids = self.tokenizer.encode(data['inputs'], add_special_tokens=False)
-         # run generation
-         samples = []
-         with torch.no_grad(), torch.autocast(device_type=self.device_type, dtype=torch.bfloat16):
-             for k in range(int(params['num_samples'])):
-                 # restart from the prompt for every independent sample
-                 idx = torch.tensor(start_ids, device=self.device)[None, ...]
-                 for _ in range(int(params['max_new_tokens'])):
-                     # forward the model to get the logits for the index in the sequence
-                     logits = self.model(idx).logits
-                     # pluck the logits at the final step and scale by desired temperature
-                     logits = logits[:, -1, :] / float(params['temperature'])
-                     # optionally crop the logits to only the top k options
-                     if params.get('top_k') is not None:
-                         v, _ = torch.topk(logits, min(int(params['top_k']), logits.size(-1)))
-                         logits[logits < v[:, [-1]]] = -float('Inf')
-                     # apply softmax to convert logits to (normalized) probabilities
-                     probs = torch.nn.functional.softmax(logits, dim=-1)
-                     # sample from the distribution
-                     idx_next = torch.multinomial(probs, num_samples=1)
-                     # append sampled index to the running sequence and continue
-                     idx = torch.cat((idx, idx_next), dim=1)
-                 samples.append(self.tokenizer.decode(idx[0].tolist()))
-         return samples
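
For reference, a minimal sketch of how this handler could have been exercised locally before the deletion. The prompt string, the parameter values, and the `handler` module name are illustrative assumptions, not part of the commit; the payload shape follows the docstring above.

from handler import EndpointHandler  # assumes handler.py is on the import path

# Hypothetical request payload; field names match what __call__ reads.
payload = {
    "inputs": "سلام",  # placeholder Persian prompt to continue
    "parameters": {
        "num_samples": 2,       # independent completions to draw
        "max_new_tokens": 50,   # length of each completion
        "temperature": 0.8,     # sampling temperature
        "top_k": 40,            # optional top-k filtering
    },
}

handler = EndpointHandler(path="mrpintime/GPTPoem")
for sample in handler(payload):
    print(sample)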