# Listing metadata (not part of the program): file size 744 bytes, revision 3da6512.
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class EndpointHandler:
    """Callable wrapper that runs a seq2seq model on input text.

    The instance holds a tokenizer and an ``AutoModelForSeq2SeqLM`` model;
    calling it tokenizes the text, generates output ids, and returns the first
    decoded sequence under the ``"translation"`` key.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the model from ``path``.

        Args:
            path: Location passed unchanged to both ``from_pretrained`` calls.
        """
        # Remember the device on the instance so __call__ moves inputs to the
        # same device the model lives on.
        self.device = device
        # A tokenizer is not a torch module and has no ``.to()``; only the
        # model is moved to the device.
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(self.device)

    def __call__(self, data: Any) -> Dict[str, Any]:
        """Generate output text for ``data``.

        Args:
            data: Either the input text itself, or a request payload dict
                whose ``"inputs"`` entry holds the text.

        Returns:
            A dict ``{"translation": <first decoded sequence>}``.

        Raises:
            KeyError: If ``data`` is a dict without an ``"inputs"`` entry.
        """
        # Accept both a bare string and the {"inputs": ...} request payload.
        text = data["inputs"] if isinstance(data, dict) else data

        encoded = self.tokenizer(text, return_tensors="pt")
        # Every input tensor must be on the same device as the model.
        model_inputs = {
            name: tensor.to(self.device) for name, tensor in encoded.items()
        }

        # No gradients are needed for generation.
        with torch.inference_mode():
            generated = self.model.generate(**model_inputs)

        decoded = self.tokenizer.batch_decode(generated, skip_special_tokens=True)
        return {"translation": decoded[0]}