from typing import Dict, List, Any, Union

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Device the model is placed on at load time: CUDA when PyTorch reports it as
# available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EndpointHandler:
    """Callable wrapper that runs a seq2seq model on a piece of text.

    The tokenizer and model are loaded once from ``path`` when the handler is
    constructed. Each call tokenizes the input, runs ``generate`` and returns
    the first decoded sequence under the ``"translation"`` key.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the seq2seq model found at ``path``.

        Args:
            path: Model directory or identifier forwarded unchanged to both
                ``from_pretrained`` calls.
        """
        # Tokenizers are not torch modules and have no ``.to()`` method, so
        # only the model is moved to the target device.
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(device)

    def __call__(self, data: Union[str, Dict[str, Any]]) -> Dict[str, str]:
        """Generate output text for ``data``.

        Args:
            data: The text to run through the model, or a request payload
                dict carrying that text under the ``"inputs"`` key (the
                shape used by Hugging Face Inference Endpoints requests).

        Returns:
            ``{"translation": text}`` where ``text`` is the first sequence
            produced by ``generate``, decoded with special tokens removed.

        Raises:
            ValueError: If ``data`` is a dict without an ``"inputs"`` entry.
        """
        if isinstance(data, dict):
            if "inputs" not in data:
                raise ValueError('request payload must contain an "inputs" entry')
            data = data["inputs"]

        encoded = self.tokenizer(data, return_tensors="pt")

        # Send every input tensor to wherever the model weights actually
        # live, so inputs and weights can never end up on different devices.
        target_device = self.model.device
        model_inputs = {
            name: tensor.to(target_device) for name, tensor in encoded.items()
        }

        # Pure inference: no autograd bookkeeping is needed.
        with torch.inference_mode():
            generated = self.model.generate(**model_inputs)

        decoded = self.tokenizer.batch_decode(generated, skip_special_tokens=True)
        return {"translation": decoded[0]}