Spaces:
Sleeping
Sleeping
import config | |
import torch | |
class BERTDataset:
    """Indexable dataset that converts review texts into fixed-length tensors.

    Each item is tokenized with ``config.TOKENIZER`` and brought to exactly
    ``config.MAX_LEN`` entries (clipped if longer, right-padded with zeros if
    shorter), so that every item has the same shape.
    """

    def __init__(self, review, target):
        """Store the samples and pick up tokenizer settings from ``config``.

        Args:
            review: Indexable, sized collection of review texts. Entries are
                passed through ``str()`` before tokenization.
            target: Indexable collection of labels aligned with ``review`` by
                index. Entries are converted to ``torch.float`` tensors.
        """
        self.review = review
        self.target = target
        self.tokenizer = config.TOKENIZER
        self.max_len = config.MAX_LEN

    def __len__(self):
        """Return the number of reviews in the dataset."""
        return len(self.review)

    def __getitem__(self, item):
        """Tokenize one review and return its model inputs and target.

        Args:
            item: Index into ``review`` / ``target``.

        Returns:
            A dict with keys ``'ids'``, ``'mask'`` and ``'token_type_ids'``
            (``torch.long`` tensors of length ``max_len``) and ``'targets'``
            (a ``torch.float`` tensor built from ``target[item]``).
        """
        # Collapse every run of whitespace (including newlines) to one space.
        review = " ".join(str(self.review[item]).split())

        inputs = self.tokenizer.encode_plus(
            review,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
        )

        max_len = self.max_len
        # Safety net: if the tokenizer did not truncate to ``max_length``, the
        # sequences would be longer than ``max_len``, the padding length below
        # would be negative (i.e. no padding), and items would end up with
        # different shapes. Clipping is a no-op when the tokenizer already
        # truncated. Note that clipping here drops whatever tokens sit past
        # ``max_len``, including any trailing special token.
        ids = inputs["input_ids"][:max_len]
        mask = inputs["attention_mask"][:max_len]
        token_type_ids = inputs["token_type_ids"][:max_len]

        # Right-pad all three sequences with zeros up to ``max_len``.
        # NOTE(review): 0 is presumably the tokenizer's pad token id; confirm
        # against config.TOKENIZER.
        padding = [0] * (max_len - len(ids))
        ids = ids + padding
        mask = mask + padding
        token_type_ids = token_type_ids + padding

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.target[item], dtype=torch.float),
        }