Spaces:

FFZG-cleopatra
/

latvian-twitter-sentiment-classifier

Build error

thak123 committed on May 8, 2023

Commit

7d76b6a

1 Parent(s): 819c669

Update dataset.py

Files changed (1) hide show

dataset.py CHANGED Viewed

+import config
+import torch
class BERTDataset:
    """Dataset that turns review texts into fixed-length tokenized tensors.

    Implements ``__len__`` and ``__getitem__``. Every item is tokenized with
    ``config.TOKENIZER`` and then truncated or zero-padded to exactly
    ``config.MAX_LEN`` entries, so all items have the same length.
    """

    def __init__(self, review, target):
        """Store the texts and their labels.

        Args:
            review: Indexable collection of texts; each entry is converted
                with ``str()`` before tokenization.
            target: Indexable collection of labels, indexed in parallel with
                ``review``; each entry is converted to a float tensor.
        """
        self.review = review
        self.target = target
        self.tokenizer = config.TOKENIZER
        self.max_len = config.MAX_LEN

    def __len__(self):
        """Return the number of review texts."""
        return len(self.review)

    def __getitem__(self, item):
        """Tokenize one review and return its tensors.

        Args:
            item: Index into ``review`` and ``target``.

        Returns:
            A dict with ``'ids'``, ``'mask'`` and ``'token_type_ids'`` (long
            tensors of length ``max_len``) and ``'targets'`` (a float tensor
            built from ``target[item]``).
        """
        review = str(self.review[item])
        # Collapse every run of whitespace (including newlines) to one space.
        review = " ".join(review.split())

        inputs = self.tokenizer.encode_plus(
            review,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # Without explicit truncation an over-long text yields more than
            # max_len tokens; the padding length below then goes negative,
            # nothing is padded, and the item ends up with a different length
            # from the others.
            truncation=True,
        )
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]
        token_type_ids = inputs["token_type_ids"]

        # Right-pad all three sequences with zeros up to max_len.
        padding = [0] * (self.max_len - len(ids))
        ids = ids + padding
        mask = mask + padding
        token_type_ids = token_type_ids + padding

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.target[item], dtype=torch.float),
        }