import json
import os
import re
from collections import defaultdict
import glob
import numpy as np
import time
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image


class Vocabulary:
    """Bidirectional word <-> index lookup built from a text file.

    The file is read line by line; each line (with surrounding whitespace
    stripped) becomes one vocabulary entry, and its 0-based line position
    is its index. Blank lines are kept and become empty-string entries.

    Attributes:
        vocabulary: List of entries in file order.
        vocabulary2idx: Mapping from entry to index. If an entry occurs
            more than once in the file, the last occurrence wins.
        vocabulary_size: Number of entries in ``vocabulary``.
        unknown_token: Entry whose index ``word2idx`` falls back to for
            words that are not in the vocabulary.
    """

    # Class-level default so instances created without __init__ (e.g. via
    # __new__ or unpickling of older objects) still have a fallback token.
    # NOTE(review): the empty string may be a marker such as '<unk>' that
    # was lost before this file reached us -- confirm against the
    # vocabulary file actually in use.
    unknown_token = ''

    def __init__(self, vocabulary_file_path, unknown_token=''):
        """Load the vocabulary file and build the lookup tables.

        Args:
            vocabulary_file_path: Path to a text file with one entry per line.
            unknown_token: Entry used as the fallback in ``word2idx``.
                Defaults to the empty string.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        self.unknown_token = unknown_token
        self.vocabulary = self._load_vocabulary(vocabulary_file_path)
        # Position in the file is the index of the word.
        self.vocabulary2idx = {
            word: idx for idx, word in enumerate(self.vocabulary)
        }
        self.vocabulary_size = len(self.vocabulary)

    def _load_vocabulary(self, vocabulary_file_path):
        """Return the stripped lines of ``vocabulary_file_path`` as a list."""
        # Explicit encoding: without it the platform default is used, so the
        # same file could decode differently on different machines.
        with open(vocabulary_file_path, 'r', encoding='utf-8') as file:
            return [line.strip() for line in file]

    def word2idx(self, word):
        """Return the index of ``word``.

        Falls back to the index of ``unknown_token`` when ``word`` is not in
        the vocabulary, and to ``None`` when that token is absent as well.
        """
        return self.vocabulary2idx.get(
            word, self.vocabulary2idx.get(self.unknown_token)
        )

    def idx2word(self, idx):
        """Return the entry stored at position ``idx``.

        This is plain list indexing: negative values count from the end.

        Raises:
            IndexError: If ``idx`` is out of range.
        """
        return self.vocabulary[idx]