import os
import sys

import numpy as np
import torch
from torch import nn, optim
from torchtext import data  # note: on torchtext >= 0.9 this legacy API lives under torchtext.legacy.data

from transformer import Transformer

sys.path.append(os.path.abspath("src/data_processing/"))
from data_processing import (
    SRC,
    TRG,
    train_data,
    valid_data,
)

# Setting the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
BATCH_SIZE = 16

# Creating data iterators for training and validation sets
train_iter, valid_iter = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort=None,
    sort_within_batch=False,
    sort_key=lambda x: len(x.eng),
    device=device,
    shuffle=True,
)

# Training parameters
num_epochs = 30
learning_rate = 0.0001

# Transformer model hyperparameters
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
max_len = 230
dropout = 0.4
embedding_size = 256
src_pad_idx = SRC.vocab.stoi["<pad>"]

# Vocabulary sizes
src_vocab_size = len(SRC.vocab)
print("Size of English vocabulary:", src_vocab_size)
trg_vocab_size = len(TRG.vocab)
print("Size of Arabic vocabulary:", trg_vocab_size)

# Creating the Transformer model
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
).to(device)

# Lists to track per-epoch training and validation losses
train_loss = []
validation_loss = []

# Optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Criterion for loss calculation; padding positions in the target (Arabic)
# sequence are excluded from the loss
pad_idx = TRG.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

# Main training loop
for epoch in range(num_epochs):
    stepLoss = []
    model.train()  # Set the model to training mode
    for batch in train_iter:
        input_data = batch.eng.to(device)
        target = batch.ar.to(device)

        optimizer.zero_grad()  # Zero the gradients
        # Forward pass: the decoder input is the target shifted right
        # (all tokens except the last)
        output = model(input_data, target[:-1])

        # Flatten predictions and shift targets left (all tokens except
        # the first) so each position predicts the next token
        output = output.reshape(-1, trg_vocab_size)
        target = target[1:].reshape(-1)

        loss = criterion(output, target)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the parameters
        stepLoss.append(loss.item())

    train_loss.append(np.mean(stepLoss))
    print(" Epoch {} | Train Cross Entropy Loss: ".format(epoch), np.mean(stepLoss))

    # Validation loop (no gradients needed, so no optimizer calls here)
    with torch.inference_mode():
        stepValidLoss = []
        model.eval()  # Set the model to evaluation mode
        for batch in valid_iter:
            input_sentence = batch.eng.to(device)
            target = batch.ar.to(device)

            output = model(input_sentence, target[:-1])
            output = output.reshape(-1, trg_vocab_size)
            target = target[1:].reshape(-1)
            loss = criterion(output, target)
            stepValidLoss.append(loss.item())

    validation_loss.append(np.mean(stepValidLoss))
    print(
        " Epoch {} | Validation Cross Entropy Loss: ".format(epoch),
        np.mean(stepValidLoss),
    )

# Save the model weights on CPU so the checkpoint can be loaded on any device
script_directory = os.path.dirname(os.path.abspath(__file__))
model = model.to("cpu")
torch.save(model.state_dict(), os.path.join(script_directory, "../../models/arabic2english.pt"))
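
# --- Optional: visualize the tracked losses ---
# A minimal sketch (not part of the original pipeline) that plots the
# per-epoch train/validation losses collected above. It assumes matplotlib
# is installed; the output filename "loss_curves.png" is illustrative.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(range(1, num_epochs + 1), train_loss, label="train")
plt.plot(range(1, num_epochs + 1), validation_loss, label="validation")
plt.xlabel("Epoch")
plt.ylabel("Cross entropy loss")
plt.legend()
plt.savefig(os.path.join(script_directory, "loss_curves.png"))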
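
# --- Optional: reload the saved checkpoint for inference ---
# A hedged sketch of restoring the weights saved above. It assumes the model
# is rebuilt with the same hyperparameters used in this script; only then
# will the state_dict keys and shapes match.
checkpoint_path = os.path.join(script_directory, "../../models/arabic2english.pt")
restored = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
)
restored.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
restored.eval()  # disable dropout before generating translations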