import os
import sys

import numpy as np
import torch
from torch import nn, optim
from torchtext import data

from transformer import Transformer

sys.path.append(os.path.abspath("src/data_processing/"))
from data_processing import (
    SRC,
    TRG,
    train_data,
    valid_data,
)
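# Note: this script relies on the legacy torchtext API (Field/BucketIterator),
# which was moved to torchtext.legacy in 0.9 and removed in later releases,
# so an older torchtext installation is assumed.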
# Setting the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
"""Hyperparameters"""
BATCH_SIZE = 16
# Creating data iterators for the training and validation sets;
# BucketIterator batches examples of similar length together to minimize padding
train_iter, valid_iter = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort=None,
    sort_within_batch=False,
    sort_key=lambda x: len(x.eng),
    device=device,
    shuffle=True,
)
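# The iterators yield time-first tensors of shape (seq_len, batch_size),
# assuming SRC and TRG were built with torchtext's default batch_first=False;
# the target[:-1] / target[1:] slicing in the training loop relies on this layout.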
# Training parameters
num_epochs = 30
learning_rate = 0.0001
# Transformer model hyperparameters
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
max_len = 230
dropout = 0.4
embedding_size = 256
src_pad_idx = SRC.vocab.stoi["<pad>"]
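# src_pad_idx is presumably used inside Transformer to build the source
# padding mask so that attention ignores <pad> positions (an assumption
# about the local transformer.py implementation).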
# Vocabulary sizes
src_vocab_size = len(SRC.vocab)
print("Size of English vocabulary:", src_vocab_size)
trg_vocab_size = len(TRG.vocab)
print("Size of Arabic vocabulary:", trg_vocab_size)
# Creating the Transformer model
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
).to(device)
# Lists to track training and validation losses
train_loss = []
validation_loss = []
# Optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Criterion for loss calculation; the loss is computed over target tokens,
# so target-side <pad> positions are excluded via ignore_index
pad_idx = TRG.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
# Main training loop
for epoch in range(num_epochs):
    step_loss = []
    model.train()  # Set the model to training mode
    for batch in train_iter:
        input_data = batch.eng.to(device)
        target = batch.ar.to(device)
        # Teacher forcing: the decoder input is the target shifted right,
        # i.e. everything except the final token
        output = model(input_data, target[:-1])  # Forward pass
        optimizer.zero_grad()  # Zero the gradients
        output = output.reshape(-1, trg_vocab_size)
        # The loss targets are the target shifted left, i.e. everything
        # except the initial start-of-sequence token
        target = target[1:].reshape(-1)
        loss = criterion(output, target)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the parameters
        step_loss.append(loss.item())
    train_loss.append(np.mean(step_loss))
    print("Epoch {} | Train Cross Entropy Loss:".format(epoch), np.mean(step_loss))

    # Validation loop: no gradients are needed, so the optimizer is not touched
    model.eval()  # Set the model to evaluation mode
    with torch.inference_mode():
        step_valid_loss = []
        for batch in valid_iter:
            input_sentence = batch.eng.to(device)
            target = batch.ar.to(device)
            output = model(input_sentence, target[:-1])
            output = output.reshape(-1, trg_vocab_size)
            target = target[1:].reshape(-1)
            loss = criterion(output, target)
            step_valid_loss.append(loss.item())
        validation_loss.append(np.mean(step_valid_loss))
        print(
            "Epoch {} | Validation Cross Entropy Loss:".format(epoch),
            np.mean(step_valid_loss),
        )
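# The collected train_loss / validation_loss curves can be inspected once
# training finishes. A minimal sketch, assuming matplotlib is installed
# (not part of the original script):
#
#     import matplotlib.pyplot as plt
#     plt.plot(train_loss, label="train")
#     plt.plot(validation_loss, label="validation")
#     plt.xlabel("epoch")
#     plt.ylabel("cross entropy loss")
#     plt.legend()
#     plt.savefig("loss_curves.png")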
# Save the model weights, moved to CPU first so the checkpoint loads on any device
script_directory = os.path.dirname(os.path.abspath(__file__))
model = model.to("cpu")
torch.save(model.state_dict(), os.path.join(script_directory, "../../models/arabic2english.pt"))
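# A minimal sketch of reloading the checkpoint for inference, assuming the
# same hyperparameters and the local Transformer class from above
# (not part of the original script):
#
#     model = Transformer(
#         embedding_size, src_vocab_size, trg_vocab_size, src_pad_idx,
#         num_heads, num_encoder_layers, num_decoder_layers, dropout,
#         max_len, device=device,
#     ).to(device)
#     model.load_state_dict(
#         torch.load(os.path.join(script_directory, "../../models/arabic2english.pt"),
#                    map_location="cpu")
#     )
#     model.eval()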