import os
import sys

import numpy as np
import torch
from torch import nn, optim
from torchtext import data

from transformer import Transformer

# Make the data-processing module importable before pulling in the fields and datasets
sys.path.append(os.path.abspath("src/data_processing/"))
from data_processing import (
    SRC,
    TRG,
    train_data,
    valid_data,
)
# Setting the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
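# Optional (a sketch, not part of the original script): seed the RNGs so runs
# are easier to reproduce. The seed value here is arbitrary.
SEED = 1234
torch.manual_seed(SEED)
np.random.seed(SEED)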
"""Hyperparameters""" | |
BATCH_SIZE = 16 | |
# Creating data iterators for the training and validation sets
train_iter, valid_iter = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort=None,
    sort_within_batch=False,
    sort_key=lambda x: len(x.eng),
    device=device,
    shuffle=True,
)
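# Optional sanity check (a sketch, not in the original script): legacy torchtext
# iterators yield tensors of shape (sequence_length, batch_size) unless the
# Fields were built with batch_first=True, which is why the training loop below
# slices the target along dim 0 (target[:-1] and target[1:]).
sample_batch = next(iter(train_iter))
print("Sample eng batch shape:", tuple(sample_batch.eng.shape))
print("Sample ar batch shape:", tuple(sample_batch.ar.shape))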
# Training parameters
num_epochs = 30
learning_rate = 0.0001

# Transformer model hyperparameters
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
max_len = 230
dropout = 0.4
embedding_size = 256
src_pad_idx = SRC.vocab.stoi["<pad>"]

# Vocabulary sizes
src_vocab_size = len(SRC.vocab)
print("Size of English vocabulary:", src_vocab_size)
trg_vocab_size = len(TRG.vocab)
print("Size of Arabic vocabulary:", trg_vocab_size)
# Creating the Transformer model
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
).to(device)
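# Optional (a small sketch, not in the original script): report how many
# trainable parameters the model has.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Trainable parameters:", num_params)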
# Lists to track training and validation losses
train_loss = []
validation_loss = []

# Optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Criterion for loss calculation; padding positions in the target sequence are
# ignored, so the pad index is taken from the target (Arabic) vocabulary
pad_idx = TRG.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
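# Optional (a sketch, not wired into the original recipe): a learning-rate
# scheduler such as ReduceLROnPlateau could lower the learning rate when the
# validation loss stops improving. It would be created here and stepped once
# per epoch, e.g. scheduler.step(np.mean(stepValidLoss)) after validation.
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=2)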
# Main training loop
for epoch in range(num_epochs):
    stepLoss = []
    model.train()  # Set the model to training mode
    for batch in train_iter:
        input_data = batch.eng.to(device)
        target = batch.ar.to(device)
        output = model(input_data, target[:-1])  # Forward pass
        optimizer.zero_grad()  # Zero the gradients
        output = output.reshape(-1, trg_vocab_size)
        target = target[1:].reshape(-1)
        loss = criterion(output, target)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the parameters
        stepLoss.append(loss.item())

    train_loss.append(np.mean(stepLoss))
    print(" Epoch {} | Train Cross Entropy Loss: ".format(epoch), np.mean(stepLoss))
    # Validation loop; no gradients are needed, so the optimizer is not touched here
    with torch.inference_mode():
        stepValidLoss = []
        model.eval()  # Set the model to evaluation mode
        for i, batch in enumerate(valid_iter):
            input_sentence = batch.eng.to(device)
            target = batch.ar.to(device)
            output = model(input_sentence, target[:-1])
            output = output.reshape(-1, trg_vocab_size)
            target = target[1:].reshape(-1)
            loss = criterion(output, target)
            stepValidLoss.append(loss.item())

    validation_loss.append(np.mean(stepValidLoss))
    print(
        " Epoch {} | Validation Cross Entropy Loss: ".format(epoch),
        np.mean(stepValidLoss),
    )
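    # Optional sketch (not in the original script): keep a separate copy of the
    # weights with the lowest validation loss seen so far; the filename below is
    # hypothetical.
    # if np.mean(stepValidLoss) <= min(validation_loss):
    #     torch.save(model.state_dict(), "best_arabic2english.pt")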
# Save the model
script_directory = os.path.dirname(os.path.abspath(__file__))
model = model.to("cpu")
torch.save(model.state_dict(), os.path.join(script_directory, "../../models/arabic2english.pt"))
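# The loss histories above are collected but never visualized; a minimal plotting
# sketch (assumes matplotlib is installed; "loss_curves.png" is a hypothetical
# output name):
import matplotlib.pyplot as plt

plt.figure()
plt.plot(train_loss, label="train")
plt.plot(validation_loss, label="validation")
plt.xlabel("epoch")
plt.ylabel("cross entropy loss")
plt.legend()
plt.savefig(os.path.join(script_directory, "loss_curves.png"))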