import os
import sys

import numpy as np
import torch
from torch import nn, optim
from torchtext import data

from transformer import Transformer

# data_processing.py lives in src/data_processing/; the relative path below is
# resolved against the current working directory.
sys.path.append(os.path.abspath("src/data_processing/"))
from data_processing import (
    SRC,
    TRG,
    train_data,
    valid_data,
)
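
# SRC / TRG are the torchtext fields for the source (English) and target (Arabic)
# text, with vocabularies already built; train_data / valid_data are the
# corresponding datasets prepared in data_processing.py.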
# Setting the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
"""Hyperparameters"""
BATCH_SIZE = 16
# Creating data iterators for training and validation sets
train_iter, valid_iter = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort=None,
    sort_within_batch=False,
    sort_key=lambda x: len(x.eng),
    device=device,
    shuffle=True,
)
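# BucketIterator groups sentences of similar length (per sort_key) into the same
# batch, which keeps the amount of padding per batch small.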
# Training parameters
num_epochs = 30
learning_rate = 0.0001
# Transformer model hyperparameters
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
max_len = 230
dropout = 0.4
embedding_size = 256
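# Assuming the Transformer uses standard multi-head attention, embedding_size
# must be divisible by num_heads (here 256 / 8 = 32 dimensions per head).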
src_pad_idx = SRC.vocab.stoi["<pad>"]
# Vocabulary sizes
src_vocab_size = len(SRC.vocab)
print("Size of English vocabulary:", src_vocab_size)
trg_vocab_size = len(TRG.vocab)
print("Size of Arabic vocabulary:", trg_vocab_size)
# Creating the Transformer model
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
).to(device)
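# The positional arguments above must match the order expected by the
# Transformer __init__ defined in transformer.py.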
# Lists to track training and validation losses
train_loss = []
validation_loss = []
# Optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Criterion for loss calculation; the pad index is taken from the target
# vocabulary because the loss is computed over target tokens, and ignore_index
# excludes padded positions from the cross entropy.
pad_idx = TRG.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
# Main training loop
for epoch in range(num_epochs):
    stepLoss = []
    model.train()  # Set the model to training mode
    for batch in train_iter:
        input_data = batch.eng.to(device)
        target = batch.ar.to(device)
        # Teacher forcing: the decoder sees the target shifted right (last token
        # dropped) and is trained to predict the next token at every position.
        output = model(input_data, target[:-1])  # Forward pass
        optimizer.zero_grad()  # Zero the gradients
        output = output.reshape(-1, trg_vocab_size)
        target = target[1:].reshape(-1)  # Drop the first token so targets line up with predictions
        loss = criterion(output, target)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the parameters
        stepLoss.append(loss.item())

    epoch_train_loss = np.mean(stepLoss)
    train_loss.append(epoch_train_loss)
    print(" Epoch {} | Train Cross Entropy Loss: ".format(epoch), epoch_train_loss)

    # Validation loop
    with torch.inference_mode():
        stepValidLoss = []
        model.eval()  # Set the model to evaluation mode
        for batch in valid_iter:
            input_sentence = batch.eng.to(device)
            target = batch.ar.to(device)
            # No gradients are tracked inside inference_mode, so there is no
            # optimizer interaction here.
            output = model(input_sentence, target[:-1])
            output = output.reshape(-1, trg_vocab_size)
            target = target[1:].reshape(-1)
            loss = criterion(output, target)
            stepValidLoss.append(loss.item())

    epoch_valid_loss = np.mean(stepValidLoss)
    validation_loss.append(epoch_valid_loss)
    print(
        " Epoch {} | Validation Cross Entropy Loss: ".format(epoch),
        epoch_valid_loss,
    )
# Save the model
script_directory = os.path.dirname(os.path.abspath(__file__))
model = model.to('cpu')
torch.save(model.state_dict(), os.path.join(script_directory, "../../models/arabic2english.pt"))
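
# Minimal sketch (kept commented out, not part of the training run) of how the
# saved weights could be reloaded for inference; it assumes the same
# hyperparameter values and the same Transformer class from transformer.py.
#
#     inference_model = Transformer(
#         embedding_size,
#         src_vocab_size,
#         trg_vocab_size,
#         src_pad_idx,
#         num_heads,
#         num_encoder_layers,
#         num_decoder_layers,
#         dropout,
#         max_len,
#         device=device,
#     ).to(device)
#     state_dict = torch.load(
#         os.path.join(script_directory, "../../models/arabic2english.pt"),
#         map_location=device,
#     )
#     inference_model.load_state_dict(state_dict)
#     inference_model.eval()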