import os
import sys

import numpy as np
import torch
from torch import nn, optim
from torchtext import data

from transformer import Transformer

# Make the data-processing module importable, then pull in the shared
# fields and datasets.
sys.path.append(os.path.abspath("src/data_processing/"))
from data_processing import (
    SRC,
    TRG,
    train_data,
    valid_data,
)

# Setting the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

"""Hyperparameters"""
BATCH_SIZE = 16

# Creating data iterators for the training and validation sets
train_iter, valid_iter = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=False,
    sort_key=lambda x: len(x.eng),  # bucket batches by English sentence length
    device=device,
    shuffle=True,
)
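
# Note: assuming the Fields defined in data_processing keep torchtext's
# default batch_first=False, each batch.eng / batch.ar tensor has shape
# [seq_len, batch_size], which is why the training loop below slices along
# dim 0 (target[:-1], target[1:]).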

# Training parameters
num_epochs = 30
learning_rate = 0.0001

# Transformer model hyperparameters
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3

max_len = 230
dropout = 0.4
embedding_size = 256
src_pad_idx = SRC.vocab.stoi["<pad>"]

# Vocabulary sizes
src_vocab_size = len(SRC.vocab)
print("Size of English vocabulary:", src_vocab_size)

trg_vocab_size = len(TRG.vocab)
print("Size of Arabic vocabulary:", trg_vocab_size)

# Creating the Transformer model
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    dropout,
    max_len,
    device=device,
).to(device)
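
# Optional sanity check: report the trainable parameter count before training.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {num_params:,}")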

# Lists to track training and validation losses
train_loss = []
validation_loss = []

# Optimizer definition
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Criterion for loss calculation: the loss is computed over target (Arabic)
# tokens, so padding positions must be ignored using the *target* pad index.
pad_idx = TRG.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

# Main training loop
for epoch in range(num_epochs):
    step_losses = []
    model.train()  # Training mode (enables dropout)
    for batch in train_iter:
        input_data = batch.eng.to(device)
        target = batch.ar.to(device)

        # Teacher forcing: feed the target shifted right (all tokens except
        # the last) and score the output against the target shifted left.
        output = model(input_data, target[:-1])
        optimizer.zero_grad()  # Clear gradients from the previous step
        output = output.reshape(-1, trg_vocab_size)
        target = target[1:].reshape(-1)

        loss = criterion(output, target)
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the parameters

        step_losses.append(loss.item())

    train_loss.append(np.mean(step_losses))
    print(f"Epoch {epoch} | Train Cross Entropy Loss: {np.mean(step_losses):.4f}")

    # Validation loop
    model.eval()  # Evaluation mode (disables dropout)
    with torch.inference_mode():  # No gradients are tracked here
        step_valid_losses = []
        for batch in valid_iter:
            input_sentence = batch.eng.to(device)
            target = batch.ar.to(device)
            output = model(input_sentence, target[:-1])
            output = output.reshape(-1, trg_vocab_size)
            target = target[1:].reshape(-1)
            loss = criterion(output, target)

            step_valid_losses.append(loss.item())

    validation_loss.append(np.mean(step_valid_losses))
    print(f"Epoch {epoch} | Validation Cross Entropy Loss: {np.mean(step_valid_losses):.4f}")
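
# Optional: visualise the loss curves once training finishes (a minimal
# sketch; assumes matplotlib is installed -- it is not otherwise required
# by this script).
import matplotlib.pyplot as plt

plt.figure()
plt.plot(train_loss, label="train")
plt.plot(validation_loss, label="validation")
plt.xlabel("Epoch")
plt.ylabel("Cross entropy loss")
plt.legend()
plt.savefig("loss_curves.png")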

# Save the model weights (moved to CPU so they load on any device)
script_directory = os.path.dirname(os.path.abspath(__file__))
save_path = os.path.join(script_directory, "../../models/arabic2english.pt")
os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Ensure the target directory exists
model = model.to("cpu")
torch.save(model.state_dict(), save_path)
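
# To reuse the trained weights later (a sketch; mirrors the save call above):
#   model = Transformer(...)  # same hyperparameters as above
#   state = torch.load(save_path, map_location=device)
#   model.load_state_dict(state)
#   model.eval()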