name: "dyu_fr_transformer-sp"
joeynmt_version: "2.3.0"
model_dir: "../saved_model/lean_model"
use_cuda: False # False for CPU training
fp16: False

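# Data: train/dev/test splits are loaded as a Hugging Face dataset ("dyu-fr") from ../data/dyu_fr.
# Source and target share one SentencePiece model and one vocabulary file, which is what allows
# the tied embeddings configured under "model" below.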
data:
    train: "../data/dyu_fr"
    dev: "../data/dyu_fr"
    test: "../data/dyu_fr"
    dataset_type: "huggingface"
    dataset_cfg:
        name: "dyu-fr"
    sample_dev_subset: 1460
    src:
        lang: "dyu"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "../saved_model/lean_model/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "../saved_model/lean_model/sp.model"
    trg:
        lang: "fr"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "../saved_model/lean_model/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "../saved_model/lean_model/sp.model"
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3


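# Testing: beam search decoding (beam_size 5, length penalty beam_alpha 1.0) over token batches,
# scored with sacreBLEU using "13a" tokenization.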
testing:
    load_model: "../saved_model/lean_model/best.ckpt"
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 256
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    #return_prob: "hyp"
    #return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"

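# Training: AdamW with inverse-square-root decay after 100 warmup steps, label smoothing 0.1,
# token-level batches of 512 accumulated over 4 steps (roughly 2048 tokens per update).
# Validation runs every 30 updates; the 3 best checkpoints by BLEU are kept.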
training:
    #load_model: "../saved_model/lean_model/latest.ckpt"
    #reset_best_ckpt: False
    #reset_scheduler: False
    #reset_optimizer: False
    #reset_iter_state: False
    random_seed: 42
    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.999]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 100
    learning_rate: 0.0003
    learning_rate_min: 0.00000001
    weight_decay: 0.0
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 512
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 6
    updates: 550
    validation_freq: 30
    logging_freq: 5
    overwrite: True
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3

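# Model: 6-layer Transformer encoder and decoder, hidden size 256, feed-forward size 1024,
# pre-layer-norm, with source/target embeddings and the output softmax tied.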
model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.0
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.0
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.1
        layer_norm: "pre"
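
# Usage sketch (assumed JoeyNMT 2.x invocation; the config path below is hypothetical):
#   python -m joeynmt train configs/dyu_fr_transformer_sp.yaml   # runs the "training" section
#   python -m joeynmt test  configs/dyu_fr_transformer_sp.yaml   # decodes/evaluates per "testing"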