# File: idiomify / config.yaml (811 Bytes)
# Last commit 12f548d (eubinecto): [#7] tokenizer:t-1-1. The tokenizer with the
# idiom special tokens is now fetchable directly from wandb.
# Settings for training an idiomifier.
idiomifier:
  # Version tag of this training run/model — presumably; confirm against the trainer.
  ver: m-1-2
  desc: just overfitting the model, but on the entire PIE dataset.
  # Same identifier as `bart` under `tokenizer`; presumably the pretrained
  # checkpoint to start from — confirm against the trainer.
  bart: facebook/bart-base
  lr: 0.0001
  # NOTE(review): both dataset versions here are d-1-2, while the `idioms` and
  # `literal2idiomatic` sections of this file declare d-1-3 — confirm that
  # training against the older datasets is intended.
  literal2idiomatic_ver: d-1-2
  idioms_ver: d-1-2
  max_epochs: 2
  batch_size: 40
  shuffle: true
  # Same seed value as `literal2idiomatic.seed`.
  seed: 104
# Settings for building & uploading datasets or the tokenizer
# (presumably covers the `idioms`, `literal2idiomatic` and `tokenizer` sections).
idioms:
  ver: d-1-3
  # NOTE(review): "traning" looks like a typo for "training". The value is left
  # unchanged here because it may be read at runtime — fix it deliberately
  # together with whatever consumes this description.
  description: the set of idioms in the traning set of literal2idiomatic_d-1-3. Definitions of them are added as well.
# Build settings for the literal2idiomatic dataset.
literal2idiomatic:
  ver: d-1-3
  description: The idioms are annotated with <idiom> & </idiom>.
  # Presumably the fraction of examples assigned to the training split, with
  # `seed` fixing the split — confirm against the dataset builder.
  train_ratio: 0.8
  seed: 104
  # The two tags named in `description`; same values as the `boi_token` and
  # `eoi_token` entries under `tokenizer`.
  boi_token: <idiom>
  eoi_token: </idiom>
# Build settings for the tokenizer.
tokenizer:
  ver: t-1-1
  description: A pretrained BartTokenizer. The idiom special tokens are pre-added.
  # Same identifier as `idiomifier.bart`; presumably the checkpoint whose
  # tokenizer is loaded — confirm against the tokenizer builder.
  bart: facebook/bart-base
  # The idiom special tokens mentioned in `description`; same values as the
  # `boi_token` and `eoi_token` entries under `literal2idiomatic`.
  boi_token: <idiom>
  eoi_token: </idiom>