# NOTE(review): the following header was Hugging Face page residue fused into
# the file by extraction ("bkhmsi's picture / support for TD2 / d7c4b94 /
# raw / history blame / 2.22 kB") — commented out so the YAML parses.
---
run-title: tashkeela-d2
debug: false
model-name: TD2
paths:
base: ./dataset/ashaar
save: ./models
load: tashkeela-d2.pt
load-td2: td2/tashkeela-ashaar-td2.pt
resume: ./models/Tashkeela-D2/tashkeela-d2.pt
constants: ./dataset/helpers/constants
word-embs: vocab.vec
test: test
modeling:
"checkpoint": munael/Partial-Arabic-Diacritization-TD2
"base_model": CAMeL-Lab/bert-base-arabic-camelbert-mix-ner
# "base_model": UBC-NLP/MARBERTv2
# "base_model": UBC-NLP/ARBERTv2
"deep-cls": true
"full-finetune": true #< From true
"keep-token-model-layers": 2
# "num-finetune-last-layers": 2 #
"num-chars": 40
"char-embed-dim": 128
"token_hidden_size": 768
"deep-down-proj": true
"dropout": 0.2
"sentence_dropout": 0.1
"diac_model_config": {
"vocab_size": 1,
"num_hidden_layers": 2,
"hidden_size": 768,
"intermediate_size": 2304,
"num_attention_heads": 8,
}
loader:
wembs-limit: -1
num-workers: 0
train:
epochs: 1000
batch-size: 1
char-embed-dim: 32
resume: false
resume-lr: false
max-word-len: 13
max-sent-len: 10
rnn-cell: lstm
sent-lstm-layers: 2
word-lstm-layers: 2
sent-lstm-units: 256
word-lstm-units: 512
decoder-units: 256
sent-dropout: 0.2
diac-dropout: 0
final-dropout: 0.2
sent-mask-zero: false
lr-factor: 0.5
lr-patience: 1
lr-min: 1.0e-7  # was `1.e-7`: a float in YAML 1.1 (PyYAML) but a string under YAML 1.2 — normalized for portability
lr-init: 0.002
weight-decay: 0
vertical-dropout: 0.25
recurrent-dropout: 0.25
stopping-delta: 1.0e-7  # was `1.e-7`: a float in YAML 1.1 (PyYAML) but a string under YAML 1.2 — normalized for portability
stopping-patience: 3
predictor:
batch-size: 1
stride: 2
window: 20
gt-signal-prob: 0
seed-idx: 0
sentence-break:
stride: 2
window: 10
min-window: 1
export-map: false
files:
- train/train.txt
- val/val.txt
# NOTE(review): 'delimeters' (sic, "delimiters") — spelling is the key the loader reads; do not rename without updating the consumer
delimeters:
- ،
- ؛
- ','
- ;
- «
- »
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- '*'
- '-'
- ':'
- '?'
- '!'
- ؟
segment:
stride: 2
window: 10
min-window: 1
export-map: false
files:
- train/train.txt
- val/val.txt
# NOTE(review): 'delimeters' (sic, "delimiters") — spelling is the key the loader reads; do not rename without updating the consumer
delimeters:
- ،
- ؛
- ','
- ;
- «
- »
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- '*'
- '-'
- ':'
- '?'
- '!'
- ؟