|
run-title: tashkeela-d2 |
|
debug: false |
|
model-name: TD2 |
|
|
|
paths: |
|
base: ./dataset/ashaar |
|
save: ./models |
|
load: tashkeela-d2.pt |
|
load-td2: td2/tashkeela-ashaar-td2.pt |
|
resume: ./models/Tashkeela-D2/tashkeela-d2.pt |
|
constants: ./dataset/helpers/constants |
|
word-embs: vocab.vec |
|
test: test |
|
|
|
modeling: |
|
"checkpoint": munael/Partial-Arabic-Diacritization-TD2 |
|
"base_model": CAMeL-Lab/bert-base-arabic-camelbert-mix-ner |
|
|
|
|
|
"deep-cls": true |
|
"full-finetune": true |
|
"keep-token-model-layers": 2 |
|
|
|
"num-chars": 40 |
|
"char-embed-dim": 128 |
|
"token_hidden_size": 768 |
|
"deep-down-proj": true |
|
"dropout": 0.2 |
|
"sentence_dropout": 0.1 |
|
"diac_model_config": { |
|
"vocab_size": 1, |
|
"num_hidden_layers": 2, |
|
"hidden_size": 768, |
|
"intermediate_size": 2304, |
|
"num_attention_heads": 8, |
|
} |
|
|
|
loader: |
|
wembs-limit: -1 |
|
num-workers: 0 |
|
|
|
train: |
|
epochs: 1000 |
|
batch-size: 1 |
|
char-embed-dim: 32 |
|
resume: false |
|
resume-lr: false |
|
|
|
max-word-len: 13 |
|
max-sent-len: 10 |
|
|
|
rnn-cell: lstm |
|
sent-lstm-layers: 2 |
|
word-lstm-layers: 2 |
|
|
|
sent-lstm-units: 256 |
|
word-lstm-units: 512 |
|
decoder-units: 256 |
|
|
|
sent-dropout: 0.2 |
|
diac-dropout: 0 |
|
final-dropout: 0.2 |
|
|
|
sent-mask-zero: false |
|
|
|
lr-factor: 0.5 |
|
lr-patience: 1 |
|
lr-min: 1.e-7 |
|
lr-init: 0.002 |
|
|
|
weight-decay: 0 |
|
vertical-dropout: 0.25 |
|
recurrent-dropout: 0.25 |
|
|
|
stopping-delta: 1.e-7 |
|
stopping-patience: 3 |
|
|
|
predictor: |
|
batch-size: 1 |
|
stride: 2 |
|
window: 20 |
|
gt-signal-prob: 0 |
|
seed-idx: 0 |
|
|
|
sentence-break: |
|
stride: 2 |
|
window: 10 |
|
min-window: 1 |
|
export-map: false |
|
files: |
|
- train/train.txt |
|
- val/val.txt |
|
delimeters: |
|
- ، |
|
- ؛ |
|
- ',' |
|
- ; |
|
- « |
|
- » |
|
- '{' |
|
- '}' |
|
- '(' |
|
- ')' |
|
- '[' |
|
- ']' |
|
- '.' |
|
- '*' |
|
- '-' |
|
- ':' |
|
- '?' |
|
- '!' |
|
- ؟ |
|
|
|
|
|
segment: |
|
stride: 2 |
|
window: 10 |
|
min-window: 1 |
|
export-map: false |
|
files: |
|
- train/train.txt |
|
- val/val.txt |
|
delimeters: |
|
- ، |
|
- ؛ |
|
- ',' |
|
- ; |
|
- « |
|
- » |
|
- '{' |
|
- '}' |
|
- '(' |
|
- ')' |
|
- '[' |
|
- ']' |
|
- '.' |
|
- '*' |
|
- '-' |
|
- ':' |
|
- '?' |
|
- '!' |
|
- ؟ |