File size: 1,545 Bytes
d36d50b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Top-level run settings.
# NOTE(review): the title matches the checkpoint file name used under `paths`
# (tashkeela-d2) — presumably it labels the run; confirm against the consumer.
run-title: tashkeela-d2
# Debug switch, off in this configuration; what it enables is decided by the
# consuming code.
debug: false

# Filesystem locations used by the run.
# NOTE(review): some values are relative paths (./...) while others are bare
# names (tashkeela-d2.pt, vocab.vec, test); presumably the bare names are
# resolved against one of the directories listed here — confirm in the loader.
paths:
  base: ./dataset/ashaar
  save: ./models
  load: tashkeela-d2.pt
  # NOTE(review): train.resume is false in this file, so this checkpoint path
  # is presumably only read when resuming is switched on — confirm.
  resume: ./models/Tashkeela-D2/tashkeela-d2.pt
  constants: ./dataset/helpers/constants
  word-embs: vocab.vec
  test: test

# Data-loading options.
loader:
  # NOTE(review): -1 looks like a "no limit" sentinel for the number of word
  # embeddings to load — confirm against the loader code.
  wembs-limit: -1
  # NOTE(review): presumably the number of data-loader worker processes
  # (0 = load in the main process) — confirm.
  num-workers: 0

# Training hyper-parameters.
train:
  epochs: 1000
  batch-size: 32
  char-embed-dim: 32
  # Both resume switches are off in this configuration.
  resume: false
  resume-lr: false

  # NOTE(review): presumably caps on characters per word and words per
  # segment. max-sent-len (10) equals the `window` value of the
  # sentence-break and segment sections — confirm they must stay in sync.
  max-word-len: 13
  max-sent-len: 10

  # Recurrent cell type and layer counts for the sentence- and word-level
  # stacks.
  rnn-cell: lstm
  sent-lstm-layers: 2
  word-lstm-layers: 2

  # Hidden sizes.
  sent-lstm-units: 256
  word-lstm-units: 512
  decoder-units: 256

  # Dropout rates; diac-dropout is 0 in this configuration.
  sent-dropout: 0.2
  diac-dropout: 0
  final-dropout: 0.2

  sent-mask-zero: false

  # Learning-rate settings.
  # NOTE(review): factor/patience/min look like reduce-on-plateau scheduler
  # arguments — confirm against the trainer.
  lr-factor: 0.5
  lr-patience: 1
  # Keep the dotted mantissa ("1."): YAML 1.1 resolvers such as PyYAML only
  # read an exponent literal as a float when it contains a dot; a bare 1e-7
  # would load as a string there.
  lr-min: 1.e-7
  lr-init: 0.002

  weight-decay: 0
  vertical-dropout: 0.25
  recurrent-dropout: 0.25

  # NOTE(review): presumably early-stopping thresholds (minimum improvement
  # and number of non-improving evaluations tolerated) — confirm.
  # Same dotted-float form as lr-min, for the same reason.
  stopping-delta: 1.e-7
  stopping-patience: 3

# Inference-time settings.
predictor:
  batch-size: 75
  # NOTE(review): stride/window presumably describe a sliding window over the
  # input at prediction time. The window here (20) differs from the value 10
  # used in the sentence-break and segment sections — confirm this is
  # intentional.
  stride: 2
  window: 20
  # NOTE(review): presumably the probability of feeding ground-truth signal
  # to the model; 0 would disable it — confirm.
  gt-signal-prob: 0
  seed-idx: 0

# Settings for the sentence-break step.
# NOTE(review): stride/window/min-window presumably control how the listed
# files are cut into overlapping windows — confirm against the consumer.
sentence-break:
  stride: 2
  window: 10
  min-window: 1
  export-map: false
  # Input files, given as relative paths.
  files:
    - train/train.txt
    - val/val.txt
  # The key is spelled "delimeters" in this file; the consuming code
  # presumably looks it up by this exact name, so do not rename it here alone.
  # Every entry is single-quoted so that none of the punctuation characters
  # can be mistaken for YAML syntax; the loaded strings are unchanged.
  delimeters:
    - '،'
    - '؛'
    - ','
    - ';'
    - '«'
    - '»'
    - '{'
    - '}'
    - '('
    - ')'
    - '['
    - ']'
    - '.'
    - '*'
    - '-'
    - ':'
    - '?'
    - '!'
    - '؟'


# Settings for the segment step; the values currently mirror the
# sentence-break section of this file.
# NOTE(review): stride/window/min-window presumably control how the listed
# files are cut into overlapping windows — confirm against the consumer.
segment:
  stride: 2
  window: 10
  min-window: 1
  export-map: false
  # Input files, given as relative paths.
  files:
    - train/train.txt
    - val/val.txt
  # The key is spelled "delimeters" in this file; the consuming code
  # presumably looks it up by this exact name, so do not rename it here alone.
  # Every entry is single-quoted so that none of the punctuation characters
  # can be mistaken for YAML syntax; the loaded strings are unchanged.
  delimeters:
    - '،'
    - '؛'
    - ','
    - ';'
    - '«'
    - '»'
    - '{'
    - '}'
    - '('
    - ')'
    - '['
    - ']'
    - '.'
    - '*'
    - '-'
    - ':'
    - '?'
    - '!'
    - '؟'