duwuonline commited on
Commit
c065885
1 Parent(s): e14abe1

Training in progress, step 500

Browse files
config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "_name_or_path": "google/mt5-small",
3
  "architectures": [
4
  "MT5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 1024,
8
  "d_kv": 64,
9
- "d_model": 512,
10
- "decoder_start_token_id": 0,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
@@ -16,13 +16,201 @@
16
  "is_encoder_decoder": true,
17
  "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
 
 
19
  "model_type": "mt5",
20
- "num_decoder_layers": 8,
21
- "num_heads": 6,
22
- "num_layers": 8,
 
 
23
  "pad_token_id": 0,
24
  "relative_attention_max_distance": 128,
25
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "tie_word_embeddings": false,
27
  "tokenizer_class": "T5Tokenizer",
28
  "torch_dtype": "float32",
 
1
  {
2
+ "_name_or_path": "csebuetnlp/mT5_m2o_arabic_crossSum",
3
  "architectures": [
4
  "MT5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
  "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 250021,
11
  "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
 
16
  "is_encoder_decoder": true,
17
  "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
19
+ "length_penalty": 0.6,
20
+ "max_length": 84,
21
  "model_type": "mt5",
22
+ "num_beams": 4,
23
+ "num_decoder_layers": 12,
24
+ "num_heads": 12,
25
+ "num_layers": 12,
26
+ "output_past": true,
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
+ "task_specific_params": {
31
+ "langid_map": {
32
+ "amharic": [
33
+ 35,
34
+ "\u2581<extra_id_64>"
35
+ ],
36
+ "arabic": [
37
+ 4,
38
+ "\u2581<extra_id_95>"
39
+ ],
40
+ "azerbaijani": [
41
+ 7,
42
+ "\u2581<extra_id_92>"
43
+ ],
44
+ "bengali": [
45
+ 42,
46
+ "\u2581<extra_id_57>"
47
+ ],
48
+ "burmese": [
49
+ 33,
50
+ "\u2581<extra_id_66>"
51
+ ],
52
+ "chinese_simplified": [
53
+ 40,
54
+ "\u2581<extra_id_59>"
55
+ ],
56
+ "chinese_traditional": [
57
+ 44,
58
+ "\u2581<extra_id_55>"
59
+ ],
60
+ "english": [
61
+ 30,
62
+ "\u2581<extra_id_69>"
63
+ ],
64
+ "french": [
65
+ 10,
66
+ "\u2581<extra_id_89>"
67
+ ],
68
+ "gujarati": [
69
+ 27,
70
+ "\u2581<extra_id_72>"
71
+ ],
72
+ "hausa": [
73
+ 43,
74
+ "\u2581<extra_id_56>"
75
+ ],
76
+ "hindi": [
77
+ 21,
78
+ "\u2581<extra_id_78>"
79
+ ],
80
+ "igbo": [
81
+ 9,
82
+ "\u2581<extra_id_90>"
83
+ ],
84
+ "indonesian": [
85
+ 1,
86
+ "\u2581<extra_id_98>"
87
+ ],
88
+ "japanese": [
89
+ 37,
90
+ "\u2581<extra_id_62>"
91
+ ],
92
+ "kirundi": [
93
+ 0,
94
+ "\u2581<extra_id_99>"
95
+ ],
96
+ "korean": [
97
+ 29,
98
+ "\u2581<extra_id_70>"
99
+ ],
100
+ "kyrgyz": [
101
+ 5,
102
+ "\u2581<extra_id_94>"
103
+ ],
104
+ "marathi": [
105
+ 13,
106
+ "\u2581<extra_id_86>"
107
+ ],
108
+ "nepali": [
109
+ 20,
110
+ "\u2581<extra_id_79>"
111
+ ],
112
+ "oromo": [
113
+ 41,
114
+ "\u2581<extra_id_58>"
115
+ ],
116
+ "pashto": [
117
+ 34,
118
+ "\u2581<extra_id_65>"
119
+ ],
120
+ "persian": [
121
+ 23,
122
+ "\u2581<extra_id_76>"
123
+ ],
124
+ "pidgin": [
125
+ 14,
126
+ "\u2581<extra_id_85>"
127
+ ],
128
+ "portuguese": [
129
+ 39,
130
+ "\u2581<extra_id_60>"
131
+ ],
132
+ "punjabi": [
133
+ 17,
134
+ "\u2581<extra_id_82>"
135
+ ],
136
+ "russian": [
137
+ 36,
138
+ "\u2581<extra_id_63>"
139
+ ],
140
+ "scottish_gaelic": [
141
+ 24,
142
+ "\u2581<extra_id_75>"
143
+ ],
144
+ "serbian_cyrillic": [
145
+ 28,
146
+ "\u2581<extra_id_71>"
147
+ ],
148
+ "serbian_latin": [
149
+ 11,
150
+ "\u2581<extra_id_88>"
151
+ ],
152
+ "sinhala": [
153
+ 31,
154
+ "\u2581<extra_id_68>"
155
+ ],
156
+ "somali": [
157
+ 19,
158
+ "\u2581<extra_id_80>"
159
+ ],
160
+ "spanish": [
161
+ 3,
162
+ "\u2581<extra_id_96>"
163
+ ],
164
+ "swahili": [
165
+ 18,
166
+ "\u2581<extra_id_81>"
167
+ ],
168
+ "tamil": [
169
+ 32,
170
+ "\u2581<extra_id_67>"
171
+ ],
172
+ "telugu": [
173
+ 22,
174
+ "\u2581<extra_id_77>"
175
+ ],
176
+ "thai": [
177
+ 6,
178
+ "\u2581<extra_id_93>"
179
+ ],
180
+ "tigrinya": [
181
+ 16,
182
+ "\u2581<extra_id_83>"
183
+ ],
184
+ "turkish": [
185
+ 15,
186
+ "\u2581<extra_id_84>"
187
+ ],
188
+ "ukrainian": [
189
+ 2,
190
+ "\u2581<extra_id_97>"
191
+ ],
192
+ "urdu": [
193
+ 38,
194
+ "\u2581<extra_id_61>"
195
+ ],
196
+ "uzbek": [
197
+ 8,
198
+ "\u2581<extra_id_91>"
199
+ ],
200
+ "vietnamese": [
201
+ 12,
202
+ "\u2581<extra_id_87>"
203
+ ],
204
+ "welsh": [
205
+ 26,
206
+ "\u2581<extra_id_73>"
207
+ ],
208
+ "yoruba": [
209
+ 25,
210
+ "\u2581<extra_id_74>"
211
+ ]
212
+ }
213
+ },
214
  "tie_word_embeddings": false,
215
  "tokenizer_class": "T5Tokenizer",
216
  "torch_dtype": "float32",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f5830b2e3aad62b12e166980a73ca5c73672a0fbfd9483feced66a36fd24c16
3
- size 1200772613
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6610a75a47e18332d64acc0cb22cc8bf881a80a4e692cc189af55f57e8db1830
3
+ size 2329702581
special_tokens_map.json CHANGED
@@ -1,4 +1,51 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "eos_token": "</s>",
3
  "pad_token": "<pad>",
4
  "unk_token": "<unk>"
 
1
  {
2
+ "additional_special_tokens": [
3
+ "▁<extra_id_64>",
4
+ "▁<extra_id_95>",
5
+ "▁<extra_id_92>",
6
+ "▁<extra_id_57>",
7
+ "▁<extra_id_66>",
8
+ "▁<extra_id_59>",
9
+ "▁<extra_id_55>",
10
+ "▁<extra_id_69>",
11
+ "▁<extra_id_89>",
12
+ "▁<extra_id_72>",
13
+ "▁<extra_id_56>",
14
+ "▁<extra_id_78>",
15
+ "▁<extra_id_90>",
16
+ "▁<extra_id_98>",
17
+ "▁<extra_id_62>",
18
+ "▁<extra_id_99>",
19
+ "▁<extra_id_70>",
20
+ "▁<extra_id_94>",
21
+ "▁<extra_id_86>",
22
+ "▁<extra_id_79>",
23
+ "▁<extra_id_58>",
24
+ "▁<extra_id_65>",
25
+ "▁<extra_id_76>",
26
+ "▁<extra_id_85>",
27
+ "▁<extra_id_60>",
28
+ "▁<extra_id_82>",
29
+ "▁<extra_id_63>",
30
+ "▁<extra_id_75>",
31
+ "▁<extra_id_71>",
32
+ "▁<extra_id_88>",
33
+ "▁<extra_id_68>",
34
+ "▁<extra_id_80>",
35
+ "▁<extra_id_96>",
36
+ "▁<extra_id_81>",
37
+ "▁<extra_id_67>",
38
+ "▁<extra_id_77>",
39
+ "▁<extra_id_93>",
40
+ "▁<extra_id_83>",
41
+ "▁<extra_id_84>",
42
+ "▁<extra_id_97>",
43
+ "▁<extra_id_61>",
44
+ "▁<extra_id_91>",
45
+ "▁<extra_id_87>",
46
+ "▁<extra_id_73>",
47
+ "▁<extra_id_74>"
48
+ ],
49
  "eos_token": "</s>",
50
  "pad_token": "<pad>",
51
  "unk_token": "<unk>"
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b49fe5973ba16763424a9230fea46fd3bebedc6e497d7d230fb00ce9f5a2fb1
3
- size 16330634
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c87e0c0c4807ff408ebe1e3d8974b136b593183c5eed5f4c1aa80640b30cad3
3
+ size 16339317
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55369b15327970abeac8d1ab86df158e536d3cca4871788d3b9cf7888f3ca935
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b0caaef3812808a33407ed82f01e1814853d66a933e10a3c392756bb8b41ab
3
  size 4219