duwuonline committed on
Commit
5b5d99c
1 Parent(s): 97a5d5a

Training in progress, step 500

Browse files
config.json CHANGED
@@ -1,25 +1,23 @@
1
  {
2
- "_name_or_path": "csebuetnlp/mT5_m2o_arabic_crossSum",
3
  "architectures": [
4
- "MT5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
  "d_kv": 64,
9
  "d_model": 768,
10
- "decoder_start_token_id": 250021,
11
- "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
  "initializer_factor": 1.0,
16
  "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
19
- "length_penalty": 0.6,
20
- "max_length": 84,
21
- "model_type": "mt5",
22
- "num_beams": 4,
23
  "num_decoder_layers": 12,
24
  "num_heads": 12,
25
  "num_layers": 12,
@@ -27,194 +25,8 @@
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
- "task_specific_params": {
31
- "langid_map": {
32
- "amharic": [
33
- 35,
34
- "\u2581<extra_id_64>"
35
- ],
36
- "arabic": [
37
- 4,
38
- "\u2581<extra_id_95>"
39
- ],
40
- "azerbaijani": [
41
- 7,
42
- "\u2581<extra_id_92>"
43
- ],
44
- "bengali": [
45
- 42,
46
- "\u2581<extra_id_57>"
47
- ],
48
- "burmese": [
49
- 33,
50
- "\u2581<extra_id_66>"
51
- ],
52
- "chinese_simplified": [
53
- 40,
54
- "\u2581<extra_id_59>"
55
- ],
56
- "chinese_traditional": [
57
- 44,
58
- "\u2581<extra_id_55>"
59
- ],
60
- "english": [
61
- 30,
62
- "\u2581<extra_id_69>"
63
- ],
64
- "french": [
65
- 10,
66
- "\u2581<extra_id_89>"
67
- ],
68
- "gujarati": [
69
- 27,
70
- "\u2581<extra_id_72>"
71
- ],
72
- "hausa": [
73
- 43,
74
- "\u2581<extra_id_56>"
75
- ],
76
- "hindi": [
77
- 21,
78
- "\u2581<extra_id_78>"
79
- ],
80
- "igbo": [
81
- 9,
82
- "\u2581<extra_id_90>"
83
- ],
84
- "indonesian": [
85
- 1,
86
- "\u2581<extra_id_98>"
87
- ],
88
- "japanese": [
89
- 37,
90
- "\u2581<extra_id_62>"
91
- ],
92
- "kirundi": [
93
- 0,
94
- "\u2581<extra_id_99>"
95
- ],
96
- "korean": [
97
- 29,
98
- "\u2581<extra_id_70>"
99
- ],
100
- "kyrgyz": [
101
- 5,
102
- "\u2581<extra_id_94>"
103
- ],
104
- "marathi": [
105
- 13,
106
- "\u2581<extra_id_86>"
107
- ],
108
- "nepali": [
109
- 20,
110
- "\u2581<extra_id_79>"
111
- ],
112
- "oromo": [
113
- 41,
114
- "\u2581<extra_id_58>"
115
- ],
116
- "pashto": [
117
- 34,
118
- "\u2581<extra_id_65>"
119
- ],
120
- "persian": [
121
- 23,
122
- "\u2581<extra_id_76>"
123
- ],
124
- "pidgin": [
125
- 14,
126
- "\u2581<extra_id_85>"
127
- ],
128
- "portuguese": [
129
- 39,
130
- "\u2581<extra_id_60>"
131
- ],
132
- "punjabi": [
133
- 17,
134
- "\u2581<extra_id_82>"
135
- ],
136
- "russian": [
137
- 36,
138
- "\u2581<extra_id_63>"
139
- ],
140
- "scottish_gaelic": [
141
- 24,
142
- "\u2581<extra_id_75>"
143
- ],
144
- "serbian_cyrillic": [
145
- 28,
146
- "\u2581<extra_id_71>"
147
- ],
148
- "serbian_latin": [
149
- 11,
150
- "\u2581<extra_id_88>"
151
- ],
152
- "sinhala": [
153
- 31,
154
- "\u2581<extra_id_68>"
155
- ],
156
- "somali": [
157
- 19,
158
- "\u2581<extra_id_80>"
159
- ],
160
- "spanish": [
161
- 3,
162
- "\u2581<extra_id_96>"
163
- ],
164
- "swahili": [
165
- 18,
166
- "\u2581<extra_id_81>"
167
- ],
168
- "tamil": [
169
- 32,
170
- "\u2581<extra_id_67>"
171
- ],
172
- "telugu": [
173
- 22,
174
- "\u2581<extra_id_77>"
175
- ],
176
- "thai": [
177
- 6,
178
- "\u2581<extra_id_93>"
179
- ],
180
- "tigrinya": [
181
- 16,
182
- "\u2581<extra_id_83>"
183
- ],
184
- "turkish": [
185
- 15,
186
- "\u2581<extra_id_84>"
187
- ],
188
- "ukrainian": [
189
- 2,
190
- "\u2581<extra_id_97>"
191
- ],
192
- "urdu": [
193
- 38,
194
- "\u2581<extra_id_61>"
195
- ],
196
- "uzbek": [
197
- 8,
198
- "\u2581<extra_id_91>"
199
- ],
200
- "vietnamese": [
201
- 12,
202
- "\u2581<extra_id_87>"
203
- ],
204
- "welsh": [
205
- 26,
206
- "\u2581<extra_id_73>"
207
- ],
208
- "yoruba": [
209
- 25,
210
- "\u2581<extra_id_74>"
211
- ]
212
- }
213
- },
214
- "tie_word_embeddings": false,
215
- "tokenizer_class": "T5Tokenizer",
216
  "torch_dtype": "float32",
217
  "transformers_version": "4.32.0",
218
  "use_cache": true,
219
- "vocab_size": 250112
220
  }
 
1
  {
2
+ "_name_or_path": "VietAI/vit5-base-vietnews-summarization",
3
  "architectures": [
4
+ "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
  "d_kv": 64,
9
  "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
  "initializer_factor": 1.0,
16
  "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
  "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
 
 
21
  "num_decoder_layers": 12,
22
  "num_heads": 12,
23
  "num_layers": 12,
 
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
27
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.32.0",
30
  "use_cache": true,
31
+ "vocab_size": 36096
32
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cdb96a74bf67fdf9baaadd25a6aed4e568a0f610ab3759785abe2ee5c726b85
3
- size 2329702581
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fb5070cd9e483b48e23d11bd08c3c74e57181f58fee392990c098bd12d0ef04
3
+ size 903892625
special_tokens_map.json CHANGED
@@ -1,50 +1,101 @@
1
  {
2
  "additional_special_tokens": [
3
- "▁<extra_id_64>",
4
- "▁<extra_id_95>",
5
- "▁<extra_id_92>",
6
- "▁<extra_id_57>",
7
- "▁<extra_id_66>",
8
- "▁<extra_id_59>",
9
- "▁<extra_id_55>",
10
- "▁<extra_id_69>",
11
- "▁<extra_id_89>",
12
- "▁<extra_id_72>",
13
- "▁<extra_id_56>",
14
- "▁<extra_id_78>",
15
- "▁<extra_id_90>",
16
- "▁<extra_id_98>",
17
- "▁<extra_id_62>",
18
- "▁<extra_id_99>",
19
- "▁<extra_id_70>",
20
- "▁<extra_id_94>",
21
- "▁<extra_id_86>",
22
- "▁<extra_id_79>",
23
- "▁<extra_id_58>",
24
- "▁<extra_id_65>",
25
- "▁<extra_id_76>",
26
- "▁<extra_id_85>",
27
- "▁<extra_id_60>",
28
- "▁<extra_id_82>",
29
- "▁<extra_id_63>",
30
- "▁<extra_id_75>",
31
- "▁<extra_id_71>",
32
- "▁<extra_id_88>",
33
- "▁<extra_id_68>",
34
- "▁<extra_id_80>",
35
- "▁<extra_id_96>",
36
- "▁<extra_id_81>",
37
- "▁<extra_id_67>",
38
- "▁<extra_id_77>",
39
- "▁<extra_id_93>",
40
- "▁<extra_id_83>",
41
- "▁<extra_id_84>",
42
- "▁<extra_id_97>",
43
- "▁<extra_id_61>",
44
- "▁<extra_id_91>",
45
- "▁<extra_id_87>",
46
- "▁<extra_id_73>",
47
- "▁<extra_id_74>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ],
49
  "eos_token": "</s>",
50
  "pad_token": "<pad>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>"
99
  ],
100
  "eos_token": "</s>",
101
  "pad_token": "<pad>",
spiece.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
- size 4309802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59986b62f9f0b90edafb9b073ea7b93d21114a5841219a1ea2399ade73f729c6
3
+ size 820370
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c87e0c0c4807ff408ebe1e3d8974b136b593183c5eed5f4c1aa80640b30cad3
3
- size 16339317
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7157ec8a33c7d8b9e27d123c9938bcdfcbc6f2e502a2bf25ef434d05a542ac12
3
+ size 2418088
tokenizer_config.json CHANGED
@@ -1,9 +1,105 @@
1
  {
2
- "additional_special_tokens": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "</s>",
5
- "extra_ids": 0,
6
- "legacy": true,
7
  "model_max_length": 1000000000000000019884624838656,
8
  "pad_token": "<pad>",
9
  "sp_model_kwargs": {},
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>"
99
+ ],
100
  "clean_up_tokenization_spaces": true,
101
  "eos_token": "</s>",
102
+ "extra_ids": 96,
 
103
  "model_max_length": 1000000000000000019884624838656,
104
  "pad_token": "<pad>",
105
  "sp_model_kwargs": {},
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4b0caaef3812808a33407ed82f01e1814853d66a933e10a3c392756bb8b41ab
3
  size 4219
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa92890800e4c27c897debaa35f8eb7e0b83fcfec55c552cf59ca8a21d5fa5b
3
  size 4219