asahi417 commited on
Commit
53d0409
1 Parent(s): fe6abe2

model update

Browse files
README.md ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - relbert/semeval2012_relational_similarity
4
+ model-index:
5
+ - name: relbert/relbert-roberta-base-nce-semeval2012-2
6
+ results:
7
+ - task:
8
+ name: Relation Mapping
9
+ type: sorting-task
10
+ dataset:
11
+ name: Relation Mapping
12
+ args: relbert/relation_mapping
13
+ type: relation-mapping
14
+ metrics:
15
+ - name: Accuracy
16
+ type: accuracy
17
+ value: 0.8303968253968254
18
+ - task:
19
+ name: Analogy Questions (SAT full)
20
+ type: multiple-choice-qa
21
+ dataset:
22
+ name: SAT full
23
+ args: relbert/analogy_questions
24
+ type: analogy-questions
25
+ metrics:
26
+ - name: Accuracy
27
+ type: accuracy
28
+ value: 0.7192513368983957
29
+ - task:
30
+ name: Analogy Questions (SAT)
31
+ type: multiple-choice-qa
32
+ dataset:
33
+ name: SAT
34
+ args: relbert/analogy_questions
35
+ type: analogy-questions
36
+ metrics:
37
+ - name: Accuracy
38
+ type: accuracy
39
+ value: 0.7091988130563798
40
+ - task:
41
+ name: Analogy Questions (BATS)
42
+ type: multiple-choice-qa
43
+ dataset:
44
+ name: BATS
45
+ args: relbert/analogy_questions
46
+ type: analogy-questions
47
+ metrics:
48
+ - name: Accuracy
49
+ type: accuracy
50
+ value: 0.8043357420789328
51
+ - task:
52
+ name: Analogy Questions (Google)
53
+ type: multiple-choice-qa
54
+ dataset:
55
+ name: Google
56
+ args: relbert/analogy_questions
57
+ type: analogy-questions
58
+ metrics:
59
+ - name: Accuracy
60
+ type: accuracy
61
+ value: 0.948
62
+ - task:
63
+ name: Analogy Questions (U2)
64
+ type: multiple-choice-qa
65
+ dataset:
66
+ name: U2
67
+ args: relbert/analogy_questions
68
+ type: analogy-questions
69
+ metrics:
70
+ - name: Accuracy
71
+ type: accuracy
72
+ value: 0.6798245614035088
73
+ - task:
74
+ name: Analogy Questions (U4)
75
+ type: multiple-choice-qa
76
+ dataset:
77
+ name: U4
78
+ args: relbert/analogy_questions
79
+ type: analogy-questions
80
+ metrics:
81
+ - name: Accuracy
82
+ type: accuracy
83
+ value: 0.6643518518518519
84
+ - task:
85
+ name: Analogy Questions (ConceptNet Analogy)
86
+ type: multiple-choice-qa
87
+ dataset:
88
+ name: ConceptNet Analogy
89
+ args: relbert/analogy_questions
90
+ type: analogy-questions
91
+ metrics:
92
+ - name: Accuracy
93
+ type: accuracy
94
+ value: 0.4865771812080537
95
+ - task:
96
+ name: Analogy Questions (TREX Analogy)
97
+ type: multiple-choice-qa
98
+ dataset:
99
+ name: TREX Analogy
100
+ args: relbert/analogy_questions
101
+ type: analogy-questions
102
+ metrics:
103
+ - name: Accuracy
104
+ type: accuracy
105
+ value: 0.6338797814207651
106
+ - task:
107
+ name: Analogy Questions (NELL-ONE Analogy)
108
+ type: multiple-choice-qa
109
+ dataset:
110
+ name: NELL-ONE Analogy
111
+ args: relbert/analogy_questions
112
+ type: analogy-questions
113
+ metrics:
114
+ - name: Accuracy
115
+ type: accuracy
116
+ value: 0.6633333333333333
117
+ - task:
118
+ name: Lexical Relation Classification (BLESS)
119
+ type: classification
120
+ dataset:
121
+ name: BLESS
122
+ args: relbert/lexical_relation_classification
123
+ type: relation-classification
124
+ metrics:
125
+ - name: F1
126
+ type: f1
127
+ value: 0.9169805635076088
128
+ - name: F1 (macro)
129
+ type: f1_macro
130
+ value: 0.9133613159985977
131
+ - task:
132
+ name: Lexical Relation Classification (CogALexV)
133
+ type: classification
134
+ dataset:
135
+ name: CogALexV
136
+ args: relbert/lexical_relation_classification
137
+ type: relation-classification
138
+ metrics:
139
+ - name: F1
140
+ type: f1
141
+ value: 0.8643192488262911
142
+ - name: F1 (macro)
143
+ type: f1_macro
144
+ value: 0.709680204738525
145
+ - task:
146
+ name: Lexical Relation Classification (EVALution)
147
+ type: classification
148
+ dataset:
149
+ name: EVALution
150
+ args: relbert/lexical_relation_classification
151
+ type: relation-classification
152
+ metrics:
153
+ - name: F1
154
+ type: f1
155
+ value: 0.6782231852654388
156
+ - name: F1 (macro)
157
+ type: f1_macro
158
+ value: 0.665196173208286
159
+ - task:
160
+ name: Lexical Relation Classification (K&H+N)
161
+ type: classification
162
+ dataset:
163
+ name: K&H+N
164
+ args: relbert/lexical_relation_classification
165
+ type: relation-classification
166
+ metrics:
167
+ - name: F1
168
+ type: f1
169
+ value: 0.9568060095986646
170
+ - name: F1 (macro)
171
+ type: f1_macro
172
+ value: 0.8745909398702613
173
+ - task:
174
+ name: Lexical Relation Classification (ROOT09)
175
+ type: classification
176
+ dataset:
177
+ name: ROOT09
178
+ args: relbert/lexical_relation_classification
179
+ type: relation-classification
180
+ metrics:
181
+ - name: F1
182
+ type: f1
183
+ value: 0.9150736446255092
184
+ - name: F1 (macro)
185
+ type: f1_macro
186
+ value: 0.9142555280970402
187
+
188
+ ---
189
+ # relbert/relbert-roberta-base-nce-semeval2012-2
190
+
191
+ RelBERT based on [roberta-large](https://huggingface.co/roberta-large) fine-tuned on [relbert/semeval2012_relational_similarity](https://huggingface.co/datasets/relbert/semeval2012_relational_similarity) (see the [`relbert`](https://github.com/asahi417/relbert) repository for more details on fine-tuning).
192
+ This model achieves the following results on the relation understanding tasks:
193
+ - Analogy Question ([dataset](https://huggingface.co/datasets/relbert/analogy_questions), [full result](https://huggingface.co/relbert/relbert-roberta-base-nce-semeval2012-2/raw/main/analogy.forward.json)):
194
+ - Accuracy on SAT (full): 0.7192513368983957
195
+ - Accuracy on SAT: 0.7091988130563798
196
+ - Accuracy on BATS: 0.8043357420789328
197
+ - Accuracy on U2: 0.6798245614035088
198
+ - Accuracy on U4: 0.6643518518518519
199
+ - Accuracy on Google: 0.948
200
+ - Accuracy on ConceptNet Analogy: 0.4865771812080537
201
+ - Accuracy on T-Rex Analogy: 0.6338797814207651
202
+ - Accuracy on NELL-ONE Analogy: 0.6633333333333333
203
+ - Lexical Relation Classification ([dataset](https://huggingface.co/datasets/relbert/lexical_relation_classification), [full result](https://huggingface.co/relbert/relbert-roberta-base-nce-semeval2012-2/raw/main/classification.json)):
204
+ - Micro F1 score on BLESS: 0.9169805635076088
205
+ - Micro F1 score on CogALexV: 0.8643192488262911
206
+ - Micro F1 score on EVALution: 0.6782231852654388
207
+ - Micro F1 score on K&H+N: 0.9568060095986646
208
+ - Micro F1 score on ROOT09: 0.9150736446255092
209
+ - Relation Mapping ([dataset](https://huggingface.co/datasets/relbert/relation_mapping), [full result](https://huggingface.co/relbert/relbert-roberta-base-nce-semeval2012-2/raw/main/relation_mapping.json)):
210
+ - Accuracy on Relation Mapping: 0.8303968253968254
211
+
212
+
213
+ ### Usage
214
+ This model can be used through the [relbert library](https://github.com/asahi417/relbert). Install the library via pip
215
+ ```shell
216
+ pip install relbert
217
+ ```
218
+ and activate the model as below.
219
+ ```python
220
+ from relbert import RelBERT
221
+ model = RelBERT("relbert/relbert-roberta-base-nce-semeval2012-2")
222
+ vector = model.get_embedding(['Tokyo', 'Japan']) # shape of (n_dim, )
223
+ ```
224
+
225
+ ### Training hyperparameters
226
+
227
+ - model: roberta-large
228
+ - max_length: 64
229
+ - epoch: 10
230
+ - batch: 32
231
+ - random_seed: 2
232
+ - lr: 5e-06
233
+ - lr_warmup: 10
234
+ - aggregation_mode: average_no_mask
235
+ - data: relbert/semeval2012_relational_similarity
236
+ - data_name: None
237
+ - exclude_relation: None
238
+ - split: train
239
+ - split_valid: validation
240
+ - loss_function: nce
241
+ - classification_loss: False
242
+ - loss_function_config: {'temperature': 0.05, 'num_negative': 100, 'num_positive': 10}
243
+ - augment_negative_by_positive: True
244
+
245
+ See the full configuration at [config file](https://huggingface.co/relbert/relbert-roberta-base-nce-semeval2012-2/raw/main/finetuning_config.json).
246
+
247
+ ### Reference
248
+ If you use any resource from RelBERT, please consider citing our [paper](https://aclanthology.org/2021.emnlp-main.712/).
249
+
250
+ ```
251
+
252
+ @inproceedings{ushio-etal-2021-distilling,
253
+ title = "Distilling Relation Embeddings from Pretrained Language Models",
254
+ author = "Ushio, Asahi and
255
+ Camacho-Collados, Jose and
256
+ Schockaert, Steven",
257
+ booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
258
+ month = nov,
259
+ year = "2021",
260
+ address = "Online and Punta Cana, Dominican Republic",
261
+ publisher = "Association for Computational Linguistics",
262
+ url = "https://aclanthology.org/2021.emnlp-main.712",
263
+ doi = "10.18653/v1/2021.emnlp-main.712",
264
+ pages = "9044--9062",
265
+ abstract = "Pre-trained language models have been found to capture a surprisingly rich amount of lexical knowledge, ranging from commonsense properties of everyday concepts to detailed factual knowledge about named entities. Among others, this makes it possible to distill high-quality word vectors from pre-trained language models. However, it is currently unclear to what extent it is possible to distill relation embeddings, i.e. vectors that characterize the relationship between two words. Such relation embeddings are appealing because they can, in principle, encode relational knowledge in a more fine-grained way than is possible with knowledge graphs. To obtain relation embeddings from a pre-trained language model, we encode word pairs using a (manually or automatically generated) prompt, and we fine-tune the language model such that relationally similar word pairs yield similar output vectors. We find that the resulting relation embeddings are highly competitive on analogy (unsupervised) and relation classification (supervised) benchmarks, even without any task-specific fine-tuning. Source code to reproduce our experimental results and the model checkpoints are available in the following repository: https://github.com/asahi417/relbert",
266
+ }
267
+
268
+ ```
analogy.forward.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"semeval2012_relational_similarity/validation": 0.7974683544303798, "scan/test": 0.2908415841584158, "sat_full/test": 0.7192513368983957, "sat/test": 0.7091988130563798, "u2/test": 0.6798245614035088, "u4/test": 0.6643518518518519, "google/test": 0.948, "bats/test": 0.8043357420789328, "t_rex_relational_similarity/test": 0.6338797814207651, "conceptnet_relational_similarity/test": 0.4865771812080537, "nell_relational_similarity/test": 0.6633333333333333}
classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lexical_relation_classification/BLESS": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9169805635076088, "test/f1_macro": 0.9133613159985977, "test/f1_micro": 0.9169805635076088, "test/p_macro": 0.9065498330870753, "test/p_micro": 0.9169805635076088, "test/r_macro": 0.9208392163252331, "test/r_micro": 0.9169805635076088, "test/f1/attri": 0.9212207239176722, "test/p/attri": 0.9102384291725105, "test/r/attri": 0.9324712643678161, "test/f1/coord": 0.9544175576814856, "test/p/coord": 0.9474860335195531, "test/r/coord": 0.9614512471655329, "test/f1/event": 0.8538071065989848, "test/p/event": 0.8285714285714286, "test/r/event": 0.8806282722513089, "test/f1/hyper": 0.9296987087517934, "test/p/hyper": 0.9337175792507204, "test/r/hyper": 0.9257142857142857, "test/f1/mero": 0.8896146309601568, "test/p/mero": 0.867515923566879, "test/r/mero": 0.9128686327077749, "test/f1/random": 0.931409168081494, "test/p/random": 0.9517696044413602, "test/r/random": 0.9119015957446809}, "lexical_relation_classification/CogALexV": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": 
false}, "test/accuracy": 0.8643192488262911, "test/f1_macro": 0.709680204738525, "test/f1_micro": 0.8643192488262911, "test/p_macro": 0.7459301562952307, "test/p_micro": 0.8643192488262911, "test/r_macro": 0.6794130847403439, "test/r_micro": 0.8643192488262911, "test/f1/ANT": 0.7822222222222223, "test/p/ANT": 0.8380952380952381, "test/r/ANT": 0.7333333333333333, "test/f1/HYPER": 0.6134453781512605, "test/p/HYPER": 0.6596385542168675, "test/r/HYPER": 0.5732984293193717, "test/f1/PART_OF": 0.7286063569682152, "test/p/PART_OF": 0.8054054054054054, "test/r/PART_OF": 0.6651785714285714, "test/f1/RANDOM": 0.937519923493784, "test/p/RANDOM": 0.9147744945567652, "test/r/RANDOM": 0.9614253023864008, "test/f1/SYN": 0.48660714285714285, "test/p/SYN": 0.5117370892018779, "test/r/SYN": 0.46382978723404256}, "lexical_relation_classification/EVALution": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.6782231852654388, "test/f1_macro": 0.665196173208286, "test/f1_micro": 0.6782231852654388, "test/p_macro": 0.6665905293786667, "test/p_micro": 0.6782231852654388, "test/r_macro": 0.6653300709395378, "test/r_micro": 0.6782231852654388, "test/f1/Antonym": 0.7931456548347613, "test/p/Antonym": 0.8059701492537313, "test/r/Antonym": 0.7807228915662651, "test/f1/HasA": 0.6541353383458647, "test/p/HasA": 0.7016129032258065, "test/r/HasA": 0.6126760563380281, "test/f1/HasProperty": 0.8126888217522659, "test/p/HasProperty": 0.7911764705882353, "test/r/HasProperty": 0.8354037267080745, "test/f1/IsA": 0.6145610278372591, 
"test/p/IsA": 0.6042105263157894, "test/r/IsA": 0.6252723311546841, "test/f1/MadeOf": 0.6363636363636364, "test/p/MadeOf": 0.6222222222222222, "test/r/MadeOf": 0.6511627906976745, "test/f1/PartOf": 0.6622073578595318, "test/p/PartOf": 0.6428571428571429, "test/r/PartOf": 0.6827586206896552, "test/f1/Synonym": 0.483271375464684, "test/p/Synonym": 0.49808429118773945, "test/r/Synonym": 0.4693140794223827}, "lexical_relation_classification/K&H+N": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, "nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9568060095986646, "test/f1_macro": 0.8745909398702613, "test/f1_micro": 0.9568060095986646, "test/p_macro": 0.8722419340096175, "test/p_micro": 0.9568060095986646, "test/r_macro": 0.8771975453603155, "test/r_micro": 0.9568060095986646, "test/f1/false": 0.9696655047096344, "test/p/false": 0.9703131957844738, "test/r/false": 0.9690186777349541, "test/f1/hypo": 0.9182389937106918, "test/p/hypo": 0.9258536585365854, "test/r/hypo": 0.9107485604606526, "test/f1/mero": 0.6490872210953347, "test/p/mero": 0.6324110671936759, "test/r/mero": 0.6666666666666666, "test/f1/sibl": 0.9613720399653843, "test/p/sibl": 0.9603898145237346, "test/r/sibl": 0.9623562765789888}, "lexical_relation_classification/ROOT09": {"classifier_config": {"activation": "relu", "alpha": 0.0001, "batch_size": "auto", "beta_1": 0.9, "beta_2": 0.999, "early_stopping": false, "epsilon": 1e-08, "hidden_layer_sizes": [100], "learning_rate": "constant", "learning_rate_init": 0.001, "max_fun": 15000, "max_iter": 200, "momentum": 0.9, "n_iter_no_change": 10, 
"nesterovs_momentum": true, "power_t": 0.5, "random_state": 0, "shuffle": true, "solver": "adam", "tol": 0.0001, "validation_fraction": 0.1, "verbose": false, "warm_start": false}, "test/accuracy": 0.9150736446255092, "test/f1_macro": 0.9142555280970402, "test/f1_micro": 0.9150736446255092, "test/p_macro": 0.9123400631967442, "test/p_micro": 0.9150736446255092, "test/r_macro": 0.916280536276508, "test/r_micro": 0.9150736446255092, "test/f1/COORD": 0.9762050030506406, "test/p/COORD": 0.9720534629404617, "test/r/COORD": 0.9803921568627451, "test/f1/HYPER": 0.8489296636085627, "test/p/HYPER": 0.8401937046004843, "test/r/HYPER": 0.857849196538937, "test/f1/RANDOM": 0.9176319176319176, "test/p/RANDOM": 0.9247730220492867, "test/r/RANDOM": 0.9106002554278416}}
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-large",
3
+ "architectures": [
4
+ "RobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "relbert_config": {
23
+ "aggregation_mode": "average_no_mask",
24
+ "template": "I wasn\u2019t aware of this relationship, but I just read in the encyclopedia that <subj> is the <mask> of <obj>"
25
+ },
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.26.1",
28
+ "type_vocab_size": 1,
29
+ "use_cache": true,
30
+ "vocab_size": 50265
31
+ }
finetuning_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template": "I wasn\u2019t aware of this relationship, but I just read in the encyclopedia that <subj> is the <mask> of <obj>",
3
+ "model": "roberta-large",
4
+ "max_length": 64,
5
+ "epoch": 10,
6
+ "batch": 32,
7
+ "random_seed": 2,
8
+ "lr": 5e-06,
9
+ "lr_warmup": 10,
10
+ "aggregation_mode": "average_no_mask",
11
+ "data": "relbert/semeval2012_relational_similarity",
12
+ "data_name": null,
13
+ "exclude_relation": null,
14
+ "split": "train",
15
+ "split_valid": "validation",
16
+ "loss_function": "nce",
17
+ "classification_loss": false,
18
+ "loss_function_config": {
19
+ "temperature": 0.05,
20
+ "num_negative": 100,
21
+ "num_positive": 10
22
+ },
23
+ "augment_negative_by_positive": true
24
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2625a24982aa7546e5d7c5bb28293ef3adbfdf6393b31806150e3ba3272c8759
3
+ size 1421575277
relation_mapping.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 512,
9
+ "name_or_path": "roberta-large",
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "special_tokens_map_file": null,
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff