firqaaa committed on
Commit
3e4bdf2
·
verified ·
1 Parent(s): 26ef440

Add SetFit ABSA model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": false,
4
- "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false
7
  }
 
1
  {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false
7
  }
README.md CHANGED
@@ -9,22 +9,22 @@ tags:
9
  metrics:
10
  - accuracy
11
  widget:
12
- - text: di area tersebut makanan perancis:mungkin agak ramai di akhir pekan, tapi
13
- suasana bagus dan ini adalah makanan prancis terbaik yang bisa anda temukan di
14
- area tersebut makanan perancis
15
- - text: para pelayan dan pemilik tidak:para pelayan dan pemilik tidak peduli tentang
16
- hal ini dan berjanji untuk memanggil pembasmi tetapi tidak kecewa atau meminta
17
- maaf seperti yang saya harapkan.
18
- - text: suasana ramai tapi suasana:suasana ramai tapi suasana seperti bistro.
19
- - text: menyukai artisanal! tarif perancis:jika anda menyukai anggur dan keju serta
20
- hidangan prancis yang lezat, anda akan menyukai artisanal! tarif perancis
21
- - text: hebat lagi, harga juga sangat terjangkau:hebat lagi, harga juga sangat terjangkau,
22
- dan makanan sangat enak.
23
  pipeline_tag: text-classification
24
  inference: false
25
- base_model: firqaaa/indo-sentence-bert-base
26
  model-index:
27
- - name: SetFit Polarity Model with firqaaa/indo-sentence-bert-base
28
  results:
29
  - task:
30
  type: text-classification
@@ -35,13 +35,13 @@ model-index:
35
  split: test
36
  metrics:
37
  - type: accuracy
38
- value: 0.6836734693877551
39
  name: Accuracy
40
  ---
41
 
42
- # SetFit Polarity Model with firqaaa/indo-sentence-bert-base
43
 
44
- This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Aspect Based Sentiment Analysis (ABSA). This SetFit model uses [firqaaa/indo-sentence-bert-base](https://huggingface.co/firqaaa/indo-sentence-bert-base) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification. In particular, this model is in charge of classifying aspect polarities.
45
 
46
  The model has been trained using an efficient few-shot learning technique that involves:
47
 
@@ -58,12 +58,12 @@ This model was trained within the context of a larger system for ABSA, which loo
58
 
59
  ### Model Description
60
  - **Model Type:** SetFit
61
- - **Sentence Transformer body:** [firqaaa/indo-sentence-bert-base](https://huggingface.co/firqaaa/indo-sentence-bert-base)
62
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
63
  - **spaCy Model:** id_core_news_trf
64
- - **SetFitABSA Aspect Model:** [firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-aspect](https://huggingface.co/firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-aspect)
65
- - **SetFitABSA Polarity Model:** [firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-polarity](https://huggingface.co/firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-polarity)
66
- - **Maximum Sequence Length:** 512 tokens
67
  - **Number of Classes:** 4 classes
68
  <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
69
  <!-- - **Language:** Unknown -->
@@ -88,7 +88,7 @@ This model was trained within the context of a larger system for ABSA, which loo
88
  ### Metrics
89
  | Label | Accuracy |
90
  |:--------|:---------|
91
- | **all** | 0.6837 |
92
 
93
  ## Uses
94
 
@@ -107,8 +107,8 @@ from setfit import AbsaModel
107
 
108
  # Download from the 🤗 Hub
109
  model = AbsaModel.from_pretrained(
110
- "firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-aspect",
111
- "firqaaa/indo-setfit-absa-sentence-bert-base-p1-restaurants-polarity",
112
  )
113
  # Run inference
114
  preds = model("The food was great, but the venue is just way too busy.")
@@ -143,17 +143,17 @@ preds = model("The food was great, but the venue is just way too busy.")
143
  ### Training Set Metrics
144
  | Training set | Min | Median | Max |
145
  |:-------------|:----|:--------|:----|
146
- | Word count | 3 | 20.9935 | 62 |
147
 
148
  | Label | Training Sample Count |
149
  |:--------|:----------------------|
150
- | konflik | 21 |
151
- | negatif | 243 |
152
- | netral | 186 |
153
- | positif | 626 |
154
 
155
  ### Training Hyperparameters
156
- - batch_size: (32, 32)
157
  - num_epochs: (1, 1)
158
  - max_steps: -1
159
  - sampling_strategy: oversampling
@@ -170,69 +170,79 @@ preds = model("The food was great, but the venue is just way too busy.")
170
  - load_best_model_at_end: True
171
 
172
  ### Training Results
173
- | Epoch | Step | Training Loss | Validation Loss |
174
- |:----------:|:-------:|:-------------:|:---------------:|
175
- | 0.0000 | 1 | 0.2996 | - |
176
- | 0.0024 | 50 | 0.2488 | - |
177
- | 0.0048 | 100 | 0.2636 | - |
178
- | 0.0071 | 150 | 0.2544 | - |
179
- | 0.0095 | 200 | 0.2036 | - |
180
- | 0.0119 | 250 | 0.2002 | - |
181
- | 0.0143 | 300 | 0.1723 | - |
182
- | 0.0167 | 350 | 0.2112 | - |
183
- | 0.0191 | 400 | 0.1655 | - |
184
- | 0.0214 | 450 | 0.1559 | - |
185
- | **0.0238** | **500** | **0.0602** | **0.2033** |
186
- | 0.0262 | 550 | 0.1047 | - |
187
- | 0.0286 | 600 | 0.1228 | - |
188
- | 0.0310 | 650 | 0.1152 | - |
189
- | 0.0333 | 700 | 0.0444 | - |
190
- | 0.0357 | 750 | 0.0479 | - |
191
- | 0.0381 | 800 | 0.065 | - |
192
- | 0.0405 | 850 | 0.0417 | - |
193
- | 0.0429 | 900 | 0.0647 | - |
194
- | 0.0452 | 950 | 0.0517 | - |
195
- | 0.0476 | 1000 | 0.0433 | 0.2399 |
196
- | 0.0500 | 1050 | 0.0044 | - |
197
- | 0.0524 | 1100 | 0.0241 | - |
198
- | 0.0548 | 1150 | 0.0204 | - |
199
- | 0.0572 | 1200 | 0.0532 | - |
200
- | 0.0595 | 1250 | 0.0116 | - |
201
- | 0.0619 | 1300 | 0.0288 | - |
202
- | 0.0643 | 1350 | 0.0125 | - |
203
- | 0.0667 | 1400 | 0.0357 | - |
204
- | 0.0691 | 1450 | 0.0028 | - |
205
- | 0.0714 | 1500 | 0.027 | 0.2564 |
206
- | 0.0738 | 1550 | 0.0032 | - |
207
- | 0.0762 | 1600 | 0.0048 | - |
208
- | 0.0786 | 1650 | 0.0003 | - |
209
- | 0.0810 | 1700 | 0.0008 | - |
210
- | 0.0834 | 1750 | 0.0008 | - |
211
- | 0.0857 | 1800 | 0.0023 | - |
212
- | 0.0881 | 1850 | 0.0003 | - |
213
- | 0.0905 | 1900 | 0.0004 | - |
214
- | 0.0929 | 1950 | 0.0003 | - |
215
- | 0.0953 | 2000 | 0.0054 | 0.2812 |
216
- | 0.0976 | 2050 | 0.0005 | - |
217
- | 0.1000 | 2100 | 0.0006 | - |
218
- | 0.1024 | 2150 | 0.0004 | - |
219
- | 0.1048 | 2200 | 0.0019 | - |
220
- | 0.1072 | 2250 | 0.0007 | - |
221
- | 0.1095 | 2300 | 0.0004 | - |
222
- | 0.1119 | 2350 | 0.0001 | - |
223
- | 0.1143 | 2400 | 0.0004 | - |
224
- | 0.1167 | 2450 | 0.0069 | - |
225
- | 0.1191 | 2500 | 0.0001 | 0.2845 |
226
- | 0.1215 | 2550 | 0.0 | - |
227
- | 0.1238 | 2600 | 0.0002 | - |
228
- | 0.1262 | 2650 | 0.0001 | - |
229
- | 0.1286 | 2700 | 0.0109 | - |
230
- | 0.1310 | 2750 | 0.0037 | - |
231
- | 0.1334 | 2800 | 0.0001 | - |
232
- | 0.1357 | 2850 | 0.0001 | - |
233
- | 0.1381 | 2900 | 0.0001 | - |
234
- | 0.1405 | 2950 | 0.0001 | - |
235
- | 0.1429 | 3000 | 0.0001 | 0.2839 |
 
 
 
 
 
 
 
 
 
 
236
 
237
  * The bold row denotes the saved checkpoint.
238
  ### Framework Versions
 
9
  metrics:
10
  - accuracy
11
  widget:
12
+ - text: gulungan biasa menjadi gulungan luar dalam,:dibutuhkan biaya tambahan $2 untuk
13
+ mengubah gulungan biasa menjadi gulungan luar dalam, tetapi gulungan tersebut
14
+ berukuran lebih dari tiga kali lipat, dan itu bukan ha dari nasi.
15
+ - text: -a-bagel (baik di:ess-a-bagel (baik di sty-town atau midtown) sejauh ini merupakan
16
+ bagel terbaik di ny.
17
+ - text: mahal wadah ini pengelola:ketika kami sedang duduk makan makanan di bawah
18
+ standar, manajer mulai mencaci-maki beberapa karyawan karena meletakkan wadah
19
+ bumbu yang salah dan menjelaskan kepada mereka betapa mahal wadah ini pengelola
20
+ - text: staf sangat akomodatif.:staf sangat akomodatif.
21
+ - text: layanan luar biasa melayani:makanan india yang enak dan layanan luar biasa
22
+ melayani
23
  pipeline_tag: text-classification
24
  inference: false
25
+ base_model: BAAI/bge-m3
26
  model-index:
27
+ - name: SetFit Polarity Model with BAAI/bge-m3
28
  results:
29
  - task:
30
  type: text-classification
 
35
  split: test
36
  metrics:
37
  - type: accuracy
38
+ value: 0.7898320472083522
39
  name: Accuracy
40
  ---
41
 
42
+ # SetFit Polarity Model with BAAI/bge-m3
43
 
44
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Aspect Based Sentiment Analysis (ABSA). This SetFit model uses [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification. In particular, this model is in charge of classifying aspect polarities.
45
 
46
  The model has been trained using an efficient few-shot learning technique that involves:
47
 
 
58
 
59
  ### Model Description
60
  - **Model Type:** SetFit
61
+ - **Sentence Transformer body:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3)
62
  - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
63
  - **spaCy Model:** id_core_news_trf
64
+ - **SetFitABSA Aspect Model:** [firqaaa/indo-setfit-absa-bert-base-restaurants-aspect](https://huggingface.co/firqaaa/indo-setfit-absa-bert-base-restaurants-aspect)
65
+ - **SetFitABSA Polarity Model:** [firqaaa/indo-setfit-absa-bert-base-restaurants-polarity](https://huggingface.co/firqaaa/indo-setfit-absa-bert-base-restaurants-polarity)
66
+ - **Maximum Sequence Length:** 8192 tokens
67
  - **Number of Classes:** 4 classes
68
  <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
69
  <!-- - **Language:** Unknown -->
 
88
  ### Metrics
89
  | Label | Accuracy |
90
  |:--------|:---------|
91
+ | **all** | 0.7898 |
92
 
93
  ## Uses
94
 
 
107
 
108
  # Download from the 🤗 Hub
109
  model = AbsaModel.from_pretrained(
110
+ "firqaaa/indo-setfit-absa-bert-base-restaurants-aspect",
111
+ "firqaaa/indo-setfit-absa-bert-base-restaurants-polarity",
112
  )
113
  # Run inference
114
  preds = model("The food was great, but the venue is just way too busy.")
 
143
  ### Training Set Metrics
144
  | Training set | Min | Median | Max |
145
  |:-------------|:----|:--------|:----|
146
+ | Word count | 3 | 20.6594 | 62 |
147
 
148
  | Label | Training Sample Count |
149
  |:--------|:----------------------|
150
+ | konflik | 34 |
151
+ | negatif | 323 |
152
+ | netral | 258 |
153
+ | positif | 853 |
154
 
155
  ### Training Hyperparameters
156
+ - batch_size: (16, 16)
157
  - num_epochs: (1, 1)
158
  - max_steps: -1
159
  - sampling_strategy: oversampling
 
170
  - load_best_model_at_end: True
171
 
172
  ### Training Results
173
+ | Epoch | Step | Training Loss | Validation Loss |
174
+ |:----------:|:--------:|:-------------:|:---------------:|
175
+ | 0.0000 | 1 | 0.2345 | - |
176
+ | 0.0006 | 50 | 0.2337 | - |
177
+ | 0.0013 | 100 | 0.267 | - |
178
+ | 0.0019 | 150 | 0.2335 | - |
179
+ | 0.0025 | 200 | 0.2368 | - |
180
+ | 0.0032 | 250 | 0.2199 | - |
181
+ | 0.0038 | 300 | 0.2325 | - |
182
+ | 0.0045 | 350 | 0.2071 | - |
183
+ | 0.0051 | 400 | 0.2229 | - |
184
+ | 0.0057 | 450 | 0.1153 | - |
185
+ | 0.0064 | 500 | 0.1771 | 0.1846 |
186
+ | 0.0070 | 550 | 0.1612 | - |
187
+ | 0.0076 | 600 | 0.1487 | - |
188
+ | 0.0083 | 650 | 0.147 | - |
189
+ | 0.0089 | 700 | 0.1982 | - |
190
+ | 0.0096 | 750 | 0.1579 | - |
191
+ | 0.0102 | 800 | 0.1148 | - |
192
+ | 0.0108 | 850 | 0.1008 | - |
193
+ | 0.0115 | 900 | 0.2035 | - |
194
+ | 0.0121 | 950 | 0.1348 | - |
195
+ | **0.0127** | **1000** | **0.0974** | **0.182** |
196
+ | 0.0134 | 1050 | 0.121 | - |
197
+ | 0.0140 | 1100 | 0.1949 | - |
198
+ | 0.0147 | 1150 | 0.2424 | - |
199
+ | 0.0153 | 1200 | 0.0601 | - |
200
+ | 0.0159 | 1250 | 0.0968 | - |
201
+ | 0.0166 | 1300 | 0.0137 | - |
202
+ | 0.0172 | 1350 | 0.034 | - |
203
+ | 0.0178 | 1400 | 0.1217 | - |
204
+ | 0.0185 | 1450 | 0.0454 | - |
205
+ | 0.0191 | 1500 | 0.0397 | 0.2216 |
206
+ | 0.0198 | 1550 | 0.0226 | - |
207
+ | 0.0204 | 1600 | 0.0939 | - |
208
+ | 0.0210 | 1650 | 0.0537 | - |
209
+ | 0.0217 | 1700 | 0.0566 | - |
210
+ | 0.0223 | 1750 | 0.162 | - |
211
+ | 0.0229 | 1800 | 0.0347 | - |
212
+ | 0.0236 | 1850 | 0.103 | - |
213
+ | 0.0242 | 1900 | 0.0615 | - |
214
+ | 0.0249 | 1950 | 0.0589 | - |
215
+ | 0.0255 | 2000 | 0.1668 | 0.2132 |
216
+ | 0.0261 | 2050 | 0.1809 | - |
217
+ | 0.0268 | 2100 | 0.0579 | - |
218
+ | 0.0274 | 2150 | 0.088 | - |
219
+ | 0.0280 | 2200 | 0.1047 | - |
220
+ | 0.0287 | 2250 | 0.1255 | - |
221
+ | 0.0293 | 2300 | 0.0312 | - |
222
+ | 0.0300 | 2350 | 0.0097 | - |
223
+ | 0.0306 | 2400 | 0.0973 | - |
224
+ | 0.0312 | 2450 | 0.0066 | - |
225
+ | 0.0319 | 2500 | 0.0589 | 0.2591 |
226
+ | 0.0325 | 2550 | 0.0529 | - |
227
+ | 0.0331 | 2600 | 0.0169 | - |
228
+ | 0.0338 | 2650 | 0.0455 | - |
229
+ | 0.0344 | 2700 | 0.0609 | - |
230
+ | 0.0350 | 2750 | 0.1151 | - |
231
+ | 0.0357 | 2800 | 0.0031 | - |
232
+ | 0.0363 | 2850 | 0.0546 | - |
233
+ | 0.0370 | 2900 | 0.0051 | - |
234
+ | 0.0376 | 2950 | 0.0679 | - |
235
+ | 0.0382 | 3000 | 0.0046 | 0.2646 |
236
+ | 0.0389 | 3050 | 0.011 | - |
237
+ | 0.0395 | 3100 | 0.0701 | - |
238
+ | 0.0401 | 3150 | 0.0011 | - |
239
+ | 0.0408 | 3200 | 0.011 | - |
240
+ | 0.0414 | 3250 | 0.0026 | - |
241
+ | 0.0421 | 3300 | 0.0027 | - |
242
+ | 0.0427 | 3350 | 0.0012 | - |
243
+ | 0.0433 | 3400 | 0.0454 | - |
244
+ | 0.0440 | 3450 | 0.0011 | - |
245
+ | 0.0446 | 3500 | 0.0012 | 0.2602 |
246
 
247
  * The bold row denotes the saved checkpoint.
248
  ### Framework Versions
config.json CHANGED
@@ -1,47 +1,28 @@
1
  {
2
- "_name_or_path": "models/step_500/",
3
- "_num_labels": 5,
4
  "architectures": [
5
- "BertModel"
6
  ],
7
  "attention_probs_dropout_prob": 0.1,
 
8
  "classifier_dropout": null,
9
- "directionality": "bidi",
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "LABEL_0",
15
- "1": "LABEL_1",
16
- "2": "LABEL_2",
17
- "3": "LABEL_3",
18
- "4": "LABEL_4"
19
- },
20
  "initializer_range": 0.02,
21
- "intermediate_size": 3072,
22
- "label2id": {
23
- "LABEL_0": 0,
24
- "LABEL_1": 1,
25
- "LABEL_2": 2,
26
- "LABEL_3": 3,
27
- "LABEL_4": 4
28
- },
29
- "layer_norm_eps": 1e-12,
30
- "max_position_embeddings": 512,
31
- "model_type": "bert",
32
- "num_attention_heads": 12,
33
- "num_hidden_layers": 12,
34
  "output_past": true,
35
- "pad_token_id": 0,
36
- "pooler_fc_size": 768,
37
- "pooler_num_attention_heads": 12,
38
- "pooler_num_fc_layers": 3,
39
- "pooler_size_per_head": 128,
40
- "pooler_type": "first_token_transform",
41
  "position_embedding_type": "absolute",
42
  "torch_dtype": "float32",
43
  "transformers_version": "4.36.2",
44
- "type_vocab_size": 2,
45
  "use_cache": true,
46
- "vocab_size": 50000
47
  }
 
1
  {
2
+ "_name_or_path": "models/step_1000/",
 
3
  "architectures": [
4
+ "XLMRobertaModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
  "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
 
 
 
 
 
 
 
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 8194,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
 
 
 
 
 
 
 
20
  "output_past": true,
21
+ "pad_token_id": 1,
 
 
 
 
 
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
  "transformers_version": "4.36.2",
25
+ "type_vocab_size": 1,
26
  "use_cache": true,
27
+ "vocab_size": 250002
28
  }
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.2.2",
4
- "transformers": "4.20.1",
5
- "pytorch": "1.11.0"
6
  }
7
  }
 
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.2.2",
4
+ "transformers": "4.33.0",
5
+ "pytorch": "2.1.2+cu121"
6
  }
7
  }
config_setfit.json CHANGED
@@ -1,11 +1,11 @@
1
  {
 
 
2
  "labels": [
3
  "konflik",
4
  "negatif",
5
  "netral",
6
  "positif"
7
  ],
8
- "spacy_model": "id_core_news_trf",
9
- "span_context": 3,
10
  "normalize_embeddings": false
11
  }
 
1
  {
2
+ "span_context": 3,
3
+ "spacy_model": "id_core_news_trf",
4
  "labels": [
5
  "konflik",
6
  "negatif",
7
  "netral",
8
  "positif"
9
  ],
 
 
10
  "normalize_embeddings": false
11
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e75903b9d43b34a9602d4470583b192ab954eaad3938be8194373d688bd884c
3
- size 497787752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f63dee23a0bf95fda64e8d449f3b986d248f47902b6bd425fe8c3c8a990cf1a
3
+ size 2271064456
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59f44ec07fb9b9dace9551bb690bc911ad26d46608df4ea725d60dc16975f68e
3
- size 25543
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:231e6b182373e783eee98fca20da2f2e903c2cd68be2b105c4b08682bb9f33e0
3
+ size 33735
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 512,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 8192,
3
  "do_lower_case": false
4
  }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json CHANGED
@@ -1,34 +1,48 @@
1
  {
 
 
 
 
 
 
 
2
  "cls_token": {
3
- "content": "[CLS]",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
- "mask_token": {
10
- "content": "[MASK]",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
 
 
 
 
 
 
16
  "pad_token": {
17
- "content": "[PAD]",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "sep_token": {
24
- "content": "[SEP]",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "unk_token": {
31
- "content": "[UNK]",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "cls_token": {
10
+ "content": "<s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "eos_token": {
17
+ "content": "</s>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
  "pad_token": {
31
+ "content": "<pad>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
+ "content": "</s>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
  "single_word": false
43
  },
44
  "unk_token": {
45
+ "content": "<unk>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "[PAD]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "[UNK]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "[CLS]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -25,40 +25,38 @@
25
  "special": true
26
  },
27
  "3": {
28
- "content": "[SEP]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "4": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  }
43
  },
 
44
  "clean_up_tokenization_spaces": true,
45
- "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
- "do_lower_case": true,
48
- "mask_token": "[MASK]",
49
- "max_length": 512,
50
- "model_max_length": 1000000000000000019884624838656,
51
- "never_split": null,
52
  "pad_to_multiple_of": null,
53
- "pad_token": "[PAD]",
54
  "pad_token_type_id": 0,
55
  "padding_side": "right",
56
- "sep_token": "[SEP]",
 
57
  "stride": 0,
58
- "strip_accents": null,
59
- "tokenize_chinese_chars": true,
60
- "tokenizer_class": "BertTokenizer",
61
  "truncation_side": "right",
62
  "truncation_strategy": "longest_first",
63
- "unk_token": "[UNK]"
64
  }
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "<s>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "1": {
12
+ "content": "<pad>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "</s>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
25
  "special": true
26
  },
27
  "3": {
28
+ "content": "<unk>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  }
43
  },
44
+ "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "max_length": 8192,
50
+ "model_max_length": 8192,
 
 
51
  "pad_to_multiple_of": null,
52
+ "pad_token": "<pad>",
53
  "pad_token_type_id": 0,
54
  "padding_side": "right",
55
+ "sep_token": "</s>",
56
+ "sp_model_kwargs": {},
57
  "stride": 0,
58
+ "tokenizer_class": "XLMRobertaTokenizer",
 
 
59
  "truncation_side": "right",
60
  "truncation_strategy": "longest_first",
61
+ "unk_token": "<unk>"
62
  }