seregadgl committed on
Commit f969918 · verified · 1 Parent(s): aae4a49

Add new SentenceTransformer model
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ 0_Transformer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
0_Transformer/config.json ADDED
@@ -0,0 +1,201 @@
1
+ {
2
+ "_commit_hash": "4f4251a1ce7d8ead25533a658686f904866a24f2",
3
+ "_name_or_path": "jinaai/jina-clip-v2",
4
+ "add_projections": false,
5
+ "architectures": [
6
+ "JinaCLIPModel"
7
+ ],
8
+ "auto_map": {
9
+ "AutoConfig": "jinaai/jina-clip-implementation--configuration_clip.JinaCLIPConfig",
10
+ "AutoModel": "jinaai/jina-clip-implementation--modeling_clip.JinaCLIPModel"
11
+ },
12
+ "initializer_factor": 1.0,
13
+ "logit_scale_init_value": 2.6592,
14
+ "matryoshka_dimensions": [
15
+ 32,
16
+ 64,
17
+ 128,
18
+ 256,
19
+ 512,
20
+ 768,
21
+ 1024
22
+ ],
23
+ "model_type": "jina_clip",
24
+ "projection_dim": 1024,
25
+ "text_config": {
26
+ "_attn_implementation_autoset": false,
27
+ "_name_or_path": "",
28
+ "add_cross_attention": false,
29
+ "architectures": null,
30
+ "bad_words_ids": null,
31
+ "begin_suppress_tokens": null,
32
+ "bos_token_id": null,
33
+ "chunk_size_feed_forward": 0,
34
+ "cross_attention_hidden_size": null,
35
+ "decoder_start_token_id": null,
36
+ "default_instruction_task": null,
37
+ "default_lora_task": "retrieval.query",
38
+ "diversity_penalty": 0.0,
39
+ "do_sample": false,
40
+ "early_stopping": false,
41
+ "embed_dim": 1024,
42
+ "encoder_no_repeat_ngram_size": 0,
43
+ "eos_token_id": null,
44
+ "exponential_decay_length_penalty": null,
45
+ "finetuning_task": null,
46
+ "forced_bos_token_id": null,
47
+ "forced_eos_token_id": null,
48
+ "hf_model_config_kwargs": {
49
+ "load_trained_adapters": false,
50
+ "lora_adaptations": [
51
+ "retrieval.query"
52
+ ],
53
+ "lora_alpha": 4,
54
+ "lora_dropout_p": 0.0,
55
+ "lora_main_params_trainable": false,
56
+ "lora_rank": 4,
57
+ "task_instructions": {
58
+ "retrieval.query": "Represent the query for retrieving evidence documents: "
59
+ },
60
+ "use_flash_attn": false
61
+ },
62
+ "hf_model_name_or_path": "jinaai/jina-embeddings-v3",
63
+ "id2label": {
64
+ "0": "LABEL_0",
65
+ "1": "LABEL_1"
66
+ },
67
+ "is_decoder": false,
68
+ "is_encoder_decoder": false,
69
+ "label2id": {
70
+ "LABEL_0": 0,
71
+ "LABEL_1": 1
72
+ },
73
+ "length_penalty": 1.0,
74
+ "max_length": 20,
75
+ "min_length": 0,
76
+ "model_type": "jina_clip_text",
77
+ "no_repeat_ngram_size": 0,
78
+ "num_beam_groups": 1,
79
+ "num_beams": 1,
80
+ "num_return_sequences": 1,
81
+ "output_attentions": false,
82
+ "output_hidden_states": false,
83
+ "output_scores": false,
84
+ "pad_token_id": null,
85
+ "pooler_type": "mean_pooler",
86
+ "prefix": null,
87
+ "problem_type": null,
88
+ "proj_bias": false,
89
+ "proj_type": null,
90
+ "pruned_heads": {},
91
+ "remove_invalid_values": false,
92
+ "repetition_penalty": 1.0,
93
+ "return_dict": true,
94
+ "return_dict_in_generate": false,
95
+ "sep_token_id": null,
96
+ "suppress_tokens": null,
97
+ "task_specific_params": null,
98
+ "temperature": 1.0,
99
+ "tf_legacy_loss": false,
100
+ "tie_encoder_decoder": false,
101
+ "tie_word_embeddings": true,
102
+ "tokenizer_class": null,
103
+ "top_k": 50,
104
+ "top_p": 1.0,
105
+ "torch_dtype": null,
106
+ "torchscript": false,
107
+ "transformers_version": "4.46.3",
108
+ "typical_p": 1.0,
109
+ "use_bfloat16": false
110
+ },
111
+ "torch_dtype": "float32",
112
+ "transformers_version": null,
113
+ "truncate_dim": null,
114
+ "use_text_flash_attn": false,
115
+ "use_vision_xformers": false,
116
+ "vision_config": {
117
+ "_attn_implementation_autoset": false,
118
+ "_name_or_path": "",
119
+ "add_cross_attention": false,
120
+ "architectures": null,
121
+ "bad_words_ids": null,
122
+ "begin_suppress_tokens": null,
123
+ "bos_token_id": null,
124
+ "chunk_size_feed_forward": 0,
125
+ "cross_attention_hidden_size": null,
126
+ "decoder_start_token_id": null,
127
+ "diversity_penalty": 0.0,
128
+ "do_sample": false,
129
+ "drop_path_rate": 0.0,
130
+ "early_stopping": false,
131
+ "embed_dim": 1024,
132
+ "encoder_no_repeat_ngram_size": 0,
133
+ "eos_token_id": null,
134
+ "exponential_decay_length_penalty": null,
135
+ "finetuning_task": null,
136
+ "forced_bos_token_id": null,
137
+ "forced_eos_token_id": null,
138
+ "fused_layer_norm": false,
139
+ "head_width": 64,
140
+ "id2label": {
141
+ "0": "LABEL_0",
142
+ "1": "LABEL_1"
143
+ },
144
+ "image_size": 512,
145
+ "intp_freq": true,
146
+ "is_decoder": false,
147
+ "is_encoder_decoder": false,
148
+ "label2id": {
149
+ "LABEL_0": 0,
150
+ "LABEL_1": 1
151
+ },
152
+ "layers": 24,
153
+ "length_penalty": 1.0,
154
+ "ls_init_value": null,
155
+ "max_length": 20,
156
+ "min_length": 0,
157
+ "mlp_ratio": 2.6667,
158
+ "model_type": "jina_clip_vision",
159
+ "naive_swiglu": true,
160
+ "no_repeat_ngram_size": 0,
161
+ "num_beam_groups": 1,
162
+ "num_beams": 1,
163
+ "num_return_sequences": 1,
164
+ "output_attentions": false,
165
+ "output_hidden_states": false,
166
+ "output_scores": false,
167
+ "pad_token_id": null,
168
+ "patch_dropout": 0.1,
169
+ "patch_size": 14,
170
+ "post_norm": false,
171
+ "prefix": null,
172
+ "problem_type": null,
173
+ "proj_type": null,
174
+ "pruned_heads": {},
175
+ "pt_hw_seq_len": 16,
176
+ "qkv_bias": true,
177
+ "remove_invalid_values": false,
178
+ "repetition_penalty": 1.0,
179
+ "return_dict": true,
180
+ "return_dict_in_generate": false,
181
+ "rope_embeddings": true,
182
+ "sep_token_id": null,
183
+ "subln": true,
184
+ "suppress_tokens": null,
185
+ "task_specific_params": null,
186
+ "temperature": 1.0,
187
+ "tf_legacy_loss": false,
188
+ "tie_encoder_decoder": false,
189
+ "tie_word_embeddings": true,
190
+ "tokenizer_class": null,
191
+ "top_k": 50,
192
+ "top_p": 1.0,
193
+ "torch_dtype": null,
194
+ "torchscript": false,
195
+ "transformers_version": "4.46.3",
196
+ "typical_p": 1.0,
197
+ "use_bfloat16": false,
198
+ "width": 1024,
199
+ "x_attention": false
200
+ }
201
+ }
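Note on the config above: `matryoshka_dimensions` together with `projection_dim: 1024` means the embeddings can be truncated to smaller sizes. A minimal sketch of requesting truncated vectors through Sentence Transformers (the repo id follows the README's usage section; retrieval quality at small dimensions is not verified here):

```python
from sentence_transformers import SentenceTransformer

# Sketch: truncate_dim is typically one of the Matryoshka sizes from config.json
# (32, 64, 128, 256, 512, 768, 1024); smaller vectors trade accuracy for memory.
model = SentenceTransformer("seregadgl/t12", trust_remote_code=True, truncate_dim=256)
embeddings = model.encode(["машинка детская самоходная бибикар желтый"])
print(embeddings.shape)  # expected (1, 256)
```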
0_Transformer/custom_st.py ADDED
@@ -0,0 +1,272 @@
1
+ import base64
2
+ import json
3
+ import os
4
+ from io import BytesIO
5
+ from typing import Any, Dict, List, Literal, Optional, Union
6
+
7
+ import requests
8
+ import torch
9
+ from PIL import Image
10
+ from torch import nn
11
+ from transformers import AutoConfig, AutoImageProcessor, AutoModel, AutoTokenizer
12
+
13
+
14
+ class Transformer(nn.Module):
15
+ def __init__(
16
+ self,
17
+ model_name_or_path: str = 'jinaai/jina-clip-v2',
18
+ tokenizer_name_or_path: Optional[str] = None,
19
+ image_processor_name_or_path: Optional[str] = None,
20
+ max_seq_length: Optional[int] = None,
21
+ config_args: Optional[Dict[str, Any]] = None,
22
+ model_args: Optional[Dict[str, Any]] = None,
23
+ tokenizer_args: Optional[Dict[str, Any]] = None,
24
+ image_processor_args: Optional[Dict[str, Any]] = None,
25
+ assume_text_inputs: bool = False,
26
+ cache_dir: Optional[str] = None,
27
+ backend: Literal['torch', 'onnx', 'openvino'] = 'torch',
28
+ **_,
29
+ ) -> None:
30
+ """
31
+ Creates a custom SentenceTransformer module that uses `jinaai/jina-clip-v2` to
+ map sentences/images to embeddings.
33
+
34
+ Args:
35
+ model_name_or_path (str, optional): If it is a filepath on disc, it loads
36
+ the model from that path. If it is not a path, tries to construct a
37
+ model from the Hugging Face Hub with that name. Defaults to
38
+ 'jinaai/jina-clip-v2'
39
+ tokenizer_name_or_path (str, optional): If it is a filepath on disc, it
40
+ loads the tokenizer from that path. If it is not a path, tries to
41
+ construct a tokenizer from the Hugging Face Hub with that name.
42
+ If `None` it is automatically set to the value of `model_name_or_path`
43
+ image_processor_name_or_path (str, optional): If it is a filepath on disc,
44
+ it loads the image processor from that path. If it is not a path, tries
45
+ to construct an image processor from the Hugging Face Hub with that
46
+ name. If `None` it is automatically set to the value of
47
+ `model_name_or_path`
48
+ max_seq_length (int, optional): The maximum sequence length of the model.
49
+ If not provided, will be inferred from model or tokenizer
50
+ config_args (Dict[str, Any], optional): Additional model configuration
51
+ parameters to be passed to the Hugging Face Transformers config
52
+ model_args (Dict[str, Any], optional): Additional model configuration
53
+ parameters to be passed to the Hugging Face Transformers model
54
+ tokenizer_args (Dict[str, Any], optional): Additional tokenizer
55
+ configuration parameters to be passed to the Hugging Face Transformers
56
+ tokenizer
57
+ image_processor_args (Dict[str, Any], optional): Additional image processor
58
+ configuration parameters to be passed to the Hugging Face Transformers
59
+ image processor
60
+ assume_text_inputs (bool, optional): If set to `True`, all inputs are
61
+ treated as texts. Defaults to `False`
62
+ cache_dir (str, optional): The Hugging Face Hub cache directory
63
+ backend (str, optional): Computational backend, only 'torch' is supported
64
+
65
+ Example:
66
+ ::
67
+
68
+ from sentence_transformers import SentenceTransformer
69
+
70
+ model = SentenceTransformer(
71
+ 'jinaai/jina-clip-v2', trust_remote_code=True
72
+ )
73
+ sentences_or_images = [
74
+ "The weather is lovely today.",
75
+ "It's so sunny outside!",
76
+ "/path/to/stadium.jpg",
77
+ ]
78
+ embeddings = model.encode(sentences_or_images)
79
+ print(embeddings.shape)
80
+ # (3, 1024)
81
+
82
+ # Get the similarity scores between all inputs
83
+ similarities = model.similarity(embeddings, embeddings)
84
+ print(similarities)
85
+ # tensor([[1.0000, 0.6817, 0.0492],
86
+ # [0.6817, 1.0000, 0.0421],
87
+ # [0.0492, 0.0421, 1.0000]])
88
+ """
89
+ super(Transformer, self).__init__()
90
+ if backend != 'torch':
91
+ raise ValueError(
92
+ f'Backend \'{backend}\' is not supported, please use \'torch\' instead'
93
+ )
94
+
95
+ config_kwargs = config_args or {}
96
+ model_kwargs = model_args or {}
97
+ tokenizer_kwargs = tokenizer_args or {}
98
+ image_processor_kwargs = {
99
+ 'token': model_kwargs.get('token', None),
100
+ 'trust_remote_code': model_kwargs.get('trust_remote_code', False),
101
+ 'revision': model_kwargs.get('revision', None),
102
+ 'local_files_only': model_kwargs.get('local_files_only', None),
103
+ }
104
+ image_processor_kwargs.update(image_processor_args or {})
105
+
106
+ config = AutoConfig.from_pretrained(
107
+ model_name_or_path, cache_dir=cache_dir, **config_kwargs
108
+ )
109
+ self.model = AutoModel.from_pretrained(
110
+ model_name_or_path, config=config, cache_dir=cache_dir, **model_kwargs
111
+ )
112
+ if max_seq_length is not None and 'model_max_length' not in tokenizer_kwargs:
113
+ tokenizer_kwargs['model_max_length'] = max_seq_length
114
+
115
+ self.tokenizer = AutoTokenizer.from_pretrained(
116
+ tokenizer_name_or_path or model_name_or_path,
117
+ cache_dir=cache_dir,
118
+ **tokenizer_kwargs,
119
+ )
120
+ self.image_processor = AutoImageProcessor.from_pretrained(
121
+ image_processor_name_or_path or model_name_or_path,
122
+ cache_dir=cache_dir,
123
+ **image_processor_kwargs,
124
+ )
125
+ self.assume_text_inputs = assume_text_inputs
126
+
127
+ # No max_seq_length set. Try to infer from model
128
+ if max_seq_length is None:
129
+ if (
130
+ hasattr(self.model, 'config')
131
+ and hasattr(self.model.config, 'max_position_embeddings')
132
+ and hasattr(self.tokenizer, 'model_max_length')
133
+ ):
134
+ max_seq_length = min(
135
+ self.model.config.max_position_embeddings,
136
+ self.tokenizer.model_max_length,
137
+ )
138
+ self.max_seq_length = max_seq_length
139
+ if tokenizer_name_or_path is not None:
140
+ self.model.config.tokenizer_class = self.tokenizer.__class__.__name__
141
+
142
+ @staticmethod
143
+ def _decode_data_image(data_image_str: str) -> Image.Image:
144
+ header, data = data_image_str.split(',', 1)
145
+ image_data = base64.b64decode(data)
146
+ return Image.open(BytesIO(image_data))
147
+
148
+ def tokenize(
149
+ self, texts: List[Union[str, Image.Image]], padding: Union[str, bool] = True
150
+ ) -> Dict[str, torch.Tensor]:
151
+ """
152
+ Encodes input samples. Text samples are tokenized. Image URLs, image data
153
+ buffers and PIL images are passed through the image processor.
154
+ """
155
+ _images = []
156
+ _texts = []
157
+ _image_or_text_descriptors = []
158
+
159
+ if self.assume_text_inputs:
160
+ for sample in texts:
161
+ if isinstance(sample, str):
162
+ _texts.append(sample)
163
+ _image_or_text_descriptors.append(1)
164
+ else:
165
+ for sample in texts:
166
+ if isinstance(sample, str):
167
+ if sample.startswith('http'):
168
+ try:
169
+ response = requests.get(sample)
170
+ _images.append(
171
+ Image.open(BytesIO(response.content)).convert('RGB')
172
+ )
173
+ _image_or_text_descriptors.append(0)
174
+ except Exception as e:
175
+ _ = str(e)
176
+ _texts.append(sample)
177
+ _image_or_text_descriptors.append(1)
178
+ elif sample.startswith('data:image/'):
179
+ _images.append(self._decode_data_image(sample).convert('RGB'))
180
+ _image_or_text_descriptors.append(0)
181
+ else:
182
+ try:
183
+ _images.append(Image.open(sample).convert('RGB'))
184
+ _image_or_text_descriptors.append(0)
185
+ except Exception as e:
186
+ _ = str(e)
187
+ _texts.append(sample)
188
+ _image_or_text_descriptors.append(1)
189
+ elif isinstance(sample, Image.Image):
190
+ _images.append(sample.convert('RGB'))
191
+ _image_or_text_descriptors.append(0)
192
+
193
+ encoding = {}
194
+ if len(_texts):
195
+ encoding['input_ids'] = self.tokenizer(
196
+ _texts,
197
+ padding=padding,
198
+ truncation='longest_first',
199
+ return_tensors='pt',
200
+ max_length=self.max_seq_length,
201
+ ).input_ids
202
+
203
+ if len(_images):
204
+ encoding['pixel_values'] = self.image_processor(
205
+ _images, return_tensors='pt'
206
+ ).pixel_values
207
+
208
+ encoding['image_text_info'] = _image_or_text_descriptors
209
+ return encoding
210
+
211
+ def forward(self, features: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
212
+ image_embeddings = []
213
+ text_embeddings = []
214
+
215
+ if 'pixel_values' in features:
216
+ image_embeddings = self.model.get_image_features(features['pixel_values'])
217
+ if 'input_ids' in features:
218
+ text_embeddings = self.model.get_text_features(features['input_ids'])
219
+
220
+ sentence_embedding = []
221
+ image_features = iter(image_embeddings)
222
+ text_features = iter(text_embeddings)
223
+ for _, _input_type in enumerate(features['image_text_info']):
224
+ if _input_type == 0:
225
+ sentence_embedding.append(next(image_features))
226
+ else:
227
+ sentence_embedding.append(next(text_features))
228
+
229
+ features['sentence_embedding'] = torch.stack(sentence_embedding).float()
230
+ return features
231
+
232
+ def save(self, output_path: str, safe_serialization: bool = True) -> None:
233
+ self.model.save_pretrained(output_path, safe_serialization=safe_serialization)
234
+ self.tokenizer.save_pretrained(output_path)
235
+ self.image_processor.save_pretrained(output_path)
236
+
237
+ @staticmethod
238
+ def load(input_path: str) -> 'Transformer':
239
+ # Old classes used other config names than 'sentence_bert_config.json'
240
+ for config_name in [
241
+ 'sentence_bert_config.json',
242
+ 'sentence_roberta_config.json',
243
+ 'sentence_distilbert_config.json',
244
+ 'sentence_camembert_config.json',
245
+ 'sentence_albert_config.json',
246
+ 'sentence_xlm-roberta_config.json',
247
+ 'sentence_xlnet_config.json',
248
+ ]:
249
+ sbert_config_path = os.path.join(input_path, config_name)
250
+ if os.path.exists(sbert_config_path):
251
+ break
252
+
253
+ with open(sbert_config_path) as fIn:
254
+ config = json.load(fIn)
255
+
256
+ # Don't allow configs to set trust_remote_code
257
+ if 'config_kwargs' in config and 'trust_remote_code' in config['config_kwargs']:
258
+ config['config_kwargs'].pop('trust_remote_code')
259
+ if 'model_kwargs' in config and 'trust_remote_code' in config['model_kwargs']:
260
+ config['model_kwargs'].pop('trust_remote_code')
261
+ if (
262
+ 'tokenizer_kwargs' in config
263
+ and 'trust_remote_code' in config['tokenizer_kwargs']
264
+ ):
265
+ config['tokenizer_kwargs'].pop('trust_remote_code')
266
+ if (
267
+ 'image_processor_kwargs' in config
268
+ and 'trust_remote_code' in config['image_processor_kwargs']
269
+ ):
270
+ config['image_processor_kwargs'].pop('trust_remote_code')
271
+
272
+ return Transformer(model_name_or_path=input_path, **config)
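For orientation, `tokenize()` above routes each input: http(s) URLs, `data:image/` URIs, readable file paths and PIL images go through the image processor, everything else is tokenized as text, and `forward()` stitches the two embedding streams back into the original input order. A hedged usage sketch (the image URL is a placeholder):

```python
from sentence_transformers import SentenceTransformer

# Sketch of mixed text/image encoding with the custom module; both towers
# project into the same 1024-d space, so the vectors are directly comparable.
model = SentenceTransformer("seregadgl/t12", trust_remote_code=True)
inputs = [
    "велосипед stels saber 20 фиолетовый",   # text -> XLM-RoBERTa text tower
    "https://example.com/bike.jpg",          # placeholder URL -> EVA vision tower
]
embeddings = model.encode(inputs)
print(model.similarity(embeddings, embeddings))
```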
0_Transformer/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58e5dff44bee390193eeb733f543008b6f2fb5779c58073881785b03097788e9
+ size 3461246364
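The `version/oid/size` stanzas in this commit are Git LFS pointers: the ~3.4 GB `model.safetensors` and the tokenizer are stored in LFS rather than in the git history itself. A small sketch for pre-fetching the full snapshot with `huggingface_hub` (assuming the repo id from the README):

```python
from huggingface_hub import snapshot_download

# Downloads model.safetensors, tokenizer.json and the rest of the repo into the
# local HF cache and returns the path; useful for offline use.
local_path = snapshot_download("seregadgl/t12")
print(local_path)
```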
0_Transformer/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "auto_map": {
+ "AutoImageProcessor": "jinaai/jina-clip-implementation--processing_clip.JinaCLIPImageProcessor",
+ "AutoProcessor": "jinaai/jina-clip-implementation--processing_clip.JinaCLIPProcessor"
+ },
+ "fill_color": 0,
+ "image_processor_type": "JinaCLIPImageProcessor",
+ "interpolation": "bicubic",
+ "mean": [
+ 0.48145466,
+ 0.4578275,
+ 0.40821073
+ ],
+ "processor_class": "JinaCLIPProcessor",
+ "resize_mode": "shortest",
+ "size": 512,
+ "std": [
+ 0.26862954,
+ 0.26130258,
+ 0.27577711
+ ]
+ }
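The preprocessing config above resizes the shortest edge to 512 with bicubic interpolation and normalizes with the CLIP mean/std. A rough, unofficial torchvision approximation for intuition only (the exact `JinaCLIPImageProcessor` behaviour, e.g. padding with `fill_color`, may differ):

```python
from torchvision import transforms

# Approximate stand-in for JinaCLIPImageProcessor; not the shipped processor.
preprocess = transforms.Compose([
    transforms.Resize(512, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(512),  # assumption; the real processor uses resize_mode="shortest" with fill_color padding
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.48145466, 0.4578275, 0.40821073],
        std=[0.26862954, 0.26130258, 0.27577711],
    ),
])
```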
0_Transformer/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
0_Transformer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e19cd8c08f528b481e909f73dbd1fd62b1e8b1117579ba205e477801237f9e0
+ size 17082988
0_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "mask_token": "<mask>",
+ "max_length": 77,
+ "model_max_length": 8194,
+ "pad_to_multiple_of": null,
+ "pad_token": "<pad>",
+ "pad_token_type_id": 0,
+ "padding_side": "right",
+ "sep_token": "</s>",
+ "stride": 0,
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "truncation_side": "right",
+ "truncation_strategy": "longest_first",
+ "unk_token": "<unk>"
+ }
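The tokenizer is a standard XLM-RoBERTa tokenizer with `model_max_length` 8194; `custom_st.Transformer` truncates with `longest_first` up to that limit. A quick sanity-check sketch (`subfolder` points at the module directory inside this repo):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("seregadgl/t12", subfolder="0_Transformer")
batch = tokenizer(
    ["honor watch gs pro black"],
    padding=True, truncation=True, return_tensors="pt",
)
print(batch["input_ids"].shape)  # (1, sequence_length)
```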
README.md ADDED
@@ -0,0 +1,552 @@
1
+ ---
2
+ base_model: jinaai/jina-clip-v2
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - pearson_cosine
6
+ - spearman_cosine
7
+ pipeline_tag: sentence-similarity
8
+ tags:
9
+ - sentence-transformers
10
+ - sentence-similarity
11
+ - feature-extraction
12
+ - generated_from_trainer
13
+ - dataset_size:63802
14
+ - loss:CoSENTLoss
15
+ widget:
16
+ - source_sentence: машинка детская самоходная бибикар желтый
17
+ sentences:
18
+ - 'машинка детская красная бибикар '
19
+ - моторное масло alpine dx1 5w 30 5л 0101662
20
+ - 'спинбайк schwinn ic7 '
21
+ - source_sentence: 'велосипед stels saber 20 фиолетовый '
22
+ sentences:
23
+ - 'детские спортивные комплексы '
24
+ - 'велосипед bmx stels saber 20 v010 2020 '
25
+ - 50218 кабель ugreen hd132 hdmi zinc alloy optical fiber cable черный 40m
26
+ - source_sentence: гидравличесские прессы
27
+ sentences:
28
+ - пресс гидравлический ручной механизмом
29
+ - ракетка для настольного тенниса fora 7
30
+ - 'объектив panasonic 20mm f1 7 asph ii h h020ae k '
31
+ - source_sentence: 'бокс пластиковый монтажной платой щмп п 300х200х130 мм ip65 proxima
32
+ ящики щитки шкафы '
33
+ sentences:
34
+ - батарейный отсек для 4xаа открытый проволочные выводы разъем dcx2 1 battery holder
35
+ 4xaa 6v dc
36
+ - 'bugera bc15 '
37
+ - 'бокс пластиковый монтажной платой щмп п 500х350х190 мм ip65 proxima ящики щитки
38
+ шкафы '
39
+ - source_sentence: 'honor watch gs pro black '
40
+ sentences:
41
+ - 'honor watch gs pro white '
42
+ - трансформер pituso carlo hb gy 06 lemon
43
+ - 'электровелосипед колхозник volten greenline 500w '
44
+ model-index:
45
+ - name: SentenceTransformer based on jinaai/jina-clip-v2
46
+ results:
47
+ - task:
48
+ type: semantic-similarity
49
+ name: Semantic Similarity
50
+ dataset:
51
+ name: example dev
52
+ type: example-dev
53
+ metrics:
54
+ - type: pearson_cosine
55
+ value: 0.46018545926876964
56
+ name: Pearson Cosine
57
+ - type: spearman_cosine
58
+ value: 0.4873837299726027
59
+ name: Spearman Cosine
60
+ ---
61
+
62
+ # SentenceTransformer based on jinaai/jina-clip-v2
63
+
64
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [jinaai/jina-clip-v2](https://huggingface.co/jinaai/jina-clip-v2). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
65
+
66
+ ## Model Details
67
+
68
+ ### Model Description
69
+ - **Model Type:** Sentence Transformer
70
+ - **Base model:** [jinaai/jina-clip-v2](https://huggingface.co/jinaai/jina-clip-v2) <!-- at revision 4f4251a1ce7d8ead25533a658686f904866a24f2 -->
71
+ - **Maximum Sequence Length:** not set explicitly; inferred at load time from the tokenizer (`model_max_length`: 8194)
+ - **Output Dimensionality:** 1024 dimensions
73
+ - **Similarity Function:** Cosine Similarity
74
+ <!-- - **Training Dataset:** Unknown -->
75
+ <!-- - **Language:** Unknown -->
76
+ <!-- - **License:** Unknown -->
77
+
78
+ ### Model Sources
79
+
80
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
81
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
82
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
83
+
84
+ ### Full Model Architecture
85
+
86
+ ```
87
+ SentenceTransformer(
88
+ (transformer): Transformer(
89
+ (model): JinaCLIPModel(
90
+ (text_model): HFTextEncoder(
91
+ (transformer): XLMRobertaLoRA(
92
+ (roberta): XLMRobertaModel(
93
+ (embeddings): XLMRobertaEmbeddings(
94
+ (word_embeddings): ParametrizedEmbedding(
95
+ 250002, 1024, padding_idx=1
96
+ (parametrizations): ModuleDict(
97
+ (weight): ParametrizationList(
98
+ (0): LoRAParametrization()
99
+ )
100
+ )
101
+ )
102
+ (token_type_embeddings): ParametrizedEmbedding(
103
+ 1, 1024
104
+ (parametrizations): ModuleDict(
105
+ (weight): ParametrizationList(
106
+ (0): LoRAParametrization()
107
+ )
108
+ )
109
+ )
110
+ )
111
+ (emb_drop): Dropout(p=0.1, inplace=False)
112
+ (emb_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
113
+ (encoder): XLMRobertaEncoder(
114
+ (layers): ModuleList(
115
+ (0-23): 24 x Block(
116
+ (mixer): MHA(
117
+ (rotary_emb): RotaryEmbedding()
118
+ (Wqkv): ParametrizedLinearResidual(
119
+ in_features=1024, out_features=3072, bias=True
120
+ (parametrizations): ModuleDict(
121
+ (weight): ParametrizationList(
122
+ (0): LoRAParametrization()
123
+ )
124
+ )
125
+ )
126
+ (inner_attn): SelfAttention(
127
+ (drop): Dropout(p=0.1, inplace=False)
128
+ )
129
+ (inner_cross_attn): CrossAttention(
130
+ (drop): Dropout(p=0.1, inplace=False)
131
+ )
132
+ (out_proj): ParametrizedLinear(
133
+ in_features=1024, out_features=1024, bias=True
134
+ (parametrizations): ModuleDict(
135
+ (weight): ParametrizationList(
136
+ (0): LoRAParametrization()
137
+ )
138
+ )
139
+ )
140
+ )
141
+ (dropout1): Dropout(p=0.1, inplace=False)
142
+ (drop_path1): StochasticDepth(p=0.0, mode=row)
143
+ (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
144
+ (mlp): Mlp(
145
+ (fc1): ParametrizedLinear(
146
+ in_features=1024, out_features=4096, bias=True
147
+ (parametrizations): ModuleDict(
148
+ (weight): ParametrizationList(
149
+ (0): LoRAParametrization()
150
+ )
151
+ )
152
+ )
153
+ (fc2): ParametrizedLinear(
154
+ in_features=4096, out_features=1024, bias=True
155
+ (parametrizations): ModuleDict(
156
+ (weight): ParametrizationList(
157
+ (0): LoRAParametrization()
158
+ )
159
+ )
160
+ )
161
+ )
162
+ (dropout2): Dropout(p=0.1, inplace=False)
163
+ (drop_path2): StochasticDepth(p=0.0, mode=row)
164
+ (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
165
+ )
166
+ )
167
+ )
168
+ )
169
+ )
170
+ (pooler): MeanPooler()
171
+ (proj): Identity()
172
+ )
173
+ (vision_model): EVAVisionTransformer(
174
+ (patch_embed): PatchEmbed(
175
+ (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
176
+ )
177
+ (pos_drop): Dropout(p=0.0, inplace=False)
178
+ (rope): VisionRotaryEmbeddingFast()
179
+ (blocks): ModuleList(
180
+ (0-23): 24 x Block(
181
+ (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
182
+ (attn): Attention(
183
+ (q_proj): Linear(in_features=1024, out_features=1024, bias=False)
184
+ (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
185
+ (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
186
+ (attn_drop): Dropout(p=0.0, inplace=False)
187
+ (inner_attn_ln): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
188
+ (proj): Linear(in_features=1024, out_features=1024, bias=True)
189
+ (proj_drop): Dropout(p=0.0, inplace=False)
190
+ (rope): VisionRotaryEmbeddingFast()
191
+ )
192
+ (drop_path): Identity()
193
+ (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
194
+ (mlp): SwiGLU(
195
+ (w1): Linear(in_features=1024, out_features=2730, bias=True)
196
+ (w2): Linear(in_features=1024, out_features=2730, bias=True)
197
+ (act): SiLU()
198
+ (ffn_ln): LayerNorm((2730,), eps=1e-06, elementwise_affine=True)
199
+ (w3): Linear(in_features=2730, out_features=1024, bias=True)
200
+ (drop): Dropout(p=0.0, inplace=False)
201
+ )
202
+ )
203
+ )
204
+ (norm): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
205
+ (head): Identity()
206
+ (patch_dropout): PatchDropout()
207
+ )
208
+ (visual_projection): Identity()
209
+ (text_projection): Identity()
210
+ )
211
+ )
212
+ (normalizer): Normalize()
213
+ )
214
+ ```
215
+
216
+ ## Usage
217
+
218
+ ### Direct Usage (Sentence Transformers)
219
+
220
+ First install the Sentence Transformers library:
221
+
222
+ ```bash
223
+ pip install -U sentence-transformers
224
+ ```
225
+
226
+ Then you can load this model and run inference.
227
+ ```python
228
+ from sentence_transformers import SentenceTransformer
229
+
230
+ # Download from the 🤗 Hub
231
+ model = SentenceTransformer("seregadgl/t12", trust_remote_code=True)
232
+ # Run inference
233
+ sentences = [
234
+ 'honor watch gs pro black ',
235
+ 'honor watch gs pro white ',
236
+ 'трансформер pituso carlo hb gy 06 lemon',
237
+ ]
238
+ embeddings = model.encode(sentences)
239
+ print(embeddings.shape)
240
+ # [3, 1024]
241
+
242
+ # Get the similarity scores for the embeddings
243
+ similarities = model.similarity(embeddings, embeddings)
244
+ print(similarities.shape)
245
+ # [3, 3]
246
+ ```
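Since `config_sentence_transformers.json` (added later in this commit) registers a `retrieval.query` prompt, asymmetric retrieval can pass it explicitly at encode time. A hedged sketch building on the snippet above (prompt behaviour with the custom module has not been benchmarked here):

```python
# Sketch: prompt_name selects the "retrieval.query" instruction prefix
# ("Represent the query for retrieving evidence documents: ") for the query side.
query_emb = model.encode(["гидравличесские прессы"], prompt_name="retrieval.query")
doc_emb = model.encode(["пресс гидравлический ручной механизмом"])
print(model.similarity(query_emb, doc_emb))  # 1x1 cosine similarity matrix
```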
247
+
248
+ <!--
249
+ ### Direct Usage (Transformers)
250
+
251
+ <details><summary>Click to see the direct usage in Transformers</summary>
252
+
253
+ </details>
254
+ -->
255
+
256
+ <!--
257
+ ### Downstream Usage (Sentence Transformers)
258
+
259
+ You can finetune this model on your own dataset.
260
+
261
+ <details><summary>Click to expand</summary>
262
+
263
+ </details>
264
+ -->
265
+
266
+ <!--
267
+ ### Out-of-Scope Use
268
+
269
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
270
+ -->
271
+
272
+ ## Evaluation
273
+
274
+ ### Metrics
275
+
276
+ #### Semantic Similarity
277
+
278
+ * Dataset: `example-dev`
279
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
280
+
281
+ | Metric | Value |
282
+ |:--------------------|:-----------|
283
+ | pearson_cosine | 0.4602 |
284
+ | **spearman_cosine** | **0.4874** |
285
+
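To reproduce a comparable number, the card's evaluator can be re-instantiated directly; a sketch using placeholder pairs in the card's `doc`/`candidate`/`label` layout (the original example-dev split is not published with this model):

```python
from sentence_transformers import SentenceTransformer, SimilarityFunction
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Placeholder data; swap in a real held-out split to get comparable numbers.
docs = ["honor watch gs pro black", "безударная дрель ingco ed50028"]
candidates = ["honor watch gs pro white", "ударная дрель ingco id211002"]
labels = [1.0, 0.0]  # placeholder similarity labels

model = SentenceTransformer("seregadgl/t12", trust_remote_code=True)
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=docs, sentences2=candidates, scores=labels,
    main_similarity=SimilarityFunction.COSINE, name="example-dev",
)
print(evaluator(model))  # Pearson/Spearman cosine scores
```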
286
+ <!--
287
+ ## Bias, Risks and Limitations
288
+
289
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
290
+ -->
291
+
292
+ <!--
293
+ ### Recommendations
294
+
295
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
296
+ -->
297
+
298
+ ## Training Details
299
+
300
+ ### Training Dataset
301
+
302
+ #### Unnamed Dataset
303
+
304
+
305
+ * Size: 63,802 training samples
306
+ * Columns: <code>doc</code>, <code>candidate</code>, and <code>label</code>
307
+ * Approximate statistics based on the first 1000 samples:
308
+ | | doc | candidate | label |
309
+ |:--------|:-----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:------------------------------------------------|
310
+ | type | string | string | int |
311
+ | details | <ul><li>min: 5 characters</li><li>mean: 40.56 characters</li><li>max: 115 characters</li></ul> | <ul><li>min: 4 characters</li><li>mean: 40.11 characters</li><li>max: 115 characters</li></ul> | <ul><li>0: ~85.20%</li><li>1: ~14.80%</li></ul> |
312
+ * Samples:
313
+ | doc | candidate | label |
314
+ |:-------------------------------------------------------|:-----------------------------------------------------------------------|:---------------|
315
+ | <code>массажер xiaomi massage gun eu bhr5608eu </code> | <code>перкуссионный массажер xiaomi massage gun mini bhr6083gl </code> | <code>0</code> |
316
+ | <code>безударная дрель ingco ed50028 </code> | <code>ударная дрель ingco id211002 </code> | <code>0</code> |
317
+ | <code>жидкость old smuggler 30мл 20мг </code> | <code>жидкость old smuggler salt 30ml marlboro 20mg</code> | <code>0</code> |
318
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
319
+ ```json
320
+ {
321
+ "scale": 20.0,
322
+ "similarity_fct": "pairwise_cos_sim"
323
+ }
324
+ ```
325
+
326
+ ### Evaluation Dataset
327
+
328
+ #### Unnamed Dataset
329
+
330
+
331
+ * Size: 7,090 evaluation samples
332
+ * Columns: <code>doc</code>, <code>candidate</code>, and <code>label</code>
333
+ * Approximate statistics based on the first 1000 samples:
334
+ | | doc | candidate | label |
335
+ |:--------|:-----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:------------------------------------------------|
336
+ | type | string | string | int |
337
+ | details | <ul><li>min: 4 characters</li><li>mean: 40.68 characters</li><li>max: 198 characters</li></ul> | <ul><li>min: 5 characters</li><li>mean: 39.92 characters</li><li>max: 178 characters</li></ul> | <ul><li>0: ~84.20%</li><li>1: ~15.80%</li></ul> |
338
+ * Samples:
339
+ | doc | candidate | label |
340
+ |:--------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------|:---------------|
341
+ | <code>круглое пляжное парео селфи коврик пляжная подстилка пляжное покрывало пляжный коврик пироженко </code> | <code>круглое пляжное парео селфи коврик пляжная подстилка пляжное покрывало пляжный коврик клубника </code> | <code>0</code> |
342
+ | <code>аккумулятор батарея для ноутбука asus g751 </code> | <code>аккумулятор батарея для ноутбука asus g75 series</code> | <code>0</code> |
343
+ | <code>миксер bosch mfq3520 mfq 3520 </code> | <code>миксер bosch mfq 4020 </code> | <code>0</code> |
344
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
345
+ ```json
346
+ {
347
+ "scale": 20.0,
348
+ "similarity_fct": "pairwise_cos_sim"
349
+ }
350
+ ```
351
+
352
+ ### Training Hyperparameters
353
+ #### Non-Default Hyperparameters
354
+
355
+ - `eval_strategy`: steps
356
+ - `per_device_train_batch_size`: 16
357
+ - `per_device_eval_batch_size`: 16
358
+ - `learning_rate`: 2e-05
359
+ - `num_train_epochs`: 1
360
+ - `lr_scheduler_type`: cosine
361
+ - `warmup_ratio`: 0.1
362
+ - `load_best_model_at_end`: True
363
+ - `batch_sampler`: no_duplicates
364
+
365
+ #### All Hyperparameters
366
+ <details><summary>Click to expand</summary>
367
+
368
+ - `overwrite_output_dir`: False
369
+ - `do_predict`: False
370
+ - `eval_strategy`: steps
371
+ - `prediction_loss_only`: True
372
+ - `per_device_train_batch_size`: 16
373
+ - `per_device_eval_batch_size`: 16
374
+ - `per_gpu_train_batch_size`: None
375
+ - `per_gpu_eval_batch_size`: None
376
+ - `gradient_accumulation_steps`: 1
377
+ - `eval_accumulation_steps`: None
378
+ - `torch_empty_cache_steps`: None
379
+ - `learning_rate`: 2e-05
380
+ - `weight_decay`: 0.0
381
+ - `adam_beta1`: 0.9
382
+ - `adam_beta2`: 0.999
383
+ - `adam_epsilon`: 1e-08
384
+ - `max_grad_norm`: 1.0
385
+ - `num_train_epochs`: 1
386
+ - `max_steps`: -1
387
+ - `lr_scheduler_type`: cosine
388
+ - `lr_scheduler_kwargs`: {}
389
+ - `warmup_ratio`: 0.1
390
+ - `warmup_steps`: 0
391
+ - `log_level`: passive
392
+ - `log_level_replica`: warning
393
+ - `log_on_each_node`: True
394
+ - `logging_nan_inf_filter`: True
395
+ - `save_safetensors`: True
396
+ - `save_on_each_node`: False
397
+ - `save_only_model`: False
398
+ - `restore_callback_states_from_checkpoint`: False
399
+ - `no_cuda`: False
400
+ - `use_cpu`: False
401
+ - `use_mps_device`: False
402
+ - `seed`: 42
403
+ - `data_seed`: None
404
+ - `jit_mode_eval`: False
405
+ - `use_ipex`: False
406
+ - `bf16`: False
407
+ - `fp16`: False
408
+ - `fp16_opt_level`: O1
409
+ - `half_precision_backend`: auto
410
+ - `bf16_full_eval`: False
411
+ - `fp16_full_eval`: False
412
+ - `tf32`: None
413
+ - `local_rank`: 0
414
+ - `ddp_backend`: None
415
+ - `tpu_num_cores`: None
416
+ - `tpu_metrics_debug`: False
417
+ - `debug`: []
418
+ - `dataloader_drop_last`: False
419
+ - `dataloader_num_workers`: 0
420
+ - `dataloader_prefetch_factor`: None
421
+ - `past_index`: -1
422
+ - `disable_tqdm`: False
423
+ - `remove_unused_columns`: True
424
+ - `label_names`: None
425
+ - `load_best_model_at_end`: True
426
+ - `ignore_data_skip`: False
427
+ - `fsdp`: []
428
+ - `fsdp_min_num_params`: 0
429
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
430
+ - `fsdp_transformer_layer_cls_to_wrap`: None
431
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
432
+ - `deepspeed`: None
433
+ - `label_smoothing_factor`: 0.0
434
+ - `optim`: adamw_torch
435
+ - `optim_args`: None
436
+ - `adafactor`: False
437
+ - `group_by_length`: False
438
+ - `length_column_name`: length
439
+ - `ddp_find_unused_parameters`: None
440
+ - `ddp_bucket_cap_mb`: None
441
+ - `ddp_broadcast_buffers`: False
442
+ - `dataloader_pin_memory`: True
443
+ - `dataloader_persistent_workers`: False
444
+ - `skip_memory_metrics`: True
445
+ - `use_legacy_prediction_loop`: False
446
+ - `push_to_hub`: False
447
+ - `resume_from_checkpoint`: None
448
+ - `hub_model_id`: None
449
+ - `hub_strategy`: every_save
450
+ - `hub_private_repo`: False
451
+ - `hub_always_push`: False
452
+ - `gradient_checkpointing`: False
453
+ - `gradient_checkpointing_kwargs`: None
454
+ - `include_inputs_for_metrics`: False
455
+ - `include_for_metrics`: []
456
+ - `eval_do_concat_batches`: True
457
+ - `fp16_backend`: auto
458
+ - `push_to_hub_model_id`: None
459
+ - `push_to_hub_organization`: None
460
+ - `mp_parameters`:
461
+ - `auto_find_batch_size`: False
462
+ - `full_determinism`: False
463
+ - `torchdynamo`: None
464
+ - `ray_scope`: last
465
+ - `ddp_timeout`: 1800
466
+ - `torch_compile`: False
467
+ - `torch_compile_backend`: None
468
+ - `torch_compile_mode`: None
469
+ - `dispatch_batches`: None
470
+ - `split_batches`: None
471
+ - `include_tokens_per_second`: False
472
+ - `include_num_input_tokens_seen`: False
473
+ - `neftune_noise_alpha`: None
474
+ - `optim_target_modules`: None
475
+ - `batch_eval_metrics`: False
476
+ - `eval_on_start`: False
477
+ - `use_liger_kernel`: False
478
+ - `eval_use_gather_object`: False
479
+ - `average_tokens_across_devices`: False
480
+ - `prompts`: None
481
+ - `batch_sampler`: no_duplicates
482
+ - `multi_dataset_batch_sampler`: proportional
483
+
484
+ </details>
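For readers who want to replicate the setup, the following is a minimal sketch of a CoSENTLoss fine-tuning run wired up with the non-default hyperparameters listed above (the 63,802-pair dataset itself is not released, so the dataset below is a placeholder):

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import CoSENTLoss

# Placeholder data with the card's column layout: doc, candidate, label.
train_dataset = Dataset.from_dict({
    "doc": ["машинка детская самоходная бибикар желтый"],
    "candidate": ["машинка детская красная бибикар"],
    "label": [0.0],
})
model = SentenceTransformer("jinaai/jina-clip-v2", trust_remote_code=True)
loss = CoSENTLoss(model, scale=20.0)
args = SentenceTransformerTrainingArguments(
    output_dir="output",
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
)
trainer = SentenceTransformerTrainer(model=model, args=args, train_dataset=train_dataset, loss=loss)
trainer.train()
```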
485
+
486
+ ### Training Logs
487
+ | Epoch | Step | Training Loss | Validation Loss | example-dev_spearman_cosine |
488
+ |:------:|:----:|:-------------:|:---------------:|:---------------------------:|
489
+ | 0 | 0 | - | - | 0.0849 |
490
+ | 0.1254 | 500 | 3.7498 | 3.0315 | 0.3797 |
491
+ | 0.2508 | 1000 | 2.7653 | 2.7538 | 0.4508 |
492
+ | 0.3761 | 1500 | 2.5938 | 2.7853 | 0.4689 |
493
+ | 0.5015 | 2000 | 2.6425 | 2.6761 | 0.4800 |
494
+ | 0.6269 | 2500 | 2.6859 | 2.6341 | 0.4840 |
495
+ | 0.7523 | 3000 | 2.5805 | 2.6350 | 0.4855 |
496
+ | 0.8776 | 3500 | 2.7247 | 2.6087 | 0.4874 |
497
+
498
+
499
+ ### Framework Versions
500
+ - Python: 3.10.14
501
+ - Sentence Transformers: 3.3.1
502
+ - Transformers: 4.46.3
503
+ - PyTorch: 2.4.0
504
+ - Accelerate: 0.34.2
505
+ - Datasets: 3.0.1
506
+ - Tokenizers: 0.20.0
507
+
508
+ ## Citation
509
+
510
+ ### BibTeX
511
+
512
+ #### Sentence Transformers
513
+ ```bibtex
514
+ @inproceedings{reimers-2019-sentence-bert,
515
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
516
+ author = "Reimers, Nils and Gurevych, Iryna",
517
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
518
+ month = "11",
519
+ year = "2019",
520
+ publisher = "Association for Computational Linguistics",
521
+ url = "https://arxiv.org/abs/1908.10084",
522
+ }
523
+ ```
524
+
525
+ #### CoSENTLoss
526
+ ```bibtex
527
+ @online{kexuefm-8847,
528
+ title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
529
+ author={Su Jianlin},
530
+ year={2022},
531
+ month={Jan},
532
+ url={https://kexue.fm/archives/8847},
533
+ }
534
+ ```
535
+
536
+ <!--
537
+ ## Glossary
538
+
539
+ *Clearly define terms in order to be accessible across audiences.*
540
+ -->
541
+
542
+ <!--
543
+ ## Model Card Authors
544
+
545
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
546
+ -->
547
+
548
+ <!--
549
+ ## Model Card Contact
550
+
551
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
552
+ -->
config_sentence_transformers.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "__version__": {
+ "sentence_transformers": "3.3.1",
+ "transformers": "4.46.3",
+ "pytorch": "2.4.0"
+ },
+ "prompts": {
+ "retrieval.query": "Represent the query for retrieving evidence documents: "
+ },
+ "default_prompt_name": null,
+ "similarity_fn_name": "cosine"
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+ {
+ "idx": 0,
+ "name": "transformer",
+ "path": "0_Transformer",
+ "type": "custom_st.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "normalizer",
+ "path": "1_Normalize",
+ "type": "sentence_transformers.models.Normalize"
+ }
+ ]
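`modules.json` is what makes `SentenceTransformer(...)` assemble the pipeline: index 0 loads the custom `Transformer` class from `0_Transformer/custom_st.py` (hence `trust_remote_code=True` in the usage examples), and index 1 L2-normalizes the pooled vectors so cosine similarity reduces to a dot product. A quick inspection sketch:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("seregadgl/t12", trust_remote_code=True)
# Expected to show two modules: custom_st.Transformer followed by Normalize.
for name, module in model.named_children():
    print(name, type(module).__name__)
```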