Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +636 -0
- config.json +43 -0
- config_sentence_transformers.json +10 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.json +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,636 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: allenai/longformer-base-4096
|
3 |
+
datasets:
|
4 |
+
- sentence-transformers/all-nli
|
5 |
+
- sentence-transformers/stsb
|
6 |
+
- sentence-transformers/quora-duplicates
|
7 |
+
- sentence-transformers/natural-questions
|
8 |
+
language:
|
9 |
+
- en
|
10 |
+
library_name: sentence-transformers
|
11 |
+
pipeline_tag: sentence-similarity
|
12 |
+
tags:
|
13 |
+
- sentence-transformers
|
14 |
+
- sentence-similarity
|
15 |
+
- feature-extraction
|
16 |
+
- generated_from_trainer
|
17 |
+
- dataset_size:65749
|
18 |
+
- loss:MultipleNegativesRankingLoss
|
19 |
+
- loss:SoftmaxLoss
|
20 |
+
- loss:CoSENTLoss
|
21 |
+
widget:
|
22 |
+
- source_sentence: A construction worker is standing on a crane placing a large arm
|
23 |
+
on top of a stature in progress.
|
24 |
+
sentences:
|
25 |
+
- The man is wearing black.
|
26 |
+
- A person standing
|
27 |
+
- Nobody is standing
|
28 |
+
- source_sentence: A boy in red slides down an inflatable ride.
|
29 |
+
sentences:
|
30 |
+
- A man holding a drill stands next to a girl holding a vacuum hose.
|
31 |
+
- A boy is playing on an inflatable ride.
|
32 |
+
- A boy pierces a knife through an inflatable ride.
|
33 |
+
- source_sentence: An animal is chewing on something.
|
34 |
+
sentences:
|
35 |
+
- A dog with a red leash still attached chases over the grass toward a tennis ball.
|
36 |
+
- A man is eating something.
|
37 |
+
- An animal is chewing on a key chain.
|
38 |
+
- source_sentence: What are some good books or references to get started with machine
|
39 |
+
learning?
|
40 |
+
sentences:
|
41 |
+
- What caused the British Empire to fall?
|
42 |
+
- How should I go about learning Machine Learning?
|
43 |
+
- Can an infinite amount of dark or vacuum or gravitational energy be created with
|
44 |
+
expansion?
|
45 |
+
- source_sentence: How do I attract a girl?
|
46 |
+
sentences:
|
47 |
+
- How can I attract girls?
|
48 |
+
- Why isn't my iPhone 5 charging?
|
49 |
+
- What would the world be like now in 2016 if Hitler's Germany won the war?
|
50 |
+
---
|
51 |
+
|
52 |
+
# SentenceTransformer based on allenai/longformer-base-4096
|
53 |
+
|
54 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [allenai/longformer-base-4096](https://huggingface.co/allenai/longformer-base-4096) on the [all-nli-pair](https://huggingface.co/datasets/sentence-transformers/all-nli), [all-nli-pair-class](https://huggingface.co/datasets/sentence-transformers/all-nli), [all-nli-pair-score](https://huggingface.co/datasets/sentence-transformers/all-nli), [all-nli-triplet](https://huggingface.co/datasets/sentence-transformers/all-nli), [stsb](https://huggingface.co/datasets/sentence-transformers/stsb), [quora](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) and [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) datasets. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
55 |
+
|
56 |
+
## Model Details
|
57 |
+
|
58 |
+
### Model Description
|
59 |
+
- **Model Type:** Sentence Transformer
|
60 |
+
- **Base model:** [allenai/longformer-base-4096](https://huggingface.co/allenai/longformer-base-4096) <!-- at revision 301e6a42cb0d9976a6d6a26a079fef81c18aa895 -->
|
61 |
+
- **Maximum Sequence Length:** 4098 tokens
|
62 |
+
- **Output Dimensionality:** 768 dimensions
|
63 |
+
- **Similarity Function:** Cosine Similarity
|
64 |
+
- **Training Datasets:**
|
65 |
+
- [all-nli-pair](https://huggingface.co/datasets/sentence-transformers/all-nli)
|
66 |
+
- [all-nli-pair-class](https://huggingface.co/datasets/sentence-transformers/all-nli)
|
67 |
+
- [all-nli-pair-score](https://huggingface.co/datasets/sentence-transformers/all-nli)
|
68 |
+
- [all-nli-triplet](https://huggingface.co/datasets/sentence-transformers/all-nli)
|
69 |
+
- [stsb](https://huggingface.co/datasets/sentence-transformers/stsb)
|
70 |
+
- [quora](https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
|
71 |
+
- [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions)
|
72 |
+
- **Language:** en
|
73 |
+
<!-- - **License:** Unknown -->
|
74 |
+
|
75 |
+
### Model Sources
|
76 |
+
|
77 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
78 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
79 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
80 |
+
|
81 |
+
### Full Model Architecture
|
82 |
+
|
83 |
+
```
|
84 |
+
SentenceTransformer(
|
85 |
+
(0): Transformer({'max_seq_length': 4098, 'do_lower_case': False}) with Transformer model: LongformerModel
|
86 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
87 |
+
)
|
88 |
+
```
|
89 |
+
|
90 |
+
## Usage
|
91 |
+
|
92 |
+
### Direct Usage (Sentence Transformers)
|
93 |
+
|
94 |
+
First install the Sentence Transformers library:
|
95 |
+
|
96 |
+
```bash
|
97 |
+
pip install -U sentence-transformers
|
98 |
+
```
|
99 |
+
|
100 |
+
Then you can load this model and run inference.
|
101 |
+
```python
|
102 |
+
from sentence_transformers import SentenceTransformer
|
103 |
+
|
104 |
+
# Download from the 🤗 Hub
|
105 |
+
model = SentenceTransformer("Leo1212/longformer-base-4096-sentence-transformers-all-nli-stsb-quora-nq")
|
106 |
+
# Run inference
|
107 |
+
sentences = [
|
108 |
+
'How do I attract a girl?',
|
109 |
+
'How can I attract girls?',
|
110 |
+
"Why isn't my iPhone 5 charging?",
|
111 |
+
]
|
112 |
+
embeddings = model.encode(sentences)
|
113 |
+
print(embeddings.shape)
|
114 |
+
# [3, 768]
|
115 |
+
|
116 |
+
# Get the similarity scores for the embeddings
|
117 |
+
similarities = model.similarity(embeddings, embeddings)
|
118 |
+
print(similarities.shape)
|
119 |
+
# [3, 3]
|
120 |
+
```
|
121 |
+
|
122 |
+
<!--
|
123 |
+
### Direct Usage (Transformers)
|
124 |
+
|
125 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
126 |
+
|
127 |
+
</details>
|
128 |
+
-->
|
129 |
+
|
130 |
+
<!--
|
131 |
+
### Downstream Usage (Sentence Transformers)
|
132 |
+
|
133 |
+
You can finetune this model on your own dataset.
|
134 |
+
|
135 |
+
<details><summary>Click to expand</summary>
|
136 |
+
|
137 |
+
</details>
|
138 |
+
-->
|
139 |
+
|
140 |
+
<!--
|
141 |
+
### Out-of-Scope Use
|
142 |
+
|
143 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
144 |
+
-->
|
145 |
+
|
146 |
+
<!--
|
147 |
+
## Bias, Risks and Limitations
|
148 |
+
|
149 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
150 |
+
-->
|
151 |
+
|
152 |
+
<!--
|
153 |
+
### Recommendations
|
154 |
+
|
155 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
156 |
+
-->
|
157 |
+
|
158 |
+
## Training Details
|
159 |
+
|
160 |
+
### Training Datasets
|
161 |
+
|
162 |
+
#### all-nli-pair
|
163 |
+
|
164 |
+
* Dataset: [all-nli-pair](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
|
165 |
+
* Size: 10,000 training samples
|
166 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
167 |
+
* Approximate statistics based on the first 1000 samples:
|
168 |
+
| | anchor | positive |
|
169 |
+
|:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
|
170 |
+
| type | string | string |
|
171 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 17.06 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 9.64 tokens</li><li>max: 31 tokens</li></ul> |
|
172 |
+
* Samples:
|
173 |
+
| anchor | positive |
|
174 |
+
|:---------------------------------------------------------------------------|:-------------------------------------------------|
|
175 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is outdoors, on a horse.</code> |
|
176 |
+
| <code>Children smiling and waving at camera</code> | <code>There are children present</code> |
|
177 |
+
| <code>A boy is jumping on skateboard in the middle of a red bridge.</code> | <code>The boy does a skateboarding trick.</code> |
|
178 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
179 |
+
```json
|
180 |
+
{
|
181 |
+
"scale": 20.0,
|
182 |
+
"similarity_fct": "cos_sim"
|
183 |
+
}
|
184 |
+
```
|
185 |
+
|
186 |
+
#### all-nli-pair-class
|
187 |
+
|
188 |
+
* Dataset: [all-nli-pair-class](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
|
189 |
+
* Size: 10,000 training samples
|
190 |
+
* Columns: <code>premise</code>, <code>hypothesis</code>, and <code>label</code>
|
191 |
+
* Approximate statistics based on the first 1000 samples:
|
192 |
+
| | premise | hypothesis | label |
|
193 |
+
|:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------|
|
194 |
+
| type | string | string | int |
|
195 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 17.4 tokens</li><li>max: 50 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 10.69 tokens</li><li>max: 31 tokens</li></ul> | <ul><li>0: ~33.40%</li><li>1: ~33.30%</li><li>2: ~33.30%</li></ul> |
|
196 |
+
* Samples:
|
197 |
+
| premise | hypothesis | label |
|
198 |
+
|:--------------------------------------------------------------------|:---------------------------------------------------------------|:---------------|
|
199 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is training his horse for a competition.</code> | <code>1</code> |
|
200 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is at a diner, ordering an omelette.</code> | <code>2</code> |
|
201 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is outdoors, on a horse.</code> | <code>0</code> |
|
202 |
+
* Loss: [<code>SoftmaxLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#softmaxloss)
|
203 |
+
|
204 |
+
#### all-nli-pair-score
|
205 |
+
|
206 |
+
* Dataset: [all-nli-pair-score](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
|
207 |
+
* Size: 10,000 training samples
|
208 |
+
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
|
209 |
+
* Approximate statistics based on the first 1000 samples:
|
210 |
+
| | sentence1 | sentence2 | score |
|
211 |
+
|:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------------|
|
212 |
+
| type | string | string | float |
|
213 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 17.4 tokens</li><li>max: 50 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 10.69 tokens</li><li>max: 31 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.5</li><li>max: 1.0</li></ul> |
|
214 |
+
* Samples:
|
215 |
+
| sentence1 | sentence2 | score |
|
216 |
+
|:--------------------------------------------------------------------|:---------------------------------------------------------------|:-----------------|
|
217 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is training his horse for a competition.</code> | <code>0.5</code> |
|
218 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is at a diner, ordering an omelette.</code> | <code>0.0</code> |
|
219 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is outdoors, on a horse.</code> | <code>1.0</code> |
|
220 |
+
* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
|
221 |
+
```json
|
222 |
+
{
|
223 |
+
"scale": 20.0,
|
224 |
+
"similarity_fct": "pairwise_cos_sim"
|
225 |
+
}
|
226 |
+
```
|
227 |
+
|
228 |
+
#### all-nli-triplet
|
229 |
+
|
230 |
+
* Dataset: [all-nli-triplet](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
|
231 |
+
* Size: 10,000 training samples
|
232 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
233 |
+
* Approximate statistics based on the first 1000 samples:
|
234 |
+
| | anchor | positive | negative |
|
235 |
+
|:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
|
236 |
+
| type | string | string | string |
|
237 |
+
| details | <ul><li>min: 7 tokens</li><li>mean: 10.38 tokens</li><li>max: 45 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 12.8 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.4 tokens</li><li>max: 50 tokens</li></ul> |
|
238 |
+
* Samples:
|
239 |
+
| anchor | positive | negative |
|
240 |
+
|:---------------------------------------------------------------------------|:-------------------------------------------------|:-----------------------------------------------------------|
|
241 |
+
| <code>A person on a horse jumps over a broken down airplane.</code> | <code>A person is outdoors, on a horse.</code> | <code>A person is at a diner, ordering an omelette.</code> |
|
242 |
+
| <code>Children smiling and waving at camera</code> | <code>There are children present</code> | <code>The kids are frowning</code> |
|
243 |
+
| <code>A boy is jumping on skateboard in the middle of a red bridge.</code> | <code>The boy does a skateboarding trick.</code> | <code>The boy skates down the sidewalk.</code> |
|
244 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
245 |
+
```json
|
246 |
+
{
|
247 |
+
"scale": 20.0,
|
248 |
+
"similarity_fct": "cos_sim"
|
249 |
+
}
|
250 |
+
```
|
251 |
+
|
252 |
+
#### stsb
|
253 |
+
|
254 |
+
* Dataset: [stsb](https://huggingface.co/datasets/sentence-transformers/stsb) at [ab7a5ac](https://huggingface.co/datasets/sentence-transformers/stsb/tree/ab7a5ac0e35aa22088bdcf23e7fd99b220e53308)
|
255 |
+
* Size: 5,749 training samples
|
256 |
+
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
|
257 |
+
* Approximate statistics based on the first 1000 samples:
|
258 |
+
| | sentence1 | sentence2 | score |
|
259 |
+
|:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
260 |
+
| type | string | string | float |
|
261 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 10.02 tokens</li><li>max: 28 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 9.96 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.54</li><li>max: 1.0</li></ul> |
|
262 |
+
* Samples:
|
263 |
+
| sentence1 | sentence2 | score |
|
264 |
+
|:-----------------------------------------------------------|:----------------------------------------------------------------------|:------------------|
|
265 |
+
| <code>A plane is taking off.</code> | <code>An air plane is taking off.</code> | <code>1.0</code> |
|
266 |
+
| <code>A man is playing a large flute.</code> | <code>A man is playing a flute.</code> | <code>0.76</code> |
|
267 |
+
| <code>A man is spreading shreded cheese on a pizza.</code> | <code>A man is spreading shredded cheese on an uncooked pizza.</code> | <code>0.76</code> |
|
268 |
+
* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
|
269 |
+
```json
|
270 |
+
{
|
271 |
+
"scale": 20.0,
|
272 |
+
"similarity_fct": "pairwise_cos_sim"
|
273 |
+
}
|
274 |
+
```
|
275 |
+
|
276 |
+
#### quora
|
277 |
+
|
278 |
+
* Dataset: [quora](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
|
279 |
+
* Size: 10,000 training samples
|
280 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
281 |
+
* Approximate statistics based on the first 1000 samples:
|
282 |
+
| | anchor | positive |
|
283 |
+
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
284 |
+
| type | string | string |
|
285 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 13.74 tokens</li><li>max: 43 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.91 tokens</li><li>max: 44 tokens</li></ul> |
|
286 |
+
* Samples:
|
287 |
+
| anchor | positive |
|
288 |
+
|:----------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------|
|
289 |
+
| <code>Astrology: I am a Capricorn Sun Cap moon and cap rising...what does that say about me?</code> | <code>I'm a triple Capricorn (Sun, Moon and ascendant in Capricorn) What does this say about me?</code> |
|
290 |
+
| <code>How can I be a good geologist?</code> | <code>What should I do to be a great geologist?</code> |
|
291 |
+
| <code>How do I read and find my YouTube comments?</code> | <code>How can I see all my Youtube comments?</code> |
|
292 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
293 |
+
```json
|
294 |
+
{
|
295 |
+
"scale": 20.0,
|
296 |
+
"similarity_fct": "cos_sim"
|
297 |
+
}
|
298 |
+
```
|
299 |
+
|
300 |
+
#### natural-questions
|
301 |
+
|
302 |
+
* Dataset: [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) at [f9e894e](https://huggingface.co/datasets/sentence-transformers/natural-questions/tree/f9e894e1081e206e577b4eaa9ee6de2b06ae6f17)
|
303 |
+
* Size: 10,000 training samples
|
304 |
+
* Columns: <code>query</code> and <code>answer</code>
|
305 |
+
* Approximate statistics based on the first 1000 samples:
|
306 |
+
| | query | answer |
|
307 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
308 |
+
| type | string | string |
|
309 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 12.43 tokens</li><li>max: 23 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 136.19 tokens</li><li>max: 543 tokens</li></ul> |
|
310 |
+
* Samples:
|
311 |
+
| query | answer |
|
312 |
+
|:----------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
313 |
+
| <code>when did richmond last play in a preliminary final</code> | <code>Richmond Football Club Richmond began 2017 with 5 straight wins, a feat it had not achieved since 1995. A series of close losses hampered the Tigers throughout the middle of the season, including a 5-point loss to the Western Bulldogs, 2-point loss to Fremantle, and a 3-point loss to the Giants. Richmond ended the season strongly with convincing victories over Fremantle and St Kilda in the final two rounds, elevating the club to 3rd on the ladder. Richmond's first final of the season against the Cats at the MCG attracted a record qualifying final crowd of 95,028; the Tigers won by 51 points. Having advanced to the first preliminary finals for the first time since 2001, Richmond defeated Greater Western Sydney by 36 points in front of a crowd of 94,258 to progress to the Grand Final against Adelaide, their first Grand Final appearance since 1982. The attendance was 100,021, the largest crowd to a grand final since 1986. The Crows led at quarter time and led by as many as 13, but the Tigers took over the game as it progressed and scored seven straight goals at one point. They eventually would win by 48 points – 16.12 (108) to Adelaide's 8.12 (60) – to end their 37-year flag drought.[22] Dustin Martin also became the first player to win a Premiership medal, the Brownlow Medal and the Norm Smith Medal in the same season, while Damien Hardwick was named AFL Coaches Association Coach of the Year. Richmond's jump from 13th to premiers also marked the biggest jump from one AFL season to the next.</code> |
|
314 |
+
| <code>who sang what in the world's come over you</code> | <code>Jack Scott (singer) At the beginning of 1960, Scott again changed record labels, this time to Top Rank Records.[1] He then recorded four Billboard Hot 100 hits – "What in the World's Come Over You" (#5), "Burning Bridges" (#3) b/w "Oh Little One" (#34), and "It Only Happened Yesterday" (#38).[1] "What in the World's Come Over You" was Scott's second gold disc winner.[6] Scott continued to record and perform during the 1960s and 1970s.[1] His song "You're Just Gettin' Better" reached the country charts in 1974.[1] In May 1977, Scott recorded a Peel session for BBC Radio 1 disc jockey, John Peel.</code> |
|
315 |
+
| <code>who produces the most wool in the world</code> | <code>Wool Global wool production is about 2 million tonnes per year, of which 60% goes into apparel. Wool comprises ca 3% of the global textile market, but its value is higher owing to dying and other modifications of the material.[1] Australia is a leading producer of wool which is mostly from Merino sheep but has been eclipsed by China in terms of total weight.[30] New Zealand (2016) is the third-largest producer of wool, and the largest producer of crossbred wool. Breeds such as Lincoln, Romney, Drysdale, and Elliotdale produce coarser fibers, and wool from these sheep is usually used for making carpets.</code> |
|
316 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
317 |
+
```json
|
318 |
+
{
|
319 |
+
"scale": 20.0,
|
320 |
+
"similarity_fct": "cos_sim"
|
321 |
+
}
|
322 |
+
```
|
323 |
+
|
324 |
+
### Evaluation Datasets
|
325 |
+
|
326 |
+
#### all-nli-triplet
|
327 |
+
|
328 |
+
* Dataset: [all-nli-triplet](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
|
329 |
+
* Size: 6,584 evaluation samples
|
330 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
331 |
+
* Approximate statistics based on the first 1000 samples:
|
332 |
+
| | anchor | positive | negative |
|
333 |
+
|:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
334 |
+
| type | string | string | string |
|
335 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 18.02 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 9.81 tokens</li><li>max: 29 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 10.37 tokens</li><li>max: 29 tokens</li></ul> |
|
336 |
+
* Samples:
|
337 |
+
| anchor | positive | negative |
|
338 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------|:--------------------------------------------------------|
|
339 |
+
| <code>Two women are embracing while holding to go packages.</code> | <code>Two woman are holding packages.</code> | <code>The men are fighting outside a deli.</code> |
|
340 |
+
| <code>Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.</code> | <code>Two kids in numbered jerseys wash their hands.</code> | <code>Two kids in jackets walk to school.</code> |
|
341 |
+
| <code>A man selling donuts to a customer during a world exhibition event held in the city of Angeles</code> | <code>A man selling donuts to a customer.</code> | <code>A woman drinks her coffee in a small cafe.</code> |
|
342 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
343 |
+
```json
|
344 |
+
{
|
345 |
+
"scale": 20.0,
|
346 |
+
"similarity_fct": "cos_sim"
|
347 |
+
}
|
348 |
+
```
|
349 |
+
|
350 |
+
#### stsb
|
351 |
+
|
352 |
+
* Dataset: [stsb](https://huggingface.co/datasets/sentence-transformers/stsb) at [ab7a5ac](https://huggingface.co/datasets/sentence-transformers/stsb/tree/ab7a5ac0e35aa22088bdcf23e7fd99b220e53308)
|
353 |
+
* Size: 1,500 evaluation samples
|
354 |
+
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
|
355 |
+
* Approximate statistics based on the first 1000 samples:
|
356 |
+
| | sentence1 | sentence2 | score |
|
357 |
+
|:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
358 |
+
| type | string | string | float |
|
359 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 15.0 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.99 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.47</li><li>max: 1.0</li></ul> |
|
360 |
+
* Samples:
|
361 |
+
| sentence1 | sentence2 | score |
|
362 |
+
|:--------------------------------------------------|:------------------------------------------------------|:------------------|
|
363 |
+
| <code>A man with a hard hat is dancing.</code> | <code>A man wearing a hard hat is dancing.</code> | <code>1.0</code> |
|
364 |
+
| <code>A young child is riding a horse.</code> | <code>A child is riding a horse.</code> | <code>0.95</code> |
|
365 |
+
| <code>A man is feeding a mouse to a snake.</code> | <code>The man is feeding a mouse to the snake.</code> | <code>1.0</code> |
|
366 |
+
* Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
|
367 |
+
```json
|
368 |
+
{
|
369 |
+
"scale": 20.0,
|
370 |
+
"similarity_fct": "pairwise_cos_sim"
|
371 |
+
}
|
372 |
+
```
|
373 |
+
|
374 |
+
#### quora
|
375 |
+
|
376 |
+
* Dataset: [quora](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
|
377 |
+
* Size: 1,000 evaluation samples
|
378 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
379 |
+
* Approximate statistics based on the first 1000 samples:
|
380 |
+
| | anchor | positive |
|
381 |
+
|:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
|
382 |
+
| type | string | string |
|
383 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 13.86 tokens</li><li>max: 63 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.9 tokens</li><li>max: 46 tokens</li></ul> |
|
384 |
+
* Samples:
|
385 |
+
| anchor | positive |
|
386 |
+
|:----------------------------------------------------------------------------|:--------------------------------------------------------------------------------|
|
387 |
+
| <code>What is your New Year resolution?</code> | <code>What can be my new year resolution for 2017?</code> |
|
388 |
+
| <code>Should I buy the IPhone 6s or Samsung Galaxy s7?</code> | <code>Which is better: the iPhone 6S Plus or the Samsung Galaxy S7 Edge?</code> |
|
389 |
+
| <code>What are the differences between transgression and regression?</code> | <code>What is the difference between transgression and regression?</code> |
|
390 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
391 |
+
```json
|
392 |
+
{
|
393 |
+
"scale": 20.0,
|
394 |
+
"similarity_fct": "cos_sim"
|
395 |
+
}
|
396 |
+
```
|
397 |
+
|
398 |
+
#### natural-questions
|
399 |
+
|
400 |
+
* Dataset: [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) at [f9e894e](https://huggingface.co/datasets/sentence-transformers/natural-questions/tree/f9e894e1081e206e577b4eaa9ee6de2b06ae6f17)
|
401 |
+
* Size: 1,000 evaluation samples
|
402 |
+
* Columns: <code>query</code> and <code>answer</code>
|
403 |
+
* Approximate statistics based on the first 1000 samples:
|
404 |
+
| | query | answer |
|
405 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
406 |
+
| type | string | string |
|
407 |
+
| details | <ul><li>min: 9 tokens</li><li>mean: 12.47 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 139.05 tokens</li><li>max: 572 tokens</li></ul> |
|
408 |
+
* Samples:
|
409 |
+
| query | answer |
|
410 |
+
|:--------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
411 |
+
| <code>where does the waikato river begin and end</code> | <code>Waikato River The Waikato River is the longest river in New Zealand, running for 425 kilometres (264 mi) through the North Island. It rises in the eastern slopes of Mount Ruapehu, joining the Tongariro River system and flowing through Lake Taupo, New Zealand's largest lake. It then drains Taupo at the lake's northeastern edge, creates the Huka Falls, and flows northwest through the Waikato Plains. It empties into the Tasman Sea south of Auckland, at Port Waikato. It gives its name to the Waikato Region that surrounds the Waikato Plains. The present course of the river was largely formed about 17,000 years ago. Contributing factors were climate warming, forest being reestablished in the river headwaters and the deepening, rather than widening, of the existing river channel. The channel was gradually eroded as far up river as Piarere, leaving the old Hinuera channel high and dry.[2] The remains of the old river path can be clearly seen at Hinuera where the cliffs mark the ancient river edges. The river's main tributary is the Waipa River, which has its confluence with the Waikato at Ngaruawahia.</code> |
|
412 |
+
| <code>what type of gas is produced during fermentation</code> | <code>Fermentation Fermentation reacts NADH with an endogenous, organic electron acceptor.[1] Usually this is pyruvate formed from sugar through glycolysis. The reaction produces NAD+ and an organic product, typical examples being ethanol, lactic acid, carbon dioxide, and hydrogen gas (H2). However, more exotic compounds can be produced by fermentation, such as butyric acid and acetone. Fermentation products contain chemical energy (they are not fully oxidized), but are considered waste products, since they cannot be metabolized further without the use of oxygen.</code> |
|
413 |
+
| <code>why was star wars episode iv released first</code> | <code>Star Wars (film) Star Wars (later retitled Star Wars: Episode IV – A New Hope) is a 1977 American epic space opera film written and directed by George Lucas. It is the first film in the original Star Wars trilogy and the beginning of the Star Wars franchise. Starring Mark Hamill, Harrison Ford, Carrie Fisher, Peter Cushing, Alec Guinness, David Prowse, James Earl Jones, Anthony Daniels, Kenny Baker, and Peter Mayhew, the film's plot focuses on the Rebel Alliance, led by Princess Leia (Fisher), and its attempt to destroy the Galactic Empire's space station, the Death Star. This conflict disrupts the isolated life of farmhand Luke Skywalker (Hamill), who inadvertently acquires two droids that possess stolen architectural plans for the Death Star. When the Empire begins a destructive search for the missing droids, Skywalker accompanies Jedi Master Obi-Wan Kenobi (Guinness) on a mission to return the plans to the Rebel Alliance and rescue Leia from her imprisonment by the Empire.</code> |
|
414 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
415 |
+
```json
|
416 |
+
{
|
417 |
+
"scale": 20.0,
|
418 |
+
"similarity_fct": "cos_sim"
|
419 |
+
}
|
420 |
+
```
|
421 |
+
|
422 |
+
### Training Hyperparameters
|
423 |
+
|
424 |
+
#### All Hyperparameters
|
425 |
+
<details><summary>Click to expand</summary>
|
426 |
+
|
427 |
+
- `overwrite_output_dir`: False
|
428 |
+
- `do_predict`: False
|
429 |
+
- `eval_strategy`: no
|
430 |
+
- `prediction_loss_only`: True
|
431 |
+
- `per_device_train_batch_size`: 8
|
432 |
+
- `per_device_eval_batch_size`: 8
|
433 |
+
- `per_gpu_train_batch_size`: None
|
434 |
+
- `per_gpu_eval_batch_size`: None
|
435 |
+
- `gradient_accumulation_steps`: 1
|
436 |
+
- `eval_accumulation_steps`: None
|
437 |
+
- `torch_empty_cache_steps`: None
|
438 |
+
- `learning_rate`: 5e-05
|
439 |
+
- `weight_decay`: 0.0
|
440 |
+
- `adam_beta1`: 0.9
|
441 |
+
- `adam_beta2`: 0.999
|
442 |
+
- `adam_epsilon`: 1e-08
|
443 |
+
- `max_grad_norm`: 1.0
|
444 |
+
- `num_train_epochs`: 3.0
|
445 |
+
- `max_steps`: -1
|
446 |
+
- `lr_scheduler_type`: linear
|
447 |
+
- `lr_scheduler_kwargs`: {}
|
448 |
+
- `warmup_ratio`: 0.0
|
449 |
+
- `warmup_steps`: 0
|
450 |
+
- `log_level`: passive
|
451 |
+
- `log_level_replica`: warning
|
452 |
+
- `log_on_each_node`: True
|
453 |
+
- `logging_nan_inf_filter`: True
|
454 |
+
- `save_safetensors`: True
|
455 |
+
- `save_on_each_node`: False
|
456 |
+
- `save_only_model`: False
|
457 |
+
- `restore_callback_states_from_checkpoint`: False
|
458 |
+
- `no_cuda`: False
|
459 |
+
- `use_cpu`: False
|
460 |
+
- `use_mps_device`: False
|
461 |
+
- `seed`: 42
|
462 |
+
- `data_seed`: None
|
463 |
+
- `jit_mode_eval`: False
|
464 |
+
- `use_ipex`: False
|
465 |
+
- `bf16`: False
|
466 |
+
- `fp16`: False
|
467 |
+
- `fp16_opt_level`: O1
|
468 |
+
- `half_precision_backend`: auto
|
469 |
+
- `bf16_full_eval`: False
|
470 |
+
- `fp16_full_eval`: False
|
471 |
+
- `tf32`: None
|
472 |
+
- `local_rank`: 0
|
473 |
+
- `ddp_backend`: None
|
474 |
+
- `tpu_num_cores`: None
|
475 |
+
- `tpu_metrics_debug`: False
|
476 |
+
- `debug`: []
|
477 |
+
- `dataloader_drop_last`: False
|
478 |
+
- `dataloader_num_workers`: 0
|
479 |
+
- `dataloader_prefetch_factor`: None
|
480 |
+
- `past_index`: -1
|
481 |
+
- `disable_tqdm`: False
|
482 |
+
- `remove_unused_columns`: True
|
483 |
+
- `label_names`: None
|
484 |
+
- `load_best_model_at_end`: False
|
485 |
+
- `ignore_data_skip`: False
|
486 |
+
- `fsdp`: []
|
487 |
+
- `fsdp_min_num_params`: 0
|
488 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
489 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
490 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
491 |
+
- `deepspeed`: None
|
492 |
+
- `label_smoothing_factor`: 0.0
|
493 |
+
- `optim`: adamw_torch
|
494 |
+
- `optim_args`: None
|
495 |
+
- `adafactor`: False
|
496 |
+
- `group_by_length`: False
|
497 |
+
- `length_column_name`: length
|
498 |
+
- `ddp_find_unused_parameters`: None
|
499 |
+
- `ddp_bucket_cap_mb`: None
|
500 |
+
- `ddp_broadcast_buffers`: False
|
501 |
+
- `dataloader_pin_memory`: True
|
502 |
+
- `dataloader_persistent_workers`: False
|
503 |
+
- `skip_memory_metrics`: True
|
504 |
+
- `use_legacy_prediction_loop`: False
|
505 |
+
- `push_to_hub`: False
|
506 |
+
- `resume_from_checkpoint`: None
|
507 |
+
- `hub_model_id`: None
|
508 |
+
- `hub_strategy`: every_save
|
509 |
+
- `hub_private_repo`: False
|
510 |
+
- `hub_always_push`: False
|
511 |
+
- `gradient_checkpointing`: False
|
512 |
+
- `gradient_checkpointing_kwargs`: None
|
513 |
+
- `include_inputs_for_metrics`: False
|
514 |
+
- `eval_do_concat_batches`: True
|
515 |
+
- `fp16_backend`: auto
|
516 |
+
- `push_to_hub_model_id`: None
|
517 |
+
- `push_to_hub_organization`: None
|
518 |
+
- `mp_parameters`:
|
519 |
+
- `auto_find_batch_size`: False
|
520 |
+
- `full_determinism`: False
|
521 |
+
- `torchdynamo`: None
|
522 |
+
- `ray_scope`: last
|
523 |
+
- `ddp_timeout`: 1800
|
524 |
+
- `torch_compile`: False
|
525 |
+
- `torch_compile_backend`: None
|
526 |
+
- `torch_compile_mode`: None
|
527 |
+
- `dispatch_batches`: None
|
528 |
+
- `split_batches`: None
|
529 |
+
- `include_tokens_per_second`: False
|
530 |
+
- `include_num_input_tokens_seen`: False
|
531 |
+
- `neftune_noise_alpha`: None
|
532 |
+
- `optim_target_modules`: None
|
533 |
+
- `batch_eval_metrics`: False
|
534 |
+
- `eval_on_start`: False
|
535 |
+
- `use_liger_kernel`: False
|
536 |
+
- `eval_use_gather_object`: False
|
537 |
+
- `batch_sampler`: batch_sampler
|
538 |
+
- `multi_dataset_batch_sampler`: proportional
|
539 |
+
|
540 |
+
</details>
|
541 |
+
|
542 |
+
### Training Logs
|
543 |
+
| Epoch | Step | Training Loss |
|
544 |
+
|:------:|:-----:|:-------------:|
|
545 |
+
| 0.1217 | 500 | 2.0816 |
|
546 |
+
| 0.2433 | 1000 | 1.8989 |
|
547 |
+
| 0.3650 | 1500 | 1.7863 |
|
548 |
+
| 0.4866 | 2000 | 1.6893 |
|
549 |
+
| 0.6083 | 2500 | 1.7278 |
|
550 |
+
| 0.7299 | 3000 | 1.6332 |
|
551 |
+
| 0.8516 | 3500 | 1.5289 |
|
552 |
+
| 0.9732 | 4000 | 1.6122 |
|
553 |
+
| 1.0949 | 4500 | 1.5243 |
|
554 |
+
| 1.2165 | 5000 | 1.4054 |
|
555 |
+
| 1.3382 | 5500 | 1.5066 |
|
556 |
+
| 1.4599 | 6000 | 1.2831 |
|
557 |
+
| 1.5815 | 6500 | 1.4375 |
|
558 |
+
| 1.7032 | 7000 | 1.3062 |
|
559 |
+
| 1.8248 | 7500 | 1.3748 |
|
560 |
+
| 1.9465 | 8000 | 1.1605 |
|
561 |
+
| 2.0681 | 8500 | 1.2467 |
|
562 |
+
| 2.1898 | 9000 | 1.1417 |
|
563 |
+
| 2.3114 | 9500 | 1.26 |
|
564 |
+
| 2.4331 | 10000 | 1.0447 |
|
565 |
+
| 2.5547 | 10500 | 1.159 |
|
566 |
+
| 2.6764 | 11000 | 0.9982 |
|
567 |
+
| 2.7981 | 11500 | 1.0904 |
|
568 |
+
| 2.9197 | 12000 | 0.9434 |
|
569 |
+
|
570 |
+
|
571 |
+
### Framework Versions
|
572 |
+
- Python: 3.11.9
|
573 |
+
- Sentence Transformers: 3.1.1
|
574 |
+
- Transformers: 4.45.2
|
575 |
+
- PyTorch: 2.3.1+cu121
|
576 |
+
- Accelerate: 1.0.0
|
577 |
+
- Datasets: 3.0.1
|
578 |
+
- Tokenizers: 0.20.0
|
579 |
+
|
580 |
+
## Citation
|
581 |
+
|
582 |
+
### BibTeX
|
583 |
+
|
584 |
+
#### Sentence Transformers and SoftmaxLoss
|
585 |
+
```bibtex
|
586 |
+
@inproceedings{reimers-2019-sentence-bert,
|
587 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
588 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
589 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
590 |
+
month = "11",
|
591 |
+
year = "2019",
|
592 |
+
publisher = "Association for Computational Linguistics",
|
593 |
+
url = "https://arxiv.org/abs/1908.10084",
|
594 |
+
}
|
595 |
+
```
|
596 |
+
|
597 |
+
#### MultipleNegativesRankingLoss
|
598 |
+
```bibtex
|
599 |
+
@misc{henderson2017efficient,
|
600 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
601 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
602 |
+
year={2017},
|
603 |
+
eprint={1705.00652},
|
604 |
+
archivePrefix={arXiv},
|
605 |
+
primaryClass={cs.CL}
|
606 |
+
}
|
607 |
+
```
|
608 |
+
|
609 |
+
#### CoSENTLoss
|
610 |
+
```bibtex
|
611 |
+
@online{kexuefm-8847,
|
612 |
+
title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
|
613 |
+
author={Su Jianlin},
|
614 |
+
year={2022},
|
615 |
+
month={Jan},
|
616 |
+
url={https://kexue.fm/archives/8847},
|
617 |
+
}
|
618 |
+
```
|
619 |
+
|
620 |
+
<!--
|
621 |
+
## Glossary
|
622 |
+
|
623 |
+
*Clearly define terms in order to be accessible across audiences.*
|
624 |
+
-->
|
625 |
+
|
626 |
+
<!--
|
627 |
+
## Model Card Authors
|
628 |
+
|
629 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
630 |
+
-->
|
631 |
+
|
632 |
+
<!--
|
633 |
+
## Model Card Contact
|
634 |
+
|
635 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
636 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "allenai/longformer-base-4096",
|
3 |
+
"architectures": [
|
4 |
+
"LongformerModel"
|
5 |
+
],
|
6 |
+
"attention_mode": "longformer",
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"attention_window": [
|
9 |
+
512,
|
10 |
+
512,
|
11 |
+
512,
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512
|
21 |
+
],
|
22 |
+
"bos_token_id": 0,
|
23 |
+
"eos_token_id": 2,
|
24 |
+
"gradient_checkpointing": false,
|
25 |
+
"hidden_act": "gelu",
|
26 |
+
"hidden_dropout_prob": 0.1,
|
27 |
+
"hidden_size": 768,
|
28 |
+
"ignore_attention_mask": false,
|
29 |
+
"initializer_range": 0.02,
|
30 |
+
"intermediate_size": 3072,
|
31 |
+
"layer_norm_eps": 1e-05,
|
32 |
+
"max_position_embeddings": 4098,
|
33 |
+
"model_type": "longformer",
|
34 |
+
"num_attention_heads": 12,
|
35 |
+
"num_hidden_layers": 12,
|
36 |
+
"onnx_export": false,
|
37 |
+
"pad_token_id": 1,
|
38 |
+
"sep_token_id": 2,
|
39 |
+
"torch_dtype": "float32",
|
40 |
+
"transformers_version": "4.45.2",
|
41 |
+
"type_vocab_size": 1,
|
42 |
+
"vocab_size": 50265
|
43 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
+
"transformers": "4.45.2",
|
5 |
+
"pytorch": "2.3.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d07d8769dae10deca2cf5f0aa5e64c226fd57b086435e42dfcefee4b3bfa43f8
|
3 |
+
size 594668880
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 4096,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50264": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": false,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"model_max_length": 1000000000000000019884624838656,
|
52 |
+
"pad_token": "<pad>",
|
53 |
+
"sep_token": "</s>",
|
54 |
+
"tokenizer_class": "LongformerTokenizer",
|
55 |
+
"trim_offsets": true,
|
56 |
+
"unk_token": "<unk>"
|
57 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|